diff options
| author | leshe4ka46 <alex9102naid1@ya.ru> | 2025-10-19 14:47:59 +0300 |
|---|---|---|
| committer | leshe4ka46 <alex9102naid1@ya.ru> | 2025-10-19 14:47:59 +0300 |
| commit | e17a425dfb3382310fb5863f516dacdca9f44956 (patch) | |
| tree | 7babb3048d0eb20aa1e68e9b29c2acfa794ab96f /unipdf/extractor | |
| parent | 091963a50c3bb2926f559f01c49e8f5bd03d2bfd (diff) | |
fuck the unipdf licencing
Diffstat (limited to 'unipdf/extractor')
| -rw-r--r-- | unipdf/extractor/extractor.go | 1091 |
1 files changed, 1091 insertions, 0 deletions
diff --git a/unipdf/extractor/extractor.go b/unipdf/extractor/extractor.go new file mode 100644 index 0000000..d1ace52 --- /dev/null +++ b/unipdf/extractor/extractor.go @@ -0,0 +1,1091 @@ +// +// Copyright 2020 FoxyUtils ehf. All rights reserved. +// +// This is a commercial product and requires a license to operate. +// A trial license can be obtained at https://unidoc.io +// +// DO NOT EDIT: generated by unitwist Go source code obfuscator. +// +// Use of this source code is governed by the UniDoc End User License Agreement +// terms that can be accessed at https://unidoc.io/eula/ + +// Package extractor is used for quickly extracting PDF content through a simple interface. +// Currently offers functionality for extracting textual content. +package extractor ;import (_de "bytes";_dea "errors";_bc "fmt";_df "github.com/unidoc/unipdf/v4/common";_cb "github.com/unidoc/unipdf/v4/contentstream";_gc "github.com/unidoc/unipdf/v4/core";_bb "github.com/unidoc/unipdf/v4/internal/license";_cbf "github.com/unidoc/unipdf/v4/internal/textencoding"; +_aec "github.com/unidoc/unipdf/v4/internal/transform";_fd "github.com/unidoc/unipdf/v4/model";_fa "golang.org/x/image/draw";_ce "golang.org/x/text/unicode/norm";_ae "image";_ff "image/color";_a "io";_g "math";_d "reflect";_f "regexp";_bg "sort";_c "strings"; +_eb "unicode";_e "unicode/utf8";);func _ddag (_bbag *textWord ,_gfed float64 ,_gdbb ,_gfaf rulingList )*wordBag {_dfbb :=_aggfc (_bbag ._dcggb );_dggc :=[]*textWord {_bbag };_dega :=wordBag {_edaag :map[int ][]*textWord {_dfbb :_dggc },PdfRectangle :_bbag .PdfRectangle ,_dffab :_bbag ._gage ,_fba :_gfed ,_ecea :_gdbb ,_gbef :_gfaf }; +return &_dega ;};func (_bffd *textObject )getFillColor ()_ff .Color {return _gcegb (_bffd ._ceca .ColorspaceNonStroking ,_bffd ._ceca .ColorNonStroking );};func (_eaeg rulingList )findPrimSec (_fegd ,_acccf float64 )*ruling {for _ ,_eegb :=range _eaeg {if _cfdab (_eegb ._faad -_fegd )&&_eegb ._feae -_eedab <=_acccf &&_acccf <=_eegb ._feagb +_eedab 
{return _eegb ; +};};return nil ;};func (_cbfd rulingList )primaries ()[]float64 {_ddfb :=make (map[float64 ]struct{},len (_cbfd ));for _ ,_fegff :=range _cbfd {_ddfb [_fegff ._faad ]=struct{}{};};_afgb :=make ([]float64 ,len (_ddfb ));_dbac :=0;for _fcaaa :=range _ddfb {_afgb [_dbac ]=_fcaaa ; +_dbac ++;};_bg .Float64s (_afgb );return _afgb ;}; + +// TextMark represents extracted text on a page with information regarding both textual content, +// formatting (font and size) and positioning. +// It is the smallest unit of text on a PDF page, typically a single character. +// +// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text. +// The following code extracts the text on PDF page `page` into `text` then finds the bounding box +// `bbox` of substring `term` in `text`. +// +// ex, _ := New(page) +// // handle errors +// pageText, _, _, err := ex.ExtractPageText() +// // handle errors +// text := pageText.Text() +// textMarks := pageText.Marks() +// +// start := strings.Index(text, term) +// end := start + len(term) +// spanMarks, err := textMarks.RangeOffset(start, end) +// // handle errors +// bbox, ok := spanMarks.BBox() +// // handle errors +type TextMark struct{ + +// Text is the extracted text. +Text string ; + +// Original is the text in the PDF. It has not been decoded like `Text`. +Original string ; + +// BBox is the bounding box of the text. +BBox _fd .PdfRectangle ; + +// Font is the font the text was drawn with. +Font *_fd .PdfFont ; + +// FontSize is the font size the text was drawn with. +FontSize float64 ; + +// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this +// text, textMarks := pageText.Text(), pageText.Marks() +// marks := textMarks.Elements() +// then marks[i].Offset is the offset of marks[i].Text in text. +Offset int ; + +// Meta is set true for spaces and line breaks that we insert in the extracted text. 
We insert +// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical) +// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews(). +Meta bool ; + +// FillColor is the fill color of the text. +// The color is nil for spaces and line breaks (i.e. the Meta field is true). +FillColor _ff .Color ; + +// StrokeColor is the stroke color of the text. +// The color is nil for spaces and line breaks (i.e. the Meta field is true). +StrokeColor _ff .Color ; + +// Orientation is the text orientation +Orientation int ; + +// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get +// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction. +DirectObject _gc .PdfObject ; + +// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except +// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case +// ObjString spans more than one character string that falls in different TextMark objects. +ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;_eddg bool ;_ecfba *TextTable ;}; + +// Append appends `mark` to the mark array. +func (_cgbe *TextMarkArray )Append (mark TextMark ){_cgbe ._dbce =append (_cgbe ._dbce ,mark )}; + +// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause +// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three. +// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object +// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators"). 
+type RenderMode int ;func (_feff *textObject )getFontDirect (_bbc string )(*_fd .PdfFont ,error ){_bdfec ,_ccc :=_feff .getFontDict (_bbc );if _ccc !=nil {return nil ,_ccc ;};_edeb ,_ccc :=_fd .NewPdfFontFromPdfObject (_bdfec );if _ccc !=nil {_df .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbc ,_ccc ); +};return _edeb ,_ccc ;};var _ebbf string ="\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"; +func _babb (_bfge ,_ebg bounded )float64 {return _bfge .bbox ().Llx -_ebg .bbox ().Urx }; + +// WriteToFile writes the edited content to `outputPath`. 
+func (_cbb *Editor )WriteToFile (outputPath string )error {_gdgd ,_gfg :=_cbb ._dgb .ToWriter (nil );if _gfg !=nil {return _bc .Errorf ("\u0066\u0061\u0069\u006c\u0065\u0064\u0020\u0074\u006f\u0020c\u006f\u006e\u0076\u0065\u0072\u0074\u0020t\u006f\u0020\u0077\u0072\u0069\u0074\u0065\u0072\u0020\u0025\u0076",_gfg ); +};_gdgd .WriteToFile (outputPath );return nil ;};func (_ggc *imageExtractContext )extractFormImages (_gge *_gc .PdfObjectName ,_eccfb _cb .GraphicsState ,_fgb *_fd .PdfPageResources )error {_gaa ,_dgag :=_fgb .GetXObjectFormByName (*_gge );if _dgag !=nil {return _dgag ; +};if _gaa ==nil {return nil ;};_ecg ,_dgag :=_gaa .GetContentStream ();if _dgag !=nil {return _dgag ;};_aga :=_gaa .Resources ;if _aga ==nil {_aga =_fgb ;};_dgag =_ggc .extractContentStreamImages (string (_ecg ),_aga );if _dgag !=nil {return _dgag ;};_ggc ._eccf ++; +return nil ;};func _gcdf (_dgac _fd .PdfRectangle ,_fdfaa ,_bedec ,_fcfeg ,_gagc *ruling )gridTile {_fcda :=_dgac .Llx ;_afea :=_dgac .Urx ;_fgec :=_dgac .Lly ;_afgee :=_dgac .Ury ;return gridTile {PdfRectangle :_dgac ,_agbbg :_fdfaa !=nil &&_fdfaa .encloses (_fgec ,_afgee ),_cbag :_bedec !=nil &&_bedec .encloses (_fgec ,_afgee ),_eeeaf :_fcfeg !=nil &&_fcfeg .encloses (_fcda ,_afea ),_eecbb :_gagc !=nil &&_gagc .encloses (_fcda ,_afea )}; +};func (_effcg rulingList )removeDuplicates ()rulingList {if len (_effcg )==0{return nil ;};_effcg .sort ();_acea :=rulingList {_effcg [0]};for _ ,_eaed :=range _effcg [1:]{if _eaed .equals (_acea [len (_acea )-1]){continue ;};_acea =append (_acea ,_eaed ); +};return _acea ;};func (_ebgf *textTable )log (_gacea string ){if !_adfgd {return ;};_df .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_gacea ,_ebgf ._deafe ,_ebgf ._bfffcc ,_ebgf ._bbcb ,_ebgf .PdfRectangle ); +for _cefgf :=0;_cefgf < _ebgf ._bfffcc ;_cefgf ++{for _gdcc :=0;_gdcc 
< _ebgf ._deafe ;_gdcc ++{_gabc :=_ebgf .get (_gdcc ,_cefgf );if _gabc ==nil {continue ;};_bc .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_gdcc ,_cefgf ,_gabc .PdfRectangle ,_fdafd (_gabc .text (),50),_e .RuneCountInString (_gabc .text ())); +};};};func (_facde *wordBag )sort (){for _ ,_bbde :=range _facde ._edaag {_bg .Slice (_bbde ,func (_aabg ,_eefbd int )bool {return _dfcf (_bbde [_aabg ],_bbde [_eefbd ])< 0});};};func (_acfg *textPara )writeCellText (_dfdaa _a .Writer ){for _ecgcb ,_dbbf :=range _acfg ._bgdfb {_dbbe :=_dbbf .text (); +_cbgef :=_agfcb &&_dbbf .endsInHyphen ()&&_ecgcb !=len (_acfg ._bgdfb )-1;if _cbgef {_dbbe =_eddge (_dbbe );};_dfdaa .Write ([]byte (_dbbe ));if !(_cbgef ||_ecgcb ==len (_acfg ._bgdfb )-1){_dfdaa .Write ([]byte (_fggg (_dbbf ._eefeg ,_acfg ._bgdfb [_ecgcb +1]._eefeg ))); +};};};func (_caggd rulingList )isActualGrid ()(rulingList ,bool ){_fcdge ,_bcaa :=_caggd .augmentGrid ();if !(len (_fcdge )>=_dbfd +1&&len (_bcaa )>=_afecc +1){if _bfdd {_df .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_fcdge ),len (_bcaa ),_dbfd +1,_afecc +1); +};return nil ,false ;};if _bfdd {_df .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_caggd ,len (_fcdge )>=2,len (_bcaa )>=2,len (_fcdge )>=2&&len (_bcaa )>=2); +for _eedag ,_cdgf :=range _caggd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_eedag ,_cdgf );};};if _ebdbc {_edff ,_gaae :=_fcdge [0],_fcdge [len (_fcdge )-1];_ccfbd ,_ecbe :=_bcaa [0],_bcaa [len (_bcaa )-1];if !(_ecee (_edff ._faad -_ccfbd ._feae )&&_ecee (_gaae ._faad -_ccfbd ._feagb )&&_ecee (_ccfbd ._faad -_edff ._feagb )&&_ecee 
(_ecbe ._faad -_edff ._feae )){if _bfdd {_df .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_edff ,_gaae ,_ccfbd ,_ecbe ); +};return nil ,false ;};}else {if !_fcdge .aligned (){if _addf {_df .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_fcdge )); +};return nil ,false ;};if !_bcaa .aligned (){if _bfdd {_df .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_bcaa )); +};return nil ,false ;};};_dacd :=append (_fcdge ,_bcaa ...);return _dacd ,true ;}; + +// String returns a human readable description of `path`. 
+func (_ecca *subpath )String ()string {_aggcf :=_ecca ._gcac ;_cafg :=len (_aggcf );if _cafg <=5{return _bc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_cafg ,_aggcf );};return _bc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_cafg ,_aggcf [0],_aggcf [1],_aggcf [_cafg -1]); +};func (_baea *textObject )setTextRenderMode (_cgda int ){if _baea ==nil {return ;};_baea ._ddgb ._bag =RenderMode (_cgda );};func (_fabd *textObject )getCurrentFont ()*_fd .PdfFont {_efggc :=_fabd ._ddgb ._fedd ;if _efggc ==nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e"); +return _fd .DefaultFont ();};return _efggc ;};func _dafd (_bdf *TextMarkArray )[]*TextMarkArray {_bee :=_bdf .Elements ();_edae :=len (_bee );var _bdg _gc .PdfObject ;_bcfb :=[]*TextMarkArray {};_aed :=&TextMarkArray {};_gfcf :=-1;for _cad ,_fgcc :=range _bee {_dgec :=_fgcc .DirectObject ; +_gfcf =_fgcc .Index ;if _dgec ==nil {_bfde :=_dged (_bdf ,_cad );if _bdg !=nil {if _bfde ==-1||_bfde > _cad {_bcfb =append (_bcfb ,_aed );_aed =&TextMarkArray {};};};}else if _bdg ==nil {if _gfcf ==0&&_cad > 0{_bcfb =append (_bcfb ,_aed );_aed =&TextMarkArray {}; +};}else {if _dgec !=_bdg {_bcfb =append (_bcfb ,_aed );_aed =&TextMarkArray {};};};_bdg =_dgec ;_aed .Append (_fgcc );if _cad ==(_edae -1){_bcfb =append (_bcfb ,_aed );};};return _bcfb ;};func (_bcbg *subpath )isQuadrilateral ()bool {if len (_bcbg ._gcac )< 4||len (_bcbg ._gcac )> 5{return false ; +};if len (_bcbg ._gcac )==5{_befbf :=_bcbg ._gcac [0];_aefcf :=_bcbg ._gcac [4];if _befbf .X !=_aefcf .X ||_befbf .Y !=_aefcf .Y {return false ;};};return true ;};var _egbg =map[markKind ]string {_bcfag :"\u0073\u0074\u0072\u006f\u006b\u0065",_febcb :"\u0066\u0069\u006c\u006c",_degca 
:"\u0061u\u0067\u006d\u0065\u006e\u0074"}; +type lineRuling struct{_acbd rulingKind ;_aada markKind ;_ff .Color ;_cebge ,_cfcd _aec .Point ;};func (_agbc *wordBag )applyRemovals (_cdgd map[int ]map[*textWord ]struct{}){for _acgba ,_abcgd :=range _cdgd {if len (_abcgd )==0{continue ;};_cggbc :=_agbc ._edaag [_acgba ]; +_defc :=len (_cggbc )-len (_abcgd );if _defc ==0{delete (_agbc ._edaag ,_acgba );continue ;};_dfgb :=make ([]*textWord ,_defc );_cfgc :=0;for _ ,_dcbc :=range _cggbc {if _ ,_gad :=_abcgd [_dcbc ];!_gad {_dfgb [_cfgc ]=_dcbc ;_cfgc ++;};};_agbc ._edaag [_acgba ]=_dfgb ; +};};func _bfcbce (_bgcc _aec .Point )*subpath {return &subpath {_gcac :[]_aec .Point {_bgcc }}};func (_dbfef rulingList )blocks (_fdbbf ,_dcaeb *ruling )bool {if _fdbbf ._feae > _dcaeb ._feagb ||_dcaeb ._feae > _fdbbf ._feagb {return false ;};_gffg :=_g .Max (_fdbbf ._feae ,_dcaeb ._feae ); +_cfaeb :=_g .Min (_fdbbf ._feagb ,_dcaeb ._feagb );if _fdbbf ._faad > _dcaeb ._faad {_fdbbf ,_dcaeb =_dcaeb ,_fdbbf ;};for _ ,_abfca :=range _dbfef {if _fdbbf ._faad <=_abfca ._faad +_fbc &&_abfca ._faad <=_dcaeb ._faad +_fbc &&_abfca ._feae <=_cfaeb &&_gffg <=_abfca ._feagb {return true ; +};};return false ;};func (_efea *textTable )newTablePara ()*textPara {_acfd :=_efea .computeBbox ();_afgec :=&textPara {PdfRectangle :_acfd ,_ecedc :_acfd ,_bbdgd :_efea };if _adfgd {_df .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_afgec ); +};return _afgec ;}; + +// String returns a string describing `pt`. 
+func (_bfffc PageText )String ()string {_egea :=_bc .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_bfffc ._facbc ));_fagce :=[]string {"\u002d"+_egea };for _ ,_bfgd :=range _bfffc ._facbc {_fagce =append (_fagce ,_bfgd .String ()); +};_fagce =append (_fagce ,"\u002b"+_egea );return _c .Join (_fagce ,"\u000a");};type wordBag struct{_fd .PdfRectangle ;_dffab float64 ;_ecea ,_gbef rulingList ;_fba float64 ;_edaag map[int ][]*textWord ;};func (_eeecd lineRuling )yMean ()float64 {return 0.5*(_eeecd ._cebge .Y +_eeecd ._cfcd .Y )}; + + +// TableCell is a cell in a TextTable. +type TableCell struct{_fd .PdfRectangle ; + +// Text is the extracted text. +Text string ; + +// Marks returns the TextMarks corresponding to the text in Text. +Marks TextMarkArray ;};func _cdfc (_cfdf []byte ,_dgeb *_fd .PdfFont )string {_bdb :=_dgeb .BytesToCharcodes (_cfdf );_ggee ,_bfce ,_dcaa :=_dgeb .CharcodesToStrings (_bdb ,"");if _dcaa > 0{_df .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_bfce ,_dcaa ); +};_eab :=_c .Join (_ggee ,"");return _eab ;};func _efba (_cccf []*textWord ,_cdgg float64 ,_accc ,_gebb rulingList )*wordBag {_badfb :=_ddag (_cccf [0],_cdgg ,_accc ,_gebb );for _ ,_aecg :=range _cccf [1:]{_abdb :=_aggfc (_aecg ._dcggb );_badfb ._edaag [_abdb ]=append (_badfb ._edaag [_abdb ],_aecg ); +_badfb .PdfRectangle =_gcacd (_badfb .PdfRectangle ,_aecg .PdfRectangle );};_badfb .sort ();return _badfb ;};type bounded interface{bbox ()_fd .PdfRectangle };func _fadg (_facaf *_fd .StructTreeRoot ,_aefbc map[int ][]*textLine ,_gabe _gc .PdfObject )[]*list {if _facaf ==nil {_df .Log .Debug ("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c"); +return nil ;};_fggcgb :=[]*_fd .KValue {};var 
_cfcc func (_fdaaa *_fd .KValue );_cfcc =func (_acdbd *_fd .KValue ){if _egbd :=_acdbd .GetKDict ();_egbd !=nil {if _egbd .S .String ()=="\u004c"{_fggcgb =append (_fggcgb ,_acdbd );}else {for _ ,_egdb :=range _egbd .GetChildren (){_cfcc (_egdb ); +};};};};for _ ,_cfca :=range _facaf .K {_befe :=_fd .NewKValue ();_befe .SetKDict (_cfca );_cfcc (_befe );};_gcffg :=_dcead (_fggcgb ,_aefbc ,_gabe );var _feaa []*list ;for _ ,_fbdcc :=range _gcffg {_dfeaa :=_cgfc (_fbdcc );_feaa =append (_feaa ,_dfeaa ...); +};return _feaa ;};func (_gbfca *textLine )bbox ()_fd .PdfRectangle {return _gbfca .PdfRectangle }; + +// Marks returns the TextMark collection for a page. It represents all the text on the page. +func (_efac PageText )Marks ()*TextMarkArray {return &TextMarkArray {_dbce :_efac ._cgf }};type fontEntry struct{_feea *_fd .PdfFont ;_cbda int64 ;};func (_gfcg *PageText )getParagraphs ()paraList {var _bdbf rulingList ;if _fcce {_ceaab :=_bgfe (_gfcg ._bcgae ); +_bdbf =append (_bdbf ,_ceaab ...);};if _bbgb {_acb :=_adbcg (_gfcg ._ffad );_bdbf =append (_bdbf ,_acb ...);};_bdbf ,_ccgg :=_bdbf .toTilings ();var _degfg paraList ;_egd :=len (_gfcg ._facbc );for _edca :=0;_edca < 360&&_egd > 0;_edca +=90{_fbf :=make ([]*textMark ,0,len (_gfcg ._facbc )-_egd ); +for _ ,_aecc :=range _gfcg ._facbc {if _aecc ._gdcb ==_edca {_fbf =append (_fbf ,_aecc );};};if len (_fbf )> 0{_fgd :=_gcbf (_fbf ,_gfcg ._fdcd ,_bdbf ,_ccgg ,_gfcg ._fcaa ._egfd ==ExtractionModeLayoutNoBreaks );_degfg =append (_degfg ,_fgd ...);_egd -=len (_fbf ); +};};return _degfg ;};func _cgfc (_fgfb *list )[]*list {var _dfga []*list ;for _ ,_abdbe :=range _fgfb ._eegf {switch _abdbe ._ecbgd {case "\u004c\u0049":_gddgg :=_gebbe (_abdbe );_gdgdd :=_cgfc (_abdbe );_efab :=_dead (_gddgg ,"\u0062\u0075\u006c\u006c\u0065\u0074",_gdgdd ); +_fbde :=_gbcd (_gddgg ,"");_efab ._eeca =_fbde ;_dfga =append (_dfga ,_efab );case "\u004c\u0042\u006fd\u0079":return _cgfc (_abdbe );case "\u004c":_gfgd :=_cgfc (_abdbe );_dfga =append 
(_dfga ,_gfgd ...);return _dfga ;};};return _dfga ;};func (_bfbb *stateStack )pop ()*textState {if _bfbb .empty (){return nil ; +};_eba :=*(*_bfbb )[len (*_bfbb )-1];*_bfbb =(*_bfbb )[:len (*_bfbb )-1];return &_eba ;};type subpath struct{_gcac []_aec .Point ;_gdgde bool ;};func _aeae (_ggfac string ,_afg ,_cgca int ,_gbd string )string {if _cgca > len (_ggfac )-1{return _ggfac [:_afg ]+_gbd ; +};return _ggfac [:_afg ]+_gbd +_ggfac [_cgca :];};func (_bfdb *shapesState )devicePoint (_cecfc ,_aafd float64 )_aec .Point {_adcb :=_bfdb ._fagg .Mult (_bfdb ._cdg );_cecfc ,_aafd =_adcb .Transform (_cecfc ,_aafd );return _aec .NewPoint (_cecfc ,_aafd ); +};func _gggbe (_efbae ,_geda *textPara )bool {if _efbae ._gfadg ||_geda ._gfadg {return true ;};return _cfdab (_efbae .depth ()-_geda .depth ());};func (_eagg *textLine )markWordBoundaries (){_geaa :=_decb *_eagg ._acaea ;for _gddd ,_adcg :=range _eagg ._eecg [1:]{if _babb (_adcg ,_eagg ._eecg [_gddd ])>=_geaa {_adcg ._debgd =true ; +};};}; + +// String returns a human readable description of `s`. 
+func (_agadbc intSet )String ()string {var _eaecd []int ;for _dbdf :=range _agadbc {if _agadbc .has (_dbdf ){_eaecd =append (_eaecd ,_dbdf );};};_bg .Ints (_eaecd );return _bc .Sprintf ("\u0025\u002b\u0076",_eaecd );};func (_ebda *textObject )renderText (_ffba _gc .PdfObject ,_bce []byte ,_gde int ,_gecd string )error {if _ebda ._gbga {_df .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e"); +return nil ;};_dcdc :=_ebda .getCurrentFont ();_dfag :=_dcdc .BytesToCharcodes (_bce );var (_fcge []string ;_fcab int ;_cefd int ;);if _gecd !=""{_fcge =[]string {_gc .MakeString (_gecd ).Decoded ()};_fcab =1;}else {_fcge ,_fcab ,_cefd =_dcdc .CharcodesToStrings (_dfag ,""); +if _cefd > 0{_df .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_fcab ,_cefd );};};_ebda ._ddgb ._acdb +=_fcab ;_ebda ._ddgb ._dcea +=_cefd ; +_adfd :=_ebda ._ddgb ;_ecbg :=_adfd ._ddeg ;_gceed :=_adfd ._fgcd /100.0;_gcdbg :=_dgecc ;if _dcdc .Subtype ()=="\u0054\u0079\u0070e\u0033"{_gcdbg =1;};_aefa ,_ddb :=_dcdc .GetRuneMetrics (' ');if !_ddb {_aefa ,_ddb =_dcdc .GetCharMetrics (32);};if !_ddb {_aefa ,_ =_fd .DefaultFont ().GetRuneMetrics (' '); +};_bacd :=_aefa .Wx *_gcdbg ;_df .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_bacd ,_fcge ,_dcdc ,_ecbg ); +_cfaf :=_aec .NewMatrix (_ecbg *_gceed ,0,0,_ecbg ,0,_adfd ._abbcb );if _gebg {_df .Log .Info 
("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_dfag ),_dfag ,_fcge ); +};_df .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_dfag ),_dfag ,len (_fcge ));_ccdf :=_ebda .getFillColor (); +_aaaf :=_ebda .getStrokeColor ();for _bccg ,_dgbb :=range _fcge {_dgeg :=[]rune (_dgbb );if len (_dgeg )==1&&_dgeg [0]=='\x00'{continue ;};_bdcf :=_dfag [_bccg ];_cegb :=_ebda ._ceca .CTM .Mult (_ebda ._def ).Mult (_cfaf );_cecf :=0.0;if len (_dgeg )==1&&_dgeg [0]==32{_cecf =_adfd ._cdeb ; +};_acae ,_aggba :=_dcdc .GetCharMetrics (_bdcf );if !_aggba {_df .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_bdcf ,_dgeg ,_dgeg ,_dcdc ); +return _bc .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_dcdc .String (),_bdcf );};_ebdb :=_aec .Point {X :_acae .Wx *_gcdbg ,Y :_acae .Wy *_gcdbg }; +_efeb :=_aec .Point {X :(_ebdb .X *_ecbg +_cecf )*_gceed };_bfbg :=_aec .Point {X :(_ebdb .X *_ecbg +_adfd ._bdab +_cecf )*_gceed };if _gebg {_df .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_ecbg ,_adfd ._bdab ,_adfd ._cdeb ,_gceed ); +_df .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_ebdb ,_efeb ,_bfbg );};_fbgg :=_facc (_efeb );_cbgb :=_facc 
(_bfbg );_feba :=_ebda ._ceca .CTM .Mult (_ebda ._def ).Mult (_fbgg ); +if _ffdd {_df .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_ebda ._ceca .CTM ,_ebda ._def ,_cbgb ,_ecgaa (_ebda ._ceca .CTM .Mult (_ebda ._def ).Mult (_cbgb )),_fbgg ,_feba ,_ecgaa (_feba )); +};_badf ,_ffdf :=_ebda .newTextMark (_cbf .ExpandLigatures (_dgeg ),_cegb ,_ecgaa (_feba ),_g .Abs (_bacd *_cegb .ScalingFactorX ()),_dcdc ,_ebda ._ddgb ._bdab ,_ccdf ,_aaaf ,_ffba ,_fcge ,_bccg ,_gde );if !_ffdf {_df .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067"); +continue ;};if _dcdc ==nil {_df .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _dcdc .Encoder ()==nil {_df .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_dcdc ); +}else {if _dffa ,_cdad :=_dcdc .Encoder ().CharcodeToRune (_bdcf );_cdad {_badf ._aeeb =string (_dffa );};};_df .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_bccg ,_bdcf ,_badf ,_cegb ); +_ebda ._agfc =append (_ebda ._agfc ,&_badf );_ebda ._def .Concat (_cbgb );};return nil ;}; + +// String returns a description of `t`. 
+func (_dggef *textTable )String ()string {return _bc .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_dggef ._deafe ,_dggef ._bfffcc ,_dggef ._bbcb );};func (_gfgca *textTable )getComposite (_agccf ,_caeg int )(paraList ,_fd .PdfRectangle ){_gaff ,_bead :=_gfgca ._bbgcf [_fbacd (_agccf ,_caeg )]; +if _adfgd {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_agccf ,_caeg ,_gaff .String ());};if !_bead {return nil ,_fd .PdfRectangle {}; +};return _gaff .parasBBox ();};func _dccbd (_fabc func (*wordBag ,*textWord ,float64 )bool ,_ceea float64 )func (*wordBag ,*textWord )bool {return func (_befd *wordBag ,_bbgd *textWord )bool {return _fabc (_befd ,_bbgd ,_ceea )};};const (_acde =1.0e-6; +_egaff =1.0e-4;_bfcda =10;_gcfb =6;_eabab =0.5;_aeba =0.12;_aadb =0.19;_ebgd =0.04;_acff =1.0;_bbaa =0.04;_dbbg =12;_eade =0.4;_aaea =0.7;_cbge =1.0;_bage =0.1;_edga =1.4;_ccbf =0.46;_decb =0.02;_eddb =0.2;_bbdc =0.5;_caefg =4;_febf =4.0;_bacc =6;_fgae =0.3; +_ecbb =0.01;_fedb =0.02;_dbfd =2;_afecc =2;_bcfe =500;_bccgd =4.0;_gffbb =0.05;_babd =0.1;_eedab =2.0;_fbc =2.0;_fdfb =1.5;_fbdc =3.0;_bcd =0.25;);func (_ffeg *subpath )removeDuplicates (){if len (_ffeg ._gcac )==0{return ;};_fdaf :=[]_aec .Point {_ffeg ._gcac [0]}; +for _ ,_dbfe :=range _ffeg ._gcac [1:]{if !_fcfde (_dbfe ,_fdaf [len (_fdaf )-1]){_fdaf =append (_fdaf ,_dbfe );};};_ffeg ._gcac =_fdaf ;};func (_dced *TextMarkArray )getTextMarkAtOffset (_gfcgf int )*TextMark {for _ ,_bcbae :=range _dced ._dbce {if _bcbae .Offset ==_gfcgf {return &_bcbae ; +};};return nil ;};func _bfcdb (_baeeg map[int ]intSet )[]int {_bdda :=make ([]int ,0,len (_baeeg ));for _gdegf :=range _baeeg {_bdda =append (_bdda ,_gdegf );};_bg .Ints (_bdda );return _bdda ;};var _ecebe =_f .MustCompile 
("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024"); +func (_geeeg *shapesState )moveTo (_bffb ,_cegf float64 ){_geeeg ._baffg =true ;_geeeg ._fabe =_geeeg .devicePoint (_bffb ,_cegf );if _face {_df .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_bffb ,_cegf ,_geeeg ._fabe ); +};};func (_aedfa *textTable )reduceTiling (_afead gridTiling ,_geedf float64 )*textTable {_egeec :=make ([]int ,0,_aedfa ._bfffcc );_baefe :=make ([]int ,0,_aedfa ._deafe );_daaba :=_afead ._afdaa ;_cafc :=_afead ._cecbb ;for _gccdf :=0;_gccdf < _aedfa ._bfffcc ; +_gccdf ++{_daaf :=_gccdf > 0&&_g .Abs (_cafc [_gccdf -1]-_cafc [_gccdf ])< _geedf &&_aedfa .emptyCompositeRow (_gccdf );if !_daaf {_egeec =append (_egeec ,_gccdf );};};for _gdcbc :=0;_gdcbc < _aedfa ._deafe ;_gdcbc ++{_gbacb :=_gdcbc < _aedfa ._deafe -1&&_g .Abs (_daaba [_gdcbc +1]-_daaba [_gdcbc ])< _geedf &&_aedfa .emptyCompositeColumn (_gdcbc ); +if !_gbacb {_baefe =append (_baefe ,_gdcbc );};};if len (_egeec )==_aedfa ._bfffcc &&len (_baefe )==_aedfa ._deafe {return _aedfa ;};_ebddb :=textTable {_bbcb :_aedfa ._bbcb ,_deafe :len (_baefe ),_bfffcc :len (_egeec ),_bbgcf :make (map[uint64 ]compositeCell ,len (_baefe )*len (_egeec ))}; +if _adfgd {_df .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_aedfa ._deafe ,_aedfa ._bfffcc ,len (_baefe ),len (_egeec ));_df .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_baefe ); +_df .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_egeec );};for _dbgb ,_gacb :=range _egeec {for _fcdab ,_ffbfb :=range _baefe {_aafeg ,_gafbe :=_aedfa .getComposite 
(_ffbfb ,_gacb );if len (_aafeg )==0{continue ; +};if _adfgd {_bc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_fcdab ,_dbgb ,_ffbfb ,_gacb ,_fdafd (_aafeg .merge ().text (),50));};_ebddb .putComposite (_fcdab ,_dbgb ,_aafeg ,_gafbe ); +};};return &_ebddb ;};func (_fdca *textPara )text ()string {_fbad :=new (_de .Buffer );_fdca .writeText (_fbad );return _fbad .String ();}; + +// String returns a human readable description of `ss`. +func (_cffef *shapesState )String ()string {return _bc .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_cffef ._bgf ),_cffef ._baffg );};func _ecebf (_fdea ,_dfbe ,_eabde float64 )rulingKind {if _fdea >=_eabde &&_cecff (_dfbe ,_fdea ){return _caec ; +};if _dfbe >=_eabde &&_cecff (_fdea ,_dfbe ){return _bfdef ;};return _bfba ;};func _fbac (_fbbcb _fd .PdfRectangle )*ruling {return &ruling {_fbgb :_caec ,_faad :_fbbcb .Ury ,_feae :_fbbcb .Llx ,_feagb :_fbbcb .Urx };};type event struct{_aeef float64 ; +_ceedf bool ;_cfgde int ;};func (_ddgag *textPara )writeText (_bffbg _a .Writer ){if _ddgag ._bbdgd ==nil {_ddgag .writeCellText (_bffbg );return ;};for _bgdfbe :=0;_bgdfbe < _ddgag ._bbdgd ._bfffcc ;_bgdfbe ++{for _aggdc :=0;_aggdc < _ddgag ._bbdgd ._deafe ; +_aggdc ++{_dcaaa :=_ddgag ._bbdgd .get (_aggdc ,_bgdfbe );if _dcaaa ==nil {_bffbg .Write ([]byte ("\u0009"));}else {_eead (_dcaaa ._bgdfb );_dcaaa .writeCellText (_bffbg );};_bffbg .Write ([]byte ("\u0020"));};if _bgdfbe < _ddgag ._bbdgd ._bfffcc -1{_bffbg .Write ([]byte ("\u000a")); +};};}; + +// Text gets the extracted text contained in `l`. 
+func (_cfaec *list )Text ()string {_ebcg :=&_c .Builder {};_bece :="";_fdebf (_cfaec ,_ebcg ,&_bece );return _ebcg .String ();};func (_ccdfg intSet )has (_fcegc int )bool {_ ,_cfea :=_ccdfg [_fcegc ];return _cfea };func (_gggf *textWord )bbox ()_fd .PdfRectangle {return _gggf .PdfRectangle }; +func _cgcg (_bad *TextMarkArray )string {_egab :="";for _ ,_cda :=range _bad .Elements (){_egab +=_cda .Text ;};return _egab ;};func (_dgga *wordBag )removeDuplicates (){if _bgede {_df .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_dgga .text ()); +};for _ ,_cdggc :=range _dgga .depthIndexes (){if len (_dgga ._edaag [_cdggc ])==0{continue ;};_cagef :=_dgga ._edaag [_cdggc ][0];_agcd :=_eddb *_cagef ._gage ;_gfebb :=_cagef ._dcggb ;for _ ,_babe :=range _dgga .depthBand (_gfebb ,_gfebb +_agcd ){_bgdbg :=map[*textWord ]struct{}{}; +_gbabg :=_dgga ._edaag [_babe ];for _ ,_aafa :=range _gbabg {if _ ,_eccb :=_bgdbg [_aafa ];_eccb {continue ;};for _ ,_acfcg :=range _gbabg {if _ ,_efdd :=_bgdbg [_acfcg ];_efdd {continue ;};if _acfcg !=_aafa &&_acfcg ._gabac ==_aafa ._gabac &&_g .Abs (_acfcg .Llx -_aafa .Llx )< _agcd &&_g .Abs (_acfcg .Urx -_aafa .Urx )< _agcd &&_g .Abs (_acfcg .Lly -_aafa .Lly )< _agcd &&_g .Abs (_acfcg .Ury -_aafa .Ury )< _agcd {_bgdbg [_acfcg ]=struct{}{}; +};};};if len (_bgdbg )> 0{_fgccce :=0;for _ ,_fdcb :=range _gbabg {if _ ,_cbddb :=_bgdbg [_fdcb ];!_cbddb {_gbabg [_fgccce ]=_fdcb ;_fgccce ++;};};_dgga ._edaag [_babe ]=_gbabg [:len (_gbabg )-len (_bgdbg )];if len (_dgga ._edaag [_babe ])==0{delete (_dgga ._edaag ,_babe ); +};};};};};func (_ccbgc rulingList )primMinMax ()(float64 ,float64 ){_bgca ,_ebfd :=_ccbgc [0]._faad ,_ccbgc [0]._faad ;for _ ,_ffca :=range _ccbgc [1:]{if _ffca ._faad < _bgca {_bgca =_ffca ._faad ;}else if _ffca ._faad > _ebfd {_ebfd =_ffca ._faad ;}; +};return _bgca ,_ebfd ;};func _cfefc (_cedb *textLine )bool {_ccfbc :=true ;_dffef :=-1;for _ ,_fdaa :=range _cedb ._eecg {for 
_ ,_dagg :=range _fdaa ._fcdae {_ebgc :=_dagg ._cgaa ;if _dffef ==-1{_dffef =_ebgc ;}else {if _dffef !=_ebgc {_ccfbc =false ; +break ;};};};};return _ccfbc ;};func (_cgbb *textPara )taken ()bool {return _cgbb ==nil ||_cgbb ._ddega };func (_cacf *shapesState )addPoint (_ffeb ,_daeb float64 ){_ceff :=_cacf .establishSubpath ();_dagfb :=_cacf .devicePoint (_ffeb ,_daeb );if _ceff ==nil {_cacf ._baffg =true ; +_cacf ._fabe =_dagfb ;}else {_ceff .add (_dagfb );};};func (_cegd *wordBag )depthBand (_daebe ,_gdge float64 )[]int {if len (_cegd ._edaag )==0{return nil ;};return _cegd .depthRange (_cegd .getDepthIdx (_daebe ),_cegd .getDepthIdx (_gdge ));}; + +// String returns a description of `l`. +func (_bcge *textLine )String ()string {return _bc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_bcge ._eefeg ,_bcge .PdfRectangle ,_bcge ._acaea ,_bcge .text ()); +};func (_fafab paraList )reorder (_fgag []int ){_cffbf :=make (paraList ,len (_fafab ));for _dafc ,_gae :=range _fgag {_cffbf [_dafc ]=_fafab [_gae ];};copy (_fafab ,_cffbf );};func _dged (_ead *TextMarkArray ,_deg int )int {_ace :=_ead .Elements ();_ecga :=_deg -1; +_agf :=_deg +1;_fge :=-1;if _ecga >=0{_gcedf :=_ace [_ecga ];_cfaa :=_gcedf .ObjString ;_fcae :=len (_cfaa );_aaf :=_gcedf .Index ;if _aaf +1< _fcae {return _ecga ;};};if _agf < len (_ace ){_gbf :=_ace [_agf ];_fac :=_gbf .ObjString ;if _fac [0]!=_gbf .Text {return _agf ; +};};if _fge ==-1&&_ace [_deg ].Text =="\u0020"{return _ecga ;};return _fge ;};func (_agfae *textTable )growTable (){_cbgeb :=func (_ageb paraList ){_agfae ._bfffcc ++;for _fbcb :=0;_fbcb < _agfae ._deafe ;_fbcb ++{_cgfb :=_ageb [_fbcb ];_agfae .put (_fbcb ,_agfae ._bfffcc -1,_cgfb ); +};};_aeac :=func (_aagc paraList ){_agfae ._deafe ++;for _bdbd :=0;_bdbd < _agfae ._bfffcc ;_bdbd ++{_bcef :=_aagc [_bdbd ];_agfae .put (_agfae ._deafe -1,_bdbd ,_bcef );};};if 
_dabe {_agfae .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _dcdbf :=0; +;_dcdbf ++{_cgaab :=false ;_cdfg :=_agfae .getDown ();_aebd :=_agfae .getRight ();if _dabe {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dcdbf ,_agfae );_bc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_cdfg ); +_bc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_aebd );};if _cdfg !=nil &&_aebd !=nil {_afdf :=_cdfg [len (_cdfg )-1];if !_afdf .taken ()&&_afdf ==_aebd [len (_aebd )-1]{_cbgeb (_cdfg );if _aebd =_agfae .getRight (); +_aebd !=nil {_aeac (_aebd );_agfae .put (_agfae ._deafe -1,_agfae ._bfffcc -1,_afdf );};_cgaab =true ;};};if !_cgaab &&_cdfg !=nil {_cbgeb (_cdfg );_cgaab =true ;};if !_cgaab &&_aebd !=nil {_aeac (_aebd );_cgaab =true ;};if !_cgaab {break ;};};};func _facc (_gdf _aec .Point )_aec .Matrix {return _aec .TranslationMatrix (_gdf .X ,_gdf .Y )}; +func (_dcbcb gridTiling )complete ()bool {for _ ,_gbgae :=range _dcbcb ._cdeeb {for _ ,_fefgc :=range _gbgae {if !_fefgc .complete (){return false ;};};};return true ;};type textLine struct{_fd .PdfRectangle ;_eefeg float64 ;_eecg []*textWord ;_acaea float64 ; +};func _gdag (_edfe []TextMark ,_gggb *int ,_eeba TextMark )[]TextMark {_eeba .Offset =*_gggb ;_edfe =append (_edfe ,_eeba );*_gggb +=len (_eeba .Text );return _edfe ;};func _ggbf (_fbaa []*textWord ,_cgefe *textWord )[]*textWord {for _ebbec ,_egcdf :=range _fbaa {if _egcdf ==_cgefe {return _aedb (_fbaa ,_ebbec ); +};};_df .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_cgefe ); +return nil ;};func (_fcfc *shapesState )clearPath (){_fcfc ._bgf =nil ;_fcfc ._baffg =false ;if _face {_df .Log .Info 
("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_fcfc );};};func _fbacd (_adeeg ,_bcfbc int )uint64 {return uint64 (_adeeg )*0x1000000+uint64 (_bcfbc )}; +func (_ecfab *textPara )toCellTextMarks (_badfg *int )[]TextMark {var _bedg []TextMark ;for _ggcb ,_cdec :=range _ecfab ._bgdfb {_fgbbb :=_cdec .toTextMarks (_badfg );_abe :=_agfcb &&_cdec .endsInHyphen ()&&_ggcb !=len (_ecfab ._bgdfb )-1;if _abe {_fgbbb =_fcefc (_fgbbb ,_badfg ); +};_bedg =append (_bedg ,_fgbbb ...);if !(_abe ||_ggcb ==len (_ecfab ._bgdfb )-1){_bedg =_fafacf (_bedg ,_badfg ,_fggg (_cdec ._eefeg ,_ecfab ._bgdfb [_ggcb +1]._eefeg ));};};return _bedg ;};func (_bgaea *textTable )emptyCompositeColumn (_ebaa int )bool {for _gdcee :=0; +_gdcee < _bgaea ._bfffcc ;_gdcee ++{if _fgcg ,_febfb :=_bgaea ._bbgcf [_fbacd (_ebaa ,_gdcee )];_febfb {if len (_fgcg .paraList )> 0{return false ;};};};return true ;};func (_cgab *shapesState )newSubPath (){_cgab .clearPath ();if _face {_df .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_cgab ); +};};func (_ccdfa rectRuling )checkWidth (_acgee ,_bfccb float64 )(float64 ,bool ){_fdff :=_bfccb -_acgee ;_dbffd :=_fdff <=_fbc ;return _fdff ,_dbffd ;};func (_gbbdd *textPara )getListLines ()[]*textLine {var _cfega []*textLine ;_ggfe :=_aabd (_gbbdd ._bgdfb ); +for _ ,_eecc :=range _gbbdd ._bgdfb {_cffb :=_eecc ._eecg [0]._gabac [0];if _abce (_cffb ){_cfega =append (_cfega ,_eecc );};};_cfega =append (_cfega ,_ggfe ...);return _cfega ;};func (_dfgdb paraList )findGridTables (_aabef []gridTiling )[]*textTable {if _adfgd {_df .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_dfgdb )); +for _fggf ,_egdg :=range _dfgdb {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fggf ,_egdg );};};var _cfggd []*textTable ;for _ddeda ,_dggf :=range _aabef {_dbcb ,_edgfc :=_dfgdb .findTableGrid (_dggf );if 
_dbcb !=nil {_dbcb .log (_bc .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_ddeda )); +_cfggd =append (_cfggd ,_dbcb );_dbcb .markCells ();};for _gaacb :=range _edgfc {_gaacb ._ddega =true ;};};if _adfgd {_df .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_cfggd )); +};return _cfggd ;};func (_ebdbb paraList )lines ()[]*textLine {var _dffag []*textLine ;for _ ,_fefb :=range _ebdbb {_dffag =append (_dffag ,_fefb ._bgdfb ...);};return _dffag ;};func _aggfc (_eegg float64 )int {var _cbcg int ;if _eegg >=0{_cbcg =int (_eegg /_gcfb ); +}else {_cbcg =int (_eegg /_gcfb )-1;};return _cbcg ;}; + +// String returns a description of `p`. +func (_edcd *textPara )String ()string {if _edcd ._gfadg {return _bc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_edcd .PdfRectangle );};_effd :="";if _edcd ._bbdgd !=nil {_effd =_bc .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_edcd ._bbdgd ._deafe ,_edcd ._bbdgd ._bfffcc ); +};return _bc .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_edcd .PdfRectangle ,_effd ,len (_edcd ._bgdfb ),_fdafd (_edcd .text (),50));};type rectRuling struct{_gadd rulingKind ;_adbbd markKind ; +_ff .Color ;_fd .PdfRectangle ;}; + +// List returns all the list objects detected on the page. +// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects. +// A given bullet list object has a tree structure. +// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree. +// The rest content of the pdf is ignored and only text in the bullet point lists are extracted. +// The list extraction is done in two ways. +// 1. 
If the document is tagged then the lists are extracted using the tags provided in the document. +// 2. Otherwise the bullet lists are extracted from the raw text using regex matching. +// By default the document tag is used if available. +// However this can be disabled using `DisableDocumentTags` in the `Options` object. +// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly. +// +// options := &Options{ +// DisableDocumentTags: false, // this means use document tag if available +// } +// ex, err := NewWithOptions(page, options) +// // handle error +// pageText, _, _, err := ex.ExtractPageText() +// // handle error +// lists := pageText.List() +// txt := lists.Text() +func (_agbe PageText )List ()lists {_dgab :=!_agbe ._fcaa ._affg ;_cafec :=_agbe .getParagraphs ();_aaecb :=_cafec .list ();if _agbe ._eebg !=nil &&_dgab {_dfeb :=_adcbc (&_cafec );if len (_agbe ._eebg .K )==0{_df .Log .Debug ("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e"); +return _aaecb ;};_aaecb =_fadg (_agbe ._eebg ,_dfeb ,_agbe ._ddegf );};return _aaecb ;};func (_adfg *textObject )setTextRise (_edcg float64 ){if _adfg ==nil {return ;};_adfg ._ddgb ._abbcb =_edcg ;}; + +// ExtractPageImages returns the image contents of the page extractor, including data +// and position, size information for each image. +// A set of options to control page image extraction can be passed in. The options +// parameter can be nil for the default options. By default, inline stencil masks +// are not extracted. 
+func (_eaa *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_fbd :=&imageExtractContext {_fea :options };_edc :=_fbd .extractContentStreamImages (_eaa ._fed ,_eaa ._dd );if _edc !=nil {return nil ,_edc ;};return &PageImages {Images :_fbd ._dcc },nil ; +}; + +// String returns a description of `v`. +func (_bfaea *ruling )String ()string {if _bfaea ._fbgb ==_bfba {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_adcf ,_gafd :="\u0078","\u0079";if _bfaea ._fbgb ==_caec {_adcf ,_gafd ="\u0079","\u0078";};_efcbb :="";if _bfaea ._bccfg !=0.0{_efcbb =_bc .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_bfaea ._bccfg ); +};return _bc .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_bfaea ._fbgb ,_adcf ,_bfaea ._faad ,_gafd ,_bfaea ._feae ,_bfaea ._feagb ,_bfaea ._feagb -_bfaea ._feae ,_bfaea ._bcab ,_bfaea .Color ,_efcbb ); +};func _fggc (_eac *_cb .ContentStreamOperation )(float64 ,error ){if len (_eac .Params )!=1{_ede :=_dea .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_eac .Operand ,1,len (_eac .Params ),_eac .Params ); +return 0.0,_ede ;};return _gc .GetNumberAsFloat (_eac .Params [0]);};func _gfc (_fab []string ,_fc int ,_bd string )int {_bcc :=_fc ;for ;_bcc < len (_fab );_bcc ++{if _fab [_bcc ]!=_bd {return _bcc ;};};return _bcc ;};func (_edgadc rulingList )comp 
(_geab ,_gagfa int )bool {_bedc ,_degcg :=_edgadc [_geab ],_edgadc [_gagfa ]; +_defe ,_ggebb :=_bedc ._fbgb ,_degcg ._fbgb ;if _defe !=_ggebb {return _defe > _ggebb ;};if _defe ==_bfba {return false ;};_ccba :=func (_bafg bool )bool {if _defe ==_caec {return _bafg ;};return !_bafg ;};_ggbe ,_dcdeb :=_bedc ._faad ,_degcg ._faad ;if _ggbe !=_dcdeb {return _ccba (_ggbe > _dcdeb ); +};_ggbe ,_dcdeb =_bedc ._feae ,_degcg ._feae ;if _ggbe !=_dcdeb {return _ccba (_ggbe < _dcdeb );};return _ccba (_bedc ._feagb < _degcg ._feagb );}; + +// TableInfo gets table information of the textmark `tm`. +func (_cabg *TextMark )TableInfo ()(*TextTable ,[][]int ){if !_cabg ._eddg {return nil ,nil ;};_fbfe :=_cabg ._ecfba ;_ecfa :=_fbfe .getCellInfo (*_cabg );return _fbfe ,_ecfa ;};const (_afae =false ;_egeg =false ;_adab =false ;_ffdd =false ;_face =false ; +_gebg =false ;_efcea =false ;_acab =false ;_aaac =false ;_baac =_aaac &&true ;_eefe =_baac &&false ;_bgede =_aaac &&true ;_adfgd =false ;_dabe =_adfgd &&false ;_eggd =_adfgd &&true ;_bfdd =false ;_cgde =_bfdd &&false ;_addf =_bfdd &&false ;_eagd =_bfdd &&true ; +_gdae =_bfdd &&false ;_ebeef =_bfdd &&false ;);func (_acef paraList )toTextMarks ()[]TextMark {_bbab :=0;var _fccd []TextMark ;for _cage ,_gabf :=range _acef {if _gabf ._gfadg {continue ;};_fcdd :=_gabf .toTextMarks (&_bbab );_fccd =append (_fccd ,_fcdd ...); +if _cage !=len (_acef )-1{if _gggbe (_gabf ,_acef [_cage +1]){_fccd =_fafacf (_fccd ,&_bbab ,"\u0020");}else {_fccd =_fafacf (_fccd ,&_bbab ,"\u000a");_fccd =_fafacf (_fccd ,&_bbab ,"\u000a");};};};_fccd =_fafacf (_fccd ,&_bbab ,"\u000a");_fccd =_fafacf (_fccd ,&_bbab ,"\u000a"); +return _fccd ;};func (_gafb *wordBag )absorb (_ggeb *wordBag ){_gfee :=_ggeb .makeRemovals ();for _acdd ,_baed :=range _ggeb ._edaag {for _ ,_aeddf :=range _baed {_gafb .pullWord (_aeddf ,_acdd ,_gfee );};};_ggeb .applyRemovals (_gfee );};type markKind int ; + + +// Text returns the extracted page text. 
+func (_bfag PageText )Text ()string {return _bfag ._dcag }; + +// Extractor stores and offers functionality for extracting content from PDF pages. +type Extractor struct{_fed string ;_dd *_fd .PdfPageResources ;_aea _fd .PdfRectangle ;_bdc *_fd .PdfRectangle ;_cc int ;_afd map[string ]fontEntry ;_ga map[string ]textResult ;_bff map[string ]textResult ;_ddg int64 ;_cab *Options ;_gaf *_fd .StructTreeRoot ; +_ea _gc .PdfObject ;_fdg []*_fd .PdfAnnotation ;};type shapesState struct{_cdg _aec .Matrix ;_fagg _aec .Matrix ;_bgf []*subpath ;_baffg bool ;_fabe _aec .Point ;_eefg *textObject ;};func (_dcagf *ruling )intersects (_caac *ruling )bool {_ddddc :=(_dcagf ._fbgb ==_bfdef &&_caac ._fbgb ==_caec )||(_caac ._fbgb ==_bfdef &&_dcagf ._fbgb ==_caec ); +_ffcf :=func (_ffegf ,_acgg *ruling )bool {return _ffegf ._feae -_eedab <=_acgg ._faad &&_acgg ._faad <=_ffegf ._feagb +_eedab ;};_eaggb :=_ffcf (_dcagf ,_caac );_acdbf :=_ffcf (_caac ,_dcagf );if _bfdd {_bc .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_ddddc ,_eaggb ,_acdbf ,_ddddc &&_eaggb &&_acdbf ,_dcagf ,_caac ); +};return _ddddc &&_eaggb &&_acdbf ;};type gridTile struct{_fd .PdfRectangle ;_eecbb ,_agbbg ,_eeeaf ,_cbag bool ;};func (_agcg *shapesState )cubicTo (_ecda ,_adbe ,_efcc ,_fegb ,_eefb ,_baa float64 ){if _face {_df .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a"); +};_agcg .addPoint (_eefb ,_baa );};func (_afac rulingList )aligned ()bool {if len (_afac )< 2{return false ;};_gbfa :=make (map[*ruling ]int );_gbfa [_afac [0]]=0;for _ ,_fdcg :=range _afac [1:]{_cbccb :=false ;for _gcbg :=range _gbfa {if _fdcg .gridIntersecting 
(_gcbg ){_gbfa [_gcbg ]++; +_cbccb =true ;break ;};};if !_cbccb {_gbfa [_fdcg ]=0;};};_cfeea :=0;for _ ,_becag :=range _gbfa {if _becag ==0{_cfeea ++;};};_dgfec :=float64 (_cfeea )/float64 (len (_afac ));_bgae :=_dgfec <=1.0-_bcd ;if _bfdd {_df .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bgae ,_dgfec ,_cfeea ,len (_afac ),_afac .String ()); +};return _bgae ;};func (_badd *textPara )toTextMarks (_eeec *int )[]TextMark {if _badd ._bbdgd ==nil {return _badd .toCellTextMarks (_eeec );};var _deea []TextMark ;for _eaec :=0;_eaec < _badd ._bbdgd ._bfffcc ;_eaec ++{for _gedbd :=0;_gedbd < _badd ._bbdgd ._deafe ; +_gedbd ++{_fgee :=_badd ._bbdgd .get (_gedbd ,_eaec );if _fgee ==nil {_deea =_fafacf (_deea ,_eeec ,"\u0009");}else {_cbdde :=_fgee .toCellTextMarks (_eeec );_deea =append (_deea ,_cbdde ...);};_deea =_fafacf (_deea ,_eeec ,"\u0020");};if _eaec < _badd ._bbdgd ._bfffcc -1{_deea =_fafacf (_deea ,_eeec ,"\u000a"); +};};_cadc :=_badd ._bbdgd ;if _cadc .isExportable (){_dbabe :=_cadc .toTextTable ();_deea =_gdgded (_deea ,&_dbabe );};return _deea ;};func _bcad (_gdbc []*textLine ,_fcadc map[float64 ][]*textLine )[]*list {_gbba :=_fbdb (_fcadc );_adfgdb :=[]*list {}; +if len (_gbba )==0{return _adfgdb ;};_cedg :=_gbba [0];_ddggg :=1;_fafaf :=_fcadc [_cedg ];for _cbaf ,_ceecg :=range _fafaf {var _cfeba float64 ;_ffbg :=[]*list {};_agbg :=_ceecg ._eefeg ;_eafb :=-1.0;if _cbaf < len (_fafaf )-1{_eafb =_fafaf [_cbaf +1]._eefeg ; +};if _ddggg < len (_gbba ){_ffbg =_badec (_gdbc ,_fcadc ,_gbba ,_ddggg ,_agbg ,_eafb );};_cfeba =_eafb ;if len (_ffbg )> 0{_aeada :=_ffbg [0];if len (_aeada ._ddage )> 0{_cfeba =_aeada ._ddage [0]._eefeg ;};};_gade :=[]*textLine {_ceecg };_cabb :=_gfad (_ceecg ,_gdbc ,_agbg ,_cfeba ); +_gade =append (_gade ,_cabb ...);_bffa :=_dead (_gade 
,"\u0062\u0075\u006c\u006c\u0065\u0074",_ffbg );_bffa ._eeca =_gbcd (_gade ,"");_adfgdb =append (_adfgdb ,_bffa );};return _adfgdb ;};func (_dceeg *textTable )compositeRowCorridors ()map[int ][]float64 {_gedd :=make (map[int ][]float64 ,_dceeg ._bfffcc ); +if _adfgd {_df .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_dceeg ._bfffcc );};for _gcgf :=1;_gcgf < _dceeg ._bfffcc ;_gcgf ++{var _ddea []compositeCell ; +for _afgc :=0;_afgc < _dceeg ._deafe ;_afgc ++{if _efaca ,_fefba :=_dceeg ._bbgcf [_fbacd (_afgc ,_gcgf )];_fefba {_ddea =append (_ddea ,_efaca );};};if len (_ddea )==0{continue ;};_fffac :=_faga (_ddea );_gedd [_gcgf ]=_fffac ;if _adfgd {_bc .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_gcgf ,_fffac ); +};};return _gedd ;};func _afef (_efff bounded )float64 {return -_efff .bbox ().Lly };func _gfce (_egeecb ,_accf _ae .Image )_ae .Image {_abag ,_dgff :=_accf .Bounds ().Size (),_egeecb .Bounds ().Size ();_gbcb ,_abeb :=_abag .X ,_abag .Y ;if _dgff .X > _gbcb {_gbcb =_dgff .X ; +};if _dgff .Y > _abeb {_abeb =_dgff .Y ;};_bebb :=_ae .Rect (0,0,_gbcb ,_abeb );if _abag .X !=_gbcb ||_abag .Y !=_abeb {_fgfg :=_ae .NewRGBA (_bebb );_fa .BiLinear .Scale (_fgfg ,_bebb ,_egeecb ,_accf .Bounds (),_fa .Over ,nil );_accf =_fgfg ;};if _dgff .X !=_gbcb ||_dgff .Y !=_abeb {_dafa :=_ae .NewRGBA (_bebb ); +_fa .BiLinear .Scale (_dafa ,_bebb ,_egeecb ,_egeecb .Bounds (),_fa .Over ,nil );_egeecb =_dafa ;};_effcb :=_ae .NewRGBA (_bebb );_fa .DrawMask (_effcb ,_bebb ,_egeecb ,_ae .Point {},_accf ,_ae .Point {},_fa .Over );return _effcb ;};func _gdgded (_agdb []TextMark ,_fafc *TextTable )[]TextMark {var _fadbe []TextMark ; +for _ ,_afbc :=range _agdb {_afbc ._eddg =true ;_afbc ._ecfba =_fafc ;_fadbe =append (_fadbe ,_afbc );};return _fadbe ;};func _ebdf (_egcga ,_fedea _fd .PdfRectangle )bool {return _fedea .Llx <=_egcga .Urx 
&&_egcga .Llx <=_fedea .Urx ;};func _gebad (_bafe _fd .PdfRectangle )*ruling {return &ruling {_fbgb :_bfdef ,_faad :_bafe .Urx ,_feae :_bafe .Lly ,_feagb :_bafe .Ury }; +};func (_bdgc gridTiling )log (_gfaff string ){if !_eagd {return ;};_df .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_bdgc ._afdaa ),len (_bdgc ._cecbb ),_gfaff );_bc .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_bdgc ._afdaa ); +_bc .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_bdgc ._cecbb );for _ccfd ,_egfb :=range _bdgc ._cecbb {_cfgca ,_fecff :=_bdgc ._cdeeb [_egfb ];if !_fecff {continue ;};_bc .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_ccfd ,_egfb ); +for _adabf ,_dbgg :=range _bdgc ._afdaa {_geebc ,_abda :=_cfgca [_dbgg ];if !_abda {continue ;};_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_adabf ,_geebc .String ());};};}; + +// String returns a string descibing `i`. 
+func (_dbag gridTile )String ()string {_gegd :=func (_geed bool ,_dceg string )string {if _geed {return _dceg ;};return "\u005f";};return _bc .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_dbag .PdfRectangle ,_gegd (_dbag ._agbbg ,"\u004c"),_gegd (_dbag ._cbag ,"\u0052"),_gegd (_dbag ._eeeaf ,"\u0042"),_gegd (_dbag ._eecbb ,"\u0054")); +};func (_dcac *wordBag )allWords ()[]*textWord {var _affa []*textWord ;for _ ,_agab :=range _dcac ._edaag {_affa =append (_affa ,_agab ...);};return _affa ;};const (_age ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"; +_daf ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"; +_fga ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";_eeb ="E\u0052R\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066o\u006e\u0074\u0020\u0066ou\u006e\u0064"; +); + +// String returns a description of `k`. +func (_baec rulingKind )String ()string {_addcf ,_gagf :=_acfce [_baec ];if !_gagf {return _bc .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_baec );};return _addcf ;};func (_edb *stateStack )empty ()bool {return len (*_edb )==0}; + + +// StrokePath is a stroked path. +type StrokePath struct{Points []_aec .Point ;_ff .Color ;}; + +// Search searches the pages specified by `pages`. 
+func (_cac *Editor )Search (pattern string ,pages []int )(map[int ]Match ,error ){_daa ,_ ,_agea :=_cac .getMatches (pattern ,pages );return _daa ,_agea ;}; + +// String returns a string describing the current state of the textState stack. +func (_cca *stateStack )String ()string {_febg :=[]string {_bc .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_cca ))};for _edf ,_adef :=range *_cca {_gdde :="\u003c\u006e\u0069l\u003e"; +if _adef !=nil {_gdde =_adef .String ();};_febg =append (_febg ,_bc .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_edf ,_gdde ));};return _c .Join (_febg ,"\u000a");};func _aba (_dbe *TextMarkArray ,_aeg *string ,_aeb *int ,_cgef string )error {var _fee TextMark ; +for _ ,_ced :=range _dbe .Elements (){_gcg :=_ced .Text ;_abbc :=_ced .Font ;_bed :="";_fcaeb :=*_aeg ;if len (_fcaeb )> *_aeb {_bed =_fcaeb [*_aeb :*_aeb +len (_gcg )];}else if *_aeb ==len (_cgef )-1&&len (_fcaeb )> *_aeb {_bed =_fcaeb [*_aeb :];};_fecc :=_ced .DirectObject ; +if _fecc ==nil &&_ced .Text =="\u0020"{_edad :=_fee .ObjString ;_bcg :=_edad [len (_edad )-1];if _bcg !=_ced .Text {_fecc =_fee .DirectObject ;_abbc =_fee .Font ;_gccd ,_geeg :=_gc .GetString (_fecc );if !_geeg {return _bc .Errorf ("\u0075n\u0061\u0062l\u0065\u0020\u0074\u006f \u0067\u0065\u0074 \u0073\u0074\u0072\u0069\u006e\u0067\u0020\u0042\u0079te\u0073\u0020\u0066r\u006f\u006d \u0064\u0069\u0072\u0065\u0063\u0074O\u0062\u006ae\u0063\u0074"); +};_bface ,_geeg :=_gc .GetStringBytes (_fecc );if !_geeg {return _gc .ErrTypeError ;};_gafe :=_cdfc (_bface ,_abbc );_bed =_gafe +_bed ;_ecbc (_gccd ,_bed ,_abbc );*_aeb +=len (_gcg );continue ;};};_bbe ,_efcf :=_gc .GetString (_fecc );if !_efcf {return _bc .Errorf ("\u0075n\u0061\u0062l\u0065\u0020\u0074\u006f \u0067\u0065\u0074 \u0073\u0074\u0072\u0069\u006e\u0067\u0020\u0042\u0079te\u0073\u0020\u0066r\u006f\u006d 
\u0064\u0069\u0072\u0065\u0063\u0074O\u0062\u006ae\u0063\u0074"); +};_cgg :="";_ffa ,_efcf :=_gc .GetStringBytes (_fecc );if !_efcf {return _gc .ErrTypeError ;};_gebf :=_cdfc (_ffa ,_abbc );_cgg =_c .Replace (_gebf ,_gcg ,_bed ,1);_ecbc (_bbe ,_cgg ,_abbc );*_aeb +=len (_gcg );_fee =_ced ;};return nil ;};func (_bdccc *wordBag )blocked (_cfee *textWord )bool {if _cfee .Urx < _bdccc .Llx {_ebcc :=_gebad (_cfee .PdfRectangle ); +_bgg :=_adad (_bdccc .PdfRectangle );if _bdccc ._ecea .blocks (_ebcc ,_bgg ){if _ebeef {_df .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_cfee ,_bdccc );};return true ;};}else if _bdccc .Urx < _cfee .Llx {_aabe :=_gebad (_bdccc .PdfRectangle ); +_dbgd :=_adad (_cfee .PdfRectangle );if _bdccc ._ecea .blocks (_aabe ,_dbgd ){if _ebeef {_df .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_cfee ,_bdccc );};return true ;};};if _cfee .Ury < _bdccc .Lly {_cbca :=_fbac (_cfee .PdfRectangle ); +_cdda :=_gfae (_bdccc .PdfRectangle );if _bdccc ._gbef .blocks (_cbca ,_cdda ){if _ebeef {_df .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_cfee ,_bdccc );};return true ;};}else if _bdccc .Ury < _cfee .Lly {_ffbc :=_fbac (_bdccc .PdfRectangle ); +_afda :=_gfae (_cfee .PdfRectangle );if _bdccc ._gbef .blocks (_ffbc ,_afda ){if _ebeef {_df .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_cfee ,_bdccc );};return true ;};};return false ;};func _aabd (_eefba []*textLine )[]*textLine {_ggab :=[]*textLine {}; +for _ ,_efeg :=range _eefba {_ccddd :=_efeg .text ();_ecaa :=_fgbc .Find ([]byte (_ccddd ));if _ecaa !=nil {_ggab =append (_ggab ,_efeg );};};return _ggab ;};func (_afdg *compositeCell )updateBBox (){for _ ,_gfgf :=range _afdg .paraList {_afdg .PdfRectangle =_gcacd (_afdg .PdfRectangle ,_gfgf .PdfRectangle ); +};};func _gfae (_ffag _fd 
.PdfRectangle )*ruling {return &ruling {_fbgb :_caec ,_faad :_ffag .Lly ,_feae :_ffag .Llx ,_feagb :_ffag .Urx };};func _befbd (_degfeg map[float64 ]gridTile )[]float64 {_acgd :=make ([]float64 ,0,len (_degfeg ));for _gdce :=range _degfeg {_acgd =append (_acgd ,_gdce ); +};_bg .Float64s (_acgd );return _acgd ;};const _dgecc =1.0/1000.0; + +// ImageExtractOptions contains options for controlling image extraction from +// PDF pages. +type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func (_edde *wordBag )firstWord (_fddda int )*textWord {return _edde ._edaag [_fddda ][0]};func (_fgf *textLine )appendWord (_ggacf *textWord ){_fgf ._eecg =append (_fgf ._eecg ,_ggacf ); +_fgf .PdfRectangle =_gcacd (_fgf .PdfRectangle ,_ggacf .PdfRectangle );if _ggacf ._gage > _fgf ._acaea {_fgf ._acaea =_ggacf ._gage ;};if _ggacf ._dcggb > _fgf ._eefeg {_fgf ._eefeg =_ggacf ._dcggb ;};};func (_edbfe *textPara )isAtom ()*textTable {_fbbd :=_edbfe ; +_fedeab :=_edbfe ._ebea ;_efga :=_edbfe ._gfafd ;if _fedeab .taken ()||_efga .taken (){return nil ;};_efbee :=_fedeab ._gfafd ;if _efbee .taken ()||_efbee !=_efga ._ebea {return nil ;};return _cedd (_fbbd ,_fedeab ,_efga ,_efbee );};func _fbdb (_gbbfc map[float64 ][]*textLine )[]float64 {_fedbd :=[]float64 {}; +for _agefc :=range _gbbfc {_fedbd =append (_fedbd ,_agefc );};_bg .Float64s (_fedbd );return _fedbd ;};func (_decg paraList )computeEBBoxes (){if _afae {_df .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a"); +};for _ ,_bbfd :=range _decg {_bbfd ._ecedc =_bbfd .PdfRectangle ;};_cbbg :=_decg .yNeighbours (0);for _egdf ,_ddfg :=range _decg {_bfcc :=_ddfg ._ecedc ;_dcbg ,_ceabd :=-1.0e9,+1.0e9;for _ ,_bbgda :=range _cbbg [_ddfg ]{_dgcd :=_decg [_bbgda ]._ecedc ; +if _dgcd .Urx < _bfcc .Llx {_dcbg =_g .Max (_dcbg ,_dgcd .Urx );}else if _bfcc .Urx < _dgcd .Llx {_ceabd =_g .Min (_ceabd ,_dgcd .Llx );};};for _bdeba ,_gbfd :=range _decg {_dbdd :=_gbfd ._ecedc ;if _egdf ==_bdeba 
||_dbdd .Ury > _bfcc .Lly {continue ;}; +if _dcbg <=_dbdd .Llx &&_dbdd .Llx < _bfcc .Llx {_bfcc .Llx =_dbdd .Llx ;}else if _dbdd .Urx <=_ceabd &&_bfcc .Urx < _dbdd .Urx {_bfcc .Urx =_dbdd .Urx ;};};if _afae {_bc .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_egdf ,_ddfg ._ecedc ,_bfcc ,_fdafd (_ddfg .text (),50)); +};_ddfg ._ecedc =_bfcc ;};if _cfc {for _ ,_bddeg :=range _decg {_bddeg .PdfRectangle =_bddeg ._ecedc ;};};};func _gb (_fdd []string ,_dga int ,_ag int ,_ef string ){for _cee :=_dga ;_cee < _ag ;_cee ++{_fdd [_cee ]=_ef ;};};func _ddbg (_efdfc *_fd .Image ,_aebgb _ff .Color )_ae .Image {_ecgfd ,_abbd :=int (_efdfc .Width ),int (_efdfc .Height ); +_defcc :=_ae .NewRGBA (_ae .Rect (0,0,_ecgfd ,_abbd ));for _fdcf :=0;_fdcf < _abbd ;_fdcf ++{for _eecedc :=0;_eecedc < _ecgfd ;_eecedc ++{_egcfa ,_cebaa :=_efdfc .ColorAt (_eecedc ,_fdcf );if _cebaa !=nil {_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_eecedc ,_fdcf ); +continue ;};_bceea ,_ffedg ,_dbeb ,_ :=_egcfa .RGBA ();var _agfcfd _ff .Color ;if _bceea +_ffedg +_dbeb ==0{_agfcfd =_aebgb ;}else {_agfcfd =_ff .Transparent ;};_defcc .Set (_eecedc ,_fdcf ,_agfcfd );};};return _defcc ;};func (_efccf paraList )findTableGrid (_eefdb gridTiling )(*textTable ,map[*textPara ]struct{}){_cded :=len (_eefdb ._afdaa ); +_eedg :=len (_eefdb ._cecbb );_fdce :=textTable {_bbcb :true ,_deafe :_cded ,_bfffcc :_eedg ,_afcbd :make (map[uint64 ]*textPara ,_cded *_eedg ),_bbgcf :make (map[uint64 ]compositeCell ,_cded *_eedg )};_fdce .PdfRectangle 
=_eefdb .PdfRectangle ;_adec :=make (map[*textPara ]struct{}); +_eeeg :=int ((1.0-_fgae )*float64 (_cded *_eedg ));_aebae :=0;if _eagd {_df .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_cded ,_eedg );};for _eecgc ,_cgbcc :=range _eefdb ._cecbb {_cadgf ,_gbcdb :=_eefdb ._cdeeb [_cgbcc ]; +if !_gbcdb {continue ;};for _aefaf ,_eedd :=range _eefdb ._afdaa {_gbaae ,_caga :=_cadgf [_eedd ];if !_caga {continue ;};_adeb :=_efccf .inTile (_gbaae );if len (_adeb )==0{_aebae ++;if _aebae > _eeeg {if _eagd {_df .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_aebae ); +};return nil ,nil ;};}else {_fdce .putComposite (_aefaf ,_eecgc ,_adeb ,_gbaae .PdfRectangle );for _ ,_afdgb :=range _adeb {_adec [_afdgb ]=struct{}{};};};};};_baaag :=0;for _fffb :=0;_fffb < _cded ;_fffb ++{_edgg :=_fdce .get (_fffb ,0);if _edgg ==nil ||!_edgg ._gfadg {_baaag ++; +};};if _baaag ==0{if _eagd {_df .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_egcfd :=_fdce .reduceTiling (_eefdb ,_fbdc );_egcfd =_egcfd .subdivide ();return _egcfd ,_adec ;};func _cdggf (_gcbcf _fd .PdfRectangle )rulingKind {_ccef :=_gcbcf .Width (); +_bdabd :=_gcbcf .Height ();if _ccef > _bdabd {if _ccef >=_bccgd {return _caec ;};}else {if _bdabd >=_bccgd {return _bfdef ;};};return _bfba ;};const (_agfcb =true ;_ggdb =true ;_ebga =true ;_cfc =false ;_dfge =true ;_eecdc =true ;_fcce =true ;_bbgb =true ; +_ebdbc =false ;);func (_bbf *textObject )setTextLeading (_aebe float64 ){if _bbf ==nil {return ;};_bbf ._ddgb ._eadg =_aebe ;};func (_dfac *stateStack )push (_feca *textState ){_agb :=*_feca ;*_dfac =append (*_dfac ,&_agb )};func (_aefaa gridTile )numBorders ()int {_ggdab :=0; +if _aefaa ._agbbg {_ggdab ++;};if _aefaa ._cbag {_ggdab ++;};if _aefaa ._eeeaf {_ggdab ++;};if _aefaa ._eecbb {_ggdab ++;};return _ggdab ;};func _ggac (_eebf _fd .PdfRectangle 
)textState {return textState {_fgcd :100,_bag :RenderModeFill ,_aecb :_eebf }; +};func _gcebd (_fbdcf string ,_dfbd []rulingList ){_df .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_dfbd ),_fbdcf );for _eabe ,_ffea :=range _dfbd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eabe ,_ffea .String ()); +};};func _aedb (_ffcac []*textWord ,_acfca int )[]*textWord {_faafa :=len (_ffcac );copy (_ffcac [_acfca :],_ffcac [_acfca +1:]);return _ffcac [:_faafa -1];};func _cfeb (_ddfe ,_cfdbf _fd .PdfRectangle )bool {return _ebdf (_ddfe ,_cfdbf )&&_cdffa (_ddfe ,_cfdbf )}; + + +// Box represents the bounding box of a given textMark on pdf page. +// This might be used for different kinds of high lighting after doing the search +type Box struct{BBox _fd .PdfRectangle ;};func (_ccfc *Extractor )extractPageText (_bdcbf string ,_gddg *_fd .PdfPageResources ,_egcg _aec .Matrix ,_ebd int ,_ceecf bool )(*PageText ,int ,int ,error ){_df .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_ebd ); +_agac :=&PageText {_fdcd :_ccfc ._aea ,_eebg :_ccfc ._gaf ,_ddegf :_ccfc ._ea };_cdc :=_ggac (_ccfc ._aea );var _bde stateStack ;_dcee :=_ffdc (_ccfc ,_gddg ,_cb .GraphicsState {},&_cdc ,&_bde );_fda :=shapesState {_fagg :_egcg ,_cdg :_aec .IdentityMatrix (),_eefg :_dcee }; +var _gca bool ;_cefg :=-1;_bcga :="";if _ebd > _acg {_ecag :=_dea .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_ebd ,_ecag ); +return _agac ,_cdc ._acdb ,_cdc ._dcea ,_ecag ;};_geee :=_cb 
.NewContentStreamParser (_bdcbf );_efe ,_caef :=_geee .Parse ();if _caef !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_caef ); +return _agac ,_cdc ._acdb ,_cdc ._dcea ,_caef ;};_agac ._bbge =_efe ;_bfb :=_cb .NewContentStreamProcessor (*_efe );if _ccfc ._cab !=nil {_bfb .SetRelaxedMode (_ccfc ._cab .RelaxedMode );};_bfb .AddHandler (_cb .HandlerConditionEnumAllOperands ,"",func (_babcf *_cb .ContentStreamOperation ,_bec _cb .GraphicsState ,_badg *_fd .PdfPageResources )error {_fefd :=_babcf .Operand ; +if _adab {_df .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_babcf );};switch _fefd {case "\u0071":if _face {_df .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fda ._cdg );};_bde .push (&_cdc );case "\u0051":if !_bde .empty (){_cdc =*_bde .pop (); +};_fda ._cdg =_bec .CTM ;if _face {_df .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fda ._cdg );};case "\u0042\u0044\u0043":_agee ,_fdeg :=_gc .GetDict (_babcf .Params [1]);if !_fdeg {_df .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_babcf ); +return _caef ;};_dbg :=_agee .Get ("\u004d\u0043\u0049\u0044");if _dbg !=nil {_dgcb ,_faca :=_gc .GetIntVal (_dbg );if !_faca {_df .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073",_babcf ,_dbg ); +};_cefg =_dgcb ;}else {_cefg =-1;};if _ccfc ._gaf !=nil &&_cefg !=-1&&_ccfc ._cc !=-1&&_ccfc ._gaf .ParentTree !=nil {_bcbbb :=_ccfc ._cc ;var _adff func (_eage []*_fd .KValue )bool ;_adff =func (_bda []*_fd .KValue )bool {for _ 
,_ecf :=range _bda {if _cbccd :=_ecf .GetKDict (); +_cbccd !=nil {_egca :=_cbccd .GetChildren ();if len (_egca )==1&&_egca [0].GetMCID ()!=nil {if *_egca [0].GetMCID ()==_cefg {if _cbccd .ActualText !=nil {_bcga =_c .TrimSpace (_cbccd .ActualText .Str ());return true ;};return false ;};}else {return _adff (_egca ); +};};};return false ;};if _edce :=_ccfc ._gaf .ParentTree .Get ("\u004e\u0075\u006d\u0073");_edce !=nil {_ade ,_afa :=_gc .GetArray (_edce );if !_afa {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0042\u0044\u0043\u0020\u006f\u0070\u003d\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u004e\u0075m\u0073\u0020\u0061\u0072\u0072a\u0079\u002e \u006f\u003d\u0025\u0073",_babcf ,_edce ); +}else {for _eeg :=0;_eeg < _ade .Len ();_eeg +=2{if _ffab ,_fcbf :=_gc .GetInt (_ade .Get (_eeg ));_fcbf {if int (*_ffab )==_bcbbb {if _fede :=_ade .Get (_eeg +1);_fede !=nil {if _cdca ,_ccd :=_gc .GetArray (_fede );_ccd {for _ ,_gbdg :=range _cdca .Elements (){_ecgc ,_dgd :=_fd .NewKDictFromPdfObject (_gbdg ); +if _dgd !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0042\u0044\u0043\u0020\u006f\u0070\u003d\u0025\u0073\u002e\u0020\u0042\u0061d\u0020\u004b\u0044\u0069\u0063\u0074\u002e \u006f\u003d\u0025\u0073",_babcf ,_gbdg );continue ;};_gcd :=_ecgc .GetChildren (); +if len (_gcd )==1&&_gcd [0].GetMCID ()!=nil {if *_gcd [0].GetMCID ()==_cefg {if _ecgc .ActualText !=nil {_bcga =_c .TrimSpace (_ecgc .ActualText .Str ());};break ;}else if _adff (_gcd ){break ;};};};};};};};};};};};if _bcga ==""{_geegd :=_agee .Get ("\u0041\u0063\u0074\u0075\u0061\u006c\u0054\u0065\u0078\u0074"); +if _geegd !=nil {_bcga =_c .TrimSpace (_geegd .String ());};};case "\u0045\u004d\u0043":_cefg =-1;_bcga ="";case "\u0042\u0054":if _gca {_df .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074"); +_agac ._facbc =append (_agac 
._facbc ,_dcee ._agfc ...);};_gca =true ;_dfdd :=_bec ;if _ceecf {_dfdd =_cb .GraphicsState {};_dfdd .CTM =_fda ._cdg ;};_dfdd .CTM =_egcg .Mult (_dfdd .CTM );_dcee =_ffdc (_ccfc ,_badg ,_dfdd ,&_cdc ,&_bde );_fda ._eefg =_dcee ; +case "\u0045\u0054":if !_gca {_df .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_gca =false ;_agac ._facbc =append (_agac ._facbc ,_dcee ._agfc ...); +_dcee .reset ();case "\u0054\u002a":_dcee .nextLine ();case "\u0054\u0064":if _gbbd ,_dfee :=_dcee .checkOp (_babcf ,2,true );!_gbbd {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dfee );return _dfee ; +};_bfg ,_adda ,_abae :=_gfaba (_babcf .Params );if _abae !=nil {return _abae ;};_dcee .moveText (_bfg ,_adda );case "\u0054\u0044":if _aebbe ,_bbg :=_dcee .checkOp (_babcf ,2,true );!_aebbe {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbg ); +return _bbg ;};_fabf ,_fecg ,_afga :=_gfaba (_babcf .Params );if _afga !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afga );return _afga ;};_dcee .moveTextSetLeading (_fabf ,_fecg );case "\u0054\u006a":if _dec ,_efee :=_dcee .checkOp (_babcf ,1,true ); +!_dec {_df .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_babcf ,_efee );return _efee ;};_dfdg :=_gc .TraceToDirectObject (_babcf .Params [0]);_feeg ,_ffd :=_gc .GetStringBytes (_dfdg ); +if !_ffd {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_babcf );return _gc .ErrTypeError ; +};return _dcee .showText (_dfdg ,_feeg ,_cefg ,_bcga );case 
"\u0054\u004a":if _eaf ,_bcfd :=_dcee .checkOp (_babcf ,1,true );!_eaf {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bcfd );return _bcfd ; +};_cffg ,_baee :=_gc .GetArray (_babcf .Params [0]);if !_baee {_df .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_babcf ); +return _caef ;};return _dcee .showTextAdjusted (_cffg ,_cefg ,_bcga );case "\u0027":if _daba ,_ccgcd :=_dcee .checkOp (_babcf ,1,true );!_daba {_df .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccgcd ); +return _ccgcd ;};_deca :=_gc .TraceToDirectObject (_babcf .Params [0]);_ebde ,_eada :=_gc .GetStringBytes (_deca );if !_eada {_df .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_babcf ); +return _gc .ErrTypeError ;};_dcee .nextLine ();return _dcee .showText (_deca ,_ebde ,_cefg ,_bcga );case "\u0022":if _aaa ,_fgbf :=_dcee .checkOp (_babcf ,3,true );!_aaa {_df .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgbf ); +return _fgbf ;};_acca ,_feb ,_dbb :=_gfaba (_babcf .Params [:2]);if _dbb !=nil {return _dbb ;};_aegb :=_gc .TraceToDirectObject (_babcf .Params [2]);_ebdc ,_ece :=_gc .GetStringBytes (_aegb );if !_ece {_df .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_babcf ); +return _gc .ErrTypeError ;};_dcee .setCharSpacing (_acca );_dcee .setWordSpacing (_feb );_dcee .nextLine ();return _dcee .showText (_aegb ,_ebdc ,_cefg ,_bcga );case "\u0054\u004c":_geba ,_cggb :=_fggc (_babcf );if _cggb !=nil {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a 
\u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cggb ); +return _cggb ;};_dcee .setTextLeading (_geba );case "\u0054\u0063":_ceaa ,_cfg :=_fggc (_babcf );if _cfg !=nil {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfg );return _cfg ;};_dcee .setCharSpacing (_ceaa ); +case "\u0054\u0066":if _cffff ,_gga :=_dcee .checkOp (_babcf ,2,true );!_cffff {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gga );return _gga ;};_bdcbd ,_gdb :=_gc .GetNameVal (_babcf .Params [0]); +if !_gdb {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_babcf );return _gc .ErrTypeError ;};_ceaf ,_bfced :=_gc .GetNumberAsFloat (_babcf .Params [1]); +if !_gdb {_df .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_babcf ,_bfced ); +return _bfced ;};_bfced =_dcee .setFont (_bdcbd ,_ceaf );_dcee ._gbga =_dea .Is (_bfced ,_gc .ErrNotSupported );if _bfced !=nil &&!_dcee ._gbga {return _bfced ;};case "\u0054\u006d":if _agfd ,_cadf :=_dcee .checkOp (_babcf ,6,true );!_agfd {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cadf ); +return _cadf ;};_ceaff ,_afca :=_gc .GetNumbersAsFloat (_babcf .Params );if _afca !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afca );return _afca ;};_dcee .setTextMatrix (_ceaff );case "\u0054\u0072":if _aae ,_dfdf :=_dcee .checkOp (_babcf ,1,true ); +!_aae {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dfdf 
);return _dfdf ;};_eebe ,_fccb :=_gc .GetIntVal (_babcf .Params [0]);if !_fccb {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_babcf ); +return _gc .ErrTypeError ;};_dcee .setTextRenderMode (_eebe );case "\u0054\u0073":if _fged ,_caf :=_dcee .checkOp (_babcf ,1,true );!_fged {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_caf ); +return _caf ;};_ffgf ,_afcd :=_gc .GetNumberAsFloat (_babcf .Params [0]);if _afcd !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afcd );return _afcd ;};_dcee .setTextRise (_ffgf );case "\u0054\u0077":if _beca ,_ddef :=_dcee .checkOp (_babcf ,1,true ); +!_beca {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ddef );return _ddef ;};_fgaa ,_fgcf :=_gc .GetNumberAsFloat (_babcf .Params [0]);if _fgcf !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgcf ); +return _fgcf ;};_dcee .setWordSpacing (_fgaa );case "\u0054\u007a":if _gfbd ,_aead :=_dcee .checkOp (_babcf ,1,true );!_gfbd {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aead );return _aead ;};_bdde ,_fcdc :=_gc .GetNumberAsFloat (_babcf .Params [0]); +if _fcdc !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcdc );return _fcdc ;};_dcee .setHorizScaling (_bdde );case "\u0063\u006d":if !_ceecf {_fda ._cdg =_bec .CTM ;};if _fda ._cdg .Singular (){_eff :=_aec .IdentityMatrix ().Translate (_fda ._cdg .Translation ()); +_df .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_fda ._cdg ,_eff );_fda ._cdg =_eff ;};if _face {_df .Log .Info 
("\u0063\u0074\u006d\u003d\u0025\u0073",_fda ._cdg );};case "\u006d":if len (_babcf .Params )!=2{_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gcee ); +return nil ;};_eabd ,_gcbd :=_gc .GetNumbersAsFloat (_babcf .Params );if _gcbd !=nil {return _gcbd ;};_fda .moveTo (_eabd [0],_eabd [1]);case "\u006c":if len (_babcf .Params )!=2{_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gcee ); +return nil ;};_ggde ,_becab :=_gc .GetNumbersAsFloat (_babcf .Params );if _becab !=nil {return _becab ;};_fda .lineTo (_ggde [0],_ggde [1]);case "\u0063":if len (_babcf .Params )!=6{return _gcee ;};_gbg ,_fdbac :=_gc .GetNumbersAsFloat (_babcf .Params ); +if _fdbac !=nil {return _fdbac ;};_df .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_gbg );_fda .cubicTo (_gbg [0],_gbg [1],_gbg [2],_gbg [3],_gbg [4],_gbg [5]);case "\u0076","\u0079":if len (_babcf .Params )!=4{return _gcee ; +};_aff ,_afgad :=_gc .GetNumbersAsFloat (_babcf .Params );if _afgad !=nil {return _afgad ;};_df .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_aff );_fda .quadraticTo (_aff [0],_aff [1],_aff [2],_aff [3]); +case 
"\u0068":_fda .closePath ();case "\u0072\u0065":if len (_babcf .Params )!=4{return _gcee ;};_fgbb ,_bfaa :=_gc .GetNumbersAsFloat (_babcf .Params );if _bfaa !=nil {return _bfaa ;};_fda .drawRectangle (_fgbb [0],_fgbb [1],_fgbb [2],_fgbb [3]);_fda .closePath (); +case "\u0053":_fda .stroke (&_agac ._bcgae );_fda .clearPath ();case "\u0073":_fda .closePath ();_fda .stroke (&_agac ._bcgae );_fda .clearPath ();case "\u0046":_fda .fill (&_agac ._ffad );_fda .clearPath ();case "\u0066","\u0066\u002a":_fda .closePath (); +_fda .fill (&_agac ._ffad );_fda .clearPath ();case "\u0042","\u0042\u002a":_fda .fill (&_agac ._ffad );_fda .stroke (&_agac ._bcgae );_fda .clearPath ();case "\u0062","\u0062\u002a":_fda .closePath ();_fda .fill (&_agac ._ffad );_fda .stroke (&_agac ._bcgae ); +_fda .clearPath ();case "\u006e":_fda .clearPath ();case "\u0044\u006f":if len (_babcf .Params )==0{_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_babcf .Params ); +return _gc .ErrRangeError ;};_ecfb ,_dad :=_gc .GetName (_babcf .Params [0]);if !_dad {_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_babcf .Params [0]); +return _gc .ErrTypeError ;};_ ,_dcde :=_badg .GetXObjectByName (*_ecfb );if _dcde !=_fd .XObjectTypeForm {break ;};_ecfe ,_dad :=_ccfc ._ga [_ecfb .String ()];if !_dad {_egfg ,_cba :=_badg .GetXObjectFormByName (*_ecfb );if _cba !=nil {_df .Log .Debug 
("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cba ); +return _cba ;};_edaa ,_cba :=_egfg .GetContentStream ();if _cba !=nil {_df .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cba );return _cba ;};_gbgb :=_egfg .Resources ;if _gbgb ==nil {_gbgb =_badg ;};_dcgf :=_bec .CTM ;if _acd ,_fdgb :=_gc .GetArray (_egfg .Matrix ); +_fdgb {_bfgg ,_ffabc :=_acd .GetAsFloat64Slice ();if _ffabc !=nil {return _ffabc ;};if len (_bfgg )!=6{return _gcee ;};_gef :=_aec .NewMatrix (_bfgg [0],_bfgg [1],_bfgg [2],_bfgg [3],_bfgg [4],_bfgg [5]);_dcgf =_bec .CTM .Mult (_gef );};_agae ,_dgedd ,_deb ,_cba :=_ccfc .extractPageText (string (_edaa ),_gbgb ,_egcg .Mult (_dcgf ),_ebd +1,false ); +if _cba !=nil {_df .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cba );return _cba ;};_ecfe =textResult {*_agae ,_dgedd ,_deb };_ccfc ._ga [_ecfb .String ()]=_ecfe ;};_fda ._cdg =_bec .CTM ;if _face {_df .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fda ._cdg ); +};_agac ._facbc =append (_agac ._facbc ,_ecfe ._cega ._facbc ...);_agac ._bcgae =append (_agac ._bcgae ,_ecfe ._cega ._bcgae ...);_agac ._ffad =append (_agac ._ffad ,_ecfe ._cega ._ffad ...);_cdc ._acdb +=_ecfe ._gddc ;_cdc ._dcea +=_ecfe ._ebb ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_dcee ._ceca .ColorspaceNonStroking =_bec .ColorspaceNonStroking ; +_dcee ._ceca .ColorNonStroking =_bec .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_dcee ._ceca .ColorspaceStroking =_bec .ColorspaceStroking ;_dcee ._ceca .ColorStroking =_bec .ColorStroking ; +};return nil ;});_caef =_bfb .Process (_gddg );if _ccfc ._cab !=nil &&_ccfc ._cab .IncludeAnnotations &&!_ceecf {for _ ,_dcgb :=range _ccfc ._fdg {_cggd ,_aedd :=_gc .GetDict (_dcgb .AP );if !_aedd {continue ;};_cgge ,_aedd :=_cggd .Get ("\u004e").(*_gc .PdfObjectStream ); +if !_aedd {continue ;};_gacf ,_ceef :=_gc .DecodeStream (_cgge );if _ceef !=nil {_df 
.Log .Debug ("\u0045\u0072\u0072\u006f\u0072\u0020\u006f\u006e\u0020\u0064\u0065c\u006f\u0064\u0065\u0020\u0073\u0074\u0072\u0065\u0061\u006d:\u0020\u0025\u0076",_ceef ); +continue ;};_aaec :=_cgge .PdfObjectDictionary .Get ("\u0052e\u0073\u006f\u0075\u0072\u0063\u0065s");_agdf ,_ceef :=_fd .NewPdfPageResourcesFromDict (_aaec .(*_gc .PdfObjectDictionary ));if _ceef !=nil {_df .Log .Debug ("\u0045\u0072\u0072\u006f\u0072 \u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0072\u0065\u0073\u006f\u0075\u0072\u0063\u0065\u0073\u003a\u0020\u0025\u0076",_ceef ); +continue ;};_gdbf :=_aec .IdentityMatrix ();_dfae ,_aedd :=_cgge .PdfObjectDictionary .Get ("\u004d\u0061\u0074\u0072\u0069\u0078").(*_gc .PdfObjectArray );if _aedd {_eecd ,_dgfa :=_dfae .GetAsFloat64Slice ();if _dgfa !=nil {_df .Log .Debug ("\u0045\u0072\u0072or\u0020\u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069n\u0067 \u0066l\u006fa\u0074\u0036\u0034\u0020\u0073\u006c\u0069\u0063\u0065\u003a\u0020\u0025\u0076",_dgfa ); +continue ;};if len (_eecd )!=6{_df .Log .Debug ("I\u006e\u0076\u0061\u006c\u0069\u0064 \u006d\u0061\u0074\u0072\u0069\u0078\u0020\u0073\u006ci\u0063\u0065\u0020l\u0065n\u0067\u0074\u0068");continue ;};_gdbf =_aec .NewMatrix (_eecd [0],_eecd [1],_eecd [2],_eecd [3],_eecd [4],_eecd [5]); +};_fdee ,_aedd :=_ccfc ._bff [_cgge .String ()];if !_aedd {_gfa ,_adc ,_gbbf ,_bafa :=_ccfc .extractPageText (string (_gacf ),_agdf ,_gdbf ,_ebd +1,true );if _bafa !=nil {_df .Log .Debug ("\u0045\u0052R\u004f\u0052\u0020\u0065x\u0074\u0072a\u0063\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0074\u0065\u0078\u0074s\u003a\u0020\u0025\u0076",_bafa ); +continue ;};_fdee =textResult {*_gfa ,_adc ,_gbbf };_ccfc ._bff [_cgge .String ()]=_fdee ;};_agac ._facbc =append (_agac ._facbc ,_fdee ._cega ._facbc ...);_agac ._bcgae =append (_agac ._bcgae ,_fdee ._cega ._bcgae ...);_agac ._ffad =append 
(_agac ._ffad ,_fdee ._cega ._ffad ...); +_cdc ._acdb +=_fdee ._gddc ;_cdc ._dcea +=_fdee ._ebb ;};};return _agac ,_cdc ._acdb ,_cdc ._dcea ,_caef ;};func _bcfbe (_gdac float64 ,_edee int )int {if _edee ==0{_edee =1;};_eagac :=float64 (_edee );return int (_g .Round (_gdac /_eagac )*_eagac );}; +func (_dccb *wordBag )scanBand (_adee string ,_cbga *wordBag ,_eeda func (_ccfbb *wordBag ,_eeafd *textWord )bool ,_gbaa ,_bbca ,_dcdg float64 ,_aced ,_eeed bool )int {_agaed :=_cbga ._dffab ;var _eeede map[int ]map[*textWord ]struct{};if !_aced {_eeede =_dccb .makeRemovals (); +};_fbfg :=_eabab *_agaed ;_egga :=0;for _ ,_bgcce :=range _dccb .depthBand (_gbaa -_fbfg ,_bbca +_fbfg ){if len (_dccb ._edaag [_bgcce ])==0{continue ;};for _ ,_cbae :=range _dccb ._edaag [_bgcce ]{if !(_gbaa -_fbfg <=_cbae ._dcggb &&_cbae ._dcggb <=_bbca +_fbfg ){continue ; +};if !_eeda (_cbga ,_cbae ){continue ;};_babg :=2.0*_g .Abs (_cbae ._gage -_cbga ._dffab )/(_cbae ._gage +_cbga ._dffab );_bcgag :=_g .Max (_cbae ._gage /_cbga ._dffab ,_cbga ._dffab /_cbae ._gage );_eadf :=_g .Min (_babg ,_bcgag );if _dcdg > 0&&_eadf > _dcdg {continue ; +};if _cbga .blocked (_cbae ){continue ;};if !_aced {_cbga .pullWord (_cbae ,_bgcce ,_eeede );};_egga ++;if !_eeed {if _cbae ._dcggb < _gbaa {_gbaa =_cbae ._dcggb ;};if _cbae ._dcggb > _bbca {_bbca =_cbae ._dcggb ;};};if _aced {break ;};};};if !_aced {_dccb .applyRemovals (_eeede ); +};return _egga ;};func (_dfec *imageExtractContext )extractInlineImage (_egc *_cb .ContentStreamInlineImage ,_bdcb _cb .GraphicsState ,_eed *_fd .PdfPageResources )error {_dcbb ,_fadf :=_egc .ToImage (_eed );if _fadf !=nil {return _fadf ;};_dff ,_fadf :=_egc .GetColorSpace (_eed ); +if _fadf !=nil {return _fadf ;};if _dff ==nil {_dff =_fd .NewPdfColorspaceDeviceGray ();};_eaaa ,_fadf :=_dff .ImageToRGB (*_dcbb );if _fadf !=nil {return _fadf ;};_bcbb :=ImageMark {Image :&_eaaa ,Width :_bdcb .CTM .ScalingFactorX (),Height :_bdcb .CTM .ScalingFactorY (),Angle :_bdcb .CTM .Angle ()}; 
+_bcbb .X ,_bcbb .Y =_bdcb .CTM .Translation ();_dfec ._dcc =append (_dfec ._dcc ,_bcbb );_dfec ._baeb ++;return nil ;};var _cbd =[]string {"\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u004f\u004e","\u004f\u004e","\u0041\u004c","\u0045\u0054","\u0045\u0054","\u0041\u004c","\u0043\u0053","\u0041\u004c","\u004f\u004e","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0045\u0054","\u0041\u004e","\u0041\u004e","\u0041\u004c","\u0041\u004c","\u0041\u004c",
"\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004e","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u00
53\u004d","\u0041\u004c","\u0041\u004c","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c"}; +func (_bddb *textObject )checkOp (_gab *_cb .ContentStreamOperation ,_ggdc int ,_aee bool )(_dbdg bool ,_decab error ){if _bddb ==nil {var _daab []_gc .PdfObject ;if _ggdc > 0{_daab =_gab .Params ;if len (_daab )> _ggdc {_daab =_daab [:_ggdc ];};};_df .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_gab .Operand ,_daab ); +};if _ggdc >=0{if len (_gab .Params )!=_ggdc {if _aee {_decab =_dea .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gab .Operand ,_ggdc ,len (_gab .Params ),_gab .Params ); +return false ,_decab ;};};return true ,nil ;};func (_gcgec rulingList )tidied (_bdce string )rulingList {_gcga :=_gcgec .removeDuplicates ();_gcga .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_gbed :=_gcga .snapToGroups ();if _gbed ==nil {return nil ; +};_gbed .sort ();if _bfdd {_df .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_bdce ,len (_gcgec ),len (_gcga ),len (_gbed )); +};_gbed .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _gbed ;};const _acg =20;func (_cbac 
*textObject )setCharSpacing (_acag float64 ){if _cbac ==nil {return ;};_cbac ._ddgb ._bdab =_acag ;if _gebg {_df .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_acag ,_cbac ._ddgb .String ()); +};};func (_gbcga *shapesState )drawRectangle (_cebe ,_ceabf ,_ecgaf ,_ddab float64 ){if _face {_fdag :=_gbcga .devicePoint (_cebe ,_ceabf );_bgdb :=_gbcga .devicePoint (_cebe +_ecgaf ,_ceabf +_ddab );_dbba :=_fd .PdfRectangle {Llx :_fdag .X ,Lly :_fdag .Y ,Urx :_bgdb .X ,Ury :_bgdb .Y }; +_df .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_dbba );};_gbcga .newSubPath ();_gbcga .moveTo (_cebe ,_ceabf );_gbcga .lineTo (_cebe +_ecgaf ,_ceabf );_gbcga .lineTo (_cebe +_ecgaf ,_ceabf +_ddab ); +_gbcga .lineTo (_cebe ,_ceabf +_ddab );_gbcga .closePath ();};func (_bbeb *wordBag )pullWord (_gcff *textWord ,_fcef int ,_fgccc map[int ]map[*textWord ]struct{}){_bbeb .PdfRectangle =_gcacd (_bbeb .PdfRectangle ,_gcff .PdfRectangle );if _gcff ._gage > _bbeb ._dffab {_bbeb ._dffab =_gcff ._gage ; +};_bbeb ._edaag [_fcef ]=append (_bbeb ._edaag [_fcef ],_gcff );_fgccc [_fcef ][_gcff ]=struct{}{};};func (_fgdg *subpath )makeRectRuling (_adaa _ff .Color )(*ruling ,bool ){if _gdae {_df .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_fgdg ); +};_ccab :=_fgdg ._gcac [:4];_deab :=make (map[int ]rulingKind ,len (_ccab ));for _aggbe ,_fffgg :=range _ccab {_ggdeb :=_fgdg ._gcac [(_aggbe +1)%4];_deab [_aggbe ]=_dgabd (_fffgg ,_ggdeb );if _gdae {_bc .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_aggbe ,_deab [_aggbe ],_fffgg ,_ggdeb ); +};};if _gdae {_bc .Printf 
("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_deab );};var _cgdf ,_gegg []int ;for _gcbcd ,_agec :=range _deab {switch _agec {case _caec :_gegg =append (_gegg ,_gcbcd );case _bfdef :_cgdf =append (_cgdf ,_gcbcd ); +};};if _gdae {_bc .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_gegg ),_gegg );_bc .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_cgdf ),_cgdf ); +};_afdga :=(len (_gegg )==2&&len (_cgdf )==2)||(len (_gegg )==2&&len (_cgdf )==0&&_afgbe (_ccab [_gegg [0]],_ccab [_gegg [1]]))||(len (_cgdf )==2&&len (_gegg )==0&&_fgfbg (_ccab [_cgdf [0]],_ccab [_cgdf [1]]));if _gdae {_bc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_gegg ),len (_cgdf ),_afdga ); +};if !_afdga {if _gdae {_df .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_fgdg );_bc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_gegg ),len (_cgdf ),_afdga ); +};return &ruling {},false ;};if len (_cgdf )==0{for _cbbe ,_cbabc :=range _deab {if _cbabc !=_caec {_cgdf =append (_cgdf ,_cbbe );};};};if len (_gegg )==0{for _dgdg ,_abfc :=range _deab {if _abfc !=_bfdef {_gegg =append (_gegg ,_dgdg );};};};if _gdae {_df .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_gegg ),len (_cgdf 
),len (_ccab ),_gegg ,_cgdf ,_ccab ); +};var _dfcac ,_aggce ,_fgcdc ,_agba _aec .Point ;if _ccab [_gegg [0]].Y > _ccab [_gegg [1]].Y {_fgcdc ,_agba =_ccab [_gegg [0]],_ccab [_gegg [1]];}else {_fgcdc ,_agba =_ccab [_gegg [1]],_ccab [_gegg [0]];};if _ccab [_cgdf [0]].X > _ccab [_cgdf [1]].X {_dfcac ,_aggce =_ccab [_cgdf [0]],_ccab [_cgdf [1]]; +}else {_dfcac ,_aggce =_ccab [_cgdf [1]],_ccab [_cgdf [0]];};_bcdc :=_fd .PdfRectangle {Llx :_dfcac .X ,Urx :_aggce .X ,Lly :_agba .Y ,Ury :_fgcdc .Y };if _bcdc .Llx > _bcdc .Urx {_bcdc .Llx ,_bcdc .Urx =_bcdc .Urx ,_bcdc .Llx ;};if _bcdc .Lly > _bcdc .Ury {_bcdc .Lly ,_bcdc .Ury =_bcdc .Ury ,_bcdc .Lly ; +};_ggade :=rectRuling {PdfRectangle :_bcdc ,_gadd :_cdggf (_bcdc ),Color :_adaa };if _ggade ._gadd ==_bfba {if _gdae {_df .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c"); +};return nil ,false ;};_eagab ,_cecc :=_ggade .asRuling ();if !_cecc {if _gdae {_df .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _bfdd {_bc .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_eagab .String ()); +};return _eagab ,true ;}; + +// String returns a description of `state`. 
+func (_fecgf *textState )String ()string {_cffgg :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _fecgf ._fedd !=nil {_cffgg =_fecgf ._fedd .BaseFont ();};return _bc .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_fecgf ._bdab ,_fecgf ._cdeb ,_fecgf ._ddeg ,_cffgg ); +};func (_cebec paraList )llyRange (_abdba []int ,_bebfa ,_efdgb float64 )[]int {_gfgba :=len (_cebec );if _efdgb < _cebec [_abdba [0]].Lly ||_bebfa > _cebec [_abdba [_gfgba -1]].Lly {return nil ;};_agega :=_bg .Search (_gfgba ,func (_cccd int )bool {return _cebec [_abdba [_cccd ]].Lly >=_bebfa }); +_bbdgf :=_bg .Search (_gfgba ,func (_efca int )bool {return _cebec [_abdba [_efca ]].Lly > _efdgb });return _abdba [_agega :_bbdgf ];};func (_bcec *textTable )put (_egafb ,_ceecb int ,_ebdcd *textPara ){_bcec ._afcbd [_fbacd (_egafb ,_ceecb )]=_ebdcd ; +}; + +// String returns a description of `tm`. 
+func (_cfac *textMark )String ()string {return _bc .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_cfac .PdfRectangle ,_cfac ._cbdd ,_cfac ._dfdde );};func _bbb (_efc []rune )BidiText {_bdd :=-1; +_cd :=false ;_dca :=true ;_ab :=len (_efc );_cf :=make ([]string ,_ab );_egf :=make ([]string ,_ab );if _ab ==0||_cd {return _fde (string (_efc ),_dca ,_cd );};_cbff :=0;for _ec ,_gg :=range _efc {_cf [_ec ]=string (_gg );_afe :="\u004c";if _gg <=0x00ff{_afe =_ba [_gg ]; +}else if 0x0590<=_gg &&_gg <=0x05f4{_afe ="\u0052";}else if 0x0600<=_gg &&_gg <=0x06ff{_fcg :=_gg &0xff;if int (_fcg )>=len (_cbd ){_df .Log .Debug ("\u0042\u0069\u0064\u0069\u003a\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0055n\u0069c\u006f\u0064\u0065\u0020\u0063\u0068\u0061\u0072\u0061\u0063\u0074\u0065\u0072\u0020"+string (_gg )); +};_afe =_cbd [_gg &0xff];}else if (0x0700<=_gg &&_gg <=0x08ac)||(0xfb50<=_gg &&_gg <=0xfdff)||(0xfe70<=_gg &&_gg <=0xfeff){_afe ="\u0041\u004c";};if _afe =="\u0052"||_afe =="\u0041\u004c"||_afe =="\u0041\u004e"{_cbff ++;};_egf [_ec ]=_afe ;};if _cbff ==0{_dca =true ; +return _fde (string (_efc ),_dca ,false );};if _bdd ==-1{if float64 (_cbff )/float64 (_ab )< 0.3&&_ab > 4{_dca =true ;_bdd =0;}else {_dca =false ;_bdd =1;};};var _cfd []int ;for range _efc {_cfd =append (_cfd ,_bdd );};_ded :="\u004c";if _dg (_bdd ){_ded ="\u0052"; +};_dcf :=_ded ;_db :=_dcf ;_be :=_dcf ;for _gfb :=range _efc {if _egf [_gfb ]=="\u004e\u0053\u004d"{_egf [_gfb ]=_be ;}else {_be =_egf [_gfb ];};};_be =_dcf ;var _ecb string ;for _bae :=range _efc {_ecb =_egf [_bae ];switch _ecb {case "\u0045\u004e":if _be =="\u0041\u004c"{_egf [_bae ]="\u0041\u004e"; +}else {_egf [_bae ]="\u0045\u004e";};case "\u0052","\u004c","\u0041\u004c":_be =_ecb ;};};for _cfa :=range _efc {_aef :=_egf [_cfa ];if _aef =="\u0041\u004c"{_egf [_cfa ]="\u0052";};};for _ffb :=1;_ffb < (len (_efc )-1);_ffb ++{if _egf [_ffb 
]=="\u0045\u0053"&&_egf [_ffb -1]=="\u0045\u004e"&&_egf [_ffb +1]=="\u0045\u004e"{_egf [_ffb ]="\u0045\u004e"; +};if _egf [_ffb ]=="\u0043\u0053"&&(_egf [_ffb -1]=="\u0045\u004e"||_egf [_ffb -1]=="\u0041\u004e")&&_egf [_ffb +1]==_egf [_ffb -1]{_egf [_ffb ]=_egf [_ffb -1];};};for _fe :=range _efc {if _egf [_fe ]=="\u0045\u004e"{for _dcff :=_fe -1;_dcff >=0;_dcff --{if _egf [_dcff ]!="\u0045\u0054"{break ; +};_egf [_dcff ]="\u0045\u004e";};for _dedc :=_fe +1;_dedc < _ab ;_dedc ++{if _egf [_dedc ]!="\u0045\u0054"{break ;};_egf [_dedc ]="\u0045\u004e";};};};for _ggd :=range _efc {_cg :=_egf [_ggd ];if _cg =="\u0057\u0053"||_cg =="\u0045\u0053"||_cg =="\u0045\u0054"||_cg =="\u0043\u0053"{_egf [_ggd ]="\u004f\u004e"; +};};_be ="\u0073\u006f\u0072";for _abb :=range _efc {_fb :=_egf [_abb ];switch _fb {case "\u0045\u004e":if _be =="\u004c"{_egf [_abb ]="\u004c";}else {_egf [_abb ]="\u0045\u004e";};case "\u0052","\u004c":_be =_fb ;};};for _gcb :=0;_gcb < len (_efc );_gcb ++{if _egf [_gcb ]=="\u004f\u004e"{_ed :=_gfc (_egf ,_gcb +1,"\u004f\u004e"); +_ceb :=_db ;if _gcb > 0{_ceb =_egf [_gcb -1];};_gce :=_db ;if _ed +1< _ab {_gce =_egf [_ed +1];};if _ceb !="\u004c"{_ceb ="\u0052";};if _gce !="\u004c"{_gce ="\u0052";};if _ceb ==_gce {_gb (_egf ,_gcb ,_ed ,_ceb );};_gcb =_ed -1;};};for _gd :=range _efc {if _egf [_gd ]=="\u004f\u004e"{_egf [_gd ]=_ded ; +};};for _ge :=range _efc {_fg :=_egf [_ge ];if _da (_cfd [_ge ]){switch _fg {case "\u0052":_cfd [_ge ]++;case "\u0041\u004e","\u0045\u004e":_cfd [_ge ]+=2;};}else if _fg =="\u004c"||_fg =="\u0041\u004e"||_fg =="\u0045\u004e"{_cfd [_ge ]++;};};_fdbb :=-1; +_baf :=99;var _efa int ;for _eca :=0;_eca < len (_cfd );_eca ++{_efa =_cfd [_eca ];if _fdbb < _efa {_fdbb =_efa ;};if _baf > _efa &&_dg (_efa ){_baf =_efa ;};};for _agc :=_fdbb ;_agc >=_baf ;_agc --{_gdg :=-1;for _fca :=0;_fca < len (_cfd );_fca ++{if _cfd [_fca ]< _agc {if _gdg >=0{_dag (_cf ,_gdg ,_fca ); +_gdg =-1;};}else if _gdg < 0{_gdg =_fca ;};};if _gdg >=0{_dag (_cf 
,_gdg ,len (_cfd ));};};for _ceed :=0;_ceed < len (_cf );_ceed ++{_bcbc :=_cf [_ceed ];if _bcbc =="\u003c"||_bcbc =="\u003e"{_cf [_ceed ]="";};};return _fde (_c .Join (_cf ,""),_dca ,false ); +}; + +// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text. +// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where +// `start` and `end` are offsets in the extracted text. +// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and +// last elements of the returned TextMarkArray may only partially overlap text[start:end]. +func (_fdf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _fdf ==nil {return nil ,_dea .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_bc .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end ); +};_dabd :=len (_fdf ._dbce );if _dabd ==0{return _fdf ,nil ;};if start < _fdf ._dbce [0].Offset {start =_fdf ._dbce [0].Offset ;};if end > _fdf ._dbce [_dabd -1].Offset +1{end =_fdf ._dbce [_dabd -1].Offset +1;};_dcgbf :=_bg .Search (_dabd ,func (_ebee int )bool {return _fdf ._dbce [_ebee ].Offset +len (_fdf ._dbce [_ebee ].Text )-1>=start }); +if !(0<=_dcgbf &&_dcgbf < _dabd ){_fdad :=_bc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_dcgbf ,_dabd ,_fdf ._dbce [0],_fdf ._dbce [_dabd -1]); +return nil ,_fdad ;};_efde :=_bg 
// _bdaf renders a map from int keys to float64 slices as a single string of the form
// "{k1: [v v …], k2: […]}". Keys appear in ascending order and every value is printed
// with two decimal places.
func _bdaf(_gcdbb map[int][]float64) string {
	// Collect and sort the keys so the output does not depend on map iteration order.
	keys := make([]int, 0, len(_gcdbb))
	for k := range _gcdbb {
		keys = append(keys, k)
	}
	_bg.Ints(keys)

	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		parts = append(parts, _bc.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", k, _gcdbb[k]))
	}
	joined := _c.Join(parts, "\u002c\u0020")
	return _bc.Sprintf("\u007b\u0025\u0073\u007d", joined)
}
*textObject )showText (_fadd _gc .PdfObject ,_cafe []byte ,_cbg int ,_bdcc string )error {return _fgcce .renderText (_fadd ,_cafe ,_cbg ,_bdcc );};func _ecbc (_dagd *_gc .PdfObjectString ,_geeb string ,_bcgb *_fd .PdfFont ){_gfgb ,_bef :=_bcgb .StringToCharcodeBytes (_geeb ); +if _bef !=0{_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0073\u006fm\u0065\u0020\u0072un\u0065\u0073\u0020\u0063\u006f\u0075l\u0064\u0020\u006e\u006f\u0074\u0020\u0062\u0065\u0020\u0065\u006e\u0063\u006f\u0064\u0065d\u002e\u000a\u0009\u0025\u0073\u0020\u002d\u003e \u0025\u0076",_geeb ,_gfgb ); +};_aafg :=_gc .MakeStringFromBytes (_gfgb );*_dagd =*_aafg ;};func (_dfcda rulingList )bbox ()_fd .PdfRectangle {var _baba _fd .PdfRectangle ;if len (_dfcda )==0{_df .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073"); +return _fd .PdfRectangle {};};if _dfcda [0]._fbgb ==_caec {_baba .Llx ,_baba .Urx =_dfcda .secMinMax ();_baba .Lly ,_baba .Ury =_dfcda .primMinMax ();}else {_baba .Llx ,_baba .Urx =_dfcda .primMinMax ();_baba .Lly ,_baba .Ury =_dfcda .secMinMax ();};return _baba ; +};func (_gfebf *shapesState )lineTo (_dfacb ,_ccbe float64 ){if _face {_df .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_dfacb ,_ccbe ,_gfebf .devicePoint (_dfacb ,_ccbe )); +};_gfebf .addPoint (_dfacb ,_ccbe );};type textPara struct{_fd .PdfRectangle ;_ecedc _fd .PdfRectangle ;_bgdfb []*textLine ;_bbdgd *textTable ;_ddega bool ;_gfadg bool ;_gfbb *textPara ;_ebea *textPara ;_ebaf *textPara ;_gfafd *textPara ;_bddf []list ; +};const (_aceg markKind =iota ;_bcfag ;_febcb ;_degca ;);func (_cfdd paraList )llyOrdering ()[]int {_fcgaf :=make ([]int ,len (_cfdd ));for _bbeba :=range _cfdd {_fcgaf [_bbeba ]=_bbeba ;};_bg .SliceStable (_fcgaf ,func (_eega ,_acec int )bool {_aaaa ,_ccbg :=_fcgaf [_eega ],_fcgaf [_acec ]; 
+return _cfdd [_aaaa ].Lly < _cfdd [_ccbg ].Lly ;});return _fcgaf ;}; + +// GetContentStreamOps returns the contentStreamOps field of `pt`. +func (_fccbf *PageText )GetContentStreamOps ()*_cb .ContentStreamOperations {return _fccbf ._bbge };func (_aacb *textPara )bbox ()_fd .PdfRectangle {return _aacb .PdfRectangle };const (_bfba rulingKind =iota ;_caec ;_bfdef ;);func (_ddedd intSet )del (_adfc int ){delete (_ddedd ,_adfc )}; + + +// ApplyArea processes the page text only within the specified area `bbox`. +// Each time ApplyArea is called, it updates the result set in `pt`. +// Can be called multiple times in a row with different bounding boxes. +func (_cfdga *PageText )ApplyArea (bbox _fd .PdfRectangle ){_bfbf :=make ([]*textMark ,0,len (_cfdga ._facbc ));for _ ,_cfbd :=range _cfdga ._facbc {if _cfeb (_cfbd .bbox (),bbox ){_bfbf =append (_bfbf ,_cfbd );};};var _cdag paraList ;_fbfc :="";_beb :=len (_bfbf ); +for _bga :=0;_bga < 360&&_beb > 0;_bga +=90{_cdcf :=make ([]*textMark ,0,len (_bfbf )-_beb );for _ ,_abgb :=range _bfbf {if _abgb ._gdcb ==_bga {_cdcf =append (_cdcf ,_abgb );};};if len (_cdcf )> 0{if _cfdga ._fcaa ._egfd ==ExtractionModePlain {_fbfc +=_ggaa (_cdcf ,_cfdga ._fdcd ); +}else {_baca :=_gcbf (_cdcf ,_cfdga ._fdcd ,nil ,nil ,_cfdga ._fcaa ._egfd ==ExtractionModeLayoutNoBreaks );_cdag =append (_cdag ,_baca ...);};_beb -=len (_cdcf );};};if _cfdga ._fcaa ._egfd ==ExtractionModePlain {_cfdga ._dcag =_fbfc ;}else {_gefa :=new (_de .Buffer ); +_cdag .writeText (_gefa );_cfdga ._dcag =_gefa .String ();_cfdga ._cgf =_cdag .toTextMarks ();_cfdga ._gbab =_cdag .tables ();};};type rulingList []*ruling ;func (_fcdcc rulingList )sortStrict (){_bg .Slice (_fcdcc ,func (_adbg ,_agbb int )bool {_ccegg ,_ddgd :=_fcdcc [_adbg ],_fcdcc [_agbb ]; +_eebggb ,_caad :=_ccegg ._fbgb ,_ddgd ._fbgb ;if _eebggb !=_caad {return _eebggb > _caad ;};_bfcec ,_gfbf :=_ccegg ._faad ,_ddgd ._faad ;if !_cfdab (_bfcec -_gfbf ){return _bfcec < _gfbf ;};_bfcec ,_gfbf =_ccegg ._feae 
,_ddgd ._feae ;if _bfcec !=_gfbf {return _bfcec < _gfbf ; +};return _ccegg ._feagb < _ddgd ._feagb ;});};func (_cdfa *ruling )encloses (_fcag ,_fgcb float64 )bool {return _cdfa ._feae -_eedab <=_fcag &&_fgcb <=_cdfa ._feagb +_eedab ;}; + +// Font represents the font properties on a PDF page. +type Font struct{PdfFont *_fd .PdfFont ; + +// FontName represents Font Name from font properties. +FontName string ; + +// FontType represents Font Subtype entry in the font dictionary inside page resources. +// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont. +FontType string ; + +// ToUnicode is true if font provides a `ToUnicode` mapping. +ToUnicode bool ; + +// IsCID is true if underlying font is a composite font. +// Composite font is represented by a font dictionary whose Subtype is `Type0`. +IsCID bool ; + +// IsSimple is true if font is a simple font. +// A simple font is limited to 8-bit character codes (at most 256 codes, 0-255). +IsSimple bool ; + +// FontData represents the raw data of the embedded font file. +// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF). +// The FontData value is taken from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor. +// At most one of `FontFile`, `FontFile2` or `FontFile3` provides the FontData value. +FontData []byte ; + +// FontFileName is a name representing the font. It has the format: +// (Font Name) + (Font Type Extension), example: helvetica.ttf. +FontFileName string ; + +// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
+FontDescriptor *_fd .PdfFontDescriptor ;};func (_cbfec compositeCell )split (_gebe ,_cbddg []float64 )*textTable {_defgg :=len (_gebe )+1;_gcge :=len (_cbddg )+1;if _adfgd {_df .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_gcge ,_defgg ,_cbfec ,_gebe ,_cbddg ); +_bc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_cbfec .paraList ));for _ffbd ,_fced :=range _cbfec .paraList {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ffbd ,_fced .String ());}; +_bc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_cbfec .lines ()));for _gfac ,_gfeef :=range _cbfec .lines (){_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gfac ,_gfeef );};};_gebe =_ebcdg (_gebe ,_cbfec .Ury ,_cbfec .Lly ); +_cbddg =_ebcdg (_cbddg ,_cbfec .Llx ,_cbfec .Urx );_cagea :=make (map[uint64 ]*textPara ,_gcge *_defgg );_adbb :=textTable {_deafe :_gcge ,_bfffcc :_defgg ,_afcbd :_cagea };_ffae :=_cbfec .paraList ;_bg .Slice (_ffae ,func (_gcbfd ,_edcge int )bool {_feeb ,_cedba :=_ffae [_gcbfd ],_ffae [_edcge ]; +_bcgbe ,_efcb :=_feeb .Lly ,_cedba .Lly ;if _bcgbe !=_efcb {return _bcgbe < _efcb ;};return _feeb .Llx < _cedba .Llx ;});_fefc :=make (map[uint64 ]_fd .PdfRectangle ,_gcge *_defgg );for _dcgad ,_ccbfb :=range _gebe [1:]{_deaf :=_gebe [_dcgad ];for _fgbfe ,_fbec :=range _cbddg [1:]{_ecgf :=_cbddg [_fgbfe ]; +_fefc [_fbacd (_fgbfe ,_dcgad )]=_fd .PdfRectangle {Llx :_ecgf ,Urx :_fbec ,Lly :_ccbfb ,Ury :_deaf };};};if _adfgd {_df .Log .Info 
("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073"); +_bc .Printf ("\u0020\u0020\u0020\u0020");for _gfada :=0;_gfada < _gcge ;_gfada ++{_bc .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_gfada );};_bc .Println ();for _eeac :=0;_eeac < _defgg ;_eeac ++{_bc .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_eeac ); +for _gdefd :=0;_gdefd < _gcge ;_gdefd ++{_bc .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_fefc [_fbacd (_gdefd ,_eeac )]);};_bc .Println ();};};_ddfge :=func (_fbcg *textLine )(int ,int ){for _ggeef :=0;_ggeef < _defgg ;_ggeef ++{for _gdbbb :=0;_gdbbb < _gcge ; +_gdbbb ++{if _ccgb (_fefc [_fbacd (_gdbbb ,_ggeef )],_fbcg .PdfRectangle ){return _gdbbb ,_ggeef ;};};};return -1,-1;};_egcf :=make (map[uint64 ][]*textLine ,_gcge *_defgg );for _ ,_aecf :=range _ffae .lines (){_gacg ,_bebdd :=_ddfge (_aecf );if _gacg < 0{continue ; +};_egcf [_fbacd (_gacg ,_bebdd )]=append (_egcf [_fbacd (_gacg ,_bebdd )],_aecf );};for _baaa :=0;_baaa < len (_gebe )-1;_baaa ++{_egad :=_gebe [_baaa ];_eabbg :=_gebe [_baaa +1];for _cccdg :=0;_cccdg < len (_cbddg )-1;_cccdg ++{_gdea :=_cbddg [_cccdg ]; +_beed :=_cbddg [_cccdg +1];_fadee :=_fd .PdfRectangle {Llx :_gdea ,Urx :_beed ,Lly :_eabbg ,Ury :_egad };_gcde :=_egcf [_fbacd (_cccdg ,_baaa )];if len (_gcde )==0{continue ;};_adeg :=_bcfba (_fadee ,_gcde );_adbb .put (_cccdg ,_baaa ,_adeg );};};return &_adbb ; +};func _gfaba (_bfgbd []_gc .PdfObject )(_dabc ,_fabfc float64 ,_bbff error ){if len (_bfgbd )!=2{return 0,0,_bc .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_bfgbd )); +};_caggda ,_bbff :=_gc .GetNumbersAsFloat (_bfgbd );if _bbff !=nil {return 0,0,_bbff ;};return _caggda [0],_caggda [1],nil ;};func (_beedf *textTable )depth ()float64 {_agfg :=1e10;for _dgadc :=0;_dgadc < _beedf ._deafe ;_dgadc ++{_dbga 
:=_beedf .get (_dgadc ,0); +if _dbga ==nil ||_dbga ._gfadg {continue ;};_agfg =_g .Min (_agfg ,_dbga .depth ());};return _agfg ;};func _gcacd (_bede ,_ccac _fd .PdfRectangle )_fd .PdfRectangle {return _fd .PdfRectangle {Llx :_g .Min (_bede .Llx ,_ccac .Llx ),Lly :_g .Min (_bede .Lly ,_ccac .Lly ),Urx :_g .Max (_bede .Urx ,_ccac .Urx ),Ury :_g .Max (_bede .Ury ,_ccac .Ury )}; +};func (_fef *imageExtractContext )processOperand (_geb *_cb .ContentStreamOperation ,_eee _cb .GraphicsState ,_efg *_fd .PdfPageResources )error {if _geb .Operand =="\u0042\u0049"&&len (_geb .Params )==1{_bbdb ,_dbf :=_geb .Params [0].(*_cb .ContentStreamInlineImage ); +if !_dbf {return nil ;};if _gcf ,_afb :=_gc .GetBoolVal (_bbdb .ImageMask );_afb {if _gcf &&!_fef ._fea .IncludeInlineStencilMasks {return nil ;};};return _fef .extractInlineImage (_bbdb ,_eee ,_efg );}else if _geb .Operand =="\u0044\u006f"&&len (_geb .Params )==1{_eeeb ,_bfc :=_gc .GetName (_geb .Params [0]); +if !_bfc {_df .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _aggb ;};_ ,_fgc :=_efg .GetXObjectByName (*_eeeb );switch _fgc {case _fd .XObjectTypeImage :return _fef .extractXObjectImage (_eeeb ,_eee ,_efg );case _fd .XObjectTypeForm :return _fef .extractFormImages (_eeeb ,_eee ,_efg ); +};}else if _fef ._dfb &&(_geb .Operand =="\u0073\u0063\u006e"||_geb .Operand =="\u0053\u0043\u004e")&&len (_geb .Params )==1{_fcgd ,_eaaf :=_gc .GetName (_geb .Params [0]);if !_eaaf {_df .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065"); +return _aggb ;};_abf ,_eaaf :=_efg .GetPatternByName (*_fcgd );if !_eaaf {_df .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064");return nil ;};if _abf .IsTiling (){_cga :=_abf .GetAsTilingPattern (); +_cgc ,_dce :=_cga .GetContentStream ();if _dce !=nil {return _dce ;};_dce =_fef .extractContentStreamImages (string (_cgc ),_cga .Resources 
// _dg reports whether `_ca` is odd, i.e. whether its lowest bit is set.
// Negative odd values also report true.
func _dg(_ca int) bool {
	return _ca%2 != 0
}
._fcdae )-1]; +_dcec ._dfdde +=_gdcdf ;_dcec ._dfdde =_ce .NFKC .String (_dcec ._dfdde );};func (_fbgba rulingList )intersections ()map[int ]intSet {var _bcda ,_bdcdc []int ;for _fgad ,_eegd :=range _fbgba {switch _eegd ._fbgb {case _bfdef :_bcda =append (_bcda ,_fgad ); +case _caec :_bdcdc =append (_bdcdc ,_fgad );};};if len (_bcda )< _dbfd +1||len (_bdcdc )< _afecc +1{return nil ;};if len (_bcda )+len (_bdcdc )> _bcfe {_df .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_fbgba ),len (_bcda ),len (_bdcdc )); +return nil ;};_bbeeb :=make (map[int ]intSet ,len (_bcda )+len (_bdcdc ));for _ ,_bceec :=range _bcda {for _ ,_effde :=range _bdcdc {if _fbgba [_bceec ].intersects (_fbgba [_effde ]){if _ ,_geegf :=_bbeeb [_bceec ];!_geegf {_bbeeb [_bceec ]=make (intSet ); +};if _ ,_bgaa :=_bbeeb [_effde ];!_bgaa {_bbeeb [_effde ]=make (intSet );};_bbeeb [_bceec ].add (_effde );_bbeeb [_effde ].add (_bceec );};};};return _bbeeb ;};func _dead (_gccf []*textLine ,_gfeeg string ,_agcgb []*list )*list {return &list {_ddage :_gccf ,_ecbgd :_gfeeg ,_eegf :_agcgb }; +};func (_bdbce *wordBag )arrangeText (_gddae bool )*textPara {_bdbce .sort ();if _ggdb {_bdbce .removeDuplicates ();};var _ccddg []*textLine ;for _ ,_agad :=range _bdbce .depthIndexes (){for !_bdbce .empty (_agad ){_addc :=_bdbce .firstReadingIndex (_agad ); +_bacb :=_bdbce .firstWord (_addc );_acge :=_ecfac (_bdbce ,_addc );_geaae :=_bacb ._gage ;if _geaae < _dbbg {_geaae =_dbbg ;};_efcg :=_bacb ._dcggb -_eabab *_geaae ;_ddcf :=_bacb ._dcggb +_eabab *_geaae ;_gddaee :=_edga *_geaae ;_dccc :=_ccbf *_geaae ; +_ggea :for {var _ceefc *textWord ;_aade :=0;for _ ,_bfcbb :=range _bdbce .depthBand (_efcg ,_ddcf ){_ecdd :=_bdbce .highestWord (_bfcbb ,_efcg ,_ddcf );if _ecdd ==nil 
{continue ;};_fbcf :=_babb (_ecdd ,_acge ._eecg [len (_acge ._eecg )-1]);if _fbcf < -_dccc {break _ggea ; +};if !_gddae &&_fbcf > _gddaee {continue ;};if _ceefc !=nil &&_dfcf (_ecdd ,_ceefc )>=0{continue ;};_ceefc =_ecdd ;_aade =_bfcbb ;};if _ceefc ==nil {break ;};_acge .pullWord (_bdbce ,_ceefc ,_aade );};_acge .markWordBoundaries ();_ccddg =append (_ccddg ,_acge ); +};};if len (_ccddg )==0{return nil ;};_bg .Slice (_ccddg ,func (_bfcfe ,_cfegf int )bool {return _ffbfc (_ccddg [_bfcfe ],_ccddg [_cfegf ])< 0});_dbcg :=_bcfba (_bdbce .PdfRectangle ,_ccddg );if _aaac {_df .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_dbcg .String ()); +if _baac {for _dddeb ,_feaf :=range _dbcg ._bgdfb {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dddeb ,_feaf .String ());if _eefe {for _cggc ,_geef :=range _feaf ._eecg {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cggc ,_geef .String ()); +for _gaaa ,_dgdbe :=range _geef ._fcdae {_bc .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_gaaa ,_dgdbe .String ());};};};};};};return _dbcg ;};func (_dgg *Editor )getMatches (_faa string ,_babc []int )(map[int ]Match ,map[int ][]*TextMarkArray ,error ){_ega :=map[int ]Match {}; +_abg :=map[int ][]*TextMarkArray {};for _ ,_cef :=range _babc {_cfda ,_bfff :=_dgg ._dgb .GetPage (_cef );if _bfff !=nil {return nil ,nil ,_bfff ;};_bgc ,_bfff :=New (_cfda );if _bfff !=nil {return nil ,nil ,_bfff ;};_ffe ,_ ,_ ,_bfff :=_bgc .ExtractPageText (); +if _bfff !=nil {return nil ,nil ,_bfff ;};_bcba :=_ffe .Text ();_efdb ,_bfff :=_fecf (_faa ,_bcba );if _bfff !=nil {return nil ,nil ,_bfff ;};if len (_efdb )==0{_df .Log .Info ("\u004e\u006f\u0020\u006d\u0061\u0074\u0063\u0068\u0020\u0066\u006f\u0075\u006e\u0064\u0020f\u006fr\u0020\u0025\u0073\u0020\u006f\u006e\u0020\u0070\u0061\u0067\u0065\u0020\u0025\u0064",_faa ,_cef ); +};_ccb :=_ffe .Marks ();_cgb :=[]Box {};for 
_ ,_cff :=range _efdb {_ ,_eadb ,_acc :=_cfeg (_cff ,_ccb ,_faa );if _acc !=nil {return nil ,nil ,_acc ;};_cgb =append (_cgb ,_eadb );};_cae :=Match {Pattern :_faa ,Indexes :_efdb ,Locations :_cgb };_ega [_cef ]=_cae ; +};return _ega ,_abg ,nil ;}; + +// String returns a string describing `tm`. +func (_edaed TextMark )String ()string {_feg :=_edaed .BBox ;var _egg string ;if _edaed .Font !=nil {_egg =_edaed .Font .String ();if len (_egg )> 50{_egg =_egg [:50]+"\u002e\u002e\u002e";};};var _cggg string ;if _edaed .Meta {_cggg ="\u0020\u002a\u004d\u002a"; +};return _bc .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_edaed .Offset ,_edaed .Text ,[]rune (_edaed .Text ),_feg .Llx ,_feg .Lly ,_feg .Urx ,_feg .Ury ,_egg ,_cggg ); +};func (_caaa *ruling )gridIntersecting (_bbagd *ruling )bool {return _bfagg (_caaa ._feae ,_bbagd ._feae )&&_bfagg (_caaa ._feagb ,_bbagd ._feagb );};type compositeCell struct{_fd .PdfRectangle ;paraList ;};func (_fcgb compositeCell )hasLines (_eceaa []*textLine )bool {for _aagab ,_dcaf :=range _eceaa {_aeaf :=_cfeb (_fcgb .PdfRectangle ,_dcaf .PdfRectangle ); +if _adfgd {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_aeaf ,_aagab ,len (_eceaa ));_bc .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_fcgb ); +_bc .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_dcaf );};if _aeaf {return true ;};};return false ;};func (_ecaeb *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_gbfg :=make 
// _fecf compiles `_ddf` as a regular expression and returns the [start, end) byte index
// pairs of all of its non-overlapping matches in `_abd`.
//
// If the pattern does not compile, the compile error is returned wrapped with
// "error compiling regex pattern". When there are no matches the returned slice is nil.
func _fecf(_ddf string, _abd string) ([][]int, error) {
	re, err := _f.Compile(_ddf)
	if err == nil {
		return re.FindAllStringIndex(_abd, -1), nil
	}
	return nil, _bc.Errorf("\u0065\u0072\u0072\u006f\u0072\u0020c\u006f\u006d\u0070\u0069\u006c\u0069\u006e\u0067\u0020\u0072\u0065\u0067\u0065x\u0020\u0070\u0061\u0074\u0074\u0065\u0072n\u003a\u0020\u0025\u0077", err)
}
(_ebfe []float64 ,_dbfde ,_bbaag float64 )[]float64 {_bfcbf ,_ggae :=_dbfde ,_bbaag ;if _ggae < _bfcbf {_bfcbf ,_ggae =_ggae ,_bfcbf ;};_cgcbg :=make ([]float64 ,0,len (_ebfe )+2); +_cgcbg =append (_cgcbg ,_dbfde );for _ ,_gaef :=range _ebfe {if _gaef <=_bfcbf {continue ;}else if _gaef >=_ggae {break ;};_cgcbg =append (_cgcbg ,_gaef );};_cgcbg =append (_cgcbg ,_bbaag );return _cgcbg ;};var _fgbc *_f .Regexp =_f .MustCompile (_dadb +"\u007c"+_ebbf ); +func (_baff *stateStack )top ()*textState {if _baff .empty (){return nil ;};return (*_baff )[_baff .size ()-1];};func _fgfbg (_cgff ,_afff _aec .Point )bool {_ffegg :=_g .Abs (_cgff .X -_afff .X );_gcgd :=_g .Abs (_cgff .Y -_afff .Y );return _cecff (_ffegg ,_gcgd ); +};func (_febbb *textWord )computeText ()string {_cfaba :=make ([]string ,len (_febbb ._fcdae ));for _daff ,_egdaa :=range _febbb ._fcdae {_cfaba [_daff ]=_egdaa ._dfdde ;};return _c .Join (_cfaba ,"");};func _bfeb (_fdbaf []*wordBag )[]*wordBag {if len (_fdbaf )<=1{return _fdbaf ; +};if _aaac {_df .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_bg .Slice (_fdbaf ,func (_cfba ,_afge int )bool {_bcbd ,_eaef :=_fdbaf [_cfba ],_fdbaf [_afge ];_bged :=_bcbd .Width ()*_bcbd .Height ();_adfe :=_eaef .Width ()*_eaef .Height (); +if _bged !=_adfe {return _bged > _adfe ;};if _bcbd .Height ()!=_eaef .Height (){return _bcbd .Height ()> _eaef .Height ();};return _cfba < _afge ;});var _fabeg []*wordBag ;_fffg :=make (intSet );for _ccde :=0;_ccde < len (_fdbaf );_ccde ++{if _fffg .has (_ccde ){continue ; +};_abfd :=_fdbaf [_ccde ];for _eeaec :=_ccde +1;_eeaec < len (_fdbaf );_eeaec ++{if _fffg .has (_ccde ){continue ;};_edgf :=_fdbaf [_eeaec ];_cgce :=_abfd .PdfRectangle ;_cgce .Llx -=_abfd ._dffab ;if _ccgb (_cgce ,_edgf .PdfRectangle ){_abfd .absorb (_edgf ); +_fffg .add (_eeaec );};};_fabeg =append (_fabeg ,_abfd );};if len (_fdbaf )!=len (_fabeg )+len (_fffg ){_df .Log .Error 
("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_fdbaf ),len (_fabeg ),len (_fffg )); +};return _fabeg ;};func (_adbc *textObject )getStrokeColor ()_ff .Color {return _gcegb (_adbc ._ceca .ColorspaceStroking ,_adbc ._ceca .ColorStroking );};func (_aacg paraList )writeText (_efbad _a .Writer ){for _bgda ,_fddg :=range _aacg {if _fddg ._gfadg {continue ; +};_fddg .writeText (_efbad );if _bgda !=len (_aacg )-1{if _gggbe (_fddg ,_aacg [_bgda +1]){_efbad .Write ([]byte ("\u0020"));}else {_efbad .Write ([]byte ("\u000a"));_efbad .Write ([]byte ("\u000a"));};};};_efbad .Write ([]byte ("\u000a"));_efbad .Write ([]byte ("\u000a")); +};func _abdc (_ccddc map[int ][]float64 )[]int {_fecfb :=make ([]int ,len (_ccddc ));_cbaa :=0;for _aaeac :=range _ccddc {_fecfb [_cbaa ]=_aaeac ;_cbaa ++;};_bg .Ints (_fecfb );return _fecfb ;};func (_cfdff *textLine )text ()string {var _dacf []string ; +for _ ,_edcf :=range _cfdff ._eecg {if _edcf ._debgd {_dacf =append (_dacf ,"\u0020");};_dacf =append (_dacf ,_edcf ._gabac );};_dfgbf :=_c .Join (_dacf ,"");_eaga :=_bbb ([]rune (_dfgbf ));return _eaga ._gf ;};func _ffdc (_egbe *Extractor ,_fbga *_fd .PdfPageResources ,_bbef _cb .GraphicsState ,_fdegc *textState ,_ege *stateStack )*textObject {return &textObject {_faec :_egbe ,_gcbc :_fbga ,_ceca :_bbef ,_dgad :_ege ,_ddgb :_fdegc ,_def :_aec .IdentityMatrix (),_febb :_aec .IdentityMatrix ()}; +};func _bdffd (_defcgf float64 )float64 {return _egaff *_g .Round (_defcgf /_egaff )};func _fefga (_eaddc map[float64 ]map[float64 ]gridTile )[]float64 {_agfa :=make ([]float64 ,0,len (_eaddc ));_cebc :=make (map[float64 ]struct{},len (_eaddc ));for _ ,_bbbd :=range _eaddc {for _gbdb :=range _bbbd {if _ ,_bcac :=_cebc [_gbdb ]; +_bcac {continue ;};_agfa =append (_agfa ,_gbdb );_cebc [_gbdb ]=struct{}{};};};_bg .Float64s (_agfa );return _agfa ;};var _begf =[]string 
{"\u2756","\u27a2","\u2713","\u2022","\uf0a7","\u25a1","\u2212","\u25a0","\u25aa","\u006f"};func _dadf (_dbee ,_gcec _fd .PdfRectangle )(_fd .PdfRectangle ,bool ){if !_cfeb (_dbee ,_gcec ){return _fd .PdfRectangle {},false ; +};return _fd .PdfRectangle {Llx :_g .Max (_dbee .Llx ,_gcec .Llx ),Urx :_g .Min (_dbee .Urx ,_gcec .Urx ),Lly :_g .Max (_dbee .Lly ,_gcec .Lly ),Ury :_g .Min (_dbee .Ury ,_gcec .Ury )},true ;};func _dcead (_dbdgf []*_fd .KValue ,_eddeg map[int ][]*textLine ,_gabaf _gc .PdfObject )[]*list {_affga :=[]*list {}; +for _ ,_dggcf :=range _dbdgf {_dgbce :=_dggcf .GetKDict ();_bdaa :=_dgbce .GetChildren ();_fdfe :=[]*textLine {};_eecb :=[]*list {};_cgbc :=_dgbce .S .(*_gc .PdfObjectName ).String ();_bfaage :=_bdaa [0];_eaadc :=_bfaage .GetMCID ();if len (_bdaa )==1&&_eaadc !=nil &&*_eaadc !=-1{if _dgbce .Pg ==_gabaf {_fdfe =_eddeg [*_eaadc ]; +};}else {_eecb =_dcead (_bdaa ,_eddeg ,_gabaf );};_aafe :=_dead (_fdfe ,_cgbc ,_eecb );_affga =append (_affga ,_aafe );};return _affga ;};type textWord struct{_fd .PdfRectangle ;_dcggb float64 ;_gabac string ;_fcdae []*textMark ;_gage float64 ;_debgd bool ; +};func (_gcbda rulingList )augmentGrid ()(rulingList ,rulingList ){_cbaeb ,_cfddc :=_gcbda .vertsHorzs ();if len (_cbaeb )==0||len (_cfddc )==0{return _cbaeb ,_cfddc ;};_fegf ,_bfbad :=_cbaeb ,_cfddc ;_eeabe :=_cbaeb .bbox ();_ceecc :=_cfddc .bbox ();if _bfdd {_df .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_eeabe ); +_df .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_ceecc );};var _eeee ,_eagf ,_faed ,_cgdffb *ruling ;if _ceecc .Llx < _eeabe .Llx -_eedab {_eeee =&ruling {_bcab :_degca ,_fbgb :_bfdef ,_faad :_ceecc .Llx ,_feae :_eeabe .Lly ,_feagb :_eeabe .Ury }; +_cbaeb =append (rulingList {_eeee },_cbaeb ...);};if _ceecc .Urx > _eeabe .Urx +_eedab 
{_eagf =&ruling {_bcab :_degca ,_fbgb :_bfdef ,_faad :_ceecc .Urx ,_feae :_eeabe .Lly ,_feagb :_eeabe .Ury };_cbaeb =append (_cbaeb ,_eagf );};if _eeabe .Lly < _ceecc .Lly -_eedab {_faed =&ruling {_bcab :_degca ,_fbgb :_caec ,_faad :_eeabe .Lly ,_feae :_ceecc .Llx ,_feagb :_ceecc .Urx }; +_cfddc =append (rulingList {_faed },_cfddc ...);};if _eeabe .Ury > _ceecc .Ury +_eedab {_cgdffb =&ruling {_bcab :_degca ,_fbgb :_caec ,_faad :_eeabe .Ury ,_feae :_ceecc .Llx ,_feagb :_ceecc .Urx };_cfddc =append (_cfddc ,_cgdffb );};if len (_cbaeb )+len (_cfddc )==len (_gcbda ){return _fegf ,_bfbad ; +};_dfgd :=append (_cbaeb ,_cfddc ...);_gcbda .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_dfgd .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _cbaeb ,_cfddc ;}; + +// Elements returns the TextMarks in `ma`. +func (_aad *TextMarkArray )Elements ()[]TextMark {return _aad ._dbce };func (_bcca *textObject )moveTextSetLeading (_aebbef ,_fead float64 ){_bcca ._ddgb ._eadg =-_fead ;_bcca .moveLP (_aebbef ,_fead );}; + +// Editor represents a document editor object +type Editor struct{_dgb *_fd .PdfReader };func _da (_dc int )bool {return (_dc &1)==0};type textResult struct{_cega PageText ;_gddc int ;_ebb int ;};func _fafacf (_feega []TextMark ,_debfb *int ,_gcbee string )[]TextMark {_bbgaf :=_aefc ;_bbgaf .Text =_gcbee ; +return _gdag (_feega ,_debfb ,_bbgaf );};func (_gefe gridTile )complete ()bool {return _gefe .numBorders ()==4};func (_bbagc gridTile )contains (_bcceb _fd .PdfRectangle )bool {if _bbagc .numBorders ()< 3{return false ;};if _bbagc ._agbbg &&_bcceb .Llx < _bbagc .Llx -_fdfb {return false ; +};if _bbagc ._cbag &&_bcceb .Urx > _bbagc .Urx +_fdfb {return false ;};if _bbagc ._eeeaf &&_bcceb .Lly < _bbagc .Lly -_fdfb {return false ;};if _bbagc ._eecbb &&_bcceb .Ury > _bbagc .Ury +_fdfb {return false ;};return true ;};func (_dgf *imageExtractContext )extractXObjectImage (_bgb *_gc .PdfObjectName ,_cge _cb .GraphicsState ,_ggf *_fd 
.PdfPageResources )error {_dcbd ,_ :=_ggf .GetXObjectByName (*_bgb ); +if _dcbd ==nil {return nil ;};_gffb ,_bcf :=_dgf ._cfae [_dcbd ];if !_bcf {_fafd ,_edg :=_ggf .GetXObjectImageByName (*_bgb );if _edg !=nil {return _edg ;};if _fafd ==nil {return nil ;};_bcae ,_edg :=_fafd .ToImage ();if _edg !=nil {return _edg ;};var _ggfa _ae .Image ; +if _fafd .Mask !=nil {if _ggfa ,_edg =_feegf (_fafd .Mask ,_ff .Opaque );_edg !=nil {_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e"); +};}else if _fafd .SMask !=nil {_ggfa ,_edg =_eeefe (_fafd .SMask ,_ff .Opaque );if _edg !=nil {_df .Log .Debug ("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e"); +};};if _ggfa !=nil {_bab ,_efag :=_bcae .ToGoImage ();if _efag !=nil {return _efag ;};_bab =_gfce (_bab ,_ggfa );switch _fafd .ColorSpace .String (){case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079","\u0049n\u0064\u0065\u0078\u0065\u0064":_bcae ,_efag =_fd .ImageHandling .NewGrayImageFromGoImage (_bab ); +if _efag !=nil {return _efag ;};default:_bcae ,_efag =_fd .ImageHandling .NewImageFromGoImage (_bab );if _efag !=nil {return _efag ;};};};_gffb =&cachedImage {_dcd :_bcae ,_bfa :_fafd .ColorSpace };_dgf ._cfae [_dcbd ]=_gffb ;};_acf :=_gffb ._dcd ;_cec :=_gffb ._bfa ; +_geg ,_ffbf :=_cec .ImageToRGB (*_acf );if _ffbf !=nil {return _ffbf ;};_df .Log .Debug 
("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_cge .CTM .String ());_efce :=ImageMark {Image :&_geg ,Width :_cge .CTM .ScalingFactorX (),Height :_cge .CTM .ScalingFactorY (),Angle :_cge .CTM .Angle ()}; +_efce .X ,_efce .Y =_cge .CTM .Translation ();_dgf ._dcc =append (_dgf ._dcc ,_efce );_dgf ._gced ++;return nil ;};func (_dagf *textObject )nextLine (){_dagf .moveLP (0,-_dagf ._ddgb ._eadg )}; + +// Tables returns the tables extracted from the page. +func (_deag PageText )Tables ()[]TextTable {if _adfgd {_df .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_deag ._gbab ));};return _deag ._gbab ;};func _cacb (_bbgg *wordBag ,_dgdf float64 ,_dfgf ,_bbdd rulingList ,_cbbc bool )[]*wordBag {var _dadbb []*wordBag ; +for _ ,_cdbee :=range _bbgg .depthIndexes (){_dagfe :=false ;for !_bbgg .empty (_cdbee ){_cegbb :=_bbgg .firstReadingIndex (_cdbee );_gedb :=_bbgg .firstWord (_cegbb );_eceb :=_ddag (_gedb ,_dgdf ,_dfgf ,_bbdd );_bbgg .removeWord (_gedb ,_cegbb );if _efcea {_df .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_gedb .String ()); +};for _ddde :=true ;_ddde ;_ddde =_dagfe {_dagfe =false ;_bccf :=_cbge *_eceb ._dffab ;_gdda :=_eade *_eceb ._dffab ;if _cbbc {_gdda =_g .MaxFloat64 ;};_aceb :=_acff *_eceb ._dffab ;if _efcea {_df .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_eceb .minDepth (),_eceb .maxDepth (),_aceb ,_gdda ); +};if _bbgg .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_eceb ,_dccbd (_cege ,0),_eceb .minDepth ()-_aceb ,_eceb .maxDepth ()+_aceb ,_bbaa ,false 
,false )> 0{_dagfe =true ;};if _bbgg .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_eceb ,_dccbd (_cege ,_gdda ),_eceb .minDepth (),_eceb .maxDepth (),_aaea ,false ,false )> 0{_dagfe =true ; +};if _dagfe {continue ;};_bbea :=_bbgg .scanBand ("",_eceb ,_dccbd (_bcee ,_bccf ),_eceb .minDepth (),_eceb .maxDepth (),_bage ,true ,false );if _bbea > 0{_fbbb :=(_eceb .maxDepth ()-_eceb .minDepth ())/_eceb ._dffab ;if (_bbea > 1&&float64 (_bbea )> 0.3*_fbbb )||_bbea <=10{if _bbgg .scanBand ("\u006f\u0074\u0068e\u0072",_eceb ,_dccbd (_bcee ,_bccf ),_eceb .minDepth (),_eceb .maxDepth (),_bage ,false ,true )> 0{_dagfe =true ; +};};};};_dadbb =append (_dadbb ,_eceb );};};return _dadbb ;};func _ccgb (_dabb ,_eafc _fd .PdfRectangle )bool {return _dabb .Llx <=_eafc .Llx &&_eafc .Urx <=_dabb .Urx &&_dabb .Lly <=_eafc .Lly &&_eafc .Ury <=_dabb .Ury ;};func (_efef *subpath )close (){if !_fcfde (_efef ._gcac [0],_efef .last ()){_efef .add (_efef ._gcac [0]); +};_efef ._gdgde =true ;_efef .removeDuplicates ();};func (_abbg *textWord )appendMark (_fgdfg *textMark ,_ccbfc _fd .PdfRectangle ){_abbg ._fcdae =append (_abbg ._fcdae ,_fgdfg );_abbg .PdfRectangle =_gcacd (_abbg .PdfRectangle ,_fgdfg .PdfRectangle ); +if _fgdfg ._cbdd > _abbg ._gage {_abbg ._gage =_fgdfg ._cbdd ;};_abbg ._dcggb =_ccbfc .Ury -_abbg .PdfRectangle .Lly ;}; + +// ExtractTextWithStats works like ExtractText but returns the number of characters in the output +// (`numChars`) and the number of characters that were not decoded (`numMisses`). 
+func (_efge *Extractor )ExtractTextWithStats ()(_aag string ,_aebb int ,_bdgb int ,_abdf error ){_cgd ,_aebb ,_bdgb ,_abdf :=_efge .ExtractPageText ();if _abdf !=nil {return "",_aebb ,_bdgb ,_abdf ;};return _cgd .Text (),_aebb ,_bdgb ,nil ;};func _efgef (_bgfb *PageText )error {_eacca :=_bb .GetLicenseKey (); +if _eacca !=nil &&_eacca .IsLicensed ()||_fcb {return nil ;};_bc .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_bc .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f"); +return _dea .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};var (_aggb =_dea .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072"); +_gcee =_dea .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func _fbgbac (_ggfff []*textMark ,_dcdf _fd .PdfRectangle ,_geca bool )[]*textWord {var _fcgba []*textWord ;var _decad *textWord ;if _egeg {_df .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_ggfff )); +};_beagg :=func (){if _decad !=nil {_egae :=_decad .computeText ();if !_cbcgg (_egae ){_decad ._gabac =_egae ;_fcgba =append (_fcgba ,_decad );if _egeg {_df .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_fcgba )-1,_decad .String ()); +for _agda ,_fgdfe :=range _decad ._fcdae {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_agda ,_fgdfe .String ());};};};_decad =nil 
;};};for _ ,_aadc :=range _ggfff {if _ebga &&_decad !=nil &&len (_decad ._fcdae )> 0{_fcdb :=_decad ._fcdae [len (_decad ._fcdae )-1]; +_cabeg ,_gfdf :=_cebcef (_aadc ._dfdde );_aaadc ,_bgbcd :=_cebcef (_fcdb ._dfdde );if _gfdf &&!_bgbcd &&_fcdb .inDiacriticArea (_aadc ){_decad .addDiacritic (_cabeg );continue ;};if _bgbcd &&!_gfdf &&_aadc .inDiacriticArea (_fcdb ){_decad ._fcdae =_decad ._fcdae [:len (_decad ._fcdae )-1]; +_decad .appendMark (_aadc ,_dcdf );_decad .addDiacritic (_aaadc );continue ;};};_cbbdf :=_cbcgg (_aadc ._dfdde );if _cbbdf {_beagg ();continue ;};if _decad ==nil &&!_cbbdf {_decad =_bfgb ([]*textMark {_aadc },_dcdf );continue ;};_dcge :=_decad ._gage ; +_cafgf :=_g .Abs (_fbbc (_dcdf ,_aadc )-_decad ._dcggb )/_dcge ;_gfcda :=_babb (_aadc ,_decad )/_dcge ;_cbcgb :=_decad .Width ()+_aadc .Width ();if _geca &&_aadc ._edgad .Angle ()!=0.0{_feegg :=_g .Abs (_cbcgb *_g .Tan (_aadc ._edgad .Angle ())/_dcge ); +if _cafgf > _feegg {_cafgf -=_feegg ;}else {_cafgf =0.0;};};if _gfcda >=_aeba ||!(-_aadb <=_gfcda &&_cafgf <=_ebgd ){_beagg ();_decad =_bfgb ([]*textMark {_aadc },_dcdf );continue ;};_decad .appendMark (_aadc ,_dcdf );};_beagg ();return _fcgba ;};func (_defb *wordBag )depthRange (_bdba ,_dafg int )[]int {var _ddae []int ; +for _gddf :=range _defb ._edaag {if _bdba <=_gddf &&_gddf <=_dafg {_ddae =append (_ddae ,_gddf );};};if len (_ddae )==0{return nil ;};_bg .Ints (_ddae );return _ddae ;};func _dag (_bcb []string ,_bba int ,_bac int ){for _agg ,_egb :=_bba ,_bac -1;_agg < _egb ; +_agg ,_egb =_agg +1,_egb -1{_bf :=_bcb [_agg ];_bcb [_agg ]=_bcb [_egb ];_bcb [_egb ]=_bf ;};};func (_aaccc rulingList )toGrids ()[]rulingList {if _bfdd {_df .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_aaccc );};_ccdea :=_aaccc .intersections (); +if _bfdd {_df .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 
\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_aaccc ),len (_ccdea ));for _ ,_aace :=range _bfcdb (_ccdea ){_bc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_aace ,_ccdea [_aace ]); +};};_gdgda :=make (map[int ]intSet ,len (_aaccc ));for _addb :=range _aaccc {_agddgf :=_aaccc .connections (_ccdea ,_addb );if len (_agddgf )> 0{_gdgda [_addb ]=_agddgf ;};};if _bfdd {_df .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_gdgda )); +for _ ,_bcce :=range _bfcdb (_gdgda ){_bc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_bcce ,_gdgda [_bcce ]);};};_bcea :=_cbbd (len (_aaccc ),func (_abge ,_eacad int )bool {_gcdbe ,_cabe :=len (_gdgda [_abge ]),len (_gdgda [_eacad ]);if _gcdbe !=_cabe {return _gcdbe > _cabe ; +};return _aaccc .comp (_abge ,_eacad );});if _bfdd {_df .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_bcea );};_eeaea :=[][]int {{_bcea [0]}};_bfcdd :for _ ,_daggc :=range _bcea [1:]{for _aeffa ,_gdgc :=range _eeaea {for _ ,_abfga :=range _gdgc {if _gdgda [_abfga ].has (_daggc ){_eeaea [_aeffa ]=append (_gdgc ,_daggc ); +continue _bfcdd ;};};};_eeaea =append (_eeaea ,[]int {_daggc });};if _bfdd {_df .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_eeaea );};_bg .SliceStable (_eeaea ,func (_gadf ,_deec int )bool {return len (_eeaea [_gadf ])> len (_eeaea [_deec ])}); +for _ ,_gfacc :=range _eeaea {_bg .Slice (_gfacc ,func (_gcfc ,_fgea int )bool {return _aaccc .comp (_gfacc [_gcfc ],_gfacc [_fgea ])});};_degb :=make ([]rulingList ,len (_eeaea ));for _cfbgd ,_debff :=range _eeaea {_cceg :=make (rulingList ,len (_debff )); +for _gebd ,_egfgg :=range _debff {_cceg [_gebd ]=_aaccc [_egfgg ];};_degb [_cfbgd ]=_cceg ;};if _bfdd {_df .Log .Info 
("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_degb );};var _acefb []rulingList ;for _ ,_gddb :=range _degb {if _ceeg ,_deagg :=_gddb .isActualGrid (); +_deagg {_gddb =_ceeg ;_gddb =_gddb .snapToGroups ();_acefb =append (_acefb ,_gddb );};};if _bfdd {_gcebd ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_acefb );_df .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_degb ),len (_acefb )); +};return _acefb ;};func (_dgae *shapesState )establishSubpath ()*subpath {_cbde ,_fafa :=_dgae .lastpointEstablished ();if !_fafa {_dgae ._bgf =append (_dgae ._bgf ,_bfcbce (_cbde ));};if len (_dgae ._bgf )==0{return nil ;};_dgae ._baffg =false ;return _dgae ._bgf [len (_dgae ._bgf )-1]; +};func _fdddc (_gagd map[float64 ]map[float64 ]gridTile )[]float64 {_bddg :=make ([]float64 ,0,len (_gagd ));for _bcdgg :=range _gagd {_bddg =append (_bddg ,_bcdgg );};_bg .Float64s (_bddg );_cbgea :=len (_bddg );for _cccda :=0;_cccda < _cbgea /2;_cccda ++{_bddg [_cccda ],_bddg [_cbgea -1-_cccda ]=_bddg [_cbgea -1-_cccda ],_bddg [_cccda ]; +};return _bddg ;};func _fbbc (_bdcfc _fd .PdfRectangle ,_dddc bounded )float64 {return _bdcfc .Ury -_dddc .bbox ().Lly };func (_bdea paraList )findTables (_cdbb []gridTiling )[]*textTable {_bdea .addNeighbours ();_bg .Slice (_bdea ,func (_gdbfa ,_cbfb int )bool {return _feee (_bdea [_gdbfa ],_bdea [_cbfb ])< 0}); +var _fdagg []*textTable ;if _dfge {_bacae :=_bdea .findGridTables (_cdbb );_fdagg =append (_fdagg ,_bacae ...);};if _eecdc {_fcgc :=_bdea .findTextTables ();_fdagg =append (_fdagg ,_fcgc ...);};return _fdagg ;};func _ggadf (_afedb []pathSection ){if _egaff < 0.0{return ; +};if _bfdd {_df .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 
\u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_afedb ));};for _cbffa ,_cedbd :=range _afedb {for _dfbed ,_efdea :=range _cedbd ._ddgc {for _gfffc ,_cgeg :=range _efdea ._gcac {_efdea ._gcac [_gfffc ]=_aec .Point {X :_bdffd (_cgeg .X ),Y :_bdffd (_cgeg .Y )}; +if _bfdd {_bfdf :=_efdea ._gcac [_gfffc ];if !_fcfde (_cgeg ,_bfdf ){_dadbc :=_aec .Point {X :_bfdf .X -_cgeg .X ,Y :_bfdf .Y -_cgeg .Y };_bc .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_cbffa ,_dfbed ,_gfffc ,_cgeg ,_bfdf ,_dadbc ); +};};};};};};func (_ecgg *textPara )depth ()float64 {if _ecgg ._gfadg {return -1.0;};if len (_ecgg ._bgdfb )> 0{return _ecgg ._bgdfb [0]._eefeg ;};return _ecgg ._bbdgd .depth ();};func _cecff (_edfa ,_gdged float64 )bool {return _edfa /_g .Max (_babd ,_gdged )< _gffbb }; + + +// ToTextMark returns the public view of `tm`. +func (_agaa *textMark )ToTextMark ()TextMark {return TextMark {Text :_agaa ._dfdde ,Original :_agaa ._aeeb ,BBox :_agaa ._cgcb ,Font :_agaa ._edbb ,FontSize :_agaa ._cbdd ,FillColor :_agaa ._agfcg ,StrokeColor :_agaa ._acdc ,Orientation :_agaa ._gdcb ,DirectObject :_agaa ._gbae ,ObjString :_agaa ._abfde ,Tw :_agaa .Tw ,Th :_agaa .Th ,Tc :_agaa ._bbgc ,Index :_agaa ._ceag }; +}; + +// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText. +// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful. 
+// +// Replace with a function like Extract() (*PageText, error) +func (_abdfd *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_dde ,_bgd ,_bea ,_afdb :=_abdfd .extractPageText (_abdfd ._fed ,_abdfd ._dd ,_aec .IdentityMatrix (),0,false );if _afdb !=nil &&_afdb !=_fd .ErrColorOutOfRange {return nil ,0,0,_afdb ; +};if _abdfd ._cab !=nil {_dde ._fcaa ._egfd =_abdfd ._cab .ExtractionMode ;};_dde .computeViews ();_afdb =_efgef (_dde );if _afdb !=nil {return nil ,0,0,_afdb ;};if _abdfd ._cab !=nil {if _abdfd ._cab .ApplyCropBox &&_abdfd ._bdc !=nil {_dde .ApplyArea (*_abdfd ._bdc ); +};_dde ._fcaa ._affg =_abdfd ._cab .DisableDocumentTags ;};return _dde ,_bgd ,_bea ,nil ;}; + +// TextMarkArray is a collection of TextMarks. +type TextMarkArray struct{_dbce []TextMark };func _bagf (_dgbe []*textLine )map[float64 ][]*textLine {_bg .Slice (_dgbe ,func (_dcbf ,_bagc int )bool {return _dgbe [_dcbf ]._eefeg < _dgbe [_bagc ]._eefeg });_bcfda :=map[float64 ][]*textLine {};for _ ,_faaag :=range _dgbe {_ggff :=_fccf (_faaag ); +_ggff =_g .Round (_ggff );_bcfda [_ggff ]=append (_bcfda [_ggff ],_faaag );};return _bcfda ;};func _beaf (_dfca string )bool {if _e .RuneCountInString (_dfca )< _caefg {return false ;};_afgf ,_ccag :=_e .DecodeLastRuneInString (_dfca );if _ccag <=0||!_eb .Is (_eb .Hyphen ,_afgf ){return false ; +};_afgf ,_ccag =_e .DecodeLastRuneInString (_dfca [:len (_dfca )-_ccag ]);return _ccag > 0&&!_eb .IsSpace (_afgf );};func (_caff rulingList )snapToGroups ()rulingList {_eafa ,_gbac :=_caff .vertsHorzs ();if len (_eafa )> 0{_eafa =_eafa .snapToGroupsDirection (); +};if len (_gbac )> 0{_gbac =_gbac .snapToGroupsDirection ();};_bgga :=append (_eafa ,_gbac ...);_bgga .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _bgga ;};func _gbcd (_cfec []*textLine ,_fcfe string )string {var _aaeb _c .Builder ; +_dfda :=0.0;for _dafe ,_eaca :=range _cfec {_gedc :=_eaca .text ();_gabed :=_eaca ._eefeg ;if _dafe < len (_cfec )-1{_dfda 
=_cfec [_dafe +1]._eefeg ;}else {_dfda =0.0;};_aaeb .WriteString (_fcfe );_aaeb .WriteString (_gedc );if _dfda !=_gabed {_aaeb .WriteString ("\u000a"); +}else {_aaeb .WriteString ("\u0020");};};return _aaeb .String ();};func (_fcefe paraList )readBefore (_gcdd []int ,_gcbeg ,_dfaf int )bool {_cffcc ,_eabba :=_fcefe [_gcbeg ],_fcefe [_dfaf ];if _dfaeg (_cffcc ,_eabba )&&_cffcc .Lly > _eabba .Lly {return true ; +};if !(_cffcc ._ecedc .Urx < _eabba ._ecedc .Llx ){return false ;};_fdada ,_cdfdc :=_cffcc .Lly ,_eabba .Lly ;if _fdada > _cdfdc {_cdfdc ,_fdada =_fdada ,_cdfdc ;};_cdcd :=_g .Max (_cffcc ._ecedc .Llx ,_eabba ._ecedc .Llx );_eced :=_g .Min (_cffcc ._ecedc .Urx ,_eabba ._ecedc .Urx ); +_fcga :=_fcefe .llyRange (_gcdd ,_fdada ,_cdfdc );for _ ,_cbaeg :=range _fcga {if _cbaeg ==_gcbeg ||_cbaeg ==_dfaf {continue ;};_addg :=_fcefe [_cbaeg ];if _addg ._ecedc .Llx <=_eced &&_cdcd <=_addg ._ecedc .Urx {return false ;};};return true ;};type textState struct{_bdab float64 ; +_cdeb float64 ;_fgcd float64 ;_eadg float64 ;_ddeg float64 ;_bag RenderMode ;_abbcb float64 ;_fedd *_fd .PdfFont ;_aecb _fd .PdfRectangle ;_acdb int ;_dcea int ;};func (_eaafd paraList )eventNeighbours (_cagec []event )map[*textPara ][]int {_bg .Slice (_cagec ,func (_eedb ,_ggaeb int )bool {_cccbf ,_afafa :=_cagec [_eedb ],_cagec [_ggaeb ]; +_cdddf ,_aeec :=_cccbf ._aeef ,_afafa ._aeef ;if _cdddf !=_aeec {return _cdddf < _aeec ;};if _cccbf ._ceedf !=_afafa ._ceedf {return _cccbf ._ceedf ;};return _eedb < _ggaeb ;});_ggbc :=make (map[int ]intSet );_fffcc :=make (intSet );for _ ,_efddb :=range _cagec {if _efddb ._ceedf {_ggbc [_efddb ._cfgde ]=make (intSet ); +for _cafdb :=range _fffcc {if _cafdb !=_efddb ._cfgde {_ggbc [_efddb ._cfgde ].add (_cafdb );_ggbc [_cafdb ].add (_efddb ._cfgde );};};_fffcc .add (_efddb ._cfgde );}else {_fffcc .del (_efddb ._cfgde );};};_dgcaa :=map[*textPara ][]int {};for _dfbeg ,_feeec :=range _ggbc {_ffgb :=_eaafd [_dfbeg ]; +if len (_feeec )==0{_dgcaa [_ffgb ]=nil 
;continue ;};_bbabf :=make ([]int ,len (_feeec ));_eccg :=0;for _bbcde :=range _feeec {_bbabf [_eccg ]=_bbcde ;_eccg ++;};_dgcaa [_ffgb ]=_bbabf ;};return _dgcaa ;};type stateStack []*textState ;func (_edceb lineRuling )asRuling ()(*ruling ,bool ){_bgbe :=ruling {_fbgb :_edceb ._acbd ,Color :_edceb .Color ,_bcab :_bcfag }; +switch _edceb ._acbd {case _bfdef :_bgbe ._faad =_edceb .xMean ();_bgbe ._feae =_g .Min (_edceb ._cebge .Y ,_edceb ._cfcd .Y );_bgbe ._feagb =_g .Max (_edceb ._cebge .Y ,_edceb ._cfcd .Y );case _caec :_bgbe ._faad =_edceb .yMean ();_bgbe ._feae =_g .Min (_edceb ._cebge .X ,_edceb ._cfcd .X ); +_bgbe ._feagb =_g .Max (_edceb ._cebge .X ,_edceb ._cfcd .X );default:_df .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_edceb ._acbd );return nil ,false ;};return &_bgbe ,true ; +};func (_ddd *subpath )add (_gaac ..._aec .Point ){_ddd ._gcac =append (_ddd ._gcac ,_gaac ...)};type ruling struct{_fbgb rulingKind ;_bcab markKind ;_ff .Color ;_faad float64 ;_feae float64 ;_feagb float64 ;_bccfg float64 ;};func _cedd (_cgeac ,_cbgf ,_fdgf ,_ecbfe *textPara )*textTable {_cfgcge :=&textTable {_deafe :2,_bfffcc :2,_afcbd :make (map[uint64 ]*textPara ,4)}; +_cfgcge .put (0,0,_cgeac );_cfgcge .put (1,0,_cbgf );_cfgcge .put (0,1,_fdgf );_cfgcge .put (1,1,_ecbfe );return _cfgcge ;};func (_ddagc *ruling )alignsPrimary (_fbgf *ruling )bool {return _ddagc ._fbgb ==_fbgf ._fbgb &&_g .Abs (_ddagc ._faad -_fbgf ._faad )< _fbc *0.5; +};func _cege (_ccca *wordBag ,_ccbdc *textWord ,_edbf float64 )bool {return _ccbdc .Llx < _ccca .Urx +_edbf &&_ccca .Llx -_edbf < _ccbdc .Urx ;};func (_eeag *shapesState )fill (_egfga *[]pathSection ){_cfbb :=pathSection {_ddgc :_eeag ._bgf ,Color :_eeag ._eefg .getFillColor ()}; +*_egfga =append (*_egfga ,_cfbb );if _bfdd {_gebac :=_cfbb .bbox ();_bc .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a 
%\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_egfga ),len (_cfbb ._ddgc ),_eeag ,_cfbb .Color ,_gebac ,_gebac .Width (),_gebac .Height ()); +if _cgde {for _ceee ,_agcgd :=range _cfbb ._ddgc {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ceee ,_agcgd );if _ceee ==10{break ;};};};};};func (_caed *textObject )setFont (_fbb string ,_eeebe float64 )error {if _caed ==nil {return nil ; +};_caed ._ddgb ._ddeg =_eeebe ;_dgcbe ,_becd :=_caed .getFont (_fbb );if _becd !=nil {return _becd ;};_caed ._ddgb ._fedd =_dgcbe ;return nil ;};func _fccf (_edab *textLine )float64 {return _edab ._eecg [0].Llx };func _abce (_dage byte )bool {for _ ,_agbf :=range _begf {if []byte (_agbf )[0]==_dage {return true ; +};};return false ;};func (_abab rulingList )snapToGroupsDirection ()rulingList {_abab .sortStrict ();_acdbb :=make (map[*ruling ]rulingList ,len (_abab ));_daac :=_abab [0];_ggca :=func (_cedgb *ruling ){_daac =_cedgb ;_acdbb [_daac ]=rulingList {_cedgb }}; +_ggca (_abab [0]);for _ ,_eecdcf :=range _abab [1:]{if _eecdcf ._faad < _daac ._faad -_acde {_df .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_daac ,_eecdcf ); +};if _eecdcf ._faad > _daac ._faad +_fbc {_ggca (_eecdcf );}else {_acdbb [_daac ]=append (_acdbb [_daac ],_eecdcf );};};_eecda :=make (map[*ruling ]float64 ,len (_acdbb ));_degcac :=make (map[*ruling ]*ruling ,len (_abab ));for _dfgee ,_eddea :=range _acdbb {_eecda [_dfgee ]=_eddea .mergePrimary (); +for _ ,_dcbca :=range 
_eddea {_degcac [_dcbca ]=_dfgee ;};};for _ ,_gdaed :=range _abab {_gdaed ._faad =_eecda [_degcac [_gdaed ]];};_egba :=make (rulingList ,0,len (_abab ));for _ ,_egec :=range _acdbb {_bfaaf :=_egec .splitSec ();for _gbeb ,_dgbeg :=range _bfaaf {_fgedc :=_dgbeg .merge (); +if len (_egba )> 0{_fcec :=_egba [len (_egba )-1];if _fcec .alignsPrimary (_fgedc )&&_fcec .alignsSec (_fgedc ){_df .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_gbeb ,_fcec ,_fgedc ); +continue ;};};_egba =append (_egba ,_fgedc );};};_egba .sortStrict ();return _egba ;};func (_ebfa paraList )extractTables (_fdfd []gridTiling )paraList {if _adfgd {_df .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ebfa )); +};if len (_ebfa )< _bacc {return _ebfa ;};_cfbbf :=_ebfa .findTables (_fdfd );if _adfgd {_df .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_cfbbf )); +for _fddb ,_cccada :=range _cfbbf {_cccada .log (_bc .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_fddb ));};};return _ebfa .applyTables (_cfbbf );};func _bcfba (_eaeaf _fd .PdfRectangle ,_fcgef []*textLine )*textPara {return &textPara {PdfRectangle :_eaeaf ,_bgdfb :_fcgef }; +};type textTable struct{_fd .PdfRectangle ;_deafe ,_bfffcc int ;_bbcb bool ;_afcbd map[uint64 ]*textPara ;_bbgcf map[uint64 ]compositeCell ;};func _afgbe (_cecce ,_aebgg _aec .Point )bool {_ceba :=_g .Abs (_cecce .X -_aebgg .X );_acfb 
:=_g .Abs (_cecce .Y -_aebgg .Y ); +return _cecff (_acfb ,_ceba );};func _ebba (_fdaac []*textLine ,_gafc ,_gbdf float64 )[]*textLine {var _cefdc []*textLine ;for _ ,_dgaba :=range _fdaac {if _gafc ==-1{if _dgaba ._eefeg > _gbdf {_cefdc =append (_cefdc ,_dgaba );};}else {if _dgaba ._eefeg > _gbdf &&_dgaba ._eefeg < _gafc {_cefdc =append (_cefdc ,_dgaba ); +};};};return _cefdc ;};func (_dffe *textLine )pullWord (_cdbg *wordBag ,_ccffe *textWord ,_dfcb int ){_dffe .appendWord (_ccffe );_cdbg .removeWord (_ccffe ,_dfcb );}; + +// NewEditor returns a new Editor object +func NewEditor (reader *_fd .PdfReader )*Editor {return &Editor {_dgb :reader }};type intSet map[int ]struct{}; + +// PageText represents the layout of text on a device page. +type PageText struct{_facbc []*textMark ;_dcag string ;_cgf []TextMark ;_gbab []TextTable ;_fdcd _fd .PdfRectangle ;_bcgae []pathSection ;_ffad []pathSection ;_eebg *_fd .StructTreeRoot ;_ddegf _gc .PdfObject ;_bbge *_cb .ContentStreamOperations ;_fcaa PageTextOptions ; +};var _dadb string ="\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"; +type lists []*list ;func _gfad (_bcedd *textLine ,_baae []*textLine ,_cdfd ,_edgd float64 )[]*textLine {_gdef :=[]*textLine 
{};for _ ,_defg :=range _baae {if _defg ._eefeg >=_cdfd {if _edgd !=-1&&_defg ._eefeg < _edgd {if _defg .text ()!=_bcedd .text (){if _g .Round (_defg .Llx )< _g .Round (_bcedd .Llx ){break ; +};_gdef =append (_gdef ,_defg );};}else if _edgd ==-1{if _defg ._eefeg ==_bcedd ._eefeg {if _defg .text ()!=_bcedd .text (){_gdef =append (_gdef ,_defg );};continue ;};_bbdg :=_ccagd (_bcedd ,_baae );if _bbdg !=-1&&_defg ._eefeg <=_bbdg {_gdef =append (_gdef ,_defg ); +};};};};return _gdef ;};func _feegf (_aeed _gc .PdfObject ,_bafb _ff .Color )(_ae .Image ,error ){_gebab ,_cfeca :=_gc .GetStream (_aeed );if !_cfeca {return nil ,nil ;};_fgba ,_bccc :=_fd .NewXObjectImageFromStream (_gebab );if _bccc !=nil {return nil ,_bccc ; +};_ccbcg ,_bccc :=_fgba .ToImage ();if _bccc !=nil {return nil ,_bccc ;};return _ddbg (_ccbcg ,_bafb ),nil ;};type rulingKind int ; + +// ExtractText processes and extracts all text data in content streams and returns as a string. +// It takes into account character encodings in the PDF file, which are decoded by +// CharcodeBytesToUnicode. +// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = �). 
+func (_abbe *Extractor )ExtractText ()(string ,error ){_agef ,_ ,_ ,_dffg :=_abbe .ExtractTextWithStats ();return _agef ,_dffg ;};func _cdf (_ceec []Font ,_dda string )bool {for _ ,_gdd :=range _ceec {if _gdd .FontName ==_dda {return true ;};};return false ; +};func _gfgc (_cfab []int )[]int {_geec :=make ([]int ,len (_cfab ));for _agfcf ,_fade :=range _cfab {_geec [len (_cfab )-1-_agfcf ]=_fade ;};return _geec ;};func (_aega rulingList )toTilings ()(rulingList ,[]gridTiling ){_aega .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s"); +if len (_aega )==0{return nil ,nil ;};_aega =_aega .tidied ("\u0061\u006c\u006c");_aega .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_beae :=_aega .toGrids ();_faff :=make ([]gridTiling ,len (_beae ));for _gbabc ,_cgcae :=range _beae {_faff [_gbabc ]=_cgcae .asTiling (); +};return _aega ,_faff ;}; + +// Options extractor options. +type Options struct{ + +// DisableDocumentTags specifies whether to use the document tags during list extraction. +DisableDocumentTags bool ; + +// ApplyCropBox will extract page text based on page cropbox if set to `true`. +ApplyCropBox bool ; + +// Text extraction mode, default is ExtractionModeLayout. +ExtractionMode ExtractionMode ; + +// IncludeAnnotations specifies whether to include annotations in the extraction process, default value is `false`. +IncludeAnnotations bool ; + +// RelaxedMode specifies whether to use relaxed mode for processing the objects, +// If enabled UniPDF will automatically try to fix invalid parameters length and value. +// Default is `false`. +RelaxedMode bool ;};func (_aafdf *ruling )equals (_fdbe *ruling )bool {return _aafdf ._fbgb ==_fdbe ._fbgb &&_bfagg (_aafdf ._faad ,_fdbe ._faad )&&_bfagg (_aafdf ._feae ,_fdbe ._feae )&&_bfagg (_aafdf ._feagb ,_fdbe ._feagb );};func _dfaeg (_bbee ,_gacef *textPara )bool {return _ebdf (_bbee ._ecedc ,_gacef ._ecedc )}; + + +// ExtractionMode defines different types of extraction mode. 
+type ExtractionMode int ;func _fdebf (_afbd *list ,_cfcf *_c .Builder ,_agca *string ){_dddg :=_feag (_afbd ,_agca );_cfcf .WriteString (_dddg );for _ ,_febc :=range _afbd ._eegf {_fbfgd :=*_agca +"\u0020\u0020\u0020";_fdebf (_febc ,_cfcf ,&_fbfgd );}; +};func (_eeaf *textObject )getFontDict (_ecd string )(_fagf _gc .PdfObject ,_fgccg error ){_cebd :=_eeaf ._gcbc ;if _cebd ==nil {_df .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_ecd ); +return nil ,nil ;};_fagf ,_bbcf :=_cebd .GetFontByName (_gc .PdfObjectName (_ecd ));if !_bbcf {_df .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_ecd ); +return nil ,_dea .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _fagf ,nil ;};func (_abaf *textTable )markCells (){for _feadb :=0;_feadb < _abaf ._bfffcc ;_feadb ++{for _gaee :=0; +_gaee < _abaf ._deafe ;_gaee ++{_dbfc :=_abaf .get (_gaee ,_feadb );if _dbfc !=nil {_dbfc ._ddega =true ;};};};};func (_edaad paraList )sortTopoOrder (){_abbb :=_edaad .topoOrder ();_edaad .reorder (_abbb )};func (_gbfaa *textTable )get (_gbbfb ,_egfdd int )*textPara {return _gbfaa ._afcbd [_fbacd (_gbbfb ,_egfdd )]; +};func _adad (_cagd _fd .PdfRectangle )*ruling {return &ruling {_fbgb :_bfdef ,_faad :_cagd .Llx ,_feae :_cagd .Lly ,_feagb :_cagd .Ury };};func _dddef (_fdae ,_gdeg _aec .Point ,_eedad _ff .Color )(*ruling ,bool ){_fadgc :=lineRuling {_cebge :_fdae ,_cfcd :_gdeg ,_acbd :_gffbd (_fdae ,_gdeg ),Color :_eedad }; +if _fadgc ._acbd ==_bfba {return nil ,false ;};return _fadgc .asRuling ();};func _decce (_bada ,_fdde int )int {if _bada > _fdde {return _bada ;};return _fdde ;};func (_gbfc 
*shapesState )quadraticTo (_deagd ,_feegd ,_dfddc ,_cgcc float64 ){if _face {_df .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a"); +};_gbfc .addPoint (_dfddc ,_cgcc );};func (_beff *textObject )reset (){_beff ._def =_aec .IdentityMatrix ();_beff ._febb =_aec .IdentityMatrix ();_beff ._agfc =nil ;}; + +// PageFonts represents extracted fonts on a PDF page. +type PageFonts struct{Fonts []Font ;};func (_fecd paraList )topoOrder ()[]int {if _acab {_df .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_daggf :=len (_fecd );_abba :=make ([]bool ,_daggf );_fgbg :=make ([]int ,0,_daggf ); +_gaacf :=_fecd .llyOrdering ();var _degfa func (_baegf int );_degfa =func (_feab int ){_abba [_feab ]=true ;for _fcceg :=0;_fcceg < _daggf ;_fcceg ++{if !_abba [_fcceg ]{if _fecd .readBefore (_gaacf ,_feab ,_fcceg ){_degfa (_fcceg );};};};_fgbg =append (_fgbg ,_feab ); +};for _fgdce :=0;_fgdce < _daggf ;_fgdce ++{if !_abba [_fgdce ]{_degfa (_fgdce );};};return _gfgc (_fgbg );};func (_dfcff *textLine )toTextMarks (_bcfa *int )[]TextMark {var _daec []TextMark ;for _ ,_gdga :=range _dfcff ._eecg {if _gdga ._debgd {_daec =_fafacf (_daec ,_bcfa ,"\u0020"); +};_baeg :=_gdga .toTextMarks (_bcfa );_daec =append (_daec ,_baeg ...);};return _daec ;};func (_ebc *textObject )setWordSpacing (_eece float64 ){if _ebc ==nil {return ;};_ebc ._ddgb ._cdeb =_eece ;};func (_eadd *PageText )computeViews (){if _eadd ._fcaa ._egfd ==ExtractionModePlain {_eadd ._dcag =_eadd .getText (); +return ;};_cdbf :=_eadd .getParagraphs ();_dedf :=new (_de .Buffer );_cdbf .writeText (_dedf );_eadd ._dcag =_dedf .String ();_eadd ._cgf =_cdbf .toTextMarks ();_eadd ._gbab =_cdbf .tables ();if _adfgd {_df .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_eadd ._gbab )); +};};func (_baef *PageFonts )extractPageResourcesToFont (_aefb *_fd .PdfPageResources )error {if 
_aefb .Font ==nil {return _dea .New (_eeb );};_fag ,_bbd :=_gc .GetDict (_aefb .Font );if !_bbd {return _dea .New (_age );};for _ ,_ffg :=range _fag .Keys (){var (_dbd =true ; +_cfe []byte ;_eda string ;);_dcgg ,_dcb :=_aefb .GetFontByName (_ffg );if !_dcb {return _dea .New (_daf );};_bddc ,_ac :=_fd .NewPdfFontFromPdfObject (_dcgg );if _ac !=nil {return _ac ;};_gee :=_bddc .FontDescriptor ();_fce :=_bddc .FontDescriptor ().FontName .String (); +_eea :=_bddc .Subtype ();if _cdf (_baef .Fonts ,_fce ){continue ;};if len (_bddc .ToUnicode ())==0{_dbd =false ;};if _gee .FontFile !=nil {if _dbab ,_abc :=_gc .GetStream (_gee .FontFile );_abc {_cfe ,_ac =_gc .DecodeStream (_dbab );if _ac !=nil {return _ac ; +};_eda =_fce +"\u002e\u0070\u0066\u0062";};}else if _gee .FontFile2 !=nil {if _cfb ,_dab :=_gc .GetStream (_gee .FontFile2 );_dab {_cfe ,_ac =_gc .DecodeStream (_cfb );if _ac !=nil {return _ac ;};_eda =_fce +"\u002e\u0074\u0074\u0066";};}else if _gee .FontFile3 !=nil {if _bfd ,_dge :=_gc .GetStream (_gee .FontFile3 ); +_dge {_cfe ,_ac =_gc .DecodeStream (_bfd );if _ac !=nil {return _ac ;};_eda =_fce +"\u002e\u0063\u0066\u0066";};};if len (_eda )< 1{_df .Log .Debug (_fga );};_ecae :=Font {FontName :_fce ,PdfFont :_bddc ,IsCID :_bddc .IsCID (),IsSimple :_bddc .IsSimple (),ToUnicode :_dbd ,FontType :_eea ,FontData :_cfe ,FontFileName :_eda ,FontDescriptor :_gee }; +_baef .Fonts =append (_baef .Fonts ,_ecae );};return nil ;};func (_cfef *textObject )setTextMatrix (_gba []float64 ){if len (_gba )!=6{_df .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_gba )); +return ;};_adce ,_dbef ,_eebc ,_ebbe ,_bfcb ,_bgef :=_gba [0],_gba [1],_gba [2],_gba [3],_gba [4],_gba [5];_cfef ._def =_aec .NewMatrix (_adce ,_dbef ,_eebc ,_ebbe ,_bfcb ,_bgef );_cfef ._febb =_cfef ._def ;};func (_edcfd *textMark )inDiacriticArea (_bdgbd *textMark )bool {_abfgc :=_edcfd .Llx -_bdgbd .Llx ; +_gdgf 
:=_edcfd .Urx -_bdgbd .Urx ;_ddc :=_edcfd .Lly -_bdgbd .Lly ;return _g .Abs (_abfgc +_gdgf )< _edcfd .Width ()*_bbdc &&_g .Abs (_ddc )< _edcfd .Height ()*_bbdc ;};type list struct{_ddage []*textLine ;_ecbgd string ;_eegf []*list ;_eeca string ;}; +func _faga (_debgg []compositeCell )[]float64 {var _aaag []*textLine ;_bdge :=0;for _ ,_defbe :=range _debgg {_bdge +=len (_defbe .paraList );_aaag =append (_aaag ,_defbe .lines ()...);};_bg .Slice (_aaag ,func (_bgcca ,_ebbae int )bool {_edec ,_adfbe :=_aaag [_bgcca ],_aaag [_ebbae ]; +_eceec ,_bggb :=_edec ._eefeg ,_adfbe ._eefeg ;if !_cfdab (_eceec -_bggb ){return _eceec < _bggb ;};return _edec .Llx < _adfbe .Llx ;});if _adfgd {_bc .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_bdge ,len (_aaag )); +for _cebaf ,_bdcfa :=range _aaag {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cebaf ,_bdcfa );};};var _eeced []float64 ;_bfbfb :=_aaag [0];var _ddgac [][]*textLine ;_cdfcf :=[]*textLine {_bfbfb };for _dddefe ,_eaefa :=range _aaag [1:]{if _eaefa .Ury < _bfbfb .Lly {_bdeeg :=0.5*(_eaefa .Ury +_bfbfb .Lly ); +if _adfgd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_dddefe ,_eaefa .Ury ,_bfbfb .Lly ,_bdeeg ,_bfbfb ,_eaefa ); +};_eeced =append (_eeced ,_bdeeg );_ddgac =append (_ddgac ,_cdfcf );_cdfcf =nil ;};_cdfcf =append (_cdfcf ,_eaefa );if _eaefa .Lly < _bfbfb .Lly {_bfbfb =_eaefa ;};};if len (_cdfcf )> 0{_ddgac =append (_ddgac ,_cdfcf );};if _adfgd {_bc .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_eeced ); +};if _adfgd {_df .Log 
.Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_debgg ));for _ecfagd ,_adgb :=range _debgg {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ecfagd ,_adgb );};_df .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_ddgac )); +for _gccfa ,_dbbef :=range _ddgac {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_gccfa ,len (_dbbef ));for _cgbbd ,_gaea :=range _dbbef {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cgbbd ,_gaea );};};};_baedb :=true ; +for _eeggb ,_ffff :=range _ddgac {_dfeec :=true ;for _fccef ,_geae :=range _debgg {if _adfgd {_bc .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_eeggb ,len (_ddgac ),_fccef ,len (_debgg ),_geae ); +};if !_geae .hasLines (_ffff ){if _adfgd {_bc .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_eeggb ,len (_ddgac ),_fccef ,len (_debgg )); +};_dfeec =false ;break ;};};if !_dfeec {_baedb =false ;break ;};};if !_baedb {if _adfgd {_df .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg"); +};_eeced =nil ;};if _adfgd &&_eeced !=nil {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_eeced );};return _eeced ; +};func (_dcdd *textWord )absorb (_efbd *textWord ){_dcdd .PdfRectangle =_gcacd (_dcdd .PdfRectangle ,_efbd .PdfRectangle 
);_dcdd ._fcdae =append (_dcdd ._fcdae ,_efbd ._fcdae ...);};func (_cfbf lineRuling )xMean ()float64 {return 0.5*(_cfbf ._cebge .X +_cfbf ._cfcd .X )}; +func _fdfg (_aebba *_fd .Image ,_beagc _ff .Color )_ae .Image {_cdde ,_gbefe :=int (_aebba .Width ),int (_aebba .Height );_cdddc :=_ae .NewRGBA (_ae .Rect (0,0,_cdde ,_gbefe ));for _agebg :=0;_agebg < _gbefe ;_agebg ++{for _gged :=0;_gged < _cdde ;_gged ++{_afdce ,_becge :=_aebba .ColorAt (_gged ,_agebg ); +if _becge !=nil {_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_gged ,_agebg ); +continue ;};_cbccbd ,_bagfd ,_eaeb ,_ :=_afdce .RGBA ();var _eaege _ff .Color ;if _cbccbd +_bagfd +_eaeb ==0{_eaege =_ff .Transparent ;}else {_eaege =_beagc ;};_cdddc .Set (_gged ,_agebg ,_eaege );};};return _cdddc ;}; + +// PageImages represents extracted images on a PDF page with spatial information: +// display position and size. 
+type PageImages struct{Images []ImageMark ;};func (_fggba rulingList )splitSec ()[]rulingList {_bg .Slice (_fggba ,func (_ccfcg ,_cffce int )bool {_fgbe ,_dceec :=_fggba [_ccfcg ],_fggba [_cffce ];if _fgbe ._feae !=_dceec ._feae {return _fgbe ._feae < _dceec ._feae ; +};return _fgbe ._feagb < _dceec ._feagb ;});_badde :=make (map[*ruling ]struct{},len (_fggba ));_ggef :=func (_dcce *ruling )rulingList {_cedc :=rulingList {_dcce };_badde [_dcce ]=struct{}{};for _ ,_agaf :=range _fggba {if _ ,_gedae :=_badde [_agaf ]; +_gedae {continue ;};for _ ,_cadcc :=range _cedc {if _agaf .alignsSec (_cadcc ){_cedc =append (_cedc ,_agaf );_badde [_agaf ]=struct{}{};break ;};};};return _cedc ;};_gbgg :=[]rulingList {_ggef (_fggba [0])};for _ ,_efec :=range _fggba [1:]{if _ ,_dbfdf :=_badde [_efec ]; +_dbfdf {continue ;};_gbgg =append (_gbgg ,_ggef (_efec ));};return _gbgg ;};func (_cdaa rulingList )mergePrimary ()float64 {_ccbbf :=_cdaa [0]._faad ;for _ ,_efaa :=range _cdaa [1:]{_ccbbf +=_efaa ._faad ;};return _ccbbf /float64 (len (_cdaa ));}; + +// BidiText represents a bidi text organized in its visual order +// with base direction of the text. 
+type BidiText struct{_gf string ;_eg string ;};func _bfagg (_dddd ,_badga float64 )bool {return _g .Abs (_dddd -_badga )<=_eedab };func (_gbcgf *wordBag )getDepthIdx (_accad float64 )int {_fdef :=_gbcgf .depthIndexes ();_beeb :=_aggfc (_accad );if _beeb < _fdef [0]{return _fdef [0]; +};if _beeb > _fdef [len (_fdef )-1]{return _fdef [len (_fdef )-1];};return _beeb ;};func (_bccge paraList )merge ()*textPara {_df .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bccge )); +if len (_bccge )==0{return nil ;};_bccge .sortReadingOrder ();_efbaa :=_bccge [0].PdfRectangle ;_abcfd :=_bccge [0]._bgdfb ;for _ ,_fbee :=range _bccge [1:]{_efbaa =_gcacd (_efbaa ,_fbee .PdfRectangle );_abcfd =append (_abcfd ,_fbee ._bgdfb ...);};return _bcfba (_efbaa ,_abcfd ); +};func (_cdce *textTable )compositeColCorridors ()map[int ][]float64 {_ggag :=make (map[int ][]float64 ,_cdce ._deafe );if _adfgd {_df .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_cdce ._deafe ); +};for _fbgc :=0;_fbgc < _cdce ._deafe ;_fbgc ++{_ggag [_fbgc ]=nil ;};return _ggag ;};func (_bfae TextTable )getCellInfo (_gcaa TextMark )[][]int {for _egee ,_bfeg :=range _bfae .Cells {for _effc :=range _bfeg {_cbe :=&_bfeg [_effc ].Marks ;if _cbe .exists (_gcaa ){return [][]int {{_egee },{_effc }}; +};};};return nil ;}; + +// Replace takes a pattern and replaces all the texts that much the pattern with `replacement`. 
+func (_add *Editor )Replace (pattern string ,replacement string ,pages []int )error {_ada :=map[int ]Match {};for _ ,_bfdec :=range pages {_dfa ,_gbff :=_add ._dgb .GetPage (_bfdec );if _gbff !=nil {return _gbff ;};_fagc ,_gbff :=New (_dfa );if _gbff !=nil {return _gbff ; +};_gfcfd ,_ ,_ ,_gbff :=_fagc .ExtractPageText ();if _gbff !=nil {return _gbff ;};_adde :="";_cbcc :=_gfcfd .Text ();_gbca ,_gbff :=_fecf (pattern ,_cbcc );if _gbff !=nil {return _gbff ;};_fae :=_gfcfd .Marks ();_dfc :=[]Box {};for _ ,_eag :=range _gbca {_bfac ,_abfg ,_ggg :=_cfeg (_eag ,_fae ,pattern ); +if _ggg !=nil {return _ggg ;};_bgba :=_dafd (_bfac );_dfc =append (_dfc ,_abfg );replacement ,_ggg =_ged (_bgba ,replacement ,pattern );if _ggg !=nil {return _ggg ;};};_adde =_gfcfd .GetContentStreamOps ().String ();_adf :=Match {Pattern :pattern ,Indexes :_gbca ,Locations :_dfc }; +_dfa .SetContentStreams ([]string {_adde },_gc .NewFlateEncoder ());_ada [_bfdec ]=_adf ;};return nil ;}; + +// String returns a human readable description of `vecs`. 
+func (_gbdgd rulingList )String ()string {if len (_gbdgd )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_fdbc ,_defcf :=_gbdgd .vertsHorzs ();_edbg :=len (_fdbc );_ggce :=len (_defcf );if _edbg ==0||_ggce ==0{return _bc .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_edbg ,_ggce ); +};_edadf :=_fd .PdfRectangle {Llx :_fdbc [0]._faad ,Urx :_fdbc [_edbg -1]._faad ,Lly :_defcf [_ggce -1]._faad ,Ury :_defcf [0]._faad };return _bc .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_edbg ,_ggce ,_edadf ); +};func (_ebgaf rulingList )connections (_eeggc map[int ]intSet ,_gega int )intSet {_acce :=make (intSet );_eaddb :=make (intSet );var _ebcd func (int );_ebcd =func (_efgb int ){if !_eaddb .has (_efgb ){_eaddb .add (_efgb );for _degfeb :=range _ebgaf {if _eeggc [_degfeb ].has (_efgb ){_acce .add (_degfeb ); +};};for _addbc :=range _ebgaf {if _acce .has (_addbc ){_ebcd (_addbc );};};};};_ebcd (_gega );return _acce ;};func _ggaa (_cfdgf []*textMark ,_eabb _fd .PdfRectangle )string {_df .Log .Trace ("\u006d\u0061\u006b\u0065\u0053i\u006d\u0070\u006c\u0065\u0054\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020e\u006c\u0065\u006d\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",len (_cfdgf ),_eabb ); +_ggfacd :="";if len (_cfdgf )==0{return _ggfacd ;};_aegg :=_fbgbac (_cfdgf ,_eabb ,true );if len (_aegg )==0{return _ggfacd ;};_adedf :=0.0;_dbefg :=true ;_eaea :="";for _ ,_agddg :=range _aegg {_gfbg :=_agddg ._gage ;if _gfbg > _dbbg {_gfbg =_dbbg ;}; +if (_agddg ._dcggb -_adedf > _eabab *_gfbg &&_adedf !=0.0)||(_adedf -_agddg ._dcggb > _gfbg *10){_cecba :=_bbb ([]rune (_eaea ));_eaea =_cecba ._gf ;_eaea +="\u000a";_ggfacd +=_eaea ;_eaea ="";}else {if !_dbefg {_eaea +="\u0020";};};_eaea +=_agddg ._gabac ; +_dbefg =false ;_adedf =_agddg ._dcggb ;};if _eaea !=""{_cfbbd :=_bbb ([]rune (_eaea ));_eaea =_cfbbd ._gf ;_eaea +="\u000a";_ggfacd +=_eaea 
;};return _ggfacd ;};func (_eggg paraList )sortReadingOrder (){_df .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_eggg )); +if len (_eggg )<=1{return ;};_eggg .computeEBBoxes ();_bg .Slice (_eggg ,func (_bcfbg ,_fdda int )bool {return _ffbfc (_eggg [_bcfbg ],_eggg [_fdda ])<=0});};func (_cdagg *textObject )getFont (_agga string )(*_fd .PdfFont ,error ){if _cdagg ._faec ._afd !=nil {_gggc ,_egcgc :=_cdagg .getFontDict (_agga ); +if _egcgc !=nil {_df .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073",_agga ,_egcgc .Error ());return nil ,_egcgc ; +};_cdagg ._faec ._ddg ++;_egfe ,_aeea :=_cdagg ._faec ._afd [_gggc .String ()];if _aeea {_egfe ._cbda =_cdagg ._faec ._ddg ;return _egfe ._feea ,nil ;};};_cag ,_gebaa :=_cdagg .getFontDict (_agga );if _gebaa !=nil {return nil ,_gebaa ;};_abbf ,_gebaa :=_cdagg .getFontDirect (_agga ); +if _gebaa !=nil {return nil ,_gebaa ;};if _cdagg ._faec ._afd !=nil {_aeeaf :=fontEntry {_abbf ,_cdagg ._faec ._ddg };if len (_cdagg ._faec ._afd )>=_degc {var _fddd []string ;for _cdd :=range _cdagg ._faec ._afd {_fddd =append (_fddd ,_cdd );};_bg .Slice (_fddd ,func (_dgfb ,_degfe int )bool {return _cdagg ._faec ._afd [_fddd [_dgfb ]]._cbda < _cdagg ._faec ._afd [_fddd [_degfe ]]._cbda ; +});delete (_cdagg ._faec ._afd ,_fddd [0]);};_cdagg ._faec ._afd [_cag .String ()]=_aeeaf ;};return _abbf ,nil ;};func (_fbdbf compositeCell )parasBBox ()(paraList ,_fd .PdfRectangle ){return _fbdbf .paraList ,_fbdbf .PdfRectangle ;};type imageExtractContext struct{_dcc []ImageMark ; +_baeb int ;_gced int ;_eccf int ;_cfae map[*_gc .PdfObjectStream 
]*cachedImage ;_fea *ImageExtractOptions ;_dfb bool ;};func (_dgecb *wordBag )minDepth ()float64 {return _dgecb ._fba -(_dgecb .Ury -_dgecb ._dffab )};func _ecgaa (_bfbc _aec .Matrix )_aec .Point {_fddf ,_debf :=_bfbc .Translation (); +return _aec .Point {X :_fddf ,Y :_debf };};func (_fdfc *wordBag )depthIndexes ()[]int {if len (_fdfc ._edaag )==0{return nil ;};_cccb :=make ([]int ,len (_fdfc ._edaag ));_dcdb :=0;for _bbfg :=range _fdfc ._edaag {_cccb [_dcdb ]=_bbfg ;_dcdb ++;};_bg .Ints (_cccb ); +return _cccb ;};func (_fbab *textTable )bbox ()_fd .PdfRectangle {return _fbab .PdfRectangle };func (_aedff *textTable )computeBbox ()_fd .PdfRectangle {var _fecgc _fd .PdfRectangle ;_ffcag :=false ;for _edcfe :=0;_edcfe < _aedff ._bfffcc ;_edcfe ++{for _babeb :=0; +_babeb < _aedff ._deafe ;_babeb ++{_cafd :=_aedff .get (_babeb ,_edcfe );if _cafd ==nil {continue ;};if !_ffcag {_fecgc =_cafd .PdfRectangle ;_ffcag =true ;}else {_fecgc =_gcacd (_fecgc ,_cafd .PdfRectangle );};};};return _fecgc ;};func _ecee (_egfdg float64 )bool {return _g .Abs (_egfdg )< _fbc }; +func (_dcdef *textTable )toTextTable ()TextTable {if _adfgd {_df .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_dcdef ._deafe ,_dcdef ._bfffcc );};_bgdad :=make ([][]TableCell ,_dcdef ._bfffcc ); +for _adbgd :=0;_adbgd < _dcdef ._bfffcc ;_adbgd ++{_bgdad [_adbgd ]=make ([]TableCell ,_dcdef ._deafe );for _eadgb :=0;_eadgb < _dcdef ._deafe ;_eadgb ++{_ggeg :=_dcdef .get (_eadgb ,_adbgd );if _ggeg ==nil {continue ;};_eead (_ggeg ._bgdfb );if _adfgd {_bc .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_eadgb ,_adbgd ,_ggeg ); +};_bgdad [_adbgd ][_eadgb ].Text =_ggeg .text ();_bgdg :=0;_bgdad [_adbgd ][_eadgb ].Marks ._dbce =_ggeg .toTextMarks (&_bgdg );};};_fgcdg :=TextTable {W :_dcdef ._deafe ,H :_dcdef ._bfffcc ,Cells :_bgdad };_fgcdg .PdfRectangle =_dcdef .bbox ();return _fgcdg ; +};func (_acbf *textTable )reduce 
()*textTable {_bacg :=make ([]int ,0,_acbf ._bfffcc );_beec :=make ([]int ,0,_acbf ._deafe );for _bddca :=0;_bddca < _acbf ._bfffcc ;_bddca ++{if !_acbf .emptyCompositeRow (_bddca ){_bacg =append (_bacg ,_bddca );};};for _egffd :=0; +_egffd < _acbf ._deafe ;_egffd ++{if !_acbf .emptyCompositeColumn (_egffd ){_beec =append (_beec ,_egffd );};};if len (_bacg )==_acbf ._bfffcc &&len (_beec )==_acbf ._deafe {return _acbf ;};_eecf :=textTable {_bbcb :_acbf ._bbcb ,_deafe :len (_beec ),_bfffcc :len (_bacg ),_afcbd :make (map[uint64 ]*textPara ,len (_beec )*len (_bacg ))}; +if _adfgd {_df .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_acbf ._deafe ,_acbf ._bfffcc ,len (_beec ),len (_bacg ));_df .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_beec ); +_df .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_bacg );};for _dfccd ,_bgdba :=range _bacg {for _egegf ,_gffgc :=range _beec {_bcgea ,_gccg :=_acbf .getComposite (_gffgc ,_bgdba );if _bcgea ==nil {continue ; +};if _adfgd {_bc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_egegf ,_dfccd ,_gffgc ,_bgdba ,_fdafd (_bcgea .merge ().text (),50));};_eecf .putComposite (_egegf ,_dfccd ,_bcgea ,_gccg ); +};};return &_eecf ;};func _fggg (_aecd ,_gacec float64 )string {_dcaef :=!_cfdab (_aecd -_gacec );if _dcaef {return "\u000a";};return "\u0020";};func (_bced *wordBag )highestWord (_ddee int ,_cdgdb ,_cfbg float64 )*textWord {for _ ,_faag :=range _bced ._edaag [_ddee ]{if _cdgdb <=_faag ._dcggb &&_faag ._dcggb <=_cfbg {return _faag ; +};};return nil ;};func _dgabd (_eeaee ,_bdcd _aec .Point )rulingKind {_caab :=_g .Abs (_eeaee .X -_bdcd .X );_adca :=_g .Abs (_eeaee .Y -_bdcd .Y );return _ecebf (_caab ,_adca ,_gffbb );};func (_cacc *wordBag )maxDepth 
()float64 {return _cacc ._fba -_cacc .Lly }; +const (ExtractionModeLayout =iota ;ExtractionModePlain ;ExtractionModeLayoutNoBreaks ;);func (_dbde *subpath )last ()_aec .Point {return _dbde ._gcac [len (_dbde ._gcac )-1]};func _cfeg (_bdfe []int ,_cecb *TextMarkArray ,_ddgg string )(*TextMarkArray ,Box ,error ){_cdffg :=Box {}; +_dgcc :=_bdfe [0];_gbb :=_bdfe [1];_ceac :=len (_ddgg )-len (_c .TrimLeft (_ddgg ,"\u0020"));_gfe :=len (_ddgg )-len (_c .TrimRight (_ddgg ,"\u0020\u000a"));_dgcc =_dgcc +_ceac ;_gbb =_gbb -_gfe ;_cbfe ,_cffe :=_cecb .RangeOffset (_dgcc ,_gbb );if _cffe !=nil {return nil ,_cdffg ,_cffe ; +};_facd ,_dfcc :=_cbfe .BBox ();if !_dfcc {return nil ,_cdffg ,_bc .Errorf ("\u0073\u0070\u0061\u006e\u004d\u0061\u0072\u006bs\u002e\u0042\u0042ox\u0020\u0068\u0061\u0073\u0020\u006eo\u0020\u0062\u006f\u0075\u006e\u0064\u0069\u006e\u0067\u0020\u0062\u006f\u0078\u002e\u0020s\u0070\u0061\u006e\u004d\u0061\u0072\u006b\u0073=\u0025\u0073",_cbfe ); +};_cdffg =Box {BBox :_facd };return _cbfe ,_cdffg ,nil ;};func (_ecge *ruling )alignsSec (_dfcg *ruling )bool {const _ebfb =_fbc +1.0;return _ecge ._feae -_ebfb <=_dfcg ._feagb &&_dfcg ._feae -_ebfb <=_ecge ._feagb ;};func (_gdba *shapesState )lastpointEstablished ()(_aec .Point ,bool ){if _gdba ._baffg {return _gdba ._fabe ,false ; +};_dac :=len (_gdba ._bgf );if _dac > 0&&_gdba ._bgf [_dac -1]._gdgde {return _gdba ._bgf [_dac -1].last (),false ;};return _aec .Point {},true ;};var _fcb =false ;func (_cffc *textObject )moveText (_cce ,_gcdb float64 ){_cffc .moveLP (_cce ,_gcdb )};func _agff (_gfda ,_dgdd int )int {if _gfda < _dgdd {return _gfda ; +};return _dgdd ;}; + +// ExtractFonts returns all font information from the page extractor, including +// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more. +// +// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document. 
+// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries. +// +// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range. +func (_dcg *Extractor )ExtractFonts (previousPageFonts *PageFonts )(*PageFonts ,error ){_ccg :=PageFonts {};_dae :=_ccg .extractPageResourcesToFont (_dcg ._dd );if _dae !=nil {return nil ,_dae ;};if previousPageFonts !=nil {for _ ,_fcad :=range previousPageFonts .Fonts {if !_cdf (_ccg .Fonts ,_fcad .FontName ){_ccg .Fonts =append (_ccg .Fonts ,_fcad ); +};};};return &PageFonts {Fonts :_ccg .Fonts },nil ;};func (_ecebef paraList )inTile (_bddgd gridTile )paraList {var _aegc paraList ;for _ ,_ddcd :=range _ecebef {if _bddgd .contains (_ddcd .PdfRectangle ){_aegc =append (_aegc ,_ddcd );};};if _adfgd {_bc .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_bddgd ,len (_aegc )); +for _gffga ,_gebfc :=range _aegc {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gffga ,_gebfc );};_bc .Println ("");};return _aegc ;}; + +// ExtractStrokePaths processes and extracts all stroke paths in content streams. +func (_eef *Extractor )ExtractStrokePaths ()([]StrokePath ,error ){_dfg ,_ ,_ ,_fcd :=_eef .ExtractPageText ();if _fcd !=nil {return nil ,_fcd ;};_fdba :=[]StrokePath {};for _ ,_ecbf :=range _dfg ._bcgae {_fgg :=StrokePath {Color :_ecbf .Color ,Points :[]_aec .Point {}}; +for _ ,_deaa :=range _ecbf ._ddgc {_fgg .Points =append (_fgg .Points ,_deaa ._gcac ...);};_fdba =append (_fdba ,_fgg );};return _fdba ,nil ;}; + +// ToText returns the page text as a single string. +// Deprecated: This function is deprecated and will be removed in a future major version. Please use +// Text() instead. 
+func (_bdbc PageText )ToText ()string {return _bdbc .Text ()};func (_faba paraList )yNeighbours (_bfacd float64 )map[*textPara ][]int {_bcbgc :=make ([]event ,2*len (_faba ));if _bfacd ==0{for _fcgbe ,_dfgdg :=range _faba {_bcbgc [2*_fcgbe ]=event {_dfgdg .Lly ,true ,_fcgbe }; +_bcbgc [2*_fcgbe +1]=event {_dfgdg .Ury ,false ,_fcgbe };};}else {for _addd ,_dcdcd :=range _faba {_bcbgc [2*_addd ]=event {_dcdcd .Lly -_bfacd *_dcdcd .fontsize (),true ,_addd };_bcbgc [2*_addd +1]=event {_dcdcd .Ury +_bfacd *_dcdcd .fontsize (),false ,_addd }; +};};return _faba .eventNeighbours (_bcbgc );};func _cddac (_aadbb []rulingList )(rulingList ,rulingList ){var _abcfdc rulingList ;for _ ,_ddcba :=range _aadbb {_abcfdc =append (_abcfdc ,_ddcba ...);};return _abcfdc .vertsHorzs ();}; + +// New returns an Extractor instance for extracting content from the input PDF page. +func New (page *_fd .PdfPage )(*Extractor ,error ){return NewWithOptions (page ,nil )};func (_ddbb paraList )addNeighbours (){_ffdae :=func (_ceeed []int ,_agcfe *textPara )([]*textPara ,[]*textPara ){_gfbge :=make ([]*textPara ,0,len (_ceeed )-1);_ecdb :=make ([]*textPara ,0,len (_ceeed )-1); +for _ ,_adcc :=range _ceeed {_gbad :=_ddbb [_adcc ];if _gbad .Urx <=_agcfe .Llx {_gfbge =append (_gfbge ,_gbad );}else if _gbad .Llx >=_agcfe .Urx {_ecdb =append (_ecdb ,_gbad );};};return _gfbge ,_ecdb ;};_beef :=func (_cgae []int ,_fcee *textPara )([]*textPara ,[]*textPara ){_bcdab :=make ([]*textPara ,0,len (_cgae )-1); +_egfeb :=make ([]*textPara ,0,len (_cgae )-1);for _ ,_bbcd :=range _cgae {_aaebf :=_ddbb [_bbcd ];if _aaebf .Ury <=_fcee .Lly {_egfeb =append (_egfeb ,_aaebf );}else if _aaebf .Lly >=_fcee .Ury {_bcdab =append (_bcdab ,_aaebf );};};return _bcdab ,_egfeb ; +};_bfda :=_ddbb .yNeighbours (_fedb );for _ ,_dcda :=range _ddbb {_befa :=_bfda [_dcda ];if len (_befa )==0{continue ;};_ccfa ,_dbcea :=_ffdae (_befa ,_dcda );if len (_ccfa )==0&&len (_dbcea )==0{continue ;};if len (_ccfa )> 0{_afaf :=_ccfa [0];for _ 
,_cfcdb :=range _ccfa [1:]{if _cfcdb .Urx >=_afaf .Urx {_afaf =_cfcdb ; +};};for _ ,_adegd :=range _ccfa {if _adegd !=_afaf &&_adegd .Urx > _afaf .Llx {_afaf =nil ;break ;};};if _afaf !=nil &&_cdffa (_dcda .PdfRectangle ,_afaf .PdfRectangle ){_dcda ._gfbb =_afaf ;};};if len (_dbcea )> 0{_fbdf :=_dbcea [0];for _ ,_ccbc :=range _dbcea [1:]{if _ccbc .Llx <=_fbdf .Llx {_fbdf =_ccbc ; +};};for _ ,_cfefb :=range _dbcea {if _cfefb !=_fbdf &&_cfefb .Llx < _fbdf .Urx {_fbdf =nil ;break ;};};if _fbdf !=nil &&_cdffa (_dcda .PdfRectangle ,_fbdf .PdfRectangle ){_dcda ._ebea =_fbdf ;};};};_bfda =_ddbb .xNeighbours (_ecbb );for _ ,_dace :=range _ddbb {_gaad :=_bfda [_dace ]; +if len (_gaad )==0{continue ;};_dacfad ,_gfbfa :=_beef (_gaad ,_dace );if len (_dacfad )==0&&len (_gfbfa )==0{continue ;};if len (_gfbfa )> 0{_eegac :=_gfbfa [0];for _ ,_dbfg :=range _gfbfa [1:]{if _dbfg .Ury >=_eegac .Ury {_eegac =_dbfg ;};};for _ ,_cdfe :=range _gfbfa {if _cdfe !=_eegac &&_cdfe .Ury > _eegac .Lly {_eegac =nil ; +break ;};};if _eegac !=nil &&_ebdf (_dace .PdfRectangle ,_eegac .PdfRectangle ){_dace ._gfafd =_eegac ;};};if len (_dacfad )> 0{_dgaa :=_dacfad [0];for _ ,_gdaea :=range _dacfad [1:]{if _gdaea .Lly <=_dgaa .Lly {_dgaa =_gdaea ;};};for _ ,_faecd :=range _dacfad {if _faecd !=_dgaa &&_faecd .Lly < _dgaa .Ury {_dgaa =nil ; +break ;};};if _dgaa !=nil &&_ebdf (_dace .PdfRectangle ,_dgaa .PdfRectangle ){_dace ._ebaf =_dgaa ;};};};for _ ,_ecdae :=range _ddbb {if _ecdae ._gfbb !=nil &&_ecdae ._gfbb ._ebea !=_ecdae {_ecdae ._gfbb =nil ;};if _ecdae ._ebaf !=nil &&_ecdae ._ebaf ._gfafd !=_ecdae {_ecdae ._ebaf =nil ; +};if _ecdae ._ebea !=nil &&_ecdae ._ebea ._gfbb !=_ecdae {_ecdae ._ebea =nil ;};if _ecdae ._gfafd !=nil &&_ecdae ._gfafd ._ebaf !=_ecdae {_ecdae ._gfafd =nil ;};};}; + +// ImageMark represents an image drawn on a page and its position in device coordinates. +// All coordinates are in device coordinates. 
+type ImageMark struct{Image *_fd .Image ; + +// Dimensions of the image as displayed in the PDF. +Width float64 ;Height float64 ; + +// Position of the image in PDF coordinates (lower left corner). +X float64 ;Y float64 ; + +// Angle in degrees, if rotated. +Angle float64 ;};func _fde (_gfd string ,_af bool ,_fdb bool )BidiText {_fad :="\u006c\u0074\u0072";if _fdb {_fad ="\u0074\u0074\u0062";}else if !_af {_fad ="\u0072\u0074\u006c";};return BidiText {_gf :_gfd ,_eg :_fad };};func _eddge (_aagd string )string {_bddd :=[]rune (_aagd ); +return string (_bddd [:len (_bddd )-1])};func _daed (_gaga int ,_egdbe map[int ][]float64 )([]int ,int ){_cffefd :=make ([]int ,_gaga );_cdffc :=0;for _cged :=0;_cged < _gaga ;_cged ++{_cffefd [_cged ]=_cdffc ;_cdffc +=len (_egdbe [_cged ])+1;};return _cffefd ,_cdffc ; +};func (_cbed *textTable )getRight ()paraList {_aedcc :=make (paraList ,_cbed ._bfffcc );for _ffed :=0;_ffed < _cbed ._bfffcc ;_ffed ++{_fgaec :=_cbed .get (_cbed ._deafe -1,_ffed )._ebea ;if _fgaec .taken (){return nil ;};_aedcc [_ffed ]=_fgaec ;};for _ageac :=0; +_ageac < _cbed ._bfffcc -1;_ageac ++{if _aedcc [_ageac ]._gfafd !=_aedcc [_ageac +1]{return nil ;};};return _aedcc ;};func _fcefc (_bfgc []TextMark ,_agdc *int )[]TextMark {_cfgeb :=_bfgc [len (_bfgc )-1];_eace :=[]rune (_cfgeb .Text );if len (_eace )==1{_bfgc =_bfgc [:len (_bfgc )-1]; +_gfdgg :=_bfgc [len (_bfgc )-1];*_agdc =_gfdgg .Offset +len (_gfdgg .Text );}else {_aaad :=_eddge (_cfgeb .Text );*_agdc +=len (_aaad )-len (_cfgeb .Text );_cfgeb .Text =_aaad ;};return _bfgc ;};func (_dagae rulingList )asTiling ()gridTiling {if _eagd {_df .Log .Info 
("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dagae )); +};for _afcbe ,_ddaa :=range _dagae [1:]{_dbcgg :=_dagae [_afcbe ];if _dbcgg .alignsPrimary (_ddaa )&&_dbcgg .alignsSec (_ddaa ){_df .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_ddaa ,_dbcgg ); +};};_dagae .sortStrict ();_dagae .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_gcece ,_beaa :=_dagae .vertsHorzs ();_eacg :=_gcece .primaries ();_cdfcc :=_beaa .primaries ();_begc :=len (_eacg )-1;_cagf :=len (_cdfcc )-1;if _begc ==0||_cagf ==0{return gridTiling {}; +};_dfaa :=_fd .PdfRectangle {Llx :_eacg [0],Urx :_eacg [_begc ],Lly :_cdfcc [0],Ury :_cdfcc [_cagf ]};if _eagd {_df .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_gcece )); +for _ceagc ,_fgaf :=range _gcece {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ceagc ,_fgaf );};_df .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_beaa )); +for _cdcg ,_gaace :=range _beaa {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cdcg ,_gaace );};_df .Log .Info 
("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_begc ,_cagf ,_eacg ,_cdfcc ); +};_eaeda :=make ([]gridTile ,_begc *_cagf );for _bdee :=_cagf -1;_bdee >=0;_bdee --{_gcgc :=_cdfcc [_bdee ];_eacc :=_cdfcc [_bdee +1];for _gcad :=0;_gcad < _begc ;_gcad ++{_cfbbb :=_eacg [_gcad ];_gceg :=_eacg [_gcad +1];_cbef :=_gcece .findPrimSec (_cfbbb ,_gcgc ); +_cfbdaf :=_gcece .findPrimSec (_gceg ,_gcgc );_cgfea :=_beaa .findPrimSec (_gcgc ,_cfbbb );_aggcc :=_beaa .findPrimSec (_eacc ,_cfbbb );_cacce :=_fd .PdfRectangle {Llx :_cfbbb ,Urx :_gceg ,Lly :_gcgc ,Ury :_eacc };_fffaa :=_gcdf (_cacce ,_cbef ,_cfbdaf ,_cgfea ,_aggcc ); +_eaeda [_bdee *_begc +_gcad ]=_fffaa ;if _eagd {_bc .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_gcad ,_bdee ,_fffaa .String (),_fffaa .Width (),_fffaa .Height ()); +};};};if _eagd {_df .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_dfaa ); +};_adbec :=make ([]map[float64 ]gridTile ,_cagf );for _efgc :=_cagf -1;_efgc >=0;_efgc --{if _eagd {_bc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_efgc );};_adbec [_efgc ]=make (map[float64 ]gridTile ,_begc );for _debg :=0;_debg < _begc ; +_debg ++{_fgab :=_eaeda [_efgc *_begc +_debg ];if _eagd {_bc .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_debg ,_fgab );};if !_fgab ._agbbg {continue ;};_egff :=_debg ;for _ggbd :=_debg +1;!_fgab ._cbag &&_ggbd < _begc ;_ggbd ++{_dedfb 
:=_eaeda [_efgc *_begc +_ggbd ]; +_fgab .Urx =_dedfb .Urx ;_fgab ._eecbb =_fgab ._eecbb ||_dedfb ._eecbb ;_fgab ._eeeaf =_fgab ._eeeaf ||_dedfb ._eeeaf ;_fgab ._cbag =_dedfb ._cbag ;if _eagd {_bc .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_ggbd ,_dedfb ,_fgab ); +};_egff =_ggbd ;};if _eagd {_bc .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_debg ,_egff ,_fgab );};_debg =_egff ;_adbec [_efgc ][_fgab .Llx ]=_fgab ;};};_afbcf :=make (map[float64 ]map[float64 ]gridTile ,_cagf ); +_gdagd :=make (map[float64 ]map[float64 ]struct{},_cagf );for _bfbd :=_cagf -1;_bfbd >=0;_bfbd --{_aecbgd :=_eaeda [_bfbd *_begc ].Lly ;_afbcf [_aecbgd ]=make (map[float64 ]gridTile ,_begc );_gdagd [_aecbgd ]=make (map[float64 ]struct{},_begc );};if _eagd {_df .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_dfaa ); +};for _ecgbg :=_cagf -1;_ecgbg >=0;_ecgbg --{_bfegb :=_eaeda [_ecgbg *_begc ].Lly ;_bfgca :=_adbec [_ecgbg ];if _eagd {_bc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_ecgbg );};for _ ,_gfeegb :=range _befbd (_bfgca ){if _ ,_ffeca :=_gdagd [_bfegb ][_gfeegb ]; +_ffeca {continue ;};_dffb :=_bfgca [_gfeegb ];if _eagd {_bc .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_dffb .String ());};for _fgge :=_ecgbg -1;_fgge >=0;_fgge --{if _dffb ._eeeaf {break ;};_fbeb :=_adbec [_fgge ];_cgee ,_efabe :=_fbeb [_gfeegb ]; +if !_efabe {break ;};if _cgee .Urx !=_dffb .Urx {break ;};_dffb ._eeeaf =_cgee ._eeeaf ;_dffb .Lly =_cgee .Lly ;if _eagd {_bc .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_cgee .String (),_dffb .String ()); +};_gdagd [_cgee .Lly ][_cgee .Llx ]=struct{}{};};if _ecgbg 
==0{_dffb ._eeeaf =true ;};if _dffb .complete (){_afbcf [_bfegb ][_gfeegb ]=_dffb ;};};};_edea :=gridTiling {PdfRectangle :_dfaa ,_afdaa :_fefga (_afbcf ),_cecbb :_fdddc (_afbcf ),_cdeeb :_afbcf }; +_edea .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _edea ;};func (_dgfeb *textWord )toTextMarks (_bbggb *int )[]TextMark {var _ceage []TextMark ;for _ ,_bedd :=range _dgfeb ._fcdae {_ceage =_gdag (_ceage ,_bbggb ,_bedd .ToTextMark ());};return _ceage ; +}; + +// TextTable represents a table. +// Cells are ordered top-to-bottom, left-to-right. +// Cells[y] is the (0-offset) y'th row in the table. +// Cells[y][x] is the (0-offset) x'th column in the table. +type TextTable struct{_fd .PdfRectangle ;W ,H int ;Cells [][]TableCell ;};func _ged (_bdgd []*TextMarkArray ,_ddgf ,_fece string )(string ,error ){_gegc :=0;for _ ,_dgc :=range _bdgd {_adb :=_dgc ._dbce [0].DirectObject ;if _adb ==nil {continue ;};_cdb :=_adb .String (); +if len (_cdb )> 1{_afc :=_ccfb (_dgc ,&_ddgf ,&_gegc ,_fece );if _afc !=nil {return _ddgf ,_afc ;};}else if len (_cdb )==1{_degf :=_aba (_dgc ,&_ddgf ,&_gegc ,_fece );if _degf !=nil {return _ddgf ,_degf ;};};};return _ddgf ,nil ;}; + +// String returns a description of `w`. 
+func (_ggec *textWord )String ()string {return _bc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_ggec ._dcggb ,_ggec .PdfRectangle ,_ggec ._gage ,_ggec ._gabac ); +};func (_fdab *textTable )subdivide ()*textTable {_fdab .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_ccagb :=_fdab .compositeRowCorridors ();_fdge :=_fdab .compositeColCorridors ();if _adfgd {_df .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_bdaf (_ccagb ),_bdaf (_fdge )); +};if len (_ccagb )==0||len (_fdge )==0{return _fdab ;};_cbegde (_ccagb );_cbegde (_fdge );if _adfgd {_df .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_bdaf (_ccagb ),_bdaf (_fdge )); +};_dgca ,_acac :=_daed (_fdab ._bfffcc ,_ccagb );_cgggf ,_dffed :=_daed (_fdab ._deafe ,_fdge );_gfdd :=make (map[uint64 ]*textPara ,_dffed *_acac );_adbeg :=&textTable {PdfRectangle :_fdab .PdfRectangle ,_bbcb :_fdab ._bbcb ,_bfffcc :_acac ,_deafe :_dffed ,_afcbd :_gfdd }; +if _adfgd {_df .Log .Info 
("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_fdab ._deafe ,_fdab ._bfffcc ,_dffed ,_acac ,_bdaf (_ccagb ),_bdaf (_fdge ),_dgca ,_cgggf ); +};for _abcb :=0;_abcb < _fdab ._bfffcc ;_abcb ++{_gadef :=_dgca [_abcb ];for _cbbb :=0;_cbbb < _fdab ._deafe ;_cbbb ++{_efbac :=_cgggf [_cbbb ];if _adfgd {_bc .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_cbbb ,_abcb ,_efbac ,_gadef ); +};_cddd ,_feffa :=_fdab ._bbgcf [_fbacd (_cbbb ,_abcb )];if !_feffa {continue ;};_afaea :=_cddd .split (_ccagb [_abcb ],_fdge [_cbbb ]);for _ecdc :=0;_ecdc < _afaea ._bfffcc ;_ecdc ++{for _cfebf :=0;_cfebf < _afaea ._deafe ;_cfebf ++{_aeccf :=_afaea .get (_cfebf ,_ecdc ); +_adbeg .put (_efbac +_cfebf ,_gadef +_ecdc ,_aeccf );if _adfgd {_bc .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_efbac +_cfebf ,_gadef +_ecdc ,_aeccf );};};};};};return _adbeg ;};func _badec (_ceabe []*textLine ,_afcdg map[float64 ][]*textLine ,_ecbbd []float64 ,_bgdf int ,_cfgd ,_dcca float64 )[]*list {_fddfa :=[]*list {}; +_bcag :=_bgdf ;_bgdf =_bgdf +1;_cdbe :=_ecbbd [_bcag ];_gbcc :=_afcdg [_cdbe ];_ggbg :=_ebba (_gbcc ,_dcca ,_cfgd );for _dcgbfg ,_ggfb :=range _ggbg {var _agcc float64 ;_eaac :=[]*list {};_cadg :=_ggfb ._eefeg ;_aac :=_dcca ;if _dcgbfg < len (_ggbg )-1{_aac =_ggbg [_dcgbfg +1]._eefeg ; +};if _bgdf 
< len (_ecbbd ){_eaac =_badec (_ceabe ,_afcdg ,_ecbbd ,_bgdf ,_cadg ,_aac );};_agcc =_aac ;if len (_eaac )> 0{_gefg :=_eaac [0];if len (_gefg ._ddage )> 0{_agcc =_gefg ._ddage [0]._eefeg ;};};_fedc :=[]*textLine {_ggfb };_aacc :=_gfad (_ggfb ,_ceabe ,_cadg ,_agcc ); +_fedc =append (_fedc ,_aacc ...);_cfbda :=_dead (_fedc ,"\u0062\u0075\u006c\u006c\u0065\u0074",_eaac );_cfbda ._eeca =_gbcd (_fedc ,"");_fddfa =append (_fddfa ,_cfbda );};return _fddfa ;};func (_cbc *imageExtractContext )extractContentStreamImages (_abcg string ,_gff *_fd .PdfPageResources )error {_aa :=_cb .NewContentStreamParser (_abcg ); +_bge ,_fec :=_aa .Parse ();if _fec !=nil {return _fec ;};if _cbc ._cfae ==nil {_cbc ._cfae =map[*_gc .PdfObjectStream ]*cachedImage {};};if _cbc ._fea ==nil {_cbc ._fea =&ImageExtractOptions {};};_ad :=_cb .NewContentStreamProcessor (*_bge );_ad .AddHandler (_cb .HandlerConditionEnumAllOperands ,"",_cbc .processOperand ); +return _ad .Process (_gff );};func _feee (_eeaa ,_daabb bounded )float64 {_fgdd :=_dfcf (_eeaa ,_daabb );if !_cfdab (_fgdd ){return _fgdd ;};return _dfad (_eeaa ,_daabb );};func _cdffa (_bbebe ,_bfbbe _fd .PdfRectangle )bool {return _bbebe .Lly <=_bfbbe .Ury &&_bfbbe .Lly <=_bbebe .Ury ; +};func _bgfe (_gacaa []pathSection )rulingList {_ggadf (_gacaa );if _bfdd {_df .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_gacaa )); +};var _ggcd rulingList ;for _ ,_dgge :=range _gacaa {for _ ,_fdggc :=range _dgge ._ddgc {if len (_fdggc ._gcac )< 2{continue ;};_aacge :=_fdggc ._gcac [0];for _ ,_ecdda :=range _fdggc ._gcac [1:]{if _bfagf ,_fbbe :=_dddef (_aacge ,_ecdda ,_dgge .Color ); +_fbbe {_ggcd =append (_ggcd ,_bfagf );};_aacge =_ecdda ;};};};if _bfdd {_df .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_ggcd );};return _ggcd ;};func _dfcf (_gcbe ,_bebf 
bounded )float64 {return _gcbe .bbox ().Llx -_bebf .bbox ().Llx }; +func (_dbbaa paraList )xNeighbours (_ddeag float64 )map[*textPara ][]int {_cdcfa :=make ([]event ,2*len (_dbbaa ));if _ddeag ==0{for _fddfag ,_dccd :=range _dbbaa {_cdcfa [2*_fddfag ]=event {_dccd .Llx ,true ,_fddfag };_cdcfa [2*_fddfag +1]=event {_dccd .Urx ,false ,_fddfag }; +};}else {for _abff ,_fbece :=range _dbbaa {_cdcfa [2*_abff ]=event {_fbece .Llx -_ddeag *_fbece .fontsize (),true ,_abff };_cdcfa [2*_abff +1]=event {_fbece .Urx +_ddeag *_fbece .fontsize (),false ,_abff };};};return _dbbaa .eventNeighbours (_cdcfa );}; +func _eeefe (_dbaf _gc .PdfObject ,_dbbee _ff .Color )(_ae .Image ,error ){_gcadd ,_fedba :=_gc .GetStream (_dbaf );if !_fedba {return nil ,nil ;};_eefbe ,_adadf :=_fd .NewXObjectImageFromStream (_gcadd );if _adadf !=nil {return nil ,_adadf ;};_dgbcg ,_adadf :=_eefbe .ToImage (); +if _adadf !=nil {return nil ,_adadf ;};return _fdfg (_dgbcg ,_dbbee ),nil ;};func (_decf rulingList )vertsHorzs ()(rulingList ,rulingList ){var _affae ,_adedc rulingList ;for _ ,_fcgefe :=range _decf {switch _fcgefe ._fbgb {case _bfdef :_affae =append (_affae ,_fcgefe ); +case _caec :_adedc =append (_adedc ,_fcgefe );};};return _affae ,_adedc ;};func _bfgb (_ggdad []*textMark ,_fbda _fd .PdfRectangle )*textWord {_eded :=_ggdad [0].PdfRectangle ;_bdfg :=_ggdad [0]._cbdd ;for _ ,_dceb :=range _ggdad [1:]{_eded =_gcacd (_eded ,_dceb .PdfRectangle ); +if _dceb ._cbdd > _bdfg {_bdfg =_dceb ._cbdd ;};};return &textWord {PdfRectangle :_eded ,_fcdae :_ggdad ,_dcggb :_fbda .Ury -_eded .Lly ,_gage :_bdfg };};func (_bedcd *textTable )isExportable ()bool {if _bedcd ._bbcb {return true ;};_aaebg :=func (_dffeg int )bool {_bedeb :=_bedcd .get (0,_dffeg ); +if _bedeb ==nil {return false ;};_abgf :=_bedeb .text ();_dffc :=_e .RuneCountInString (_abgf );_baaf :=_ecebe .MatchString (_abgf );return _dffc <=1||_baaf ;};for _ggdaba :=0;_ggdaba < _bedcd ._bfffcc ;_ggdaba ++{if !_aaebg (_ggdaba ){return true ;};}; 
+return false ;};func _cbegde (_gebff map[int ][]float64 ){if len (_gebff )<=1{return ;};_bgbf :=_abdc (_gebff );if _adfgd {_df .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_bgbf );};var _daeda ,_ebfba int ; +for _daeda ,_ebfba =range _bgbf {if _gebff [_ebfba ]!=nil {break ;};};for _dbdb ,_befg :=range _bgbf [_daeda :]{_ffdcd :=_gebff [_befg ];if _ffdcd ==nil {continue ;};if _adfgd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_daeda +_dbdb ,_ebfba ,_befg ); +};_dffde :=_gebff [_befg ];if _dffde [len (_dffde )-1]> _ffdcd [0]{_dffde [len (_dffde )-1]=_ffdcd [0];_gebff [_ebfba ]=_dffde ;};_ebfba =_befg ;};};type textMark struct{_fd .PdfRectangle ;_gdcb int ;_dfdde string ;_aeeb string ;_edbb *_fd .PdfFont ;_cbdd float64 ; +_bbgc float64 ;_edgad _aec .Matrix ;_gdcg _aec .Point ;_cgcb _fd .PdfRectangle ;_agfcg _ff .Color ;_acdc _ff .Color ;_gbae _gc .PdfObject ;_abfde []string ;Tw float64 ;Th float64 ;_cgaa int ;_ceag int ;};func _ccfb (_edd *TextMarkArray ,_agd *string ,_cgea *int ,_ebe string )error {_ecab :=_edd .Elements ()[0].DirectObject ; +_cfbc ,_ceg :=_gc .GetString (_ecab );if !_ceg {return _bc .Errorf ("\u0075n\u0061\u0062l\u0065\u0020\u0074\u006f \u0067\u0065\u0074 \u0073\u0074\u0072\u0069\u006e\u0067\u0020\u0042\u0079te\u0073\u0020\u0066r\u006f\u006d \u0064\u0069\u0072\u0065\u0063\u0074O\u0062\u006ae\u0063\u0074"); +};_cea :=_cgcg (_edd );_dbff ,_ceg :=_gc .GetStringBytes (_ecab );if !_ceg {return _gc .ErrTypeError ;};_efgg :=_edd .Elements ()[0].Font ;_fff :=_cdfc (_dbff ,_efgg );_cde :="";_cfff :=*_agd ;if len (_cfff )> *_cgea {_cde =_cfff [*_cgea :*_cgea +len (_cea )]; +}else if *_cgea ==len (_ebe )-1&&len (_cfff )> *_cgea {_cde =_cfff [*_cgea :];};_gag :="";_ccgc :=_c .Split (_fff ,"\u0020");_cdff :=_ccgc [len (_ccgc )-1];if _cdff ==_cea &&*_cgea ==0{_efdf :=_c .LastIndex (_fff ,_cea );_gag =_aeae (_fff ,_efdf ,len (_cea )+_efdf 
,_cde ); +}else if *_cgea ==len (_ebe )-1&&len (_cfff )> *_cgea {_gag =_c .ReplaceAll (_fff ,_cea ,_cfff [*_cgea :]);}else {_gag =_c .Replace (_fff ,_cea ,_cde ,1);};_ecbc (_cfbc ,_gag ,_efgg );*_cgea +=len (_cea );return nil ;};func _bcee (_bfec *wordBag ,_dffgc *textWord ,_dcffb float64 )bool {return _bfec .Urx <=_dffgc .Llx &&_dffgc .Llx < _bfec .Urx +_dcffb ; +};func (_eeef *textLine )endsInHyphen ()bool {_fcdg :=_eeef ._eecg [len (_eeef ._eecg )-1];_fecb :=_fcdg ._gabac ;_fdfa ,_dgbc :=_e .DecodeLastRuneInString (_fecb );if _dgbc <=0||!_eb .Is (_eb .Hyphen ,_fdfa ){return false ;};if _fcdg ._debgd &&_beaf (_fecb ){return true ; +};return _beaf (_eeef .text ());};func (_bfaag *shapesState )stroke (_bcfbd *[]pathSection ){_bebd :=pathSection {_ddgc :_bfaag ._bgf ,Color :_bfaag ._eefg .getStrokeColor ()};*_bcfbd =append (*_bcfbd ,_bebd );if _bfdd {_bc .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_bcfbd ),_bfaag ,_bfaag ._eefg .getStrokeColor (),_bebd .bbox ()); +if _cgde {for _bfcbc ,_egcd :=range _bfaag ._bgf {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bfcbc ,_egcd );if _bfcbc ==10{break ;};};};};}; + +// String returns a string describing `ma`. 
+func (_cfdb TextMarkArray )String ()string {_aab :=len (_cfdb ._dbce );if _aab ==0{return "\u0045\u004d\u0050T\u0059";};_beg :=_cfdb ._dbce [0];_bbba :=_cfdb ._dbce [_aab -1];return _bc .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_aab ,_beg ,_bbba ); +};func _fcfde (_fbdee ,_fbgab _aec .Point )bool {return _fbdee .X ==_fbgab .X &&_fbdee .Y ==_fbgab .Y };func (_adeec *wordBag )firstReadingIndex (_deaad int )int {_cbad :=_adeec .firstWord (_deaad )._gage ;_debfg :=float64 (_deaad +1)*_gcfb ;_deae :=_debfg +_febf *_cbad ; +_fbfd :=_deaad ;for _ ,_dcgc :=range _adeec .depthBand (_debfg ,_deae ){if _dfcf (_adeec .firstWord (_dcgc ),_adeec .firstWord (_fbfd ))< 0{_fbfd =_dcgc ;};};return _fbfd ;};func (_bdga *textTable )logComposite (_ceabc string ){if !_adfgd {return ;};_df .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_bdga ._deafe ,_bdga ._bfffcc ,_ceabc ); +_bc .Printf ("\u0025\u0035\u0073 \u007c","");for _bbbdc :=0;_bbbdc < _bdga ._deafe ;_bbbdc ++{_bc .Printf ("\u0025\u0033\u0064 \u007c",_bbbdc );};_bc .Println ("");_bc .Printf ("\u0025\u0035\u0073 \u002b","");for _bdcde :=0;_bdcde < _bdga ._deafe ;_bdcde ++{_bc .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d"); +};_bc .Println ("");for _gfgbab :=0;_gfgbab < _bdga ._bfffcc ;_gfgbab ++{_bc .Printf ("\u0025\u0035\u0064 \u007c",_gfgbab );for _cccg :=0;_cccg < _bdga ._deafe ;_cccg ++{_dcdee ,_ :=_bdga ._bbgcf [_fbacd (_cccg ,_gfgbab )].parasBBox ();_bc .Printf ("\u0025\u0033\u0064 \u007c",len (_dcdee )); +};_bc .Println ("");};_df .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_bdga ._deafe ,_bdga ._bfffcc ,_ceabc );_bc .Printf ("\u0025\u0035\u0073 \u007c","");for _cdba 
:=0;_cdba < _bdga ._deafe ;_cdba ++{_bc .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_cdba ); +};_bc .Println ("");_bc .Printf ("\u0025\u0035\u0073 \u002b","");for _dabdb :=0;_dabdb < _bdga ._deafe ;_dabdb ++{_bc .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_bc .Println ("");for _afcg :=0;_afcg < _bdga ._bfffcc ; +_afcg ++{_bc .Printf ("\u0025\u0035\u0064 \u007c",_afcg );for _ebff :=0;_ebff < _bdga ._deafe ;_ebff ++{_agadf ,_ :=_bdga ._bbgcf [_fbacd (_ebff ,_afcg )].parasBBox ();_ceadf :="";_dfgc :=_agadf .merge ();if _dfgc !=nil {_ceadf =_dfgc .text ();};_ceadf =_bc .Sprintf ("\u0025\u0071",_fdafd (_ceadf ,12)); +_ceadf =_ceadf [1:len (_ceadf )-1];_bc .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_ceadf );};_bc .Println ("");};};var _aefc =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_ff .White ,StrokeColor :_ff .White };type gridTiling struct{_fd .PdfRectangle ; +_afdaa []float64 ;_cecbb []float64 ;_cdeeb map[float64 ]map[float64 ]gridTile ;};func (_gfeb *textObject )showTextAdjusted (_dcbe *_gc .PdfObjectArray ,_agdd int ,_gfab string )error {_bbaf :=false ;for _ ,_dgdb :=range _dcbe .Elements (){switch _dgdb .(type ){case *_gc .PdfObjectFloat ,*_gc .PdfObjectInteger :_dgcf ,_fbg :=_gc .GetNumberAsFloat (_dgdb ); +if _fbg !=nil {_df .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dgdb ,_dcbe ); +return _fbg ;};_gbcg ,_gda :=-_dgcf *0.001*_gfeb ._ddgb ._ddeg ,0.0;if _bbaf {_gda ,_gbcg =_gbcg ,_gda ;};_bfe :=_facc (_aec .Point {X :_gbcg ,Y :_gda });_gfeb ._def .Concat (_bfe );case *_gc .PdfObjectString :_aafb :=_gc .TraceToDirectObject (_dgdb ); +_gbe ,_cada :=_gc .GetStringBytes (_aafb );if 
!_cada {_df .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dgdb ,_dcbe ); +return _gc .ErrTypeError ;};_gfeb .renderText (_aafb ,_gbe ,_agdd ,_gfab );default:_df .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dgdb ,_dcbe ); +return _gc .ErrTypeError ;};};return nil ;};func (_dcdgd paraList )log (_cbce string ){if !_acab {return ;};_df .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_cbce ,len (_dcdgd )); +for _eafg ,_fdgbf :=range _dcdgd {if _fdgbf ==nil {continue ;};_agadb :=_fdgbf .text ();_eebfc :="\u0020\u0020";if _fdgbf ._bbdgd !=nil {_eebfc =_bc .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_fdgbf ._bbdgd ._deafe ,_fdgbf ._bbdgd ._bfffcc );};_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_eafg ,_fdgbf .PdfRectangle ,_eebfc ,_fdafd (_agadb ,50)); +};};func (_dgde *PageText )getText ()string {_dcga :="";_dcae :=len (_dgde ._facbc );for _badgf :=0;_badgf < 360&&_dcae > 0;_badgf +=90{_ccbd :=make ([]*textMark ,0,len (_dgde ._facbc )-_dcae );for _ ,_ffc :=range _dgde ._facbc {if _ffc ._gdcb ==_badgf {_ccbd =append (_ccbd ,_ffc ); +};};if len (_ccbd )> 0{_dcga +=_ggaa (_ccbd ,_dgde ._fdcd );_dcae -=len (_ccbd );};};return _dcga ;};func (_cgccd pathSection )bbox ()_fd .PdfRectangle {_cgcd :=_cgccd ._ddgc [0]._gcac [0];_aebc :=_fd .PdfRectangle {Llx :_cgcd .X ,Urx :_cgcd 
.X ,Lly :_cgcd .Y ,Ury :_cgcd .Y }; +_aeca :=func (_eeae _aec .Point ){if _eeae .X < _aebc .Llx {_aebc .Llx =_eeae .X ;}else if _eeae .X > _aebc .Urx {_aebc .Urx =_eeae .X ;};if _eeae .Y < _aebc .Lly {_aebc .Lly =_eeae .Y ;}else if _eeae .Y > _aebc .Ury {_aebc .Ury =_eeae .Y ;};};for _ ,_gea :=range _cgccd ._ddgc [0]._gcac [1:]{_aeca (_gea ); +};for _ ,_daea :=range _cgccd ._ddgc [1:]{for _ ,_gace :=range _daea ._gcac {_aeca (_gace );};};return _aebc ;};func (_ecgb *stateStack )size ()int {return len (*_ecgb )};func (_bgcf *wordBag )text ()string {_cgcaa :=_bgcf .allWords ();_gfcd :=make ([]string ,len (_cgcaa )); +for _cdee ,_bggd :=range _cgcaa {_gfcd [_cdee ]=_bggd ._gabac ;};return _c .Join (_gfcd ,"\u0020");}; + +// String returns a description of `b`. +func (_fggcg *wordBag )String ()string {var _aggf []string ;for _ ,_geega :=range _fggcg .depthIndexes (){_eaba :=_fggcg ._edaag [_geega ];for _ ,_ccdd :=range _eaba {_aggf =append (_aggf ,_ccdd ._gabac );};};return _bc .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_fggcg .PdfRectangle ,_fggcg ._dffab ,len (_aggf ),_aggf ); +};func (_ecdf *wordBag )removeWord (_cfgg *textWord ,_aecaa int ){_bfaee :=_ecdf ._edaag [_aecaa ];_bfaee =_ggbf (_bfaee ,_cfgg );if len (_bfaee )==0{delete (_ecdf ._edaag ,_aecaa );}else {_ecdf ._edaag [_aecaa ]=_bfaee ;};};var (_ccbae =map[rune ]string 
{0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"}; +);func _ccagd (_aedc *textLine ,_aedf []*textLine )float64 {var _ffbcb float64 =-1;for _ ,_edebd :=range _aedf {if _edebd ._eefeg > _aedc ._eefeg {if _g .Round (_edebd .Llx )>=_g .Round (_aedc .Llx ){_ffbcb =_edebd ._eefeg ;}else {break ;};};};return _ffbcb ; +};func (_acaf *textObject )moveLP (_acfc ,_ccff float64 ){_acaf ._febb .Concat (_aec .NewMatrix (1,0,0,1,_acfc ,_ccff ));_acaf ._def =_acaf ._febb ;};func _gcegb (_eabca _fd .PdfColorspace ,_ffaa _fd .PdfColor )_ff .Color {if _eabca ==nil ||_ffaa ==nil {return _ff .Black ; +};_adebf ,_eddd :=_eabca .ColorToRGB (_ffaa );if _eddd !=nil {_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_ffaa ,_eabca ,_eddd ); +return _ff .Black ;};_fege ,_dbfca :=_adebf .(*_fd .PdfColorDeviceRGB );if !_dbfca {_df .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_adebf ); +return _ff .Black ;};return _ff 
.NRGBA {R :uint8 (_fege .R ()*255),G :uint8 (_fege .G ()*255),B :uint8 (_fege .B ()*255),A :uint8 (255)};};func _cbcgg (_dgfef string )bool {for _ ,_aceae :=range _dgfef {if !_eb .IsSpace (_aceae ){return false ;};};return true ; +};func _cebcef (_dgdfg string )(string ,bool ){_adea :=[]rune (_dgdfg );if len (_adea )!=1{return "",false ;};_cdddd ,_aecbf :=_ccbae [_adea [0]];return _cdddd ,_aecbf ;};func _gcbf (_aded []*textMark ,_fedg _fd .PdfRectangle ,_edba rulingList ,_ebeb []gridTiling ,_ceefb bool )paraList {_df .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_aded ),_fedg ); +if len (_aded )==0{return nil ;};_aged :=_fbgbac (_aded ,_fedg ,false );if len (_aged )==0{return nil ;};_edba .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_cbdbd ,_eggf :=_edba .vertsHorzs ();_cdcac :=_efba (_aged ,_fedg .Ury ,_cbdbd ,_eggf ); +_bbfc :=_cacb (_cdcac ,_fedg .Ury ,_cbdbd ,_eggf ,_ceefb );_bbfc =_bfeb (_bbfc );_cgdd :=make (paraList ,0,len (_bbfc ));for _ ,_efdg :=range _bbfc {_fdbg :=_efdg .arrangeText (_ceefb );if _fdbg !=nil {_cgdd =append (_cgdd ,_fdbg );};};if len (_cgdd )>=_bacc {_cgdd =_cgdd .extractTables (_ebeb ); +};_cgdd .sortReadingOrder ();_cgdd .sortTopoOrder ();_cgdd .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _cgdd ;};func _fdafd (_bddcac string ,_cgccg int )string {if len (_bddcac )< _cgccg {return _bddcac ; +};return _bddcac [:_cgccg ];};func _adcbc (_addee *paraList )map[int ][]*textLine {_cbdb :=map[int ][]*textLine {};for _ ,_afcb :=range *_addee {for _ ,_dffge :=range _afcb ._bgdfb {if !_cfefc (_dffge ){_df .Log .Debug 
("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e"); +continue ;};_gadc :=_dffge ._eecg [0]._fcdae [0]._cgaa ;_cbdb [_gadc ]=append (_cbdb [_gadc ],_dffge );};if _afcb ._bbdgd !=nil {_beea :=_afcb ._bbdgd ._afcbd ;for _ ,_gfcga :=range _beea {for _ ,_bfgea :=range _gfcga ._bgdfb {if !_cfefc (_bfgea ){_df .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e"); +continue ;};_bade :=_bfgea ._eecg [0]._fcdae [0]._cgaa ;_cbdb [_bade ]=append (_cbdb [_bade ],_bfgea );};};};};return _cbdb ;};type paraList []*textPara ;func _gffbd (_egbb ,_fdafc _aec .Point )rulingKind {_bbafe :=_g .Abs (_egbb .X -_fdafc .X );_cebb :=_g .Abs (_egbb .Y -_fdafc .Y ); +return _ecebf (_bbafe ,_cebb ,_bccgd );};func (_acba rulingList )merge ()*ruling {_becc :=_acba [0]._faad ;_fecec :=_acba [0]._feae ;_fffd :=_acba [0]._feagb ;for _ ,_cgggd :=range _acba [1:]{_becc +=_cgggd ._faad ;if _cgggd ._feae < _fecec {_fecec =_cgggd ._feae ; +};if _cgggd ._feagb > _fffd {_fffd =_cgggd ._feagb ;};};_cebce :=&ruling {_fbgb :_acba [0]._fbgb ,_bcab :_acba [0]._bcab ,Color :_acba [0].Color ,_faad :_becc /float64 (len (_acba )),_feae :_fecec ,_feagb :_fffd };if _addf {_df .Log .Info 
("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_acba ),_cebce ); +for _cfbbg ,_deadg :=range _acba {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cfbbg ,_deadg );};};return _cebce ;};var _ba =[]string {"\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0053","\u0042","\u0053","\u0057\u0053","\u0042","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042","\u0042","\u0042","\u0053","\u0057\u0053","\u004f\u004e","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0045\u0053","\u0043\u0053","\u0045\u0053","\u0043\u0053","\u0043\u0053","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0043\u0053","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u0
04e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0043\u0053","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u0042\u004e","\u004f\u004e","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u004e","\u0045\u004e","\u004f\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0045\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c"}; +func (_gddaeeg compositeCell )String ()string {_eccc :="";if len (_gddaeeg .paraList )> 0{_eccc =_fdafd (_gddaeeg .paraList .merge ().text (),50);};return _bc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_gddaeeg .PdfRectangle ,len (_gddaeeg .paraList ),_eccc ); +}; + +// NewFromContents creates a new extractor from contents and page resources. 
+func NewFromContents (contents string ,resources *_fd .PdfPageResources )(*Extractor ,error ){const _ddga ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_gcc :=&Extractor {_fed :contents ,_dd :resources ,_afd :map[string ]fontEntry {},_ga :map[string ]textResult {}}; +_bb .TrackUse (_ddga );return _gcc ,nil ;};func (_eabc *subpath )clear (){*_eabc =subpath {}}; + +// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`. +func (_fcdca *TextMarkArray )BBox ()(_fd .PdfRectangle ,bool ){var _fgdc _fd .PdfRectangle ;_fgef :=false ;for _ ,_egaf :=range _fcdca ._dbce {if _egaf .Meta ||_cbcgg (_egaf .Text ){continue ;};if _fgef {_fgdc =_gcacd (_fgdc ,_egaf .BBox );}else {_fgdc =_egaf .BBox ; +_fgef =true ;};};return _fgdc ,_fgef ;};func (_fdgc paraList )list ()[]*list {var _ffbgf []*textLine ;var _fafg []*textLine ;for _ ,_eebgg :=range _fdgc {_cddb :=_eebgg .getListLines ();_ffbgf =append (_ffbgf ,_cddb ...);_fafg =append (_fafg ,_eebgg ._bgdfb ...); +};_daga :=_bagf (_ffbgf );_ddefa :=_bcad (_fafg ,_daga );return _ddefa ;}; + +// PageTextOptions holds various options available in extraction process. +type PageTextOptions struct{_affg bool ;_egfd ExtractionMode ;};func _cbbd (_adac int ,_dccf func (int ,int )bool )[]int {_gbce :=make ([]int ,_adac );for _ebca :=range _gbce {_gbce [_ebca ]=_ebca ;};_bg .Slice (_gbce ,func (_fdafa ,_afdc int )bool {return _dccf (_gbce [_fdafa ],_gbce [_afdc ])}); +return _gbce ;}; + +// Len returns the number of TextMarks in `ma`. 
+func (_acgb *TextMarkArray )Len ()int {if _acgb ==nil {return 0;};return len (_acgb ._dbce );};func _cfdab (_bcbdc float64 )bool {return _g .Abs (_bcbdc )< _acde };type textObject struct{_faec *Extractor ;_gcbc *_fd .PdfPageResources ;_ceca _cb .GraphicsState ; +_ddgb *textState ;_dgad *stateStack ;_def _aec .Matrix ;_febb _aec .Matrix ;_agfc []*textMark ;_gbga bool ;}; + +// Text returns the text content of the `bulletLists`. +func (_fafac *lists )Text ()string {_cbfaa :=&_c .Builder {};for _ ,_gaba :=range *_fafac {_bgefg :=_gaba .Text ();_cbfaa .WriteString (_bgefg );};return _cbfaa .String ();};func (_gbaea intSet )add (_eecgb int ){_gbaea [_eecgb ]=struct{}{}};func (_bebe rulingList )sort (){_bg .Slice (_bebe ,_bebe .comp )}; +func (_fabdf *textObject )newTextMark (_aebg string ,_caa _aec .Matrix ,_aedg _aec .Point ,_aedfb float64 ,_dcbbd *_fd .PdfFont ,_cfdgg float64 ,_bfgf ,_aeff _ff .Color ,_gfcb _gc .PdfObject ,_egfee []string ,_bgaf int ,_bbga int )(textMark ,bool ){_ddefe :=_caa .Angle (); +_ebf :=_bcfbe (_ddefe ,_bfcda );var _gcacb float64 ;if _ebf %180!=90{_gcacb =_caa .ScalingFactorY ();}else {_gcacb =_caa .ScalingFactorX ();};_eebfd :=_ecgaa (_caa );_ffda :=_fd .PdfRectangle {Llx :_eebfd .X ,Lly :_eebfd .Y ,Urx :_aedg .X ,Ury :_aedg .Y }; +switch _ebf %360{case 90:_ffda .Urx -=_gcacb ;case 180:_ffda .Ury -=_gcacb ;case 270:_ffda .Urx +=_gcacb ;case 0:_ffda .Ury +=_gcacb ;default:_ebf =0;_ffda .Ury +=_gcacb ;};if _ffda .Llx > _ffda .Urx {_ffda .Llx ,_ffda .Urx =_ffda .Urx ,_ffda .Llx ;};if _ffda .Lly > _ffda .Ury {_ffda .Lly ,_ffda .Ury =_ffda .Ury ,_ffda .Lly ; +};_egcab :=true ;if _fabdf ._faec ._aea .Width ()> 0{_cfge ,_cbab :=_dadf (_ffda ,_fabdf ._faec ._aea );if !_cbab {_egcab =false ;_df .Log .Debug 
("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_ffda ,_fabdf ._faec ._aea ,_aebg ); +};_ffda =_cfge ;};_abcf :=_ffda ;_defcg :=_fabdf ._faec ._aea ;switch _ebf %360{case 90:_defcg .Urx ,_defcg .Ury =_defcg .Ury ,_defcg .Urx ;_abcf =_fd .PdfRectangle {Llx :_defcg .Urx -_ffda .Ury ,Urx :_defcg .Urx -_ffda .Lly ,Lly :_ffda .Llx ,Ury :_ffda .Urx }; +case 180:_abcf =_fd .PdfRectangle {Llx :_defcg .Urx -_ffda .Llx ,Urx :_defcg .Urx -_ffda .Urx ,Lly :_defcg .Ury -_ffda .Lly ,Ury :_defcg .Ury -_ffda .Ury };case 270:_defcg .Urx ,_defcg .Ury =_defcg .Ury ,_defcg .Urx ;_abcf =_fd .PdfRectangle {Llx :_ffda .Ury ,Urx :_ffda .Lly ,Lly :_defcg .Ury -_ffda .Llx ,Ury :_defcg .Ury -_ffda .Urx }; +};if _abcf .Llx > _abcf .Urx {_abcf .Llx ,_abcf .Urx =_abcf .Urx ,_abcf .Llx ;};if _abcf .Lly > _abcf .Ury {_abcf .Lly ,_abcf .Ury =_abcf .Ury ,_abcf .Lly ;};_dggb :=textMark {_dfdde :_aebg ,PdfRectangle :_abcf ,_cgcb :_ffda ,_edbb :_dcbbd ,_cbdd :_gcacb ,_bbgc :_cfdgg ,_edgad :_caa ,_gdcg :_aedg ,_gdcb :_ebf ,_agfcg :_bfgf ,_acdc :_aeff ,_gbae :_gfcb ,_abfde :_egfee ,Th :_fabdf ._ddgb ._fgcd ,Tw :_fabdf ._ddgb ._cdeb ,_cgaa :_bbga ,_ceag :_bgaf }; +if _egeg {_df .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_eebfd ,_aedg ,_dggb .String ());};return _dggb ,_egcab ; +};func (_bgcd *textPara )fontsize ()float64 {return _bgcd ._bgdfb [0]._acaea }; + +// String returns a description of `k`. 
+func (_gdefe markKind )String ()string {_fafcf ,_dfcd :=_egbg [_gdefe ];if !_dfcd {return _bc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_gdefe );};return _fafcf ;};const (RenderModeStroke RenderMode =1<<iota ; +RenderModeFill ;RenderModeClip ;);func _gebbe (_faaa *list )[]*textLine {for _ ,_ggad :=range _faaa ._eegf {switch _ggad ._ecbgd {case "\u004c\u0042\u006fd\u0079":if len (_ggad ._ddage )!=0{return _ggad ._ddage ;};return _gebbe (_ggad );case "\u0053\u0070\u0061\u006e":return _ggad ._ddage ; +case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":return _ggad ._ddage ;};};return nil ;};func (_afed paraList )findTextTables ()[]*textTable {var _ceae []*textTable ;for _ ,_cdebg :=range _afed {if _cdebg .taken ()||_cdebg .Width ()==0{continue ; +};_edfg :=_cdebg .isAtom ();if _edfg ==nil {continue ;};_edfg .growTable ();if _edfg ._deafe *_edfg ._bfffcc < _bacc {continue ;};_edfg .markCells ();_edfg .log ("\u0067\u0072\u006fw\u006e");_ceae =append (_ceae ,_edfg );};return _ceae ;};type pathSection struct{_ddgc []*subpath ; +_ff .Color ;};func (_gffa rulingList )secMinMax ()(float64 ,float64 ){_bcdg ,_bdbfc :=_gffa [0]._feae ,_gffa [0]._feagb ;for _ ,_daebb :=range _gffa [1:]{if _daebb ._feae < _bcdg {_bcdg =_daebb ._feae ;};if _daebb ._feagb > _bdbfc {_bdbfc =_daebb ._feagb ; +};};return _bcdg ,_bdbfc ;};func (_eddbe *textTable )getDown ()paraList {_dafcf :=make (paraList ,_eddbe ._deafe );for _gagde :=0;_gagde < _eddbe ._deafe ;_gagde ++{_caccc :=_eddbe .get (_gagde ,_eddbe ._bfffcc -1)._gfafd ;if _caccc .taken (){return nil ; +};_dafcf [_gagde ]=_caccc ;};for _ebab :=0;_ebab < _eddbe ._deafe -1;_ebab ++{if _dafcf [_ebab ]._ebea !=_dafcf [_ebab +1]{return nil ;};};return _dafcf ;};func _dfad (_faef ,_aecbg bounded )float64 {return _afef (_faef )-_afef (_aecbg )};func (_cdac *textObject )setHorizScaling (_aaga float64 ){if _cdac ==nil {return ; +};_cdac ._ddgb ._fgcd =_aaga ;}; + +// NewWithOptions 
an Extractor instance for extracting content from the input PDF page with options. +func NewWithOptions (page *_fd .PdfPage ,options *Options )(*Extractor ,error ){const _dba ="\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073";_gbc ,_cfdg :=page .GetAllContentStreams (); +if _cfdg !=nil {return nil ,_cfdg ;};var _dfe *_fd .StructTreeRoot ;_ccf ,_ee :=page .GetStructTreeRoot ();if !_ee {_df .Log .Debug ("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e"); +}else {_dfe ,_cfdg =_fd .NewStructTreeRootFromPdfObject (*_ccf );if _cfdg !=nil {return nil ,_bc .Errorf ("\u0065\u0072\u0072or\u0020\u006c\u006f\u0061\u0064\u0069\u006e\u0067\u0020s\u0074r\u0075c\u0074 \u0074\u0072\u0065\u0065\u0020\u0072\u006f\u006f\u0074\u003a\u0020\u0025\u0076",_cfdg ); +};};_efd :=page .GetContainingPdfObject ();_gec ,_cfdg :=page .GetMediaBox ();if _cfdg !=nil {return nil ,_bc .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_cfdg ); +};_bca :=&Extractor {_fed :_gbc ,_dd :page .Resources ,_aea :*_gec ,_bdc :page .CropBox ,_cc :page .GetStructParentsKey (),_afd :map[string ]fontEntry {},_ga :map[string ]textResult {},_bff :map[string ]textResult {},_cab :options ,_gaf :_dfe ,_ea :_efd }; +if _bca ._aea .Llx > _bca ._aea .Urx {_df .Log .Info 
("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bca ._aea ); +_bca ._aea .Llx ,_bca ._aea .Urx =_bca ._aea .Urx ,_bca ._aea .Llx ;};if _bca ._aea .Lly > _bca ._aea .Ury {_df .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bca ._aea ); +_bca ._aea .Lly ,_bca ._aea .Ury =_bca ._aea .Ury ,_bca ._aea .Lly ;};if _bca ._cab !=nil {if _bca ._cab .IncludeAnnotations {_bca ._fdg ,_cfdg =page .GetAnnotations ();if _cfdg !=nil {_df .Log .Debug ("\u0045\u0072r\u006f\u0072\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006e\u006e\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0073: \u0025\u0076",_cfdg ); +};};};_bb .TrackUse (_dba );return _bca ,nil ;}; + +// Match defines the structure for each match, including pattern, indexes, and locations. 
+type Match struct{Pattern string ;Indexes [][]int ;Locations []Box ;};func (_bfdc rectRuling )asRuling ()(*ruling ,bool ){_feac :=ruling {_fbgb :_bfdc ._gadd ,Color :_bfdc .Color ,_bcab :_febcb };switch _bfdc ._gadd {case _bfdef :_feac ._faad =0.5*(_bfdc .Llx +_bfdc .Urx ); +_feac ._feae =_bfdc .Lly ;_feac ._feagb =_bfdc .Ury ;_eaag ,_eecdd :=_bfdc .checkWidth (_bfdc .Llx ,_bfdc .Urx );if !_eecdd {if _gdae {_df .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_bfdc ); +};return nil ,false ;};_feac ._bccfg =_eaag ;case _caec :_feac ._faad =0.5*(_bfdc .Lly +_bfdc .Ury );_feac ._feae =_bfdc .Llx ;_feac ._feagb =_bfdc .Urx ;_adfb ,_dbed :=_bfdc .checkWidth (_bfdc .Lly ,_bfdc .Ury );if !_dbed {if _gdae {_df .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_bfdc ); +};return nil ,false ;};_feac ._bccfg =_adfb ;default:_df .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_bfdc ._gadd );return nil ,false ;};return &_feac ,true ;};func _ffbfc (_gceb ,_bffda bounded )float64 {_dee :=_dfad (_gceb ,_bffda ); +if !_cfdab (_dee ){return _dee ;};return _dfcf (_gceb ,_bffda );};func (_bafad *TextMarkArray )exists (_efbe TextMark )bool {for _ ,_gabg :=range _bafad .Elements (){if _d .DeepEqual (_efbe .DirectObject ,_gabg .DirectObject )&&_d .DeepEqual (_efbe .BBox ,_gabg .BBox )&&_gabg .Text ==_efbe .Text {return true ; +};};return false ;};func _ecfac (_fadb *wordBag ,_ecfd int )*textLine {_bbec :=_fadb .firstWord (_ecfd );_feada 
:=textLine {PdfRectangle :_bbec .PdfRectangle ,_acaea :_bbec ._gage ,_eefeg :_bbec ._dcggb };_feada .pullWord (_fadb ,_bbec ,_ecfd );return &_feada ; +};func (_cfed *shapesState )closePath (){if _cfed ._baffg {_cfed ._bgf =append (_cfed ._bgf ,_bfcbce (_cfed ._fabe ));_cfed ._baffg =false ;}else if len (_cfed ._bgf )==0{if _face {_df .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068"); +};_cfed ._baffg =false ;return ;};_cfed ._bgf [len (_cfed ._bgf )-1].close ();if _face {_df .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_cfed );};};
\ No newline at end of file |
