Пример #1
0
        /// <summary>
        /// Resets the per-page extraction state, runs the page's content through
        /// <c>ProcessElements</c>, and then folds together line entries that sit in
        /// neighbouring slots of <c>_textLines</c> when the <c>Top</c> values of their
        /// first characters differ by less than 3.
        /// </summary>
        void ExtractText()
        {
            // Reset state left over from any previous run.
            _charCode = -1;
            _pageMatrix = PdfPage.GetDefaultMatrix();
            _pageHeight = PdfPage.GetPageHeight();

            // One slot per 4 units of page height (>> 2), plus 8 spare slots.
            int slotCount = ((int)_pageHeight >> 2) + 8;
            _textLines = new PdfLineOfText[slotCount];

            // Presumably ProcessElements populates _textLines while reading the page;
            // its body is not visible here.
            using (ElementReader reader = new ElementReader())
            {
                reader.Begin(PdfPage);
                ProcessElements(reader);
            }

            // Merge pass: only directly adjacent slots are compared. A merged slot is
            // cleared, and the "previous" reference is dropped right after a merge, so
            // the slot following a merged one is never folded into the same line.
            PdfLineOfText previous = null;

            for (int slot = 0; slot < _textLines.Length; slot++)
            {
                PdfLineOfText current = _textLines[slot];

                bool mergeWithPrevious =
                    previous != null
                    && current != null
                    && previous.FirstChar.Top - current.FirstChar.Top < 3;

                if (!mergeWithPrevious)
                {
                    // Nothing to merge; this slot (possibly null) becomes the reference
                    // for the next iteration.
                    previous = current;
                    continue;
                }

                previous.AddPdfChars(current.Chars);

                _textLines[slot] = null;
                previous = null;
            }
        }