Exemple #1
0
        /// <summary>
        /// Reads the text covered by <paramref name="textSpan"/> from the text page held by <paramref name="pageData"/>.
        /// </summary>
        /// <param name="pageData">Page data whose <c>TextPage</c> handle is handed to PDFium.</param>
        /// <param name="textSpan">Offset and length, in characters, of the text to read.</param>
        /// <returns>The extracted text, or an empty string when the span is empty or nothing was extracted.</returns>
        private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
        {
            if (textSpan.Length <= 0)
            {
                return string.Empty;
            }

            // Two bytes per character, plus room for the terminator written by PDFium.
            var buffer = new byte[(textSpan.Length + 1) * 2];

            // The return value is the number of characters written, terminator included
            // (per the PDFium header documentation). Use it instead of assuming that
            // exactly textSpan.Length characters were produced.
            int written = NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, textSpan.Length, buffer);

            if (written <= 0)
            {
                return string.Empty;
            }

            // Never decode more characters than were requested.
            int charCount = written < textSpan.Length ? written : textSpan.Length;

            // If the last counted character is the terminator (short read, or a PDFium build
            // that counts the terminator inside the requested range), leave it out of the result.
            int lastCharOffset = (charCount - 1) * 2;
            if (buffer[lastCharOffset] == 0 && buffer[lastCharOffset + 1] == 0)
            {
                charCount--;
            }

            return FPDFEncoding.GetString(buffer, 0, charCount * 2);
        }
Exemple #2
0
 /// <summary>
 /// Gets the bounding rectangles of the given text span.
 /// </summary>
 /// <param name="textSpan">The span (page, offset, length) to measure.</param>
 /// <returns>The rectangles computed for the span.</returns>
 public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
 {
     IList<PdfRectangle> bounds;

     // The page data only lives for the duration of the lookup; it is disposed before returning.
     using (var page = new PageData(_document, _form, textSpan.Page))
     {
         bounds = GetTextBounds(page.TextPage, textSpan.Page, textSpan.Offset, textSpan.Length);
     }

     return bounds;
 }
Exemple #3
0
 /// <summary>
 /// Gets the text covered by the given text span.
 /// </summary>
 /// <param name="textSpan">The span (page, offset, length) to read.</param>
 /// <returns>The text returned by the page-level overload.</returns>
 public string GetPdfText(PdfTextSpan textSpan)
 {
     string text;

     // The page data only lives for the duration of the read; it is disposed before returning.
     using (var page = new PageData(_document, _form, textSpan.Page))
     {
         text = GetPdfText(page, textSpan);
     }

     return text;
 }
Exemple #4
0
 /// <summary>
 /// Gets the text covered by the given text span.
 /// </summary>
 /// <param name="textSpan">The span (page, offset, length) to read.</param>
 /// <returns>The text returned by the page-level overload.</returns>
 public string GetPdfText(PdfTextSpan textSpan)
 {
     // Page data is obtained through GetPageData and is not disposed here.
     var page = GetPageData(_document, _form, textSpan.Page);
     string text = GetPdfText(page, textSpan);

     return text;
 }
Exemple #5
0
        /// <summary>
        /// Finds the word surrounding the character located at <paramref name="location"/>.
        /// </summary>
        /// <param name="location">The position (including page) to inspect.</param>
        /// <param name="xTolerance">Horizontal tolerance forwarded to <c>GetCharIndexAtPos</c>.</param>
        /// <param name="yTolerance">Vertical tolerance forwarded to <c>GetCharIndexAtPos</c>.</param>
        /// <param name="span">
        /// On success, the span covering the whole word; otherwise <c>default(PdfTextSpan)</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when a non-separator character was hit and a word span was produced;
        /// <c>false</c> when no character was hit or the hit character is a word separator.
        /// </returns>
        public bool GetWordAtPosition(PdfPoint location, double xTolerance, double yTolerance, out PdfTextSpan span)
        {
            var index = GetCharIndexAtPos(location, xTolerance, yTolerance);

            // A negative index means no character was found at the position.
            if (index < 0)
            {
                span = default(PdfTextSpan);
                return(false);
            }

            var baseCharacter = GetCharacter(location.Page, index);

            if (IsWordSeparator(baseCharacter))
            {
                span = default(PdfTextSpan);
                return(false);
            }

            // Both bounds are inclusive character indices of the word.
            int start = index, end = index;

            // Extend backwards until a separator or the start of the page text.
            for (int i = index - 1; i >= 0; i--)
            {
                var c = GetCharacter(location.Page, i);
                if (IsWordSeparator(c))
                {
                    break;
                }
                start = i;
            }

            var count = CountChars(location.Page);

            // Extend forwards until a separator or the end of the page text.
            for (int i = index + 1; i < count; i++)
            {
                var c = GetCharacter(location.Page, i);
                if (IsWordSeparator(c))
                {
                    break;
                }
                end = i;
            }

            // 'end' is inclusive, so the number of characters is end - start + 1.
            // (Using end - start dropped the last character and gave length 0 for one-letter words.)
            span = new PdfTextSpan(location.Page, start, end - start + 1);
            return(true);

            // A character ends a word when it is a separator, punctuation, control or white-space character.
            bool IsWordSeparator(char c)
            {
                return(char.IsSeparator(c) || char.IsPunctuation(c) || char.IsControl(c) || char.IsWhiteSpace(c) || c == '\r' || c == '\n');
            }
        }
Exemple #6
0
        /// <summary>
        /// Reads the text covered by <paramref name="textSpan"/> from the text page held by <paramref name="pageData"/>.
        /// </summary>
        /// <param name="pageData">Page data whose <c>TextPage</c> handle is handed to PDFium.</param>
        /// <param name="textSpan">Offset and length, in characters, of the text to read.</param>
        /// <returns>The extracted text, or an empty string when PDFium reports nothing written.</returns>
        private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
        {
            // NOTE: FPDFText_GetText appears to count the null terminator in its 'count' argument,
            // although the documentation does not say so. Reading 40 characters therefore needs an
            // 82-byte buffer (2 bytes for the terminator) and a request for 41 characters.
            int requested = textSpan.Length + 1;
            var buffer    = new byte[requested * 2];

            // The returned count also includes the terminator (this part is documented).
            int written = NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, requested, buffer);

            if (written > 0)
            {
                // Decode everything except the trailing terminator.
                int byteCount = (written - 1) * 2;
                return FPDFEncoding.GetString(buffer, 0, byteCount);
            }

            return string.Empty;
        }
Exemple #7
0
        /// <summary>
        /// Reads the text covered by <paramref name="textSpan"/> and re-joins characters that were
        /// extracted as isolated, space-separated tokens.
        /// </summary>
        /// <remarks>
        /// The raw text has NUL characters replaced by spaces and is split on spaces and line feeds.
        /// Tokens whose trimmed length is 1 and that are ASCII letters or digits are glued onto the
        /// preceding text (or paired with a following single-character token); every other token is
        /// appended separated by spaces. Finally each carriage return is expanded to CR LF and the
        /// result is trimmed.
        /// </remarks>
        /// <param name="pageData">Page data whose <c>TextPage</c> handle is handed to PDFium.</param>
        /// <param name="textSpan">Offset and length, in characters, of the text to read.</param>
        /// <returns>The re-joined text.</returns>
        private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
        {
            var result = new byte[(textSpan.Length + 1) * 2];

            NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, textSpan.Length, result);
            string text = FPDFEncoding.GetString(result, 0, textSpan.Length * 2);

            // Characters that are treated as "part of a word" when they appear on their own.
            const string alphanumerics = "QWERTYUIOPASDFGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm0123456789";

            text = text.Replace('\0', ' ');
            string[] tokens = text.Split(new char[] { ' ', '\n' });

            string joined = tokens[0];

            for (int i = 1; i < tokens.Length; i++)
            {
                string token = tokens[i];
                if (token.Trim().Length == 1)
                {
                    // Ordinal lookup: a culture-sensitive search can report a match at index 0
                    // for ignorable characters, which would misclassify them as alphanumeric.
                    if (alphanumerics.IndexOf(token, System.StringComparison.Ordinal) == -1)
                    {
                        joined += ' ' + token;
                    }
                    else
                    {
                        // Only look ahead when a next token actually exists; the previous guard
                        // (i < length) was always true and overran the array on the last token.
                        bool hasNext = i + 1 < tokens.Length;
                        if (hasNext && tokens[i + 1].Trim().Length == 1)
                        {
                            joined = joined.Trim() + ' ' + token + tokens[i + 1].Trim();
                            i++;
                        }
                        else
                        {
                            joined = joined.Trim() + token;
                        }
                    }
                }
                else
                {
                    joined += ' ' + token + ' ';
                }
            }
            joined = joined.Replace("\r", "\r\n").Trim();

            return(joined);
        }
Exemple #8
0
        /// <summary>
        /// Gets the rectangles PDFium reports for the given text span.
        /// </summary>
        /// <param name="textSpan">The span (page, offset, length) to measure.</param>
        /// <returns>One <c>PdfRectangle</c> per rectangle reported by <c>FPDFText_CountRects</c>.</returns>
        public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
        {
            var rectangles = new List<PdfRectangle>();
            var page       = GetPageData(_document, _form, textSpan.Page);

            // CountRects prepares the rectangles for the span; GetRect then reads them by index.
            int total = NativeMethods.FPDFText_CountRects(page.TextPage, textSpan.Offset, textSpan.Length);

            int rectIndex = 0;
            while (rectIndex < total)
            {
                NativeMethods.FPDFText_GetRect(page.TextPage, rectIndex, out var left, out var top, out var right, out var bottom);

                // Width and height are the raw edge differences, narrowed to float.
                float width  = (float)(right - left);
                float height = (float)(bottom - top);
                var   area   = new RectangleF((float)left, (float)top, width, height);

                rectangles.Add(new PdfRectangle(textSpan.Page, area));
                rectIndex++;
            }

            return rectangles;
        }
Exemple #9
0
 /// <summary>
 /// Gets the text covered by the given text span.
 /// </summary>
 /// <param name="textSpan">The span to read; its page selects the page data.</param>
 /// <returns>The text returned by the page-level overload.</returns>
 public string GetPdfText(PdfTextSpan textSpan)
 {
     var page = GetPageData(textSpan.Page);

     return GetPdfText(page, textSpan);
 }
Exemple #10
0
 /// <summary>
 /// Gets the bounding rectangles of the given text span.
 /// </summary>
 /// <param name="textSpan">The span to measure; its page selects the page data.</param>
 /// <returns>The rectangles computed by the text-page overload.</returns>
 public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
 {
     var textPage = GetPageData(textSpan.Page).TextPage;

     return GetTextBounds(textPage, textSpan.Page, textSpan.Offset, textSpan.Length);
 }
Exemple #11
0
 /// <summary>
 /// Creates a match from its text, its span and its page.
 /// </summary>
 /// <param name="text">Value stored in <c>Text</c>.</param>
 /// <param name="textSpan">Value stored in <c>TextSpan</c>.</param>
 /// <param name="page">Value stored in <c>Page</c>.</param>
 public PdfMatch(string text, PdfTextSpan textSpan, int page)
 {
     this.Text = text;
     this.TextSpan = textSpan;
     this.Page = page;
 }
Exemple #12
0
 /// <summary>
 /// Looks up the complete word at, or close to, a given position.
 /// </summary>
 /// <param name="location">Position to inspect.</param>
 /// <param name="xTolerance">Tolerance along the x-axis for character hit detection, in points.</param>
 /// <param name="yTolerance">Tolerance along the y-axis for character hit detection, in points.</param>
 /// <param name="span">Receives the span of the word that was found, if any.</param>
 /// <returns><c>true</c> if a word was found at the position; otherwise <c>false</c>.</returns>
 public bool GetWordAtPosition(PdfPoint location, double xTolerance, double yTolerance, out PdfTextSpan span)
 {
     // Pure delegation to the underlying file object.
     bool found = _file.GetWordAtPosition(location, xTolerance, yTolerance, out span);

     return found;
 }
Exemple #13
0
 /// <summary>
 /// Returns every bounding rectangle belonging to the text span.
 /// </summary>
 /// <remarks>
 /// The underlying algorithm attempts to merge adjacent character bounds
 /// into larger rectangles.
 /// </remarks>
 /// <param name="textSpan">Span whose bounding rectangles are requested.</param>
 /// <returns>The bounding rectangles of the span.</returns>
 public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
 {
     // Pure delegation to the underlying file object.
     IList<PdfRectangle> bounds = _file.GetTextBounds(textSpan);

     return bounds;
 }
Exemple #14
0
 /// <summary>
 /// Returns the text that corresponds to the text span.
 /// </summary>
 /// <param name="textSpan">Span whose text is requested.</param>
 /// <returns>The text covered by the span.</returns>
 public string GetPdfText(PdfTextSpan textSpan)
 {
     // Pure delegation to the underlying file object.
     string text = _file.GetPdfText(textSpan);

     return text;
 }
 /// <summary>
 /// Returns the bounding rectangles of the text span, as reported by <c>Document</c>.
 /// </summary>
 /// <param name="textSpan">Span whose bounding rectangles are requested.</param>
 /// <returns>The rectangles returned by <c>Document.GetTextBounds</c>.</returns>
 public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
 {
     IList<PdfRectangle> bounds = Document.GetTextBounds(textSpan);

     return bounds;
 }
 /// <summary>
 /// Returns the text of the text span, as reported by <c>Document</c>.
 /// </summary>
 /// <param name="textSpan">Span whose text is requested.</param>
 /// <returns>The text returned by <c>Document.GetPdfText</c>.</returns>
 public string GetPdfText(PdfTextSpan textSpan)
 {
     string text = Document.GetPdfText(textSpan);

     return text;
 }