/// <summary>
/// Reads the text covered by <paramref name="textSpan"/> from an already loaded page.
/// </summary>
/// <param name="pageData">The loaded page whose <c>TextPage</c> handle is queried.</param>
/// <param name="textSpan">The character offset and length to extract.</param>
/// <returns>The extracted text, or an empty string when nothing could be read.</returns>
private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
{
    if (textSpan.Length <= 0)
    {
        return string.Empty;
    }

    // Depending on the PDFium build, the count passed to FPDFText_GetText either
    // includes the terminator or not. Ask for one extra character so that the whole
    // span is returned in both cases, and leave room for a terminator after it so
    // the native call can never write past the end of the buffer.
    int requested = textSpan.Length + 1;
    var buffer = new byte[(requested + 1) * 2];

    // The return value is the number of characters written, including the terminator.
    int written = NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, requested, buffer);
    if (written <= 0)
    {
        return string.Empty;
    }

    // Drop the terminator and never hand back more characters than were asked for.
    int charCount = written - 1;
    if (charCount > textSpan.Length)
    {
        charCount = textSpan.Length;
    }

    // Two bytes per character in the native buffer.
    return FPDFEncoding.GetString(buffer, 0, charCount * 2);
}
/// <summary>
/// Gets the bounding rectangles for the given text span.
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to measure.</param>
/// <returns>The rectangles produced by the text-page overload of <c>GetTextBounds</c>.</returns>
public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
{
    // The page data is created for this call only and disposed before returning.
    using (var loadedPage = new PageData(_document, _form, textSpan.Page))
    {
        var bounds = GetTextBounds(loadedPage.TextPage, textSpan.Page, textSpan.Offset, textSpan.Length);
        return bounds;
    }
}
/// <summary>
/// Gets the text covered by the given text span.
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to read.</param>
/// <returns>The text returned by the page-level <c>GetPdfText</c> overload.</returns>
public string GetPdfText(PdfTextSpan textSpan)
{
    // The page data is created for this call only and disposed before returning.
    using (var loadedPage = new PageData(_document, _form, textSpan.Page))
    {
        string text = GetPdfText(loadedPage, textSpan);
        return text;
    }
}
/// <summary>
/// Gets the text covered by the given text span.
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to read.</param>
/// <returns>The text returned by the page-level <c>GetPdfText</c> overload.</returns>
public string GetPdfText(PdfTextSpan textSpan)
{
    // The page data comes from GetPageData and is not disposed here.
    return GetPdfText(GetPageData(_document, _form, textSpan.Page), textSpan);
}
/// <summary>
/// Finds the word located at (or within the given tolerance of) a position on a page.
/// </summary>
/// <param name="location">The page and point to inspect.</param>
/// <param name="xTolerance">Hit-test tolerance along the x axis, forwarded to <c>GetCharIndexAtPos</c>.</param>
/// <param name="yTolerance">Hit-test tolerance along the y axis, forwarded to <c>GetCharIndexAtPos</c>.</param>
/// <param name="span">
/// Receives the span of the word when one is found; otherwise <c>default(PdfTextSpan)</c>.
/// </param>
/// <returns>
/// <c>true</c> when a non-separator character was hit and a word span was produced;
/// otherwise <c>false</c>.
/// </returns>
public bool GetWordAtPosition(PdfPoint location, double xTolerance, double yTolerance, out PdfTextSpan span)
{
    span = default(PdfTextSpan);

    int index = GetCharIndexAtPos(location, xTolerance, yTolerance);
    if (index < 0)
    {
        // No character near the requested position.
        return false;
    }

    if (IsWordSeparator(GetCharacter(location.Page, index)))
    {
        // The hit landed on whitespace/punctuation, i.e. between words.
        return false;
    }

    // Walk left until a separator (or the start of the page text) is reached.
    int start = index;
    for (int i = index - 1; i >= 0; i--)
    {
        if (IsWordSeparator(GetCharacter(location.Page, i)))
        {
            break;
        }

        start = i;
    }

    // Walk right until a separator (or the end of the page text) is reached.
    int end = index;
    int count = CountChars(location.Page);
    for (int i = index + 1; i < count; i++)
    {
        if (IsWordSeparator(GetCharacter(location.Page, i)))
        {
            break;
        }

        end = i;
    }

    // Both start and end are inclusive character indices, so the length is
    // end - start + 1. Using end - start would drop the last character of the
    // word and yield an empty span for single-character words.
    span = new PdfTextSpan(location.Page, start, end - start + 1);
    return true;

    // '\r' and '\n' are already matched by both IsControl and IsWhiteSpace,
    // so they need no separate comparison.
    bool IsWordSeparator(char c)
    {
        return char.IsSeparator(c)
            || char.IsPunctuation(c)
            || char.IsControl(c)
            || char.IsWhiteSpace(c);
    }
}
/// <summary>
/// Reads the text covered by <paramref name="textSpan"/> from an already loaded page.
/// </summary>
/// <param name="pageData">The loaded page whose <c>TextPage</c> handle is queried.</param>
/// <param name="textSpan">The character offset and length to extract.</param>
/// <returns>The extracted text, or an empty string when nothing could be read.</returns>
private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
{
    if (textSpan.Length <= 0)
    {
        return string.Empty;
    }

    // NOTE: The count parameter in FPDFText_GetText seems to include the null
    // terminator, even though the documentation does not specify this. So to read
    // 40 characters we request 41 characters from GetText.
    int requested = textSpan.Length + 1;

    // The documented contract is that the buffer must hold `count` characters plus
    // a terminator, so reserve one more UTF-16 unit than requested. This keeps the
    // native call inside the buffer even on builds where count excludes the
    // terminator.
    var buffer = new byte[(requested + 1) * 2];

    // The return value also includes the terminator (which is documented).
    int written = NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, requested, buffer);
    if (written <= 0)
    {
        return string.Empty;
    }

    // Drop the terminator and never hand back more characters than were asked for.
    int charCount = written - 1;
    if (charCount > textSpan.Length)
    {
        charCount = textSpan.Length;
    }

    // Two bytes per character in the native buffer.
    return FPDFEncoding.GetString(buffer, 0, charCount * 2);
}
/// <summary>
/// Reads the text covered by <paramref name="textSpan"/> from an already loaded page
/// and re-joins isolated single alphanumeric characters with their neighbours.
/// </summary>
/// <param name="pageData">The loaded page whose <c>TextPage</c> handle is queried.</param>
/// <param name="textSpan">The character offset and length to extract.</param>
/// <returns>
/// The extracted text with tokens re-joined, every <c>"\r"</c> replaced by
/// <c>"\r\n"</c>, and leading/trailing whitespace trimmed.
/// </returns>
private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
{
    if (textSpan.Length <= 0)
    {
        return string.Empty;
    }

    // Depending on the PDFium build, the count passed to FPDFText_GetText either
    // includes the terminator or not. Ask for one extra character so the whole span
    // is returned in both cases, and leave room for a terminator after it.
    int requested = textSpan.Length + 1;
    var buffer = new byte[(requested + 1) * 2];

    // The return value is the number of characters written, including the terminator.
    int written = NativeMethods.FPDFText_GetText(pageData.TextPage, textSpan.Offset, requested, buffer);
    if (written <= 0)
    {
        return string.Empty;
    }

    int charCount = written - 1;
    if (charCount > textSpan.Length)
    {
        charCount = textSpan.Length;
    }

    string raw = FPDFEncoding.GetString(buffer, 0, charCount * 2);

    // Characters treated as "glueable" when they appear as a lone token.
    const string alphanumerics = "QWERTYUIOPASDFGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm0123456789";

    // Embedded NULs are treated as spaces before tokenizing.
    raw = raw.Replace('\0', ' ');
    string[] tokens = raw.Split(new char[] { ' ', '\n' });

    string joined = tokens[0];
    for (int i = 1; i < tokens.Length; i++)
    {
        string token = tokens[i];

        if (token.Trim().Length != 1)
        {
            // Ordinary (or empty) token: keep it separated by spaces.
            joined += ' ' + token + ' ';
            continue;
        }

        if (alphanumerics.IndexOf(token) == -1)
        {
            // Lone non-alphanumeric character: keep it as its own token.
            joined += ' ' + token;
            continue;
        }

        // Lone alphanumeric character. If the next token is a single character
        // too, emit the pair as a new word; otherwise glue this character onto
        // the preceding text. The bounds check must look at i + 1: checking only
        // i would read past the end of the array when this is the last token.
        if (i + 1 < tokens.Length && tokens[i + 1].Trim().Length == 1)
        {
            joined = joined.Trim() + ' ' + token + tokens[i + 1].Trim();
            i++;
        }
        else
        {
            joined = joined.Trim() + token;
        }
    }

    return joined.Replace("\r", "\r\n").Trim();
}
/// <summary>
/// Gets the bounding rectangles for the given text span using the native
/// rectangle enumeration (<c>FPDFText_CountRects</c> / <c>FPDFText_GetRect</c>).
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to measure.</param>
/// <returns>One <c>PdfRectangle</c> per rectangle reported for the span.</returns>
public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
{
    var page = GetPageData(_document, _form, textSpan.Page);
    var rectangles = new List<PdfRectangle>();

    int rectCount = NativeMethods.FPDFText_CountRects(page.TextPage, textSpan.Offset, textSpan.Length);
    for (int rectIndex = 0; rectIndex < rectCount; rectIndex++)
    {
        NativeMethods.FPDFText_GetRect(page.TextPage, rectIndex, out var left, out var top, out var right, out var bottom);

        // Width and height are taken as plain edge differences, exactly as reported.
        float width = (float)(right - left);
        float height = (float)(bottom - top);

        rectangles.Add(new PdfRectangle(textSpan.Page, new RectangleF((float)left, (float)top, width, height)));
    }

    return rectangles;
}
/// <summary>
/// Gets the text covered by the given text span.
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to read.</param>
/// <returns>The text returned by the page-level <c>GetPdfText</c> overload.</returns>
public string GetPdfText(PdfTextSpan textSpan)
{
    var page = GetPageData(textSpan.Page);
    return GetPdfText(page, textSpan);
}
/// <summary>
/// Gets the bounding rectangles for the given text span.
/// </summary>
/// <param name="textSpan">The span (page, offset and length) to measure.</param>
/// <returns>The rectangles produced by the text-page overload of <c>GetTextBounds</c>.</returns>
public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
{
    var textPage = GetPageData(textSpan.Page).TextPage;
    return GetTextBounds(textPage, textSpan.Page, textSpan.Offset, textSpan.Length);
}
/// <summary>
/// Creates a match from its text, its span and its page.
/// </summary>
/// <param name="text">Value stored in <c>Text</c>.</param>
/// <param name="textSpan">Value stored in <c>TextSpan</c>.</param>
/// <param name="page">Value stored in <c>Page</c>.</param>
public PdfMatch(string text, PdfTextSpan textSpan, int page)
{
    this.Text = text;
    this.TextSpan = textSpan;
    this.Page = page;
}
/// <summary>
/// Looks up the complete word at, or close to, the given position.
/// </summary>
/// <param name="location">The position to inspect.</param>
/// <param name="xTolerance">Tolerance on the x axis used for character hit detection, in points.</param>
/// <param name="yTolerance">Tolerance on the y axis used for character hit detection, in points.</param>
/// <param name="span">Receives the location of the word that was found, if any.</param>
/// <returns><c>true</c> when a word was found at the specified location; otherwise <c>false</c>.</returns>
public bool GetWordAtPosition(PdfPoint location, double xTolerance, double yTolerance, out PdfTextSpan span)
    => _file.GetWordAtPosition(location, xTolerance, yTolerance, out span);
/// <summary>
/// Returns every bounding rectangle that belongs to the text span.
/// </summary>
/// <remarks>
/// The algorithm used to compute the rectangles tries to merge the bounds of
/// adjacent characters into larger rectangles.
/// </remarks>
/// <param name="textSpan">The span whose bounding rectangles are requested.</param>
/// <returns>The bounding rectangles.</returns>
public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
    => _file.GetTextBounds(textSpan);
/// <summary>
/// Returns the text that corresponds to the text span.
/// </summary>
/// <param name="textSpan">The span whose text is requested.</param>
/// <returns>The text matching the span.</returns>
public string GetPdfText(PdfTextSpan textSpan)
    => _file.GetPdfText(textSpan);
/// <summary>
/// Returns the bounding rectangles for the text span by forwarding the request
/// to <c>Document</c>.
/// </summary>
/// <param name="textSpan">The span whose bounding rectangles are requested.</param>
/// <returns>Whatever <c>Document.GetTextBounds</c> returns for the span.</returns>
public IList<PdfRectangle> GetTextBounds(PdfTextSpan textSpan)
    => Document.GetTextBounds(textSpan);
/// <summary>
/// Returns the text for the text span by forwarding the request to <c>Document</c>.
/// </summary>
/// <param name="textSpan">The span whose text is requested.</param>
/// <returns>Whatever <c>Document.GetPdfText</c> returns for the span.</returns>
public string GetPdfText(PdfTextSpan textSpan)
    => Document.GetPdfText(textSpan);