public void Contains()
{
    // Rectangle spanning (10, 10) to (20, 20), probed with three sample points.
    var box = new PdfRectangle(10, 10, 20, 20);
    var interiorPoint = new PdfPoint(15, 15);
    var edgePoint = new PdfPoint(10, 15);
    var farPoint = new PdfPoint(100, 100);

    // A point strictly inside is contained.
    Assert.True(box.Contains(interiorPoint));

    // A point lying on the x = 10 edge is rejected by the single-argument overload...
    Assert.False(box.Contains(edgePoint));

    // ...but accepted when the second argument is true.
    Assert.True(box.Contains(edgePoint, true));

    // A point well outside stays rejected even with the second argument set to true.
    Assert.False(box.Contains(farPoint, true));
}
/// <summary>
/// Builds a <see cref="Hyperlink"/> for every link annotation whose action is of type /URI.
/// </summary>
/// <param name="page">Page whose letters are matched against each link's rectangle.</param>
/// <param name="pdfScanner">Scanner handed to the dictionary <c>TryGet</c> lookups.</param>
/// <param name="annotationProvider">Source of the annotations to inspect.</param>
/// <returns>One hyperlink per qualifying annotation, in the order the annotations are enumerated.</returns>
public static IReadOnlyList<Hyperlink> GetHyperlinks(Page page, IPdfTokenScanner pdfScanner, AnnotationProvider annotationProvider)
{
    var hyperlinks = new List<Hyperlink>();

    foreach (var annotation in annotationProvider.GetAnnotations())
    {
        // Only link annotations can carry a hyperlink.
        if (annotation.Type != AnnotationType.Link)
        {
            continue;
        }

        // The annotation needs an action dictionary (/A)...
        if (!annotation.AnnotationDictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken action))
        {
            continue;
        }

        // ...that declares an action type (/S)...
        if (!action.TryGet(NameToken.S, pdfScanner, out NameToken actionKind))
        {
            continue;
        }

        // ...and that type must be /URI.
        if (actionKind != NameToken.Uri)
        {
            continue;
        }

        // (Required) The uniform resource identifier to resolve, encoded in 7-bit ASCII.
        if (!action.TryGet(NameToken.Uri, pdfScanner, out IDataToken<string> uriToken))
        {
            continue;
        }

        var linkArea = annotation.Rectangle;

        // Widen the area by 0.5 on the left and right so letters close to the link region still match.
        var widenedTopLeft = linkArea.TopLeft.Translate(-0.5, 0);
        var widenedBottomRight = linkArea.BottomRight.Translate(0.5, 0);
        var widenedArea = new PdfRectangle(widenedTopLeft, widenedBottomRight);

        // Keep the letters whose location falls inside the widened area (second argument true).
        var lettersInLink = page.Letters
            .Where(candidate => widenedArea.Contains(candidate.Location, true))
            .ToList();

        // The presentation text is the extracted words joined by single spaces.
        var linkWords = DefaultWordExtractor.Instance.GetWords(lettersInLink);
        var displayText = string.Join(" ", linkWords.Select(word => word.Text));

        hyperlinks.Add(new Hyperlink(linkArea, lettersInLink, displayText, uriToken.Data, annotation));
    }

    return hyperlinks;
}
/// <summary>
/// Extracts the words of every page of a PDF document, ignoring a fixed band at the
/// bottom and at the top of each page.
/// </summary>
/// <param name="stream">Stream holding the PDF document; it is passed to <c>PdfDocument.Open</c>.</param>
/// <returns>
/// The retained words joined by single spaces. Text from consecutive pages is also
/// separated by a single space, and pages that contribute no words add nothing.
/// </returns>
public static string ExtractStringFromPdf(Stream stream)
{
    // Height of the band skipped at the bottom and at the top of each page,
    // in the same units as page.Height.
    const double borderHeight = 50;

    var stringBuilder = new StringBuilder();

    using var document = PdfDocument.Open(stream);

    foreach (var page in document.GetPages())
    {
        var areaWithoutBorders = new PdfRectangle(0, borderHeight, page.Width, page.Height - borderHeight);

        // Keep only the words whose bounding box the border-free area reports as contained.
        var words = page.GetWords().Where(w => areaWithoutBorders.Contains(w.BoundingBox));

        // string.Join formats each word through its ToString().
        var pageText = string.Join(" ", words);

        if (pageText.Length == 0)
        {
            continue;
        }

        // Separate pages so the last word of one page is not fused with the first word of the next.
        if (stringBuilder.Length > 0)
        {
            stringBuilder.Append(' ');
        }

        stringBuilder.Append(pageText);
    }

    return stringBuilder.ToString();
}