Esempio n. 1
0
        /// <summary>
        /// Exercises <c>Contains</c> on a rectangle built from (10, 10, 20, 20) with
        /// three probe points, with and without the extra boolean argument.
        /// </summary>
        public void Contains()
        {
            var box = new PdfRectangle(10, 10, 20, 20);

            var interiorPoint = new PdfPoint(15, 15);
            // Shares its first coordinate (10) with the rectangle's first constructor
            // argument — presumably this places it exactly on an edge.
            var boundaryPoint = new PdfPoint(10, 15);
            var distantPoint = new PdfPoint(100, 100);

            // A point well inside the rectangle is reported as contained.
            Assert.True(box.Contains(interiorPoint));

            // The boundary point is rejected by the single-argument call...
            Assert.False(box.Contains(boundaryPoint));

            // ...but accepted once `true` is passed as the second argument.
            Assert.True(box.Contains(boundaryPoint, true));

            // A point far outside is rejected even with the second argument set.
            Assert.False(box.Contains(distantPoint, true));
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a <see cref="Hyperlink"/> for every link annotation that carries a /URI action,
        /// pairing it with the page letters located inside the (slightly widened) annotation rectangle.
        /// </summary>
        /// <param name="page">Page whose letters are matched against each link rectangle.</param>
        /// <param name="pdfScanner">Scanner handed to every <c>TryGet</c> lookup on the annotation dictionaries.</param>
        /// <param name="annotationProvider">Supplies the annotations to inspect.</param>
        /// <returns>The hyperlinks found, in the order the provider yields their annotations.</returns>
        public static IReadOnlyList<Hyperlink> GetHyperlinks(Page page, IPdfTokenScanner pdfScanner, AnnotationProvider annotationProvider)
        {
            var hyperlinks = new List<Hyperlink>();

            foreach (var annotation in annotationProvider.GetAnnotations())
            {
                // Only link annotations are of interest.
                if (annotation.Type != AnnotationType.Link)
                {
                    continue;
                }

                // The annotation must carry an action dictionary under /A...
                if (!annotation.AnnotationDictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken action))
                {
                    continue;
                }

                // ...whose /S entry identifies it as a /URI action...
                if (!action.TryGet(NameToken.S, pdfScanner, out NameToken actionKind) || actionKind != NameToken.Uri)
                {
                    continue;
                }

                // ...and which holds the (required) uniform resource identifier to resolve,
                // encoded in 7-bit ASCII.
                if (!action.TryGet(NameToken.Uri, pdfScanner, out IDataToken<string> target))
                {
                    continue;
                }

                var linkArea = annotation.Rectangle;

                // Build in tolerance for letters close to the link region: the two corners
                // are shifted by 0.5 in opposite directions before forming the search area.
                var widenedTopLeft = linkArea.TopLeft.Translate(-0.5, 0);
                var widenedBottomRight = linkArea.BottomRight.Translate(0.5, 0);
                var searchArea = new PdfRectangle(widenedTopLeft, widenedBottomRight);

                // Keep every letter whose location lies in the search area; `true` is the same
                // second argument the original passed to Contains.
                List<Letter> coveredLetters = page.Letters
                    .Where(letter => searchArea.Contains(letter.Location, true))
                    .ToList();

                // Group the letters into words and use their text, space-separated,
                // as the link's presentation text.
                var linkWords = DefaultWordExtractor.Instance.GetWords(coveredLetters);
                var displayText = string.Join(" ", linkWords.Select(word => word.Text));

                hyperlinks.Add(new Hyperlink(linkArea, coveredLetters, displayText, target.Data, annotation));
            }

            return hyperlinks;
        }
        /// <summary>
        /// Reads a PDF from <paramref name="stream"/> and returns its words as a single
        /// space-separated string, ignoring words that are not fully inside the page area
        /// left after trimming a 50-unit band at each end of the y-axis.
        /// </summary>
        /// <param name="stream">Stream positioned on the PDF document to open.</param>
        /// <returns>
        /// The retained words of all pages, joined by single spaces. Pages that contribute
        /// no words add nothing, so no leading, trailing or doubled separators are produced
        /// by page boundaries.
        /// </returns>
        public static string ExtractStringFromPdf(Stream stream)
        {
            // Size of the band excluded along the y-axis at both ends of each page
            // (presumably to drop headers and footers).
            const int VerticalMargin = 50;
            const string WordSeparator = " ";

            var stringBuilder = new StringBuilder();

            using var document = PdfDocument.Open(stream);
            foreach (var page in document.GetPages())
            {
                var areaWithoutBorders = new PdfRectangle(0, VerticalMargin, page.Width, page.Height - VerticalMargin);

                // Join on the word text explicitly rather than relying on Word.ToString().
                var pageText = string.Join(
                    WordSeparator,
                    page.GetWords()
                        .Where(w => areaWithoutBorders.Contains(w.BoundingBox))
                        .Select(w => w.Text));

                if (pageText.Length == 0)
                {
                    continue;
                }

                // Separate consecutive pages the same way words are separated; without this
                // the last word of one page is fused with the first word of the next.
                if (stringBuilder.Length > 0)
                {
                    stringBuilder.Append(WordSeparator);
                }

                stringBuilder.Append(pageText);
            }

            return stringBuilder.ToString();
        }