예제 #1
1
        /// <summary>
        /// Asks the user for a PDF file and a regular expression, scans every page of the
        /// document for case-insensitive matches of that expression, hands the matches to a
        /// <c>TextHighlighter</c> through the extractor's filter, and finally serializes the file.
        /// </summary>
        public override void Run()
        {
            // Step 1: choose and open the PDF file.
            string pdfPath = PromptFileChoice("Please select a PDF file");
            using (var pdf = new File(pdfPath))
            {
                // Ask for the search expression only after the file has been opened;
                // matching ignores letter case.
                Regex searchPattern = new Regex(
                    PromptChoice("Please enter the pattern to look for: "),
                    RegexOptions.IgnoreCase);

                // Step 2: walk the pages, reusing a single extractor for all of them.
                TextExtractor extractor = new TextExtractor(true, true);
                foreach (Page currentPage in pdf.Document.Pages)
                {
                    // Page indexes are shifted by one for display.
                    string banner = "\nScanning page " + (currentPage.Index + 1) + "...\n";
                    Console.WriteLine(banner);

                    // Step 2.1: pull the text strings out of the page.
                    IDictionary<RectangleF?, IList<ITextString>> pageStrings = extractor.Extract(currentPage);

                    // Step 2.2: run the pattern over the page text as a single string.
                    MatchCollection hits = searchPattern.Matches(TextExtractor.ToString(pageStrings));

                    // Step 2.3: let the highlighter process the matches found on this page.
                    TextHighlighter highlighter = new TextHighlighter(currentPage, hits);
                    extractor.Filter(pageStrings, highlighter);
                }

                // Step 3: write out the highlighted file.
                Serialize(pdf);
            }
        }
예제 #2
0
        /// <summary>
        /// Highlights regular-expression matches in a user-selected PDF file.
        /// The user is prompted for the file and then for the pattern (matched ignoring case);
        /// each page's extracted text is searched and the matches are passed, together with the
        /// page, to a <c>TextHighlighter</c> filter. The file is serialized once all pages are done.
        /// </summary>
        public override void Run()
        {
            // 1. Select the input file.
            string chosenPath = PromptFileChoice("Please select a PDF file");

            using (File pdfFile = new File(chosenPath))
            {
                // The pattern is requested after the file is open, exactly once per run.
                string patternSource = PromptChoice("Please enter the pattern to look for: ");
                Regex matcher = new Regex(patternSource, RegexOptions.IgnoreCase);

                // 2. Process the document one page at a time with a shared extractor.
                TextExtractor pageTextExtractor = new TextExtractor(true, true);
                foreach (Page pdfPage in pdfFile.Document.Pages)
                {
                    Console.WriteLine("\nScanning page " + pdfPage.Number + "...\n");

                    // 2.1. Text strings found on the page, keyed by an optional rectangle.
                    IDictionary<RectangleF?, IList<ITextString>> extracted =
                        pageTextExtractor.Extract(pdfPage);

                    // 2.2. Search the page text for the pattern.
                    MatchCollection found = matcher.Matches(TextExtractor.ToString(extracted));

                    // 2.3. Feed the matches to the highlighter via the extractor's filter.
                    TextHighlighter pageHighlighter = new TextHighlighter(pdfPage, found);
                    pageTextExtractor.Filter(extracted, pageHighlighter);
                }

                // 3. Persist the result.
                Serialize(pdfFile);
            }
        }
예제 #3
0
        /// <summary>
        /// Asks for a PDF file and reports, page by page, every link annotation found:
        /// the page text lying inside the link box, the rounded box geometry and the link target.
        /// Pages without annotations, or without links among their annotations, are reported as such.
        /// </summary>
        public override void Run()
        {
            // 1. Choose and open the PDF file.
            string pdfPath = PromptFileChoice("Please select a PDF file");

            using (files::File pdfFile = new files::File(pdfPath))
            {
                Document pdfDocument = pdfFile.Document;

                // 2. Inspect the links of each page.
                // The extractor uses a 2 pt tolerance on area boundary detection.
                TextExtractor textExtractor = new TextExtractor { AreaTolerance = 2 };
                bool pageHadLink = false;
                foreach (Page currentPage in pdfDocument.Pages)
                {
                    // The second argument is true when the previously visited page showed no link.
                    bool proceed = PromptNextPage(currentPage, !pageHadLink);
                    if (!proceed)
                    {
                        Quit();
                        break;
                    }

                    // Page text is extracted lazily, only once a link actually needs it.
                    IDictionary<RectangleF?, IList<ITextString>> pageText = null;
                    pageHadLink = false;

                    PageAnnotations pageAnnotations = currentPage.Annotations;
                    if (!pageAnnotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    foreach (Annotation candidate in pageAnnotations)
                    {
                        // Only link annotations are of interest.
                        Link currentLink = candidate as Link;
                        if (currentLink == null)
                        {
                            continue;
                        }

                        pageHadLink = true;
                        pageText = pageText ?? textExtractor.Extract(currentPage);

                        RectangleF box = currentLink.Box;

                        // --- Text ---
                        // Links carry no direct reference to page text, only a positional
                        // correspondence, so the extracted text is filtered by the link area.
                        StringBuilder caption = new StringBuilder();
                        foreach (ITextString fragment in textExtractor.Filter(pageText, box))
                        {
                            caption.Append(fragment.Text);
                        }
                        Console.WriteLine("Link '" + caption + "' ");

                        // --- Position ---
                        string positionLine =
                            "    Position: "
                            + "x:" + Math.Round(box.X) + ","
                            + "y:" + Math.Round(box.Y) + ","
                            + "w:" + Math.Round(box.Width) + ","
                            + "h:" + Math.Round(box.Height);
                        Console.WriteLine(positionLine);

                        // --- Target ---
                        Console.Write("    Target: ");
                        PdfObjectWrapper linkTarget = currentLink.Target;
                        if (linkTarget is Destination)
                        {
                            PrintDestination((Destination)linkTarget);
                        }
                        else if (linkTarget is actions::Action)
                        {
                            PrintAction((actions::Action)linkTarget);
                        }
                        else if (linkTarget == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + linkTarget.GetType().Name + "]");
                        }
                    }

                    // Annotations existed, but none of them was a link.
                    if (!pageHadLink)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Lists the link annotations of a user-selected PDF file.
        /// For each page the user confirms, every link is printed with the text found inside
        /// its box, its rounded position and size, and a description of its target.
        /// </summary>
        public override void Run()
        {
            // 1. Open the file picked by the user.
            string selectedPath = PromptFileChoice("Please select a PDF file");
            using (files::File source = new files::File(selectedPath))
            {
                Document doc = source.Document;

                // 2. Scan the pages for links.
                TextExtractor linkTextExtractor = new TextExtractor();
                linkTextExtractor.AreaTolerance = 2; // 2 pt tolerance on area boundary detection.

                bool sawLink = false;
                foreach (Page pdfPage in doc.Pages)
                {
                    // Stop the whole scan as soon as the prompt answers false.
                    if (!PromptNextPage(pdfPage, !sawLink))
                    {
                        Quit();
                        break;
                    }

                    // Reset the per-page state; text is extracted on first use only.
                    sawLink = false;
                    IDictionary<RectangleF?, IList<ITextString>> extractedText = null;

                    PageAnnotations annots = pdfPage.Annotations;
                    if (!annots.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    foreach (Annotation annot in annots)
                    {
                        // Skip everything that is not a link.
                        if (!(annot is Link))
                        {
                            continue;
                        }

                        sawLink = true;
                        if (extractedText == null)
                        {
                            extractedText = linkTextExtractor.Extract(pdfPage);
                        }

                        Link pageLink = (Link)annot;
                        RectangleF area = pageLink.Box;

                        // Text: a link relates to page text only through its location,
                        // so the extracted strings are filtered by the link area.
                        StringBuilder textUnderLink = new StringBuilder();
                        foreach (ITextString piece in linkTextExtractor.Filter(extractedText, area))
                        {
                            textUnderLink.Append(piece.Text);
                        }
                        string linkLine = "Link '" + textUnderLink + "' ";
                        Console.WriteLine(linkLine);

                        // Position: each coordinate is rounded before printing.
                        string geometry =
                            "    Position: "
                            + "x:" + Math.Round(area.X) + ","
                            + "y:" + Math.Round(area.Y) + ","
                            + "w:" + Math.Round(area.Width) + ","
                            + "h:" + Math.Round(area.Height);
                        Console.WriteLine(geometry);

                        // Target: dispatch on the runtime kind of the target object.
                        Console.Write("    Target: ");
                        PdfObjectWrapper destinationOrAction = pageLink.Target;
                        if (destinationOrAction is Destination)
                        {
                            PrintDestination((Destination)destinationOrAction);
                        }
                        else if (destinationOrAction is actions::Action)
                        {
                            PrintAction((actions::Action)destinationOrAction);
                        }
                        else if (destinationOrAction == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            string typeName = destinationOrAction.GetType().Name;
                            Console.WriteLine("[unknown type: " + typeName + "]");
                        }
                    }

                    // The page had annotations but no link among them.
                    if (!sawLink)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }