Ejemplos de TextExtractor.Filter en C# (CSharp)

Lenguaje de programación: C# (CSharp)

Clase / Tipo: TextExtractor

Método / Función: Filter

Ejemplos en hotexamples.com: 4

C# (CSharp) TextExtractor.Filter - 4 ejemplos encontrados. Estos son los ejemplos reales en C# (CSharp) mejor valorados de TextExtractor.Filter, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar Ocultar

LoadDocumentFromFile(30)

Extract(29)

GetPageCount(22)

Find(19)

SaveTextToFile(18)

FindNext(15)

ExtractText(15)

Dispose(13)

SetExtractionArea(13)

GetText(11)

GetTextFromPage(9)

FindAll(8)

IsValidFileType(6)

Begin(6)

Open(5)

SavePageTextToFile(5)

ExtractLine(4)

SaveTextToStream(4)

ExtractAll(4)

GetAsXML(4)

SavePageTextToStream(4)

GetFirstLine(3)

Reset(3)

TextExtractingWillBePotentiallySlow(3)

ResetExtractionArea(2)

PostImageAsync(2)

LoadProfiles(2)

ToString(2)

GetValue(2)

LoadDocumentFromStream(2)

GetPageRectangle(2)

Filter(2)

GetPageRect_Width(2)

GetPageRect_Height(2)

GetTextFromBitmapAsync(2)

GetWordCount(2)

Replace(1)

NextPage(1)

GetListValues(1)

IsOCRRecommendedForPage(1)

SelectStrategy(1)

ExtractFullText(1)

SupportedFormats(1)

SupportedLanguages(1)

CreateDocument(1)

AddFilter(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: TextHighlightSample.cs Proyecto: n9/pdfclown

        /// <summary>
        /// Prompts for a PDF file and a regular-expression pattern, then, page by page,
        /// extracts the text, matches the pattern against it and passes the matches to a
        /// <c>TextHighlighter</c> through <c>TextExtractor.Filter</c>. The file is
        /// serialized once every page has been processed.
        /// </summary>
        public override void Run()
        {
            // 1. Opening the PDF file...
            string pdfPath = PromptFileChoice("Please select a PDF file");
            using (File pdfFile = new File(pdfPath))
            {
                // Define the text pattern to look for (matched case-insensitively).
                string patternSource = PromptChoice("Please enter the pattern to look for: ");
                Regex searchPattern = new Regex(patternSource, RegexOptions.IgnoreCase);

                // 2. Iterating through the document pages...
                TextExtractor pageTextExtractor = new TextExtractor(true, true);
                foreach (Page currentPage in pdfFile.Document.Pages)
                {
                    Console.WriteLine("\nScanning page " + (currentPage.Index + 1) + "...\n");

                    // 2.1. Extract the page text!
                    IDictionary<RectangleF?, IList<ITextString>> pageTextStrings =
                        pageTextExtractor.Extract(currentPage);

                    // 2.2. Find the text pattern matches within the flattened page text!
                    string pageText = TextExtractor.ToString(pageTextStrings);
                    MatchCollection patternMatches = searchPattern.Matches(pageText);

                    // 2.3. Highlight the text pattern matches!
                    pageTextExtractor.Filter(
                        pageTextStrings,
                        new TextHighlighter(currentPage, patternMatches));
                }

                // 3. Highlighted file serialization.
                Serialize(pdfFile);
            }
        }

Ejemplo n.º 2

Mostrar archivo

Archivo: TextHighlightSample.cs Proyecto: evertop/PDFClown

        /// <summary>
        /// Asks the user for a PDF file and a search pattern, then scans each page:
        /// the page text is extracted, searched with the case-insensitive pattern, and the
        /// resulting matches are handed to a <c>TextHighlighter</c> via
        /// <c>TextExtractor.Filter</c>. The file is serialized at the end.
        /// </summary>
        public override void Run()
        {
            // 1. Opening the PDF file...
            string selectedPath = PromptFileChoice("Please select a PDF file");

            using (File document = new File(selectedPath))
            {
                // Define the text pattern to look for!
                var regex = new Regex(
                    PromptChoice("Please enter the pattern to look for: "),
                    RegexOptions.IgnoreCase);

                // 2. Iterating through the document pages...
                var extractor = new TextExtractor(true, true);
                foreach (Page pdfPage in document.Document.Pages)
                {
                    Console.WriteLine("\nScanning page " + pdfPage.Number + "...\n");

                    // 2.1. Extract the page text!
                    IDictionary<RectangleF?, IList<ITextString>> extracted = extractor.Extract(pdfPage);

                    // 2.2. Find the text pattern matches!
                    string flattenedText = TextExtractor.ToString(extracted);
                    MatchCollection hits = regex.Matches(flattenedText);

                    // 2.3. Highlight the text pattern matches!
                    extractor.Filter(extracted, new TextHighlighter(pdfPage, hits));
                }

                // 3. Highlighted file serialization.
                Serialize(document);
            }
        }

Ejemplo n.º 3

Mostrar archivo

Archivo: LinkParsingSample.cs Proyecto: flaklypa/pdfclown-sign

        /// <summary>
        /// Prompts for a PDF file and walks its pages. For every <c>Link</c> annotation on
        /// a page it prints the text obtained by filtering the page's extracted text by the
        /// link box, the rounded box position, and a description of the link target.
        /// </summary>
        public override void Run()
        {
            // 1. Opening the PDF file...
            string pdfPath = PromptFileChoice("Please select a PDF file");

            using (files::File pdfFile = new files::File(pdfPath))
            {
                Document pdfDocument = pdfFile.Document;

                // 2. Link extraction from the document pages.
                TextExtractor textExtractor = new TextExtractor
                {
                    AreaTolerance = 2 // 2 pt tolerance on area boundary detection.
                };
                bool pageHadLinks = false;
                foreach (Page currentPage in pdfDocument.Pages)
                {
                    if (!PromptNextPage(currentPage, !pageHadLinks))
                    {
                        Quit();
                        break;
                    }

                    // Page text is extracted lazily, on the first link met on this page.
                    IDictionary<RectangleF?, IList<ITextString>> pageTextStrings = null;
                    pageHadLinks = false;

                    // Get the page annotations!
                    PageAnnotations pageAnnotations = currentPage.Annotations;
                    if (!pageAnnotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    // Iterating through the page annotations looking for links...
                    foreach (Annotation candidate in pageAnnotations)
                    {
                        // Anything that is not a link is skipped.
                        if (!(candidate is Link))
                        {
                            continue;
                        }

                        pageHadLinks = true;
                        pageTextStrings = pageTextStrings ?? textExtractor.Extract(currentPage);

                        Link currentLink = (Link)candidate;
                        RectangleF linkArea = currentLink.Box;

                        // Text.

                        /*
                         * Extracting text superimposed by the link...
                         * NOTE: As links have no strong relation to page text but a weak location
                         * correspondence, we have to filter extracted text by link area.
                         */
                        StringBuilder linkText = new StringBuilder();
                        foreach (ITextString fragment in textExtractor.Filter(pageTextStrings, linkArea))
                        {
                            linkText.Append(fragment.Text);
                        }
                        Console.WriteLine("Link '" + linkText.ToString() + "' ");

                        // Position.
                        string positionLine =
                            "    Position: "
                            + "x:" + Math.Round(linkArea.X) + ","
                            + "y:" + Math.Round(linkArea.Y) + ","
                            + "w:" + Math.Round(linkArea.Width) + ","
                            + "h:" + Math.Round(linkArea.Height);
                        Console.WriteLine(positionLine);

                        // Target.
                        Console.Write("    Target: ");
                        PdfObjectWrapper linkTarget = currentLink.Target;
                        if (linkTarget is Destination)
                        {
                            PrintDestination((Destination)linkTarget);
                        }
                        else if (linkTarget is actions::Action)
                        {
                            PrintAction((actions::Action)linkTarget);
                        }
                        else if (linkTarget == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + linkTarget.GetType().Name + "]");
                        }
                    }

                    if (!pageHadLinks)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }

Ejemplo n.º 4

Mostrar archivo

Archivo: LinkParsingSample.cs Proyecto: n9/pdfclown

        /// <summary>
        /// Lets the user pick a PDF file, then inspects the annotations of each page.
        /// Each <c>Link</c> annotation is reported with the extracted page text filtered by
        /// its box, its rounded box coordinates and size, and its target.
        /// </summary>
        public override void Run()
        {
            // 1. Opening the PDF file...
            string chosenPath = PromptFileChoice("Please select a PDF file");
            using (files::File source = new files::File(chosenPath))
            {
                Document doc = source.Document;

                // 2. Link extraction from the document pages.
                TextExtractor linkTextExtractor = new TextExtractor();
                linkTextExtractor.AreaTolerance = 2; // 2 pt tolerance on area boundary detection.
                bool foundLinkOnPage = false;
                foreach (Page pdfPage in doc.Pages)
                {
                    bool keepGoing = PromptNextPage(pdfPage, !foundLinkOnPage);
                    if (!keepGoing)
                    {
                        Quit();
                        break;
                    }

                    // Extraction result for this page; filled in only once a link is met.
                    IDictionary<RectangleF?, IList<ITextString>> extractedText = null;
                    foundLinkOnPage = false;

                    // Get the page annotations!
                    PageAnnotations annots = pdfPage.Annotations;
                    if (!annots.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    // Iterating through the page annotations looking for links...
                    foreach (Annotation annot in annots)
                    {
                        bool isLink = annot is Link;
                        if (!isLink)
                        {
                            continue;
                        }

                        foundLinkOnPage = true;

                        if (extractedText == null)
                        {
                            extractedText = linkTextExtractor.Extract(pdfPage);
                        }

                        Link pageLink = (Link)annot;
                        RectangleF box = pageLink.Box;

                        // Text.
                        /*
                          Extracting text superimposed by the link...
                          NOTE: As links have no strong relation to page text but a weak location
                          correspondence, we have to filter extracted text by link area.
                        */
                        StringBuilder textUnderLink = new StringBuilder();
                        foreach (ITextString piece in linkTextExtractor.Filter(extractedText, box))
                        {
                            textUnderLink.Append(piece.Text);
                        }
                        Console.WriteLine("Link '" + textUnderLink + "' ");

                        // Position.
                        Console.WriteLine(
                            "    Position: "
                            + "x:" + Math.Round(box.X) + ","
                            + "y:" + Math.Round(box.Y) + ","
                            + "w:" + Math.Round(box.Width) + ","
                            + "h:" + Math.Round(box.Height));

                        // Target.
                        Console.Write("    Target: ");
                        PdfObjectWrapper destinationOrAction = pageLink.Target;
                        if (destinationOrAction is Destination)
                        {
                            PrintDestination((Destination)destinationOrAction);
                        }
                        else if (destinationOrAction is actions::Action)
                        {
                            PrintAction((actions::Action)destinationOrAction);
                        }
                        else if (destinationOrAction == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + destinationOrAction.GetType().Name + "]");
                        }
                    }

                    if (!foundLinkOnPage)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }