C# (CSharp) TextExtractor.Filter примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: TextExtractor

Метод/Функция: Filter

Примеров на hotexamples.com: 4

C# (CSharp) TextExtractor.Filter — найдено 4 примера. Здесь собраны лучшие примеры использования TextExtractor.Filter на C# (CSharp), взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

LoadDocumentFromFile(30)

Extract(29)

GetPageCount(22)

Find(19)

SaveTextToFile(18)

FindNext(15)

ExtractText(15)

Dispose(13)

SetExtractionArea(13)

GetText(11)

GetTextFromPage(9)

FindAll(8)

IsValidFileType(6)

Begin(6)

Open(5)

SavePageTextToFile(5)

ExtractLine(4)

SaveTextToStream(4)

ExtractAll(4)

GetAsXML(4)

SavePageTextToStream(4)

GetFirstLine(3)

Reset(3)

TextExtractingWillBePotentiallySlow(3)

ResetExtractionArea(2)

PostImageAsync(2)

LoadProfiles(2)

ToString(2)

GetValue(2)

LoadDocumentFromStream(2)

GetPageRectangle(2)

Filter(2)

GetPageRect_Width(2)

GetPageRect_Height(2)

GetTextFromBitmapAsync(2)

GetWordCount(2)

Replace(1)

NextPage(1)

GetListValues(1)

IsOCRRecommendedForPage(1)

SelectStrategy(1)

ExtractFullText(1)

SupportedFormats(1)

SupportedLanguages(1)

CreateDocument(1)

AddFilter(1)

Пример #1

Показать файл

Файл: TextHighlightSample.cs Проект: n9/pdfclown

        /// <summary>
        /// Asks the user for a PDF file and a search pattern, then, page by page,
        /// hands the pattern matches to a <c>TextHighlighter</c> through
        /// <c>TextExtractor.Filter</c> and finally serializes the file.
        /// </summary>
        public override void Run()
        {
            // Step 1: let the user pick the document and open it.
            string pdfPath = PromptFileChoice("Please select a PDF file");
            using (File file = new File(pdfPath))
            {
                // The search pattern is user-supplied and matched case-insensitively.
                string userPattern = PromptChoice("Please enter the pattern to look for: ");
                Regex searchRegex = new Regex(userPattern, RegexOptions.IgnoreCase);

                // Step 2: walk the pages, reusing a single extractor instance.
                TextExtractor extractor = new TextExtractor(true, true);
                foreach (Page currentPage in file.Document.Pages)
                {
                    // Index is zero-based, so the printed page number is Index + 1.
                    Console.WriteLine("\nScanning page " + (currentPage.Index + 1) + "...\n");

                    // Step 2.1: pull the text strings out of the page.
                    IDictionary<RectangleF?, IList<ITextString>> pageStrings = extractor.Extract(currentPage);

                    // Step 2.2: run the pattern over the string form of the extracted text.
                    MatchCollection hits = searchRegex.Matches(TextExtractor.ToString(pageStrings));

                    // Step 2.3: feed the matches to the highlighter through the filter.
                    TextHighlighter highlighter = new TextHighlighter(currentPage, hits);
                    extractor.Filter(pageStrings, highlighter);
                }

                // Step 3: serialize the processed file.
                Serialize(file);
            }
        }

Пример #2

Показать файл

Файл: TextHighlightSample.cs Проект: evertop/PDFClown

        /// <summary>
        /// Prompts for a PDF file and a regular expression, scans every page of the
        /// document for matches and passes them to a <c>TextHighlighter</c> via
        /// <c>TextExtractor.Filter</c>, then serializes the file.
        /// </summary>
        public override void Run()
        {
            // Pick and open the input document.
            string selectedPath = PromptFileChoice("Please select a PDF file");
            using (File pdfFile = new File(selectedPath))
            {
                // Case-insensitive pattern built straight from the user's answer.
                Regex matcher = new Regex(
                    PromptChoice("Please enter the pattern to look for: "),
                    RegexOptions.IgnoreCase
                    );

                // One extractor serves all pages.
                TextExtractor pageExtractor = new TextExtractor(true, true);
                foreach (Page pdfPage in pdfFile.Document.Pages)
                {
                    Console.WriteLine("\nScanning page " + pdfPage.Number + "...\n");

                    // Extract the page text, then search its string form for the pattern.
                    IDictionary<RectangleF?, IList<ITextString>> extracted = pageExtractor.Extract(pdfPage);
                    string pageText = TextExtractor.ToString(extracted);
                    MatchCollection found = matcher.Matches(pageText);

                    // Hand the matches to the highlighter through the filter call.
                    pageExtractor.Filter(extracted, new TextHighlighter(pdfPage, found));
                }

                // Serialize the processed file.
                Serialize(pdfFile);
            }
        }

Пример #3

Показать файл

Файл: LinkParsingSample.cs Проект: flaklypa/pdfclown-sign

        /// <summary>
        /// Prompts for a PDF file and, page by page, prints every link annotation:
        /// the text found inside the link box, the box position and the link target.
        /// </summary>
        public override void Run()
        {
            // Pick and open the input document.
            string pdfPath = PromptFileChoice("Please select a PDF file");

            using (files::File file = new files::File(pdfPath))
            {
                Document doc = file.Document;

                // 2 pt tolerance on area boundary detection.
                TextExtractor textExtractor = new TextExtractor();
                textExtractor.AreaTolerance = 2;

                // Carries over to the next iteration: it is passed (negated) to PromptNextPage.
                bool pageHadLink = false;
                foreach (Page page in doc.Pages)
                {
                    if (!PromptNextPage(page, !pageHadLink))
                    {
                        Quit();
                        break;
                    }

                    // Page text is extracted lazily, only once the first link shows up.
                    IDictionary<RectangleF?, IList<ITextString>> pageStrings = null;
                    pageHadLink = false;

                    PageAnnotations pageAnnotations = page.Annotations;
                    if (!pageAnnotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    foreach (Annotation candidate in pageAnnotations)
                    {
                        // Only link annotations are of interest.
                        if (!(candidate is Link))
                        {
                            continue;
                        }

                        pageHadLink = true;
                        if (pageStrings == null)
                        {
                            pageStrings = textExtractor.Extract(page);
                        }

                        Link link = (Link)candidate;
                        RectangleF box = link.Box;

                        // Text: links have no strong relation to page text, only a weak
                        // location correspondence, so the extracted text is filtered by
                        // the link area.
                        StringBuilder caption = new StringBuilder();
                        foreach (ITextString piece in textExtractor.Filter(pageStrings, box))
                        {
                            caption.Append(piece.Text);
                        }
                        Console.WriteLine("Link '" + caption + "' ");

                        // Position: rounded box coordinates and size.
                        string position =
                            "x:" + Math.Round(box.X) + ","
                            + "y:" + Math.Round(box.Y) + ","
                            + "w:" + Math.Round(box.Width) + ","
                            + "h:" + Math.Round(box.Height);
                        Console.WriteLine("    Position: " + position);

                        // Target: dispatch on the kind of object the link points to.
                        Console.Write("    Target: ");
                        PdfObjectWrapper linkTarget = link.Target;
                        if (linkTarget is Destination)
                        {
                            PrintDestination((Destination)linkTarget);
                        }
                        else if (linkTarget is actions::Action)
                        {
                            PrintAction((actions::Action)linkTarget);
                        }
                        else if (linkTarget == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + linkTarget.GetType().Name + "]");
                        }
                    }

                    if (!pageHadLink)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }

Пример #4

Показать файл

Файл: LinkParsingSample.cs Проект: n9/pdfclown

        /// <summary>
        /// Opens a user-selected PDF file and reports, for each page the user steps
        /// through, the link annotations it contains: overlapping text, position
        /// and target.
        /// </summary>
        public override void Run()
        {
            // 1. Open the PDF file chosen by the user.
            string chosenPath = PromptFileChoice("Please select a PDF file");
            using (files::File file = new files::File(chosenPath))
            {
                Document pdfDocument = file.Document;

                // 2. Extract links from the document pages.
                TextExtractor linkTextExtractor = new TextExtractor();
                linkTextExtractor.AreaTolerance = 2; // 2 pt tolerance on area boundary detection.

                // Result of the previous page; PromptNextPage receives its negation.
                bool foundLink = false;
                foreach (Page currentPage in pdfDocument.Pages)
                {
                    if (!PromptNextPage(currentPage, !foundLink))
                    {
                        Quit();
                        break;
                    }

                    // Extraction is deferred until a link is actually encountered.
                    IDictionary<RectangleF?, IList<ITextString>> extractedStrings = null;
                    foundLink = false;

                    PageAnnotations currentAnnotations = currentPage.Annotations;
                    if (!currentAnnotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    foreach (Annotation item in currentAnnotations)
                    {
                        // Skip everything that is not a link.
                        if (!(item is Link))
                        {
                            continue;
                        }

                        foundLink = true;
                        if (extractedStrings == null)
                        {
                            extractedStrings = linkTextExtractor.Extract(currentPage);
                        }

                        Link currentLink = (Link)item;
                        RectangleF area = currentLink.Box;

                        // Text superimposed by the link. Links only correspond to page
                        // text by location, so the extracted text is filtered by the
                        // link area.
                        StringBuilder textUnderLink = new StringBuilder();
                        foreach (ITextString fragment in linkTextExtractor.Filter(extractedStrings, area))
                        {
                            textUnderLink.Append(fragment.Text);
                        }
                        Console.WriteLine("Link '" + textUnderLink + "' ");

                        // Position of the link box, rounded.
                        Console.WriteLine(
                            "    Position: "
                            + "x:" + Math.Round(area.X) + ","
                            + "y:" + Math.Round(area.Y) + ","
                            + "w:" + Math.Round(area.Width) + ","
                            + "h:" + Math.Round(area.Height)
                            );

                        // Target of the link, printed according to its kind.
                        Console.Write("    Target: ");
                        PdfObjectWrapper destinationOrAction = currentLink.Target;
                        if (destinationOrAction is Destination)
                        {
                            PrintDestination((Destination)destinationOrAction);
                        }
                        else if (destinationOrAction is actions::Action)
                        {
                            PrintAction((actions::Action)destinationOrAction);
                        }
                        else if (destinationOrAction == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + destinationOrAction.GetType().Name + "]");
                        }
                    }

                    if (!foundLink)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }