C# (CSharp) TextExtractor.Filter 예제들

프로그래밍 언어: C# (CSharp)

클래스/타입: TextExtractor

메소드/함수: Filter

hotexamples.com에서의 예제들: 4

C# (CSharp) TextExtractor.Filter - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 C# (CSharp) TextExtractor.Filter 실제 사용 예제입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

LoadDocumentFromFile(30)

Extract(29)

GetPageCount(22)

Find(19)

SaveTextToFile(18)

FindNext(15)

ExtractText(15)

Dispose(13)

SetExtractionArea(13)

GetText(11)

GetTextFromPage(9)

FindAll(8)

IsValidFileType(6)

Begin(6)

Open(5)

SavePageTextToFile(5)

ExtractLine(4)

SaveTextToStream(4)

ExtractAll(4)

GetAsXML(4)

SavePageTextToStream(4)

GetFirstLine(3)

Reset(3)

TextExtractingWillBePotentiallySlow(3)

ResetExtractionArea(2)

PostImageAsync(2)

LoadProfiles(2)

ToString(2)

GetValue(2)

LoadDocumentFromStream(2)

GetPageRectangle(2)

Filter(2)

GetPageRect_Width(2)

GetPageRect_Height(2)

GetTextFromBitmapAsync(2)

GetWordCount(2)

Replace(1)

NextPage(1)

GetListValues(1)

IsOCRRecommendedForPage(1)

SelectStrategy(1)

ExtractFullText(1)

SupportedFormats(1)

SupportedLanguages(1)

CreateDocument(1)

AddFilter(1)

예제 #1

파일 보기

파일: TextHighlightSample.cs 프로젝트: n9/pdfclown

        /// <summary>
        /// Prompts for a PDF file and a regular-expression pattern, then walks every page of the
        /// document: the page text is extracted, matched against the pattern (ignoring case), and
        /// the matches are handed to a <c>TextHighlighter</c> through <c>TextExtractor.Filter</c>.
        /// The file is serialized once all pages have been processed.
        /// </summary>
        public override void Run(
            )
        {
            // Step 1: ask the user which PDF file to open.
            string pdfPath = PromptFileChoice("Please select a PDF file");
            using (var file = new File(pdfPath))
            {
                // Ask for the search expression and build a case-insensitive regex from it.
                string patternText = PromptChoice("Please enter the pattern to look for: ");
                var searchPattern = new Regex(patternText, RegexOptions.IgnoreCase);

                // Step 2: visit each page of the document in turn.
                var extractor = new TextExtractor(true, true);
                foreach (Page page in file.Document.Pages)
                {
                    // The number shown to the user is the page index plus one.
                    var displayedPageNumber = page.Index + 1;
                    Console.WriteLine("\nScanning page " + displayedPageNumber + "...\n");

                    // Step 2.1: pull the text strings out of the page.
                    IDictionary<RectangleF?, IList<ITextString>> pageText = extractor.Extract(page);

                    // Step 2.2: flatten the extracted text and search it for the pattern.
                    string flattenedText = TextExtractor.ToString(pageText);
                    MatchCollection hits = searchPattern.Matches(flattenedText);

                    // Step 2.3: run the filter with a highlighter built for this page's matches.
                    var highlighter = new TextHighlighter(page, hits);
                    extractor.Filter(pageText, highlighter);
                }

                // Step 3: write out the (now highlighted) file.
                Serialize(file);
            }
        }

예제 #2

파일 보기

파일: TextHighlightSample.cs 프로젝트: evertop/PDFClown

        /// <summary>
        /// Interactive sample: asks for a PDF file and a search pattern, scans each page for
        /// case-insensitive regex matches in the extracted text, passes those matches to a
        /// <c>TextHighlighter</c> via <c>TextExtractor.Filter</c>, and finally serializes the file.
        /// </summary>
        public override void Run(
            )
        {
            // 1. Let the user pick the PDF file to process.
            string selectedPath = PromptFileChoice("Please select a PDF file");

            using (var file = new File(selectedPath))
            {
                // Read the pattern from the user; matching ignores letter case.
                string rawPattern = PromptChoice("Please enter the pattern to look for: ");
                var regex = new Regex(rawPattern, RegexOptions.IgnoreCase);

                // 2. Process the document one page at a time.
                var pageTextExtractor = new TextExtractor(true, true);
                foreach (Page currentPage in file.Document.Pages)
                {
                    var pageNumber = currentPage.Number;
                    Console.WriteLine("\nScanning page " + pageNumber + "...\n");

                    // 2.1. Extract the text strings of the current page.
                    IDictionary<RectangleF?, IList<ITextString>> extractedStrings =
                        pageTextExtractor.Extract(currentPage);

                    // 2.2. Search the page's plain text for the pattern.
                    string plainText = TextExtractor.ToString(extractedStrings);
                    MatchCollection found = regex.Matches(plainText);

                    // 2.3. Feed the extracted strings through a highlighter for these matches.
                    var highlighter = new TextHighlighter(currentPage, found);
                    pageTextExtractor.Filter(extractedStrings, highlighter);
                }

                // 3. Serialize the file after every page has been handled.
                Serialize(file);
            }
        }

예제 #3

파일 보기

파일: LinkParsingSample.cs 프로젝트: flaklypa/pdfclown-sign

        /// <summary>
        /// Prompts for a PDF file and, page by page, prints every link annotation it finds: the
        /// page text filtered by the link's box, the box position (rounded), and the link target.
        /// Pages without annotations or without links are reported as such.
        /// </summary>
        public override void Run(
            )
        {
            // Step 1: let the user choose the PDF file to inspect.
            string pdfPath = PromptFileChoice("Please select a PDF file");

            using (files::File file = new files::File(pdfPath))
            {
                Document document = file.Document;

                // Step 2: look for links on each page.
                // The extractor uses a 2 pt tolerance on area boundary detection.
                TextExtractor extractor = new TextExtractor { AreaTolerance = 2 };
                bool pageHadLink = false;
                foreach (Page page in document.Pages)
                {
                    if (!PromptNextPage(page, !pageHadLink))
                    {
                        Quit();
                        break;
                    }

                    // Page text is extracted lazily, only once the first link shows up.
                    IDictionary<RectangleF?, IList<ITextString>> pageText = null;
                    pageHadLink = false;

                    // Nothing to inspect when the page carries no annotations at all.
                    PageAnnotations annotations = page.Annotations;
                    if (!annotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    // Walk the annotations, skipping everything that is not a link.
                    foreach (Annotation annotation in annotations)
                    {
                        if (!(annotation is Link))
                        {
                            continue;
                        }

                        pageHadLink = true;
                        if (pageText == null)
                        {
                            pageText = extractor.Extract(page);
                        }

                        Link link = (Link)annotation;
                        RectangleF box = link.Box;

                        // Text.
                        // Links are tied to page text only by location, so the extracted text
                        // is filtered by the link's box to find what the link covers.
                        StringBuilder coveredText = new StringBuilder();
                        foreach (ITextString fragment in extractor.Filter(pageText, box))
                        {
                            coveredText.Append(fragment.Text);
                        }
                        string linkText = coveredText.ToString();
                        Console.WriteLine("Link '" + linkText + "' ");

                        // Position.
                        var left = Math.Round(box.X);
                        var top = Math.Round(box.Y);
                        var width = Math.Round(box.Width);
                        var height = Math.Round(box.Height);
                        string positionLine =
                            "    Position: "
                            + "x:" + left + ","
                            + "y:" + top + ","
                            + "w:" + width + ","
                            + "h:" + height;
                        Console.WriteLine(positionLine);

                        // Target.
                        Console.Write("    Target: ");
                        PdfObjectWrapper target = link.Target;
                        if (target is Destination)
                        {
                            PrintDestination((Destination)target);
                        }
                        else if (target is actions::Action)
                        {
                            PrintAction((actions::Action)target);
                        }
                        else if (target == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            string targetTypeName = target.GetType().Name;
                            Console.WriteLine("[unknown type: " + targetTypeName + "]");
                        }
                    }

                    // Annotations existed, but none of them was a link.
                    if (!pageHadLink)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }

예제 #4

파일 보기

파일: LinkParsingSample.cs 프로젝트: n9/pdfclown

        /// <summary>
        /// Interactive link-parsing sample. After the user selects a PDF file, each page is
        /// examined for link annotations; for every link the method prints the extracted text
        /// that falls within the link's box, the rounded box coordinates, and a description of
        /// the link target (destination, action, missing, or unknown type).
        /// </summary>
        public override void Run(
            )
        {
            // 1. Ask for the PDF file and open it.
            string chosenFile = PromptFileChoice("Please select a PDF file");
            using (files::File file = new files::File(chosenFile))
            {
                Document document = file.Document;

                // 2. Extract links from the document pages.
                // Area boundary detection is given a tolerance of 2 pt.
                TextExtractor textExtractor = new TextExtractor { AreaTolerance = 2 };
                bool foundLinkOnPage = false;
                foreach (Page page in document.Pages)
                {
                    if (!PromptNextPage(page, !foundLinkOnPage))
                    {
                        Quit();
                        break;
                    }

                    // Stays null until a link is met, so pages without links are never extracted.
                    IDictionary<RectangleF?, IList<ITextString>> extractedText = null;
                    foundLinkOnPage = false;

                    // Fetch the page annotations; bail out early when there are none.
                    PageAnnotations pageAnnotations = page.Annotations;
                    if (!pageAnnotations.Exists())
                    {
                        Console.WriteLine("No annotations here.");
                        continue;
                    }

                    // Only link annotations are of interest.
                    foreach (Annotation candidate in pageAnnotations)
                    {
                        if (!(candidate is Link))
                        {
                            continue;
                        }

                        foundLinkOnPage = true;

                        if (extractedText == null)
                        {
                            extractedText = textExtractor.Extract(page);
                        }

                        Link link = (Link)candidate;
                        RectangleF linkArea = link.Box;

                        // Text.
                        // A link relates to page text only through its location, so the
                        // extracted text has to be filtered by the link area.
                        StringBuilder textUnderLink = new StringBuilder();
                        foreach (ITextString piece in textExtractor.Filter(extractedText, linkArea))
                        {
                            textUnderLink.Append(piece.Text);
                        }
                        Console.WriteLine("Link '" + textUnderLink.ToString() + "' ");

                        // Position.
                        var x = Math.Round(linkArea.X);
                        var y = Math.Round(linkArea.Y);
                        var w = Math.Round(linkArea.Width);
                        var h = Math.Round(linkArea.Height);
                        Console.WriteLine(
                            "    Position: "
                            + "x:" + x + ","
                            + "y:" + y + ","
                            + "w:" + w + ","
                            + "h:" + h);

                        // Target.
                        Console.Write("    Target: ");
                        PdfObjectWrapper target = link.Target;
                        if (target is Destination)
                        {
                            PrintDestination((Destination)target);
                        }
                        else if (target is actions::Action)
                        {
                            PrintAction((actions::Action)target);
                        }
                        else if (target == null)
                        {
                            Console.WriteLine("[not available]");
                        }
                        else
                        {
                            Console.WriteLine("[unknown type: " + target.GetType().Name + "]");
                        }
                    }

                    // The page had annotations, yet no link among them.
                    if (!foundLinkOnPage)
                    {
                        Console.WriteLine("No links here.");
                    }
                }
            }
        }