TextExtractor.FindNext C# (CSharp) 코드 예제들

예제 #1

0

파일 보기

파일: Default.aspx.cs 프로젝트: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Searches page index 0 of "words-with-hyphens.pdf" for the string "hyphen"
        /// (third <c>Find</c> argument is <c>false</c>) and writes the bounds of every
        /// match to the HTTP response as HTML.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("words-with-hyphens.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement releases it even if loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                Response.Write("Searching for \"hyphen\" string:<br><br>");

                // Search for "hyphen" string; Find positions on the first match,
                // FindNext advances until no further match exists.
                if (extractor.Find(0, "hyphen", false))
                {
                    do
                    {
                        Response.Write("Found at location " + extractor.FoundText.Bounds.ToString() + "<br>");
                    } while (extractor.FindNext());
                }
            }

            // End the response only after the extractor has been released.
            Response.End();
        }

예제 #2

0

파일 보기

파일: Program.cs 프로젝트: repohoarder/ByteScout-SDK-SourceCode

        /// <summary>
        /// Searches every page of "words-with-hyphens.pdf" for the string "hyphen" and
        /// prints the page index and bounds of each match to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement guarantees cleanup even when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("words-with-hyphens.pdf");

                int pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for "hyphen" string
                    if (extractor.Find(i, "hyphen", false))
                    {
                        do
                        {
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                        } while (extractor.FindNext());
                    }
                }
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

예제 #3

0

파일 보기

파일: MainWindow.xaml.cs 프로젝트: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Routed-event handler that searches one page of the loaded document for the
        /// text in <c>textBoxFind</c> and shows a report of every match, including
        /// per-element font details, in <c>textBox1</c>. Does nothing when the search
        /// box is empty.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Only this page is searched; the same value is reported in the output so the
            // message can never disagree with the page actually passed to Find.
            const int pageIndex = 0;

            if (textBoxFind.Text.Length > 0)
            {
                StringBuilder builder = new StringBuilder();

                builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

                if (extractor.Find(pageIndex, textBoxFind.Text, false))
                {
                    do
                    {
                        builder.AppendLine("");
                        builder.AppendLine("Found on page " + pageIndex + " at location " + extractor.FoundText.Bounds.ToString());
                        builder.AppendLine("");
                        // Iterate through each element in the found text
                        foreach (SearchResultElement element in extractor.FoundText.Elements)
                        {
                            builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                            builder.AppendLine("Text: " + element.Text);
                            builder.AppendLine("Font is bold: " + element.FontIsBold);
                            builder.AppendLine("Font is italic:" + element.FontIsItalic);
                            builder.AppendLine("Font name: " + element.FontName);
                            builder.AppendLine("Font size:" + element.FontSize);
                            builder.AppendLine("Font color:" + element.FontColor);
                        }
                    } while (extractor.FindNext());
                }

                builder.AppendLine("Finished.");

                textBox1.Text = builder.ToString();
            }
        }

예제 #4

0

파일 보기

파일: Program.cs 프로젝트: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Searches every page of "Invoice.pdf" for dates written as two digits, two
        /// digits and four digits separated by slashes (e.g. 12/31/1999), and prints
        /// each match together with the text and font details of its elements.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Search dates in format 12/31/1999. The pattern does not depend on the page,
            // so it is defined once instead of on every loop iteration.
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            const string regexPattern = "[0-9]{2}/[0-9]{2}/[0-9]{4}";

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement guarantees cleanup even when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\Invoice.pdf");

                extractor.RegexSearch = true; // Enable the regular expressions

                int pageCount = extractor.GetPageCount();

                // Search through pages
                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for the pattern
                    if (extractor.Find(i, regexPattern, false))
                    {
                        do
                        {
                            Console.WriteLine("");
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds);
                            Console.WriteLine("");

                            // Iterate through each element in the found text
                            foreach (ISearchResultElement element in extractor.FoundText.Elements)
                            {
                                Console.WriteLine("   Text: " + element.Text);
                                Console.WriteLine("   Font is bold: " + element.FontIsBold);
                                Console.WriteLine("   Font is italic: " + element.FontIsItalic);
                                Console.WriteLine("   Font name: " + element.FontName);
                                Console.WriteLine("   Font size: " + element.FontSize);
                                Console.WriteLine("   Font color: " + element.FontColor);
                                Console.WriteLine();
                            }
                        } while (extractor.FindNext());
                    }
                }
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

예제 #5

0

파일 보기

파일: Program.cs 프로젝트: babylon3389/ByteScout-SDK-SourceCode

        /// <summary>
        /// Renders page index 0 of "sample.pdf" to a raster image, paints a translucent
        /// yellow rectangle over every piece of text matching the decimal-number pattern,
        /// saves the result as "result.png" and opens it.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputFile           = @"sample.pdf";
            const int    pageIndex           = 0;
            const float  renderingResolution = 300f;
            const string searchPattern       = "\\d+\\.\\d+";

            // Prepare TextExtractor
            using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
            {
                textExtractor.RegexSearch = true;
                textExtractor.LoadDocumentFromFile(inputFile);

                // Prepare RasterRenderer
                using (RasterRenderer rasterRenderer = new RasterRenderer("demo", "demo"))
                {
                    rasterRenderer.LoadDocumentFromFile(inputFile);

                    // Render document page to image. The image is disposed once it has been
                    // saved so its resources are released before the viewer is launched.
                    using (Image image = rasterRenderer.GetImage(pageIndex, renderingResolution))
                    {
                        // Prepare highlight brush; both the brush and the drawing surface
                        // are released as soon as painting is finished.
                        using (Brush highlightBrush = new SolidBrush(Color.FromArgb(128, Color.Yellow)))
                        using (Graphics graphics = Graphics.FromImage(image))
                        {
                            // Search for pattern and paint found pieces
                            if (textExtractor.Find(pageIndex, searchPattern, caseSensitive: false))
                            {
                                do
                                {
                                    foreach (var foundPiece in textExtractor.FoundText.Elements)
                                    {
                                        // Convert from document Points to pixels
                                        Rectangle pixelRect = new Rectangle(
                                            (int)(foundPiece.Bounds.Left / 72f * renderingResolution),
                                            (int)(foundPiece.Bounds.Top / 72f * renderingResolution),
                                            (int)(foundPiece.Bounds.Width / 72f * renderingResolution),
                                            (int)(foundPiece.Bounds.Height / 72f * renderingResolution)
                                            );

                                        // Paint rectangle
                                        graphics.FillRectangle(highlightBrush, pixelRect);
                                    }
                                } while (textExtractor.FindNext());
                            }
                        }

                        image.Save("result.png");
                    }

                    // Open the result in the default associated application
                    Process.Start("result.png");
                }
            }
        }

예제 #6

0

파일 보기

파일: Default.aspx.cs 프로젝트: repohoarder/ByteScout-SDK-SourceCode

        /*
         * IF YOU SEE TEMPORARY FOLDER ACCESS ERRORS:
         *
         * Temporary folder access is required for web application when you use ByteScout SDK in it.
         * If you are getting errors related to the access to temporary folder like "Access to the path 'C:\Windows\TEMP\... is denied" then you need to add permission for this temporary folder to make ByteScout SDK working on that machine and IIS configuration because ByteScout SDK requires access to temp folder to cache some of its data for more efficient work.
         *
         * SOLUTION:
         *
         * If your IIS Application Pool has the "Load User Profile" option enabled, IIS provides access to the user's temp folder; in that case, check that user's temporary folder.
         *
         * If you are running Web Application under an impersonated account or IIS_IUSRS group, IIS may redirect all requests into separate temp folder like "c:\temp\".
         *
         * In this case
         * - check the User or User Group your web application is running under
         * - then add permissions for this User or User Group to read and write into that temp folder (c:\temp or c:\windows\temp\ folder)
         * - restart your web application and try again
         *
         */

        /// <summary>
        /// Searches page index 0 of the sample PDF for the whole word "ipsum" and writes
        /// each match, with the bounds, text and font details of its parts, to the HTTP
        /// response as HTML.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            String inputFile = Server.MapPath(@".\bin\sample1.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement releases it even if loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Set the matching mode.
                // WordMatchingMode.None - treats the search string as substring
                // WordMatchingMode.ExactMatch - treats the search string as separate word
                // WordMatchingMode.SmartMatch - will find the word in various forms (like Adobe Reader).
                extractor.WordMatchingMode = WordMatchingMode.ExactMatch;

                Response.Clear();
                Response.ContentType = "text/html";

                Response.Write("Searching for \"ipsum\" string:<br>");

                // Search for "ipsum" string
                if (extractor.Find(0, "ipsum", false))
                {
                    do
                    {
                        Response.Write("<br/>");
                        Response.Write("Found on page 1 at location " + extractor.FoundText.Bounds + "<br/>");
                        Response.Write("<br/>");

                        // The found text may be split into parts.
                        // Iterate through each part of the found text.
                        for (var i = 0; i < extractor.FoundText.Elements.Count; i++)
                        {
                            ISearchResultElement element = extractor.FoundText.Elements[i];

                            Response.Write("Element #" + i + " at " + element.Bounds + "<br/>");
                            // Text comes from the document, so encode it before embedding it in HTML.
                            Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                            Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                            Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                            Response.Write("Font name: " + element.FontName + "<br/>");
                            Response.Write("Font size:" + element.FontSize + "<br/>");
                            Response.Write("Font color:" + element.FontColor + "<br/>");
                        }
                    } while (extractor.FindNext());
                }
            }

            // End the response only after the extractor has been released.
            Response.End();
        }

예제 #7

0

파일 보기

        /// <summary>
        /// Searches every page of "sample1.pdf" for the whole word "ipsum" and prints
        /// each match together with the position, text and font details of its elements.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement guarantees cleanup even when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\sample1.pdf");

                // Set the matching mode.
                // WordMatchingMode.None - treats the search string as substring
                // WordMatchingMode.ExactMatch - treats the search string as separate word
                // WordMatchingMode.SmartMatch - will find the word in various forms (like Adobe Reader).
                extractor.WordMatchingMode = WordMatchingMode.ExactMatch;

                int pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for "ipsum" string
                    if (extractor.Find(i, "ipsum", false))
                    {
                        do
                        {
                            Console.WriteLine("");
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                            Console.WriteLine("");
                            // Iterate through each element in the found text
                            foreach (SearchResultElement element in extractor.FoundText.Elements)
                            {
                                Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                                Console.WriteLine("Text: " + element.Text);
                                Console.WriteLine("Font is bold: " + element.FontIsBold);
                                Console.WriteLine("Font is italic:" + element.FontIsItalic);
                                Console.WriteLine("Font name: " + element.FontName);
                                Console.WriteLine("Font size:" + element.FontSize);
                                Console.WriteLine("Font color:" + element.FontColor);
                            }
                        } while (extractor.FindNext());
                    }
                }
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

예제 #8

0

파일 보기

        /// <summary>
        /// Searches every page of "sample1.pdf" with the regular expression
        /// "LABORIS.*VELIT" and prints each match together with the position, text and
        /// font details of its elements.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Searches for the text starting from LABORIS and ending with VELIT words.
            // The pattern does not depend on the page, so it is defined once.
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            const string regexPattern = "LABORIS.*VELIT";

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The original never released the extractor; the using statement does so,
            // including when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample1.pdf");

                int pageCount = extractor.GetPageCount();

                extractor.RegexSearch = true; // Turn on the regular expression search

                // Search through pages
                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for the pattern
                    if (extractor.Find(i, regexPattern, false))
                    {
                        do
                        {
                            Console.WriteLine("");
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                            Console.WriteLine("");
                            // Iterate through each element in the found text
                            foreach (SearchResultElement element in extractor.FoundText.Elements)
                            {
                                Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                                Console.WriteLine("Text: " + element.Text);
                                Console.WriteLine("Font is bold: " + element.FontIsBold);
                                Console.WriteLine("Font is italic:" + element.FontIsItalic);
                                Console.WriteLine("Font name: " + element.FontName);
                                Console.WriteLine("Font size:" + element.FontSize);
                                Console.WriteLine("Font color:" + element.FontColor);
                            }
                        } while (extractor.FindNext());
                    }
                }
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

예제 #9

0

파일 보기

        /// <summary>
        /// Scans every page of "SampleInvoice.pdf" for four space-separated groups of
        /// four digits and prints the text of each matching element. Any exception is
        /// reported on the console instead of terminating the program.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Credit card number in the format XXXX XXXX XXXX XXXX.
            // Regular expressions reference: https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            const string cardNumberPattern = @"[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}";

            try
            {
                using (TextExtractor pdfText = new TextExtractor())
                {
                    pdfText.RegistrationName = "demo";
                    pdfText.RegistrationKey  = "demo";

                    // Open the sample invoice
                    pdfText.LoadDocumentFromFile("SampleInvoice.pdf");

                    // Interpret search strings as regular expressions
                    pdfText.RegexSearch = true;

                    int totalPages = pdfText.GetPageCount();

                    for (int page = 0; page < totalPages; page++)
                    {
                        // Find positions on the first match of the page; FindNext then
                        // advances until the page has no further match.
                        bool hasMatch = pdfText.Find(page, cardNumberPattern, false);

                        while (hasMatch)
                        {
                            // Report every element that makes up the current match
                            foreach (ISearchResultElement part in pdfText.FoundText.Elements)
                            {
                                Console.WriteLine("Found Credit Card Number: " + part.Text);
                            }

                            hasMatch = pdfText.FindNext();
                        }
                    }
                }
            }
            catch (Exception failure)
            {
                Console.WriteLine("Error: " + failure.Message);
            }

            // Hold the console window open until Enter is pressed
            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }

예제 #10

0

파일 보기

        /// <summary>
        /// Finds every piece of text on page index 0 of "sample.pdf" that matches the
        /// decimal-number pattern, draws a filled rectangle over each piece on the page
        /// canvas, saves the document as "result.pdf" and opens it.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string sourcePath  = @"sample.pdf";
            const int    targetPage  = 0;
            const string numberRegex = "\\d+\\.\\d+";

            // Extractor used to locate the matching text
            using (TextExtractor finder = new TextExtractor("demo", "demo"))
            {
                finder.RegexSearch = true;
                finder.LoadDocumentFromFile(sourcePath);

                // Same file opened with the PDF SDK so it can be drawn on
                using (Document doc = new Document(sourcePath))
                {
                    doc.RegistrationName = "demo";
                    doc.RegistrationKey  = "demo";

                    Page   targetPdfPage = doc.Pages[targetPage];
                    Canvas pageCanvas    = targetPdfPage.Canvas;

                    // Red fill; the opacity of 50 makes the brush transparent
                    SolidBrush overlay = new SolidBrush(new ColorRGB(255, 0, 0)) { Opacity = 50 };

                    // Find positions on the first match; FindNext advances to the following ones
                    bool hasMatch = finder.Find(targetPage, numberRegex, caseSensitive: false);

                    while (hasMatch)
                    {
                        foreach (var piece in finder.FoundText.Elements)
                        {
                            // Grow the bounds slightly (1 horizontally, 2 vertically) and draw over the page
                            pageCanvas.DrawRectangle(overlay, RectangleF.Inflate(piece.Bounds, 1, 2));
                        }

                        hasMatch = finder.FindNext();
                    }

                    // Write the annotated copy
                    doc.Save("result.pdf");

                    // Show it in the default associated application (for demo purposes)
                    Process.Start("result.pdf");
                }
            }
        }

예제 #11

0

파일 보기

        /// <summary>
        /// Searches page index 0 of "sample1.pdf" for the string "ipsum" and writes each
        /// match, with the position, text and font details of its elements, to the HTTP
        /// response as HTML.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("sample1.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement releases it even if loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                Response.Write("Searching for \"ipsum\" string:<br><br>");

                // Search for "ipsum" string
                if (extractor.Find(0, "ipsum", false))
                {
                    do
                    {
                        Response.Write("<br/>");
                        Response.Write("Found on page 1 at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
                        Response.Write("<br/>");
                        // Iterate through each element in the found text
                        foreach (SearchResultElement element in extractor.FoundText.Elements)
                        {
                            Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
                            // Text comes from the document, so encode it before embedding it in HTML.
                            Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                            Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                            Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                            Response.Write("Font name: " + element.FontName + "<br/>");
                            Response.Write("Font size:" + element.FontSize + "<br/>");
                            Response.Write("Font color:" + element.FontColor + "<br/>");
                        }
                    } while (extractor.FindNext());
                }
            }

            // End the response only after the extractor has been released.
            Response.End();
        }

예제 #12

0

파일 보기

파일: Program.cs 프로젝트: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Searches every page of "sample1.pdf" for the string "ipsum" and prints each
        /// match together with the position, text and font details of its elements.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The original never released the extractor; the using statement does so,
            // including when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey  = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample1.pdf");

                int pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for "ipsum" string
                    if (extractor.Find(i, "ipsum", false))
                    {
                        do
                        {
                            Console.WriteLine("");
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                            Console.WriteLine("");
                            // Iterate through each element in the found text
                            foreach (SearchResultElement element in extractor.FoundText.Elements)
                            {
                                Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                                Console.WriteLine("Text: " + element.Text);
                                Console.WriteLine("Font is bold: " + element.FontIsBold);
                                Console.WriteLine("Font is italic:" + element.FontIsItalic);
                                Console.WriteLine("Font name: " + element.FontName);
                                Console.WriteLine("Font size:" + element.FontSize);
                                Console.WriteLine("Font color:" + element.FontColor);
                            }
                        } while (extractor.FindNext());
                    }
                }
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

예제 #13

0

파일 보기

        /// <summary>
        /// Searches every page of "sample2.pdf" for "land" using smart word matching and
        /// prints the page index, bounds and matched text of each occurrence.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using statement guarantees cleanup even when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                // Load the document
                extractor.LoadDocumentFromFile("sample2.pdf");

                // Smart match the search string like Adobe Reader
                extractor.WordMatchingMode = WordMatchingMode.SmartMatch;

                string searchString = "land";

                // Get page count
                int pageCount = extractor.GetPageCount();

                // Iterate through pages
                for (int i = 0; i < pageCount; i++)
                {
                    // Search for text string
                    if (extractor.Find(i, searchString, false))
                    {
                        do
                        {
                            // Output search results
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());

                            // Now we are getting the found text
                            string extractedString = extractor.FoundText.Text;
                            Console.WriteLine("Found text: " + extractedString);
                        } while (extractor.FindNext()); // Search next occurrence of the search string
                    }
                }
            }

            // Keep the console window open until a key is pressed.
            Console.WriteLine();
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }

예제 #14

0

파일 보기

파일: MainWindow.xaml.cs 프로젝트: remlex/ByteScout-SDK-SourceCode

        /// <summary>
        /// Routed-event handler that looks up the text entered in <c>textBoxFind</c> on
        /// page index 0 and lists the location of every occurrence in <c>textBox1</c>.
        /// An empty search box leaves the output untouched.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Nothing to search for
            if (textBoxFind.Text.Length == 0)
            {
                return;
            }

            StringBuilder report = new StringBuilder();

            report.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

            // Find positions on the first occurrence; FindNext advances to the following ones
            bool hasMatch = extractor.Find(0, textBoxFind.Text, false);

            while (hasMatch)
            {
                report.AppendLine("Found on page 0 at location " + extractor.FoundText.Location.ToString());
                hasMatch = extractor.FindNext();
            }

            report.AppendLine("Finished.");

            textBox1.Text = report.ToString();
        }

예제 #15

0

파일 보기

        /// <summary>
        /// Searches every page of "sample2.pdf" for the string "what" and prints the
        /// page index, bounds and matched text of each occurrence.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The original never released the extractor; the using statement does so,
            // including when loading or searching throws.
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                // Load the document
                extractor.LoadDocumentFromFile("sample2.pdf");

                string searchString = "what";

                // Get page count
                int pageCount = extractor.GetPageCount();

                // Iterate through pages
                for (int i = 0; i < pageCount; i++)
                {
                    // Search for text string
                    if (extractor.Find(i, searchString, false))
                    {
                        do
                        {
                            // Output search results
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());

                            // Now we are getting the found text
                            string extractedString = extractor.FoundText.Text;
                            Console.WriteLine("Extracted string: " + extractedString);
                        } while (extractor.FindNext()); // Search next occurrence of the search string
                    }
                }
            }

            // Keep the console window open until a key is pressed.
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }

C# (CSharp) TextExtractor.FindNext 예제들