TextRecognizer.LoadDocument C# (CSharp) Code-Beispiele

Beispiel #1

0

Datei anzeigen

Datei: Program.cs Projekt: wushian/ByteScout-SDK-SourceCode

        /// <summary>
        /// Sample entry point: recognizes text inside two rectangular areas of one page of
        /// <c>areas-sample.pdf</c>, prints the recognized objects to the console, saves the
        /// recognized text to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\areas-sample.pdf";
            int    pageIndex      = 0;
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set location of "tessdata" folder containing language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\tessdata\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "tessdata" folder
                    // Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
                    textRecognizer.OCRLanguage = "eng";

                    // Get page size (in pixels). Size of PDF document is computed from PDF Points
                    // and the rendering resolution specified by `textRecognizer.PDFRenderingResolution` (default 300 DPI)
                    Size pageSize = textRecognizer.GetPageSize(pageIndex);

                    // Add area of interest as a rectangle at the top-right corner of the page
                    // (right half of the page width, 300 pixels high)
                    textRecognizer.RecognitionAreas.Add(pageSize.Width / 2, 0, pageSize.Width / 2, 300);
                    // Add area of interest as a rectangle at the bottom-left corner of the page
                    // (300 pixels wide, lower half of the page height),
                    // and indicate it should be rotated at 90 deg
                    textRecognizer.RecognitionAreas.Add(0, pageSize.Height / 2, 300, pageSize.Height / 2, AreaRotation.Rotate90FlipNone);

                    // Now, you can get recognized text for further analysis as a list of objects
                    // containing coordinates, object kind, confidence.
                    OCRObjectList ocrObjectList = textRecognizer.GetOCRObjects(pageIndex);
                    foreach (OCRObject ocrObject in ocrObjectList)
                    {
                        Console.WriteLine(ocrObject.ToString());
                    }

                    // ... or you can save recognized text pieces to file
                    textRecognizer.KeepTextFormatting = false; // save without formatting
                    textRecognizer.SaveText(outputDocument, pageIndex, pageIndex);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (loading, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #2

0

Datei anzeigen

Datei: Program.cs Projekt: jboddiford/ByteScout-SDK-SourceCode

        /// <summary>
        /// Sample entry point: recognizes text from <c>bad-quality.png</c>, applies two
        /// post-recognition text corrections, saves the result to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\bad-quality.png";
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_fast\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // Find more language files at https://github.com/bytescout/ocrdata
                    textRecognizer.OCRLanguage = "eng";

                    // Add error corrections that will be applied after the recognition.
                    // First: replacement of the text "Tut " with "Test ".
                    textRecognizer.Corrections.Add("Tut ", "Test ");
                    // Second: the pattern looks like a regular expression; the trailing `true`
                    // presumably enables regex matching - TODO confirm against SDK docs.
                    textRecognizer.Corrections.Add("Recog\\w{1,}on", "Recognition", true);

                    // Recognize text from all pages and save it to file
                    textRecognizer.SaveText(outputDocument);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (loading, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #3

0

Datei anzeigen

Datei: Program.cs Projekt: bytescout/text-recognition-sdk-samples-c-sharp

        /// <summary>
        /// Sample entry point: takes a screenshot of a 200x200 screen rectangle, recognizes
        /// the text in it, saves the result to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Create ScreenshotMaker instance
                    ScreenshotMaker screenshotMaker = new ScreenshotMaker();
                    // Set rectangle to take screenshot from
                    // (arguments are presumably x, y, width, height in screen pixels - TODO confirm)
                    screenshotMaker.SetScreenshotArea(0, 0, 200, 200);

                    // Load screenshot
                    textRecognizer.LoadDocument(screenshotMaker);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // Find more language files at https://github.com/bytescout/ocrdata
                    textRecognizer.OCRLanguage = "eng";

                    // Recognize text from all pages and save it to file
                    textRecognizer.SaveText(outputDocument);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (capture, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #4

0

Datei anzeigen

        /// <summary>
        /// Sample entry point: obtains an image stream for a remote URL, recognizes the text
        /// in it, saves the result to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputUrl       = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/image-to-pdf/image1.png";
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Get stream from input url.
                    // The stream is acquired inside the try block so that a failure to fetch it is
                    // reported like any other error, and it is wrapped in `using` so that it is
                    // released once recognition has finished.
                    // NOTE(review): assumes GetStreamFromUrl returns a disposable stream - confirm.
                    using (var inputStream = GetStreamFromUrl(inputUrl))
                    {
                        // Load document (image or PDF)
                        textRecognizer.LoadDocument(inputStream);

                        // Set the location of OCR language data files
                        textRecognizer.OCRLanguageDataFolder = @"C:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                        // Set OCR language.
                        // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                        // Find more language files at https://github.com/bytescout/ocrdata
                        textRecognizer.OCRLanguage = "eng";

                        // Recognize text from all pages and save it to file.
                        // Done while the input stream is still open, in case the recognizer reads it lazily.
                        textRecognizer.SaveText(outputDocument);
                    }

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (download, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #5

0

Datei anzeigen

Datei: Program.cs Projekt: bytescout/text-recognition-sdk-samples-c-sharp

        /// <summary>
        /// Sample entry point: recognizes text from <c>ocr-sample.pdf</c>, saves the recognized
        /// word objects as JSON to <c>result.json</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\ocr-sample.pdf";
            string outputDocument = @".\result.json";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // Find more language files at https://github.com/bytescout/ocrdata
                    textRecognizer.OCRLanguage = "eng";

                    // Recognize text from page and save each ocr word object to json
                    // (the `0` argument is presumably the zero-based page index - TODO confirm)
                    textRecognizer.SaveOCRObjectsAsJSON(outputDocument, 0, OCRObjectType.Word);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (loading, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #6

0

Datei anzeigen

        /// <summary>
        /// Sample entry point: recognizes text from <c>invoice-sample.png</c>, saves the
        /// result to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\invoice-sample.png";
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set location of "tessdata" folder containing language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\tessdata\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "tessdata" folder
                    // Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
                    textRecognizer.OCRLanguage = "eng";

                    // Recognize text from all pages and save it to file
                    textRecognizer.SaveText(outputDocument);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (loading, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

Beispiel #7

0

Datei anzeigen

        /// <summary>
        /// Sample entry point: recognizes text from <c>skewed.png</c> with a deskew
        /// preprocessing filter applied, saves the result to <c>result.txt</c> and opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\skewed.png";
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            // ("demo", "demo" are presumably the registration name and key - confirm against SDK docs)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // Find more language files at https://github.com/bytescout/ocrdata
                    textRecognizer.OCRLanguage = "eng";


                    // Add deskew filter that automatically rotates the image to make the text horizontal.
                    // Note, it analyzes the left edge of scanned text. Any dark artifacts may prevent
                    // the correct angle detection.
                    textRecognizer.ImagePreprocessingFilters.AddDeskew();

                    // Other filters that may be useful to improve recognition
                    // (note, the filters are applied in the order they were added):

                    // Improve image contrast.
                    //textRecognizer.ImagePreprocessingFilters.AddContrast();

                    // Apply gamma correction.
                    //textRecognizer.ImagePreprocessingFilters.AddGammaCorrection();

                    // Apply median filter. Helps to remove noise.
                    //textRecognizer.ImagePreprocessingFilters.AddMedian();

                    // Apply dilate filter. Helps to cure symbols erosion.
                    //textRecognizer.ImagePreprocessingFilters.AddDilate();

                    // Lines removers. Removing borders of some tables may improve the recognition.
                    //textRecognizer.ImagePreprocessingFilters.AddHorizontalLinesRemover();
                    //textRecognizer.ImagePreprocessingFilters.AddVerticalLinesRemover();


                    // Recognize text from all pages and save it to file
                    textRecognizer.SaveText(outputDocument);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a document instead of an executable would otherwise throw.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Report any failure (loading, recognition, saving, opening) instead of crashing the demo
                    Console.WriteLine(exception);
                }
            }

            // NOTE(review): the key-press pause below is disabled in this sample, so the
            // program exits right after recognition - confirm whether that is intentional.
//            Console.WriteLine();
//            Console.WriteLine("Press any key...");
//            Console.ReadKey();
        }

C# (CSharp) TextRecognizer.LoadDocument Beispiele