/// <summary>
/// Sample: restricts OCR to two rectangular areas of a single PDF page, prints every
/// recognized OCR object to the console, then saves the page text to a file and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string sourcePath = @".\areas-sample.pdf";
    const string resultPath = @".\result.txt";
    const int page = 0;

    // Create and activate the TextRecognizer instance.
    using (var recognizer = new TextRecognizer("demo", "demo"))
    {
        try
        {
            // The input may be an image or a PDF.
            recognizer.LoadDocument(sourcePath);

            // Location of the "tessdata" folder containing the language data files.
            recognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\tessdata\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc.,
            // according to the files present in the "tessdata" folder.
            // More language files: https://github.com/tesseract-ocr/tessdata/tree/3.04.00
            recognizer.OCRLanguage = "eng";

            // Page size in pixels. For a PDF it is computed from PDF points and the rendering
            // resolution given by `PDFRenderingResolution` (default 300 DPI).
            Size pageSize = recognizer.GetPageSize(page);
            int halfWidth = pageSize.Width / 2;
            int halfHeight = pageSize.Height / 2;

            // First area of interest: a rectangle at the top-right corner of the page.
            recognizer.RecognitionAreas.Add(halfWidth, 0, halfWidth, 300);

            // Second area of interest: a rectangle at the bottom-left corner of the page,
            // flagged to be rotated by 90 degrees.
            recognizer.RecognitionAreas.Add(0, halfHeight, 300, halfHeight, AreaRotation.Rotate90FlipNone);

            // Option 1: fetch the recognized text as a list of objects (coordinates, object kind,
            // confidence) for further analysis; here each one is simply printed.
            OCRObjectList recognizedObjects = recognizer.GetOCRObjects(page);
            foreach (OCRObject recognizedObject in recognizedObjects)
            {
                Console.WriteLine(recognizedObject.ToString());
            }

            // Option 2: save the recognized text pieces to a file, without formatting.
            recognizer.KeepTextFormatting = false;
            recognizer.SaveText(resultPath, page, page);

            // Open the result file in the default associated application (for demo purposes).
            Process.Start(resultPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Sample: recognizes text from a low-quality image, applying text corrections after
/// recognition, then saves the result to a file and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string sourcePath = @".\bad-quality.png";
    const string resultPath = @".\result.txt";

    // Create and activate the TextRecognizer instance.
    using (var recognizer = new TextRecognizer("demo", "demo"))
    {
        try
        {
            // The input may be an image or a PDF.
            recognizer.LoadDocument(sourcePath);

            // Location of the OCR language data files.
            recognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_fast\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc.,
            // according to the files present in the "ocrdata" folder.
            // More language files: https://github.com/bytescout/ocrdata
            recognizer.OCRLanguage = "eng";

            // Error corrections that are applied after the recognition.
            // NOTE(review): the trailing `true` on the second call presumably marks the pattern
            // as a regular expression - confirm against the SDK documentation.
            var corrections = recognizer.Corrections;
            corrections.Add("Tut ", "Test ");
            corrections.Add("Recog\\w{1,}on", "Recognition", true);

            // Recognize text from all pages and write it to the output file.
            recognizer.SaveText(resultPath);

            // Open the result file in the default associated application (for demo purposes).
            Process.Start(resultPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Sample: captures a rectangular region of the screen, recognizes the text in it,
/// saves the result to a file and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string resultPath = @".\result.txt";

    // Create and activate the TextRecognizer instance.
    using (var recognizer = new TextRecognizer("demo", "demo"))
    {
        try
        {
            // Configure the screenshot source with the rectangle to capture.
            var screenshotSource = new ScreenshotMaker();
            screenshotSource.SetScreenshotArea(0, 0, 200, 200);

            // Load the screenshot as the document to recognize.
            recognizer.LoadDocument(screenshotSource);

            // Location of the OCR language data files.
            recognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc.,
            // according to the files present in the "ocrdata" folder.
            // More language files: https://github.com/bytescout/ocrdata
            recognizer.OCRLanguage = "eng";

            // Recognize text from all pages and write it to the output file.
            recognizer.SaveText(resultPath);

            // Open the result file in the default associated application (for demo purposes).
            Process.Start(resultPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Sample: obtains an image stream for a URL, recognizes the text in it,
/// saves the result to a file and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string inputUrl = @"https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/image-to-pdf/image1.png";
    string outputDocument = @".\result.txt";

    try
    {
        // Get the stream from the input URL. It is acquired inside the try block so that a
        // failed download is reported like any other error instead of crashing the sample,
        // and it is wrapped in `using` so the stream is always released.
        // The stream is declared first so that it outlives the recognizer that reads from it.
        using (var inputStream = GetStreamFromUrl(inputUrl))
        // Create and activate TextRecognizer instance
        using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
        {
            // Load document (image or PDF)
            textRecognizer.LoadDocument(inputStream);

            // Set the location of OCR language data files
            textRecognizer.OCRLanguageDataFolder = @"C:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

            // Set OCR language.
            // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
            // Find more language files at https://github.com/bytescout/ocrdata
            textRecognizer.OCRLanguage = "eng";

            // Recognize text from all pages and save it to file
            textRecognizer.SaveText(outputDocument);

            // Open the result file in default associated application (for demo purposes)
            Process.Start(outputDocument);
        }
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception);
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Sample: recognizes the text of an invoice image, saves it to a file and opens that file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string sourcePath = @".\invoice-sample.png";
    const string resultPath = @".\result.txt";

    // Create and activate the TextRecognizer instance.
    using (var recognizer = new TextRecognizer("demo", "demo"))
    {
        try
        {
            // The input may be an image or a PDF.
            recognizer.LoadDocument(sourcePath);

            // Location of the "tessdata" folder containing the language data files.
            recognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\tessdata\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc.,
            // according to the files present in the "tessdata" folder.
            // More language files: https://github.com/tesseract-ocr/tessdata/tree/3.04.00
            recognizer.OCRLanguage = "eng";

            // Recognize text from all pages and write it to the output file.
            recognizer.SaveText(resultPath);

            // Open the result file in the default associated application (for demo purposes).
            Process.Start(resultPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Sample: recognizes text from a skewed image using the deskew preprocessing filter,
/// saves the result to a file and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string sourcePath = @".\skewed.png";
    const string resultPath = @".\result.txt";

    // Create and activate the TextRecognizer instance.
    using (var recognizer = new TextRecognizer("demo", "demo"))
    {
        try
        {
            // The input may be an image or a PDF.
            recognizer.LoadDocument(sourcePath);

            // Location of the OCR language data files.
            recognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc.,
            // according to the files present in the "ocrdata" folder.
            // More language files: https://github.com/bytescout/ocrdata
            recognizer.OCRLanguage = "eng";

            // Deskew filter: automatically rotates the image to make the text horizontal.
            // It analyzes the left edge of the scanned text, so dark artifacts may prevent
            // correct angle detection.
            recognizer.ImagePreprocessingFilters.AddDeskew();

            // Other filters that may improve recognition
            // (filters are applied in the order they were added):

            // Improve image contrast.
            //recognizer.ImagePreprocessingFilters.AddContrast();

            // Apply gamma correction.
            //recognizer.ImagePreprocessingFilters.AddGammaCorrection();

            // Apply median filter. Helps to remove noise.
            //recognizer.ImagePreprocessingFilters.AddMedian();

            // Apply dilate filter. Helps to cure symbol erosion.
            //recognizer.ImagePreprocessingFilters.AddDilate();

            // Line removers. Removing the borders of some tables may improve the recognition.
            //recognizer.ImagePreprocessingFilters.AddHorizontalLinesRemover();
            //recognizer.ImagePreprocessingFilters.AddVerticalLinesRemover();

            // Recognize text from all pages and write it to the output file.
            recognizer.SaveText(resultPath);

            // Open the result file in the default associated application (for demo purposes).
            Process.Start(resultPath);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    // The key-press pause is disabled in this sample:
    // Console.WriteLine();
    // Console.WriteLine("Press any key...");
    // Console.ReadKey();
}