/// <summary>
/// Extracts text from "sample_ocr.pdf" twice: first with the "scanned, no-layout"
/// profiles (written to "result1.txt"), then with profiles loaded from
/// "profiles.json" (written to "result2.txt").
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.TextExtractor instance.
    // The using statement guarantees Dispose() runs even if loading or saving throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Location of the OCR language data files
        extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\net2.00\tessdata";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample_ocr.pdf");

        // Apply predefined profiles
        extractor.Profiles = "scanned, no-layout";

        // Extract text to file
        extractor.SaveTextToFile("result1.txt");

        // Reset the extractor before the second pass
        // NOTE(review): presumably clears the loaded document and applied profiles - confirm against SDK docs
        extractor.Reset();

        // Load another document
        extractor.LoadDocumentFromFile("sample_ocr.pdf");

        // Load and apply custom profiles
        extractor.LoadProfiles("profiles.json");
        extractor.Profiles = "keep-formatting, ocr-forced-200dpi";

        // Extract text to file
        extractor.SaveTextToFile("result2.txt");
    }
}
/// <summary>
/// Runs OCR over the scanned image "InvoiceWithNoise.png", applies the
/// "ocr-dateIssue" profile loaded from "profiles.json", saves the extracted
/// text to "output.txt", and then opens that file via the shell.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.TextExtractor instance.
    // The using statement guarantees Dispose() runs even if loading or saving throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample scanned document
        extractor.LoadDocumentFromFile("InvoiceWithNoise.png");

        // Enable Optical Character Recognition (OCR)
        // in .Auto mode (SDK automatically checks if needs to use OCR or not)
        extractor.OCRMode = OCRMode.Auto;

        // Set the location of OCR language data files
        extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

        // Set OCR language:
        // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
        // according to the files in the "ocrdata" folder.
        // Find more language files at https://github.com/bytescout/ocrdata
        extractor.OCRLanguage = "eng";

        // Set PDF document rendering resolution
        extractor.OCRResolution = 300;

        // Add profiles to fix issues with dates.
        // To deal with a wrong "V" in dates you can use a regular expression; the
        // profile is intended to replace only "V" characters located between numbers.
        // NOTE(review): the actual rule lives in profiles.json - verify there.
        extractor.LoadProfiles("profiles.json");
        extractor.Profiles = "ocr-dateIssue";

        // Save extracted text to file
        extractor.SaveTextToFile("output.txt");
    }

    // Open result document in default associated application (for demo purpose).
    // Done after the extractor has been disposed, matching the original ordering.
    ProcessStartInfo processStartInfo = new ProcessStartInfo("output.txt");
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}