Exemplo n.º 1
0
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            extractor.RegistrationName      = "demo";
            extractor.RegistrationKey       = "demo";
            extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\net2.00\tessdata";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample_ocr.pdf");

            // Apply predefined profiles
            extractor.Profiles = "scanned, no-layout";
            // Extract text to file
            extractor.SaveTextToFile("result1.txt");


            extractor.Reset();


            // Load another document
            extractor.LoadDocumentFromFile("sample_ocr.pdf");

            // Load and apply custom profiles
            extractor.LoadProfiles("profiles.json");
            extractor.Profiles = "keep-formatting, ocr-forced-200dpi";
            // Extract text to file
            extractor.SaveTextToFile("result2.txt");


            extractor.Dispose();
        }
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();

            extractor.RegistrationName = "demo";
            extractor.RegistrationKey  = "demo";

            // Load sample scanned document
            extractor.LoadDocumentFromFile("InvoiceWithNoise.png");

            // Enable Optical Character Recognition (OCR)
            // in .Auto mode (SDK automatically checks if needs to use OCR or not)
            extractor.OCRMode = OCRMode.Auto;

            // Set the location of OCR language data files
            extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

            // Set OCR language
            extractor.OCRLanguage = "eng"; // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder
            // Find more language files at https://github.com/bytescout/ocrdata

            // Set PDF document rendering resolution
            extractor.OCRResolution = 300;

            // Add profiles to fix issues with date.
            // To deal with wrong V in dates you can use a regular expression. The following will replace only V characters which are located between numbers:
            extractor.LoadProfiles("profiles.json");
            extractor.Profiles = "ocr-dateIssue";

            // Save extracted text to file
            extractor.SaveTextToFile("output.txt");

            // Cleanup
            extractor.Dispose();

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.txt");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }