Ejemplo n.º 1
0
        /// <summary>
        /// Worker routine: extracts a page range from the input PDF into a temporary
        /// chunk file, runs OCR on it to produce a searchable PDF, then removes the
        /// temporary file. Always signals the supplied event and releases
        /// <c>ThreadLimiter</c> on exit, whether processing succeeded or not.
        /// </summary>
        /// <param name="stateInfo">
        /// An <c>object[]</c> laid out as:
        /// [0] thread index (int), [1] completion event (ManualResetEvent),
        /// [2] input file path (string), [3] output file path (string),
        /// [4] start page (int), [5] end page (int).
        /// </param>
        private static void ThreadProc(object stateInfo)
        {
            // Unpack the state array once instead of re-casting it for every field.
            object[] args = (object[])stateInfo;

            int              threadIndex = (int)args[0];
            ManualResetEvent doneEvent   = (ManualResetEvent)args[1];
            string           inputFile   = (string)args[2];
            string           outputFile  = (string)args[3];
            int              startPage   = (int)args[4];
            int              endPage     = (int)args[5];

            try
            {
                Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

                Stopwatch stopwatch = Stopwatch.StartNew();

                // Name of the temporary file holding the extracted page range.
                string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

                try
                {
                    // Extract a piece of document.
                    // NOTE(review): the "+ 1" suggests the incoming page range is zero-based
                    // while ExtractPageRange expects one-based pages - confirm against the SDK.
                    using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                    {
                        splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
                    }

                    // Process the piece
                    using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
                    {
                        searchablePdfMaker.OCRDetectPageRotation = true;
                        searchablePdfMaker.OCRLanguageDataFolder = @"C:\Program Files\Bytescout PDF Extractor SDK\net4.00\tessdata";
                        searchablePdfMaker.LoadDocumentFromFile(chunk);

                        // 300 DPI resolution is recommended.
                        // Higher values slow down the processing without guaranteeing higher quality.
                        searchablePdfMaker.OCRResolution = 300;

                        searchablePdfMaker.MakePDFSearchable(outputFile);
                    }
                }
                finally
                {
                    // Remove the temporary chunk even when extraction or OCR throws, so a
                    // failed run does not leave stray temp files behind. File.Delete does
                    // not throw when the file does not exist.
                    File.Delete(chunk);
                }

                Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
            }
            finally
            {
                // Signal the thread is finished
                doneEvent.Set();

                // Release semaphore
                ThreadLimiter.Release();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Worker routine: extracts a page range from the input PDF into a temporary
        /// chunk file, runs OCR on it to produce a searchable PDF, then removes the
        /// temporary file. Always signals the supplied event and releases
        /// <c>ThreadLimiter</c> on exit, whether processing succeeded or not.
        /// </summary>
        /// <param name="stateInfo">
        /// An <c>object[]</c> laid out as:
        /// [0] thread index (int), [1] completion event (ManualResetEvent),
        /// [2] input file path (string), [3] output file path (string),
        /// [4] start page (int), [5] end page (int).
        /// </param>
        private static void ThreadProc(object stateInfo)
        {
            // Unpack the state array once instead of re-casting it for every field.
            object[] args = (object[])stateInfo;

            int              threadIndex = (int)args[0];
            ManualResetEvent doneEvent   = (ManualResetEvent)args[1];
            string           inputFile   = (string)args[2];
            string           outputFile  = (string)args[3];
            int              startPage   = (int)args[4];
            int              endPage     = (int)args[5];

            try
            {
                Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

                Stopwatch stopwatch = Stopwatch.StartNew();

                // Name of the temporary file holding the extracted page range.
                string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

                try
                {
                    // Extract a piece of document.
                    // NOTE(review): the "+ 1" suggests the incoming page range is zero-based
                    // while ExtractPageRange expects one-based pages - confirm against the SDK.
                    using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                    {
                        splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
                    }

                    /*
                     * By default, "SearchablePDFMaker" uses one of the standard PDF fonts to apply
                     * recognized text over the scanned document. Such fonts contain only basic characters
                     * from the ISO-8859-1 charset.
                     * If you run OCR for a language with characters that are not present in the default
                     * encoding, you should explicitly specify a font that contains the required characters
                     * using the ".LabelingFont" property.
                     * If you run the application in Windows with a selected locale that matches the OCR language,
                     * it is enough to specify the usual font "Arial". But if your app will run in an unknown
                     * environment (for example, in some virtual machine) you will need to install a full Unicode
                     * font (e.g. "Arial Unicode MS") and then use it with SearchablePDFMaker:
                     *
                     * //searchablePDFMaker.LabelingFont = "Arial Unicode MS";
                     */
                    // Process the piece
                    using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
                    {
                        searchablePdfMaker.OCRDetectPageRotation = true;
                        searchablePdfMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";
                        searchablePdfMaker.LoadDocumentFromFile(chunk);

                        // 300 DPI resolution is recommended.
                        // Higher values slow down the processing without guaranteeing higher quality.
                        searchablePdfMaker.OCRResolution = 300;

                        searchablePdfMaker.MakePDFSearchable(outputFile);
                    }
                }
                finally
                {
                    // Remove the temporary chunk even when extraction or OCR throws, so a
                    // failed run does not leave stray temp files behind. File.Delete does
                    // not throw when the file does not exist.
                    File.Delete(chunk);
                }

                Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
            }
            finally
            {
                // Signal the thread is finished
                doneEvent.Set();

                // Release semaphore
                ThreadLimiter.Release();
            }
        }