/// <summary>
/// Worker routine: extracts one page range of the input document into a temporary
/// chunk file, runs OCR on that chunk to produce a searchable PDF, then removes the chunk.
/// </summary>
/// <param name="stateInfo">
/// An <c>object[]</c> holding, in order: the thread index (<see cref="int"/>), the
/// <see cref="ManualResetEvent"/> to signal on completion, the input file path, the
/// output file path, the start page (<see cref="int"/>) and the end page (<see cref="int"/>).
/// </param>
/// <remarks>
/// The completion event is always set and <c>ThreadLimiter</c> is always released, even when
/// processing throws. Exceptions raised by the processing itself are not caught here.
/// </remarks>
private static void ThreadProc(object stateInfo)
{
    // Unpack the argument array once instead of re-casting it for every field.
    object[] args = (object[])stateInfo;
    int threadIndex = (int)args[0];
    ManualResetEvent doneEvent = (ManualResetEvent)args[1];
    string inputFile = (string)args[2];
    string outputFile = (string)args[3];
    int startPage = (int)args[4];
    int endPage = (int)args[5];

    try
    {
        Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

        Stopwatch stopwatch = Stopwatch.StartNew();

        // Name of the temporary file that receives the extracted page range.
        string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

        try
        {
            // Extract a piece of document.
            // NOTE(review): the +1 presumably converts zero-based indices to the
            // one-based page numbers the splitter expects - confirm against the SDK.
            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
            }

            // Process the piece
            using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
            {
                searchablePdfMaker.OCRDetectPageRotation = true;
                searchablePdfMaker.OCRLanguageDataFolder = @"C:\Program Files\Bytescout PDF Extractor SDK\net4.00\tessdata";
                searchablePdfMaker.LoadDocumentFromFile(chunk);

                // 300 DPI resolution is recommended.
                // Using higher values will slow down the processing but does not guarantee higher quality.
                searchablePdfMaker.OCRResolution = 300;

                searchablePdfMaker.MakePDFSearchable(outputFile);
            }
        }
        finally
        {
            // Remove the temporary chunk even when extraction or OCR fails, so failed
            // runs do not leave temp files behind. File.Delete does not throw when the
            // file was never created.
            File.Delete(chunk);
        }

        Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
    }
    finally
    {
        // Signal the thread is finished
        doneEvent.Set();

        // Release semaphore
        ThreadLimiter.Release();
    }
}
/// <summary>
/// Worker routine: extracts one page range of the input document into a temporary
/// chunk file, runs OCR on that chunk to produce a searchable PDF, then removes the chunk.
/// </summary>
/// <param name="stateInfo">
/// An <c>object[]</c> holding, in order: the thread index (<see cref="int"/>), the
/// <see cref="ManualResetEvent"/> to signal on completion, the input file path, the
/// output file path, the start page (<see cref="int"/>) and the end page (<see cref="int"/>).
/// </param>
/// <remarks>
/// The completion event is always set and <c>ThreadLimiter</c> is always released, even when
/// processing throws. Exceptions raised by the processing itself are not caught here.
/// </remarks>
private static void ThreadProc(object stateInfo)
{
    // Unpack the argument array once instead of re-casting it for every field.
    object[] args = (object[])stateInfo;
    int threadIndex = (int)args[0];
    ManualResetEvent doneEvent = (ManualResetEvent)args[1];
    string inputFile = (string)args[2];
    string outputFile = (string)args[3];
    int startPage = (int)args[4];
    int endPage = (int)args[5];

    try
    {
        Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

        Stopwatch stopwatch = Stopwatch.StartNew();

        // Name of the temporary file that receives the extracted page range.
        string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

        try
        {
            // Extract a piece of document.
            // NOTE(review): the +1 presumably converts zero-based indices to the
            // one-based page numbers the splitter expects - confirm against the SDK.
            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
            }

            /*
             * By default, "SearchablePDFMaker" uses one of the standard PDF fonts to apply
             * recognized text over the scanned document. Such fonts contain only basic characters
             * from the ISO-8859-1 charset.
             * If you run OCR for a language with characters that are not present in the default
             * encoding, you should explicitly specify a font that contains the required characters
             * using the ".LabelingFont" property.
             * If you run the application in Windows with a selected locale that matches the OCR language,
             * it is enough to specify the usual font "Arial". But if your app will run in an unknown
             * environment (for example, in some virtual machine) you will need to install a full Unicode
             * font (e.g. "Arial Unicode MS") and then use it with SearchablePDFMaker:
             *
             * //searchablePdfMaker.LabelingFont = "Arial Unicode MS";
             */

            // Process the piece
            using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
            {
                searchablePdfMaker.OCRDetectPageRotation = true;
                searchablePdfMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";
                searchablePdfMaker.LoadDocumentFromFile(chunk);

                // 300 DPI resolution is recommended.
                // Using higher values will slow down the processing but does not guarantee higher quality.
                searchablePdfMaker.OCRResolution = 300;

                searchablePdfMaker.MakePDFSearchable(outputFile);
            }
        }
        finally
        {
            // Remove the temporary chunk even when extraction or OCR fails, so failed
            // runs do not leave temp files behind. File.Delete does not throw when the
            // file was never created.
            File.Delete(chunk);
        }

        Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
    }
    finally
    {
        // Signal the thread is finished
        doneEvent.Set();

        // Release semaphore
        ThreadLimiter.Release();
    }
}