/// <summary>
/// Synchronous wrapper around <c>ProcessWithPageAsync</c>: runs it for the given
/// worker state and blocks the calling thread until it has finished.
/// </summary>
/// <param name="st">
/// Worker state passed as <see cref="object"/>; it is cast to
/// <c>InternalOCRProcessorStatus</c> before use.
/// </param>
private static void ProcessWithPage(object st)
{
    var workerState = (InternalOCRProcessorStatus)st;

    // Reading Result blocks until the task completes; the produced page is not needed here.
    _ = ProcessWithPageAsync(workerState).Result;
}
/// <summary>Adds <paramref name="page"/> to <c>_Pages</c>.</summary>
/// <param name="page">The page to add.</param>
public void AddPage(OCRPage page) => _Pages.Add(page);
/// <summary>
/// Runs OCR over a file. A file whose extension is <c>.pdf</c> is rendered page by page and
/// every rendered page is handed to <c>ProcessWithPageAsync</c> on the thread pool; any other
/// file is opened and passed to <paramref name="engine"/> as a single stream.
/// </summary>
/// <param name="filePath">Path of the file to recognize.</param>
/// <param name="engine">Engine used for recognition.</param>
/// <param name="status">
/// Shared status object. This method writes <c>PageCount</c>, <c>Error</c> and
/// <c>ErrorMessage</c>, and polls <c>Error</c>, <c>ActiveWorkers</c> and
/// <c>OCRCancellationToken</c>.
/// </param>
/// <param name="pageFirst">1-based first PDF page; values &lt;= 0 select page 1.</param>
/// <param name="pageLast">
/// 1-based last PDF page (inclusive); values &lt;= 0 or beyond the document select the last page.
/// </param>
/// <returns>
/// The populated result, or <c>null</c> when an error is recorded in <paramref name="status"/>
/// (including any exception, which is reported as <c>OctarineError.WrongFileFormat</c>).
/// </returns>
public static async Task<OCRResult> GetTextFromFileAsync(string filePath, OctarineEngine.IEngine engine, OCRStatus status, int pageFirst = 0, int pageLast = 0)
{
    var result = new OCRResult(filePath);
    try
    {
        var file = await StorageFile.GetFileFromPathAsync(filePath);

        // Ordinal, case-insensitive comparison: the extension is an identifier, not linguistic text.
        bool isPdf = string.Equals(Path.GetExtension(filePath), ".pdf", StringComparison.OrdinalIgnoreCase);

        if (!isPdf)
        {
            // NOTE(review): this stream is never disposed here; whether the engine or the
            // returned page keeps using it is not visible from this method - confirm before closing it.
            var imageStream = await file.OpenAsync(FileAccessMode.Read);
            (OCRPage singlePage, OctarineError error, string errorMessage) =
                await engine.GetTextFromStreamAsync(imageStream.AsStream());

            if (singlePage == null)
            {
                status.Error = error;
                status.ErrorMessage = errorMessage;
                return null;
            }

            if (status.OCRCancellationToken.IsCancellationRequested)
            {
                status.Error = OctarineError.CancellationRequested;
                return null;
            }

            result.AddPage(singlePage);
            return result;
        }

        var pdfDoc = await PdfDocument.LoadFromFileAsync(file);

        if (pageFirst <= 0)
        {
            pageFirst = 1;
        }

        if (pageLast <= 0 || pageLast > pdfDoc.PageCount)
        {
            pageLast = (int)pdfDoc.PageCount;
        }

        // Validate before casting to uint: a negative span would otherwise wrap around to a
        // huge page count and a huge array allocation.
        int pageSpan = pageLast - pageFirst + 1;
        if (pageSpan < 0)
        {
            // Reported with the same error code the catch block below uses for failures.
            status.Error = OctarineError.WrongFileFormat;
            status.ErrorMessage = $"Invalid page range: first page {pageFirst} is beyond last page {pageLast}.";
            return null;
        }

        uint pagesToRecognize = (uint)pageSpan;
        status.PageCount = pagesToRecognize;

        int numThreads = Environment.ProcessorCount;

        // Stage 1: render every requested page into its own in-memory stream.
        // NOTE(review): the rendered streams are not disposed in this method; ownership after
        // hand-off to the workers is not visible here.
        Stream[] streams = await RenderPdfPagesAsync(pdfDoc, pageFirst - 1, pageSpan, numThreads);

        // Stage 2: start one worker per rendered page; workers fill 'pages' by index.
        OCRPage[] pages = new OCRPage[status.PageCount];
        for (int slot = 0; slot < pageSpan; slot++)
        {
            if (status.Error != OctarineError.Success)
            {
                break;
            }

            while (status.ActiveWorkers > numThreads)
            {
                await Task.Delay(200);
            }

            var st = new InternalOCRProcessorStatus(engine, status, streams[slot], slot, pages);
            _ = Task.Run(() => ProcessWithPageAsync(st));

            // Pause between launches, as the original did.
            // NOTE(review): presumably gives the worker time to register in ActiveWorkers - confirm.
            await Task.Delay(250);
        }

        if (status.Error != OctarineError.Success)
        {
            return null;
        }

        while (status.ActiveWorkers > 0)
        {
            await Task.Delay(200);
        }

        if (status.Error != OctarineError.Success)
        {
            return null;
        }

        foreach (OCRPage ocrPage in pages)
        {
            result.AddPage(ocrPage);
        }

        return result;
    }
    catch (Exception ex)
    {
        status.Error = OctarineError.WrongFileFormat;
        status.ErrorMessage = ex.Message;
        return null;
    }
}

/// <summary>
/// Renders <paramref name="pageCount"/> consecutive PDF pages, starting at the 0-based
/// <paramref name="firstPageIndex"/>, into in-memory streams with at most
/// <paramref name="maxParallel"/> renders in flight.
/// </summary>
/// <returns>The rendered streams, ordered by page; element 0 is the first requested page.</returns>
private static async Task<Stream[]> RenderPdfPagesAsync(PdfDocument pdfDoc, int firstPageIndex, int pageCount, int maxParallel)
{
    var streams = new Stream[pageCount];
    var renderTasks = new Task[pageCount];
    int inFlight = 0;

    for (int slot = 0; slot < pageCount; slot++)
    {
        // Throttle: wait until a render slot is free.
        while (System.Threading.Volatile.Read(ref inFlight) >= maxParallel)
        {
            await Task.Delay(10);
        }

        int target = slot;
        PdfPage pdfPage = pdfDoc.GetPage((uint)(firstPageIndex + slot));

        // Count the render before it is scheduled so the throttle check above cannot race
        // with task start-up.
        System.Threading.Interlocked.Increment(ref inFlight);

        renderTasks[slot] = Task.Run(async () =>
        {
            try
            {
                // The page is disposed only after its render has finished.
                using (pdfPage)
                {
                    var rendered = new InMemoryRandomAccessStream();
                    await pdfPage.RenderToStreamAsync(rendered);
                    streams[target] = rendered.AsStream();
                }
            }
            finally
            {
                // Always release the slot, even when rendering throws.
                System.Threading.Interlocked.Decrement(ref inFlight);
            }
        });
    }

    // Propagates the first render failure to the caller instead of leaving it unobserved.
    await Task.WhenAll(renderTasks);
    return streams;
}