/// <summary>
/// Bundles everything a single page-recognition job needs: the engine, the shared
/// status object, the page image stream, and the array the result is written into.
/// </summary>
/// <param name="engine">OCR engine stored in <c>Engine</c>.</param>
/// <param name="status">Shared status object stored in <c>Status</c>.</param>
/// <param name="source">Stream stored in <c>Source</c>.</param>
/// <param name="pageNumber">
/// Value stored in <c>PageNumber</c>. The caller visible in this file passes the
/// zero-based slot of the page inside <paramref name="pages"/>.
/// </param>
/// <param name="pages">Shared page array stored in <c>Pages</c>.</param>
public InternalOCRProcessorStatus(
    OctarineEngine.IEngine engine,
    OCRStatus status,
    Stream source,
    int pageNumber,
    OCRPage[] pages)
{
    this.Engine = engine;
    this.Status = status;
    this.Source = source;
    this.PageNumber = pageNumber;
    this.Pages = pages;

    // A freshly created job has not been picked up yet.
    this.Started = false;
}
/// <summary>
/// Runs OCR over a set of in-memory streams on a background task while a modal
/// loading window shows progress, then hands the result (or the error) to <c>wnd</c>.
/// </summary>
/// <param name="streams">Streams passed to <c>OCR.GetTextFromStreamsAsync</c>, one per page.</param>
/// <param name="name">Name passed to <c>wnd.SetResult</c> together with the result.</param>
/// <remarks>
/// Any worker left over from a previous call is cancelled and awaited first. The
/// method blocks in <c>ShowDialog</c> until the loading window closes; if the worker
/// is still running at that point it is cancelled and awaited as well.
/// </remarks>
public void PrepareOCR(Stream[] streams, string name = "Plik")
{
    // Read the field once: the worker clears it from another thread when it finishes,
    // so "field != null && !field.IsCompleted" could dereference null between the two reads.
    var previousWorker = updateWorker;
    if (previousWorker != null && !previousWorker.IsCompleted)
    {
        updateWorkerCTS.Cancel();
        try
        {
            previousWorker.Wait();
        }
        catch
        {
            // Best effort: waiting on a cancelled or faulted worker throws; we only need it gone.
        }
        updateWorker = null;
    }

    LoadingWindow wnd_waiter = new LoadingWindow("Rozpoznawanie");
    updateWorkerCTS = new CancellationTokenSource();
    updateWorkerCT = updateWorkerCTS.Token;

    updateWorker = Task.Factory.StartNew(() =>
    {
        try
        {
            wnd_waiter.SetStatus("Przygotowywanie");

            OCRStatus status = new OCRStatus();
            status.OCRCancellationToken = updateWorkerCT;
            status.PageCurrentChanged += (sender, e) =>
            {
                wnd_waiter.SetStatus($"Rozpoznawanie strony {status.PageCurrent} z {status.PageCount}");

                // Guard against a zero page count and clamp to 100, matching the file-based overload.
                int percent = status.PageCount > 0
                    ? (int)(100 * status.PageCurrent / status.PageCount)
                    : 0;
                if (percent > 100)
                {
                    percent = 100;
                }
                wnd_waiter.SetPercentage(percent);
            };

            // Blocking here is intentional: this lambda already runs on a background task.
            OCRResult result = OCR.GetTextFromStreamsAsync(streams, this.Engine, status).Result;
            if (result != null)
            {
                wnd.SetResult(name, result);
            }
            else if (status.Error != OctarineError.CancellationRequested)
            {
                wnd.ShowError(status.Error, status.ErrorMessage);
            }
        }
        finally
        {
            // Always release the modal dialog, even when recognition or result handling
            // throws; otherwise the caller stays stuck in ShowDialog.
            wnd_waiter.Close();
            updateWorker = null;
        }
    }, updateWorkerCT);

    wnd_waiter.ShowDialog(wnd);

    // The dialog was closed while the worker may still be running: stop it.
    var runningWorker = updateWorker;
    if (runningWorker != null && !runningWorker.IsCompleted)
    {
        updateWorkerCTS.Cancel();
        try
        {
            runningWorker.Wait();
        }
        catch
        {
            // Best effort, see above.
        }
        updateWorker = null;
    }
}
/// <summary>
/// Runs OCR over a file on a background task while a modal loading window shows
/// progress, then hands the result (or the error) to <c>wnd</c>.
/// </summary>
/// <param name="file">
/// Path of the file to recognise. For a multi-page PDF the user is first asked for
/// a page range; cancelling that prompt aborts the whole operation.
/// </param>
/// <remarks>
/// Any worker left over from a previous call is cancelled and awaited first. The
/// method blocks in <c>ShowDialog</c> until the loading window closes; if the worker
/// is still running at that point it is cancelled and awaited as well.
/// </remarks>
public void PrepareOCR(string file)
{
    // Read the field once: the worker clears it from another thread when it finishes,
    // so "field != null && !field.IsCompleted" could dereference null between the two reads.
    var previousWorker = updateWorker;
    if (previousWorker != null && !previousWorker.IsCompleted)
    {
        updateWorkerCTS.Cancel();
        try
        {
            previousWorker.Wait();
        }
        catch
        {
            // Best effort: waiting on a cancelled or faulted worker throws; we only need it gone.
        }
        updateWorker = null;
    }

    // 0/0 is passed through unchanged when no range is chosen.
    int pageFirst = 0, pageLast = 0;

    // Ordinal, case-insensitive comparison: independent of the current culture and
    // tolerant of a null extension.
    bool isPdf = string.Equals(Path.GetExtension(file), ".pdf", System.StringComparison.OrdinalIgnoreCase);
    if (isPdf)
    {
        int cnt = (int)OCR.GetPDFPagesCount(file);
        if (cnt > 1)
        {
            var wnd_range = new PageRangeWindow(cnt);
            wnd_range.ShowDialog(wnd);
            if (wnd_range.Cancelled)
            {
                return;
            }
            pageFirst = wnd_range.PageFirst;
            pageLast = wnd_range.PageLast;
        }
    }

    LoadingWindow wnd_waiter = new LoadingWindow("Rozpoznawanie");
    updateWorkerCTS = new CancellationTokenSource();
    updateWorkerCT = updateWorkerCTS.Token;

    updateWorker = Task.Factory.StartNew(() =>
    {
        try
        {
            wnd_waiter.SetStatus("Przygotowywanie");

            OCRStatus status = new OCRStatus();
            status.OCRCancellationToken = updateWorkerCT;
            status.PageCurrentChanged += (sender, e) =>
            {
                wnd_waiter.SetStatus($"Rozpoznawanie strony {status.PageCurrent} z {status.PageCount}");

                // Guard against a zero page count, then clamp to 100.
                int prc = status.PageCount > 0
                    ? (int)(100 * status.PageCurrent / status.PageCount)
                    : 0;
                if (prc > 100)
                {
                    prc = 100;
                }
                wnd_waiter.SetPercentage(prc);
            };

            // Blocking here is intentional: this lambda already runs on a background task.
            OCRResult result = OCR.GetTextFromFileAsync(file, this.Engine, status, pageFirst, pageLast).Result;
            if (result != null)
            {
                wnd.SetResult(file, result);
            }
            else if (status.Error != OctarineError.CancellationRequested)
            {
                wnd.ShowError(status.Error, status.ErrorMessage);
            }
        }
        finally
        {
            // Always release the modal dialog, even when recognition or result handling
            // throws; otherwise the caller stays stuck in ShowDialog.
            wnd_waiter.Close();
            updateWorker = null;
        }
    }, updateWorkerCT);

    wnd_waiter.ShowDialog(wnd);

    // The dialog was closed while the worker may still be running: stop it.
    var runningWorker = updateWorker;
    if (runningWorker != null && !runningWorker.IsCompleted)
    {
        updateWorkerCTS.Cancel();
        try
        {
            runningWorker.Wait();
        }
        catch
        {
            // Best effort, see above.
        }
        updateWorker = null;
    }
}
/// <summary>
/// Recognises each stream in order with <paramref name="engine"/> and collects the
/// pages into a single result.
/// </summary>
/// <param name="streams">Page streams, recognised sequentially in array order.</param>
/// <param name="engine">Engine whose <c>GetTextFromStreamAsync</c> is called per stream.</param>
/// <param name="status">
/// Receives <c>Error</c> / <c>ErrorMessage</c> on failure; its
/// <c>OCRCancellationToken</c> is polled for cancellation.
/// </param>
/// <returns>
/// The collected result, or <c>null</c> when the engine returns no page, cancellation
/// was requested, or an exception was thrown. In the <c>null</c> case
/// <c>status.Error</c> says why.
/// </returns>
public static async Task<OCRResult> GetTextFromStreamsAsync(Stream[] streams, OctarineEngine.IEngine engine, OCRStatus status)
{
    var result = new OCRResult();
    try
    {
        foreach (Stream stream in streams)
        {
            // Check before starting the page so a request that is already cancelled
            // does not pay for another full recognition pass.
            if (status.OCRCancellationToken.IsCancellationRequested)
            {
                status.Error = OctarineError.CancellationRequested;
                return null;
            }

            (OCRPage page, OctarineError error, string errorMessage) = await engine.GetTextFromStreamAsync(stream);
            if (page == null)
            {
                status.Error = error;
                status.ErrorMessage = errorMessage;
                return null;
            }

            // Cancellation may have arrived while the page was being recognised.
            if (status.OCRCancellationToken.IsCancellationRequested)
            {
                status.Error = OctarineError.CancellationRequested;
                return null;
            }

            result.AddPage(page);
        }

        return result;
    }
    catch (Exception ex)
    {
        // Any exception is reported to the caller as a format error carrying the message.
        status.Error = OctarineError.WrongFileFormat;
        status.ErrorMessage = ex.Message;
        return null;
    }
}
/// <summary>
/// Recognises the text of a file. A PDF is rendered page by page to in-memory
/// streams and the pages are then recognised by background workers; any other file
/// is opened and passed to the engine as a single page.
/// </summary>
/// <param name="filePath">Path of the file to open.</param>
/// <param name="engine">OCR engine used for recognition.</param>
/// <param name="status">
/// Receives <c>PageCount</c> for PDFs and <c>Error</c> / <c>ErrorMessage</c> on
/// failure; its <c>OCRCancellationToken</c> is polled in the single-image path.
/// </param>
/// <param name="pageFirst">First PDF page (1-based); values of 0 or less mean the first page.</param>
/// <param name="pageLast">
/// Last PDF page (1-based, inclusive); values of 0 or less, or beyond the document,
/// mean the last page.
/// </param>
/// <returns>
/// The recognised result, or <c>null</c> on error or cancellation, in which case
/// <c>status.Error</c> says why.
/// </returns>
public static async Task<OCRResult> GetTextFromFileAsync(string filePath, OctarineEngine.IEngine engine, OCRStatus status, int pageFirst = 0, int pageLast = 0)
{
    var result = new OCRResult(filePath);
    try
    {
        var file = await StorageFile.GetFileFromPathAsync(filePath);

        // Ordinal, case-insensitive comparison: independent of the current culture.
        bool isPdf = string.Equals(Path.GetExtension(filePath), ".pdf", StringComparison.OrdinalIgnoreCase);
        if (isPdf)
        {
            var pdfDoc = await PdfDocument.LoadFromFileAsync(file);

            // Normalise the requested range to [1, PageCount].
            if (pageFirst <= 0)
            {
                pageFirst = 1;
            }
            if (pageLast <= 0 || pageLast > pdfDoc.PageCount)
            {
                pageLast = (int)pdfDoc.PageCount;
            }

            uint pagesToRecognize = (uint)(pageLast - pageFirst + 1);
            status.PageCount = pagesToRecognize;

            int numThreads = Environment.ProcessorCount;

            // ---- Phase 1: render every requested page to an in-memory stream. ----
            // Each render is tracked as a Task and throttled with a semaphore. A shared
            // counter mutated from several threads is racy, and a faulting render would
            // leave it stuck and hang the wait loop.
            Stream[] streams = new Stream[pagesToRecognize];
            Task[] renderTasks = new Task[pagesToRecognize];

            // Not disposed on purpose: SemaphoreSlim only owns a handle once
            // AvailableWaitHandle is used, and disposing it on an error path could race
            // with a render that is still about to call Release.
            var renderSlots = new System.Threading.SemaphoreSlim(numThreads);

            for (int p = pageFirst - 1; p < pageLast; p++)
            {
                await renderSlots.WaitAsync();

                int slot = p - pageFirst + 1;
                PdfPage pdfPage;
                try
                {
                    pdfPage = pdfDoc.GetPage((uint)p);
                }
                catch
                {
                    renderSlots.Release();
                    throw;
                }

                renderTasks[slot] = Task.Run(async () =>
                {
                    try
                    {
                        var rendered = new InMemoryRandomAccessStream();
                        await pdfPage.RenderToStreamAsync(rendered);
                        streams[slot] = rendered.AsStream();
                    }
                    finally
                    {
                        // The page must outlive the render, so the task that uses it
                        // disposes it, not the loop that scheduled it.
                        pdfPage.Dispose();
                        renderSlots.Release();
                    }
                });
            }

            // Surfaces render failures to the catch block below instead of hanging.
            await Task.WhenAll(renderTasks);

            // ---- Phase 2: hand each rendered page to a background OCR worker. ----
            // NOTE(review): ProcessWithPageAsync and status.ActiveWorkers are defined
            // outside this block. Presumably each worker adjusts ActiveWorkers, writes
            // its page into `pages`, and reports failures via status.Error. The polling
            // and delays below are kept as-is because they depend on that behaviour.
            OCRPage[] pages = new OCRPage[status.PageCount];
            for (int i = pageFirst - 1; i < pageLast; i++)
            {
                if (status.Error != OctarineError.Success)
                {
                    break;
                }

                while (status.ActiveWorkers > numThreads)
                {
                    await Task.Delay(200);
                }

                var st = new InternalOCRProcessorStatus(engine, status, streams[i - pageFirst + 1], i - pageFirst + 1, pages);
                _ = Task.Run(() => ProcessWithPageAsync(st));
                await Task.Delay(250);
            }

            if (status.Error != OctarineError.Success)
            {
                return null;
            }

            while (status.ActiveWorkers > 0)
            {
                await Task.Delay(200);
            }

            if (status.Error != OctarineError.Success)
            {
                return null;
            }

            foreach (OCRPage recognized in pages)
            {
                result.AddPage(recognized);
            }
            return result;
        }

        // Single image file: recognise it as one page. The stream is opened here, so
        // it is closed here too once recognition has finished.
        using (var fileStream = await file.OpenAsync(FileAccessMode.Read))
        {
            (OCRPage page, OctarineError error, string errorMessage) = await engine.GetTextFromStreamAsync(fileStream.AsStream());
            if (page == null)
            {
                status.Error = error;
                status.ErrorMessage = errorMessage;
                return null;
            }

            if (status.OCRCancellationToken.IsCancellationRequested)
            {
                status.Error = OctarineError.CancellationRequested;
                return null;
            }

            result.AddPage(page);
            return result;
        }
    }
    catch (Exception ex)
    {
        // Any exception is reported to the caller as a format error carrying the message.
        status.Error = OctarineError.WrongFileFormat;
        status.ErrorMessage = ex.Message;
        return null;
    }
}