Example #1
0
        /// <summary>
        /// Synchronous entry point that runs <c>ProcessWithPageAsync</c> for one work item
        /// and blocks the calling thread until it has finished.
        /// </summary>
        /// <param name="st">
        /// The work item; it is cast to <c>InternalOCRProcessorStatus</c>, so any other
        /// runtime type fails with an <see cref="InvalidCastException"/>.
        /// </param>
        private static void ProcessWithPage(object st)
        {
            var workItem = (InternalOCRProcessorStatus)st;

            // Reading Result waits for completion; the produced page itself is not used here.
            _ = ProcessWithPageAsync(workItem).Result;
        }
Example #2
0
 /// <summary>
 /// Adds <paramref name="page"/> to this object's <c>_Pages</c> collection.
 /// </summary>
 /// <param name="page">The page to add; it is passed to <c>_Pages.Add</c> unchanged.</param>
 public void AddPage(OCRPage page) => _Pages.Add(page);
Example #3
0
        /// <summary>
        /// Runs OCR over the file at <paramref name="filePath"/> and collects the recognized pages.
        /// </summary>
        /// <remarks>
        /// A file whose extension is ".pdf" (compared case-insensitively) is loaded as a PDF: the
        /// requested pages are rendered to in-memory streams and each stream is handed to
        /// <c>ProcessWithPageAsync</c>. Any other file is opened and passed to the engine as a
        /// single stream. Exceptions raised while opening or processing the file are caught and
        /// reported through <paramref name="status"/> as <c>OctarineError.WrongFileFormat</c>.
        /// </remarks>
        /// <param name="filePath">Path of the file to recognize.</param>
        /// <param name="engine">Engine used for recognition.</param>
        /// <param name="status">
        /// Receives the page count (PDF only) and any error code/message; its
        /// <c>ActiveWorkers</c>, <c>Error</c> and <c>OCRCancellationToken</c> members are read here.
        /// </param>
        /// <param name="pageFirst">
        /// 1-based first PDF page to recognize; values of 0 or less select the first page.
        /// </param>
        /// <param name="pageLast">
        /// 1-based last PDF page to recognize; values of 0 or less, or beyond the document's
        /// page count, select the last page.
        /// </param>
        /// <returns>
        /// The populated result, or <c>null</c> when recognition failed or was cancelled
        /// (details are then available on <paramref name="status"/>).
        /// </returns>
        public static async Task<OCRResult> GetTextFromFileAsync(string filePath, OctarineEngine.IEngine engine, OCRStatus status, int pageFirst = 0, int pageLast = 0)
        {
            const int PollIntervalMs     = 200;
            const int WorkerStartDelayMs = 250;

            var result = new OCRResult(filePath);

            try
            {
                var file = await StorageFile.GetFileFromPathAsync(filePath);

                // Ordinal comparison: the extension is an identifier, not linguistic text.
                bool isPdf = string.Equals(Path.GetExtension(filePath), ".pdf", StringComparison.OrdinalIgnoreCase);

                if (!isPdf)
                {
                    // Dispose the file stream once the engine is done so the file handle is released.
                    using (var input = await file.OpenAsync(FileAccessMode.Read))
                    {
                        (OCRPage singlePage, OctarineError error, string errorMessage) = await engine.GetTextFromStreamAsync(input.AsStream());

                        if (singlePage == null)
                        {
                            status.Error        = error;
                            status.ErrorMessage = errorMessage;
                            return null;
                        }
                        if (status.OCRCancellationToken.IsCancellationRequested)
                        {
                            status.Error = OctarineError.CancellationRequested;
                            return null;
                        }
                        result.AddPage(singlePage);
                        return result;
                    }
                }

                var pdfDoc = await PdfDocument.LoadFromFileAsync(file);

                // Normalize the requested range to 1-based bounds inside the document.
                if (pageFirst <= 0)
                {
                    pageFirst = 1;
                }
                if (pageLast <= 0 || pageLast > pdfDoc.PageCount)
                {
                    pageLast = (int)pdfDoc.PageCount;
                }

                int pageTotal = pageLast - pageFirst + 1;
                if (pageTotal < 0)
                {
                    // A first page beyond the last one used to wrap around to a huge unsigned
                    // count and fail inside the array allocation; report it explicitly with the
                    // same error code callers already received for that case.
                    status.Error        = OctarineError.WrongFileFormat;
                    status.ErrorMessage = $"Page range {pageFirst}-{pageLast} is not valid for a document with {pdfDoc.PageCount} page(s).";
                    return null;
                }
                status.PageCount = (uint)pageTotal;

                int      numThreads  = Environment.ProcessorCount;
                Stream[] streams     = new Stream[pageTotal];
                Task[]   renderTasks = new Task[pageTotal];

                // Phase 1: render the pages, at most numThreads at a time.
                using (var renderGate = new System.Threading.SemaphoreSlim(numThreads))
                {
                    // Renders one page into streams[slot]. The page stays alive until its own
                    // render has completed, and the gate is released even when rendering fails.
                    async Task RenderPageAsync(int pageIndex, int slot)
                    {
                        try
                        {
                            using (PdfPage pdfPage = pdfDoc.GetPage((uint)pageIndex))
                            {
                                var rendered = new InMemoryRandomAccessStream();
                                await pdfPage.RenderToStreamAsync(rendered);
                                streams[slot] = rendered.AsStream();
                            }
                        }
                        finally
                        {
                            renderGate.Release();
                        }
                    }

                    for (int slot = 0; slot < pageTotal; slot++)
                    {
                        await renderGate.WaitAsync();
                        // GetPage takes a zero-based index, hence pageFirst - 1.
                        renderTasks[slot] = RenderPageAsync(pageFirst - 1 + slot, slot);
                    }

                    // Every render has finished (or failed) once this returns, so the gate can
                    // be disposed safely; a render failure is rethrown here and handled below.
                    await Task.WhenAll(renderTasks);
                }

                // Phase 2: hand each rendered page to an OCR worker.
                // NOTE(review): ProcessWithPageAsync is not visible here; it presumably fills
                // `pages` and maintains status.ActiveWorkers / status.Error - confirm.
                OCRPage[] pages = new OCRPage[status.PageCount];
                for (int slot = 0; slot < pageTotal; slot++)
                {
                    if (status.Error != OctarineError.Success)
                    {
                        break;
                    }
                    while (status.ActiveWorkers > numThreads)
                    {
                        await Task.Delay(PollIntervalMs);
                    }
                    var st = new InternalOCRProcessorStatus(engine, status, streams[slot], slot, pages);
                    _ = Task.Run(() => ProcessWithPageAsync(st));
                    // NOTE(review): this pause presumably lets the new worker register itself
                    // in status.ActiveWorkers before the next throttle check - confirm.
                    await Task.Delay(WorkerStartDelayMs);
                }
                if (status.Error != OctarineError.Success)
                {
                    return null;
                }
                while (status.ActiveWorkers > 0)
                {
                    await Task.Delay(PollIntervalMs);
                }
                if (status.Error != OctarineError.Success)
                {
                    return null;
                }
                foreach (OCRPage recognized in pages)
                {
                    result.AddPage(recognized);
                }
                return result;
            }
            catch (Exception ex)
            {
                status.Error        = OctarineError.WrongFileFormat;
                status.ErrorMessage = ex.Message;
                return null;
            }
        }