Example #1
0
 /// <summary>
 /// Builds the parameters for an OCR request from an image snapshot, the engine to run and the OCR options.
 /// </summary>
 /// <param name="snapshot">Snapshot whose source image and transform state are recorded.</param>
 /// <param name="ocrEngine">Engine stored in <c>Engine</c>.</param>
 /// <param name="ocrParams">Options stored in <c>OcrParams</c>.</param>
 public OcrRequestParams(ScannedImage.Snapshot snapshot, IOcrEngine ocrEngine, OcrParams ocrParams)
 {
     ScannedImage = snapshot.Source;

     // A snapshot without any transforms is recorded with the fixed state -1
     // instead of whatever state the snapshot itself reports.
     if (snapshot.TransformList.Count == 0)
     {
         TransformState = -1;
     }
     else
     {
         TransformState = snapshot.TransformState;
     }

     Engine    = ocrEngine;
     OcrParams = ocrParams;
 }
Example #2
0
        /// <summary>
        /// Chooses the trained-data folder ("fast" or "best") and the matching Tesseract arguments
        /// for the requested OCR mode.
        /// </summary>
        /// <param name="ocrParams">Supplies the requested mode and the '+'-separated language codes.</param>
        /// <returns>Run info whose data path and prefix path are both the chosen folder name.</returns>
        protected override RunInfo TesseractRunInfo(OcrParams ocrParams)
        {
            OcrMode mode       = ocrParams.Mode;
            bool    preferFast = mode == OcrMode.Fast || mode == OcrMode.Default;
            string  folder     = preferFast ? "fast" : "best";

            // Check whether at least one of the requested languages has data in the preferred folder
            bool anyLanguagePresent = ocrParams.LanguageCode
                                      .Split('+')
                                      .Select(code => Path.Combine(TesseractBasePath, folder, $"{code.ToLowerInvariant()}.traineddata"))
                                      .Any(path => File.Exists(path));

            if (!anyLanguagePresent)
            {
                // None of the languages exist in the preferred folder, so switch to the other one
                // and use the mode that belongs to it
                if (preferFast)
                {
                    folder = "best";
                    mode   = OcrMode.Best;
                }
                else
                {
                    folder = "fast";
                    mode   = OcrMode.Fast;
                }
            }

            string arguments;
            if (mode == OcrMode.Best)
            {
                arguments = "--oem 1";
            }
            else if (mode == OcrMode.Legacy)
            {
                arguments = "--oem 0";
            }
            else
            {
                arguments = "";
            }

            var runInfo = new RunInfo
            {
                Arguments  = arguments,
                DataPath   = folder,
                PrefixPath = folder
            };
            return runInfo;
        }
Example #3
0
        /// <summary>
        /// Requests OCR for <paramref name="snapshot"/> on behalf of a foreground caller and asynchronously
        /// waits until either the request's wait handle or <paramref name="cancelToken"/> is signaled.
        /// Requests are looked up in (or added to) <c>requestCache</c> under a key built from the snapshot,
        /// engine and params.
        /// </summary>
        /// <param name="ocrEngine">Engine to use; when null, <c>ocrManager.ActiveEngine</c> is used instead.</param>
        /// <param name="snapshot">Image snapshot to run OCR on.</param>
        /// <param name="tempImageFilePath">
        /// Temp image file handed over to the queue: it is stored on the request if the request has none yet,
        /// and otherwise (or when a cached result already exists) it is passed to <c>SafeDelete</c>.
        /// </param>
        /// <param name="ocrParams">OCR options; when null, <c>ocrManager.DefaultParams</c> is used.</param>
        /// <param name="cancelToken">Ends the wait early when cancelled.</param>
        /// <returns>The request's result; may be null if cancelled.</returns>
        /// <exception cref="ArgumentException">
        /// No engine was supplied and <c>ocrManager.ActiveEngine</c> is null. Because this method is async,
        /// the exception is delivered through the returned task.
        /// </exception>
        public async Task <OcrResult> QueueForeground(IOcrEngine ocrEngine, ScannedImage.Snapshot snapshot, string tempImageFilePath, OcrParams ocrParams, CancellationToken cancelToken)
        {
            OcrRequest req;

            // Cache lookup, temp-file ownership and the reference count are all updated under one lock
            lock (this)
            {
                ocrEngine = ocrEngine ?? ocrManager.ActiveEngine ?? throw new ArgumentException("No OCR engine available");
                ocrParams = ocrParams ?? ocrManager.DefaultParams;

                var reqParams = new OcrRequestParams(snapshot, ocrEngine, ocrParams);
                // Presumably returns the existing request for this key or stores the newly created one
                req = requestCache.GetOrSet(reqParams, () => new OcrRequest(reqParams));
                // Fast path for cached results
                if (req.Result != null)
                {
                    SafeDelete(tempImageFilePath);
                    return(req.Result);
                }
                // Manage ownership of the provided temp file
                if (req.TempImageFilePath == null)
                {
                    req.TempImageFilePath = tempImageFilePath;
                }
                else
                {
                    // The request already has a temp file, so the one provided here is not needed
                    SafeDelete(tempImageFilePath);
                }
                // Increment the reference count
                req.ForegroundCount += 1;
                // NOTE(review): presumably signals the worker threads that a request is pending - confirm
                queueWaitHandle.Release();
            }
            // If no worker threads are running, start them
            EnsureWorkerThreads();
            // Wait for completion or cancellation
            // The blocking wait runs on its own long-running task so the caller is not blocked
            await Task.Factory.StartNew(() =>
            {
                try
                {
                    WaitHandle.WaitAny(new[] { req.WaitHandle, cancelToken.WaitHandle });
                }
                catch (Exception e)
                {
                    // Failures while waiting are logged rather than propagated to the caller
                    Log.ErrorException("Error in OcrRequestQueue.QueueForeground response task", e);
                }
            }, TaskCreationOptions.LongRunning);

            lock (this)
            {
                // Decrement the reference count
                req.ForegroundCount -= 1;
                // If all requestors have cancelled and there's no result to cache, delete the request
                DestroyRequest(req);
            }
            // If no requests are pending, stop the worker threads
            EnsureWorkerThreads();
            // May return null if cancelled
            return(req.Result);
        }
Example #4
0
        /// <summary>
        /// Reports whether the request cache already holds a finished OCR result for the given
        /// snapshot, engine and params.
        /// </summary>
        /// <param name="ocrEngine">Engine to look up; when null, <c>ocrManager.ActiveEngine</c> is used.</param>
        /// <param name="snapshot">Image snapshot the result would belong to.</param>
        /// <param name="ocrParams">OCR options; when null, <c>ocrManager.DefaultParams</c> is used.</param>
        /// <returns>True when a cached request exists for the key and its result is not null.</returns>
        /// <exception cref="ArgumentException">No engine was supplied and none is active.</exception>
        public bool HasCachedResult(IOcrEngine ocrEngine, ScannedImage.Snapshot snapshot, OcrParams ocrParams)
        {
            if (ocrEngine is null)
            {
                ocrEngine = ocrManager.ActiveEngine;
                if (ocrEngine is null)
                {
                    throw new ArgumentException("No OCR engine available");
                }
            }
            if (ocrParams is null)
            {
                ocrParams = ocrManager.DefaultParams;
            }

            var cacheKey = new OcrRequestParams(snapshot, ocrEngine, ocrParams);

            lock (this)
            {
                if (!requestCache.ContainsKey(cacheKey))
                {
                    return false;
                }

                var cachedRequest = requestCache[cacheKey];
                return cachedRequest.Result != null;
            }
        }
Example #5
0
        /// <summary>
        /// Queues OCR for <paramref name="snapshot"/> on behalf of a background caller and returns without
        /// waiting for the result. A long-running task waits for the request to finish or be cancelled and
        /// then releases this caller's reference on the request.
        /// </summary>
        /// <param name="snapshot">
        /// Image snapshot to run OCR on. The wait is cancelled when its source raises
        /// <c>ThumbnailInvalidated</c> or <c>FullyDisposed</c>.
        /// </param>
        /// <param name="tempImageFilePath">
        /// Temp image file handed over to the queue: it is stored on the request if the request has none yet,
        /// and otherwise (or when a cached result already exists) it is passed to <c>SafeDelete</c>.
        /// </param>
        /// <param name="ocrParams">OCR options; when null, <c>ocrManager.DefaultParams</c> is used.</param>
        public void QueueBackground(ScannedImage.Snapshot snapshot, string tempImageFilePath, OcrParams ocrParams)
        {
            OcrRequest req;
            // Created only once the request is actually queued, so the early returns below don't
            // allocate a token source that nobody would ever cancel or release
            CancellationTokenSource cts;

            lock (this)
            {
                var ocrEngine = ocrManager.ActiveEngine;
                if (ocrEngine == null)
                {
                    // NOTE(review): the temp file is left untouched on this path (as before) - confirm
                    // whether the caller cleans it up when no engine is available
                    return;
                }
                ocrParams = ocrParams ?? ocrManager.DefaultParams;

                var reqParams = new OcrRequestParams(snapshot, ocrEngine, ocrParams);
                req = requestCache.GetOrSet(reqParams, () => new OcrRequest(reqParams));
                // Fast path for cached results
                if (req.Result != null)
                {
                    // The provided temp file will never be used; delete it, as QueueForeground does
                    // on its cached fast path, instead of leaking it
                    SafeDelete(tempImageFilePath);
                    return;
                }
                // Manage ownership of the provided temp file
                if (req.TempImageFilePath == null)
                {
                    req.TempImageFilePath = tempImageFilePath;
                }
                else
                {
                    SafeDelete(tempImageFilePath);
                }
                // Increment the reference count
                req.BackgroundCount += 1;
                cts = new CancellationTokenSource();
                // Stop waiting for this request once the image changes or goes away
                snapshot.Source.ThumbnailInvalidated += (sender, args) => cts.Cancel();
                snapshot.Source.FullyDisposed        += (sender, args) => cts.Cancel();
                queueWaitHandle.Release();
            }
            // If no worker threads are running, start them
            EnsureWorkerThreads();
            var op = StartingOne();

            Task.Factory.StartNew(() =>
            {
                try
                {
                    // Wait for completion, image invalidation/disposal, or cancellation of the operation
                    WaitHandle.WaitAny(new[] { req.WaitHandle, cts.Token.WaitHandle, op.CancelToken.WaitHandle });
                    lock (this)
                    {
                        // Decrement the reference count
                        req.BackgroundCount -= 1;
                        // If all requestors have cancelled and there's no result to cache, delete the request
                        DestroyRequest(req);
                    }

                    FinishedOne();
                    // If no requests are pending, stop the worker threads
                    EnsureWorkerThreads();
                }
                catch (Exception e)
                {
                    // Failures in the response task are logged rather than propagated
                    Log.ErrorException("Error in OcrRequestQueue.QueueBackground response task", e);
                }
            }, TaskCreationOptions.LongRunning);
        }
Example #6
0
        /// <summary>
        /// Runs the Tesseract executable on <paramref name="imagePath"/>, producing a temporary hOCR file,
        /// and parses that file into an <see cref="OcrResult"/>.
        /// </summary>
        /// <param name="imagePath">Path of the image file passed to Tesseract.</param>
        /// <param name="ocrParams">Supplies the language code and the engine-specific run info.</param>
        /// <param name="cancelToken">When cancelled, the Tesseract process is killed and null is returned.</param>
        /// <returns>
        /// The parsed result, or null when the process could not be started, timed out, was cancelled,
        /// or any exception occurred (exceptions are logged, not propagated).
        /// </returns>
        public OcrResult ProcessImage(string imagePath, OcrParams ocrParams, CancellationToken cancelToken)
        {
            string tempHocrFilePath        = Path.Combine(Paths.Temp, Path.GetRandomFileName());
            string tempHocrFilePathWithExt = tempHocrFilePath + TesseractHocrExtension;

            try
            {
                var runInfo   = TesseractRunInfo(ocrParams);
                var startInfo = new ProcessStartInfo
                {
                    FileName               = Path.Combine(TesseractBasePath, TesseractExePath),
                    Arguments              = $"\"{imagePath}\" \"{tempHocrFilePath}\" -l {ocrParams.LanguageCode} {runInfo.Arguments} hocr",
                    UseShellExecute        = false,
                    CreateNoWindow         = true,
                    RedirectStandardOutput = true,
                    RedirectStandardError  = true
                };
                if (runInfo.PrefixPath != null)
                {
                    startInfo.EnvironmentVariables["TESSDATA_PREFIX"] = Path.Combine(TesseractBasePath, runInfo.PrefixPath);
                }
                if (runInfo.DataPath != null)
                {
                    var tessdata = new DirectoryInfo(Path.Combine(TesseractBasePath, runInfo.DataPath));
                    EnsureHocrConfigExists(tessdata);
                }
                // Dispose the Process object on every exit path so its handles are released promptly
                using (var tesseractProcess = Process.Start(startInfo))
                {
                    if (tesseractProcess == null)
                    {
                        // Couldn't start tesseract for some reason
                        Log.Error("Couldn't start OCR process.");
                        return(null);
                    }
#if !(DEBUG && DEBUGTESS)
                    // Both streams are redirected, so they must be drained: otherwise a chatty process can
                    // fill the pipe buffer and block until the timeout. The output itself is not needed here.
                    tesseractProcess.BeginOutputReadLine();
                    tesseractProcess.BeginErrorReadLine();
#endif
                    var timeout = (int)(appConfigManager.Config.OcrTimeoutInSeconds * 1000);
                    if (timeout == 0)
                    {
                        // No timeout configured; fall back to the built-in default
                        timeout = DEFAULT_TIMEOUT;
                    }
                    var stopwatch = Stopwatch.StartNew();
                    // Poll for exit so that timeout and cancellation are noticed between checks
                    while (!tesseractProcess.WaitForExit(CHECK_INTERVAL))
                    {
                        if (stopwatch.ElapsedMilliseconds >= timeout || cancelToken.IsCancellationRequested)
                        {
                            if (stopwatch.ElapsedMilliseconds >= timeout)
                            {
                                Log.Error("OCR process timed out.");
                            }
                            try
                            {
                                tesseractProcess.Kill();
                                // Wait a bit to give the process time to release its file handles
                                Thread.Sleep(200);
                            }
                            catch (Exception e)
                            {
                                Log.ErrorException("Error killing OCR process", e);
                            }
                            return(null);
                        }
                    }
#if DEBUG && DEBUGTESS
                    Debug.WriteLine("Tesseract stopwatch: " + stopwatch.ElapsedMilliseconds);
                    var output = tesseractProcess.StandardOutput.ReadToEnd();
                    if (output.Length > 0)
                    {
                        Log.Error("Tesseract stdout: {0}", output);
                    }
                    output = tesseractProcess.StandardError.ReadToEnd();
                    if (output.Length > 0)
                    {
                        Log.Error("Tesseract stderr: {0}", output);
                    }
#endif
                    XDocument hocrDocument = XDocument.Load(tempHocrFilePathWithExt);
                    return(new OcrResult
                    {
                        PageBounds = hocrDocument.Descendants()
                                     .Where(x => x.Attributes("class").Any(y => y.Value == "ocr_page"))
                                     .Select(x => GetBounds(x.Attribute("title")))
                                     .First(),
                        // Materialize now so parsing errors are caught by the handler below and the
                        // elements are built once rather than on every enumeration
                        Elements = hocrDocument.Descendants()
                                   .Where(x => x.Attributes("class").Any(y => y.Value == "ocrx_word"))
                                   .Select(x => new OcrResultElement {
                            Text = x.Value, Bounds = GetBounds(x.Attribute("title"))
                        })
                                   .ToList(),
                        RightToLeft = InstalledLanguages.Where(x => x.Code == ocrParams.LanguageCode).Select(x => x.RTL).FirstOrDefault()
                    });
                }
            }
            catch (Exception e)
            {
                Log.ErrorException("Error running OCR", e);
                return(null);
            }
            finally
            {
                // Always remove the temporary hOCR output, whether or not OCR succeeded
                try
                {
                    File.Delete(tempHocrFilePathWithExt);
                }
                catch (Exception e)
                {
                    Log.ErrorException("Error cleaning up OCR temp files", e);
                }
            }
        }
Example #7
0
 /// <summary>
 /// Default Tesseract run configuration: no extra arguments, data path "tessdata" and an empty prefix path.
 /// Derived engines can override this to supply their own values.
 /// </summary>
 /// <param name="ocrParams">OCR options; not used by this default implementation.</param>
 /// <returns>A new <c>RunInfo</c> holding the default values.</returns>
 protected virtual RunInfo TesseractRunInfo(OcrParams ocrParams)
 {
     var defaults = new RunInfo
     {
         Arguments  = "",
         DataPath   = "tessdata",
         PrefixPath = ""
     };
     return defaults;
 }
Example #8
0
 /// <summary>
 /// Run configuration for this engine: no extra arguments, and neither a data path nor a prefix path.
 /// </summary>
 /// <param name="ocrParams">OCR options; not used by this implementation.</param>
 /// <returns>A new <c>RunInfo</c> with empty arguments and null paths.</returns>
 protected override RunInfo TesseractRunInfo(OcrParams ocrParams)
 {
     var info = new RunInfo
     {
         Arguments  = "",
         DataPath   = null,
         PrefixPath = null
     };
     return info;
 }