예제 #1
0
        /// <summary>
        /// Loads the main resource for the given language. If that language is not among
        /// <c>InstalledLanguages</c>, the default language is stored in the preferences and
        /// loaded instead.
        /// </summary>
        /// <param name="rpLanguage">Directory name identifying the requested language.</param>
        public void LoadMainResource(string rpLanguage)
        {
            if (!InstalledLanguages.Any(r => r.Directory == rpLanguage))
            {
                // Fall back to the default language and make sure the fallback is what
                // actually gets loaded, not the language that was just found to be missing.
                rpLanguage = GetDefaultLanguage().Directory;
                Preference.Instance.Language.Value = rpLanguage;
            }

            LoadMainResourceCore(rpLanguage);
        }
예제 #2
0
 /// <summary>
 /// Determines whether every language named in <paramref name="langCode"/> can be processed.
 /// </summary>
 /// <param name="langCode">A language code, or several codes joined with '+' (e.g. "eng+fra").</param>
 /// <returns>
 /// <c>true</c> when the code is non-empty, <c>IsInstalled</c> and <c>IsSupported</c> are both set,
 /// and each individual code matches an entry in <c>InstalledLanguages</c>; otherwise <c>false</c>.
 /// </returns>
 public bool CanProcess(string langCode)
 {
     bool ready = !string.IsNullOrEmpty(langCode) && IsInstalled && IsSupported;
     if (!ready)
     {
         return false;
     }

     // Each '+'-separated part must match an installed language on its own.
     foreach (string requestedCode in langCode.Split('+'))
     {
         bool found = InstalledLanguages.Any(language => language.Code == requestedCode);
         if (!found)
         {
             return false;
         }
     }
     return true;
 }
예제 #3
0
        /// <summary>
        /// Runs the base initialization, then fills <c>InstalledLanguages</c> with the languages that
        /// are installed or currently installing (ordered by <c>Id</c>) and subscribes to the
        /// language service's Downloading, Initialized and Initializing events.
        /// </summary>
        public override async Task InitializeAsync()
        {
            await base.InitializeAsync();

            // Only languages that are installed, or in the middle of being installed, are kept.
            var availableLanguages =
                from language in InteractiveInkServices.LanguageService.Languages
                where LanguageService.IsInstalled(language) || LanguageService.IsInstalling(language)
                select language;

            InstalledLanguages.SyncWith(availableLanguages);
            InstalledLanguages.SortBy(language => language.Id);

            InteractiveInkServices.LanguageService.Downloading += OnDownloading;
            InteractiveInkServices.LanguageService.Initialized += OnInitialized;
            InteractiveInkServices.LanguageService.Initializing += OnInitializing;
        }
예제 #4
0
        /// <summary>
        /// Runs the Tesseract executable on an image and parses the hOCR file it writes.
        /// </summary>
        /// <param name="imagePath">Path of the image file handed to Tesseract.</param>
        /// <param name="ocrParams">OCR parameters; supplies the language code and the run configuration.</param>
        /// <param name="cancelToken">Polled while waiting for the process; if cancellation is requested the process is killed.</param>
        /// <returns>
        /// The parsed page bounds and word elements, or <c>null</c> when the process could not be
        /// started, timed out, was cancelled, or any exception was thrown along the way.
        /// </returns>
        public OcrResult ProcessImage(string imagePath, OcrParams ocrParams, CancellationToken cancelToken)
        {
            string tempHocrFilePath        = Path.Combine(Paths.Temp, Path.GetRandomFileName());
            string tempHocrFilePathWithExt = tempHocrFilePath + TesseractHocrExtension;

            try
            {
                var runInfo   = TesseractRunInfo(ocrParams);
                var startInfo = new ProcessStartInfo
                {
                    FileName               = Path.Combine(TesseractBasePath, TesseractExePath),
                    Arguments              = $"\"{imagePath}\" \"{tempHocrFilePath}\" -l {ocrParams.LanguageCode} {runInfo.Arguments} hocr",
                    UseShellExecute        = false,
                    CreateNoWindow         = true,
                    RedirectStandardOutput = true,
                    RedirectStandardError  = true
                };
                if (runInfo.PrefixPath != null)
                {
                    startInfo.EnvironmentVariables["TESSDATA_PREFIX"] = Path.Combine(TesseractBasePath, runInfo.PrefixPath);
                }
                if (runInfo.DataPath != null)
                {
                    var tessdata = new DirectoryInfo(Path.Combine(TesseractBasePath, runInfo.DataPath));
                    EnsureHocrConfigExists(tessdata);
                }

                // The using statement releases the process handle on every exit path.
                using (var tesseractProcess = Process.Start(startInfo))
                {
                    if (tesseractProcess == null)
                    {
                        // Couldn't start tesseract for some reason
                        Log.Error("Couldn't start OCR process.");
                        return(null);
                    }

#if DEBUG && DEBUGTESS
                    var stdout = new System.Text.StringBuilder();
                    var stderr = new System.Text.StringBuilder();
                    tesseractProcess.OutputDataReceived += (sender, args) =>
                    {
                        if (args.Data != null)
                        {
                            stdout.AppendLine(args.Data);
                        }
                    };
                    tesseractProcess.ErrorDataReceived += (sender, args) =>
                    {
                        if (args.Data != null)
                        {
                            stderr.AppendLine(args.Data);
                        }
                    };
#endif
                    // Both streams are redirected, so they must be drained; otherwise the child
                    // can block once a pipe buffer fills up and would only end via the timeout.
                    tesseractProcess.BeginOutputReadLine();
                    tesseractProcess.BeginErrorReadLine();

                    var timeout = (int)(appConfigManager.Config.OcrTimeoutInSeconds * 1000);
                    if (timeout <= 0)
                    {
                        // Zero (or a nonsensical negative value) means "use the default".
                        timeout = DEFAULT_TIMEOUT;
                    }
                    var stopwatch = Stopwatch.StartNew();
                    while (!tesseractProcess.WaitForExit(CHECK_INTERVAL))
                    {
                        bool timedOut = stopwatch.ElapsedMilliseconds >= timeout;
                        if (timedOut || cancelToken.IsCancellationRequested)
                        {
                            if (timedOut)
                            {
                                Log.Error("OCR process timed out.");
                            }
                            try
                            {
                                tesseractProcess.Kill();
                                // Give the process a moment to exit and release its file handles
                                // before the temp file is deleted in the finally block.
                                tesseractProcess.WaitForExit(200);
                            }
                            catch (Exception e)
                            {
                                Log.ErrorException("Error killing OCR process", e);
                            }
                            return(null);
                        }
                    }
#if DEBUG && DEBUGTESS
                    // The parameterless overload waits until the asynchronous output handlers have finished.
                    tesseractProcess.WaitForExit();
                    Debug.WriteLine("Tesseract stopwatch: " + stopwatch.ElapsedMilliseconds);
                    if (stdout.Length > 0)
                    {
                        Log.Error("Tesseract stdout: {0}", stdout.ToString());
                    }
                    if (stderr.Length > 0)
                    {
                        Log.Error("Tesseract stderr: {0}", stderr.ToString());
                    }
#endif
                }

                XDocument hocrDocument = XDocument.Load(tempHocrFilePathWithExt);
                return(new OcrResult
                {
                    PageBounds = hocrDocument.Descendants()
                                 .Where(x => x.Attributes("class").Any(y => y.Value == "ocr_page"))
                                 .Select(x => GetBounds(x.Attribute("title")))
                                 .First(),
                    // Materialized here so that any parsing error is raised inside this try block
                    // rather than later, when the caller enumerates the elements.
                    Elements = hocrDocument.Descendants()
                               .Where(x => x.Attributes("class").Any(y => y.Value == "ocrx_word"))
                               .Select(x => new OcrResultElement {
                        Text = x.Value, Bounds = GetBounds(x.Attribute("title"))
                    })
                               .ToList(),
                    RightToLeft = InstalledLanguages.Where(x => x.Code == ocrParams.LanguageCode).Select(x => x.RTL).FirstOrDefault()
                });
            }
            catch (Exception e)
            {
                Log.ErrorException("Error running OCR", e);
                return(null);
            }
            finally
            {
                try
                {
                    File.Delete(tempHocrFilePathWithExt);
                }
                catch (Exception e)
                {
                    Log.ErrorException("Error cleaning up OCR temp files", e);
                }
            }
        }