Example #1
0
        /// <summary>
        /// Runs recognition on every page in <c>_pdfPages</c> and gathers the
        /// per-page results into a single list.
        /// </summary>
        /// <param name="settings">Recognition settings, passed unchanged to each page's <c>Process</c> call.</param>
        /// <returns>The results of all pages, appended in the order the pages are enumerated.</returns>
        public List<OcrResults> Process(TextRecognitionSettings settings)
        {
            var collected = new List<OcrResults>();

            foreach (var pdfPage in _pdfPages)
            {
                var pageResults = pdfPage.Process(settings);
                collected.AddRange(pageResults);
            }

            return collected;
        }
Example #2
0
        /// <summary>
        /// Optionally pre-processes the image, OCRs it with the Tesseract engine, splits the
        /// recognized text into paragraphs and builds one <c>OcrResults</c> entry per
        /// retained paragraph.
        /// </summary>
        /// <param name="settings">
        /// Flags that switch the individual pre-processing steps, word correction and type
        /// recognition on or off, plus the keywords to look for in each paragraph.
        /// </param>
        /// <returns>
        /// One entry per paragraph that was not skipped. A paragraph is skipped when
        /// <c>settings.KeyWords</c> is non-empty and no keywords were recorded for it.
        /// </returns>
        public List<OcrResults> Process(TextRecognitionSettings settings)
        {
            var output = new List<OcrResults>();

            // Image clean-up: each step runs only when its flag is set.
            if (settings.IsBinarizationEnable)
            {
                Binarize();
            }

            if (settings.IsNoiseRemovalEnable)
            {
                RemoveNoise();
            }

            if (settings.IsContrastAdjusmentEnable)
            {
                AdjustContrast();
            }

            if (settings.IsRotationEnable)
            {
                var turningAngle = PredictTurningAngle();
                if (turningAngle != 0)
                {
                    Rotate(turningAngle);
                }
            }

            OcrImage(OcrEngines.Tesseract);

            // With rotation enabled, a failed validity check triggers a 180-degree
            // turn followed by a second OCR pass.
            if (settings.IsRotationEnable)
            {
                if (!IsOCRedTextValid())
                {
                    Rotate(180);
                    OcrImage(OcrEngines.Tesseract);
                }
            }

            var paragraphs = TextHelper.SplitTextIntoParagraphs(_ocredText);

            for (var index = 0; index < paragraphs.Length; index++)
            {
                if (settings.IsWordsCorrectionEnable)
                {
                    // NOTE(review): the backslash in this relative path looks Windows-specific — confirm target platforms.
                    paragraphs[index] = CorrectOCRedText(@"Resources\EnglishDictionary.json", paragraphs[index]);
                }

                var paragraphText = paragraphs[index];

                // NOTE(review): PredictType() takes no arguments yet is re-run for every
                // paragraph; presumably its result does not change inside this loop — confirm
                // before hoisting it.
                string[] typeKeyWords;
                if (settings.IsTypeRecognitionEnable)
                {
                    typeKeyWords = GetSpecialTypeKeyWords(PredictType());
                }
                else
                {
                    typeKeyWords = new string[0];
                }

                var entry = new OcrResults {
                    FileName = _inputFileName
                };

                if (settings.KeyWords.Length > 0)
                {
                    entry.KeyWords = WordsFinder.GetCountedKeyWordsFromText(paragraphText, settings.KeyWords);
                }

                if (typeKeyWords.Length > 0)
                {
                    var typeMatches = WordsFinder.GetCountedKeyWordsFromText(paragraphText, typeKeyWords);
                    if (!string.IsNullOrEmpty(entry.KeyWords))
                    {
                        // Appended directly after the user-keyword matches, with no separator added here.
                        entry.KeyWords += typeMatches;
                    }
                    else
                    {
                        entry.KeyWords = typeMatches;
                    }
                }

                // Keep the paragraph unless user keywords were requested and nothing was recorded for it.
                if (settings.KeyWords.Length <= 0 || !string.IsNullOrEmpty(entry.KeyWords))
                {
                    entry.OcredText = paragraphText;
                    output.Add(entry);
                }
            }

            return output;
        }