/// <summary>
/// Runs <c>Process</c> on every page in <c>_pdfPages</c> and gathers the
/// per-page results into a single list, in page enumeration order.
/// </summary>
/// <param name="settings">Recognition settings forwarded unchanged to each page.</param>
/// <returns>All results produced by the pages; empty when there are no pages.</returns>
public List<OcrResults> Process(TextRecognitionSettings settings) {
    var combinedResults = new List<OcrResults>();

    foreach (var pdfPage in _pdfPages) {
        var pageResults = pdfPage.Process(settings);
        combinedResults.AddRange(pageResults);
    }

    return combinedResults;
}
/// <summary>
/// Applies the pre-processing steps enabled in <paramref name="settings"/>, runs
/// Tesseract OCR, splits the recognized text into paragraphs and builds one
/// <see cref="OcrResults"/> per paragraph that passes the keyword filter.
/// </summary>
/// <param name="settings">
/// Flags selecting the optional steps (binarization, noise removal, contrast
/// adjustment, rotation, word correction, type recognition) plus the user keyword
/// list. A null <c>KeyWords</c> is treated the same as an empty one.
/// </param>
/// <returns>
/// One entry per retained paragraph. When user keywords are supplied, a paragraph
/// is dropped if neither user nor type keywords were found in it.
/// </returns>
public List<OcrResults> Process(TextRecognitionSettings settings) {
    const string dictionaryPath = @"Resources\EnglishDictionary.json";

    var results = new List<OcrResults>();

    // Optional image clean-up, in a fixed order.
    if (settings.IsBinarizationEnable) {
        Binarize();
    }
    if (settings.IsNoiseRemovalEnable) {
        RemoveNoise();
    }
    if (settings.IsContrastAdjusmentEnable) {
        AdjustContrast();
    }
    if (settings.IsRotationEnable) {
        var angle = PredictTurningAngle();
        if (angle != 0) {
            Rotate(angle);
        }
    }

    OcrImage(OcrEngines.Tesseract);

    // If the first pass yields invalid text, retry once after a 180-degree turn
    // (presumably the page was upside down - confirm against IsOCRedTextValid).
    if (settings.IsRotationEnable && !IsOCRedTextValid()) {
        Rotate(180);
        OcrImage(OcrEngines.Tesseract);
    }

    // Guard against a missing keyword list instead of failing with a NullReferenceException.
    var hasUserKeyWords = settings.KeyWords != null && settings.KeyWords.Length > 0;

    // Type keywords do not depend on the paragraph, so they are resolved at most
    // once (on first need) rather than once per paragraph.
    string[] typeKeyWords = new string[0];
    var typeKeyWordsResolved = false;

    var paragraphs = TextHelper.SplitTextIntoParagraphs(_ocredText);
    for (var i = 0; i < paragraphs.Length; i++) {
        if (settings.IsWordsCorrectionEnable) {
            paragraphs[i] = CorrectOCRedText(dictionaryPath, paragraphs[i]);
        }

        if (settings.IsTypeRecognitionEnable && !typeKeyWordsResolved) {
            var type = PredictType();
            // Fall back to the empty array if no keywords are returned for the type.
            typeKeyWords = GetSpecialTypeKeyWords(type) ?? typeKeyWords;
            typeKeyWordsResolved = true;
        }

        var ocrResult = new OcrResults { FileName = _inputFileName };

        if (hasUserKeyWords) {
            ocrResult.KeyWords = WordsFinder.GetCountedKeyWordsFromText(paragraphs[i], settings.KeyWords);
        }

        if (typeKeyWords.Length > 0) {
            var foundTypeKeyWords = WordsFinder.GetCountedKeyWordsFromText(paragraphs[i], typeKeyWords);
            if (string.IsNullOrEmpty(ocrResult.KeyWords)) {
                ocrResult.KeyWords = foundTypeKeyWords;
            } else {
                ocrResult.KeyWords += foundTypeKeyWords;
            }
        }

        // With user keywords configured, paragraphs without any keyword hit are skipped.
        if (hasUserKeyWords && string.IsNullOrEmpty(ocrResult.KeyWords)) {
            continue;
        }

        ocrResult.OcredText = paragraphs[i];
        results.Add(ocrResult);
    }

    return results;
}