/// <summary>
/// Loads the OCR patterns again and re-runs the OCR error test on every unknown word of the given entries
/// </summary>
/// <param name="entries">The word entries to run the OCR pattern check on again</param>
public void ReloadOCRPatterns(IEnumerable<WordEntry> entries)
{
    // the warnings handed back by the load are not used by this method
    List<string> loadWarnings;
    LoadOCRPatterns(out loadWarnings);

    foreach (var entry in entries)
    {
        // entries that are not flagged as unknown words are left untouched
        if (!entry.IsUnknownWord)
        {
            continue;
        }

        // the out result is required by the signature but not inspected here
        OCRErrorTest.OCRResult discardedResult;
        OCRErrorTest.Test(entry, ocrPatterns, fullDictionary, out discardedResult);
    }
}
/// <summary>
/// Fills the suggestion of a word entry
/// </summary>
/// <remarks>
/// Known words are only marked as ignored. For unknown words the dictionary suggestions are built
/// (when the entry is not ignored, has no suggestion yet and has a non-empty text), after which
/// every enabled test is run in a fixed order.
/// </remarks>
/// <param name="we">The word entry to fill the suggestion for</param>
/// <param name="wordEntries">A collection of all the word entries</param>
/// <param name="ocrPatternsAppliedCount">A collection that keeps track of which ocr patterns have been applied and how many times</param>
/// <param name="enabledTests">The type names of the tests to run; a test is skipped when its type name is not in this set</param>
public void FillSuggestion(WordEntry we, Dictionary<string, WordEntry> wordEntries, Dictionary<string, Dictionary<string, int>> ocrPatternsAppliedCount, HashSet<string> enabledTests)
{
    if (!we.IsUnknownWord)
    {
        // it's a word that is known, ignore and don't fill a suggestion
        we.Ignore = true;
        return;
    }

    // build the dictionary suggestions
    // note: this takes a long time!
    // an empty text has no first/last character to match on, so it is skipped instead of throwing
    if (!we.Ignore && string.IsNullOrEmpty(we.Suggestion) && !string.IsNullOrEmpty(we.Text))
    {
        we.DictionarySuggesions = FindDictionarySuggestions(we.Text);
    }

    // test for numbers
    if (enabledTests.Contains(typeof(NumberTest).Name))
    {
        NumberTest.Test(we);
    }

    // test for OCR errors
    if (enabledTests.Contains(typeof(OCRErrorTest).Name))
    {
        OCRErrorTest.OCRResult result;
        OCRErrorTest.Test(we, ocrPatterns, fullDictionary, out result);

        // if the OCR pattern was successfully applied, count it in the dictionary
        // that keeps track of how many times a pattern is applied
        if (result != null && result.IsFixed)
        {
            CountAppliedOCRPattern(ocrPatternsAppliedCount, result.PatternSource, result.PatternTarget);
        }
    }

    // test for name
    if (enabledTests.Contains(typeof(NameTest).Name))
    {
        NameTest.Test(we);
    }

    // test for suffixes
    if (enabledTests.Contains(typeof(SuffixTest).Name))
    {
        SuffixTest.Test(we, fullDictionary);
    }

    // test for unnecessary hyphens
    if (enabledTests.Contains(typeof(UnnecessaryHyphenTest).Name))
    {
        UnnecessaryHyphenTest.Test(we, fullDictionary);
    }

    // test for unnecessary diacritics
    if (enabledTests.Contains(typeof(UnnecessaryDiacriticsTest).Name))
    {
        UnnecessaryDiacriticsTest.Test(we, fullDictionary);
    }

    // test for high probability
    if (enabledTests.Contains(typeof(HighProbabilityTest).Name))
    {
        HighProbabilityTest.Test(we, wordEntries);
    }

    // test for probability on neighbours
    if (enabledTests.Contains(typeof(HighProbabilityOnNeighboursTest).Name))
    {
        HighProbabilityOnNeighboursTest.Test(we, false);
    }

    // test for missing spaces
    if (enabledTests.Contains(typeof(MissingSpacesTest).Name))
    {
        MissingSpacesTest.Test(we, wordEntries, fullDictionary);
    }
}

/// <summary>
/// Collects up to 10 dictionary words that resemble the given text, ordered by their distance to the lower-cased text
/// </summary>
/// <param name="text">The non-empty text of the word entry to find suggestions for</param>
/// <returns>The closest candidates from the full dictionary, at most 10</returns>
private string[] FindDictionarySuggestions(string text)
{
    // computed once here instead of once per dictionary word
    string lowerText = text.ToLower();
    char firstChar = char.ToLower(text[0]);
    char lastChar = char.ToLower(text[text.Length - 1]);

    return fullDictionary
        .Where(s => s.Length > 0 // an empty dictionary word has no first/last character to compare
            && Math.Abs(s.Length - text.Length) <= 2 // only take the words that have a max 2 char length deviation
            // both the first and the last character are compared case-insensitively
            && (char.ToLower(s[0]) == firstChar || char.ToLower(s[s.Length - 1]) == lastChar))
        .OrderBy(s => s.GetDistance(lowerText))
        .Take(10)
        .ToArray();
}

/// <summary>
/// Increases the number of times the given OCR pattern was applied, adding the pattern when it is not tracked yet
/// </summary>
/// <param name="ocrPatternsAppliedCount">The counts per pattern source and pattern target</param>
/// <param name="patternSource">The source of the applied pattern</param>
/// <param name="patternTarget">The target of the applied pattern</param>
private static void CountAppliedOCRPattern(Dictionary<string, Dictionary<string, int>> ocrPatternsAppliedCount, string patternSource, string patternTarget)
{
    Dictionary<string, int> patternMatches;

    // make sure to lock the dictionary, as this is executed in parallel
    lock (ocrPatternsAppliedCount)
    {
        // add the pattern if it's not present
        if (!ocrPatternsAppliedCount.TryGetValue(patternSource, out patternMatches))
        {
            patternMatches = new Dictionary<string, int>();
            ocrPatternsAppliedCount[patternSource] = patternMatches;
        }
    }

    // lock and increase the count of the pattern or add it if it wasn't present yet
    lock (patternMatches)
    {
        // TryGetValue leaves the count at 0 when the target is missing, so a first hit is stored as 1
        int ocrCount;
        patternMatches.TryGetValue(patternTarget, out ocrCount);
        patternMatches[patternTarget] = ocrCount + 1;
    }
}