Beispiel #1
0
        /// <summary>
        /// Loads the OCR patterns again and re-runs the OCR error test on every unknown word in the given entries
        /// </summary>
        /// <param name="entries">The word entries to re-test; entries that are not flagged as unknown words are skipped</param>
        public void ReloadOCRPatterns(IEnumerable <WordEntry> entries)
        {
            // the warnings reported while loading are not used by this method
            List<string> loadWarnings;
            LoadOCRPatterns(out loadWarnings);

            foreach (var entry in entries)
            {
                // only unknown words are tested against the OCR patterns
                if (!entry.IsUnknownWord)
                {
                    continue;
                }

                // the out result is required by the signature but is not inspected here
                OCRErrorTest.OCRResult discardedResult;
                OCRErrorTest.Test(entry, ocrPatterns, fullDictionary, out discardedResult);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Fills the suggestion of a word entry.
        /// Known words are marked as ignored and skipped; for unknown words the dictionary
        /// suggestions are built (when no suggestion is set yet) and every enabled test is run.
        /// </summary>
        /// <param name="we">The word entry to fill the suggestion for</param>
        /// <param name="wordEntries">A collection of all the word entries</param>
        /// <param name="ocrPatternsAppliedCount">A collection that keeps track of which ocr patterns have been applied and how many times (pattern source -> pattern target -> count)</param>
        /// <param name="enabledTests">The type names of the tests to run; a test is only executed when its type name is contained in this set</param>
        public void FillSuggestion(WordEntry we, Dictionary <string, WordEntry> wordEntries, Dictionary <string, Dictionary <string, int> > ocrPatternsAppliedCount, HashSet <string> enabledTests)
        {
            if (!we.IsUnknownWord)
            {
                // it's a word that is known, ignore and don't fill a suggestion
                we.Ignore = true;
                return;
            }

            // build the dictionary suggestions
            // note: this takes a long time!
            // an empty text has no first/last character to compare, so no suggestions are built for it
            if (!we.Ignore && string.IsNullOrEmpty(we.Suggestion) && !string.IsNullOrEmpty(we.Text))
            {
                // hoist everything that only depends on the word out of the per-dictionary-entry lambdas
                string text = we.Text;
                string lowerText = text.ToLower();
                char lowerFirst = char.ToLower(text[0]);
                char lowerLast = char.ToLower(text[text.Length - 1]);

                we.DictionarySuggesions = fullDictionary
                                          .Where(s => s.Length > 0                                   // skip empty dictionary entries, they have no first/last character
                                                      && Math.Abs(s.Length - text.Length) <= 2       // only take the words that have a max 2 char length deviation
                                                      && (char.ToLower(s[0]) == lowerFirst           // same first or same last character, both compared case-insensitively
                                                          || char.ToLower(s[s.Length - 1]) == lowerLast))
                                          .OrderBy(s => s.GetDistance(lowerText))
                                          .Take(10)
                                          .ToArray();
            }

            // test for numbers
            if (enabledTests.Contains(typeof(NumberTest).Name))
            {
                NumberTest.Test(we);
            }

            // test for OCR errors
            if (enabledTests.Contains(typeof(OCRErrorTest).Name))
            {
                OCRErrorTest.OCRResult result;
                OCRErrorTest.Test(we, ocrPatterns, fullDictionary, out result);

                // if the OCR pattern was successfully applied, record it in the dictionary that keeps track of how many times a pattern is applied
                if (result != null && result.IsFixed)
                {
                    Dictionary<string, int> patternMatches;

                    // make sure to lock the dictionary, as this is executed in parallel
                    lock (ocrPatternsAppliedCount)
                    {
                        // add the pattern source if it's not present
                        if (!ocrPatternsAppliedCount.TryGetValue(result.PatternSource, out patternMatches))
                        {
                            patternMatches = new Dictionary<string, int>();
                            ocrPatternsAppliedCount[result.PatternSource] = patternMatches;
                        }
                    }

                    // lock and increase the count of the pattern target, starting at 1 if it wasn't present yet
                    lock (patternMatches)
                    {
                        // TryGetValue leaves ocrCount at 0 when the target is missing, so one assignment covers both cases
                        int ocrCount;
                        patternMatches.TryGetValue(result.PatternTarget, out ocrCount);
                        patternMatches[result.PatternTarget] = ocrCount + 1;
                    }
                }
            }

            // test for name
            if (enabledTests.Contains(typeof(NameTest).Name))
            {
                NameTest.Test(we);
            }

            // test for suffixes
            if (enabledTests.Contains(typeof(SuffixTest).Name))
            {
                SuffixTest.Test(we, fullDictionary);
            }

            // test for unnecessary hyphens
            if (enabledTests.Contains(typeof(UnnecessaryHyphenTest).Name))
            {
                UnnecessaryHyphenTest.Test(we, fullDictionary);
            }

            // test for unnecessary diacritics
            if (enabledTests.Contains(typeof(UnnecessaryDiacriticsTest).Name))
            {
                UnnecessaryDiacriticsTest.Test(we, fullDictionary);
            }

            // test for high probability
            if (enabledTests.Contains(typeof(HighProbabilityTest).Name))
            {
                HighProbabilityTest.Test(we, wordEntries);
            }

            // test for probability on neighbours
            // NOTE(review): the meaning of the 'false' argument is defined by HighProbabilityOnNeighboursTest - confirm there
            if (enabledTests.Contains(typeof(HighProbabilityOnNeighboursTest).Name))
            {
                HighProbabilityOnNeighboursTest.Test(we, false);
            }

            // test for missing spaces
            if (enabledTests.Contains(typeof(MissingSpacesTest).Name))
            {
                MissingSpacesTest.Test(we, wordEntries, fullDictionary);
            }
        }