RunFuzzyMatchSequential(
                string[] wordsLookup, 
                IEnumerable<string> files)
        {
            // Sequential workflow -> how can we parallelize this work?
            // The collection 'matchSet' cannot be shared among threads  

            var matchSet = new HashSet<WordDistanceStruct>();

            foreach (var file in files)
            {
                string readText = File.ReadAllText(file);

                var words = readText.Split(punctuation.Value)
                    .Where(w => !IgnoreWords.Contains(w))
                    .AsSet();

                foreach (var wl in wordsLookup)
                {
                    var bestMatch = JaroWinklerModule.bestMatch(words, wl, threshold);
                    matchSet.AddRange(bestMatch);
                }
            }

            return PrintSummary(matchSet);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Searches <c>WordsToSearch</c> for a fuzzy match of <paramref name="word"/>
        /// using <c>JaroWinklerModule.bestMatch</c>.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <returns>
        /// <c>(true, match)</c>, where <c>match</c> is the <c>Match</c> value of the first
        /// result, when at least one result is produced; otherwise <c>(false, null)</c>.
        /// </returns>
        public static (bool Matched, string Word) CpuMatch(string word)
        {
            // 0.9 is the threshold argument handed to bestMatch.
            var matches = JaroWinklerModule.bestMatch(WordsToSearch, word, 0.9);

            // Walk the result once: Any() followed by First() would each start a new
            // enumeration, which repeats the work if the sequence is lazily evaluated.
            foreach (var candidate in matches)
            {
                return (true, candidate.Match);
            }

            return (false, null);
        }