RunFuzzyMatchSequential( string[] wordsLookup, IEnumerable<string> files) { // Sequential workflow -> how can we parallelize this work? // The collection 'matchSet' cannot be shared among threads var matchSet = new HashSet<WordDistanceStruct>(); foreach (var file in files) { string readText = File.ReadAllText(file); var words = readText.Split(punctuation.Value) .Where(w => !IgnoreWords.Contains(w)) .AsSet(); foreach (var wl in wordsLookup) { var bestMatch = JaroWinklerModule.bestMatch(words, wl, threshold); matchSet.AddRange(bestMatch); } } return PrintSummary(matchSet); }
/// <summary>
/// Looks up <paramref name="word"/> against <c>WordsToSearch</c> through
/// <c>JaroWinklerModule.bestMatch</c> and reports the first result, if any.
/// </summary>
/// <param name="word">The word passed to <c>bestMatch</c> as the lookup term.</param>
/// <returns>
/// <c>(true, match)</c>, where <c>match</c> is the <c>Match</c> member of the first
/// element produced by <c>bestMatch</c>; <c>(false, null)</c> when it produces no elements.
/// </returns>
public static (bool Matched, string Word) CpuMatch(string word)
{
    // NOTE(review): 0.9 is presumably the minimum similarity threshold - confirm against bestMatch.
    var matches = JaroWinklerModule.bestMatch(WordsToSearch, word, 0.9);

    // Take the first element in a single pass. Calling Any() and then First()
    // would enumerate the sequence twice, repeating the matching work if it is lazy.
    foreach (var candidate in matches)
    {
        return (true, candidate.Match);
    }

    return (false, null);
}