/// <summary>
/// Looks up spelling suggestions for <paramref name="input"/> and returns only the suggested terms.
/// </summary>
/// <param name="input">The term to look up.</param>
/// <param name="symSpell">The SymSpell instance whose <c>Lookup</c> method is queried.</param>
/// <returns>
/// The <c>term</c> of every suggestion, in the same order in which <c>Lookup</c> returned them.
/// </returns>
/// <remarks>
/// Also writes the number of suggestions to the console.
/// </remarks>
public static List<string> Correct(string input, SymSpell symSpell)
{
    //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;
    List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

    // Presize the result: exactly one term is added per suggestion.
    List<string> terms = new List<string>(suggestions.Count);
    foreach (var suggestion in suggestions)
    {
        // The term is copied as-is; the earlier per-item ToList() call only
        // produced a throw-away list of characters and has been removed.
        terms.Add(suggestion.term);
    }

    // Always true while verbosity is the constant above; kept so the count line
    // disappears automatically if the constant is ever switched to Top.
    if (verbosity != SymSpell.Verbosity.Top)
    {
        Console.WriteLine(suggestions.Count.ToString() + " suggestions");
    }

    return terms;
}
/// <summary>
/// Looks up spelling suggestions for <paramref name="input"/> and reports them on the console.
/// </summary>
/// <param name="input">The term to look up.</param>
/// <param name="symSpell">The SymSpell instance whose <c>Lookup</c> method is queried.</param>
/// <remarks>
/// Output, in order: the elapsed lookup time in milliseconds, one line per suggestion
/// (term, distance, count) and finally the number of suggestions.
/// </remarks>
public static void Correct(string input, SymSpell symSpell)
{
    //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

    // Only the lookup itself is timed.
    Stopwatch timer = Stopwatch.StartNew();
    List<SymSpell.SuggestItem> matches = symSpell.Lookup(input, verbosity);
    timer.Stop();

    string elapsedMs = timer.Elapsed.TotalMilliseconds.ToString("0.000");
    Console.WriteLine(elapsedMs + " ms");

    //display term and frequency
    for (int index = 0; index < matches.Count; index++)
    {
        var match = matches[index];
        string distanceText = match.distance.ToString();
        string countText = match.count.ToString("N0");
        Console.WriteLine(string.Join(" ", match.term, distanceText, countText));
    }

    // Always true while verbosity is the constant above.
    if (verbosity != SymSpell.Verbosity.Top)
    {
        string summary = matches.Count.ToString() + " suggestions";
        Console.WriteLine(summary);
    }
}
/// <summary>
/// Looks up spelling suggestions for <paramref name="input"/> and returns them unchanged.
/// </summary>
/// <param name="input">The term to look up.</param>
/// <param name="symSpell">The SymSpell instance whose <c>Lookup</c> method is queried.</param>
/// <returns>The list produced by <c>Lookup</c> with <c>Verbosity.All</c>.</returns>
/// <remarks>
/// Logs the number of suggestions through <c>Debug.Log</c>
/// (NOTE(review): presumably UnityEngine.Debug; confirm against the file's usings).
/// </remarks>
public static List<SymSpell.SuggestItem> Correct(string input, SymSpell symSpell)
{
    //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.All;
    List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

    // The former per-suggestion loop had its only statement commented out, so it
    // iterated over every result without doing anything; it has been removed.

    // Always true while verbosity is the constant above.
    if (verbosity != SymSpell.Verbosity.Top)
    {
        Debug.Log(suggestions.Count.ToString() + " suggestions");
    }

    return suggestions;
}
/// <summary>
/// Loads the English frequency dictionary, looks up every query term from the noisy
/// test file and asserts that the total number of returned suggestions is 4945.
/// </summary>
public void LookupShouldReplicateNoisyResults()
{
    const int editDistanceMax = 2;
    const int prefixLength = 7;
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

    // Both data files are addressed relative to the application base directory.
    string baseDir = AppDomain.CurrentDomain.BaseDirectory;
    string dictionaryPath = baseDir + "../../../SymSpell/frequency_dictionary_en_82_765.txt";
    string queryPath = baseDir + "../../../SymSpell.Demo/test_data/noisy_query_en_1000.txt";

    //for spelling correction (genuine English words)
    var symSpell = new SymSpell(83000, editDistanceMax, prefixLength);
    symSpell.LoadDictionary(dictionaryPath, 0, 1);

    //load 1000 terms with random spelling errors
    string[] queries = new string[1000];
    int loaded = 0;
    using (StreamReader reader = new StreamReader(File.OpenRead(queryPath)))
    {
        //process a single line at a time only for memory efficiency
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            // A null separator array makes Split break the line on white-space.
            string[] fields = line.Split((char[])null);
            if (fields.Length < 2)
            {
                continue;
            }

            // Only the first field of each line is used as the query term.
            queries[loaded] = fields[0];
            loaded++;
        }
    }

    // Every slot of the fixed-size array is looked up, including any left unfilled.
    int resultSum = 0;
    foreach (string query in queries)
    {
        var found = symSpell.Lookup(query, verbosity, symSpell.MaxDictionaryEditDistance);
        resultSum += found.Count;
    }

    Assert.AreEqual(4945, resultSum);
}
/// <summary>
/// Creates a SymSpell dictionary from a text corpus, looks up every line of a key file,
/// prints the suggestions, and finally prints the elapsed time and the change in
/// managed memory.
/// </summary>
/// <param name="args">
/// Optional overrides: <c>args[0]</c> is the corpus file used to create the dictionary,
/// <c>args[1]</c> is the file whose lines are looked up. When omitted, the original
/// hard-coded paths are used.
/// </param>
public static void Main(string[] args)
{
    //set parameters
    const int initialCapacity = 82765;
    const int maxEditDistance = 3;
    const int prefixLength = 7;
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Top;

    bool hasCorpusArg = args != null && args.Length > 0;
    bool hasKeysArg = args != null && args.Length > 1;
    string corpusPath = hasCorpusArg ? args[0] : @"C:\Dev\csharp\names.txt";
    string keysPath = hasKeysArg ? args[1] : @"C:\Dev\csharp\keys.txt";

    // Baseline for the memory delta reported at the end.
    long memSize = GC.GetTotalMemory(true);
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    var symSpell = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

    // Create the dictionary from a text corpus. A previously built frequency
    // dictionary could be loaded with symSpell.LoadDictionary(path, 0, 1) instead.
    // The boolean result was previously ignored, which let the run continue
    // against an empty dictionary.
    if (!symSpell.CreateDictionary(corpusPath))
    {
        Console.Error.WriteLine("File not found: " + Path.GetFullPath(corpusPath));
        return;
    }

    // Report a missing key file the same way instead of failing with an
    // unhandled exception from File.OpenRead.
    if (!File.Exists(keysPath))
    {
        Console.Error.WriteLine("File not found: " + Path.GetFullPath(keysPath));
        return;
    }

    using (StreamReader sr = new StreamReader(File.OpenRead(keysPath)))
    {
        string line;
        //process a single line at a time only for memory efficiency
        while ((line = sr.ReadLine()) != null)
        {
            var suggestions = symSpell.Lookup(line.Trim(), verbosity);
            Console.WriteLine(suggestions.Count);
            foreach (var suggestion in suggestions)
            {
                Console.WriteLine(suggestion);
            }
        }
    }

    // The measured time covers dictionary creation, all lookups and their console output.
    stopWatch.Stop();
    Console.WriteLine(stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms ");

    long memDelta = GC.GetTotalMemory(true) - memSize;
    Console.WriteLine((memDelta / 1024 / 1024.0).ToString("N0") + " MB");
}