/// <summary>
/// Builds a SymSpell dictionary from the corpus at D:\sbwce.txt and writes every term whose
/// count is greater than 50, and that is not a plain (optionally negative) integer, to a log
/// file as one "term count" pair per line.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    int initialCapacity = 82765;
    string dictionaryPath = @"D:\sbwce.txt";
    int prefixLength = 7;
    int maxEditDistanceDictionary = 2;

    var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);
    symSpell.CreateDictionary(dictionaryPath);

    // The term/count table is read through reflection from the non-public instance field "words".
    System.Reflection.FieldInfo finfo = typeof(SymSpell).GetField(
        "words",
        System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
    var collection = finfo == null
        ? null
        : finfo.GetValue(symSpell) as System.Collections.Generic.Dictionary<string, System.Int64>;

    // Without this guard a renamed or retyped field would only surface as a
    // NullReferenceException (or InvalidCastException) further down.
    if (collection == null)
    {
        Console.Error.WriteLine("SymSpell field 'words' was not found or is not a Dictionary<string, long>.");
        return;
    }

    using (var fileWriter = new System.IO.StreamWriter(@"D:\Personal\Master\Materias\TFM SLN\DictionaryFiles\load4.log"))
    {
        foreach (var kvPair in collection)
        {
            // Keep only reasonably frequent terms that are not pure numbers.
            if (kvPair.Value > 50 && !Regex.IsMatch(kvPair.Key, @"^-?\d+$"))
            {
                fileWriter.WriteLine("{0} {1}", kvPair.Key, kvPair.Value);
            }
        }
    } // leaving the using block flushes and closes the writer; no explicit Close() needed
}
/// <summary>
/// Looks up spelling suggestions for <paramref name="input"/> through the static SymSpell API
/// and prints one line per suggestion (term, edit distance, count).
/// </summary>
/// <param name="input">Term to look up.</param>
/// <param name="language">Language argument forwarded to <c>SymSpell.Lookup</c>.</param>
public static void Correct(string input, string language)
{
    // Check whether the input term, or similar terms within the edit distance, are in the dictionary.
    // Results are returned sorted by ascending edit distance, then by descending word frequency.
    List<SymSpell.suggestItem> matches = SymSpell.Lookup(input, language, SymSpell.editDistanceMax);

    // Display term, distance and frequency.
    for (int index = 0; index < matches.Count; index++)
    {
        var match = matches[index];
        string distanceText = match.distance.ToString();
        string countText = match.count.ToString("N0");
        Console.WriteLine(match.term + " " + distanceText + " " + countText);
    }

    // The summary line is printed for every verbosity setting except 0.
    bool showSummary = SymSpell.verbose != 0;
    if (showSummary)
    {
        Console.WriteLine(matches.Count.ToString() + " suggestions");
    }
}
/// <summary>
/// Builds the text post-processing pipeline (empty-line removal followed by per-word SymSpell
/// correction) and registers it as a singleton in <paramref name="services"/>.
/// </summary>
/// <param name="services">Service collection that receives the combined post-processor.</param>
private static void AddPostProcessing(IServiceCollection services)
{
    const string dictionaryFile = "../../ru.dict";

    var symSpell = new SymSpell();
    Console.Out.WriteLine("Loading SymSpell dictionary...");
    if (symSpell.LoadDictionary(dictionaryFile, termIndex: 0, countIndex: 1))
    {
        Console.Out.WriteLine("SymSpell initialized!");
    }
    else
    {
        // Previously the result was ignored and success was reported unconditionally.
        // Registration still proceeds so the rest of the pipeline keeps working.
        Console.Error.WriteLine("SymSpell dictionary could not be loaded: " + System.IO.Path.GetFullPath(dictionaryFile));
    }

    var postProcessor = new CombinedProcessor(new ITextPostProcessor[]
    {
        new RemoveEmptyLinesProcessor(new RemoveEmptyLinesOptions
        {
            NormalizeLineEndings = NormalizeLineEndingsStrategy.Lf
        }),
        new PerWordProcessor(new ITextPostProcessor[]
        {
            new SymSpellProcessor(symSpell, 1, Enumerable.Empty<string>())
        })
    });
    services.AddSingleton(postProcessor);
}
/// <summary>
/// Application entry point: reads the draft tier list ("name;rank" per line), builds the
/// SymSpell dictionary from the card-name corpus, configures the static SymSpell settings and
/// starts the overlay form.
/// </summary>
static void Main()
{
    var rankings = new Dictionary<string, string>();
    foreach (var line in System.IO.File.ReadAllLines("../../DraftTierList_01-11-2018_Overall.txt"))
    {
        var entry = line.Split(';');

        // Blank or malformed lines (no ';') used to crash with IndexOutOfRangeException on entry[1].
        if (entry.Length < 2)
        {
            continue;
        }

        rankings.Add(entry[0], entry[1]);
    }

    string path = @"../../EternalCardName_Corpus2.txt";
    if (!SymSpell.CreateDictionary(path, ""))
    {
        Console.Error.WriteLine("File not found: " + System.IO.Path.GetFullPath(path));
    }

    //verbosity=Top=0: the suggestion with the highest term frequency of the suggestions of smallest edit distance found
    //verbosity=Closest=1: all suggestions of smallest edit distance found, the suggestions are ordered by term frequency
    //verbosity=All=2: all suggestions <= maxEditDistance, the suggestions are ordered by edit distance, then by term frequency (slower, no early termination)
    SymSpell.verbose = 2;
    SymSpell.editDistanceMax = 3;
    //SymSpell.lp = 7;

    Application.EnableVisualStyles();
    Application.SetCompatibleTextRenderingDefault(false);
    Application.Run(new Overlay(rankings));
}
/// <summary>
/// Times a single SymSpell lookup for <paramref name="input"/> and prints the elapsed time,
/// each suggestion (term, edit distance, count) and the number of suggestions.
/// </summary>
/// <param name="input">Term to look up.</param>
/// <param name="symSpell">Dictionary instance used for the lookup.</param>
public static void Correct(string input, SymSpell symSpell)
{
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

    // Check whether the input term, or similar terms within the edit distance, are in the dictionary.
    // Results are returned sorted by ascending edit distance, then by descending word frequency.
    Stopwatch timer = Stopwatch.StartNew();
    List<SymSpell.SuggestItem> matches = symSpell.Lookup(input, verbosity);
    timer.Stop();

    string elapsedText = timer.Elapsed.TotalMilliseconds.ToString("0.000");
    Console.WriteLine(elapsedText + " ms");

    // Display term, distance and frequency.
    for (int index = 0; index < matches.Count; index++)
    {
        var match = matches[index];
        Console.WriteLine(match.term + " " + match.distance.ToString() + " " + match.count.ToString("N0"));
    }

    // Top yields at most one result, so the count is only reported for the other verbosities.
    if (verbosity != SymSpell.Verbosity.Top)
    {
        Console.WriteLine(matches.Count.ToString() + " suggestions");
    }
}
/// <summary>
/// Looks up <paramref name="input"/> with <c>Verbosity.Closest</c> and returns the suggested
/// terms in the order the lookup produced them. Also prints the number of suggestions.
/// </summary>
/// <param name="input">Term to look up.</param>
/// <param name="symSpell">Dictionary instance used for the lookup.</param>
/// <returns>The <c>term</c> of every suggestion; empty when nothing was found.</returns>
public static List<string> Correct(string input, SymSpell symSpell)
{
    // Check whether the input term, or similar terms within the edit distance, are in the dictionary.
    // Results are returned sorted by ascending edit distance, then by descending word frequency.
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;
    List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

    // Collect just the terms. The previous version also called ToList() on each term,
    // allocating a List<char> that was thrown away immediately.
    var lastOf = new List<string>(suggestions.Count);
    foreach (var suggestion in suggestions)
    {
        lastOf.Add(suggestion.term);
    }

    if (verbosity != SymSpell.Verbosity.Top)
    {
        Console.WriteLine(suggestions.Count.ToString() + " suggestions");
    }

    return lastOf;
}
/// <summary>
/// Creates a SymSpell instance and loads the English frequency dictionary that sits next to
/// the application binaries, then performs one warm-up lookup.
/// </summary>
/// <param name="ErrorMsg">
/// Empty on success; otherwise a description of the failure (load failure or exception text).
/// </param>
/// <returns>The loaded instance, or <c>null</c> when loading failed or an exception occurred.</returns>
public SymSpell CreateDictionary(out string ErrorMsg)
{
    ErrorMsg = string.Empty;
    try
    {
        const int initialCapacity = 82765;
        const int maxEditDistance = 2;
        const int prefixLength = 7;
        var symSpell = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

        // Path.Combine inserts the directory separator only when BaseDirectory lacks one.
        string path = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "frequency_dictionary_en_82_765.txt");
        if (!symSpell.LoadDictionary(path, 0, 1))
        {
            // Previously null was returned with an empty message, so callers could not tell why.
            ErrorMsg = "Dictionary file could not be loaded: " + path;
            return null;
        }

        // Warm-up lookup; the result is intentionally discarded.
        symSpell.Lookup("warmup", SymSpell.Verbosity.All);
        return symSpell;
    }
    catch (Exception ex)
    {
        ErrorMsg = ex.ToString();
        return null;
    }
}
/// <summary>
/// Reads <paramref name="filename"/> line by line, splits each line on single spaces and
/// accumulates per-token counts in <c>pToken</c>. Also adds the number of tokens to
/// <c>tokencounter</c> and the number of lines to <c>probability</c>.
/// </summary>
/// <param name="corrector">Spell checker instance; not used by this method.</param>
/// <param name="filename">Path of the text file to parse.</param>
/// <returns>The number of lines read.</returns>
public int parseFile(ref SymSpell corrector, string filename)
{
    int ret = 0;

    // The reader was never closed before; "using" releases the file handle even if parsing throws.
    using (StreamReader in_ = new StreamReader(filename))
    {
        string line;
        while ((line = in_.ReadLine()) != null)
        {
            string[] tokens = line.Split(' ');
            tokencounter += tokens.Length;
            foreach (string token in tokens)
            {
                //corrector->CreateDictionaryEntry(token);
                if (!pToken.ContainsKey(token))
                {
                    pToken[token] = 1;
                }
                else
                {
                    pToken[token]++;
                }
            }
            ret++;
        }
    }

    probability += ret;
    return ret;
}
/// <summary>
/// Two words sharing the prefix "pip" must both be returned, each with its own count,
/// whichever of the three queries ("pipe", "pips", "pip") is used.
/// </summary>
public void WordsWithSharedPrefixShouldRetainCounts()
{
    var speller = new SymSpell(16, 1, 3);
    speller.CreateDictionaryEntry("pipe", 5);
    speller.CreateDictionaryEntry("pips", 10);

    // Query "pipe": "pipe" is expected first, then "pips".
    var forPipe = speller.Lookup("pipe", SymSpell.Verbosity.All, 1);
    Assert.AreEqual(2, forPipe.Count);
    Assert.AreEqual("pipe", forPipe[0].term);
    Assert.AreEqual(5, forPipe[0].count);
    Assert.AreEqual("pips", forPipe[1].term);
    Assert.AreEqual(10, forPipe[1].count);

    // Query "pips": "pips" is expected first, then "pipe".
    var forPips = speller.Lookup("pips", SymSpell.Verbosity.All, 1);
    Assert.AreEqual(2, forPips.Count);
    Assert.AreEqual("pips", forPips[0].term);
    Assert.AreEqual(10, forPips[0].count);
    Assert.AreEqual("pipe", forPips[1].term);
    Assert.AreEqual(5, forPips[1].count);

    // Query "pip": neither word matches exactly; the higher-count "pips" is expected first.
    var forPip = speller.Lookup("pip", SymSpell.Verbosity.All, 1);
    Assert.AreEqual(2, forPip.Count);
    Assert.AreEqual("pips", forPip[0].term);
    Assert.AreEqual(10, forPip[0].count);
    Assert.AreEqual("pipe", forPip[1].term);
    Assert.AreEqual(5, forPip[1].count);
}
/// <summary>
/// Click handler: loads the English frequency dictionary, looks up the lower-cased content of
/// <c>richTextBox1</c> and appends every suggested term to <c>listBox1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Capture the input first so a status message written to the text box cannot replace it.
    // Invariant lower-casing keeps the lookup independent of the UI culture.
    string lowerWrongWord = richTextBox1.Text.ToLowerInvariant();

    //create object
    int initialCapacity = 82765;
    int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
    var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

    //load dictionary
    string dictionaryPath = "../../frequency_dictionary_en_82_765.txt";
    int termIndex = 0;  //column of the term in the dictionary text file
    int countIndex = 1; //column of the term frequency in the dictionary text file
    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
    {
        richTextBox1.Text = "File not found!";
        // Stop here: the old code went on to look up the error message itself.
        return;
    }

    int maxEditDistanceLookup = 1; //max edit distance per lookup (maxEditDistanceLookup<=maxEditDistanceDictionary)
    var suggestionVerbosity = SymSpell.Verbosity.Closest; //Top, Closest, All
    var suggestions = symSpell.Lookup(lowerWrongWord, suggestionVerbosity, maxEditDistanceLookup);

    foreach (var suggestion in suggestions)
    {
        listBox1.Items.Add(suggestion.term.ToString());
    }
}
/// <summary>
/// Runs SymSpell word segmentation on <paramref name="input"/> and prints the corrected
/// string, the edit-distance sum and the log-probability sum on one line.
/// </summary>
/// <param name="input">Text to segment and correct.</param>
/// <param name="symSpell">Dictionary instance used for the segmentation.</param>
private static void Correct(string input, SymSpell symSpell)
{
    var segmentation = symSpell.WordSegmentation(input);

    // Output: corrected text, total edit distance, summed log probability.
    string distanceText = segmentation.distanceSum.ToString("N0");
    string probabilityText = segmentation.probabilityLogSum.ToString();
    Console.WriteLine(segmentation.correctedString + " " + distanceText + " " + probabilityText);
}
/// <summary>
/// A word added with count 1 must not be returned by an exact-match lookup when the instance
/// is constructed with a fourth argument of 10 (presumably the count threshold; confirm
/// against the SymSpell constructor).
/// </summary>
public void LookupShouldNotReturnLowCountWord()
{
    const string word = "pawn";
    var speller = new SymSpell(16, 2, 7, 10);
    speller.CreateDictionaryEntry(word, 1);

    var matches = speller.Lookup(word, SymSpell.Verbosity.Top, 0);

    Assert.AreEqual(0, matches.Count);
}
/// <summary>
/// Adding a second count for a word that is already in the dictionary must leave
/// <c>WordCount</c> at 1.
/// </summary>
public void AddAdditionalCountsShouldNotAddWordAgain()
{
    const string term = "hello";
    var speller = new SymSpell();

    // First insertion creates the entry.
    speller.CreateDictionaryEntry(term, 11);
    Assert.AreEqual(1, speller.WordCount);

    // Second insertion of the same term must not create another entry.
    speller.CreateDictionaryEntry(term, 3);
    Assert.AreEqual(1, speller.WordCount);
}
/// <summary>
/// Console demo for word segmentation: loads the English frequency dictionary, prints load
/// statistics, then segments and corrects every line typed by the user until an empty line
/// (or end of input) is read.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    //set parameters
    const int initialCapacity = 82765;
    const int maxEditDistance = 0;
    const int prefixLength = 7;
    SymSpell speller = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

    Console.Write("Creating dictionary ...");
    long memoryBefore = GC.GetTotalMemory(true);
    Stopwatch loadTimer = Stopwatch.StartNew();

    //Load a frequency dictionary
    //wordfrequency_en.txt ensures high correction quality by combining two data sources:
    //Google Books Ngram data provides representative word frequencies (but contains many entries with spelling errors)
    //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
    string path = AppDomain.CurrentDomain.BaseDirectory + "frequency_dictionary_en_82_765.txt"; //path referencing the SymSpell core project
    //string path = "../../frequency_dictionary_en_82_765.txt"; //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
    bool loaded = speller.LoadDictionary(path, 0, 1);
    if (!loaded)
    {
        Console.Error.WriteLine("\rFile not found: " + Path.GetFullPath(path));
        Console.ReadKey();
        return;
    }

    //Alternatively create the dictionary from a text corpus (e.g. http://norvig.com/big.txt )
    //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
    //The dictionary may contain vocabulary from different languages.
    //If you use mixed vocabulary use the language parameter in Correct() and CreateDictionary() accordingly.
    //You may use SymSpellCompound.CreateDictionaryEntry() to update a (self learning) dictionary incrementally
    //To extend spelling correction beyond single words to phrases (e.g. correcting "unitedkingom" to "united kingdom") simply add those phrases with CreateDictionaryEntry().
    //string path = "big.txt"
    //if (!SymSpellCompound.CreateDictionary(path,"")) Console.Error.WriteLine("File not found: " + Path.GetFullPath(path));

    loadTimer.Stop();
    long memDelta = GC.GetTotalMemory(true) - memoryBefore;

    // Load statistics: words, entries, edit distance, elapsed time and memory growth.
    string wordsText = speller.WordCount.ToString("N0");
    string entriesText = speller.EntryCount.ToString("N0");
    string distanceText = speller.MaxDictionaryEditDistance.ToString();
    string elapsedText = loadTimer.Elapsed.TotalMilliseconds.ToString("0.0");
    string memoryText = (memDelta / 1024 / 1024.0).ToString("N0");
    Console.WriteLine("\rDictionary: " + wordsText + " words, " + entriesText + " entries, edit distance=" + distanceText + " in " + elapsedText + "ms " + memoryText + " MB");

    //warm up
    speller.WordSegmentation("isit");

    Console.WriteLine("Type in a text and hit enter to get word segmentation and correction:");
    while (true)
    {
        // A null from ReadLine (end of input) is treated like an empty line.
        string input = (Console.ReadLine() ?? "").Trim();
        if (string.IsNullOrEmpty(input))
        {
            break;
        }
        Correct(input, speller);
    }
}
// Pre-run so that both implementations have executed once before benchmarks are timed.
// Loads the first dictionary into the current and the original SymSpell and performs one
// lookup on each; the lookup results are discarded.
static void WarmUp()
{
    const string probe = "hockie";
    string dictionaryFile = DictionaryPath[0];

    // current implementation
    SymSpell current = new SymSpell(16, 2, 7);
    current.LoadDictionary(dictionaryFile, 0, 1);
    current.Lookup(probe, SymSpell.Verbosity.All, 1);

    // original implementation
    Original.SymSpell baseline = new Original.SymSpell(2, 7);
    baseline.LoadDictionary(dictionaryFile, "", 0, 1);
    baseline.Lookup(probe, "", 1, 2);
}
/// <summary>
/// With <c>Verbosity.Top</c>, looking up a word that is in the dictionary must return exactly
/// that word, even though near neighbours with other counts exist.
/// </summary>
public void LookupShouldFindExactMatch()
{
    var speller = new SymSpell();
    speller.CreateDictionaryEntry("steama", 4);
    speller.CreateDictionaryEntry("steamb", 6);
    speller.CreateDictionaryEntry("steamc", 2);

    var matches = speller.Lookup("steama", SymSpell.Verbosity.Top, 2);

    Assert.AreEqual(1, matches.Count);
    Assert.AreEqual("steama", matches[0].term);
}
/// <summary>
/// Experiment 1: concatenates the OCR text of every Google Vision JSON response found in
/// D:\json\, splits it into tokens, looks each token up with <c>Verbosity.Top</c> and writes
/// one row per token (original, top suggestion, "igual"/"modificada") to an Excel file.
/// </summary>
private static void Experimento1()
{
    const string jsonDirectory = @"D:\json\";
    const string dictionaryFile = @"D:\DictionaryFiles\default.txt";

    //Symspell parameters
    const int initialCapacity = 82765;
    const int maxEditDistance = 5;
    const int prefixLength = 7;
    SymSpell symSpell = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

    // Fail fast: without a dictionary every token would silently be reported as "igual".
    if (!symSpell.LoadDictionary(dictionaryFile, 0, 1))
    {
        System.Console.Error.WriteLine("File not found: " + dictionaryFile);
        return;
    }

    // Collect the full OCR text of every response in every JSON file.
    StringBuilder OCROriginal = new StringBuilder();
    foreach (string path in Directory.GetFiles(jsonDirectory))
    {
        string jsonText = File.ReadAllText(path, Encoding.Default);
        var response = Google.Protobuf.JsonParser.Default.Parse<Google.Cloud.Vision.V1.AnnotateFileResponse>(jsonText);
        foreach (var respuestas in response.Responses)
        {
            var annotation = respuestas.FullTextAnnotation;
            if (annotation != null)
            {
                OCROriginal.Append(annotation.Text);
            }
        }
    }

    // Normalise punctuation, then tokenise on single spaces. The "***" placeholder protects
    // ": " while the other colon forms are rewritten and is restored afterwards.
    var arrayOCROriginal = OCROriginal.ToString()
        .Replace("\n", " ")
        .Replace("{", "")
        .Replace("}", "")
        .Replace(": ", "***")
        .Replace(" : ", " ")
        .Replace(":", " ")
        .Replace("***", ": ")
        .Replace(". ", " ")
        .Replace(", ", " ")
        .Replace("-", " ")
        .Split(' ');

    Dictionary<int, ExperimentSpell> excelMatrix = new Dictionary<int, ExperimentSpell>();
    int k = 0;
    foreach (string item in arrayOCROriginal)
    {
        ExperimentSpell exp1 = new ExperimentSpell();
        exp1.correction = "igual";
        exp1.original = item;
        exp1.correctionLookupCompound = item;

        List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(item, SymSpell.Verbosity.Top);
        if (suggestions.Count > 0)
        {
            // NOTE(review): a token counts as "modificada" whenever any suggestion exists,
            // including one identical to the token. Confirm this is the intended labelling.
            exp1.correction = "modificada";
            exp1.correctionLookupCompound = suggestions[0].term;
        }

        excelMatrix.Add(k++, exp1);
    }

    CreateExcelFileExperimento(excelMatrix, "1");
}
/// <summary>
/// Console demo for single-word lookup: loads a frequency dictionary (or, alternatively,
/// builds one from a text corpus), prints load statistics, then corrects every line typed by
/// the user until an empty line (or end of input) is read.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    Console.Write("Creating dictionary ...");
    long memoryBefore = GC.GetTotalMemory(true);
    Stopwatch loadTimer = Stopwatch.StartNew();

    //set parameters
    const int initialCapacity = 82765;
    const int maxEditDistance = 2;
    const int prefixLength = 7;
    var speller = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

    //Load a frequency dictionary
    //wordfrequency_en.txt ensures high correction quality by combining two data sources:
    //Google Books Ngram data provides representative word frequencies (but contains many entries with spelling errors)
    //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
    //string path = "../../../SymSpell.Demo/test_data/frequency_dictionary_en_30_000.txt"; //for benchmark only (contains also non-genuine English words)
    //string path = "../../../SymSpell.Demo/test_data/frequency_dictionary_en_500_000.txt"; //for benchmark only (contains also non-genuine English words)
    string path = "../../../SymSpell/frequency_dictionary_en_82_765.txt"; //for spelling correction (genuine English words)
    //string path = "../../frequency_dictionary_en_82_765.txt"; //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
    bool loaded = speller.LoadDictionary(path, 0, 1);
    if (!loaded)
    {
        Console.Error.WriteLine("File not found: " + Path.GetFullPath(path)); //path when using symspell.cs
    }

    //Alternatively create the dictionary from a text corpus (e.g. http://norvig.com/big.txt )
    //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
    //You may use SymSpell.CreateDictionaryEntry() to update a (self learning) dictionary incrementally
    //To extend spelling correction beyond single words to phrases (e.g. correcting "unitedkingom" to "united kingdom") simply add those phrases with CreateDictionaryEntry(). or use https://github.com/wolfgarbe/SymSpellCompound
    //string path = "big.txt";
    //if (!symSpell.CreateDictionary(path)) Console.Error.WriteLine("File not found: " + Path.GetFullPath(path));

    loadTimer.Stop();
    long memDelta = GC.GetTotalMemory(true) - memoryBefore;

    // Load statistics: words, entries, edit distance, elapsed time and memory growth.
    string wordsText = speller.WordCount.ToString("N0");
    string entriesText = speller.EntryCount.ToString("N0");
    string distanceText = speller.MaxDictionaryEditDistance.ToString();
    string elapsedText = loadTimer.Elapsed.TotalMilliseconds.ToString("0.0");
    string memoryText = (memDelta / 1024 / 1024.0).ToString("N0");
    Console.WriteLine("\rDictionary: " + wordsText + " words, " + entriesText + " entries, edit distance=" + distanceText + " in " + elapsedText + "ms " + memoryText + " MB");

    //warm up
    speller.Lookup("warmup", SymSpell.Verbosity.All, 1);

    while (true)
    {
        // A null from ReadLine (end of input) is treated like an empty line.
        string input = (Console.ReadLine() ?? "").Trim();
        if (string.IsNullOrEmpty(input))
        {
            break;
        }
        Correct(input, speller);
    }
}
/// <summary>
/// With <c>Verbosity.Top</c>, looking up "steam" among three candidate words must return only
/// the one with the highest count ("steamb", count 6).
/// </summary>
public void LookupShouldReturnMostFrequent()
{
    var speller = new SymSpell();
    speller.CreateDictionaryEntry("steama", 4);
    speller.CreateDictionaryEntry("steamb", 6);
    speller.CreateDictionaryEntry("steamc", 2);

    var matches = speller.Lookup("steam", SymSpell.Verbosity.Top, 2);

    Assert.AreEqual(1, matches.Count);
    var best = matches[0];
    Assert.AreEqual("steamb", best.term);
    Assert.AreEqual(6, best.count);
}
/// <summary>
/// Runs a compound lookup on <paramref name="input"/> using the dictionary's maximum edit
/// distance and prints one line per suggestion (term, edit distance, count).
/// </summary>
/// <param name="input">Text to correct.</param>
/// <param name="symSpell">Dictionary instance used for the lookup.</param>
private static void Correct(string input, SymSpell symSpell)
{
    int maxDistance = symSpell.MaxDictionaryEditDistance;
    List<SymSpell.SuggestItem> matches = symSpell.LookupCompound(input, maxDistance);

    // Display term, distance and frequency.
    for (int index = 0; index < matches.Count; index++)
    {
        var match = matches[index];
        Console.WriteLine(match.term + " " + match.distance.ToString() + " " + match.count.ToString("N0"));
    }
}
/// <summary>
/// Interactive smoke test: registers the "grasa" and "good" classes from their data folders,
/// then classifies every line read from the console until the input stream ends.
/// </summary>
public static void Test()
{
    SymSpell corrector = new SymSpell();
    Bayes cl = new Bayes(ref corrector);
    cl.addClass("./GENERATIVE/BAYES/grasa", "grasa");
    cl.addClass("./GENERATIVE/BAYES/good", "good");

    while (true)
    {
        Console.Write("Input=");
        string line = Console.ReadLine();

        // ReadLine returns null at end of input (closed or redirected stdin). Without this
        // check the loop would never end and would keep passing null to classify().
        if (line == null)
        {
            break;
        }

        Console.WriteLine(cl.classify(line));
    }
}
/// <summary>
/// Creates the <c>symSpell</c> instance and loads its dictionary from the "dataset" text asset
/// in the Resources folder. Logs a message when the dictionary cannot be loaded.
/// </summary>
public void initDict()
{
    int initialCapacity = 20000;
    int maxEditDistanceDictionary = 3; //maximum edit distance per dictionary precalculation
    symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

    // Resources.Load yields null when no asset with that name exists, so check it before
    // handing it to LoadDictionary.
    TextAsset dictionaryAsset = Resources.Load<TextAsset>("dataset");
    int termIndex = 0;  //column of the term in the dictionary text file
    int countIndex = 1; //column of the term frequency in the dictionary text file
    if (dictionaryAsset == null || !symSpell.LoadDictionary(dictionaryAsset, termIndex, countIndex))
    {
        Debug.Log("Unable to load dictionary");
    }
}
/// <summary>
/// For the query "steems" against three dictionary words, the number of results must grow
/// with the verbosity: Top returns 1, Closest returns 2, All returns 3.
/// </summary>
public void VerbosityShouldControlLookupResults()
{
    const string query = "steems";
    var speller = new SymSpell();
    speller.CreateDictionaryEntry("steam", 1);
    speller.CreateDictionaryEntry("steams", 2);
    speller.CreateDictionaryEntry("steem", 3);

    var top = speller.Lookup(query, SymSpell.Verbosity.Top, 2);
    Assert.AreEqual(1, top.Count);

    var closest = speller.Lookup(query, SymSpell.Verbosity.Closest, 2);
    Assert.AreEqual(2, closest.Count);

    var all = speller.Lookup(query, SymSpell.Verbosity.All, 2);
    Assert.AreEqual(3, all.Count);
}
/// <summary>
/// Loads up to <paramref name="testNumber"/> query terms from <paramref name="path"/> (first
/// whitespace-separated column of every line that has at least two columns), runs 10 rounds
/// of lookups over them and prints the result count of the last round together with the
/// average time per round.
/// </summary>
/// <param name="path">File with one noisy query per line.</param>
/// <param name="testNumber">Maximum number of terms to load and look up per round.</param>
public static void Benchmark(string path, int testNumber)
{
    string[] testList = new string[testNumber];

    // Load the test terms; stop once the array is full so longer files cannot overrun it.
    int loaded = 0;
    using (StreamReader sr = new StreamReader(File.OpenRead(path)))
    {
        string line;
        //process a single line at a time only for memory efficiency
        while (loaded < testNumber && (line = sr.ReadLine()) != null)
        {
            // The explicit cast selects the char[] overload (null = split on whitespace)
            // and avoids overload ambiguity on newer frameworks.
            string[] lineParts = line.Split((char[])null);
            if (lineParts.Length >= 2)
            {
                testList[loaded++] = lineParts[0];
            }
        }
    }

    const int rounds = 10;
    int resultSum = 0;
    Stopwatch stopWatch = Stopwatch.StartNew();

    //perform n rounds of Lookup over the loaded terms
    for (int j = 0; j < rounds; j++)
    {
        resultSum = 0;

        // Only the loaded entries are looked up; unfilled slots would be null.
        for (int i = 0; i < loaded; i++)
        {
            List<SymSpell.SuggestItem> suggestions = SymSpell.Lookup(testList[i], "", SymSpell.editDistanceMax);
            resultSum += suggestions.Count;
        }
    }

    stopWatch.Stop();
    Console.WriteLine(resultSum.ToString("N0") + " results in " + (stopWatch.ElapsedMilliseconds / rounds).ToString() + " ms");
}
/// <summary>
/// Console demo for the static SymSpell API: sets the global parameters, loads a frequency
/// dictionary (or, alternatively, builds one from a text corpus), prints load statistics,
/// then corrects every line typed by the user until an empty line (or end of input) is read.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    //set global parameters
    SymSpell.verbose = 0;
    SymSpell.editDistanceMax = 2;
    SymSpell.lp = 7;

    Console.Write("Creating dictionary ...");
    Stopwatch loadTimer = Stopwatch.StartNew();

    //Load a frequency dictionary
    //wordfrequency_en.txt ensures high correction quality by combining two data sources:
    //Google Books Ngram data provides representative word frequencies (but contains many entries with spelling errors)
    //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
    //string path = "../../../symspelldemo/test_data/frequency_dictionary_en_30_000.txt"; //for benchmark only (contains also non-genuine English words)
    //string path = "../../../symspelldemo/test_data/frequency_dictionary_en_500_000.txt"; //for benchmark only (contains also non-genuine English words)
    string path = "../../../symspell/frequency_dictionary_en_82_765.txt"; //for spelling correction (genuine English words)
    //string path = "../../frequency_dictionary_en_82_765.txt"; //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
    bool loaded = SymSpell.LoadDictionary(path, "", 0, 1);
    if (!loaded)
    {
        Console.Error.WriteLine("File not found: " + Path.GetFullPath(path)); //path when using symspell.cs
    }

    //Alternatively create the dictionary from a text corpus (e.g. http://norvig.com/big.txt )
    //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
    //The dictionary may contain vocabulary from different languages.
    //If you use mixed vocabulary use the language parameter in Correct() and CreateDictionary() accordingly.
    //You may use SymSpell.CreateDictionaryEntry() to update a (self learning) dictionary incrementally
    //To extend spelling correction beyond single words to phrases (e.g. correcting "unitedkingom" to "united kingdom") simply add those phrases with CreateDictionaryEntry(). or use https://github.com/wolfgarbe/SymSpellCompound
    //string path = "big.txt";
    //if (!SymSpell.CreateDictionary(path,"")) Console.Error.WriteLine("File not found: " + Path.GetFullPath(path));

    loadTimer.Stop();

    // Load statistics: words, entries, edit distance, elapsed time and private memory size.
    string wordsText = SymSpell.wordlist.Count.ToString("N0");
    string entriesText = SymSpell.dictionary.Count.ToString("N0");
    string distanceText = SymSpell.editDistanceMax.ToString();
    string elapsedText = loadTimer.ElapsedMilliseconds.ToString();
    string memoryText = (Process.GetCurrentProcess().PrivateMemorySize64 / 1000000).ToString("N0");
    Console.WriteLine("\rDictionary: " + wordsText + " words, " + entriesText + " entries, edit distance=" + distanceText + " in " + elapsedText + "ms " + memoryText + " MB");

    //Benchmark("../../../symspelldemo/test_data/noisy_query_en_1000.txt",1000);

    while (true)
    {
        // A null from ReadLine (end of input) is treated like an empty line.
        string input = (Console.ReadLine() ?? "").Trim();
        if (string.IsNullOrEmpty(input))
        {
            break;
        }
        Correct(input, "");
    }
}
/// <summary>
/// Creates the <c>sym</c> spell checker and loads the English frequency dictionary from the
/// "SymSpell" folder under the streaming-assets path. Logs an error when loading fails.
/// </summary>
private void InitSym()
{
    const int initialCapacity = 82765;
    const int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
    const int termIndex = 0;  //column of the term in the dictionary text file
    const int countIndex = 1; //column of the term frequency in the dictionary text file

    //create object
    sym = new SymSpell(initialCapacity, maxEditDistanceDictionary);

    //load dictionary
    string dictionaryPath = Path.Combine(Application.streamingAssetsPath, "SymSpell", "frequency_dictionary_en_82_765.txt");
    bool loaded = sym.LoadDictionary(dictionaryPath, termIndex, countIndex);
    if (!loaded)
    {
        Debug.LogError("Dictionary file not found! Aborting...");
    }
}
/// <summary>
/// Creates the <c>symSpell</c> instance and loads the English frequency dictionary from the
/// "SymSpell" folder under <c>Application.dataPath</c>. Logs a message when loading fails.
/// </summary>
public void LoadDictionary()
{
    //create object
    int initialCapacity = 82765;
    int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
    symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

    //load dictionary
    // Path.Combine uses the platform's directory separator; the previous hard-coded "\"
    // produced an invalid path on non-Windows platforms.
    string dictionaryPath = System.IO.Path.Combine(Application.dataPath, "SymSpell", "frequency_dictionary_en_82_765.txt");
    int termIndex = 0;  //column of the term in the dictionary text file
    int countIndex = 1; //column of the term frequency in the dictionary text file
    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
    {
        Debug.Log("File not found!");
    }
}
/// <summary>
/// Initialises an interpreter instance: spell corrector, Bayes classifiers, functor tables,
/// Markov generators, variable scopes and knowledge base. The shared functor registry is
/// populated once, by the first instance that is constructed.
/// </summary>
public Parlogike()
{
    // Spell checking and classification; the classifier is handed the corrector by reference.
    corrector = new SymSpell();
    BayesClassifiers = new Bayes(ref corrector);

    // Functor lookup tables.
    externFunctors = new Dictionary<string, Func<Parlogike, string, List<Variable>, char, bool, Pattern, string, string, Result>>();
    internFunctors = new Dictionary<string, Func<Parlogike, List<Variable>, int, bool>>();

    // Generators, groups and knowledge base.
    MarkovGenerators = new Dictionary<string, Markov>();
    Groups = new Dictionary<string, List<string>>();
    knowledge = new List<Pattern>();

    // Runtime state.
    LocalStack = new List<string>();
    Residues = new List<string>();

    // Variable scopes; the scope keyed by the empty string always exists.
    GlobalVariables = new Dictionary<string, Dictionary<string, Variable>>
    {
        { "", new Dictionary<string, Variable>() }
    };

    // One-time population of the functor registry, guarded by the init flag.
    if (init)
    {
        return;
    }
    Functors.populate();
    init = true;
}
/// <summary>
/// Corrects the spelling of the message part of a chat line (everything after the first ':',
/// minus the last five characters) with SymSpell word segmentation, shows the original and
/// the suggestion in a <c>SpellingCorrection</c> dialog and returns the accepted text.
/// </summary>
/// <param name="tempHtmlLines">Chat line to correct.</param>
/// <param name="spellingEngine">SymSpell instance used for the word segmentation.</param>
/// <returns>
/// The dialog's (possibly user-edited) text when the dialog result is true, otherwise the
/// automatically corrected line; "\r\n" is appended in both cases.
/// </returns>
private string FixBadSpelling(string tempHtmlLines, SymSpell spellingEngine)
{
    SpellingCorrection dialog = new SpellingCorrection();
    dialog.OriginalChatText.Text = tempHtmlLines;

    //Space out tags
    tempHtmlLines = tempHtmlLines.Replace("*", " * ");

    // Collapse spaced-out dot runs, longest first so shorter patterns do not split longer ones.
    tempHtmlLines = tempHtmlLines.Replace(". . . . .", "... ");
    tempHtmlLines = tempHtmlLines.Replace(". . . .", "... ");
    tempHtmlLines = tempHtmlLines.Replace(". . .", "... ");
    tempHtmlLines = tempHtmlLines.Replace(". .", "... ");

    int postStartIndex = tempHtmlLines.IndexOf(':') + 1;

    // The last 5 characters are left out of the correction (presumably a trailing tag or line
    // ending; confirm against the caller). Short lines used to yield a negative length here
    // and crash Substring with ArgumentOutOfRangeException.
    int postLength = tempHtmlLines.Length - postStartIndex - 5;
    string postSubString = postLength > 0
        ? tempHtmlLines.Substring(postStartIndex, postLength)
        : string.Empty;

    //Use SymSpell to fix the spelling of the message part; nothing to segment when it is empty.
    // (An earlier LookupCompound call whose result was never used has been removed.)
    string fixedLine = postSubString.Length > 0
        ? spellingEngine.WordSegmentation(postSubString).correctedString
        : string.Empty;

    string fixedStuff = (tempHtmlLines.Substring(0, postStartIndex) + " " + fixedLine).Replace(" * ", "*");
    dialog.SuggestedChatTextTextBox.Text = fixedStuff;
    dialog.ShowDialog();

    if (dialog.DialogResult.HasValue && dialog.DialogResult.Value)
    {
        return dialog.SuggestedChatTextTextBox.Text + "\r\n";
    }

    return fixedStuff + "\r\n";
}
/// <summary>
/// Looks up <paramref name="input"/> with <c>Verbosity.All</c>, logs the number of
/// suggestions and returns them.
/// </summary>
/// <param name="input">Term to look up.</param>
/// <param name="symSpell">Dictionary instance used for the lookup.</param>
/// <returns>The suggestions exactly as returned by <c>Lookup</c>.</returns>
public static List<SymSpell.SuggestItem> Correct(string input, SymSpell symSpell)
{
    // Check whether the input term, or similar terms within the edit distance, are in the dictionary.
    // Results are returned sorted by ascending edit distance, then by descending word frequency.
    const SymSpell.Verbosity verbosity = SymSpell.Verbosity.All;
    List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

    // The per-suggestion loop was removed: its only statement was commented out, so it
    // iterated the list without doing anything.
    if (verbosity != SymSpell.Verbosity.Top)
    {
        Debug.Log(suggestions.Count.ToString() + " suggestions");
    }

    return suggestions;
}