/// <summary>
/// Application entry point: reads the draft tier list into a lookup table,
/// builds the SymSpell dictionary from the card-name corpus, configures the
/// lookup, and starts the overlay window.
/// </summary>
static void Main()
{
    // Tier list file: one "key;value" pair per line.
    var rankings = new Dictionary<string, string>();
    foreach (var line in System.IO.File.ReadAllLines("../../DraftTierList_01-11-2018_Overall.txt"))
    {
        var entry = line.Split(';');

        // A line without a ';' separator used to crash startup with an
        // IndexOutOfRangeException; skip it (and report it unless it is blank).
        if (entry.Length < 2)
        {
            if (line.Trim().Length > 0)
            {
                Console.Error.WriteLine("Skipping malformed tier list line: " + line);
            }
            continue;
        }

        // Dictionary.Add throws on a repeated key; keep the first occurrence.
        if (rankings.ContainsKey(entry[0]))
        {
            Console.Error.WriteLine("Skipping duplicate tier list entry: " + entry[0]);
            continue;
        }

        rankings.Add(entry[0], entry[1]);
    }

    string path = @"../../EternalCardName_Corpus2.txt";
    if (!SymSpell.CreateDictionary(path, ""))
    {
        // Startup continues (the overlay is still shown) even if the corpus is missing.
        Console.Error.WriteLine("File not found: " + System.IO.Path.GetFullPath(path));
    }

    //verbosity=Top=0: the suggestion with the highest term frequency of the suggestions of smallest edit distance found
    //verbosity=Closest=1: all suggestions of smallest edit distance found, the suggestions are ordered by term frequency
    //verbosity=All=2: all suggestions <= maxEditDistance, the suggestions are ordered by edit distance, then by term frequency (slower, no early termination)
    SymSpell.verbose = 2;
    // NOTE(review): editDistanceMax is assigned after the dictionary has been built.
    // If CreateDictionary reads this setting during precalculation, it should be
    // assigned before that call - confirm against the SymSpell implementation.
    SymSpell.editDistanceMax = 3;
    //SymSpell.lp = 7;

    Application.EnableVisualStyles();
    Application.SetCompatibleTextRenderingDefault(false);
    Application.Run(new Overlay(rankings));
}
/// <summary>
/// Builds a SymSpell dictionary from a text corpus and writes every term whose
/// count is greater than 50 and that is not a plain integer to a log file,
/// one "term count" pair per line.
/// </summary>
/// <param name="args">Command-line arguments (unused; all paths are hard-coded).</param>
static void Main(string[] args)
{
    int initialCapacity = 82765;
    string dictionaryPath = @"D:\sbwce.txt";
    int prefixLength = 7;
    int maxEditDistanceDictionary = 2;

    var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);
    symSpell.CreateDictionary(dictionaryPath);

    // The term/count table is read through reflection from the non-public
    // instance field named "words".
    Type typecontroller = typeof(SymSpell);
    System.Reflection.FieldInfo finfo = typecontroller.GetField(
        "words",
        System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.GetField);

    System.Collections.Generic.Dictionary<string, System.Int64> collection = null;
    if (finfo != null)
    {
        collection = finfo.GetValue(symSpell) as System.Collections.Generic.Dictionary<string, System.Int64>;
    }

    // GetField returns null when the field does not exist; previously that led to a
    // NullReferenceException in the loop below, after the log file had already been
    // created. Validate first so a failed run leaves no empty or truncated file behind.
    if (collection == null)
    {
        Console.Error.WriteLine("Unable to read the 'words' field of SymSpell; nothing written.");
        return;
    }

    // The using statement disposes (and thereby flushes and closes) the writer.
    using (System.IO.StreamWriter fileWriter = new System.IO.StreamWriter(@"D:\Personal\Master\Materias\TFM SLN\DictionaryFiles\load4.log"))
    {
        foreach (System.Collections.Generic.KeyValuePair<string, System.Int64> kvPair in collection)
        {
            // Keep terms with a count above 50 that are not (optionally signed) integers.
            if (kvPair.Value > 50 && !Regex.IsMatch(kvPair.Key, @"^-?\d+$"))
            {
                fileWriter.WriteLine("{0} {1}", kvPair.Key, kvPair.Value);
            }
        }
    }
}
/// <summary>
/// Command-line front end for SymSpell. Loads or creates a dictionary, then reads
/// terms from standard input (until an empty line or end of input) and writes the
/// suggestions to standard output. Status and error messages go to standard error.
/// </summary>
/// <param name="args">
/// DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity].
/// With two or fewer arguments the usage text is printed instead.
/// </param>
static void Main(string[] args)
{
    if (args.Length <= 2)
    {
        PrintUsage();
        return;
    }

    Console.Error.Write("Creating dictionary ...");
    long memSize = GC.GetTotalMemory(true);
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    //parameters
    int initialCapacity = 82765;
    int termIndex = 0;  //column of the term in the dictionary text file
    int countIndex = 1; //column of the term frequency in the dictionary text file

    //dictionaryType
    //Exact match is required: the former substring test ("load.create".IndexOf(x))
    //also accepted "", "oad", "." etc., which then silently loaded nothing.
    string[] dictionaryTypes = { "load", "create" };
    string dictionaryType = args[0].ToLowerInvariant();
    if (Array.IndexOf(dictionaryTypes, dictionaryType) < 0)
    {
        Console.Error.WriteLine("Error in parameter 1");
        return;
    }

    //dictionaryPath
    string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1];

    //prefix length (optional parameter): when args[2] is a number it is the prefix
    //length and all following parameters shift by one position
    int offset = 0;
    int prefixLength;
    if (int.TryParse(args[2], out prefixLength))
    {
        offset = 1;
    }
    else
    {
        prefixLength = 7;
    }

    //lookupType (required; exact match, see dictionaryType)
    //Without a lookup type the input loop below would read terms and print nothing.
    string[] lookupTypes = { "lookup", "lookupcompound", "wordsegment" };
    if (args.Length <= 2 + offset)
    {
        Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString());
        return;
    }
    string lookupType = args[2 + offset].ToLowerInvariant();
    if (Array.IndexOf(lookupTypes, lookupType) < 0)
    {
        Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString());
        return;
    }

    //maxEditDistance
    int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
    if (args.Length > 3 + offset)
    {
        if (!int.TryParse(args[3 + offset], out maxEditDistanceDictionary))
        {
            Console.Error.WriteLine("Error in parameter " + (4 + offset).ToString());
            return;
        }
    }

    //output stats
    bool outputStats = false; //false, true
    if (args.Length > 4 + offset)
    {
        if (!bool.TryParse(args[4 + offset], out outputStats))
        {
            Console.Error.WriteLine("Error in parameter " + (5 + offset).ToString());
            return;
        }
    }

    //verbosity
    var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
    if (args.Length > 5 + offset)
    {
        if (!Enum.TryParse(args[5 + offset], true, out suggestionVerbosity))
        {
            Console.Error.WriteLine("Error in parameter " + (6 + offset).ToString());
            return;
        }
    }

    //create object
    var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

    //load dictionary (dictionaryType has been validated to be "load" or "create")
    bool dictionaryReady = dictionaryType == "load"
        ? symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex)
        : symSpell.CreateDictionary(dictionaryPath);
    if (!dictionaryReady)
    {
        Console.Error.WriteLine("File not found!");
        return;
    }

    stopWatch.Stop();
    long memDelta = GC.GetTotalMemory(true) - memSize;

    //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
    Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

    //warm up
    symSpell.Lookup("warmup", SymSpell.Verbosity.All);

    //process input terms until an empty line or end of input
    string inputTerm;
    while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
    {
        switch (lookupType)
        {
            case "lookup":
                var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceDictionary, true);
                //display suggestions, edit distance and term frequency
                foreach (var suggestion in suggestions)
                {
                    if (outputStats)
                    {
                        Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                    }
                    else
                    {
                        Console.WriteLine(suggestion.term);
                    }
                }
                break;

            case "lookupcompound":
                var suggestions2 = symSpell.LookupCompound(inputTerm);
                //display suggestions, edit distance and term frequency
                foreach (var suggestion in suggestions2)
                {
                    if (outputStats)
                    {
                        Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                    }
                    else
                    {
                        Console.WriteLine(suggestion.term);
                    }
                }
                break;

            case "wordsegment":
                var suggestions3 = symSpell.WordSegmentation(inputTerm);
                //display corrected string, edit distance sum and log probability sum
                foreach (var suggestion in suggestions3)
                {
                    if (outputStats)
                    {
                        Console.WriteLine(suggestion.correctedString + " " + suggestion.distanceSum.ToString("N0") + " " + suggestion.probabilityLogSum.ToString());
                    }
                    else
                    {
                        Console.WriteLine(suggestion.correctedString);
                    }
                }
                break;
        }
    }
}

/// <summary>Writes the command-line usage text to standard output.</summary>
private static void PrintUsage()
{
    //PrefixLength is number
    //help
    Console.WriteLine("SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]");
    Console.WriteLine();
    Console.WriteLine("DictionaryType=load|create");
    Console.WriteLine(" load: load dictionary from dictionary file");
    Console.WriteLine(" create: create dictionary from text corpus");
    Console.WriteLine("DictionaryPath: path to dictionary/corpus file");
    Console.WriteLine("PrefixLength: default=7 (speed/memory consumption trade-off)"); //dictionary param
    Console.WriteLine(" 5: low memory, slow lookup");
    Console.WriteLine(" 6: medium memory, medium lookup");
    Console.WriteLine(" 7: high memory, fast lookup");
    //lookup intended for correction of single word
    //lookupcompound intended for correction of multiple words, it can insert only a single space per token, faster than wordsegmentation
    //wordsegmentation intended for segmentation and correction of multiple words, it can insert multiple spaces per token, slower than lookupcompound
    Console.WriteLine("LookupType=lookup|lookupcompound|wordsegment");
    Console.WriteLine(" lookup: correct single word");
    Console.WriteLine(" lookupcompound: correct multiple-word string (supports splitting/merging)");
    Console.WriteLine(" wordsegment: word segment and correct input string");
    Console.WriteLine("MaxEditDistance: default=2 (0: no correction, word segmentation only)");
    Console.WriteLine("OutputStats=false|true");
    Console.WriteLine(" false: only corrected string");
    Console.WriteLine(" true: corrected string, edit distance, word frequency/probability");
    Console.WriteLine("Verbosity=top|closest|all"); //no effect for lookupcompound and wordsegment
    Console.WriteLine(" top: Top suggestion");
    Console.WriteLine(" closest: All suggestions of smallest edit distance found");
    Console.WriteLine(" all: All suggestions within maxEditDistance");
    Console.WriteLine();
}
/// <summary>
/// Command-line front end for single-word SymSpell lookups. Loads or creates a
/// dictionary, then reads terms from standard input (until an empty line or end of
/// input) and writes "term distance count" for each suggestion to standard output.
/// Status and error messages go to standard error.
/// </summary>
/// <param name="args">
/// load|create Path [MaxEditDistance] [Verbosity] [PrefixLength].
/// With fewer than two arguments the usage text is printed instead.
/// </param>
static void Main(string[] args)
{
    if (args.Length < 2)
    {
        //help
        Console.WriteLine("SymSpell.CommandLine load Path [MaxEditDistance] [Verbosity] [PrefixLength]");
        Console.WriteLine("SymSpell.CommandLine create Path [MaxEditDistance] [Verbosity] [PrefixLength]");
        Console.WriteLine();
        Console.WriteLine("load: load dictionary from dictionary file");
        Console.WriteLine("create: create dictionary from text corpus");
        Console.WriteLine("MaxEditDistance: default=2");
        Console.WriteLine("Verbosity=Top|Closest|All (case-sensitive)");
        Console.WriteLine("PrefixLength: default=7 (5:low memory; 7:fast lookup)");
        Console.WriteLine();
        return;
    }

    Console.Error.Write("Creating dictionary ...");
    long memSize = GC.GetTotalMemory(true);
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    //parameters
    int initialCapacity = 82765;

    int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
    if (args.Length > 2)
    {
        if (!int.TryParse(args[2], out maxEditDistanceDictionary))
        {
            Console.Error.WriteLine("Error in parameter 3");
            return;
        }
    }
    int maxEditDistanceLookup = maxEditDistanceDictionary; //max edit distance per lookup

    var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
    if (args.Length > 3)
    {
        //case-sensitive parse, as stated in the usage text
        if (!Enum.TryParse(args[3], out suggestionVerbosity))
        {
            Console.Error.WriteLine("Error in parameter 4");
            return;
        }
    }

    int prefixLength = 7;
    if (args.Length > 4)
    {
        if (!int.TryParse(args[4], out prefixLength))
        {
            Console.Error.WriteLine("Error in parameter 5");
            return;
        }
    }

    string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1]; // "../../../../SymSpell/frequency_dictionary_en_82_765.txt";
    int termIndex = 0;  //column of the term in the dictionary text file
    int countIndex = 1; //column of the term frequency in the dictionary text file

    //create object
    var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

    //load dictionary
    switch (args[0].ToLowerInvariant())
    {
        case "load":
            if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
            {
                Console.Error.WriteLine("File not found!");
                return;
            }
            break;

        case "create":
            if (!symSpell.CreateDictionary(dictionaryPath))
            {
                Console.Error.WriteLine("File not found!");
                return;
            }
            break;

        default:
            //An unknown mode used to fall through silently, leaving an empty
            //dictionary so that no lookup could ever return a suggestion.
            Console.Error.WriteLine("Error in parameter 1");
            return;
    }

    stopWatch.Stop();
    long memDelta = GC.GetTotalMemory(true) - memSize;

    //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
    Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

    //warm up
    symSpell.Lookup("warmup", SymSpell.Verbosity.All, 1);

    //lookup suggestions for single-word input strings
    string inputTerm;
    while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
    {
        var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceLookup, true);

        //display suggestions, edit distance and term frequency
        foreach (var suggestion in suggestions)
        {
            Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
        }
    }
}
//Demo driver: creates a SymSpell dictionary from a names corpus, then looks up every
//line of a keys file and prints the number of suggestions followed by each suggestion.
//Finally prints the elapsed time and the change in managed memory.
public static void Main(string[] args)
{
    //baseline for the memory and time figures printed at the end
    long memoryBefore = GC.GetTotalMemory(true);
    Stopwatch timer = Stopwatch.StartNew();

    //dictionary parameters
    const int capacity = 82765;
    const int editDistanceLimit = 3;
    const int prefixSize = 7;
    const SymSpell.Verbosity lookupVerbosity = SymSpell.Verbosity.Top;

    var symSpell = new SymSpell(capacity, editDistanceLimit, prefixSize);
    symSpell.CreateDictionary(@"C:\Dev\csharp\names.txt");

    //Disabled alternatives (kept as a note only):
    // - load a prebuilt frequency dictionary such as frequency_dictionary_en_82_765.txt
    //   via symSpell.LoadDictionary(path, 0, 1) instead of creating one from a corpus
    // - create the dictionary from a general text corpus (e.g. http://norvig.com/big.txt);
    //   the corpus should be free of spelling errors and have representative frequencies
    // - print dictionary statistics, warm up, and correct terms typed interactively

    using (Stream keyStream = File.OpenRead(@"C:\Dev\csharp\keys.txt"))
    using (StreamReader keyReader = new StreamReader(keyStream))
    {
        //one line at a time, so the whole file is never held in memory
        for (string key = keyReader.ReadLine(); key != null; key = keyReader.ReadLine())
        {
            var matches = symSpell.Lookup(key.Trim(), lookupVerbosity);
            Console.WriteLine(matches.Count);
            foreach (var match in matches)
            {
                Console.WriteLine(match);
            }
        }
    }

    timer.Stop();
    string elapsedText = timer.Elapsed.TotalMilliseconds.ToString("0.0");
    Console.WriteLine(elapsedText + "ms ");

    long memoryGrowth = GC.GetTotalMemory(true) - memoryBefore;
    string memoryText = (memoryGrowth / 1024 / 1024.0).ToString("N0");
    Console.WriteLine(memoryText + " MB");
}