Пример #1
0
        /// <summary>
        /// Application entry point: reads the draft tier list into a name-to-ranking map,
        /// builds the SymSpell dictionary from the card-name corpus and starts the overlay window.
        /// </summary>
        static void Main()
        {
            var rankings = new Dictionary <string, string>();

            // Each tier-list line is split on ';' into <key>;<value>; only the first two fields are used.
            foreach (var line in System.IO.File.ReadAllLines("../../DraftTierList_01-11-2018_Overall.txt"))
            {
                var entry = line.Split(';');

                // Skip blank or malformed lines instead of failing with IndexOutOfRangeException.
                if (entry.Length < 2)
                {
                    continue;
                }

                // Indexer instead of Add: a repeated key must not abort startup with
                // ArgumentException; the last occurrence wins.
                rankings[entry[0]] = entry[1];
            }

            string path = @"../../EternalCardName_Corpus2.txt";

            // A missing corpus is reported but not fatal: the overlay still starts.
            if (!SymSpell.CreateDictionary(path, ""))
            {
                Console.Error.WriteLine("File not found: " + System.IO.Path.GetFullPath(path));
            }

            //verbosity=Top=0: the suggestion with the highest term frequency of the suggestions of smallest edit distance found
            //verbosity=Closest=1: all suggestions of smallest edit distance found, the suggestions are ordered by term frequency
            //verbosity=All=2: all suggestions <= maxEditDistance, the suggestions are ordered by edit distance, then by term frequency (slower, no early termination)

            // NOTE(review): editDistanceMax is assigned after the dictionary has been created;
            // if SymSpell uses it while building the dictionary this setting may need to move
            // above CreateDictionary - confirm against the SymSpell version in use.
            SymSpell.verbose         = 2;
            SymSpell.editDistanceMax = 3;
            //SymSpell.lp = 7;


            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            Application.Run(new Overlay(rankings));
        }
Пример #2
0
        /// <summary>
        /// Builds a SymSpell dictionary from a corpus file, reads the instance's private
        /// "words" field through reflection and writes every entry whose count is above 50
        /// and whose key is not a plain (optionally negative) integer to a log file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            int    initialCapacity           = 82765;
            string dictionaryPath            = @"D:\sbwce.txt";
            int    prefixLength              = 7;
            int    maxEditDistanceDictionary = 2;
            var    symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

            // Without a dictionary there is nothing to dump; report and stop.
            if (!symSpell.CreateDictionary(dictionaryPath))
            {
                Console.Error.WriteLine("File not found: " + dictionaryPath);
                return;
            }

            // "words" is a non-public instance field, so it is only reachable via reflection.
            System.Reflection.FieldInfo finfo = typeof(SymSpell).GetField("words", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
            System.Collections.Generic.Dictionary <string, System.Int64> collection = null;
            if (finfo != null)
            {
                collection = finfo.GetValue(symSpell) as System.Collections.Generic.Dictionary <string, System.Int64>;
            }

            // The field may be missing, renamed or of another type in a different SymSpell
            // version; fail with a message instead of a NullReferenceException in the loop.
            if (collection == null)
            {
                Console.Error.WriteLine("SymSpell field 'words' not found or of unexpected type");
                return;
            }

            // The using block disposes (flushes and closes) the writer; no explicit Close() is needed.
            using (System.IO.StreamWriter fileWriter = new System.IO.StreamWriter(@"D:\Personal\Master\Materias\TFM SLN\DictionaryFiles\load4.log"))
            {
                foreach (System.Collections.Generic.KeyValuePair <string, System.Int64> kvPair in collection)
                {
                    if (kvPair.Value > 50 && !Regex.IsMatch(kvPair.Key, @"^-?\d+$"))
                    {
                        fileWriter.WriteLine("{0} {1}", kvPair.Key, kvPair.Value);
                    }
                }
            }
        }
        /// <summary>
        /// Command-line front end for SymSpell: loads or creates a dictionary, then reads
        /// terms from standard input until an empty line or end of input and writes the
        /// suggestions of the selected lookup type to standard output.
        /// </summary>
        /// <param name="args">
        /// DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity].
        /// With fewer than three arguments the usage text is printed instead.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length > 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                //parameters
                int initialCapacity = 82765;
                int termIndex       = 0; //column of the term in the dictionary text file
                int countIndex      = 1; //column of the term frequency in the dictionary text file

                //dictionaryType
                //exact match required: a substring test would also accept "", "." or "oad"
                string dictionaryType = args[0].ToLowerInvariant();
                if (dictionaryType != "load" && dictionaryType != "create")
                {
                    Console.Error.WriteLine("Error in parameter 1"); return;
                }

                //dictionaryPath
                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1];

                //prefix length (optional parameter)
                //if args[2] is a number it is the prefix length and all later parameters shift by one
                int offset = 0;
                int prefixLength;
                if (int.TryParse(args[2], out prefixLength))
                {
                    offset = 1;
                }
                else
                {
                    prefixLength = 7;
                }

                //lookupType
                //mandatory: a missing or unknown value is an error, otherwise the input loop
                //would consume stdin without ever producing output
                string lookupType = "";
                if (args.Length > 2 + offset)
                {
                    lookupType = args[2 + offset].ToLowerInvariant();
                }
                if (lookupType != "lookup" && lookupType != "lookupcompound" && lookupType != "wordsegment")
                {
                    Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString()); return;
                }

                //maxEditDistance
                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 3 + offset)
                {
                    if (!int.TryParse(args[3 + offset], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter " + (4 + offset).ToString()); return;
                    }
                }

                //output stats
                bool outputStats = false;//false, true
                if (args.Length > 4 + offset)
                {
                    if (!bool.TryParse(args[4 + offset], out outputStats))
                    {
                        Console.Error.WriteLine("Error in parameter " + (5 + offset).ToString()); return;
                    }
                }

                //verbosity (parsed case-insensitively)
                var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
                if (args.Length > 5 + offset)
                {
                    if (!Enum.TryParse(args[5 + offset], true, out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter " + (6 + offset).ToString()); return;
                    }
                }

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (dictionaryType)
                {
                case "load":
                    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                case "create":
                    if (!symSpell.CreateDictionary(dictionaryPath))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                default:
                    break;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up (result intentionally discarded)
                symSpell.Lookup("warmup", SymSpell.Verbosity.All);

                //lookup suggestions for the input strings; an empty line or end of input stops the loop
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    switch (lookupType)
                    {
                    case "lookup":
                        var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceDictionary, true);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "lookupcompound":
                        var suggestions2 = symSpell.LookupCompound(inputTerm);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions2)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "wordsegment":
                        var suggestions3 = symSpell.WordSegmentation(inputTerm);
                        //display corrected string, edit distance sum and log probability sum
                        foreach (var suggestion in suggestions3)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.correctedString + " " + suggestion.distanceSum.ToString("N0") + " " + suggestion.probabilityLogSum.ToString());
                            }
                            else
                            {
                                Console.WriteLine(suggestion.correctedString);
                            }
                        }
                        break;

                    default:
                        break;
                    }
                }
            }
            else
            {
                //PrefixLength is number

                //help
                Console.WriteLine("SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]");
                Console.WriteLine();
                Console.WriteLine("DictionaryType=load|create");
                Console.WriteLine("   load: load dictionary from dictionary file");
                Console.WriteLine("   create: create dictionary from text corpus");
                Console.WriteLine("DictionaryPath: path to dictionary/corpus file");
                Console.WriteLine("PrefixLength: default=7 (speed/memory consumption trade-off)");  //dictionary param
                Console.WriteLine("   5: low memory, slow lookup");
                Console.WriteLine("   6: medium memory, medium lookup");
                Console.WriteLine("   7: high memory, fast lookup");
                //lookup intended for correction of single word
                //lookupcompound intended for correction of multiple words, it can insert only a single space per token, faster than wordsegmentation
                //wordsegmentation intended for segmentation and correction of multiple words, it can insert multiple spaces per token, slower than lookupcompound
                Console.WriteLine("LookupType=lookup|lookupcompound|wordsegment");
                Console.WriteLine("   lookup: correct single word");
                Console.WriteLine("   lookupcompound: correct multiple-word string (supports splitting/merging)");
                Console.WriteLine("   wordsegment: word segment and correct input string");
                Console.WriteLine("MaxEditDistance: default=2 (0: no correction, word segmentation only)");
                Console.WriteLine("OutputStats=false|true");
                Console.WriteLine("   false: only corrected string");
                Console.WriteLine("   true: corrected string, edit distance, word frequency/probability");
                Console.WriteLine("Verbosity=top|closest|all"); //no effect for lookupcompound and wordsegment
                Console.WriteLine("   top: Top suggestion");
                Console.WriteLine("   closest: All suggestions of smallest edit distance found");
                Console.WriteLine("   all: All suggestions within maxEditDistance");
                Console.WriteLine();
            }
        }
Пример #4
0
        /// <summary>
        /// Command-line front end for single-word SymSpell lookups: loads or creates a
        /// dictionary, then reads terms from standard input until an empty line or end of
        /// input and prints each suggestion with its edit distance and term frequency.
        /// </summary>
        /// <param name="args">
        /// load|create Path [MaxEditDistance] [Verbosity] [PrefixLength].
        /// With fewer than two arguments the usage text is printed instead.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length >= 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                //parameters
                int initialCapacity = 82765;

                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 2)
                {
                    if (!int.TryParse(args[2], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter 3"); return;
                    }
                }
                int maxEditDistanceLookup = maxEditDistanceDictionary; //max edit distance per lookup

                //parsed case-sensitively, as stated in the usage text
                var suggestionVerbosity = SymSpell.Verbosity.Top;      //Top, Closest, All
                if (args.Length > 3)
                {
                    if (!Enum.TryParse(args[3], out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter 4"); return;
                    }
                }

                int prefixLength = 7;
                if (args.Length > 4)
                {
                    if (!int.TryParse(args[4], out prefixLength))
                    {
                        Console.Error.WriteLine("Error in parameter 5"); return;
                    }
                }

                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1]; // "../../../../SymSpell/frequency_dictionary_en_82_765.txt";
                int    termIndex      = 0;                                               //column of the term in the dictionary text file
                int    countIndex     = 1;                                               //column of the term frequency in the dictionary text file

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (args[0].ToLowerInvariant())
                {
                case "load":
                    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                case "create":
                    if (!symSpell.CreateDictionary(dictionaryPath))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                default:
                    //unknown mode: stop instead of silently running lookups on an empty dictionary
                    Console.Error.WriteLine("Error in parameter 1");
                    return;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up (result intentionally discarded)
                symSpell.Lookup("warmup", SymSpell.Verbosity.All, 1);

                //lookup suggestions for single-word input strings; an empty line or end of input stops the loop
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceLookup, true);

                    //display suggestions, edit distance and term frequency
                    foreach (var suggestion in suggestions)
                    {
                        Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                    }
                }
            }
            else
            {
                //help
                Console.WriteLine("SymSpell.CommandLine load   Path [MaxEditDistance] [Verbosity] [PrefixLength]");
                Console.WriteLine("SymSpell.CommandLine create Path [MaxEditDistance] [Verbosity] [PrefixLength]");
                Console.WriteLine();
                Console.WriteLine("load: load dictionary from dictionary file");
                Console.WriteLine("create: create dictionary from text corpus");
                Console.WriteLine("MaxEditDistance: default=2");
                Console.WriteLine("Verbosity=Top|Closest|All (case-sensitive)");
                Console.WriteLine("PrefixLength: default=7 (5:low memory; 7:fast lookup)");
                Console.WriteLine();
            }
        }
Пример #5
0
        /// <summary>
        /// Creates a frequency dictionary from a text corpus, looks up every line of a key
        /// file and prints, per line, the number of suggestions followed by each suggestion.
        /// Finally prints the elapsed time and the change in managed memory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            long      memSize   = GC.GetTotalMemory(true);
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            //set parameters
            const int    initialCapacity = 82765;
            const int    maxEditDistance = 3;
            const int    prefixLength    = 7;
            const string corpusPath      = @"C:\Dev\csharp\names.txt";
            const string keysPath        = @"C:\Dev\csharp\keys.txt";
            var          symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            //Create the dictionary from a text corpus.
            //Alternatively a prepared frequency dictionary can be loaded with symSpell.LoadDictionary(path, 0, 1).
            if (!symSpell.CreateDictionary(corpusPath))
            {
                //without a dictionary every lookup would come back empty, so stop here
                Console.Error.WriteLine("File not found: " + corpusPath);
                return;
            }

            //report a missing key file instead of failing with an unhandled FileNotFoundException
            if (!File.Exists(keysPath))
            {
                Console.Error.WriteLine("File not found: " + keysPath);
                return;
            }

            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Top;

            using (Stream corpusStream = File.OpenRead(keysPath))
            using (StreamReader sr = new StreamReader(corpusStream))
            {
                String line;
                //process a single line at a time only for memory efficiency
                while ((line = sr.ReadLine()) != null)
                {
                    var suggestions = symSpell.Lookup(line.Trim(), verbosity);
                    Console.WriteLine(suggestions.Count);
                    foreach (var suggestion in suggestions)
                    {
                        Console.WriteLine(suggestion);
                    }
                }
            }

            //the measured time covers dictionary creation and all lookups
            stopWatch.Stop();
            Console.WriteLine(stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms ");
            long memDelta = GC.GetTotalMemory(true) - memSize;

            Console.WriteLine((memDelta / 1024 / 1024.0).ToString("N0") + " MB");
        }