예제 #1
0
        /// <summary>
        /// Looks up spelling suggestions for <paramref name="input"/> and returns only their terms.
        /// </summary>
        /// <param name="input">The term passed to <c>symSpell.Lookup</c>.</param>
        /// <param name="symSpell">The SymSpell instance that is queried.</param>
        /// <returns>
        /// The <c>term</c> of every suggestion, in the order <c>Lookup</c> returned them.
        /// </returns>
        /// <remarks>As a side effect, writes the number of suggestions to the console.</remarks>
        public static List<string> Correct(string input, SymSpell symSpell)
        {
            //check if input term or similar terms within edit-distance are in dictionary
            //(original note: results come back sorted by ascending edit distance, then by descending word frequency)
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

            List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

            //collect just the suggested terms; the list is presized because the final count is known
            var terms = new List<string>(suggestions.Count);
            foreach (var suggestion in suggestions)
            {
                terms.Add(suggestion.term);
            }

            //verbosity is a constant here, so this guard only matters if the constant above is changed to Top
            if (verbosity != SymSpell.Verbosity.Top)
            {
                Console.WriteLine(suggestions.Count.ToString() + " suggestions");
            }
            return terms;
        }
예제 #2
0
        /// <summary>
        /// Looks up spelling suggestions for <paramref name="input"/>, times the lookup,
        /// and prints the elapsed time followed by every suggestion to the console.
        /// </summary>
        /// <param name="input">The term passed to <c>symSpell.Lookup</c>.</param>
        /// <param name="symSpell">The SymSpell instance that is queried.</param>
        public static void Correct(string input, SymSpell symSpell)
        {
            //lookup mode used for the query
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

            //time only the lookup call itself
            Stopwatch timer = Stopwatch.StartNew();
            List<SymSpell.SuggestItem> matches = symSpell.Lookup(input, verbosity);
            timer.Stop();

            string elapsedText = timer.Elapsed.TotalMilliseconds.ToString("0.000");
            Console.WriteLine(elapsedText + " ms");

            //one output line per suggestion: term, edit distance, count
            for (int index = 0; index < matches.Count; index++)
            {
                var match = matches[index];
                string distanceText = match.distance.ToString();
                string countText = match.count.ToString("N0");
                Console.WriteLine(match.term + " " + distanceText + " " + countText);
            }

            //the total is reported for every verbosity except Top
            bool reportTotal = verbosity != SymSpell.Verbosity.Top;
            if (reportTotal)
            {
                Console.WriteLine(matches.Count.ToString() + " suggestions");
            }
        }
예제 #3
0
    /// <summary>
    /// Looks up spelling suggestions for <paramref name="input"/> using <c>Verbosity.All</c>
    /// and returns them unchanged.
    /// </summary>
    /// <param name="input">The term passed to <c>symSpell.Lookup</c>.</param>
    /// <param name="symSpell">The SymSpell instance that is queried.</param>
    /// <returns>The suggestion list exactly as returned by <c>Lookup</c>.</returns>
    /// <remarks>As a side effect, logs the number of suggestions via <c>Debug.Log</c>.</remarks>
    public static List<SymSpell.SuggestItem> Correct(string input, SymSpell symSpell)
    {
        //check if input term or similar terms within edit-distance are in dictionary
        //(original note: results come back sorted by ascending edit distance, then by descending word frequency)
        const SymSpell.Verbosity verbosity = SymSpell.Verbosity.All;

        List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

        //verbosity is a constant here, so this guard only matters if the constant above is changed to Top
        if (verbosity != SymSpell.Verbosity.Top)
        {
            Debug.Log(suggestions.Count.ToString() + " suggestions");
        }
        return suggestions;
    }
예제 #4
0
        /// <summary>
        /// Regression test: loads the English frequency dictionary, looks up up to 1000 noisy
        /// query terms with <c>Verbosity.Closest</c>, and checks that the total number of
        /// returned suggestions equals the recorded value 4945.
        /// </summary>
        public void LookupShouldReplicateNoisyResults()
        {
            var dir = AppDomain.CurrentDomain.BaseDirectory;

            const int editDistanceMax = 2;
            const int prefixLength = 7;
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;
            var symSpell = new SymSpell(83000, editDistanceMax, prefixLength);

            //Path.Combine adds a separator only when dir does not already end with one
            string path = Path.Combine(dir, "../../../SymSpell/frequency_dictionary_en_82_765.txt"); //for spelling correction (genuine English words)

            symSpell.LoadDictionary(path, 0, 1);

            //load up to 1000 terms with random spelling errors
            string[] testList = new string[1000];
            int loaded = 0;
            string queryPath = Path.Combine(dir, "../../../SymSpell.Demo/test_data/noisy_query_en_1000.txt");

            using (StreamReader sr = new StreamReader(File.OpenRead(queryPath)))
            {
                string line;
                //process a single line at a time only for memory efficiency;
                //stop once the array is full so extra lines cannot overrun it
                while (loaded < testList.Length && (line = sr.ReadLine()) != null)
                {
                    //a null separator splits on whitespace; only lines with at least two fields are used
                    string[] lineParts = line.Split(null);
                    if (lineParts.Length >= 2)
                    {
                        testList[loaded++] = lineParts[0];
                    }
                }
            }

            int resultSum = 0;

            //iterate only over the entries actually read, so unfilled (null) slots are never looked up
            for (int i = 0; i < loaded; i++)
            {
                resultSum += symSpell.Lookup(testList[i], verbosity, symSpell.MaxDictionaryEditDistance).Count;
            }
            Assert.AreEqual(4945, resultSum);
        }
예제 #5
0
        /// <summary>
        /// Creates a SymSpell dictionary from a text corpus, looks up every line of a key file
        /// with <c>Verbosity.Top</c>, and prints the suggestion count and suggestions for each line.
        /// Finally prints the total elapsed time and the change in managed memory.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <remarks>
        /// Both input paths are hard-coded. The stopwatch and the memory measurement span
        /// dictionary creation and all lookups (including console output), not creation alone.
        /// A prebuilt frequency dictionary could be loaded with <c>symSpell.LoadDictionary(path, 0, 1)</c>
        /// instead of creating one from a corpus.
        /// </remarks>
        public static void Main(string[] args)
        {
            //set parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 3;
            const int prefixLength = 7;
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Top;
            const string corpusPath = @"C:\Dev\csharp\names.txt";
            const string keysPath = @"C:\Dev\csharp\keys.txt";

            //baseline for the memory delta reported at the end
            long memSize = GC.GetTotalMemory(true);
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            var symSpell = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            //CreateDictionary reports failure through its return value (treated as "file not found" here);
            //looking up against a dictionary that failed to build would only produce meaningless results
            if (!symSpell.CreateDictionary(corpusPath))
            {
                Console.Error.WriteLine("File not found: " + Path.GetFullPath(corpusPath));
                return;
            }

            using (Stream corpusStream = File.OpenRead(keysPath))
            using (StreamReader sr = new StreamReader(corpusStream))
            {
                string line;
                //process a single line at a time only for memory efficiency
                while ((line = sr.ReadLine()) != null)
                {
                    var suggestions = symSpell.Lookup(line.Trim(), verbosity);
                    Console.WriteLine(suggestions.Count);
                    foreach (var suggestion in suggestions)
                    {
                        Console.WriteLine(suggestion);
                    }
                }
            }

            stopWatch.Stop();
            Console.WriteLine(stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms ");
            long memDelta = GC.GetTotalMemory(true) - memSize;

            //integer division to KB first, then floating-point division to MB
            Console.WriteLine((memDelta / 1024 / 1024.0).ToString("N0") + " MB");
        }