Ejemplo n.º 1
0
        /// <summary>
        /// Builds a SymSpell dictionary from the corpus at <c>D:\sbwce.txt</c>, then writes every
        /// term whose count is above 50 and that is not a plain (optionally negative) integer
        /// to a log file, one "term count" pair per line.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            int    initialCapacity           = 82765;
            string dictionaryPath            = @"D:\sbwce.txt";
            int    prefixLength              = 7;
            int    maxEditDistanceDictionary = 2;
            var    symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

            symSpell.CreateDictionary(dictionaryPath);
            Type typecontroller = typeof(SymSpell);

            // "words" is requested as a non-public instance field, so it can only be reached via reflection.
            System.Reflection.FieldInfo finfo = typecontroller.GetField("words", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.GetField);
            System.Collections.Generic.Dictionary<string, System.Int64> collection = null;
            if (finfo != null)
            {
                collection = (System.Collections.Generic.Dictionary<string, System.Int64>)finfo.GetValue(symSpell);
            }

            // The field may be missing (renamed, different SymSpell version) or unset; enumerating
            // a null dictionary would throw a NullReferenceException, so stop with a clear message.
            if (collection == null)
            {
                Console.Error.WriteLine("SymSpell field 'words' is not available; nothing to export.");
                return;
            }

            // Disposing the writer at the end of the using block flushes and closes the file.
            using (System.IO.StreamWriter fileWriter = new System.IO.StreamWriter(@"D:\Personal\Master\Materias\TFM SLN\DictionaryFiles\load4.log"))
            {
                foreach (System.Collections.Generic.KeyValuePair<string, System.Int64> kvPair in collection)
                {
                    if (kvPair.Value > 50 && !Regex.IsMatch(kvPair.Key, @"^-?\d+$"))
                    {
                        fileWriter.WriteLine("{0} {1}", kvPair.Key, kvPair.Value);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Looks up <paramref name="input"/> with the static SymSpell API and prints one line per
        /// suggestion ("term distance count"), followed by the number of suggestions when
        /// <c>SymSpell.verbose</c> is non-zero.
        /// </summary>
        /// <param name="input">Term to look up.</param>
        /// <param name="language">Language key forwarded to <c>SymSpell.Lookup</c>.</param>
        public static void Correct(string input, string language)
        {
            // Benchmarking hint: time 1000 consecutive SymSpell.Lookup calls with a Stopwatch
            // and print ElapsedMilliseconds.

            // Per the original note: results come back sorted by ascending edit distance,
            // then by descending word frequency.
            List<SymSpell.suggestItem> matches = SymSpell.Lookup(input, language, SymSpell.editDistanceMax);

            // One line per suggestion: term, edit distance, count.
            foreach (SymSpell.suggestItem match in matches)
            {
                string row = string.Concat(match.term, " ", match.distance.ToString(), " ", match.count.ToString("N0"));
                Console.WriteLine(row);
            }

            if (SymSpell.verbose == 0)
            {
                return;
            }

            Console.WriteLine(matches.Count.ToString() + " suggestions");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads the SymSpell dictionary from <c>../../ru.dict</c> and registers a singleton
        /// <c>CombinedProcessor</c> made of an empty-line remover followed by a per-word
        /// SymSpell processor.
        /// </summary>
        /// <param name="services">Service collection that receives the singleton.</param>
        private static void AddPostProcessing(IServiceCollection services)
        {
            var speller = new SymSpell();

            Console.Out.WriteLine("Loading SymSpell dictionary...");
            speller.LoadDictionary("../../ru.dict", termIndex: 0, countIndex: 1);
            Console.Out.WriteLine("SymSpell initialized!");

            // Stage 1: drop empty lines, normalizing line endings to LF.
            ITextPostProcessor emptyLineStage = new RemoveEmptyLinesProcessor(new RemoveEmptyLinesOptions
            {
                NormalizeLineEndings = NormalizeLineEndingsStrategy.Lf
            });

            // Stage 2: run the SymSpell processor on each word.
            ITextPostProcessor spellingStage = new SymSpellProcessor(speller, 1, Enumerable.Empty<string>());
            ITextPostProcessor perWordStage  = new PerWordProcessor(new ITextPostProcessor[] { spellingStage });

            var pipeline = new CombinedProcessor(new ITextPostProcessor[] { emptyLineStage, perWordStage });

            services.AddSingleton(pipeline);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Loads the draft tier list (one "name;rank" pair per line), builds the static SymSpell
        /// dictionary from the card-name corpus, configures the lookup parameters and starts the
        /// overlay window.
        /// </summary>
        static void Main()
        {
            var rankings = new Dictionary<string, string>();

            foreach (var line in System.IO.File.ReadAllLines("../../DraftTierList_01-11-2018_Overall.txt"))
            {
                var entry = line.Split(';');

                // Blank or malformed lines have no second ';'-separated field; reading entry[1]
                // on them would throw IndexOutOfRangeException, so they are skipped.
                if (entry.Length < 2)
                {
                    continue;
                }

                rankings.Add(entry[0], entry[1]);
            }

            string path = @"../../EternalCardName_Corpus2.txt";

            if (!SymSpell.CreateDictionary(path, ""))
            {
                Console.Error.WriteLine("File not found: " + System.IO.Path.GetFullPath(path));
            }

            //verbosity=Top=0: the suggestion with the highest term frequency of the suggestions of smallest edit distance found
            //verbosity=Closest=1: all suggestions of smallest edit distance found, the suggestions are ordered by term frequency
            //verbosity=All=2: all suggestions <= maxEditDistance, the suggestions are ordered by edit distance, then by term frequency (slower, no early termination)

            SymSpell.verbose         = 2;
            SymSpell.editDistanceMax = 3;
            //SymSpell.lp = 7;

            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            Application.Run(new Overlay(rankings));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Times a single <c>Lookup</c> of <paramref name="input"/> with verbosity
        /// <c>Closest</c>, prints the elapsed milliseconds, then one "term distance count" line
        /// per suggestion and finally the number of suggestions.
        /// </summary>
        /// <param name="input">Term to look up.</param>
        /// <param name="symSpell">Spell checker instance to query.</param>
        public static void Correct(string input, SymSpell symSpell)
        {
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

            // Only the lookup itself is timed.
            Stopwatch timer = Stopwatch.StartNew();
            List<SymSpell.SuggestItem> matches = symSpell.Lookup(input, verbosity);
            timer.Stop();

            double elapsedMs = timer.Elapsed.TotalMilliseconds;
            Console.WriteLine(elapsedMs.ToString("0.000") + " ms");

            // One line per suggestion: term, edit distance, count.
            foreach (var match in matches)
            {
                string row = string.Concat(match.term, " ", match.distance.ToString(), " ", match.count.ToString("N0"));
                Console.WriteLine(row);
            }

            // The total is printed for every verbosity except Top.
            if (verbosity != SymSpell.Verbosity.Top)
            {
                Console.WriteLine(matches.Count.ToString() + " suggestions");
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Looks up <paramref name="input"/> with verbosity <c>Closest</c> and returns the
        /// suggested terms in the order the lookup produced them. Also prints the number of
        /// suggestions to the console.
        /// </summary>
        /// <param name="input">Term to look up.</param>
        /// <param name="symSpell">Spell checker instance to query.</param>
        /// <returns>The <c>term</c> of every suggestion; empty when there are none.</returns>
        public static List<string> Correct(string input, SymSpell symSpell)
        {
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

            List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

            // Collect only the terms. The former "_.ToList()" call built a throwaway List<char>
            // for every suggestion and had no effect on the result, so it is gone.
            List<string> lastOf = new List<string>(suggestions.Count);

            foreach (var suggestion in suggestions)
            {
                lastOf.Add(suggestion.term);
            }

            if (verbosity != SymSpell.Verbosity.Top)
            {
                Console.WriteLine(suggestions.Count.ToString() + " suggestions");
            }
            return lastOf;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Creates a SymSpell instance and loads <c>frequency_dictionary_en_82_765.txt</c> from
        /// the application's base directory.
        /// </summary>
        /// <param name="ErrorMsg">
        /// Empty on success; otherwise a description of why no instance was returned (missing
        /// dictionary file or the text of the exception that was caught).
        /// </param>
        /// <returns>The loaded instance, or <c>null</c> when loading failed.</returns>
        public SymSpell CreateDictionary(out string ErrorMsg)
        {
            ErrorMsg = string.Empty;
            try
            {
                const int initialCapacity = 82765;
                const int maxEditDistance = 2;
                const int prefixLength    = 7;
                var       symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

                // Path.Combine inserts a separator only when the base directory lacks one.
                string path = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "frequency_dictionary_en_82_765.txt");
                if (!symSpell.LoadDictionary(path, 0, 1))
                {
                    // Previously this returned null with an empty message, so callers could not tell why.
                    ErrorMsg = "Dictionary file not found: " + path;
                    return null;
                }

                // Warm-up lookup so the first real query does not pay the first-call cost.
                symSpell.Lookup("warmup", SymSpell.Verbosity.All);
                return symSpell;
            }
            catch (Exception ex)
            {
                ErrorMsg = ex.ToString();
                return null;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads <paramref name="filename"/> line by line, splits each line on single spaces and
        /// counts every token in <c>pToken</c>. Adds the token total to <c>tokencounter</c> and
        /// the number of lines read to <c>probability</c>.
        /// </summary>
        /// <param name="corrector">Spell checker reference; not used by this method.</param>
        /// <param name="filename">Path of the text file to read.</param>
        /// <returns>The number of lines read from the file.</returns>
        public int parseFile(ref SymSpell corrector, string filename)
        {
            int ret = 0;

            // The using block releases the file handle even if reading throws.
            using (StreamReader in_ = new StreamReader(filename))
            {
                string line;
                while ((line = in_.ReadLine()) != null)
                {
                    string[] tokens = line.Split(' ');
                    tokencounter += tokens.Length;
                    foreach (string token in tokens)
                    {
                        if (!pToken.ContainsKey(token))
                        {
                            pToken[token] = 1;
                        }
                        else
                        {
                            pToken[token]++;
                        }
                    }
                    ret++;
                }
            }

            probability += ret;
            return ret;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Two entries sharing the prefix "pip" must each report their own count, whether one of
        /// the words or the bare prefix is looked up.
        /// </summary>
        public void WordsWithSharedPrefixShouldRetainCounts()
        {
            var speller = new SymSpell(16, 1, 3);
            speller.CreateDictionaryEntry("pipe", 5);
            speller.CreateDictionaryEntry("pips", 10);

            // Looking up "pipe": "pipe" is listed first, "pips" second.
            var forPipe = speller.Lookup("pipe", SymSpell.Verbosity.All, 1);
            Assert.AreEqual(2, forPipe.Count);
            Assert.AreEqual("pipe", forPipe[0].term);
            Assert.AreEqual(5, forPipe[0].count);
            Assert.AreEqual("pips", forPipe[1].term);
            Assert.AreEqual(10, forPipe[1].count);

            // Looking up "pips": the order flips.
            var forPips = speller.Lookup("pips", SymSpell.Verbosity.All, 1);
            Assert.AreEqual(2, forPips.Count);
            Assert.AreEqual("pips", forPips[0].term);
            Assert.AreEqual(10, forPips[0].count);
            Assert.AreEqual("pipe", forPips[1].term);
            Assert.AreEqual(5, forPips[1].count);

            // Looking up the shared prefix "pip": "pips" (count 10) is expected before "pipe" (count 5).
            var forPip = speller.Lookup("pip", SymSpell.Verbosity.All, 1);
            Assert.AreEqual(2, forPip.Count);
            Assert.AreEqual("pips", forPip[0].term);
            Assert.AreEqual(10, forPip[0].count);
            Assert.AreEqual("pipe", forPip[1].term);
            Assert.AreEqual(5, forPip[1].count);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Click handler: loads the English frequency dictionary, spell-checks the lower-cased
        /// content of <c>richTextBox1</c> and appends every suggested term to <c>listBox1</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data; not used.</param>
        private void button1_Click(object sender, EventArgs e)
        {
            //create object
            int initialCapacity           = 82765;
            int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
            var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

            //load dictionary
            string dictionaryPath = "../../frequency_dictionary_en_82_765.txt";
            int    termIndex      = 0; //column of the term in the dictionary text file
            int    countIndex     = 1; //column of the term frequency in the dictionary text file

            if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
            {
                richTextBox1.Text = "File not found!";

                // Stop here: continuing would spell-check the error message that was just
                // written into the text box, against a dictionary that never loaded.
                return;
            }

            string wrongWord      = richTextBox1.Text;
            string lowerWrongWord = wrongWord.ToLower();

            int maxEditDistanceLookup = 1;                          //max edit distance per lookup (maxEditDistanceLookup<=maxEditDistanceDictionary)
            var suggestionVerbosity   = SymSpell.Verbosity.Closest; //Top, Closest, All
            var suggestions           = symSpell.Lookup(lowerWrongWord, suggestionVerbosity, maxEditDistanceLookup);

            foreach (var suggestion in suggestions)
            {
                listBox1.Items.Add(suggestion.term);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Runs word segmentation on <paramref name="input"/> and prints the corrected string,
        /// the distance sum and the log-probability sum on one line.
        /// </summary>
        /// <param name="input">Text to segment.</param>
        /// <param name="symSpell">Spell checker instance to query.</param>
        private static void Correct(string input, SymSpell symSpell)
        {
            var segmentation = symSpell.WordSegmentation(input);

            // Output layout: corrected text, distance sum ("N0" format), log-probability sum.
            string distanceText    = segmentation.distanceSum.ToString("N0");
            string probabilityText = segmentation.probabilityLogSum.ToString();

            Console.WriteLine(segmentation.correctedString + " " + distanceText + " " + probabilityText);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// A word added with count 1 must not be returned by a lookup on an instance
        /// constructed with 10 as its fourth argument.
        /// </summary>
        public void LookupShouldNotReturnLowCountWord()
        {
            // NOTE(review): the fourth constructor argument (10) is presumably the minimum
            // count a word needs to be considered valid; confirm against the SymSpell constructor.
            var speller = new SymSpell(16, 2, 7, 10);
            speller.CreateDictionaryEntry("pawn", 1);

            var matches = speller.Lookup("pawn", SymSpell.Verbosity.Top, 0);

            Assert.AreEqual(0, matches.Count);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Adding the same word twice must leave <c>WordCount</c> at 1.
        /// </summary>
        public void AddAdditionalCountsShouldNotAddWordAgain()
        {
            string term    = "hello";
            var    speller = new SymSpell();

            // First insertion creates the entry.
            speller.CreateDictionaryEntry(term, 11);
            Assert.AreEqual(1, speller.WordCount);

            // Second insertion of the same term must not create another entry.
            speller.CreateDictionaryEntry(term, 3);
            Assert.AreEqual(1, speller.WordCount);
        }
        /// <summary>
        /// Console demo for word segmentation: loads the English frequency dictionary from the
        /// application's base directory, prints load statistics, then segments and corrects
        /// every line typed until an empty line or end of input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Dictionary parameters.
            const int capacity     = 82765;
            const int editDistance = 0;
            const int prefixLen    = 7;
            SymSpell  speller      = new SymSpell(capacity, editDistance, prefixLen);

            Console.Write("Creating dictionary ...");
            long      bytesBefore = GC.GetTotalMemory(true);
            Stopwatch loadTimer   = Stopwatch.StartNew();

            // Frequency dictionary. Per the original notes it combines Google Books Ngram data
            // (representative word frequencies, but with misspelled entries) with SCOWL, the
            // Spell Checker Oriented Word Lists (genuine English vocabulary, but no frequencies).
            // This path applies when referencing the SymSpell core project; with the SymSpell
            // NuGet package the file is at "../../frequency_dictionary_en_82_765.txt".
            string dictionaryFile = AppDomain.CurrentDomain.BaseDirectory + "frequency_dictionary_en_82_765.txt";

            bool loaded = speller.LoadDictionary(dictionaryFile, 0, 1);
            if (!loaded)
            {
                Console.Error.WriteLine("\rFile not found: " + Path.GetFullPath(dictionaryFile));
                Console.ReadKey();
                return;
            }

            // Alternative (original notes): build the dictionary from a text corpus such as
            // http://norvig.com/big.txt via SymSpellCompound.CreateDictionary(path, ""). The corpus
            // should be free of spelling errors and invalid terms, with representative word
            // frequencies. Mixed-language vocabulary is possible through the language parameter of
            // Correct() and CreateDictionary(). SymSpellCompound.CreateDictionaryEntry() updates a
            // (self-learning) dictionary incrementally, and phrases (e.g. "united kingdom" for
            // "unitedkingom") can be added the same way.

            loadTimer.Stop();
            long bytesUsed = GC.GetTotalMemory(true) - bytesBefore;

            // Pieces of the statistics line, evaluated in the order they are printed.
            string words     = speller.WordCount.ToString("N0");
            string entries   = speller.EntryCount.ToString("N0");
            string distance  = speller.MaxDictionaryEditDistance.ToString();
            string elapsed   = loadTimer.Elapsed.TotalMilliseconds.ToString("0.0");
            string megabytes = (bytesUsed / 1024 / 1024.0).ToString("N0");

            Console.WriteLine("\rDictionary: " + words + " words, "
                              + entries + " entries, edit distance=" + distance
                              + " in " + elapsed + "ms "
                              + megabytes + " MB");

            // Warm up; the result is not needed.
            speller.WordSegmentation("isit");

            Console.WriteLine("Type in a text and hit enter to get word segmentation and correction:");
            while (true)
            {
                // End of input (null) is treated like an empty line.
                string typed = (Console.ReadLine() ?? "").Trim();
                if (string.IsNullOrEmpty(typed))
                {
                    break;
                }

                Correct(typed, speller);
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Pre-run: executes a dictionary load and one lookup on both the current and the
        /// <c>Original</c> SymSpell implementation so the code has run once before benchmarks
        /// are timed. The lookup results are discarded.
        /// </summary>
        static void WarmUp()
        {
            // Current implementation.
            SymSpell current = new SymSpell(16, 2, 7);
            current.LoadDictionary(DictionaryPath[0], 0, 1);
            current.Lookup("hockie", SymSpell.Verbosity.All, 1);

            // Original implementation, same dictionary and query.
            Original.SymSpell baseline = new Original.SymSpell(2, 7);
            baseline.LoadDictionary(DictionaryPath[0], "", 0, 1);
            baseline.Lookup("hockie", "", 1, 2);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// With verbosity <c>Top</c>, looking up a word that is in the dictionary must return
        /// exactly that word, even when other entries with higher counts are close to it.
        /// </summary>
        public void LookupShouldFindExactMatch()
        {
            var speller = new SymSpell();
            speller.CreateDictionaryEntry("steama", 4);
            speller.CreateDictionaryEntry("steamb", 6);
            speller.CreateDictionaryEntry("steamc", 2);

            var matches = speller.Lookup("steama", SymSpell.Verbosity.Top, 2);

            Assert.AreEqual(1, matches.Count);
            Assert.AreEqual("steama", matches[0].term);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Experiment 1: concatenates the full-text annotation of every Google Vision response
        /// file found in <c>D:\json\</c>, splits the text into space-separated tokens, looks each
        /// token up in SymSpell (verbosity <c>Top</c>) and hands the per-token rows to
        /// <c>CreateExcelFileExperimento</c>.
        /// </summary>
        private static void Experimento1()
        {
            string strPath = @"D:\json\";

            string[]      fileEntries = Directory.GetFiles(strPath);
            StringBuilder OCROriginal = new StringBuilder();

            //Symspell parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 5;
            const int prefixLength    = 7;
            SymSpell  symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);
            Dictionary<int, ExperimentSpell> excelMatrix = new Dictionary<int, ExperimentSpell>();

            // Gather the OCR text of every response in every file.
            foreach (string path in fileEntries)
            {
                string jsonText = File.ReadAllText(path, Encoding.Default);
                var    response = Google.Protobuf.JsonParser.Default.Parse<Google.Cloud.Vision.V1.AnnotateFileResponse>(jsonText);
                foreach (var respuestas in response.Responses)
                {
                    var annotation = respuestas.FullTextAnnotation;
                    if (annotation != null)
                    {
                        OCROriginal.Append(annotation.Text);
                    }
                }
            }

            symSpell.LoadDictionary(@"D:\DictionaryFiles\default.txt", 0, 1);

            // Normalize punctuation before tokenizing. ": " is parked as "***" so the remaining
            // colons can be turned into spaces, then restored.
            var arrayOCROriginal = OCROriginal.ToString().Replace("\n", " ").Replace("{", "").Replace("}", "").Replace(": ", "***").Replace(" : ", " ").Replace(":", " ").Replace("***", ": ").Replace(". ", " ").Replace(", ", " ").Replace("-", " ").Split(' ');

            int k = 0;

            foreach (string item in arrayOCROriginal)
            {
                // Default row: token unchanged ("igual").
                ExperimentSpell exp1 = new ExperimentSpell();
                exp1.correction = "igual";
                exp1.original   = item;
                exp1.correctionLookupCompound = item;

                List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(item, SymSpell.Verbosity.Top);
                if (suggestions.Count > 0)
                {
                    string best = suggestions[0].term;
                    exp1.correctionLookupCompound = best;

                    // A suggestion equal to the token itself is not a modification; only mark the
                    // row "modificada" when the suggested term actually differs.
                    if (best != item)
                    {
                        exp1.correction = "modificada";
                    }
                }
                excelMatrix.Add(k++, exp1);
            }
            CreateExcelFileExperimento(excelMatrix, "1");
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Console demo for single-word correction: loads a frequency dictionary (or,
        /// alternatively, builds one from a text corpus), prints load statistics, then corrects
        /// every line typed until an empty line or end of input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            Console.Write("Creating dictionary ...");
            long      bytesBefore = GC.GetTotalMemory(true);
            Stopwatch loadTimer   = Stopwatch.StartNew();

            // Dictionary parameters.
            const int capacity     = 82765;
            const int editDistance = 2;
            const int prefixLen    = 7;
            var       speller      = new SymSpell(capacity, editDistance, prefixLen);

            // Frequency dictionary. Per the original notes it combines Google Books Ngram data
            // (representative word frequencies, but with misspelled entries) with SCOWL, the
            // Spell Checker Oriented Word Lists (genuine English vocabulary, but no frequencies).
            // Benchmark-only alternatives (contain non-genuine English words):
            //   "../../../SymSpell.Demo/test_data/frequency_dictionary_en_30_000.txt"
            //   "../../../SymSpell.Demo/test_data/frequency_dictionary_en_500_000.txt"
            // With the SymSpell NuGet package the file is at "../../frequency_dictionary_en_82_765.txt".
            string dictionaryFile = "../../../SymSpell/frequency_dictionary_en_82_765.txt";    // genuine English words

            bool loaded = speller.LoadDictionary(dictionaryFile, 0, 1);
            if (!loaded)
            {
                Console.Error.WriteLine("File not found: " + Path.GetFullPath(dictionaryFile));
            }

            // Alternative (original notes): build the dictionary from a text corpus such as
            // http://norvig.com/big.txt via symSpell.CreateDictionary(path). The corpus should be
            // free of spelling errors and invalid terms, with representative word frequencies.
            // SymSpell.CreateDictionaryEntry() updates a (self-learning) dictionary incrementally;
            // phrases (e.g. "united kingdom" for "unitedkingom") can be added the same way, or use
            // https://github.com/wolfgarbe/SymSpellCompound.

            loadTimer.Stop();
            long bytesUsed = GC.GetTotalMemory(true) - bytesBefore;

            // Pieces of the statistics line, evaluated in the order they are printed.
            string words     = speller.WordCount.ToString("N0");
            string entries   = speller.EntryCount.ToString("N0");
            string distance  = speller.MaxDictionaryEditDistance.ToString();
            string elapsed   = loadTimer.Elapsed.TotalMilliseconds.ToString("0.0");
            string megabytes = (bytesUsed / 1024 / 1024.0).ToString("N0");

            Console.WriteLine("\rDictionary: " + words + " words, "
                              + entries + " entries, edit distance=" + distance
                              + " in " + elapsed + "ms "
                              + megabytes + " MB");

            // Warm up; the result is not needed.
            speller.Lookup("warmup", SymSpell.Verbosity.All, 1);

            while (true)
            {
                // End of input (null) is treated like an empty line.
                string typed = (Console.ReadLine() ?? "").Trim();
                if (string.IsNullOrEmpty(typed))
                {
                    break;
                }

                Correct(typed, speller);
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// With verbosity <c>Top</c>, looking up "steam" against three equally distant entries
        /// must return only the one with the highest count ("steamb", 6).
        /// </summary>
        public void LookupShouldReturnMostFrequent()
        {
            var speller = new SymSpell();
            speller.CreateDictionaryEntry("steama", 4);
            speller.CreateDictionaryEntry("steamb", 6);
            speller.CreateDictionaryEntry("steamc", 2);

            var matches = speller.Lookup("steam", SymSpell.Verbosity.Top, 2);

            Assert.AreEqual(1, matches.Count);
            Assert.AreEqual("steamb", matches[0].term);
            Assert.AreEqual(6, matches[0].count);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs <c>LookupCompound</c> on <paramref name="input"/> using the dictionary's maximum
        /// edit distance and prints one "term distance count" line per suggestion.
        /// </summary>
        /// <param name="input">Text to correct.</param>
        /// <param name="symSpell">Spell checker instance to query.</param>
        private static void Correct(string input, SymSpell symSpell)
        {
            var maxDistance = symSpell.MaxDictionaryEditDistance;
            List<SymSpell.SuggestItem> corrections = symSpell.LookupCompound(input, maxDistance);

            // One line per suggestion: term, edit distance, count.
            foreach (var correction in corrections)
            {
                string row = string.Concat(correction.term, " ", correction.distance.ToString(), " ", correction.count.ToString("N0"));
                Console.WriteLine(row);
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Interactive check of the Bayes classifier: registers the "grasa" and "good" classes,
        /// then classifies every line read from the console until the input ends.
        /// </summary>
        public static void Test()
        {
            SymSpell corrector = new SymSpell();
            Bayes    cl        = new Bayes(ref corrector);

            cl.addClass("./GENERATIVE/BAYES/grasa", "grasa");
            cl.addClass("./GENERATIVE/BAYES/good", "good");
            while (true)
            {
                Console.Write("Input=");
                string line = Console.ReadLine();

                // ReadLine returns null once the input stream is closed (Ctrl+Z / Ctrl+D or
                // redirected input). Without this check the loop would keep passing null to
                // classify forever.
                if (line == null)
                {
                    break;
                }

                Console.WriteLine(cl.classify(line));
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Creates the <c>symSpell</c> instance and loads its dictionary from the "dataset"
        /// text asset; logs a message when loading fails.
        /// </summary>
        public void initDict()
        {
            int capacity        = 20000;
            int maxEditDistance = 3;       // maximum edit distance per dictionary precalculation
            symSpell = new SymSpell(capacity, maxEditDistance);

            TextAsset dataset     = Resources.Load<TextAsset>("dataset");
            int       termColumn  = 0;     // column of the term in the dictionary text
            int       countColumn = 1;     // column of the term frequency in the dictionary text

            bool loaded = symSpell.LoadDictionary(dataset, termColumn, countColumn);
            if (loaded)
            {
                return;
            }

            Debug.Log("Unable to load dictionary");
        }
Ejemplo n.º 23
0
        /// <summary>
        /// The same query must yield 1, 2 and 3 suggestions for verbosity <c>Top</c>,
        /// <c>Closest</c> and <c>All</c> respectively.
        /// </summary>
        public void VerbosityShouldControlLookupResults()
        {
            var speller = new SymSpell();
            speller.CreateDictionaryEntry("steam", 1);
            speller.CreateDictionaryEntry("steams", 2);
            speller.CreateDictionaryEntry("steem", 3);

            var topOnly = speller.Lookup("steems", SymSpell.Verbosity.Top, 2);
            Assert.AreEqual(1, topOnly.Count);

            var closest = speller.Lookup("steems", SymSpell.Verbosity.Closest, 2);
            Assert.AreEqual(2, closest.Count);

            var everything = speller.Lookup("steems", SymSpell.Verbosity.All, 2);
            Assert.AreEqual(3, everything.Count);
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Loads up to <paramref name="testNumber"/> query terms from <paramref name="path"/>
        /// (first whitespace-separated column of every line that has at least two columns),
        /// runs 10 rounds of <c>SymSpell.Lookup</c> over them and prints the number of results
        /// of one round together with the average time per round in milliseconds.
        /// </summary>
        /// <param name="path">Path of the file holding the query terms.</param>
        /// <param name="testNumber">Maximum number of terms to load and look up.</param>
        public static void Benchmark(string path, int testNumber)
        {
            int resultSum = 0;

            string[] testList = new string[testNumber];

            // Number of terms actually read. It can be lower than testNumber when the file is
            // short, and must never exceed it or the array write below would go out of range.
            int loaded = 0;

            using (StreamReader sr = new StreamReader(File.OpenRead(path)))
            {
                string line;

                //process a single line at a time only for memory efficiency
                while (loaded < testNumber && (line = sr.ReadLine()) != null)
                {
                    string[] lineParts = line.Split((char[])null);   // null separator = split on whitespace
                    if (lineParts.Length >= 2)
                    {
                        testList[loaded++] = lineParts[0];
                    }
                }
            }

            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            //perform n rounds of Lookup over the loaded terms
            int rounds = 10;

            for (int j = 0; j < rounds; j++)
            {
                resultSum = 0;

                // Only the filled part of the array is queried, so no null term reaches Lookup.
                for (int i = 0; i < loaded; i++)
                {
                    List<SymSpell.SuggestItem> suggestions = SymSpell.Lookup(testList[i], "", SymSpell.editDistanceMax);
                    resultSum += suggestions.Count;
                }
            }
            stopWatch.Stop();
            Console.WriteLine(resultSum.ToString("N0") + " results in " + (stopWatch.ElapsedMilliseconds / rounds).ToString() + " ms");
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Console demo using the static SymSpell API: sets the global parameters, loads a
        /// frequency dictionary (or, alternatively, builds one from a text corpus), prints load
        /// statistics, then corrects every line typed until an empty line or end of input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // Global parameters.
            SymSpell.verbose         = 0;
            SymSpell.editDistanceMax = 2;
            SymSpell.lp              = 7;

            Console.Write("Creating dictionary ...");
            Stopwatch loadTimer = Stopwatch.StartNew();

            // Frequency dictionary. Per the original notes it combines Google Books Ngram data
            // (representative word frequencies, but with misspelled entries) with SCOWL, the
            // Spell Checker Oriented Word Lists (genuine English vocabulary, but no frequencies).
            // Benchmark-only alternatives (contain non-genuine English words):
            //   "../../../symspelldemo/test_data/frequency_dictionary_en_30_000.txt"
            //   "../../../symspelldemo/test_data/frequency_dictionary_en_500_000.txt"
            // With the SymSpell NuGet package the file is at "../../frequency_dictionary_en_82_765.txt".
            string dictionaryFile = "../../../symspell/frequency_dictionary_en_82_765.txt";    // genuine English words

            bool loaded = SymSpell.LoadDictionary(dictionaryFile, "", 0, 1);
            if (!loaded)
            {
                Console.Error.WriteLine("File not found: " + Path.GetFullPath(dictionaryFile));
            }

            // Alternative (original notes): build the dictionary from a text corpus such as
            // http://norvig.com/big.txt via SymSpell.CreateDictionary(path, ""). The corpus should
            // be free of spelling errors and invalid terms, with representative word frequencies.
            // Mixed-language vocabulary is possible through the language parameter of Correct()
            // and CreateDictionary(). SymSpell.CreateDictionaryEntry() updates a (self-learning)
            // dictionary incrementally; phrases (e.g. "united kingdom" for "unitedkingom") can be
            // added the same way, or use https://github.com/wolfgarbe/SymSpellCompound.

            loadTimer.Stop();

            // Pieces of the statistics line, evaluated in the order they are printed.
            string words     = SymSpell.wordlist.Count.ToString("N0");
            string entries   = SymSpell.dictionary.Count.ToString("N0");
            string distance  = SymSpell.editDistanceMax.ToString();
            string elapsed   = loadTimer.ElapsedMilliseconds.ToString();
            string megabytes = (Process.GetCurrentProcess().PrivateMemorySize64 / 1000000).ToString("N0");

            Console.WriteLine("\rDictionary: " + words + " words, " + entries + " entries, edit distance=" + distance + " in " + elapsed + "ms " + megabytes + " MB");

            // To benchmark: Benchmark("../../../symspelldemo/test_data/noisy_query_en_1000.txt",1000);

            while (true)
            {
                // End of input (null) is treated like an empty line.
                string typed = (Console.ReadLine() ?? "").Trim();
                if (string.IsNullOrEmpty(typed))
                {
                    break;
                }

                Correct(typed, "");
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Creates the <c>sym</c> spell checker and loads the English frequency dictionary from
        /// the "SymSpell" folder under the streaming assets path; logs an error when the load fails.
        /// </summary>
        private void InitSym()
        {
            // Spell checker parameters.
            int capacity        = 82765;
            int maxEditDistance = 2;       // maximum edit distance per dictionary precalculation
            sym = new SymSpell(capacity, maxEditDistance);

            // Dictionary location and column layout.
            string dictionaryFile = Path.Combine(Application.streamingAssetsPath, "SymSpell", "frequency_dictionary_en_82_765.txt");
            int    termColumn     = 0;     // column of the term in the dictionary text file
            int    countColumn    = 1;     // column of the term frequency in the dictionary text file

            bool loaded = sym.LoadDictionary(dictionaryFile, termColumn, countColumn);
            if (loaded)
            {
                return;
            }

            Debug.LogError("Dictionary file not found! Aborting...");
        }
Ejemplo n.º 27
0
    /// <summary>
    /// Creates the <c>symSpell</c> instance and loads the English frequency dictionary
    /// from the "SymSpell" folder under <c>Application.dataPath</c>.
    /// Logs "File not found!" when <c>LoadDictionary</c> reports failure.
    /// </summary>
    public void LoadDictionary()
    {
        //create object
        int initialCapacity           = 82765;
        int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation

        symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

        //load dictionary
        //The path is assembled with Path.Combine instead of literal '\' separators:
        //outside Windows a backslash is an ordinary file-name character, so a
        //concatenated "\SymSpell\..." suffix would never resolve to the file.
        string dictionaryPath = System.IO.Path.Combine(Application.dataPath, "SymSpell", "frequency_dictionary_en_82_765.txt");

        int termIndex  = 0; //column of the term in the dictionary text file
        int countIndex = 1; //column of the term frequency in the dictionary text file

        if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
        {
            Debug.Log("File not found!");
        }
    }
Ejemplo n.º 28
0
        /// <summary>
        /// Initializes a new instance with a fresh spelling corrector, a Bayes
        /// classifier bound to that corrector, and empty functor, generator,
        /// variable, group and knowledge containers. The global-variable table
        /// starts with a single empty entry under the "" key.
        /// <c>Functors.populate()</c> is invoked only while <c>init</c> is false,
        /// after which <c>init</c> is set to true.
        /// </summary>
        public Parlogike()
        {
            // The corrector must exist first: the classifier receives it by reference.
            corrector        = new SymSpell();
            BayesClassifiers = new Bayes(ref corrector);

            // Functor lookup tables.
            externFunctors = new Dictionary <string, Func <Parlogike, string, List <Variable>, char, bool, Pattern, string, string, Result> >();
            internFunctors = new Dictionary <string, Func <Parlogike, List <Variable>, int, bool> >();

            // Generators and global variables; the "" key is present from the start.
            MarkovGenerators = new Dictionary <string, Markov>();
            var globals = new Dictionary <string, Dictionary <string, Variable> >();
            globals[""]     = new Dictionary <string, Variable>();
            GlobalVariables = globals;

            // Remaining working state.
            LocalStack = new List <string>();
            Residues   = new List <string>();
            Groups     = new Dictionary <string, List <string> >();
            knowledge  = new List <Pattern>();

            // Functor population happens once, guarded by the init flag.
            if (init)
            {
                return;
            }

            Functors.populate();
            init = true;
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Builds a spelling-corrected proposal for a chat line, shows it next to the
        /// original text in a <c>SpellingCorrection</c> dialog, and returns the final
        /// text followed by "\r\n".
        /// </summary>
        /// <param name="tempHtmlLines">
        /// Line to correct. Everything up to and including the first ':' is kept
        /// unchanged; the text after it, minus its last five characters, is passed
        /// to word segmentation.
        /// </param>
        /// <param name="spellingEngine">SymSpell instance whose <c>WordSegmentation</c> produces the corrected text.</param>
        /// <returns>
        /// The text of the dialog's suggestion box when the dialog result is true,
        /// otherwise the automatically generated correction; "\r\n" is appended in
        /// both cases.
        /// </returns>
        private string FixBadSpelling(string tempHtmlLines, SymSpell spellingEngine)
        {
            // Trailing characters excluded from correction
            // (presumably closing markup at the end of the line - TODO confirm).
            const int trailingLength = 5;

            SpellingCorrection dialog = new SpellingCorrection();

            dialog.OriginalChatText.Text = tempHtmlLines;

            //Space out tags
            tempHtmlLines = tempHtmlLines.Replace("*", " * ");
            tempHtmlLines = tempHtmlLines.Replace(". . . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". .", "... ");

            // IndexOf yields -1 when there is no ':', so the post then starts at index 0.
            int postStartIndex = tempHtmlLines.IndexOf(':') + 1;
            int postLength     = tempHtmlLines.Length - postStartIndex - trailingLength;

            //Use SymSpell to fix horrible spelling
            // Lines with nothing left after the prefix and the trailing characters are
            // not segmented; a non-positive length would make Substring throw.
            string fixedLine = string.Empty;
            if (postLength > 0)
            {
                string postSubString = tempHtmlLines.Substring(postStartIndex, postLength);
                fixedLine = spellingEngine.WordSegmentation(postSubString).correctedString;
            }

            // Re-attach the untouched prefix and undo the tag spacing added above.
            string fixedStuff = (tempHtmlLines.Substring(0, postStartIndex) + " " + fixedLine).Replace(" * ", "*");

            dialog.SuggestedChatTextTextBox.Text = fixedStuff;

            dialog.ShowDialog();

            // Use the (possibly user-edited) suggestion only when the dialog result is true.
            bool accepted = dialog.DialogResult.HasValue && dialog.DialogResult.Value;

            return (accepted ? dialog.SuggestedChatTextTextBox.Text : fixedStuff) + "\r\n";
        }
Ejemplo n.º 30
0
    /// <summary>
    /// Looks up spelling suggestions for <paramref name="input"/> using verbosity
    /// <c>All</c>, logs how many were found, and returns them.
    /// </summary>
    /// <param name="input">Term to look up.</param>
    /// <param name="symSpell">SymSpell instance that performs the lookup.</param>
    /// <returns>The list returned by <c>symSpell.Lookup</c>.</returns>
    public static List <SymSpell.SuggestItem> Correct(string input, SymSpell symSpell)
    {
        //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
        const SymSpell.Verbosity verbosity = SymSpell.Verbosity.All;

        List <SymSpell.SuggestItem> suggestions = symSpell.Lookup(input, verbosity);

        //the count is only reported for verbosity levels other than Top
        if (verbosity != SymSpell.Verbosity.Top)
        {
            Debug.Log(suggestions.Count.ToString() + " suggestions");
        }
        return(suggestions);
    }