/// <summary>
/// Runs SymSpell word segmentation on <paramref name="input"/> and prints the
/// result to standard output.
/// </summary>
/// <param name="input">Text to segment and correct.</param>
/// <param name="symSpell">SymSpell instance used for the segmentation.</param>
private static void Correct(string input, SymSpell symSpell)
        {
            //WordSegmentation returns one composite result for the whole input (not a ranked suggestion list)
            var suggestion = symSpell.WordSegmentation(input);

            //display corrected string, summed edit distance and summed log probability
            Console.WriteLine(suggestion.correctedString + " " + suggestion.distanceSum.ToString("N0") + " " + suggestion.probabilityLogSum.ToString());
        }
        /// <summary>
        /// Interactive word-segmentation demo: loads the English frequency dictionary,
        /// prints load statistics, then segments and corrects every line typed on
        /// standard input until an empty line (or end of input) is read.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            //set parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 0;
            const int prefixLength    = 7;
            SymSpell  symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            Console.Write("Creating dictionary ...");
            long      memSize   = GC.GetTotalMemory(true);
            //the stopwatch has to be running, otherwise the reported load time is always 0
            Stopwatch stopWatch = Stopwatch.StartNew();

            //Load a frequency dictionary
            //wordfrequency_en.txt  ensures high correction quality by combining two data sources:
            //Google Books Ngram data  provides representative word frequencies (but contains many entries with spelling errors)
            //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
            string path = AppDomain.CurrentDomain.BaseDirectory + "frequency_dictionary_en_82_765.txt"; //path referencing the SymSpell core project

            //when using the symspell nuget package the dictionary is at "../../frequency_dictionary_en_82_765.txt"
            if (!symSpell.LoadDictionary(path, 0, 1))
            {
                //only bail out when loading failed; wait for a key so the message stays visible
                Console.Error.WriteLine("\rFile not found: " + Path.GetFullPath(path));
                Console.ReadKey();
                return;
            }

            //Alternatively the dictionary can be created from a text corpus.
            //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
            //The dictionary may contain vocabulary from different languages.

            stopWatch.Stop();
            long memDelta = GC.GetTotalMemory(true) - memSize;

            Console.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                              + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                              + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                              + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

            //warm up (result intentionally ignored)
            symSpell.WordSegmentation("isit");

            Console.WriteLine("Type in a text and hit enter to get word segmentation and correction:");

            //an empty or whitespace-only line, or end of input, terminates the loop
            string input;
            while (!string.IsNullOrEmpty(input = (Console.ReadLine() ?? "").Trim()))
            {
                Correct(input, symSpell);
            }
        }
// Example #3
        /// <summary>
        /// Spell-corrects the message part of a chat line (everything after the first
        /// ':') with SymSpell word segmentation and returns the corrected line
        /// terminated by CR/LF.
        /// </summary>
        /// <param name="tempHtmlLines">Chat line to correct. The last five characters are left out of the corrected text.</param>
        /// <param name="spellingEngine">SymSpell instance used for the correction.</param>
        /// <returns>
        /// The text of the dialog's suggestion box when the dialog reports a positive
        /// result, otherwise the automatically corrected line; "\r\n" is appended in both cases.
        /// </returns>
        private string FixBadSpelling(string tempHtmlLines, SymSpell spellingEngine)
        {
            SpellingCorrection dialog = new SpellingCorrection();

            dialog.OriginalChatText.Text = tempHtmlLines;

            //Space out tags so that '*' markers are not glued to the neighbouring words
            tempHtmlLines = tempHtmlLines.Replace("*", " * ");
            //Collapse spaced-out dot runs, longest pattern first
            tempHtmlLines = tempHtmlLines.Replace(". . . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". .", "... ");

            //IndexOf returns -1 when there is no ':', which makes the whole line the message
            int postStartIndex = tempHtmlLines.IndexOf(':') + 1;

            //presumably the last 5 characters are a closing tag - TODO confirm against the caller
            int postLength = tempHtmlLines.Length - postStartIndex - 5;

            //Use SymSpell to fix the spelling; skip the engine when nothing is left to correct
            //(Substring would throw on a negative length)
            string fixedLine = "";
            if (postLength > 0)
            {
                string postSubString = tempHtmlLines.Substring(postStartIndex, postLength);
                if (postSubString.Trim().Length > 0)
                {
                    fixedLine = spellingEngine.WordSegmentation(postSubString).correctedString;
                }
            }

            string fixedStuff = (tempHtmlLines.Substring(0, postStartIndex) + " " + fixedLine).Replace(" * ", "*");

            dialog.SuggestedChatTextTextBox.Text = fixedStuff;

            //NOTE(review): the dialog is never shown in this method, so DialogResult is
            //presumably still unset here - confirm whether a ShowDialog() call is missing.
            if (dialog.DialogResult.HasValue && dialog.DialogResult.Value)
            {
                return dialog.SuggestedChatTextTextBox.Text + "\r\n";
            }

            return fixedStuff + "\r\n";
        }
        /// <summary>
        /// Command-line front end for SymSpell: loads or creates a dictionary, then
        /// corrects every line read from standard input until an empty line (or end of
        /// input) is read. Prints the usage text when fewer than three arguments are given.
        /// </summary>
        /// <param name="args">
        /// DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length > 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                //the stopwatch has to be running, otherwise the reported load time is always 0
                Stopwatch stopWatch = Stopwatch.StartNew();

                int initialCapacity = 82765;
                int termIndex       = 0; //column of the term in the dictionary text file
                int countIndex      = 1; //column of the term frequency in the dictionary text file

                //exact match: a substring test would also accept "", "." or "oad"
                string dictionaryType = args[0].ToLowerInvariant();
                if (dictionaryType != "load" && dictionaryType != "create")
                {
                    Console.Error.WriteLine("Error in parameter 1");
                    return;
                }

                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1];

                //prefix length (optional parameter): the following arguments shift by one only when it is present
                int offset = 0;
                int prefixLength;
                if (int.TryParse(args[2], out prefixLength))
                {
                    offset = 1;
                }
                else
                {
                    prefixLength = 7;
                }

                string lookupType = "";
                if (args.Length > 2 + offset)
                {
                    lookupType = args[2 + offset].ToLowerInvariant();
                    if (lookupType != "lookup" && lookupType != "lookupcompound" && lookupType != "wordsegment")
                    {
                        Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString());
                        return;
                    }
                }

                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 3 + offset)
                {
                    if (!int.TryParse(args[3 + offset], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter " + (4 + offset).ToString());
                        return;
                    }
                }

                //output stats
                bool outputStats = false;//false, true
                if (args.Length > 4 + offset)
                {
                    if (!bool.TryParse(args[4 + offset], out outputStats))
                    {
                        Console.Error.WriteLine("Error in parameter " + (5 + offset).ToString());
                        return;
                    }
                }

                var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
                if (args.Length > 5 + offset)
                {
                    if (!Enum.TryParse(args[5 + offset], true, out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter " + (6 + offset).ToString());
                        return;
                    }
                }

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (dictionaryType)
                {
                    case "load":
                        if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                        {
                            Console.Error.WriteLine("File not found!");
                        }
                        break;

                    case "create":
                        if (!symSpell.CreateDictionary(dictionaryPath))
                        {
                            Console.Error.WriteLine("File not found!");
                        }
                        break;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up (result intentionally ignored)
                symSpell.Lookup("warmup", SymSpell.Verbosity.All);

                //correct each input line; an empty line or end of input terminates the loop
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    switch (lookupType)
                    {
                        case "lookup":
                            var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceDictionary, true);
                            //with stats: term, edit distance and term frequency; without: term only
                            foreach (var suggestion in suggestions)
                            {
                                if (outputStats)
                                {
                                    Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                                }
                                else
                                {
                                    Console.WriteLine(suggestion.term);
                                }
                            }
                            break;

                        case "lookupcompound":
                            var suggestions2 = symSpell.LookupCompound(inputTerm);
                            //with stats: term, edit distance and term frequency; without: term only
                            foreach (var suggestion in suggestions2)
                            {
                                if (outputStats)
                                {
                                    Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                                }
                                else
                                {
                                    Console.WriteLine(suggestion.term);
                                }
                            }
                            break;

                        case "wordsegment":
                            //WordSegmentation returns a single composite result, not a list
                            var segmentation = symSpell.WordSegmentation(inputTerm);
                            if (outputStats)
                            {
                                Console.WriteLine(segmentation.correctedString + " " + segmentation.distanceSum.ToString("N0") + " " + segmentation.probabilityLogSum.ToString());
                            }
                            else
                            {
                                Console.WriteLine(segmentation.correctedString);
                            }
                            break;
                    }
                }
            }
            else
            {
                //too few arguments: print usage
                //PrefixLength is number

                Console.WriteLine("SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]");
                Console.WriteLine("DictionaryType=load|create");
                Console.WriteLine("   load: load dictionary from dictionary file");
                Console.WriteLine("   create: create dictionary from text corpus");
                Console.WriteLine("DictionaryPath: path to dictionary/corpus file");
                Console.WriteLine("PrefixLength: default=7 (speed/memory consumption trade-off)");  //dictionary param
                Console.WriteLine("   5: low memory, slow lookup");
                Console.WriteLine("   6: medium memory, medium lookup");
                Console.WriteLine("   7: high memory, fast lookup");
                //lookup intended for correction of single word
                //lookupcompound intended for correction of multiple words, it can insert only a single space per token, faster than wordsegmentation
                //wordsegmentation intended for segmentation and correction of multiple words, it can insert multiple spaces per token, slower than lookupcompound
                Console.WriteLine("LookupType=lookup|lookupcompound|wordsegment");
                Console.WriteLine("   lookup: correct single word");
                Console.WriteLine("   lookupcompound: correct multiple-word string (supports splitting/merging)");
                Console.WriteLine("   wordsegment: word segment and correct input string");
                Console.WriteLine("MaxEditDistance: default=2 (0: no correction, word segmentation only)");
                Console.WriteLine("OutputStats=false|true");
                Console.WriteLine("   false: only corrected string");
                Console.WriteLine("   true: corrected string, edit distance, word frequency/probability");
                Console.WriteLine("Verbosity=top|closest|all"); //no effect for lookupcompound and wordsegment
                Console.WriteLine("   top: Top suggestion");
                Console.WriteLine("   closest: All suggestions of smallest edit distance found");
                Console.WriteLine("   all: All suggestions within maxEditDistance");
            }
        }
// Example #5
        /// <summary>
        /// Reads the text file named by args[0] line by line, classifies each line with a
        /// small state machine (date line, score line, otherwise review text) and runs
        /// SymSpell word segmentation over the runs of English letters and spaces found
        /// in review lines. A writer is opened on a sibling file whose name is args[0]
        /// with "_adj" inserted four characters before the end.
        /// </summary>
        // NOTE(review): this excerpt has lost its brace-only lines and apparently also
        // `else` lines, block-comment delimiters and the statements that write to `sw`;
        // the control flow described in the comments below is inferred from indentation
        // only and must be confirmed against the original file.
        static void Main(string[] args)
            //Console.WriteLine("Hello World!");
            //create object
            int initialCapacity           = 82765;
            int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
            var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary);

            //load dictionary
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            //string dictionaryPath= baseDirectory + "frequency_dictionary_en_82_765.txt";

            // dictionary is expected three directory levels above the application base directory
            string dictionaryPath = baseDirectory + "../../../frequency_dictionary_en_82_765.txt";

            int termIndex  = 0; //column of the term in the dictionary text file
            int countIndex = 1; //column of the term frequency in the dictionary text file

            if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                Console.WriteLine("File not found!");
                //press any key to exit program

            // i tracks what the previous lines were: set to 1 after a date line, to 2 after a score line
            int    i = 0;
            string contentUndetermine   = "";
            string contentDate          = "";
            string contentScore         = "";
            string contentLineOfReviews = "";

             *  here is a method provided by which allow user to read the whole document at once
             *  and pass all the content as a single string
             *  decide not to read the whole document as pass all the content as a string since the string may be so huge

            // string path = "C:/Users/kongwh/Desktop/test2/t1.txt";

            // StreamReader textFile = new StreamReader(path);

            // string input = textFile.ReadToEnd();

            // textFile.Close();

             *  here is a method provided by tutorial point which read a document line by line
             *  and pass each line as a string
             *  and write string to a document

            string line = "";
            // NOTE(review): args[0] is read without checking args.Length
            string nameOfadjustedDocument = args[0];

            // inserts "_adj" four characters before the end of the name (assumes a three-letter
            // extension such as ".txt" - TODO confirm); Insert throws for names shorter than 4 characters
            nameOfadjustedDocument = nameOfadjustedDocument.Insert(nameOfadjustedDocument.Length - 4, "_adj");

            using (StreamWriter sw = new StreamWriter(nameOfadjustedDocument)){
                //using(StreamWriter sw = new StreamWriter(adj.txt)){
                using (StreamReader sr = new StreamReader(args[0])) {
                    //using (StreamReader sr = new StreamReader("t1.txt")) {
                    while ((line = sr.ReadLine()) != null)
                        //Console.WriteLine("line: " + line);
                        //Console.WriteLine("line length: " + line.Length);

                        contentUndetermine = line;

                        // isFormatOfDate is defined outside this excerpt
                        if (isFormatOfDate(contentUndetermine))
                            //contentUndetermine is a date
                            contentDate = contentUndetermine;
                            i           = 1;
                        else if (i == 1)
                            //contentUndetermine is a score
                            contentScore = contentUndetermine;
                            i = 2;
                            // NOTE(review): presumably a separate `else` branch (review text line) starts here - confirm
                            string onlyEnglishAndSpace = "";
                            string notEnglishAndSpace  = "";
                            string adjustedReviewLine  = "";

                            contentLineOfReviews = contentUndetermine;

                            // accumulate English letters/spaces; isEnglishLetter is defined outside this excerpt
                            foreach (char charInLine in contentLineOfReviews)
                                if (charInLine.ToString().Contains(" ") || isEnglishLetter(charInLine))
                                    onlyEnglishAndSpace = onlyEnglishAndSpace + charInLine.ToString();
                                    // NOTE(review): presumably an `else` branch for any other character starts here - confirm
                                    notEnglishAndSpace = charInLine.ToString();

                                    if (onlyEnglishAndSpace.Equals(""))
                                        adjustedReviewLine = adjustedReviewLine + notEnglishAndSpace;
                                        //word segmentation and correction for multi-word input strings with/without spaces
                                        var suggestion = symSpell.WordSegmentation(onlyEnglishAndSpace);
                                        adjustedReviewLine = adjustedReviewLine + suggestion.correctedString + notEnglishAndSpace;

                                    onlyEnglishAndSpace = "";
                                    notEnglishAndSpace  = "";

                            // flush the English run that is still pending at the end of the line
                            if (!onlyEnglishAndSpace.Equals(""))
                                //word segmentation and correction for multi-word input strings with/without spaces
                                var suggestion = symSpell.WordSegmentation(onlyEnglishAndSpace);
                                adjustedReviewLine = adjustedReviewLine + suggestion.correctedString;

                            // NOTE(review): no statement writing contentDate, contentScore or adjustedReviewLine to `sw` is visible in this excerpt

            //----------------functions used for spell check provided by symSpell------------------

            // //word segmentation and correction for multi-word input strings with/without spaces
            // var suggestion1 = symSpell.WordSegmentation(input);

            // //display term and edit distance
            // Console.WriteLine(suggestion1.correctedString);

            // //lookup suggestions for single-word input strings
            // string inputTerm="goodandnicedesign";
            // //string inputTerm=suggestion1.correctedString;
            // int maxEditDistanceLookup = 1; //max edit distance per lookup (maxEditDistanceLookup<=maxEditDistanceDictionary)
            // var suggestionVerbosity = SymSpell.Verbosity.Closest; //Top, Closest, All
            // var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceLookup);
            // //lookup suggestions for multi-word input strings (supports compound splitting & merging)
            // //inputTerm="whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixtgrade and ins pired him";
            // maxEditDistanceLookup = 2; //max edit distance per lookup (per single word, not per whole input string)
            // suggestions = symSpell.LookupCompound(inputTerm, maxEditDistanceLookup);

            // //display suggestions, edit distance and term frequency
            // foreach (var suggestion in suggestions)
            // {
            // Console.WriteLine(suggestion.term);
            // }

            //press any key to exit program
        /// <summary>
        /// Runs word segmentation on <paramref name="text"/> through the <c>symSpell</c>
        /// member, passing <paramref name="distance"/> as the second argument.
        /// </summary>
        // NOTE(review): the rest of the body (including the return statement) is not
        // visible in this excerpt; presumably the segmentation result is returned as the
        // declared tuple - confirm.
        public (string segmentedString, string correctedString, int distanceSum, decimal probabilityLogSum) segmentText(string text, int distance)
            var suggestion = symSpell.WordSegmentation(text, distance);
