Exemplo n.º 1
0
        /// <summary>
        /// Returns the best spelling correction for <paramref name="input"/>, or the input itself
        /// when no suggestion is available.
        /// </summary>
        /// <param name="input">Word or phrase to correct; null or empty is returned unchanged.</param>
        /// <returns>The term of the first suggestion, or <paramref name="input"/> if there is none.</returns>
        public string SpellCheck(string input)
        {
            // Nothing to correct; this also avoids dereferencing a null input.
            if (string.IsNullOrEmpty(input))
            {
                return(input);
            }

            // max edit distance per lookup (per single word, not per whole input string);
            // maxEditDistanceLookup <= maxEditDistanceDictionary
            const int maxEditDistanceLookup = 2;

            List <SymSpell.SuggestItem> suggestions;

            // An input without any space is a single word; anything else goes through the compound lookup.
            if (input.IndexOf(' ') < 0)
            {
                // single word spell check
                suggestions = spellChecker.Lookup(input, SymSpell.Verbosity.Closest, maxEditDistanceLookup);
            }
            else
            {
                // multi word spell check
                suggestions = spellChecker.LookupCompound(input, maxEditDistanceLookup);
            }

            // return first suggestion if it exists, else hand the input back
            return((suggestions != null && suggestions.Count != 0) ? suggestions[0].term : input);
        }
        //Load a frequency dictionary or create a frequency dictionary from a text corpus
        /// <summary>
        /// Console demo: loads the English frequency dictionary into a SymSpell instance, prints
        /// load statistics, then reads lines from standard input and prints compound-lookup
        /// suggestions for each until an empty line (or end of input) is read.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            //set parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 2;
            const int prefixLength    = 7;
            SymSpell  symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            Console.Write("Creating dictionary ...");
            long      memSize   = GC.GetTotalMemory(true);
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            //Load a frequency dictionary
            //wordfrequency_en.txt  ensures high correction quality by combining two data sources:
            //Google Books Ngram data  provides representative word frequencies (but contains many entries with spelling errors)
            //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
            //Path.Combine inserts a directory separator only when the base directory does not already end with one.
            string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "frequency_dictionary_en_82_765.txt"); //path referencing the SymSpell core project

            //string path = "../../frequency_dictionary_en_82_765.txt";  //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
            if (!symSpell.LoadDictionary(path, 0, 1))
            {
                Console.Error.WriteLine("\rFile not found: " + Path.GetFullPath(path));
                Console.ReadKey();
                return;
            }

            //Alternatively Create the dictionary from a text corpus (e.g. http://norvig.com/big.txt )
            //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
            //The dictionary may contain vocabulary from different languages.
            //To extend spelling correction beyond single words to phrases (e.g. correcting "unitedkingom" to "united kingdom") simply add those phrases with CreateDictionaryEntry().

            stopWatch.Stop();
            long memDelta = GC.GetTotalMemory(true) - memSize;

            Console.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                              + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                              + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                              + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

            //warm up: the result is intentionally discarded
            symSpell.LookupCompound("isit");

            string input;

            Console.WriteLine("Type in a word or phrase and hit enter to get suggestions:");
            //ReadLine returns null at end of input; treat that like an empty line and stop
            while (!string.IsNullOrEmpty(input = (Console.ReadLine() ?? "").Trim()))
            {
                Correct(input, symSpell);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs a compound lookup for <paramref name="input"/> and writes every suggestion to the
        /// console as "term distance count".
        /// </summary>
        /// <param name="input">Word or phrase to look up.</param>
        /// <param name="symSpell">Spell checker whose dictionary is queried.</param>
        private static void Correct(string input, SymSpell symSpell)
        {
            // Use the largest edit distance the dictionary was built for.
            int maxDistance = symSpell.MaxDictionaryEditDistance;
            List <SymSpell.SuggestItem> matches = symSpell.LookupCompound(input, maxDistance);

            // One output line per suggestion: term, edit distance, frequency.
            foreach (SymSpell.SuggestItem match in matches)
            {
                string line = match.term + " " + match.distance.ToString() + " " + match.count.ToString("N0");
                Console.WriteLine(line);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Proposes a spelling-corrected version of a chat line and lets the user review it in a
        /// <c>SpellingCorrection</c> dialog.
        /// </summary>
        /// <remarks>
        /// Everything up to and including the first ':' is kept as-is; the text after it, minus the
        /// last five characters of the line, is run through word segmentation.
        /// NOTE(review): the five dropped characters are presumably a trailing tag or line ending;
        /// they are not re-appended to the result - confirm this is intended.
        /// </remarks>
        /// <param name="tempHtmlLines">The raw chat line.</param>
        /// <param name="spellingEngine">Spell checker used for word segmentation.</param>
        /// <returns>
        /// The dialog's text when the dialog result is true, otherwise the automatically corrected
        /// line; in both cases followed by "\r\n".
        /// </returns>
        private string FixBadSpelling(string tempHtmlLines, SymSpell spellingEngine)
        {
            // Number of characters at the end of the line that are excluded from correction.
            const int trailingCharsToDrop = 5;

            SpellingCorrection dialog = new SpellingCorrection();

            dialog.OriginalChatText.Text = tempHtmlLines;

            //Space out tags
            tempHtmlLines = tempHtmlLines.Replace("*", " * ");
            //Collapse spaced-out dot runs, longest pattern first so shorter patterns do not split longer ones
            tempHtmlLines = tempHtmlLines.Replace(". . . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". . .", "... ");
            tempHtmlLines = tempHtmlLines.Replace(". .", "... ");

            // IndexOf returns -1 when there is no ':', which makes the post start at index 0.
            int postStartIndex = tempHtmlLines.IndexOf(':') + 1;

            // A short line would make the length negative and Substring would throw; treat it as an empty post.
            int    postLength    = tempHtmlLines.Length - postStartIndex - trailingCharsToDrop;
            string postSubString = postLength > 0 ? tempHtmlLines.Substring(postStartIndex, postLength) : "";

            //Use SymSpell to fix horrible spelling; there is nothing to segment for an empty post
            string fixedLine = postSubString.Length > 0
                               ? spellingEngine.WordSegmentation(postSubString).correctedString
                               : "";
            string fixedStuff = (tempHtmlLines.Substring(0, postStartIndex) + " " + fixedLine).Replace(" * ", "*");

            dialog.SuggestedChatTextTextBox.Text = fixedStuff;

            dialog.ShowDialog();

            // Prefer the (possibly user-edited) dialog text only when the dialog was accepted.
            bool accepted = dialog.DialogResult.HasValue && dialog.DialogResult.Value;
            return((accepted ? dialog.SuggestedChatTextTextBox.Text : fixedStuff) + "\r\n");
        }
        /// <summary>
        /// Command-line front end: builds a SymSpell dictionary from the given file and then corrects
        /// every line read from standard input until an empty line (or end of input) is read.
        /// With fewer than three arguments the usage text is printed instead.
        /// </summary>
        /// <param name="args">
        /// DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length > 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                //parameters
                int initialCapacity = 82765;
                int termIndex       = 0; //column of the term in the dictionary text file
                int countIndex      = 1; //column of the term frequency in the dictionary text file

                //dictionaryType: must match one of the keywords exactly (a substring such as "oad" or "." is rejected)
                string dictionaryType = args[0].ToLowerInvariant();
                if (dictionaryType != "load" && dictionaryType != "create")
                {
                    Console.Error.WriteLine("Error in parameter 1"); return;
                }

                //dictionaryPath
                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1];

                //prefix length (optional parameter): when args[2] is a number, all later parameters shift by one
                int    offset       = 0;
                string lookupType   = "";
                int    prefixLength = 7;
                if (!int.TryParse(args[2], out prefixLength))
                {
                    //TryParse resets the out value on failure, so restore the default
                    prefixLength = 7;
                }
                else
                {
                    offset = 1;
                }

                //lookupType: must match one of the keywords exactly
                //NOTE(review): when PrefixLength is given and LookupType is omitted, lookupType stays ""
                //and the input loop below prints nothing.
                if (args.Length > 2 + offset)
                {
                    lookupType = args[2 + offset].ToLowerInvariant();
                    if (lookupType != "lookup" && lookupType != "lookupcompound" && lookupType != "wordsegment")
                    {
                        Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString()); return;
                    }
                }

                //maxEditDistance
                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 3 + offset)
                {
                    if (!int.TryParse(args[3 + offset], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter " + (4 + offset).ToString()); return;
                    }
                }

                //output stats
                bool outputStats = false;//false, true
                if (args.Length > 4 + offset)
                {
                    if (!bool.TryParse(args[4 + offset], out outputStats))
                    {
                        Console.Error.WriteLine("Error in parameter " + (5 + offset).ToString()); return;
                    }
                }

                //verbosity
                var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
                if (args.Length > 5 + offset)
                {
                    if (!Enum.TryParse(args[5 + offset], true, out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter " + (6 + offset).ToString()); return;
                    }
                }

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (dictionaryType)
                {
                case "load":
                    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                case "create":
                    if (!symSpell.CreateDictionary(dictionaryPath))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                default:
                    break;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up: the result is intentionally discarded
                symSpell.Lookup("warmup", SymSpell.Verbosity.All);

                //correct every input line; an empty line or end of input (ReadLine returns null) stops the loop
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    switch (lookupType)
                    {
                    case "lookup":
                        var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceDictionary, true);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "lookupcompound":
                        var suggestions2 = symSpell.LookupCompound(inputTerm);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions2)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "wordsegment":
                        var suggestions3 = symSpell.WordSegmentation(inputTerm);
                        //display corrected string, summed edit distance and summed log probability
                        foreach (var suggestion in suggestions3)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.correctedString + " " + suggestion.distanceSum.ToString("N0") + " " + suggestion.probabilityLogSum.ToString());
                            }
                            else
                            {
                                Console.WriteLine(suggestion.correctedString);
                            }
                        }
                        break;

                    default:
                        break;
                    }
                }
            }
            else
            {
                //PrefixLength is number

                //help
                Console.WriteLine("SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]");
                Console.WriteLine();
                Console.WriteLine("DictionaryType=load|create");
                Console.WriteLine("   load: load dictionary from dictionary file");
                Console.WriteLine("   create: create dictionary from text corpus");
                Console.WriteLine("DictionaryPath: path to dictionary/corpus file");
                Console.WriteLine("PrefixLength: default=7 (speed/memory consumption trade-off)");  //dictionary param
                Console.WriteLine("   5: low memory, slow lookup");
                Console.WriteLine("   6: medium memory, medium lookup");
                Console.WriteLine("   7: high memory, fast lookup");
                //lookup intended for correction of single word
                //lookupcompound intended for correction of multiple words, it can insert only a single space per token, faster than wordsegmentation
                //wordsegmentation intended for segmentation and correction of multiple words, it can insert multiple spaces per token, slower than lookupcompound
                Console.WriteLine("LookupType=lookup|lookupcompound|wordsegment");
                Console.WriteLine("   lookup: correct single word");
                Console.WriteLine("   lookupcompound: correct multiple-word string (supports splitting/merging)");
                Console.WriteLine("   wordsegment: word segment and correct input string");
                Console.WriteLine("MaxEditDistance: default=2 (0: no correction, word segmentation only)");
                Console.WriteLine("OutputStats=false|true");
                Console.WriteLine("   false: only corrected string");
                Console.WriteLine("   true: corrected string, edit distance, word frequency/probability");
                Console.WriteLine("Verbosity=top|closest|all"); //no effect for lookupcompound and wordsegment
                Console.WriteLine("   top: Top suggestion");
                Console.WriteLine("   closest: All suggestions of smallest edit distance found");
                Console.WriteLine("   all: All suggestions within maxEditDistance");
                Console.WriteLine();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Scores documents against a free-text query. Each query token is resolved through a
        /// pipeline: exact dictionary match, then (for tokens of at most three characters) the first
        /// trie suggestion, then spelling correction. Rewritten tokens are pushed back to the front
        /// of the queue and resolved again.
        /// </summary>
        /// <param name="query">Input query; lower-cased and split on spaces.</param>
        /// <param name="trie">Trie used for prefix/infix matching of short tokens.</param>
        /// <param name="symSpell">Spell checker used when no exact or prefix match exists.</param>
        /// <param name="inverter">Inverted index: word id to the ids of the documents it is looked up in.</param>
        /// <param name="dict">Maps a word to its id.</param>
        /// <param name="documents">Collection of documents, indexed by document id.</param>
        /// <param name="limit">NOTE(review): currently unused; results are neither sorted nor truncated.</param>
        /// <returns>One <c>SearchResult</c> per matched document, in no particular order.</returns>
        static SearchResult[] Search(
            // input query
            string query,
            // trie for prefix/infix matching
            PatriciaSuffixTrie <string> trie,
            SymSpell symSpell,
            // inverted index
            Dictionary <int, HashSet <int> > inverter,
            // word -> its order
            Dictionary <string, int> dict,
            // collection of documents
            List <string> documents,
            // limit
            int limit
            )
        {
            // Upper bound on how often a single query token may be rewritten (prefix suggestion or
            // spelling correction). Without it, rewrites that never reach a dictionary word would
            // keep the loop below running forever.
            const int maxRewritesPerToken = 4;

            var aggregated = new Dictionary <int, SearchResult>();
            // Work queue of (token, number of rewrites that produced it).
            var tokens = new LinkedList <KeyValuePair <string, int> >();

            // Repeated spaces would otherwise yield empty tokens, which cannot match anything useful.
            foreach (var word in query.ToLower().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                tokens.AddLast(new KeyValuePair <string, int>(word, 0));
            }

            while (tokens.Count > 0)
            {
                // pop_front the queue
                var current = tokens.First.Value;
                tokens.RemoveFirst();
                string word     = current.Key;
                int    rewrites = current.Value;

                // pipeline:
                // 1. find exact matches first
                int wordId;
                if (dict.TryGetValue(word, out wordId))
                {
                    foreach (var doc in inverter[wordId])
                    {
                        SearchResult hit;
                        // add to aggregated result
                        if (!aggregated.TryGetValue(doc, out hit))
                        {
                            aggregated[doc] = new SearchResult(documents[doc], 1);
                        }
                        else
                        {
                            // NOTE(review): this only updates the stored result if SearchResult is a reference type.
                            hit.score++;
                        }
                    }
                    continue;
                }

                // This token has been rewritten too often without reaching a dictionary word: give up on it.
                if (rewrites >= maxRewritesPerToken)
                {
                    continue;
                }

                // 2. if no exact match then search for prefix suggestions (for prefix <= 3)
                if (word.Length <= 3)
                {
                    string suggestion = null;
                    // take 1 suggestion first
                    foreach (var suggest in trie.Retrieve(word))
                    {
                        suggestion = suggest;
                        Console.WriteLine("Prefix matched: " + suggestion);
                        break;
                    }
                    // A suggestion equal to the token itself would just be re-queued unchanged,
                    // so fall through to spelling correction in that case.
                    if (suggestion != null && suggestion != word)
                    {
                        // push_front
                        tokens.AddFirst(new KeyValuePair <string, int>(suggestion, rewrites + 1));
                        continue;
                    }
                }

                // 3. if no prefix suggestion found then correct spelling
                var corrections = symSpell.LookupCompound(word);
                if (corrections == null || corrections.Count == 0)
                {
                    continue;
                }

                var correctedTokens = corrections[0].term.Split(' ');
                // push in reverse so the corrected words keep their order at the front of the queue
                for (int i = correctedTokens.Length - 1; i >= 0; --i)
                {
                    string corrected = correctedTokens[i];
                    // Skip empty pieces and "corrections" identical to the unmatched token:
                    // re-queueing them could never produce a match.
                    if (corrected.Length == 0 || corrected == word)
                    {
                        continue;
                    }
                    tokens.AddFirst(new KeyValuePair <string, int>(corrected, rewrites + 1));
                }
            }

            // then sort??
            return(aggregated.Values.ToArray());
        }
        /// <summary>
        /// Runs a compound lookup on <paramref name="text"/> with the shared spell checker.
        /// </summary>
        /// <param name="text">Text to correct.</param>
        /// <param name="distance">Maximum edit distance passed to the lookup.</param>
        /// <returns>The suggestion list exactly as returned by the lookup.</returns>
        public List <SymSpell.SuggestItem> correctText(string text, int distance)
        {
            return(symSpell.LookupCompound(text, distance));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Experiment 3: concatenates the OCR text of every JSON response in D:\json\, corrects it
        /// with a compound lookup, aligns the corrected words with the original words and exports
        /// one labelled row per corrected word through <c>CreateExcelFileExperimento</c>.
        /// </summary>
        /// <remarks>
        /// Row labels (runtime strings, kept in Spanish): "igual" = identical, "Corregida" = corrected,
        /// "Espacios" = weak match, "Error" = no usable match at the current, previous or next
        /// original word.
        /// </remarks>
        private static void Experimento3()
        {
            const string jsonDirectory  = @"D:\json\";
            const string dictionaryPath = @"D:\load8.txt";

            //Symspell parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 5;
            const int prefixLength    = 7;

            string[]           fileEntries  = Directory.GetFiles(jsonDirectory);
            StringBuilder      OCROriginal  = new StringBuilder();
            EditDistanceLength editDistance = new EditDistanceLength();
            SymSpell           symSpell     = new SymSpell(initialCapacity, maxEditDistance, prefixLength);
            Dictionary <int, ExperimentSpell> excelMatrix = new Dictionary <int, ExperimentSpell>();

            // Collect the full-text annotation of every response in every file.
            foreach (string path in fileEntries)
            {
                // NOTE(review): Encoding.Default is the system ANSI code page on .NET Framework;
                // confirm the JSON files are not UTF-8 without a BOM.
                string jsonText = File.ReadAllText(path, Encoding.Default);
                var    response = Google.Protobuf.JsonParser.Default.Parse <Google.Cloud.Vision.V1.AnnotateFileResponse>(jsonText);
                foreach (var respuestas in response.Responses)
                {
                    var annotation = respuestas.FullTextAnnotation;
                    if (annotation != null)
                    {
                        OCROriginal.Append(annotation.Text);
                    }
                }
            }

            // Without a dictionary the lookup cannot correct anything, so stop instead of exporting meaningless rows.
            if (!symSpell.LoadDictionary(dictionaryPath, 0, 1))
            {
                Console.Error.WriteLine("File not found: " + dictionaryPath);
                return;
            }

            List <SymSpell.SuggestItem> suggestions = symSpell.LookupCompound(OCROriginal.ToString(), 2);
            // NOTE(review): this splits the suggestion's ToString() output, not its term; the "{" / "}"
            // stripping suggests ToString() wraps the item in braces - confirm that is the intended source.
            var arraySymspell    = suggestions[0].ToString().Replace("\n", " ").Replace("{", "").Replace("}", "").Split(' ');
            var arrayOCROriginal = OCROriginal.ToString().Replace("\n", " ").Replace("{", "").Replace("}", "").Replace(": ", "***").Replace(" : ", " ").Replace(":", " ").Replace("***", ": ").Replace(". ", " ").Replace(", ", " ").Replace("-", " ").Split(' ');

            int j = 0; // cursor into the original words
            int k = 0; // next row key

            for (int i = 0; i < arraySymspell.Length; i++)
            {
                // All original words are consumed; remaining corrected words have nothing to align with.
                if (j == arrayOCROriginal.Length)
                {
                    break;
                }

                string corrected = arraySymspell[i];
                ExperimentSpell exp1 = new ExperimentSpell();
                exp1.correctionLookupCompound = corrected;

                // 1. Compare with the original word at the cursor.
                string label = ClassifySimilarity(editDistance.CalculateSimilarity(corrected, arrayOCROriginal[j].ToLower()));
                if (label != null)
                {
                    exp1.original = arrayOCROriginal[j];
                    // A weak match keeps the cursor in place so the next corrected word is
                    // compared against the same original word.
                    if (label != "Espacios")
                    {
                        j++;
                    }
                }
                else
                {
                    // 2. Compare with the previous original word; a match moves the cursor back.
                    label = j > 0
                            ? ClassifySimilarity(editDistance.CalculateSimilarity(corrected, arrayOCROriginal[j - 1].ToLower()))
                            : null;
                    if (label != null)
                    {
                        j--;
                        exp1.original = arrayOCROriginal[j];
                    }
                    else
                    {
                        // 3. Compare with the next original word; a match moves the cursor forward.
                        label = j + 1 < arrayOCROriginal.Length
                                ? ClassifySimilarity(editDistance.CalculateSimilarity(corrected, arrayOCROriginal[j + 1].ToLower()))
                                : null;
                        if (label != null)
                        {
                            j++;
                            exp1.original = arrayOCROriginal[j];
                        }
                        else
                        {
                            // 4. Nothing nearby matches: record an error against the current word and move on.
                            label = "Error";
                            exp1.original = arrayOCROriginal[j];
                            j++;
                        }
                    }
                }

                exp1.correction = label;
                excelMatrix.Add(k++, exp1);
            }
            CreateExcelFileExperimento(excelMatrix, "3");
        }

        // Maps a similarity score to its row label, or null when the score is too low (<= 0.06) to count as a match.
        private static string ClassifySimilarity(double similarity)
        {
            if (similarity == 1)
            {
                return("igual");
            }
            if (similarity >= .4)
            {
                return("Corregida");
            }
            if (similarity > 0.06)
            {
                return("Espacios");
            }
            return(null);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Experiment 2: concatenates the OCR text of every JSON response in D:\json\, corrects it
        /// with a compound lookup and exports the corrected words side by side with the original
        /// words (paired by position) through <c>CreateExcelFileExperimento</c>.
        /// </summary>
        private static void Experimento2_1()
        {
            const string jsonDirectory  = @"D:\json\";
            const string dictionaryPath = @"D:\load8.txt";

            //Symspell parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 5;
            const int prefixLength    = 7;

            Stopwatch     stopWatch   = new Stopwatch();
            string[]      fileEntries = Directory.GetFiles(jsonDirectory);
            StringBuilder OCROriginal = new StringBuilder();
            SymSpell      symSpell    = new SymSpell(initialCapacity, maxEditDistance, prefixLength);
            Dictionary <int, ExperimentSpell> excelMatrix = new Dictionary <int, ExperimentSpell>();

            // Collect the full-text annotation of every response in every file.
            foreach (string path in fileEntries)
            {
                // NOTE(review): Encoding.Default is the system ANSI code page on .NET Framework;
                // confirm the JSON files are not UTF-8 without a BOM.
                string jsonText = File.ReadAllText(path, Encoding.Default);
                var    response = Google.Protobuf.JsonParser.Default.Parse <Google.Cloud.Vision.V1.AnnotateFileResponse>(jsonText);
                foreach (var respuestas in response.Responses)
                {
                    var annotation = respuestas.FullTextAnnotation;
                    if (annotation != null)
                    {
                        OCROriginal.Append(annotation.Text);
                    }
                }
            }

            // NOTE(review): the elapsed time is measured but never reported by this method.
            stopWatch.Start();
            //load symspell dictionary default
            // Without a dictionary the lookup cannot correct anything, so stop instead of exporting meaningless rows.
            if (!symSpell.LoadDictionary(dictionaryPath, 0, 1))
            {
                stopWatch.Stop();
                Console.Error.WriteLine("File not found: " + dictionaryPath);
                return;
            }
            //process symspell
            List <SymSpell.SuggestItem> suggestions = symSpell.LookupCompound(OCROriginal.ToString(), 2);

            stopWatch.Stop();

            // NOTE(review): this splits the suggestion's ToString() output, not its term - confirm that is intended.
            var arraySymspell    = suggestions[0].ToString().Replace("\n", " ").Replace("}", "").Split(' ');
            var arrayOCROriginal = OCROriginal.ToString().Replace("\n", " ").Replace("}", "").Replace(": ", "***").Replace(" : ", " ").Replace(":", " ").Replace("***", ": ").Replace(". ", " ").Replace(", ", " ").Replace("-", " ").Split(' ');

            // Pair words by position; corrected words beyond the end of the original get an empty original.
            for (int i = 0; i < arraySymspell.Length; i++)
            {
                ExperimentSpell exp1 = new ExperimentSpell();
                exp1.correction = "igual";
                exp1.correctionLookupCompound = arraySymspell[i];
                exp1.original = i < arrayOCROriginal.Length ? arrayOCROriginal[i] : "";
                excelMatrix.Add(i, exp1);
            }
            CreateExcelFileExperimento(excelMatrix, "2");
        }
Exemplo n.º 10
0
        //Load a frequency dictionary or create a frequency dictionary from a text corpus
        /// <summary>
        /// Console demo: indexes every phrase of "all-suggests-cleaned.txt" in a trie, builds a
        /// SymSpell dictionary from the word frequencies of those phrases, then answers interactive
        /// queries with matching phrases and spelling suggestions until "exit" (or end of input) is read.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // Path.Combine inserts a directory separator only when the base directory does not already end with one.
            var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"all-suggests-cleaned.txt");

            Console.Write("Creating trie ...");
            long      memSize   = GC.GetTotalMemory(true);
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();
            var wordToIndex   = new Dictionary <string, int>();
            var wordFrequency = new Dictionary <string, int>();
            var phraseList    = new List <string>();
            int count         = 0;

            using (StreamReader sr = new StreamReader(path))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string phrase = line.Trim();
                    // Every line is kept (even a blank one) so phrase positions match line positions.
                    phraseList.Add(phrase);

                    // Skip empty tokens so blank lines and repeated spaces do not register "" as a word.
                    foreach (string token in phrase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        if (!wordToIndex.ContainsKey(token))
                        {
                            wordToIndex[token] = count++;
                        }

                        // TryGetValue leaves freq at 0 for a word seen for the first time.
                        int freq;
                        wordFrequency.TryGetValue(token, out freq);
                        wordFrequency[token] = freq + 1;
                    }
                }
            }

            long memDeltaForStoringValues = GC.GetTotalMemory(true) - memSize;

            Console.WriteLine("Memory for storing value: " + memDeltaForStoringValues + ". Going to add to trie");

            // Each phrase is stored under its index in phraseList.
            var trie  = new UkkonenTrie <int>(1);
            int value = 0;

            foreach (var phrase in phraseList)
            {
                trie.Add(phrase, value++);
            }

            stopWatch.Stop();
            long memDelta = GC.GetTotalMemory(true) - memSize;

            Console.WriteLine("Done in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                              + (memDelta / 1024 / 1024.0).ToString("N0") + " MB. Token count: " + wordToIndex.Count);

            // spell checker
            var spellChecker = new SymSpell(wordToIndex.Count, 2);

            foreach (var entry in wordFrequency)
            {
                spellChecker.CreateDictionaryEntry(entry.Key, entry.Value);
            }

            while (true)
            {
                Console.WriteLine("Input string to search:");
                var s = Console.ReadLine();
                // ReadLine returns null at end of input (closed or redirected stdin); treat it like "exit".
                if (s == null || s == "exit")
                {
                    return;
                }

                var normalized = s.ToLower();
                var suggests   = spellChecker.LookupCompound(normalized, 2);

                // lookup in trie
                var results = trie.Retrieve(normalized);

                var resultCount = 0;
                foreach (var result in results)
                {
                    Console.WriteLine("--> " + phraseList[result]);
                    resultCount++;
                }

                foreach (var sug in suggests)
                {
                    Console.WriteLine("Can search for: " + sug.term);
                }
                // Only offer a correction when there is one and it differs from what was typed.
                if (suggests.Count != 0 && suggests[0].term != normalized)
                {
                    Console.WriteLine("Did you mean: " + suggests[0].term + "?");
                }

                Console.WriteLine(String.Format("Found {0} result", resultCount));
            }
        }