예제 #1
0
        /// <summary>
        /// Learns from a piece of text held in memory: updates word frequencies through <c>tst</c>
        /// and follower (bigram) counts in <c>sd.ht</c>.
        /// </summary>
        /// <remarks>
        /// The text is split on spaces; empty tokens caused by leading, trailing or repeated spaces
        /// are ignored. A term for which <c>tst.traverse</c> returns a node is treated as known: the
        /// node is flagged as a word end and its frequency is incremented by one per occurrence.
        /// Bigram entries are stored per term as a string of the form
        /// <c>follower;count&amp;follower;count&amp;</c>. A text consisting of a single term is
        /// recorded with an empty follower.
        /// </remarks>
        /// <param name="inputText">The text to learn from. Null or empty text is ignored.</param>
        /// <param name="addNewWordsCond">True to insert unknown terms into <c>tst</c> and <c>bkt</c>
        /// and to record every bigram; false to ignore unknown terms and skip every bigram that
        /// involves one.</param>
        public void learnData(String inputText, bool addNewWordsCond)
        {
            if (String.IsNullOrEmpty(inputText))
            {
                return;
            }

            // Drop empty tokens so that repeated spaces do not yield "" terms.
            String[] terms = inputText.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (terms.Length == 0)
            {
                return;
            }

            List <String> newterms = new List <String>();
            List <String> oldterms = new List <String>();

            // identify new terms
            foreach (String term in terms)
            {
                TSTNode node = tst.traverse(term);
                if (node == null)
                {
                    newterms.Add(term);
                }
                else
                {
                    oldterms.Add(term);
                    // NOTE(review): this marks the node as a word even when addNewWordsCond is
                    // false; kept as in the original - confirm traverse only returns whole words.
                    node.wordEnd = true;
                }
            }

            // generate bigrams: one (term, follower) pair per adjacent pair of terms, so a term
            // that occurs several times contributes each of its followers separately
            List <KeyValuePair <String, String> > bigrams = new List <KeyValuePair <String, String> >();
            for (int i = 0; i < terms.Length - 1; i++)
            {
                bigrams.Add(new KeyValuePair <String, String>(terms[i], terms[i + 1]));
            }
            if (terms.Length == 1)
            {
                // a lone term has no follower; it is registered with an empty one
                bigrams.Add(new KeyValuePair <String, String>(terms[0], ""));
            }

            // autocorrect and autocomplete: unknown terms are only stored on request
            if (addNewWordsCond)
            {
                foreach (String newTerm in newterms)
                {
                    tst.insert(newTerm, 1);
                    bkt.Add(newTerm);
                }
            }

            // known terms gain one unit of frequency per occurrence in either mode
            foreach (String oldTerm in oldterms)
            {
                TSTNode node = tst.traverse(oldTerm);
                node.frequency += 1;
            }

            // add bigrams
            foreach (KeyValuePair <String, String> bigram in bigrams)
            {
                if (!addNewWordsCond && (newterms.Contains(bigram.Key) || newterms.Contains(bigram.Value)))
                {
                    // skip if new terms are found
                    continue;
                }
                recordBigramOccurrence(bigram.Key, bigram.Value);
            }
        }

        /// <summary>
        /// Adds one occurrence of <paramref name="follower"/> after <paramref name="term"/> to
        /// <c>sd.ht</c>, creating the entry for the term or the follower when it is missing.
        /// </summary>
        /// <param name="term">The first word of the bigram; used as the key in <c>sd.ht</c>.</param>
        /// <param name="follower">The word that followed <paramref name="term"/>; may be empty.</param>
        private void recordBigramOccurrence(String term, String follower)
        {
            if (!sd.ht.ContainsKey(term))
            {
                sd.ht.Add(term, follower + ";1&");
                return;
            }

            // The stored form ends with '&', so the split yields a final empty piece; it is kept
            // in the array so that joining the pieces restores the trailing '&'.
            String[] entries = sd.ht[term].Split('&');
            for (int i = 0; i < entries.Length; i++)
            {
                if (entries[i].Length == 0)
                {
                    continue;
                }

                String[] wordAndFreq = entries[i].Split(';');
                if (wordAndFreq[0] != follower)
                {
                    continue;
                }

                // A missing or unreadable count restarts at 1.
                int freq;
                if (wordAndFreq.Length > 1 && int.TryParse(wordAndFreq[1], out freq))
                {
                    freq += 1;
                }
                else
                {
                    freq = 1;
                }

                // Only the matching entry is rewritten; other entries are left untouched.
                entries[i] = follower + ";" + freq;
                sd.ht[term] = String.Join("&", entries);
                return;
            }

            // follower not seen after this term yet
            sd.ht[term] = sd.ht[term] + follower + ";1&";
        }
 /// <summary>
 /// Replaces the tree's root with a new node holding the given character.
 /// Call this method before inserting the dictionary into the tree.
 /// </summary>
 /// <param name="c">Character stored in the root node; a character from the middle of the
 /// language's character set is recommended.</param>
 public void createRoot(char c)
 {
     TSTNode freshRoot = new TSTNode(c);
     root = freshRoot;
 }
 /// <summary>
 /// Inserts a word into the tree. The work is delegated to the node-level overload, which is
 /// given the current root and a starting position of 0; the node it returns becomes the root.
 /// </summary>
 /// <param name="word">The word to insert.</param>
 /// <param name="frequency">The pre-decided frequency count of the word in the dictionary.</param>
 public void insert(string word, int frequency)
 {
     TSTNode updatedRoot = insert(root, word, 0, frequency);
     root = updatedRoot;
 }
        /// <summary>
        /// Looks up a prefix in the tree. The work is delegated to the node-level overload, which
        /// is given the current root and a starting position of 0.
        /// </summary>
        /// <param name="prefix">The character sequence to follow through the tree.</param>
        /// <returns>The node returned by the node-level overload (presumably null when the
        /// prefix is not present - confirm against that overload).</returns>
        public TSTNode traverse(string prefix)
        {
            return traverse(root, prefix, 0);
        }