/// <summary>
/// Builds a new <see cref="WordObject"/> for <paramref name="word"/> and marks it
/// as a noun when <paramref name="POS"/> is recognised as a noun tag.
/// The object is only constructed and returned; this method does not store it anywhere.
/// </summary>
/// <param name="word">The text used to construct the <see cref="WordObject"/>.</param>
/// <param name="POS">The part-of-speech tag passed to <c>POSLib.IsNoun</c>.</param>
/// <returns>The newly created <see cref="WordObject"/>.</returns>
private WordObject AddWordObj(string word, string POS)
{
    WordObject created = new WordObject(word);

    // Anything that is not a noun keeps whatever part of speech the constructor assigned.
    if (!POSLib.IsNoun(POS))
    {
        return created;
    }

    created.PartOfSpeech = POSLib.NOUN;
    return created;
}
/// <summary>
/// Registers <paramref name="word"/> in the internal word table (creating an entry
/// if none exists) and refreshes its probability according to the smoothing mode
/// configured on this classifier and whether the word occurs in
/// <paramref name="article"/>.
/// </summary>
/// <param name="word">The word to add or update.</param>
/// <param name="article">The article text that is searched for <paramref name="word"/>.</param>
/// <returns>Always <c>true</c>.</returns>
public bool AddWord(string word, string article)
{
    //article = FindWords.CleanArticle(article);

    // Ordinal search: word matching here is a plain character comparison, and the
    // culture-sensitive default of IndexOf(string) can report surprising matches
    // or misses (ignorable characters, "\n" inside "\r\n") on ICU-based runtimes.
    bool foundInArticle = article.IndexOf(word, System.StringComparison.Ordinal) >= 0;

    // Fetch the existing entry, or create and register one, with a single lookup.
    WordObject wordObj;
    if (!this.words.TryGetValue(word, out wordObj))
    {
        wordObj = new WordObject(word);
        this.words.Add(word, wordObj);
    }

    // Record the sighting before any probability is computed. Every branch that
    // uses TimesInTraining below needs this done first, and the final fallback
    // branch is only reachable when the word was NOT found.
    if (foundInArticle)
    {
        wordObj.Seen();
    }

    if (this.usingPrior)
    {
        // Prior based on total classifiers (Dirichlet prior).
        double numerator = (double)wordObj.TimesInTraining + 1d;
        double denominator = (double)this.timesInTraining + (double)this.totalClassifiers;
        wordObj.SetProb(numerator / denominator);
    }
    else if (this.usingLaplace)
    {
        // Laplace smoothing: add a virtual document for each word.
        double numerator = (double)wordObj.TimesInTraining + 1d;
        double denominator = (double)this.timesInTraining + (double)this.totalWords;
        wordObj.SetProb(numerator / denominator);
    }
    else if (this.useMesitmate)
    {
        // M-estimate: the pseudo-count added to the numerator is weighted by pValue.
        double numerator = (double)wordObj.TimesInTraining + (1d * this.pValue);
        double denominator = (double)this.timesInTraining + (double)this.totalWords;
        wordObj.SetProb(numerator / denominator);
    }
    else if (foundInArticle)
    {
        // No smoothing configured: plain relative frequency.
        // NOTE(review): yields Infinity/NaN if timesInTraining is 0 - confirm callers
        // always update timesInTraining before calling AddWord.
        double prob = (double)wordObj.TimesInTraining / (double)this.timesInTraining;
        wordObj.SetProb(prob);
    }
    else if (wordObj.GetProb() == 0)
    {
        // The word has no probability yet in this classifier. It must not stay
        // at zero, so fall back to the classifier's minimum value.
        wordObj.SetProb(ClassifierObject.GetMinValue());
    }

    return true;
}