Exemple #1
0
        /// <summary>
        /// Creates a <see cref="WordObject"/> for the given word and tags it as a
        /// noun when the supplied part-of-speech label is classified as one.
        /// The new object is only returned; this method does not store it anywhere.
        /// </summary>
        /// <param name="word">The word text passed to the <see cref="WordObject"/> constructor.</param>
        /// <param name="POS">The part-of-speech label checked with <c>POSLib.IsNoun</c>.</param>
        /// <returns>
        /// The newly created <see cref="WordObject"/>; its <c>PartOfSpeech</c> is set to
        /// <c>POSLib.NOUN</c> only when <c>POSLib.IsNoun(POS)</c> is true.
        /// </returns>
        private WordObject AddWordObj(string word, string POS)
        {
            WordObject created = new WordObject(word);

            // Anything that is not a noun keeps whatever part of speech the constructor assigned.
            if (!POSLib.IsNoun(POS))
            {
                return created;
            }

            created.PartOfSpeech = POSLib.NOUN;
            return created;
        }
Exemple #2
0
        /// <summary>
        /// Registers a word in the internal word table (creating its
        /// <see cref="WordObject"/> on first use) and refreshes the word's
        /// probability from the current training counts.
        /// </summary>
        /// <param name="word">
        /// The word to register. It is used as the key into the word table.
        /// </param>
        /// <param name="article">
        /// The article text that is searched for <paramref name="word"/>. When the
        /// word occurs in it, the sighting is recorded through <c>WordObject.Seen()</c>
        /// before the probability is recomputed.
        /// </param>
        /// <returns>
        /// Always <c>true</c>.
        /// </returns>
        public bool AddWord(string word, string article)
        {
            WordObject wordObj;
            //article = FindWords.CleanArticle(article);

            // Ordinal search: this is a plain substring test, and the culture-sensitive
            // default can report surprising results for ignorable characters/newlines.
            bool foundInArticle = article.IndexOf(word, System.StringComparison.Ordinal) >= 0;

            // Single lookup; create and store the entry only when the word is new.
            if (!this.words.TryGetValue(word, out wordObj))
            {
                wordObj = new WordObject(word);
                this.words.Add(word, wordObj);
            }

            // Every probability branch below that runs for a found word first records
            // the sighting, so it is done once here. The only branch that never did
            // (the minimum-value fallback) is reachable only when the word was not found.
            if (foundInArticle)
            {
                wordObj.Seen();
            }

            if (this.usingPrior)
            {
                // Dirichlet prior: pseudo-count of 1, normalised by the number of classifiers.
                double denominator = (double)this.timesInTraining + (double)this.totalClassifiers;
                wordObj.SetProb(((double)wordObj.TimesInTraining + 1d) / denominator);
            }
            else if (this.usingLaplace)
            {
                // Laplace smoothing: add a virtual document for each word.
                double denominator = (double)this.timesInTraining + (double)this.totalWords;
                wordObj.SetProb(((double)wordObj.TimesInTraining + 1d) / denominator);
            }
            else if (this.useMesitmate)
            {
                // M-estimate: the pseudo-count is weighted by pValue.
                double denominator = (double)this.timesInTraining + (double)this.totalWords;
                wordObj.SetProb(((double)wordObj.TimesInTraining + (1d * this.pValue)) / denominator);
            }
            else if (foundInArticle)
            {
                // No smoothing configured: plain relative frequency.
                // NOTE(review): yields Infinity/NaN if this.timesInTraining is 0 - confirm callers
                // always update the classifier's count before calling this.
                double prob = (double)wordObj.TimesInTraining / (double)this.timesInTraining;
                wordObj.SetProb(prob);
            }
            else if (wordObj.GetProb() == 0)
            {
                // The word has no probability yet in this classifier. It must not stay
                // at zero, so fall back to the classifier-wide minimum value.
                wordObj.SetProb(ClassifierObject.GetMinValue());
            }

            return true;
        }