Пример #1
0
        /// <summary>
        /// Scores a review against sentiment classes 1 through 5 and returns the
        /// class with the highest score.
        /// </summary>
        /// <param name="review">Raw review text; it is tokenized and passed through
        /// <c>AddNegationAugments</c> before scoring.</param>
        /// <returns>The sentiment key (1..5) whose accumulated score is largest.</returns>
        public int Classify(string review)
        {
            // Sentiment classes scored by this method: 1..5 inclusive.
            const int firstSentiment = 1;
            const int lastSentiment  = 5;

            var tokenizer = new HappyFunTokenizer(_preserveCase);
            var tokens    = this.AddNegationAugments(tokenizer.Tokenize(review));
            var scores    = new Dictionary <int, double>();

            // Seed every class with its starting score before any token is applied.
            for (var i = firstSentiment; i <= lastSentiment; i++)
            {
                scores[i] = this.GetEmptyScoreForSentiment(i);
            }

            foreach (var token in tokens)
            {
                if (!Utilities.CheckUtf8(token))
                {
                    continue;
                }

                // Single lookup instead of ContainsKey + indexer; tokens that are
                // not in the word list do not influence the result.
                int index;
                if (!this._wordList.TryGetValue(token, out index))
                {
                    continue;
                }

                for (var i = firstSentiment; i <= lastSentiment; i++)
                {
                    var score = this.GetProbabilityOfWordGivenSentimentFast(index, i);

                    // Multiply the running score by the ratio p / (1 - p).
                    // NOTE(review): for long reviews this running product may
                    // underflow to 0 or overflow to infinity; accumulating
                    // logarithms would be more robust - confirm before changing,
                    // since it would alter tie behaviour.
                    scores[i] *= score / (1.0 - score);
                }
            }

            // The comparison is strict, so on equal values the right-hand
            // (later enumerated) entry is kept.
            return(scores.Aggregate((l, r) => l.Value > r.Value ? l : r).Key);
        }
Пример #2
0
        /// <summary>
        /// Records one training review: every distinct valid token of the review
        /// increments that token's counter for the given sentiment exactly once.
        /// Tokens not yet in the word list are registered with the next free index
        /// and get zeroed counters for sentiment slots 0 through 5.
        /// </summary>
        /// <param name="tokenizer">Tokenizer used to split <paramref name="review"/>.</param>
        /// <param name="score">Sentiment key whose word counters are incremented.</param>
        /// <param name="review">Raw review text.</param>
        /// <returns>The number of distinct tokens counted for this review.</returns>
        protected int AddTrainingEntry(HappyFunTokenizer tokenizer, int score, string review)
        {
            var addedEntries = new HashSet <int>();
            var tokens       = this.AddNegationAugments(tokenizer.Tokenize(review));
            var count        = 0;

            foreach (var token in tokens)
            {
                if (!Utilities.CheckUtf8(token))
                {
                    continue;
                }

                // Single lookup instead of ContainsKey + indexer.
                int index;
                if (!this._wordList.TryGetValue(token, out index))
                {
                    // Unseen word: assign the next index and create a zeroed
                    // counter in every sentiment slot.
                    index = this._wordList.Count;
                    this._wordList[token] = index;

                    for (var i = 0; i < 6; i++)
                    {
                        this._sentimentWordCounts[i][index] = 0;
                    }
                }

                // HashSet.Add returns false when the index was already recorded,
                // so each word is counted at most once per review.
                if (!addedEntries.Add(index))
                {
                    continue;
                }

                count++;
                this._sentimentWordCounts[score][index]++;
            }

            return(count);
        }