/// <summary>
/// Picks the sentiment class (1 through 5) with the highest score for a review.
/// </summary>
/// <remarks>
/// The review is tokenized, passed through <c>AddNegationAugments</c>, and every
/// token that passes the UTF-8 check and is present in the word list multiplies
/// each class score by <c>p / (1 - p)</c>, where <c>p</c> is the value returned by
/// <c>GetProbabilityOfWordGivenSentimentFast</c> for that word and class.
/// Tokens missing from the word list are ignored.
/// </remarks>
/// <param name="review">Raw review text to classify.</param>
/// <returns>The sentiment key (1..5) whose accumulated score is largest.</returns>
public int Classify(string review) {
    var tokenizer = new HappyFunTokenizer(_preserveCase);
    var tokens = this.AddNegationAugments(tokenizer.Tokenize(review));

    // Start each class from its baseline score before any word evidence is applied.
    var scores = new Dictionary<int, double>();
    for (var sentiment = 1; sentiment < 6; sentiment++) {
        scores[sentiment] = this.GetEmptyScoreForSentiment(sentiment);
    }

    foreach (var token in tokens) {
        if (!Utilities.CheckUtf8(token)) {
            continue;
        }

        // Single lookup instead of ContainsKey + indexer.
        int index;
        if (!this._wordList.TryGetValue(token, out index)) {
            continue;
        }

        for (var sentiment = 1; sentiment < 6; sentiment++) {
            var score = this.GetProbabilityOfWordGivenSentimentFast(index, sentiment);
            // NOTE(review): this running product is unguarded. A score of exactly 1.0
            // divides by zero (infinity for doubles), and long inputs may underflow or
            // overflow. Confirm the helper's output range or consider summing logs.
            scores[sentiment] *= score / (1.0 - score);
        }
    }

    // On equal values the later entry wins, matching the original comparison.
    return scores.Aggregate((l, r) => l.Value > r.Value ? l : r).Key;
}
/// <summary>
/// Records one training review under the given sentiment score.
/// </summary>
/// <remarks>
/// Each distinct token that passes the UTF-8 check is counted at most once per
/// review. A token not yet in the word list is assigned the next index (the
/// current word-list size) and its count is initialised to zero for sentiment
/// slots 0 through 5 before the count for <paramref name="score"/> is incremented.
/// </remarks>
/// <param name="tokenizer">Tokenizer used to split the review into tokens.</param>
/// <param name="score">Sentiment slot whose word counts are incremented.</param>
/// <param name="review">Raw review text.</param>
/// <returns>The number of distinct word indices counted for this review.</returns>
protected int AddTrainingEntry(HappyFunTokenizer tokenizer, int score, string review) {
    var addedEntries = new HashSet<int>();
    var tokens = this.AddNegationAugments(tokenizer.Tokenize(review));
    var count = 0;

    foreach (var token in tokens) {
        if (!Utilities.CheckUtf8(token)) {
            continue;
        }

        // Resolve the word's index with a single lookup, registering it if it is new.
        int index;
        if (!this._wordList.TryGetValue(token, out index)) {
            index = this._wordList.Count;
            this._wordList[token] = index;
            for (var sentiment = 0; sentiment < 6; sentiment++) {
                this._sentimentWordCounts[sentiment][index] = 0;
            }
        }

        // HashSet.Add returns false when the index was already seen in this review,
        // so each word contributes at most once per review.
        if (!addedEntries.Add(index)) {
            continue;
        }

        count++;
        this._sentimentWordCounts[score][index]++;
    }

    return count;
}