/// <summary>
/// Maps the supplied text cells onto the feature table, scores the resulting
/// L2-normalized vector with the classifier and returns the score together
/// with the cells that were resolved.
/// </summary>
/// <param name="cells">Text cells to resolve against the feature table.</param>
/// <returns>
/// A tuple of the classifier probability, the coefficient reported by the L2
/// normalization, and a <see cref="VectorData"/> built from the resolved cells.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="cells"/> is null.</exception>
public (double Probability, double Normalization, VectorData Vector) GetVector(TextVectorCell[] cells)
{
    if (cells is null)
    {
        throw new ArgumentNullException(nameof(cells));
    }

    log.LogDebug("GetVector");

    // At most one vector cell is produced per input cell.
    var vectorCells = new List<VectorCell>(cells.Length);

    // A newly allocated double[] is already zero-filled; no explicit reset is required.
    var vector = new double[featureTable.Count];

    // Cells resolved only through their opposite are numbered past the known feature range.
    var unknownIndexes = vector.Length;

    foreach (TextVectorCell textCell in cells)
    {
        VectorCell cell = GetCell(textCell);
        if (cell != null)
        {
            vector[cell.Index] = cell.X;
            vectorCells.Add(cell);
            continue;
        }

        // If the inverted form exists in the database, it is very likely that
        // the normal version has the opposite meaning.
        VectorCell opposite = GetCell(new TextVectorCell(textCell.Name.GetOpposite(), Math.Abs(textCell.Value)));
        if (opposite == null)
        {
            continue;
        }

        // The borrowed weight is damped (halved for inverted names, quartered otherwise)
        // and its sign flipped. These cells are reported in the result only; they are
        // never written into the vector handed to the classifier.
        var theta = textCell.Name.IsInverted() ? opposite.Theta / 2 : opposite.Theta / 4;
        vectorCells.Add(new VectorCell(unknownIndexes, textCell, -theta));
        unknownIndexes++;
    }

    INormalize normalized = vector.Normalize(NormalizationType.L2);
    vector = normalized.GetNormalized.ToArray();
    var probability = Classifier.Probability(vector);

    // Do not normalize the returned data - the SVM already operates on normalized
    // values, so a second normalization is not required.
    return (
        probability,
        normalized.Coeficient,
        new VectorData(vectorCells.ToArray(), unknownIndexes, Classifier.Model.Threshold, NormalizationType.None));
}
/// <summary>
/// Resolves a text cell to a weighted <see cref="VectorCell"/>.
/// </summary>
/// <param name="textCell">Cell whose name is looked up in the data set header.</param>
/// <returns>
/// A vector cell carrying the feature index, an absolute-valued copy of the
/// text cell and the weight stored for that index; <c>null</c> when the name
/// has no header or the header is not present in the feature table.
/// </returns>
private VectorCell GetCell(TextVectorCell textCell)
{
    IHeader header = DataSet.Header[textCell.Name];
    if (header == null)
    {
        return null;
    }

    if (!featureTable.TryGetValue(header, out var index))
    {
        return null;
    }

    // The copy always carries the magnitude of the original value.
    var magnitude = Math.Abs(textCell.Value);

    TextVectorCell absoluteCell;
    if (textCell.Item == null)
    {
        absoluteCell = new TextVectorCell(textCell.Name, magnitude);
    }
    else
    {
        // Keep the attached item when there is one, instead of the bare name.
        absoluteCell = new TextVectorCell(textCell.Item, magnitude);
    }

    return new VectorCell(index, absoluteCell, weights[index]);
}
/// <summary>
/// Populates the rating's sentiment values from the model vector built for the review.
/// </summary>
/// <remarks>
/// When the model yields no vector, the rating is taken from
/// <c>Review.CalculateRawRating()</c>. Otherwise vector cells that carry a word
/// item are added as SVM-adjusted sentiments, the calculated values of the
/// remaining cells are accumulated into a bias term, and review sentiments not
/// yet covered are added with a scaled-down fallback weight.
/// </remarks>
protected override void CalculateRatingLogic()
{
    TextVectorCell[] cells = Review.Vector.GetCells().ToArray();
    (double Probability, double Normalization, VectorData Vector) prediction = Model.GetVector(cells);
    VectorData modelVector = prediction.Vector;

    if (modelVector == null || modelVector.Length == 0)
    {
        // Nothing usable came back from the model - use the raw rating instead.
        Rating = Review.CalculateRawRating();
        return;
    }

    var bias = modelVector.RHO;
    VectorCell ratingStarsCell = null;

    foreach (VectorCell vectorCell in modelVector.Cells)
    {
        var textCell = (TextVectorCell)vectorCell.Data;
        if (textCell.Name == Constants.RATING_STARS)
        {
            ratingStarsCell = vectorCell;
        }

        if (textCell.Item == null)
        {
            // Cells without a word item only contribute to the bias.
            bias += vectorCell.Calculated;
            continue;
        }

        var word = (IWordItem)textCell.Item;
        Add(new SentimentValue(
            word,
            word.Text,
            new SentimentValueData(vectorCell.Calculated, SentimentSource.AdjustedSVM)));
    }

    // Collect first, add afterwards: adding inside this loop would change what
    // ContainsSentiment reports for later entries.
    var pending = new List<SentimentValue>();
    foreach (SentimentValue candidate in Review.GetAllSentiments())
    {
        if (ContainsSentiment(candidate.Owner))
        {
            continue;
        }

        pending.Add(candidate);
    }

    // Default weight is 0.1; when a rating-stars cell was seen, the magnitude of
    // its theta is spread over the number of review sentiments instead.
    var fallbackWeight = ratingStarsCell != null
        ? Math.Abs(ratingStarsCell.Theta) / Review.GetAllSentiments().Length
        : 0.1;

    foreach (SentimentValue missing in pending)
    {
        Add(new SentimentValue(
            missing.Owner,
            missing.Span,
            new SentimentValueData(missing.DataValue.Value * fallbackWeight, SentimentSource.AdjustedCalculated)));
    }

    if (TotalSentiments > 0)
    {
        Add(new SentimentValue(
            WordOccurrence.CreateBasic(Constants.BIAS, POSTags.Instance.JJ),
            "BIAS",
            new SentimentValueData(bias, SentimentSource.AdjustedSVM)));
    }

    // Only a positive rating paired with a probability below 0.5 is reported.
    if (Rating.HasValue && Rating.IsPositive.Value && prediction.Probability < 0.5)
    {
        log.LogDebug("Mistmatch in sentiment with machine prediction: {0} - {1}", Rating.IsPositive, prediction.Probability);
    }
}