Example #1
0
        /// <summary>
        /// Maps the supplied text cells onto the feature table, L2-normalizes the resulting
        /// dense vector and asks the classifier for its probability.
        /// </summary>
        /// <param name="cells">Text cells to resolve against the feature table.</param>
        /// <returns>
        /// The classifier probability for the normalized vector, the normalization coefficient,
        /// and a <see cref="VectorData"/> built from the collected cells.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="cells"/> is <c>null</c>.</exception>
        public (double Probability, double Normalization, VectorData Vector) GetVector(TextVectorCell[] cells)
        {
            if (cells is null)
            {
                throw new ArgumentNullException(nameof(cells));
            }

            log.LogDebug("GetVector");
            var vectorCells = new List<VectorCell>(cells.Length);

            // A newly allocated double[] is already zero-filled, so no explicit reset loop is needed.
            var vector = new double[featureTable.Count];

            // Cells that are not in the feature table get indexes past the end of the dense vector.
            var unknownIndexes = vector.Length;

            foreach (TextVectorCell textCell in cells)
            {
                VectorCell cell = GetCell(textCell);
                if (cell != null)
                {
                    vector[cell.Index] = cell.X;
                    vectorCells.Add(cell);
                    continue;
                }

                // If the opposite (inverted) form exists in the feature table, the form we were
                // given very likely carries the opposite meaning, so reuse its weight with a flipped sign.
                VectorCell opposite = GetCell(new TextVectorCell(textCell.Name.GetOpposite(), Math.Abs(textCell.Value)));
                if (opposite == null)
                {
                    continue;
                }

                // The borrowed weight is damped: halved for inverted names, quartered otherwise.
                var theta = textCell.Name.IsInverted() ? opposite.Theta / 2 : opposite.Theta / 4;
                vectorCells.Add(new VectorCell(unknownIndexes, textCell, -theta));
                unknownIndexes++;
            }

            INormalize normalized = vector.Normalize(NormalizationType.L2);

            vector = normalized.GetNormalized.ToArray();
            var probability = Classifier.Probability(vector);

            // The returned VectorData is marked NormalizationType.None: the data has already been
            // normalized for the SVM above, so a second normalization must not be applied.
            return (probability, normalized.Coeficient, new VectorData(vectorCells.ToArray(), unknownIndexes, Classifier.Model.Threshold, NormalizationType.None));
        }
Example #2
0
        /// <summary>
        /// Resolves a text cell to a <see cref="VectorCell"/> using the data set header and the feature table.
        /// </summary>
        /// <param name="textCell">Text cell whose name is looked up in the data set header.</param>
        /// <returns>
        /// A cell holding the feature index, a copy of the text cell with an absolute value, and
        /// the weight stored for that index; <c>null</c> when the header is missing or is not
        /// present in the feature table.
        /// </returns>
        private VectorCell GetCell(TextVectorCell textCell)
        {
            IHeader header = DataSet.Header[textCell.Name];

            if (header != null &&
                featureTable.TryGetValue(header, out var index))
            {
                // Rebuild the cell with an absolute value, keyed by the item when there is one
                // and by the name otherwise.
                TextVectorCell absoluteCell;
                if (textCell.Item == null)
                {
                    absoluteCell = new TextVectorCell(textCell.Name, Math.Abs(textCell.Value));
                }
                else
                {
                    absoluteCell = new TextVectorCell(textCell.Item, Math.Abs(textCell.Value));
                }

                return new VectorCell(index, absoluteCell, weights[index]);
            }

            return null;
        }
Example #3
0
        /// <summary>
        /// Calculates the rating from the model vector of the current review. Falls back to the
        /// raw review rating when the model yields no vector; otherwise registers one sentiment
        /// per vector cell that carries a word item, re-weights the review sentiments that were
        /// not covered, and adds a "BIAS" sentiment when any sentiment is present.
        /// </summary>
        protected override void CalculateRatingLogic()
        {
            TextVectorCell[] reviewCells = Review.Vector.GetCells().ToArray();
            var (probability, _, vector) = Model.GetVector(reviewCells);

            if (vector == null ||
                vector.Length == 0)
            {
                Rating = Review.CalculateRawRating();
                return;
            }

            var accumulatedBias = vector.RHO;
            VectorCell starsCell = default;

            foreach (VectorCell current in vector.Cells)
            {
                var textCell = (TextVectorCell)current.Data;
                if (textCell.Name == Constants.RATING_STARS)
                {
                    starsCell = current;
                }

                if (textCell.Item == null)
                {
                    // Cells without an attached item only contribute to the bias term.
                    accumulatedBias += current.Calculated;
                    continue;
                }

                var word = (IWordItem)textCell.Item;
                Add(new SentimentValue(word, word.Text, new SentimentValueData(current.Calculated, SentimentSource.AdjustedSVM)));
            }

            // Snapshot the uncovered sentiments before adding any of them, so the membership
            // check is not affected by the additions below.
            var uncovered = new List<SentimentValue>();

            foreach (SentimentValue candidate in Review.GetAllSentiments())
            {
                if (ContainsSentiment(candidate.Owner))
                {
                    continue;
                }

                uncovered.Add(candidate);
            }

            double fallbackWeight = 0.1;

            if (starsCell != null)
            {
                // When a rating-stars cell was seen, spread its absolute theta over all review sentiments.
                var sentimentCount = Review.GetAllSentiments().Length;
                fallbackWeight = Math.Abs(starsCell.Theta) / sentimentCount;
            }

            foreach (SentimentValue missing in uncovered)
            {
                Add(new SentimentValue(missing.Owner, missing.Span, new SentimentValueData(missing.DataValue.Value * fallbackWeight, SentimentSource.AdjustedCalculated)));
            }

            if (TotalSentiments > 0)
            {
                var biasData = new SentimentValueData(accumulatedBias, SentimentSource.AdjustedSVM);
                Add(new SentimentValue(WordOccurrence.CreateBasic(Constants.BIAS, POSTags.Instance.JJ), "BIAS", biasData));
            }

            // Log when the rating is positive while the model probability is below 0.5.
            if (Rating.HasValue &&
                Rating.IsPositive.Value &&
                probability < 0.5)
            {
                log.LogDebug("Mistmatch in sentiment with machine prediction: {0} - {1}", Rating.IsPositive, probability);
            }
        }