        public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
        {
            Preconditions.CheckNotNull(dataset);

            // Train three pairwise (one-vs-one) models, each on the examples
            // of the two labels it has to separate.

            // positive vs. negative: drop the neutral examples
            var ds = new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset.Where(le => le.Label != SentimentLabel.Neutral)
                .Select(le => new LabeledExample<SentimentLabel, SparseVector<double>>(le.Label, le.Example)));
            mPosNegModel = TrainModel(ds, SentimentLabel.Positive, SentimentLabel.Negative);

            // negative vs. neutral: drop the positive examples
            ds = new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset.Where(le => le.Label != SentimentLabel.Positive)
                .Select(le => new LabeledExample<SentimentLabel, SparseVector<double>>(le.Label, le.Example)));
            mNegNeuModel = TrainModel(ds, SentimentLabel.Negative, SentimentLabel.Neutral);

            // positive vs. neutral: drop the negative examples
            ds = new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset.Where(le => le.Label != SentimentLabel.Negative)
                .Select(le => new LabeledExample<SentimentLabel, SparseVector<double>>(le.Label, le.Example)));
            mPosNeuModel = TrainModel(ds, SentimentLabel.Positive, SentimentLabel.Neutral);

            IsTrained = true;
        }
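
The snippet above builds three pairwise (one-vs-one) models for the three sentiment classes. Below is a minimal, self-contained sketch of the same filtering idea using plain LINQ; the Sentiment enum, Labeled record, and feature arrays are illustrative stand-ins, not the LATINO types used above.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    enum Sentiment { Positive, Negative, Neutral }

    record Labeled(Sentiment Label, double[] Features);

    static class OneVsOneSketch
    {
        // Keep only the examples whose label is one of the two given classes.
        static List<Labeled> Pair(IEnumerable<Labeled> data, Sentiment a, Sentiment b) =>
            data.Where(le => le.Label == a || le.Label == b).ToList();

        static void Main()
        {
            var data = new List<Labeled>
            {
                new(Sentiment.Positive, new[] { 1.0 }),
                new(Sentiment.Negative, new[] { -1.0 }),
                new(Sentiment.Neutral,  new[] { 0.0 })
            };

            // Three pairwise training sets, mirroring mPosNegModel / mNegNeuModel / mPosNeuModel.
            var posNeg = Pair(data, Sentiment.Positive, Sentiment.Negative);
            var negNeu = Pair(data, Sentiment.Negative, Sentiment.Neutral);
            var posNeu = Pair(data, Sentiment.Positive, Sentiment.Neutral);
            Console.WriteLine($"{posNeg.Count} {negNeu.Count} {posNeu.Count}");   // 2 2 2
        }
    }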
 public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
 {
     // Cascade: every stage but the last trains on the remaining examples,
     // then removes the examples of its own label before the next stage.
     foreach (ModelLabel modelLabel in ModelLabels.Take(ModelLabels.Count() - 1))
     {
         modelLabel.Model.Train(dataset);
         ModelLabel modelLabel_ = modelLabel; // local copy captured by the lambda below
         dataset = new LabeledDataset<LblT, ExT>(dataset.Where(le => !le.Label.Equals(modelLabel_.Label)));
     }
     // The last stage is trained on whatever the earlier stages did not remove.
     ModelLabels.Last().Model.Train(dataset);
     IsTrained = true;
 }
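
The generic variant above trains a cascade: each stage but the last trains on the remaining examples, then removes the examples of its own label before the next stage runs. Here is a rough sketch of that control flow on plain tuples; the label strings and data are made-up inputs, not the ModelLabel/LabeledDataset types above.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class CascadeSketch
    {
        static void Main()
        {
            var labels = new[] { "neg", "neu", "pos" };               // one stage per label
            var data = new List<(string Label, double X)>
            {
                ("neg", -1.0), ("neu", 0.0), ("pos", 1.0), ("pos", 2.0)
            };

            foreach (string label in labels.Take(labels.Length - 1))
            {
                Console.WriteLine($"stage '{label}' trains on {data.Count} examples");
                string stageLabel = label;                            // local copy used inside the lambda
                data = data.Where(le => le.Label != stageLabel).ToList();
            }
            // The last stage sees only what the earlier stages did not remove.
            Console.WriteLine($"final stage trains on {data.Count} examples");
        }
    }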
Example #3
        public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
        {
            Preconditions.CheckNotNull(dataset);
            Preconditions.CheckArgumentRange(IsCalcBounds || (NegCentile >= 0 && NegCentile <= 1));
            Preconditions.CheckArgumentRange(IsCalcBounds || (PosCentile >= 0 && PosCentile <= 1));

            var labeledDataset = (LabeledDataset<SentimentLabel, SparseVector<double>>)dataset;

            if (labeledDataset.Count == 0)
            {
                Console.WriteLine("empty dataset");
            }

            TrainStats = null;

            var posScores      = new List<double>();
            var negScores      = new List<double>();
            var neutralScores  = new List<double>();
            var trainDataset   = new LabeledDataset<SentimentLabel, SparseVector<double>>(labeledDataset.Where(le => le.Label != SentimentLabel.Neutral));
            var neutralDataset = IsCalcStats || IsCalcBounds
                ? new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset.Where(le => le.Label == SentimentLabel.Neutral))
                : null;

            var validation = new CrossValidator<SentimentLabel, SparseVector<double>>
            {
                NumFolds = NumTrainFolds,
                Dataset  = trainDataset,

                OnAfterPrediction = (sender, foldN, model, example, le, prediction) =>
                {
                    if (le.Label == prediction.BestClassLabel)
                    {
                        if (le.Label == SentimentLabel.Positive)
                        {
                            posScores.Add(prediction.BestScore);
                        }
                        else
                        {
                            negScores.Add(-prediction.BestScore);
                        }
                    }
                    return true;
                },

                OnAfterFold = (sender, foldN, trainSet, testSet) =>
                {
                    if (IsCalcStats || IsCalcBounds)
                    {
                        neutralScores.AddRange(neutralDataset
                                               .Select(le => sender.Models[0].Predict(le.Example))
                                               .Select(p => p.BestClassLabel == SentimentLabel.Positive ? p.BestScore : -p.BestScore));
                    }
                }
            };

            validation.Models.Add(CreateModel());
            validation.Run();

            if (IsCalcBounds)
            {
                double negMaxProb, negScore;
                NegBound = FindMaxExclusiveProbability(neutralScores.Where(s => s < 0).Select(s => -s),
                                                       negScores.Select(s => -s), out negMaxProb, out negScore) ? -negScore : 0;

                double posMaxProb, posScore;
                PosBound = FindMaxExclusiveProbability(neutralScores.Where(s => s > 0),
                                                       posScores, out posMaxProb, out posScore) ? posScore : 0;
            }
            else
            {
                if (NegCentile != null)
                {
                    NegBound = negScores.OrderByDescending(bs => bs).Skip((int)Math.Truncate(negScores.Count * NegCentile.Value)).FirstOrDefault();
                }
                if (PosCentile != null)
                {
                    PosBound = posScores.OrderBy(bs => bs).Skip((int)Math.Truncate(posScores.Count * PosCentile.Value)).FirstOrDefault();
                }
            }

            if (IsCalcStats)
            {
                TrainStats = CalcStats(negScores, neutralScores, posScores);
            }

            mBinaryClassifier = validation.Models[0];
            mBinaryClassifier.Train(trainDataset);

            IsTrained = true;
        }
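
When IsCalcBounds is off, the method above falls back to centile-based bounds: it orders the correctly classified scores and takes the value a given fraction of the way through the ordering. Below is a small sketch of that computation on plain doubles; the 0.8 centile and the score list are made-up inputs.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class CentileBoundSketch
    {
        // The score at a given centile of the ascending ordering, analogous to
        // OrderBy(...).Skip((int)Math.Truncate(count * centile)).FirstOrDefault().
        static double BoundAt(IEnumerable<double> scores, double centile)
        {
            var ordered = scores.OrderBy(s => s).ToList();
            return ordered.Skip((int)Math.Truncate(ordered.Count * centile)).FirstOrDefault();
        }

        static void Main()
        {
            var posScores = new List<double> { 0.1, 0.4, 0.6, 0.8, 1.2 };
            Console.WriteLine(BoundAt(posScores, 0.8));   // 1.2: the smallest score in the top 20%
        }
    }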
Example #4
        public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
        {
            Preconditions.CheckNotNull(dataset);
            Preconditions.CheckArgumentRange(TagDistrTable == null || TagDistrTable.NumOfDimensions == 2);

            mBinModel = CreateModel();
            mBinModel.Train(new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset.Where(le => le.Label != SentimentLabel.Neutral)));

            TagDistrTable = new EnumTagDistrTable<SentimentLabel>(1, BinWidth, -5, 5, SentimentLabel.Exclude)
            {
                CalcDistrFunc = (tagCounts, values, tag) => ((double)tagCounts[tag] + 1) / (tagCounts.Values.Sum() + tagCounts.Count)     // use Laplace formula
            };
            foreach (LabeledExample<SentimentLabel, SparseVector<double>> le in dataset)
            {
                Prediction <SentimentLabel> prediction = mBinModel.Predict(le.Example);
                TagDistrTable.AddCount(le.Label, prediction.BestClassLabel == SentimentLabel.Positive ? prediction.BestScore : -prediction.BestScore);
            }
            TagDistrTable.Calculate();

            IsTrained = true;
        }
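
The CalcDistrFunc above is an add-one (Laplace) estimate: a tag's probability is (count + 1) divided by (total count + number of tags). A minimal stand-alone sketch with a plain dictionary follows; the label names and counts are made-up inputs.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class LaplaceSketch
    {
        // Add-one smoothing: (count + 1) / (total + number of tags), as in CalcDistrFunc above.
        static double Laplace(IDictionary<string, int> tagCounts, string tag) =>
            ((double)tagCounts[tag] + 1) / (tagCounts.Values.Sum() + tagCounts.Count);

        static void Main()
        {
            var counts = new Dictionary<string, int> { ["pos"] = 3, ["neg"] = 1, ["neu"] = 0 };
            Console.WriteLine(Laplace(counts, "neu"));   // (0 + 1) / (4 + 3) ≈ 0.143
        }
    }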