/// <summary>
/// Trains the three pairwise (one-vs-one) binary models: each model is trained on the
/// subset of <paramref name="dataset"/> with the third sentiment label removed.
/// </summary>
/// <param name="dataset">Labeled training examples; must not be null.</param>
public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
{
    Preconditions.CheckNotNull(dataset);

    // One binary model per label pair; the remaining label is filtered out each time.
    mPosNegModel = TrainModel(FilterOutLabel(dataset, SentimentLabel.Neutral), SentimentLabel.Positive, SentimentLabel.Negative);
    mNegNeuModel = TrainModel(FilterOutLabel(dataset, SentimentLabel.Positive), SentimentLabel.Negative, SentimentLabel.Neutral);
    mPosNeuModel = TrainModel(FilterOutLabel(dataset, SentimentLabel.Negative), SentimentLabel.Positive, SentimentLabel.Neutral);
    IsTrained = true;
}

// Builds a training set containing all examples except those tagged with excludedLabel.
// Each surviving example is re-wrapped into a fresh LabeledExample, matching the original
// per-pair dataset construction.
private static LabeledDataset <SentimentLabel, SparseVector <double> > FilterOutLabel(
    ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset, SentimentLabel excludedLabel)
{
    return new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset
        .Where(le => le.Label != excludedLabel)
        .Select(le => new LabeledExample <SentimentLabel, SparseVector <double> >(le.Label, le.Example)));
}
/// <summary>
/// Trains the cascade of models: each model except the last is trained on the current
/// dataset, after which the examples carrying that model's label are filtered out before
/// the next model trains. The final model sees only what every earlier stage left behind.
/// </summary>
/// <param name="dataset">Labeled training examples.</param>
public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
{
    int trailingCount = ModelLabels.Count() - 1;
    foreach (ModelLabel stage in ModelLabels.Take(trailingCount))
    {
        stage.Model.Train(dataset);
        // Copy the loop variable before capturing it in the lambda, so the (lazily
        // evaluated) filter binds to this iteration's label rather than the shared variable.
        ModelLabel captured = stage;
        dataset = new LabeledDataset <LblT, ExT>(dataset.Where(le => !le.Label.Equals(captured.Label)));
    }
    ModelLabels.Last().Model.Train(dataset);
    IsTrained = true;
}
/// <summary>
/// Trains the binary (positive vs. negative) classifier and calibrates the neutral-zone
/// score bounds (NegBound/PosBound) via cross-validation. Scores of correctly classified
/// pos/neg examples and of neutral examples are collected during CV; the bounds are then
/// either searched for (IsCalcBounds) or taken at the configured centiles.
/// </summary>
/// <param name="dataset">Labeled training examples; must not be null.</param>
public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
{
    Preconditions.CheckNotNull(dataset);
    // Centile values are only required when bounds are not computed automatically.
    // NOTE(review): NegCentile/PosCentile appear to be nullable (see the != null checks
    // below); when null and IsCalcBounds is false, these range checks evaluate to false
    // and will throw — confirm that is the intended contract.
    Preconditions.CheckArgumentRange(IsCalcBounds || NegCentile >= 0 && NegCentile <= 1);
    Preconditions.CheckArgumentRange(IsCalcBounds || PosCentile >= 0 && PosCentile <= 1);
    var labeledDataset = (LabeledDataset <SentimentLabel, SparseVector <double> >)dataset;
    if (labeledDataset.Count == 0)
    {
        // Best-effort warning only; training proceeds and will simply produce no scores.
        Console.WriteLine("empty dataset");
    }
    TrainStats = null;
    // Signed scores accumulated across CV folds: positive scores > 0, negative scores < 0.
    var posScores = new List <double>();
    var negScores = new List <double>();
    var neutralScores = new List <double>();
    // The binary model trains only on non-neutral examples; neutral examples are kept
    // aside (when needed) to measure how they score against each fold's model.
    var trainDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(labeledDataset.Where(le => le.Label != SentimentLabel.Neutral));
    var neutralDataset = IsCalcStats || IsCalcBounds
        ? new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Where(le => le.Label == SentimentLabel.Neutral))
        : null;
    var validation = new CrossValidator <SentimentLabel, SparseVector <double> >
    {
        NumFolds = NumTrainFolds,
        Dataset = trainDataset,
        // Record the score of each correctly classified example, signed by label:
        // positive label -> +score, negative label -> -score.
        OnAfterPrediction = (sender, foldN, model, example, le, prediction) =>
        {
            if (le.Label == prediction.BestClassLabel)
            {
                if (le.Label == SentimentLabel.Positive) { posScores.Add(prediction.BestScore); }
                else { negScores.Add(-prediction.BestScore); }
            }
            return(true);
        },
        // After each fold, score all held-aside neutral examples with that fold's model.
        OnAfterFold = (sender, foldN, trainSet, testSet) =>
        {
            if (IsCalcStats || IsCalcBounds)
            {
                neutralScores.AddRange(neutralDataset
                    .Select(le => sender.Models[0].Predict(le.Example))
                    .Select(p => p.BestClassLabel == SentimentLabel.Positive ? p.BestScore : -p.BestScore));
            }
        }
    };
    validation.Models.Add(CreateModel());
    validation.Run();
    if (IsCalcBounds)
    {
        // Search for the bound maximizing the probability of excluding neutral examples;
        // fall back to 0 when no such bound is found. Negative side is mirrored to
        // positive magnitudes for the search, then negated back.
        double negMaxProb, negScore;
        NegBound = FindMaxExclusiveProbability(neutralScores.Where(s => s < 0).Select(s => - s), negScores.Select(s => - s), out negMaxProb, out negScore) ? -negScore : 0;
        double posMaxProb, posScore;
        PosBound = FindMaxExclusiveProbability(neutralScores.Where(s => s > 0), posScores, out posMaxProb, out posScore) ? posScore : 0;
    }
    else
    {
        // Centile-based bounds: take the score at the configured centile of the sorted
        // (by magnitude, toward zero) correctly-classified scores.
        if (NegCentile != null)
        {
            NegBound = negScores.OrderByDescending(bs => bs).Skip((int)Math.Truncate(negScores.Count * NegCentile.Value)).FirstOrDefault();
        }
        if (PosCentile != null)
        {
            PosBound = posScores.OrderBy(bs => bs).Skip((int)Math.Truncate(posScores.Count * PosCentile.Value)).FirstOrDefault();
        }
    }
    if (IsCalcStats)
    {
        TrainStats = CalcStats(negScores, neutralScores, posScores);
    }
    // Reuse the CV model instance and retrain it on the full non-neutral training set.
    mBinaryClassifier = validation.Models[0];
    mBinaryClassifier.Train(trainDataset);
    IsTrained = true;
}
/// <summary>
/// Trains the binary (positive vs. negative) model on the non-neutral examples, then
/// builds a per-label distribution table of signed classifier scores over the whole
/// dataset (positive predictions contribute +score, others -score).
/// </summary>
/// <param name="dataset">Labeled training examples; must not be null.</param>
public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
{
    Preconditions.CheckNotNull(dataset);
    Preconditions.CheckArgumentRange(TagDistrTable == null || TagDistrTable.NumOfDimensions == 2);

    // Fit the underlying binary model on everything except the neutral examples.
    var binaryTrainSet = new LabeledDataset <SentimentLabel, SparseVector <double> >(
        dataset.Where(item => item.Label != SentimentLabel.Neutral));
    mBinModel = CreateModel();
    mBinModel.Train(binaryTrainSet);

    TagDistrTable = new EnumTagDistrTable <SentimentLabel>(1, BinWidth, -5, 5, SentimentLabel.Exclude)
    {
        // Laplace (add-one) smoothing over the per-tag counts.
        CalcDistrFunc = (tagCounts, values, tag) =>
            ((double)tagCounts[tag] + 1) / (tagCounts.Values.Sum() + tagCounts.Count)
    };
    foreach (LabeledExample <SentimentLabel, SparseVector <double> > item in dataset)
    {
        Prediction <SentimentLabel> result = mBinModel.Predict(item.Example);
        double signedScore = result.BestClassLabel == SentimentLabel.Positive
            ? result.BestScore
            : -result.BestScore;
        TagDistrTable.AddCount(item.Label, signedScore);
    }
    TagDistrTable.Calculate();
    IsTrained = true;
}