/// <summary>
/// Post-training hook. Forwards its arguments, together with this instance,
/// to the <c>OnAfterTrain</c> callback when one is attached; otherwise does nothing.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="model">Model handed to the callback.</param>
/// <param name="trainSet">Training dataset handed to the callback.</param>
protected virtual void AfterTrain(int foldN, IModel<LblT, ModelExT> model, ILabeledDataset<LblT, ModelExT> trainSet)
{
    if (OnAfterTrain == null)
    {
        return; // nobody is listening
    }
    OnAfterTrain(this, foldN, model, trainSet);
}
/// <summary>
/// Trains one model on a fold and evaluates it on the fold's test data,
/// recording train/test timestamps in <paramref name="modelProfile"/> and
/// prediction counts in the performance matrix obtained from <c>GetPerfMatrix</c>.
/// </summary>
/// <param name="foldN">Fold number passed to every hook.</param>
/// <param name="model">Model to train and test.</param>
/// <param name="trainSet">Unmapped training data, passed to <c>BeforeTrain</c>.</param>
/// <param name="mappedTrainSet">Mapped training data, passed to <c>BeforeTrain</c>.</param>
/// <param name="testSet">Unmapped test data; its i-th example is passed to <c>AfterPrediction</c>.</param>
/// <param name="mappedTestSet">Mapped test data, passed to <c>BeforeTest</c>.</param>
/// <param name="modelProfile">Receives TrainEndTime, TestStartTime and TestEndTime.</param>
protected void RunModel(
    int foldN,
    IModel<LblT, ModelExT> model,
    ILabeledDataset<LblT, InputExT> trainSet,
    ILabeledDataset<LblT, ModelExT> mappedTrainSet,
    ILabeledDataset<LblT, InputExT> testSet,
    ILabeledDataset<LblT, ModelExT> mappedTestSet,
    CrossValidationTimeProfile modelProfile)
{
    // training phase: BeforeTrain decides which dataset is actually used
    ILabeledDataset<LblT, ModelExT> trainData = BeforeTrain(foldN, model, trainSet, mappedTrainSet);
    Train(foldN, model, trainData);
    AfterTrain(foldN, model, trainData);
    modelProfile.TrainEndTime = DateTime.Now;

    // testing phase
    modelProfile.TestStartTime = DateTime.Now;
    ILabeledDataset<LblT, ModelExT> testData = BeforeTest(foldN, model, testSet, mappedTestSet);
    string modelName = GetModelName(model);
    PerfMatrix<LblT> perfMatrix = GetPerfMatrix(modelName, foldN);
    for (int idx = 0; idx < testData.Count; idx++)
    {
        LabeledExample<LblT, ModelExT> example = testData[idx];
        Prediction<LblT> predicted = Predict(foldN, model, example);

        // testSet is indexed with the position in testData, so the dataset
        // returned by BeforeTest is assumed to stay aligned with testSet
        if (!AfterPrediction(foldN, model, testSet[idx].Example, example, predicted))
        {
            continue; // the hook vetoed this prediction
        }
        if (!predicted.Any())
        {
            continue; // nothing was predicted, so there is nothing to count
        }
        perfMatrix.AddCount(example.Label, predicted.BestClassLabel);
    }
    modelProfile.TestEndTime = DateTime.Now;
    AfterTest(foldN, model, testData);
}
/// <summary>
/// Post-testing hook. Forwards its arguments, together with this instance,
/// to the <c>OnAfterTest</c> callback when one is attached; otherwise does nothing.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="model">Model handed to the callback.</param>
/// <param name="testSet">Test dataset handed to the callback.</param>
protected virtual void AfterTest(int foldN, IModel<LblT, ModelExT> model, ILabeledDataset<LblT, ModelExT> testSet)
{
    if (OnAfterTest == null)
    {
        return; // nobody is listening
    }
    OnAfterTest(this, foldN, model, testSet);
}
/// <summary>
/// End-of-fold hook. Forwards its arguments, together with this instance,
/// to the <c>OnAfterFold</c> callback when one is attached; otherwise does nothing.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="trainSet">Training dataset of the fold, handed to the callback.</param>
/// <param name="testSet">Test dataset of the fold, handed to the callback.</param>
protected virtual void AfterFold(int foldN, ILabeledDataset<LblT, InputExT> trainSet, ILabeledDataset<LblT, InputExT> testSet)
{
    if (OnAfterFold == null)
    {
        return; // nobody is listening
    }
    OnAfterFold(this, foldN, trainSet, testSet);
}
/// <summary>
/// Trains <paramref name="model"/> on <paramref name="trainDataset"/>.
/// An attached <c>OnTrain</c> callback replaces the default training call;
/// without one, <c>model.Train(trainDataset)</c> is invoked directly.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="model">Model to train.</param>
/// <param name="trainDataset">Dataset to train on.</param>
protected virtual void Train(int foldN, IModel<LblT, ModelExT> model, ILabeledDataset<LblT, ModelExT> trainDataset)
{
    if (OnTrain == null)
    {
        // default behaviour: let the model train itself
        model.Train(trainDataset);
        return;
    }
    // the callback takes over completely; model.Train is not called here
    OnTrain(this, foldN, model, trainDataset);
}
/// <summary>
/// Runs the model immediately when <c>mFoldTasks</c> is set; otherwise appends
/// a deferred <c>RunModel</c> call to the collection stored in
/// <c>mFoldModelTasks</c> for this fold, holding a lock on that collection
/// while adding.
/// </summary>
/// <param name="foldN">Fold number; also the key into <c>mFoldModelTasks</c>.</param>
/// <param name="model">Model to run.</param>
/// <param name="trainSet">Unmapped training data.</param>
/// <param name="mappedTrainSet">Mapped training data.</param>
/// <param name="testSet">Unmapped test data.</param>
/// <param name="mappedTestSet">Mapped test data.</param>
/// <param name="modelProfile">Timing profile handed on to <c>RunModel</c>.</param>
protected override void DoRunModel(
    int foldN,
    IModel<LblT, ModelExT> model,
    ILabeledDataset<LblT, InputExT> trainSet,
    ILabeledDataset<LblT, ModelExT> mappedTrainSet,
    ILabeledDataset<LblT, InputExT> testSet,
    ILabeledDataset<LblT, ModelExT> mappedTestSet,
    CrossValidationTimeProfile modelProfile)
{
    if (mFoldTasks != null)
    {
        // run inline on the calling thread
        RunModel(foldN, model, trainSet, mappedTrainSet, testSet, mappedTestSet, modelProfile);
        return;
    }

    // queue the work instead of running it now
    lock (mFoldModelTasks[foldN])
    {
        mFoldModelTasks[foldN].Add(
            () => RunModel(foldN, model, trainSet, mappedTrainSet, testSet, mappedTestSet, modelProfile));
    }
}
/// <summary>
/// Executes one cross-validation fold: registers timing profiles, splits
/// <c>Dataset</c> into train/test parts (stratified when <c>IsStratified</c>),
/// maps both parts and hands every model in <c>Models</c> to <c>DoRunModel</c>.
/// </summary>
/// <param name="foldN">Number of the fold to run.</param>
protected void RunFold(int foldN)
{
    // fold-level profile is stored under the empty model name
    var foldProfile = new CrossValidationTimeProfile();
    foldProfile.FoldN = foldN;
    foldProfile.FoldStartTime = DateTime.Now;

    var profilesByModel = new ConcurrentDictionary<string, CrossValidationTimeProfile>();
    profilesByModel.TryAdd("", foldProfile);
    mFoldModelTimes.TryAdd(foldN, profilesByModel);

    // split the data for this fold
    LabeledDataset<LblT, InputExT> testSet, trainSet;
    if (IsStratified)
    {
        Dataset.SplitForStratifiedCrossValidation(NumFolds, foldN, out trainSet, out testSet);
    }
    else
    {
        Dataset.SplitForCrossValidation(NumFolds, foldN, out trainSet, out testSet);
    }
    BeforeFold(foldN, trainSet, testSet);

    // perform mapping (train set first, then test set)
    ILabeledDataset<LblT, ModelExT> trainMapped = MapTrainSet(foldN, trainSet);
    ILabeledDataset<LblT, ModelExT> testMapped = MapTestSet(foldN, testSet);

    // validate every model on this fold
    foreach (IModel<LblT, ModelExT> model in Models)
    {
        string modelName = GetModelName(model);

        var modelProfile = new CrossValidationTimeProfile();
        modelProfile.FoldN = foldN;
        modelProfile.ModelName = modelName;
        modelProfile.FoldStartTime = foldProfile.FoldStartTime;
        modelProfile.TrainStartTime = DateTime.Now;
        profilesByModel.TryAdd(modelName, modelProfile);

        DoRunModel(foldN, model, trainSet, trainMapped, testSet, testMapped, modelProfile);
    }

    // NOTE(review): stamped once DoRunModel has returned for every model; a
    // DoRunModel override that only queues the work returns before the model
    // has actually run - confirm this is the intended meaning of FoldEndTime
    foldProfile.FoldEndTime = DateTime.Now;
    AfterFold(foldN, trainSet, testSet);
}
/// <summary>
/// Builds a new labeled dataset whose examples are this dataset's examples
/// converted to <paramref name="newExType"/> via <c>ModelUtils.ConvertExample</c>.
/// </summary>
/// <param name="newExType">
/// Target example type. Supported here: SparseVector of double and its ReadOnly
/// variant, BinaryVector and its ReadOnly variant.
/// </param>
/// <param name="move">
/// When true, each source item is set to null as soon as it has been converted
/// and the source item list is cleared afterwards.
/// </param>
/// <returns>The converted dataset.</returns>
/// <exception cref="ArgumentNullException">Passed to <c>Utils.ThrowException</c> when <paramref name="newExType"/> is null.</exception>
/// <exception cref="ArgumentNotSupportedException">Thrown when <paramref name="newExType"/> is none of the supported types.</exception>
public ILabeledDataset<LblT> ConvertDataset(Type newExType, bool move)
{
    Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);

    var converted = new ArrayList<LabeledExample<LblT, object>>(mItems.Count);
    for (int idx = 0; idx < mItems.Count; idx++)
    {
        var source = mItems[idx];
        LblT label = source.Label;
        var newExample = ModelUtils.ConvertExample(source.Example, newExType); // throws ArgumentValueException
        converted.Add(new LabeledExample<LblT, object>(label, newExample));
        if (move)
        {
            mItems[idx] = null; // release the source item right away
        }
    }
    if (move)
    {
        mItems.Clear();
    }

    // NOTE(review): with move == true the source items are already gone by the
    // time an unsupported newExType is rejected below - confirm this is acceptable
    if (newExType == typeof(SparseVector<double>))
    {
        return new LabeledDataset<LblT, SparseVector<double>>(converted);
    }
    if (newExType == typeof(SparseVector<double>.ReadOnly))
    {
        return new LabeledDataset<LblT, SparseVector<double>.ReadOnly>(converted);
    }
    if (newExType == typeof(BinaryVector))
    {
        return new LabeledDataset<LblT, BinaryVector>(converted);
    }
    if (newExType == typeof(BinaryVector.ReadOnly))
    {
        return new LabeledDataset<LblT, BinaryVector.ReadOnly>(converted);
    }
    throw new ArgumentNotSupportedException("newExType");
}
/// <summary>
/// Runs the base <c>BeforeTrain</c> and then records <paramref name="model"/>
/// in <c>mFoldModels</c> so it is available for reporting.
/// </summary>
/// <param name="foldN">Fold number; first component of the registration key.</param>
/// <param name="model">Model about to be trained.</param>
/// <param name="trainSet">Unmapped training data, forwarded to the base implementation.</param>
/// <param name="mappedTrainSet">Mapped training data, forwarded to the base implementation.</param>
/// <returns>The dataset returned by the base implementation.</returns>
protected override ILabeledDataset<LblT, SparseVector<double>> BeforeTrain(
    int foldN,
    IModel<LblT, SparseVector<double>> model,
    ILabeledDataset<LblT, string> trainSet,
    ILabeledDataset<LblT, SparseVector<double>> mappedTrainSet)
{
    ILabeledDataset<LblT, SparseVector<double>> result = base.BeforeTrain(foldN, model, trainSet, mappedTrainSet);

    // add fold's model for report: take the first (foldN, slot) key that is still free
    int slot = 0;
    while (!mFoldModels.TryAdd(new Tuple<int, int>(foldN, slot), model))
    {
        slot++;
    }
    return result;
}
/// <summary>
/// Convenience overload: initializes from <paramref name="labeledDataset"/>
/// with the large-scale flag switched off.
/// </summary>
/// <param name="labeledDataset">Labeled text documents.</param>
/// <returns>Whatever the two-argument overload returns.</returns>
public List<SparseVector<double>> Initialize(ILabeledDataset<LabelT, string> labeledDataset)
{
    const bool largeScale = false;
    return Initialize(labeledDataset, largeScale);
}
/// <summary>
/// Initializes the space from labeled documents: builds un-normalized
/// bag-of-words vectors through the base class, derives one delta weight per
/// word from how often the word occurs under each label, and passes every
/// vector through <c>CalcDeltaBow</c>.
/// </summary>
/// <param name="labeledDataset">Labeled text documents; the i-th label is paired with the i-th vector.</param>
/// <param name="largeScale">Forwarded unchanged to the base <c>Initialize</c>.</param>
/// <returns>A new list holding the vectors returned by the base class after <c>CalcDeltaBow</c> was applied to each.</returns>
public List<SparseVector<double>> Initialize(ILabeledDataset<LabelT, string> labeledDataset, bool largeScale)
{
    // The base class must produce raw vectors, so normalization is switched
    // off for the duration of the call. try/finally guarantees the caller's
    // setting is restored even if base.Initialize throws.
    bool normalizeVectors = NormalizeVectors;
    List<SparseVector<double>> bowData;
    NormalizeVectors = false;
    try
    {
        bowData = base.Initialize(labeledDataset.Select(d => d.Example), largeScale);
    }
    finally
    {
        NormalizeVectors = normalizeVectors;
    }

    // count word label frequencies: label -> (word index -> number of vector entries)
    var labelWordCounts = new Dictionary<LabelT, Dictionary<int, int>>();
    for (int i = 0; i < bowData.Count; i++)
    {
        foreach (IdxDat<double> idxDat in bowData[i])
        {
            LabelT label = labeledDataset[i].Label;
            Dictionary<int, int> wordCounts;
            if (!labelWordCounts.TryGetValue(label, out wordCounts))
            {
                labelWordCounts.Add(label, wordCounts = new Dictionary<int, int>());
            }
            int count;
            if (!wordCounts.TryGetValue(idxDat.Idx, out count))
            {
                wordCounts.Add(idxDat.Idx, 1);
            }
            else
            {
                wordCounts[idxDat.Idx] = count + 1;
            }
        }
    }

    // calc deltas:
    //   delta = | log2( max / max(sum - max, 1) * (labelCount - 1) ) |
    // where max / sum are taken over the word's per-label counts.
    // NOTE(review): with a single label, (labelCount - 1) is 0, the log argument
    // is 0 and the delta becomes +Infinity - confirm whether that case can occur.
    // NOTE(review): mWordDeltas is only added to here; if entries from an earlier
    // call remain, Add may reject duplicate keys - confirm it is reset elsewhere.
    int labelCount = labelWordCounts.Count;
    var counts = new List<double>();
    foreach (Word word in Words)
    {
        counts.Clear();
        foreach (KeyValuePair<LabelT, Dictionary<int, int>> kv in labelWordCounts)
        {
            int count;
            if (kv.Value.TryGetValue(word.mIdx, out count))
            {
                counts.Add(count);
            }
        }
        if (counts.Any())
        {
            double max = counts.Max();
            mWordDeltas.Add(word.mIdx, Math.Abs(Math.Log(
                max / Math.Max(counts.Sum() - max, 1) * (labelCount - 1), 2)));
        }
        else
        {
            // word not seen under any label: neutral weight
            mWordDeltas.Add(word.mIdx, 1);
        }
    }

    // transform vectors using deltas; the caller's original normalization
    // setting is passed along to CalcDeltaBow
    var bowDataset = new List<SparseVector<double>>();
    foreach (SparseVector<double> bow in bowData)
    {
        CalcDeltaBow(bow, normalizeVectors);
        bowDataset.Add(bow);
    }
    return bowDataset;
}
/// <summary>
/// Runs the base <c>BeforeTrain</c>, hands the fold's bag-of-words space to
/// the model when it is a <c>ReplicationWrapperClassifier</c>, and records the
/// model in <c>mFoldModels</c> for reporting.
/// </summary>
/// <param name="foldN">Fold number; key into <c>mFoldBowSpaces</c> and first component of the registration key.</param>
/// <param name="model">Model about to be trained.</param>
/// <param name="trainSet">Unmapped training data, forwarded to the base implementation.</param>
/// <param name="mappedTrainSet">Mapped training data, forwarded to the base implementation.</param>
/// <returns>The dataset returned by the base implementation.</returns>
protected override ILabeledDataset<SentimentLabel, SparseVector<double>> BeforeTrain(
    int foldN,
    IModel<SentimentLabel, SparseVector<double>> model,
    ILabeledDataset<SentimentLabel, Tweet> trainSet,
    ILabeledDataset<SentimentLabel, SparseVector<double>> mappedTrainSet)
{
    ILabeledDataset<SentimentLabel, SparseVector<double>> result = base.BeforeTrain(foldN, model, trainSet, mappedTrainSet);

    // replication wrapper needs special treatment: give it this fold's bow space
    var replicationWrapper = model as ReplicationWrapperClassifier;
    if (replicationWrapper != null)
    {
        replicationWrapper.BowSpace = mFoldBowSpaces[foldN];
    }

    // add fold's model for report: take the first (foldN, slot) key that is still free
    int slot = 0;
    while (!mFoldModels.TryAdd(new Tuple<int, int>(foldN, slot), model))
    {
        slot++;
    }
    return result;
}
/// <summary>
/// Maps the test tweets of a fold to sparse vectors by passing each tweet's
/// text through <c>ProcessDocument</c> of the bag-of-words space stored for
/// that fold; labels are carried over unchanged.
/// </summary>
/// <param name="foldN">Fold number; key into <c>mFoldBowSpaces</c>.</param>
/// <param name="testSet">Labeled tweets to map.</param>
/// <returns>A new labeled dataset of sparse vectors.</returns>
protected override ILabeledDataset<SentimentLabel, SparseVector<double>> MapTestSet(
    int foldN,
    ILabeledDataset<SentimentLabel, Tweet> testSet)
{
    // the bow space is looked up per example, inside the projection
    var vectorized = testSet.Select(labeledTweet =>
    {
        SparseVector<double> bow = mFoldBowSpaces[foldN].ProcessDocument(labeledTweet.Example.Text);
        return new LabeledExample<SentimentLabel, SparseVector<double>>(labeledTweet.Label, bow);
    });
    return new LabeledDataset<SentimentLabel, SparseVector<double>>(vectorized);
}
/// <summary>
/// Pre-testing hook that selects the dataset to test on: the result of the
/// <c>OnBeforeTest</c> callback when one is attached, otherwise
/// <paramref name="mappedTestSet"/> unchanged.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="model">Model handed to the callback.</param>
/// <param name="testSet">Unmapped test data handed to the callback.</param>
/// <param name="mappedTestSet">Mapped test data; the default return value.</param>
/// <returns>The dataset to be used for testing.</returns>
protected virtual ILabeledDataset<LblT, ModelExT> BeforeTest(
    int foldN,
    IModel<LblT, ModelExT> model,
    ILabeledDataset<LblT, InputExT> testSet,
    ILabeledDataset<LblT, ModelExT> mappedTestSet)
{
    if (OnBeforeTest == null)
    {
        return mappedTestSet;
    }
    return OnBeforeTest(this, foldN, model, testSet, mappedTestSet);
}
/// <summary>
/// Runs the model right away by forwarding every argument, unchanged, to
/// <c>RunModel</c>.
/// </summary>
/// <param name="foldN">Fold number.</param>
/// <param name="model">Model to run.</param>
/// <param name="trainSet">Unmapped training data.</param>
/// <param name="mappedTrainSet">Mapped training data.</param>
/// <param name="testSet">Unmapped test data.</param>
/// <param name="mappedTestSet">Mapped test data.</param>
/// <param name="modelProfile">Timing profile handed on to <c>RunModel</c>.</param>
protected override void DoRunModel(
    int foldN,
    IModel<LblT, ModelExT> model,
    ILabeledDataset<LblT, InputExT> trainSet,
    ILabeledDataset<LblT, ModelExT> mappedTrainSet,
    ILabeledDataset<LblT, InputExT> testSet,
    ILabeledDataset<LblT, ModelExT> mappedTestSet,
    CrossValidationTimeProfile modelProfile)
{
    // no deferral in this implementation: plain pass-through
    RunModel(
        foldN,
        model,
        trainSet,
        mappedTrainSet,
        testSet,
        mappedTestSet,
        modelProfile);
}
/// <summary>
/// Maps the fold's test data to the model's example type: uses the
/// <c>OnTestSetMap</c> callback when one is attached, otherwise casts
/// <paramref name="testSet"/> itself to the model dataset type.
/// </summary>
/// <param name="foldN">Fold number handed to the callback.</param>
/// <param name="testSet">Unmapped test data.</param>
/// <returns>The mapped test dataset.</returns>
protected virtual ILabeledDataset<LblT, ModelExT> MapTestSet(int foldN, ILabeledDataset<LblT, InputExT> testSet)
{
    if (OnTestSetMap != null)
    {
        return OnTestSetMap(this, foldN, testSet);
    }
    // no mapper attached: the cast only succeeds when the input dataset is
    // already usable as a model dataset
    return (ILabeledDataset<LblT, ModelExT>)testSet;
}
/// <summary>
/// Creates and registers a bag-of-words space for the fold, initializes it
/// from the training tweets' text and returns the resulting vectors paired
/// with the tweets' labels.
/// </summary>
/// <param name="foldN">Fold number; must not yet have an entry in <c>mFoldBowSpaces</c>.</param>
/// <param name="trainSet">Labeled tweets to train on.</param>
/// <returns>A new labeled dataset of sparse vectors, one per vector returned by the bow space.</returns>
protected override ILabeledDataset<SentimentLabel, SparseVector<double>> MapTrainSet(
    int foldN,
    ILabeledDataset<SentimentLabel, Tweet> trainSet)
{
    // each fold gets exactly one bow space: none may exist yet, and adding must succeed
    BowSpace existing;
    Preconditions.CheckState(!mFoldBowSpaces.TryGetValue(foldN, out existing));
    BowSpace bowSpace = BowSpaceFunc();
    Preconditions.CheckState(mFoldBowSpaces.TryAdd(foldN, bowSpace));

    // a delta bow space is initialized from labeled texts, any other from plain texts
    List<SparseVector<double>> bowData;
    var deltaBowSpace = bowSpace as DeltaBowSpace<SentimentLabel>;
    if (deltaBowSpace != null)
    {
        var labeledTexts = new LabeledDataset<SentimentLabel, string>(
            trainSet.Select(d => new LabeledExample<SentimentLabel, string>(d.Label, d.Example.Text)));
        bowData = deltaBowSpace.Initialize(labeledTexts);
    }
    else
    {
        bowData = bowSpace.Initialize(trainSet.Select(d => d.Example.Text));
    }

    // re-attach labels by position
    var result = new LabeledDataset<SentimentLabel, SparseVector<double>>();
    for (int idx = 0; idx < bowData.Count; idx++)
    {
        result.Add(trainSet[idx].Label, bowData[idx]);
    }
    return result;
}
/// <summary>
/// Strategy hook through which a concrete validator decides how one model is
/// run on one fold. Implementations receive everything <c>RunModel</c> needs.
/// </summary>
/// <param name="foldN">Fold number.</param>
/// <param name="model">Model to run.</param>
/// <param name="trainSet">Unmapped training data of the fold.</param>
/// <param name="mappedTrainSet">Training data mapped to the model's example type.</param>
/// <param name="testSet">Unmapped test data of the fold.</param>
/// <param name="mappedTestSet">Test data mapped to the model's example type.</param>
/// <param name="modelProfile">Timing profile associated with this model and fold.</param>
protected abstract void DoRunModel(int foldN, IModel <LblT, ModelExT> model, ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, ModelExT> mappedTrainSet, ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet, CrossValidationTimeProfile modelProfile);