Exemple #1
0
 /// <summary>
 /// Post-training hook: forwards the call to <c>OnAfterTrain</c> when a handler
 /// is attached and does nothing otherwise.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="model">Model handed to the handler.</param>
 /// <param name="trainSet">Training set handed to the handler.</param>
 protected virtual void AfterTrain(int foldN, IModel <LblT, ModelExT> model, ILabeledDataset <LblT, ModelExT> trainSet)
 {
     // no handler attached, nothing to notify
     if (OnAfterTrain == null)
     {
         return;
     }

     OnAfterTrain(this, foldN, model, trainSet);
 }
Exemple #2
0
        /// <summary>
        /// Trains <paramref name="model"/> for one fold and then evaluates it on the fold's test set,
        /// adding one count to the fold's performance matrix per accepted prediction and writing the
        /// train/test timestamps into <paramref name="modelProfile"/>.
        /// </summary>
        /// <param name="foldN">Fold number, passed to every hook and used to look up the performance matrix.</param>
        /// <param name="model">Model to train and evaluate.</param>
        /// <param name="trainSet">Unmapped training set, passed to <c>BeforeTrain</c>.</param>
        /// <param name="mappedTrainSet">Mapped training set, passed to <c>BeforeTrain</c>.</param>
        /// <param name="testSet">Unmapped test set; its examples are handed to <c>AfterPrediction</c> by index.</param>
        /// <param name="mappedTestSet">Mapped test set, passed to <c>BeforeTest</c>.</param>
        /// <param name="modelProfile">Receives <c>TrainEndTime</c>, <c>TestStartTime</c> and <c>TestEndTime</c>.</param>
        protected void RunModel(int foldN, IModel <LblT, ModelExT> model,
                                ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, ModelExT> mappedTrainSet,
                                ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet, CrossValidationTimeProfile modelProfile)
        {
            // training phase: BeforeTrain supplies the dataset that is actually trained on
            ILabeledDataset <LblT, ModelExT> effectiveTrainSet = BeforeTrain(foldN, model, trainSet, mappedTrainSet);

            Train(foldN, model, effectiveTrainSet);
            AfterTrain(foldN, model, effectiveTrainSet);
            modelProfile.TrainEndTime = DateTime.Now;

            // testing phase: BeforeTest supplies the dataset that is actually predicted on
            modelProfile.TestStartTime = DateTime.Now;
            ILabeledDataset <LblT, ModelExT> effectiveTestSet = BeforeTest(foldN, model, testSet, mappedTestSet);
            PerfMatrix <LblT> matrix = GetPerfMatrix(GetModelName(model), foldN);

            int idx = 0;
            while (idx < effectiveTestSet.Count)
            {
                LabeledExample <LblT, ModelExT> mapped = effectiveTestSet[idx];
                Prediction <LblT> outcome = Predict(foldN, model, mapped);

                // the hook runs for every example; a count is added only when the hook
                // returns true and the prediction reports Any()
                bool accepted = AfterPrediction(foldN, model, testSet[idx].Example, mapped, outcome);
                if (accepted && outcome.Any())
                {
                    matrix.AddCount(mapped.Label, outcome.BestClassLabel);
                }

                idx++;
            }

            // the end time is taken before the AfterTest hook runs
            modelProfile.TestEndTime = DateTime.Now;
            AfterTest(foldN, model, effectiveTestSet);
        }
Exemple #3
0
 /// <summary>
 /// Post-testing hook: forwards the call to <c>OnAfterTest</c> when a handler
 /// is attached and does nothing otherwise.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="model">Model handed to the handler.</param>
 /// <param name="testSet">Test set handed to the handler.</param>
 protected virtual void AfterTest(int foldN, IModel <LblT, ModelExT> model, ILabeledDataset <LblT, ModelExT> testSet)
 {
     // no handler attached, nothing to notify
     if (OnAfterTest == null)
     {
         return;
     }

     OnAfterTest(this, foldN, model, testSet);
 }
Exemple #4
0
 /// <summary>
 /// End-of-fold hook: forwards the call to <c>OnAfterFold</c> when a handler
 /// is attached and does nothing otherwise.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="trainSet">Fold training set handed to the handler.</param>
 /// <param name="testSet">Fold test set handed to the handler.</param>
 protected virtual void AfterFold(int foldN, ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, InputExT> testSet)
 {
     // no handler attached, nothing to notify
     if (OnAfterFold == null)
     {
         return;
     }

     OnAfterFold(this, foldN, trainSet, testSet);
 }
Exemple #5
0
 /// <summary>
 /// Trains <paramref name="model"/> on <paramref name="trainDataset"/>. When an <c>OnTrain</c>
 /// handler is attached it is called instead of <c>model.Train</c>.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="model">Model to train.</param>
 /// <param name="trainDataset">Dataset to train on.</param>
 protected virtual void Train(int foldN, IModel <LblT, ModelExT> model, ILabeledDataset <LblT, ModelExT> trainDataset)
 {
     // default path: no custom trainer attached
     if (OnTrain == null)
     {
         model.Train(trainDataset);
         return;
     }

     // a custom trainer replaces the default call entirely
     OnTrain(this, foldN, model, trainDataset);
 }
Exemple #6
0
 /// <summary>
 /// Runs the model immediately when <c>mFoldTasks</c> is set; otherwise appends the run as a
 /// deferred action to the <c>mFoldModelTasks</c> entry of the fold.
 /// </summary>
 /// <param name="foldN">Fold number; also selects the per-fold action list.</param>
 /// <param name="model">Model to run.</param>
 /// <param name="trainSet">Unmapped training set.</param>
 /// <param name="mappedTrainSet">Mapped training set.</param>
 /// <param name="testSet">Unmapped test set.</param>
 /// <param name="mappedTestSet">Mapped test set.</param>
 /// <param name="modelProfile">Timing profile forwarded to <c>RunModel</c>.</param>
 protected override void DoRunModel(int foldN, IModel <LblT, ModelExT> model,
                                    ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, ModelExT> mappedTrainSet,
                                    ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet, CrossValidationTimeProfile modelProfile)
 {
     if (mFoldTasks == null)
     {
         // deferred path: the fold's action list is locked while the new action is appended
         lock (mFoldModelTasks[foldN])
         {
             mFoldModelTasks[foldN].Add(() => RunModel(foldN, model, trainSet, mappedTrainSet, testSet, mappedTestSet, modelProfile));
         }

         return;
     }

     // immediate path
     RunModel(foldN, model, trainSet, mappedTrainSet, testSet, mappedTestSet, modelProfile);
 }
Exemple #7
0
        /// <summary>
        /// Executes one cross-validation fold: registers timing profiles, splits the dataset,
        /// maps the train and test sets and hands every model in <c>Models</c> to <c>DoRunModel</c>.
        /// </summary>
        /// <param name="foldN">Number of the fold to run.</param>
        protected void RunFold(int foldN)
        {
            // the fold-level profile is registered under the empty model name
            var foldTiming = new CrossValidationTimeProfile {
                FoldN = foldN, FoldStartTime = DateTime.Now
            };
            var timingsByModel = new ConcurrentDictionary <string, CrossValidationTimeProfile>();

            timingsByModel.TryAdd("", foldTiming);
            mFoldModelTimes.TryAdd(foldN, timingsByModel);

            // split the dataset for this fold
            LabeledDataset <LblT, InputExT> foldTestSet, foldTrainSet;

            if (!IsStratified)
            {
                Dataset.SplitForCrossValidation(NumFolds, foldN, out foldTrainSet, out foldTestSet);
            }
            else
            {
                Dataset.SplitForStratifiedCrossValidation(NumFolds, foldN, out foldTrainSet, out foldTestSet);
            }

            BeforeFold(foldN, foldTrainSet, foldTestSet);

            // perform mapping (train set first, then test set)
            ILabeledDataset <LblT, ModelExT> mappedTrain = MapTrainSet(foldN, foldTrainSet);
            ILabeledDataset <LblT, ModelExT> mappedTest  = MapTestSet(foldN, foldTestSet);

            // validate every model against this fold
            foreach (IModel <LblT, ModelExT> candidate in Models)
            {
                string name = GetModelName(candidate);

                // per-model profile; it shares the fold start time, and the train start time is
                // stamped here, before DoRunModel is called
                var timing = new CrossValidationTimeProfile();
                timing.FoldN          = foldN;
                timing.ModelName      = name;
                timing.FoldStartTime  = foldTiming.FoldStartTime;
                timing.TrainStartTime = DateTime.Now;
                timingsByModel.TryAdd(name, timing);

                DoRunModel(foldN, candidate, foldTrainSet, mappedTrain, foldTestSet, mappedTest, timing);
            }

            foldTiming.FoldEndTime = DateTime.Now;
            AfterFold(foldN, foldTrainSet, foldTestSet);
        }
Exemple #8
0
        /// <summary>
        /// Creates a dataset whose examples are converted to <paramref name="newExType"/>.
        /// </summary>
        /// <param name="newExType">
        /// Target example type. A dataset is built for <c>SparseVector&lt;double&gt;</c>, <c>BinaryVector</c>
        /// and their <c>ReadOnly</c> variants.
        /// </param>
        /// <param name="move">
        /// When true, each source item is set to null right after it is converted and the source
        /// list is cleared once all items are converted.
        /// </param>
        /// <returns>The converted dataset.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="newExType"/> is null.</exception>
        /// <exception cref="ArgumentNotSupportedException">
        /// <paramref name="newExType"/> is none of the handled types. This check runs after the
        /// conversion loop, and therefore after any move.
        /// </exception>
        public ILabeledDataset <LblT> ConvertDataset(Type newExType, bool move)
        {
            Utils.ThrowException(newExType == null ? new ArgumentNullException("newExType") : null);

            var converted = new ArrayList <LabeledExample <LblT, object> >(mItems.Count);
            int idx = 0;

            while (idx < mItems.Count)
            {
                var label   = mItems[idx].Label;
                var example = ModelUtils.ConvertExample(mItems[idx].Example, newExType); // throws ArgumentValueException

                converted.Add(new LabeledExample <LblT, object>(label, example));
                if (move)
                {
                    // drop the source item as soon as its converted copy exists
                    mItems[idx] = null;
                }

                idx++;
            }

            if (move)
            {
                mItems.Clear();
            }

            // wrap the converted examples in a dataset of the requested element type
            if (newExType == typeof(SparseVector <double>))
            {
                return new LabeledDataset <LblT, SparseVector <double> >(converted);
            }

            if (newExType == typeof(SparseVector <double> .ReadOnly))
            {
                return new LabeledDataset <LblT, SparseVector <double> .ReadOnly>(converted);
            }

            if (newExType == typeof(BinaryVector))
            {
                return new LabeledDataset <LblT, BinaryVector>(converted);
            }

            if (newExType == typeof(BinaryVector.ReadOnly))
            {
                return new LabeledDataset <LblT, BinaryVector.ReadOnly>(converted);
            }

            throw new ArgumentNotSupportedException("newExType");
        }
Exemple #9
0
        /// <summary>
        /// Runs the base <c>BeforeTrain</c> and then records <paramref name="model"/> in
        /// <c>mFoldModels</c> under the first unused <c>(foldN, index)</c> key.
        /// </summary>
        /// <param name="foldN">Fold number; first component of the registration key.</param>
        /// <param name="model">Model about to be trained.</param>
        /// <param name="trainSet">Unmapped training set, forwarded to the base implementation.</param>
        /// <param name="mappedTrainSet">Mapped training set, forwarded to the base implementation.</param>
        /// <returns>The dataset returned by the base implementation.</returns>
        protected override ILabeledDataset <LblT, SparseVector <double> > BeforeTrain(int foldN, IModel <LblT, SparseVector <double> > model,
                                                                                      ILabeledDataset <LblT, string> trainSet, ILabeledDataset <LblT, SparseVector <double> > mappedTrainSet)
        {
            ILabeledDataset <LblT, SparseVector <double> > prepared = base.BeforeTrain(foldN, model, trainSet, mappedTrainSet);

            // add fold's model for report: probe indices 0, 1, 2, ... until TryAdd succeeds
            int slot = 0;
            while (!mFoldModels.TryAdd(new Tuple <int, int>(foldN, slot), model))
            {
                slot++;
            }

            return prepared;
        }
Exemple #10
0
 /// <summary>
 /// Convenience overload: calls the two-argument <c>Initialize</c> with <c>largeScale</c> set to false.
 /// </summary>
 /// <param name="labeledDataset">Labeled documents to initialize from.</param>
 /// <returns>Whatever the two-argument overload returns.</returns>
 public List <SparseVector <double> > Initialize(ILabeledDataset <LabelT, string> labeledDataset)
 {
     const bool largeScale = false;

     return Initialize(labeledDataset, largeScale);
 }
Exemple #11
0
        /// <summary>
        /// Initializes the bag-of-words space from the examples of <paramref name="labeledDataset"/>,
        /// computes one delta weight per word from how the word's document counts are spread over the
        /// labels, and applies <c>CalcDeltaBow</c> to every resulting vector.
        /// </summary>
        /// <param name="labeledDataset">Labeled documents; examples feed the base initialization, labels feed the delta computation.</param>
        /// <param name="largeScale">Forwarded unchanged to the base <c>Initialize</c>.</param>
        /// <returns>The vectors produced by the base initialization, after <c>CalcDeltaBow</c> was applied to each of them.</returns>
        public List <SparseVector <double> > Initialize(ILabeledDataset <LabelT, string> labeledDataset, bool largeScale)
        {
            // the base initialization runs with normalization switched off; the saved flag is
            // handed to CalcDeltaBow further down
            bool normalizeVectors = NormalizeVectors;
            List <SparseVector <double> > bowData;

            NormalizeVectors = false;
            try
            {
                bowData = base.Initialize(labeledDataset.Select(d => d.Example), largeScale);
            }
            finally
            {
                // restore the setting even when the base initialization throws, so a failed
                // call does not leave normalization permanently disabled
                NormalizeVectors = normalizeVectors;
            }

            // count word label frequencies: label -> (word index -> number of vector entries)
            var labelWordCounts = new Dictionary <LabelT, Dictionary <int, int> >();

            for (int i = 0; i < bowData.Count; i++)
            {
                // resolved lazily on the first entry, so a label whose vectors are all empty is
                // not registered (it must not contribute to labelCount below)
                Dictionary <int, int> wordCounts = null;

                foreach (IdxDat <double> idxDat in bowData[i])
                {
                    if (wordCounts == null)
                    {
                        LabelT label = labeledDataset[i].Label;
                        if (!labelWordCounts.TryGetValue(label, out wordCounts))
                        {
                            labelWordCounts.Add(label, wordCounts = new Dictionary <int, int>());
                        }
                    }

                    int seenSoFar;
                    wordCounts.TryGetValue(idxDat.Idx, out seenSoFar); // stays 0 for a new word
                    wordCounts[idxDat.Idx] = seenSoFar + 1;
                }
            }

            // calc deltas
            // NOTE(review): when labelCount is 1 the factor (labelCount - 1) is 0, so the logarithm's
            // argument is 0 and the stored delta is +Infinity - confirm whether single-label input
            // needs to be supported.
            int labelCount = labelWordCounts.Count;

            foreach (Word word in Words)
            {
                // largest per-label count and total count of this word, gathered in one pass
                double max   = 0;
                double total = 0;
                bool   found = false;

                foreach (KeyValuePair <LabelT, Dictionary <int, int> > kv in labelWordCounts)
                {
                    int count;
                    if (kv.Value.TryGetValue(word.mIdx, out count))
                    {
                        found  = true;
                        total += count;
                        max    = Math.Max(max, count);
                    }
                }

                // words that were not counted under any label get the neutral weight 1
                double delta = 1;
                if (found)
                {
                    // |log2( max / max(rest, 1) * (labelCount - 1) )| where rest = total - max
                    delta = Math.Abs(Math.Log(
                                         max / Math.Max(total - max, 1) * (labelCount - 1), 2));
                }

                mWordDeltas.Add(word.mIdx, delta);
            }

            // transform vectors using deltas; the same vector instances are returned
            var bowDataset = new List <SparseVector <double> >(bowData.Count);

            foreach (SparseVector <double> bow in bowData)
            {
                CalcDeltaBow(bow, normalizeVectors);
                bowDataset.Add(bow);
            }

            return bowDataset;
        }
        /// <summary>
        /// Runs the base <c>BeforeTrain</c>, gives a <c>ReplicationWrapperClassifier</c> the
        /// bag-of-words space of the fold, and records <paramref name="model"/> in
        /// <c>mFoldModels</c> under the first unused <c>(foldN, index)</c> key.
        /// </summary>
        /// <param name="foldN">Fold number; selects the fold's bag-of-words space and keys the registration.</param>
        /// <param name="model">Model about to be trained.</param>
        /// <param name="trainSet">Unmapped training set, forwarded to the base implementation.</param>
        /// <param name="mappedTrainSet">Mapped training set, forwarded to the base implementation.</param>
        /// <returns>The dataset returned by the base implementation.</returns>
        protected override ILabeledDataset <SentimentLabel, SparseVector <double> > BeforeTrain(int foldN, IModel <SentimentLabel, SparseVector <double> > model,
                                                                                                ILabeledDataset <SentimentLabel, Tweet> trainSet, ILabeledDataset <SentimentLabel, SparseVector <double> > mappedTrainSet)
        {
            ILabeledDataset <SentimentLabel, SparseVector <double> > prepared = base.BeforeTrain(foldN, model, trainSet, mappedTrainSet);

            // replication wrapper needs special treatment: it is handed the fold's bow space
            var replicationWrapper = model as ReplicationWrapperClassifier;
            if (replicationWrapper != null)
            {
                replicationWrapper.BowSpace = mFoldBowSpaces[foldN];
            }

            // add fold's model for report: probe indices 0, 1, 2, ... until TryAdd succeeds
            int slot = 0;
            while (!mFoldModels.TryAdd(new Tuple <int, int>(foldN, slot), model))
            {
                slot++;
            }

            return prepared;
        }
 /// <summary>
 /// Maps every tweet of <paramref name="testSet"/> to a sparse vector by passing its text to
 /// <c>ProcessDocument</c> of the fold's bag-of-words space; labels are carried over unchanged.
 /// </summary>
 /// <param name="foldN">Fold number; selects the entry of <c>mFoldBowSpaces</c>.</param>
 /// <param name="testSet">Labeled tweets to map.</param>
 /// <returns>A new dataset with one vector example per input example.</returns>
 protected override ILabeledDataset <SentimentLabel, SparseVector <double> > MapTestSet(int foldN, ILabeledDataset <SentimentLabel, Tweet> testSet)
 {
     // the bow space is looked up per example inside the projection
     var vectorized = testSet.Select(labeledTweet =>
     {
         var foldSpace = mFoldBowSpaces[foldN];
         SparseVector <double> vector = foldSpace.ProcessDocument(labeledTweet.Example.Text);

         return new LabeledExample <SentimentLabel, SparseVector <double> >(labeledTweet.Label, vector);
     });

     return new LabeledDataset <SentimentLabel, SparseVector <double> >(vectorized);
 }
Exemple #14
0
 /// <summary>
 /// Pre-testing hook: returns the dataset to test on. When an <c>OnBeforeTest</c> handler is
 /// attached its result is returned; otherwise <paramref name="mappedTestSet"/> is returned as is.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="model">Model handed to the handler.</param>
 /// <param name="testSet">Unmapped test set handed to the handler.</param>
 /// <param name="mappedTestSet">Mapped test set; the default return value.</param>
 /// <returns>The handler's result, or <paramref name="mappedTestSet"/> when no handler is attached.</returns>
 protected virtual ILabeledDataset <LblT, ModelExT> BeforeTest(int foldN, IModel <LblT, ModelExT> model,
                                                               ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet)
 {
     if (OnBeforeTest == null)
     {
         return mappedTestSet;
     }

     return OnBeforeTest(this, foldN, model, testSet, mappedTestSet);
 }
Exemple #15
0
 /// <summary>
 /// Runs the model for the fold right away by forwarding all arguments to <c>RunModel</c>.
 /// </summary>
 /// <param name="foldN">Fold number.</param>
 /// <param name="model">Model to run.</param>
 /// <param name="trainSet">Unmapped training set.</param>
 /// <param name="mappedTrainSet">Mapped training set.</param>
 /// <param name="testSet">Unmapped test set.</param>
 /// <param name="mappedTestSet">Mapped test set.</param>
 /// <param name="modelProfile">Timing profile forwarded to <c>RunModel</c>.</param>
 protected override void DoRunModel(int foldN, IModel <LblT, ModelExT> model,
                                    ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, ModelExT> mappedTrainSet,
                                    ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet, CrossValidationTimeProfile modelProfile)
 {
     // no deferral: the call completes before this method returns
     RunModel(
         foldN,
         model,
         trainSet,
         mappedTrainSet,
         testSet,
         mappedTestSet,
         modelProfile);
 }
Exemple #16
0
 /// <summary>
 /// Maps the fold's test set to the model's example type. When an <c>OnTestSetMap</c> handler is
 /// attached its result is returned; otherwise <paramref name="testSet"/> itself is cast to the
 /// target dataset type.
 /// </summary>
 /// <param name="foldN">Fold number handed to the handler.</param>
 /// <param name="testSet">Unmapped test set.</param>
 /// <returns>The handler's result, or <paramref name="testSet"/> cast to the mapped dataset type.</returns>
 protected virtual ILabeledDataset <LblT, ModelExT> MapTestSet(int foldN, ILabeledDataset <LblT, InputExT> testSet)
 {
     if (OnTestSetMap == null)
     {
         // no mapper attached: the input is used directly via a cast
         return (ILabeledDataset <LblT, ModelExT>)testSet;
     }

     return OnTestSetMap(this, foldN, testSet);
 }
        /// <summary>
        /// Creates the bag-of-words space for the fold, initializes it from the texts of
        /// <paramref name="trainSet"/> and returns the resulting vectors paired with the original labels.
        /// </summary>
        /// <param name="foldN">Fold number; the new bag-of-words space is stored in <c>mFoldBowSpaces</c> under this key.</param>
        /// <param name="trainSet">Labeled tweets of the fold's training set.</param>
        /// <returns>A new dataset holding one vector per entry returned by the space's <c>Initialize</c>.</returns>
        protected override ILabeledDataset <SentimentLabel, SparseVector <double> > MapTrainSet(int foldN, ILabeledDataset <SentimentLabel, Tweet> trainSet)
        {
            // a fold must not have a bow space yet, and registering the new one must succeed
            BowSpace bowSpace;

            Preconditions.CheckState(!mFoldBowSpaces.TryGetValue(foldN, out bowSpace));
            bowSpace = BowSpaceFunc();
            Preconditions.CheckState(mFoldBowSpaces.TryAdd(foldN, bowSpace));

            // a delta bow space is initialized from labeled texts, any other space from the texts alone
            List <SparseVector <double> > vectors;
            var deltaSpace = bowSpace as DeltaBowSpace <SentimentLabel>;

            if (deltaSpace != null)
            {
                var labeledTexts = new LabeledDataset <SentimentLabel, string>(
                    trainSet.Select(d => new LabeledExample <SentimentLabel, string>(d.Label, d.Example.Text)));

                vectors = deltaSpace.Initialize(labeledTexts);
            }
            else
            {
                vectors = bowSpace.Initialize(trainSet.Select(d => d.Example.Text));
            }

            // pair each vector with the label at the same index of the training set
            var mapped = new LabeledDataset <SentimentLabel, SparseVector <double> >();
            int idx = 0;

            while (idx < vectors.Count)
            {
                mapped.Add(trainSet[idx].Label, vectors[idx]);
                idx++;
            }

            return mapped;
        }
Exemple #18
0
 /// <summary>
 /// Runs a single model on a single fold. Derived classes decide how the run is carried out.
 /// </summary>
 /// <param name="foldN">Fold number.</param>
 /// <param name="model">Model to run.</param>
 /// <param name="trainSet">Unmapped training set of the fold.</param>
 /// <param name="mappedTrainSet">Mapped training set of the fold.</param>
 /// <param name="testSet">Unmapped test set of the fold.</param>
 /// <param name="mappedTestSet">Mapped test set of the fold.</param>
 /// <param name="modelProfile">Timing profile associated with this model and fold.</param>
 protected abstract void DoRunModel(int foldN, IModel <LblT, ModelExT> model,
                                    ILabeledDataset <LblT, InputExT> trainSet, ILabeledDataset <LblT, ModelExT> mappedTrainSet,
                                    ILabeledDataset <LblT, InputExT> testSet, ILabeledDataset <LblT, ModelExT> mappedTestSet, CrossValidationTimeProfile modelProfile);