public void Train(ILabeledExampleCollection <LblT, BinaryVector> dataset)
 {
     Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
     Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
     mLambda = null;                                                                                                                     // allow GC to collect this
     mLambda = MaxEnt.Gis(dataset, mCutOff, mNumIter, mMoveData, /*mtxFileName=*/ null, ref mIdxToLbl, mNumThreads, /*allowedDiff=*/ 0); // *** allowedDiff = 0 disables the early-stopping check
 }
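
A note on the guard idiom: Utils.ThrowException(condition ? exception : null), used throughout these examples, throws only when the condition holds. A minimal sketch of such a helper, assuming this is all the utility does:

 public static void ThrowException(Exception exception)
 {
     if (exception != null)
     {
         throw exception; // no-op when the guard condition is false
     }
 }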
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Preconditions.CheckNotNull(dataset);

            var trainDataset = new LabeledDataset <LblT, ExT>(dataset);

            for (int i = 0; i < mInnerModels.Length; i++)
            {
                if (mInnerModels[i] == null)
                {
                    mInnerModels[i] = CreateModel(i);
                }
                mInnerModels[i].Train(GetTrainSet(i, mInnerModels[i], trainDataset));
            }

            foreach (LabeledExample <LblT, ExT> le in trainDataset)
            {
                LabeledExample <LblT, ExT> le_ = le; // local copy: avoids capturing the loop variable in the closure below
                string      key         = StringOf(mInnerModels.Select(m => m.Predict(le_.Example).BestClassLabel));
                VotingEntry votingEntry = mVotingEntries[key];
                votingEntry.LabelCounts[le.Label]++;
            }
            foreach (VotingEntry entry in mVotingEntries.Values)
            {
                PerformVoting(entry);
            }

            IsTrained = true;
        }
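
Prediction for this voting ensemble is not shown here. A hypothetical sketch that reuses the same key construction and ranks labels by their stored counts (PredictByVoting and its body are ours, not part of the model):

        public Prediction <LblT> PredictByVoting(ExT example)
        {
            // assumes the inner models' label combination was seen during training
            string key = StringOf(mInnerModels.Select(m => m.Predict(example).BestClassLabel));
            VotingEntry entry = mVotingEntries[key];
            var prediction = new Prediction <LblT>();
            foreach (KeyValuePair <LblT, int> lblCount in entry.LabelCounts)
            {
                prediction.Inner.Add(new KeyDat <double, LblT>((double)lblCount.Value, lblCount.Key));
            }
            prediction.Inner.Sort(DescSort <KeyDat <double, LblT> > .Instance);
            return prediction;
        }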
Example #3
        public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            Dispose();
            int[] trainSet = new int[dataset.Count];
            int   j        = 0;

            foreach (LabeledExample <LblT, SparseVector <double> > lblEx in dataset)
            {
                SparseVector <double> vec = lblEx.Example;
                int[]   idx = new int[vec.Count];
                float[] val = new float[vec.Count];
                for (int i = 0; i < vec.Count; i++)
                {
                    idx[i] = vec.InnerIdx[i] + 1;    // *** indices are 1-based in SvmLightLib
                    val[i] = (float)vec.InnerDat[i]; // *** loss of precision (double -> float)
                }
                int lbl;
                if (!mLblToId.TryGetValue(lblEx.Label, out lbl))
                {
                    mLblToId.Add(lblEx.Label, lbl = mLblToId.Count + 1); // *** labels start with 1 in SvmLightLib
                    mIdxToLbl.Add(lblEx.Label);
                }
                trainSet[j++] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, lbl);
            }
            mModelId = SvmLightLib.TrainMulticlassModel(string.Format("-c {0} -e {1}", mC.ToString(CultureInfo.InvariantCulture), mEps.ToString(CultureInfo.InvariantCulture)),
                                                        trainSet.Length, trainSet);
            // delete training vectors
            foreach (int vecIdx in trainSet)
            {
                SvmLightLib.DeleteFeatureVector(vecIdx);
            }
        }
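
The index and value conversion above reappears in the binary trainer in Example #17. A small helper capturing it (the name ToSvmLightFormat is ours): LATINO vectors are 0-based doubles, SvmLightLib expects 1-based indices and floats.

        static void ToSvmLightFormat(SparseVector <double> vec, out int[] idx, out float[] val)
        {
            idx = new int[vec.Count];
            val = new float[vec.Count];
            for (int i = 0; i < vec.Count; i++)
            {
                idx[i] = vec.InnerIdx[i] + 1;    // SvmLightLib indices are 1-based
                val[i] = (float)vec.InnerDat[i]; // loses precision (double -> float)
            }
        }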
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            Dictionary <LblT, int> counter = new Dictionary <LblT, int>(mLblCmp);

            foreach (LabeledExample <LblT, ExT> lblEx in dataset)
            {
                int count;
                if (counter.TryGetValue(lblEx.Label, out count))
                {
                    counter[lblEx.Label] = count + 1;
                }
                else
                {
                    counter.Add(lblEx.Label, 1);
                }
            }
            mPrediction = new Prediction <LblT>();
            foreach (KeyValuePair <LblT, int> keyVal in counter)
            {
                mPrediction.Inner.Add(new KeyDat <double, LblT>((double)keyVal.Value / (double)dataset.Count, keyVal.Key));
            }
            mPrediction.Inner.Sort(DescSort <KeyDat <double, LblT> > .Instance);
        }
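
This model learns only the label prior. Presumably its Predict (not shown) returns the same ranked distribution for every input, along these lines:

        public Prediction <LblT> Predict(ExT example) // sketch; the actual method is not shown
        {
            return mPrediction; // identical label distribution for every example
        }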
Example #5
 public void Train(ILabeledExampleCollection <LblT, BinaryVector> dataset)
 {
     Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
     Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
     PrecomputeProbabilities(dataset);
     mDatasetCount = dataset.Count;
 }
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Preconditions.CheckNotNull(dataset);

            var trainDataset = new LabeledDataset <LblT, ExT>(dataset);

            for (int i = 0; i < mInnerModels.Length; i++)
            {
                if (mInnerModels[i] == null)
                {
                    mInnerModels[i] = CreateModel(i);
                }
                mInnerModels[i].Train(GetTrainSet(i, mInnerModels[i], trainDataset));
            }

            foreach (LabeledExample <LblT, ExT> le in trainDataset)
            {
                LabeledExample <LblT, ExT> le_ = le; // local copy: avoids capturing the loop variable in the closure below
                double[] scores = GetPredictionScores(mInnerModels.Select(m => m.Predict(le_.Example)).ToArray()).ToArray();
                mTagDistrTable.AddCount(le.Label, scores);
            }
            mTagDistrTable.Calculate();

            IsTrained = true;
        }
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            var binaryDataset = new LabeledDataset <LblT, ExT>(dataset.Select(le =>
                new LabeledExample <LblT, ExT>(le.Label.Equals(OneLabel) ? OneLabel : OtherLabel, le.Example)));

            mBinaryModel.Train(binaryDataset);
            IsTrained = true;
        }
Example #8
        private static SparseMatrix <double> CreateObservationMatrix <LblT>(ILabeledExampleCollection <LblT, BinaryVector> dataset, ref LblT[] idxToLbl)
        {
            ArrayList <LblT>       tmp      = new ArrayList <LblT>();
            Dictionary <LblT, int> lblToIdx = new Dictionary <LblT, int>();

            foreach (LabeledExample <LblT, BinaryVector> labeledExample in dataset)
            {
                if (!lblToIdx.ContainsKey(labeledExample.Label))
                {
                    lblToIdx.Add(labeledExample.Label, lblToIdx.Count);
                    tmp.Add(labeledExample.Label);
                }
            }
            // prepare struct for fast computation
            Dictionary <int, int>[] counter = new Dictionary <int, int> [tmp.Count];
            for (int j = 0; j < counter.Length; j++)
            {
                counter[j] = new Dictionary <int, int>();
            }
            // count features
            int    i  = 0;
            object id = new object();

            foreach (LabeledExample <LblT, BinaryVector> labeledExample in dataset)
            {
                mLogger.ProgressFast(id, "CreateObservationMatrix", "{0} / {1}", ++i, dataset.Count);
                int lblIdx = lblToIdx[labeledExample.Label];
                int val;
                foreach (int idx in labeledExample.Example)
                {
                    if (counter[lblIdx].TryGetValue(idx, out val))
                    {
                        counter[lblIdx][idx] = val + 1;
                    }
                    else
                    {
                        counter[lblIdx].Add(idx, 1);
                    }
                }
            }
            // create sparse matrix
            SparseMatrix <double> mtx = new SparseMatrix <double>();

            for (int j = 0; j < counter.Length; j++)
            {
                SparseVector <double> vec = new SparseVector <double>();
                foreach (KeyValuePair <int, int> item in counter[j])
                {
                    vec.InnerIdx.Add(item.Key);
                    vec.InnerDat.Add(item.Value);
                }
                vec.Sort();
                mtx[j] = vec;
            }
            idxToLbl = tmp.ToArray();
            return(mtx);
        }
        public void Train(ILabeledExampleCollection <LblT, BinaryVector> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            mLambda = null;                                                                                                                                   // allow GC to collect this
            SparseMatrix <double> lambda
                = MaxEnt.Gis(dataset, mCutOff, mNumIter, mMoveData, /*mtxFileName=*/ null, ref mIdxToLbl, mNumThreads, /*allowedDiff=*/ 0, mLblCmp, mLogger); // *** allowedDiff = 0 disables the early-stopping check

            mLambda = MaxEnt.PrepareForFastPrediction(lambda);
        }
Example #10
        public static UnlabeledDataset <ExT> ConvertToUnlabeledDataset <LblT, ExT>(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            UnlabeledDataset <ExT> unlabeledDataset = new UnlabeledDataset <ExT>();

            foreach (LabeledExample <LblT, ExT> labeledExample in dataset)
            {
                unlabeledDataset.Add(labeledExample.Example);
            }
            return(unlabeledDataset);
        }
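
A typical use of this conversion (a sketch) is stripping labels before building unsupervised structures, such as the transposed matrices used elsewhere in this file:

        UnlabeledDataset <SparseVector <double> > unlabeled = ModelUtils.ConvertToUnlabeledDataset(dataset);
        SparseMatrix <double> mtxTr = ModelUtils.GetTransposedMatrix(unlabeled);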
Example #11
 public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
 {
     Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
     Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
     mDatasetMtx = ModelUtils.GetTransposedMatrix(ModelUtils.ConvertToUnlabeledDataset(dataset));
     mLabels     = new ArrayList <LblT>();
     foreach (LabeledExample <LblT, SparseVector <double> > labeledExample in dataset)
     {
         mLabels.Add(labeledExample.Label);
     }
 }
 public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
 {
     foreach (ModelLabel modelLabel in ModelLabels.Take(ModelLabels.Count() - 1))
     {
         modelLabel.Model.Train(dataset);
         ModelLabel modelLabel_ = modelLabel; // local copy for the closure in the Where below
         dataset = new LabeledDataset <LblT, ExT>(dataset.Where(le => !le.Label.Equals(modelLabel_.Label)));
     }
     ModelLabels.Last().Model.Train(dataset);
     IsTrained = true;
 }
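
The training loop above peels one label off the dataset per inner model. A matching prediction cascade might look like this (hypothetical; the real Predict is not shown): ask each model in order and stop at the first one that claims its own label.

 public Prediction <LblT> PredictByCascade(ExT example)
 {
     foreach (ModelLabel modelLabel in ModelLabels.Take(ModelLabels.Count() - 1))
     {
         Prediction <LblT> prediction = modelLabel.Model.Predict(example);
         if (prediction.BestClassLabel.Equals(modelLabel.Label))
         {
             return prediction;
         }
     }
     return ModelLabels.Last().Model.Predict(example);
 }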
Example #13
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
/*
 *          mLabelCounts = dataset.GroupBy(le => le.Label)
 *              .OrderByDescending(g => g.Count())
 *              .Select(g => new Tuple<SentimentLabel, int>(g.Key, g.Count()))
 *              .ToArray();
 */
            mInnerModel.Train(dataset);
            IsTrained = true;
        }
Example #14
        private static double GisFindMaxF <LblT>(ILabeledExampleCollection <LblT, BinaryVector> dataset)
        {
            double maxVal = 0;

            foreach (LabeledExample <LblT, BinaryVector> item in dataset)
            {
                if (item.Example.Count > maxVal)
                {
                    maxVal = item.Example.Count;
                }
            }
            return(maxVal);
        }
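
GisFindMaxF returns the largest number of active features in any training example, which GIS uses as its slack constant F. An equivalent LINQ one-liner, assuming the collection implements IEnumerable<LabeledExample<LblT, BinaryVector>> as the foreach above suggests:

        double maxF = dataset.Max(item => (double)item.Example.Count);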
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckNotNull(dataset);

            var ds = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset
                .Select(le => new LabeledExample <SentimentLabel, SparseVector <double> >(le.Label, le.Example)));

            mPosModel = TrainModel(ds, SentimentLabel.Positive, SentimentLabel.Negative, SentimentLabel.Neutral);
            mNegModel = TrainModel(ds, SentimentLabel.Negative, SentimentLabel.Positive, SentimentLabel.Neutral);
            mNeuModel = TrainModel(ds, SentimentLabel.Neutral, SentimentLabel.Positive, SentimentLabel.Negative);

            IsTrained = true;
        }
Example #16
        public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            Dispose();
            int[] trainSet = new int[dataset.Count];
            Dictionary <LblT, int> lblToIdx = new Dictionary <LblT, int>(mLblCmp);
            MultiSet <int>         lblCount = new MultiSet <int>();
            int j = 0;

            foreach (LabeledExample <LblT, SparseVector <double> > lblEx in dataset)
            {
                SparseVector <double> vec = lblEx.Example;
                int[]   idx = new int[vec.Count];
                float[] val = new float[vec.Count];
                for (int i = 0; i < vec.Count; i++)
                {
                    idx[i] = vec.InnerIdx[i] + 1;
                    val[i] = (float)vec.InnerDat[i]; // *** cast to float
                }
                int lbl;
                if (!lblToIdx.TryGetValue(lblEx.Label, out lbl))
                {
                    lblToIdx.Add(lblEx.Label, lbl = lblToIdx.Count);
                    mIdxToLbl.Add(lblEx.Label);
                }
                Utils.ThrowException(lbl == 2 ? new ArgumentValueException("dataset") : null); // *** at most two distinct labels are allowed
                trainSet[j++] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, lbl == 0 ? 1 : -1);
                lblCount.Add(lbl == 0 ? 1 : -1);
            }
            string costFactor = "";

            if (mBiasedCostFunction)
            {
                costFactor = "-j " + ((double)lblCount.GetCount(-1) / (double)lblCount.GetCount(1));
            }
            mModelId = SvmLightLib.TrainModel(string.Format(CultureInfo.InvariantCulture, "-v {0} -c {1} -t {2} -g {3} -d {4} -s {5} -r {6} -b {7} -e {8} -# {9} {10} {11}",
                                                            (int)mVerbosityLevel, mC, (int)mKernelType, mKernelParamGamma, mKernelParamD, mKernelParamS, mKernelParamC, mBiasedHyperplane ? 1 : 0,
                                                            mEps, mMaxIter, mCustomParams, costFactor), trainSet.Length, trainSet);
            // delete training vectors
            foreach (int vecIdx in trainSet)
            {
                SvmLightLib.DeleteFeatureVector(vecIdx);
            }
        }
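
The -j cost factor above weights errors on positive examples by the negative-to-positive count ratio, which roughly balances a skewed training set. A quick numeric check:

        double costFactor = 300.0 / 100.0; // 300 negatives, 100 positives -> "-j 3", so each positive error costs 3x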
Example #17
        public override sealed void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckNotNull(dataset);

            var labeledDataset = (LabeledDataset <SentimentLabel, SparseVector <double> >)dataset; // assumes the caller passes a concrete LabeledDataset

            var trainDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(labeledDataset.Where(le => le.Label != SentimentLabel.Neutral));

            if (mBinaryClassifier == null)
            {
                mBinaryClassifier = CreateModel();
                mBinaryClassifier.Train(trainDataset);
            }

            IsTrained = true;

            /*Calculate positive and negative average distances*/
            int positiveTweetsNumber = 0;
            int negativeTweetsNumber = 0;

            PosAverageDistance = 0;
            NegAverageDistance = 0;

            foreach (LabeledExample <SentimentLabel, SparseVector <double> > example in trainDataset)
            {
                Prediction <SentimentLabel> prediction = mBinaryClassifier.Predict(example.Example);
                SentimentLabel bestLabelPredicted      = prediction.BestClassLabel;
                double         bestScorePredicted      = bestLabelPredicted == SentimentLabel.Negative ? -prediction.BestScore : prediction.BestScore;

                SentimentLabel actualLabel = example.Label;

                if (actualLabel == SentimentLabel.Positive)
                {
                    PosAverageDistance += bestScorePredicted;
                    positiveTweetsNumber++;
                }
                else if (actualLabel == SentimentLabel.Negative)
                {
                    NegAverageDistance += bestScorePredicted;
                    negativeTweetsNumber++;
                }
            }

            PosAverageDistance = PosAverageDistance / positiveTweetsNumber;
            NegAverageDistance = NegAverageDistance / negativeTweetsNumber;
        }
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            MultiSet <LblT> counter = new MultiSet <LblT>(mLblCmp);

            foreach (LabeledExample <LblT, ExT> lblEx in dataset)
            {
                counter.Add(lblEx.Label);
            }
            mPrediction = new Prediction <LblT>();
            foreach (KeyValuePair <LblT, int> keyVal in counter)
            {
                mPrediction.Inner.Add(new KeyDat <double, LblT>((double)keyVal.Value / (double)dataset.Count, keyVal.Key));
            }
            mPrediction.Inner.Sort(DescSort <KeyDat <double, LblT> > .Instance);
        }
Example #19
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckNotNull(dataset);

            var posDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Select(le =>
                new LabeledExample <SentimentLabel, SparseVector <double> >(
                    le.Label == SentimentLabel.Positive ? SentimentLabel.Positive : SentimentLabel.Negative, le.Example)));

            mPosClassifier = CreateModel();
            mPosClassifier.Train(posDataset);

            var negDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Select(le =>
                new LabeledExample <SentimentLabel, SparseVector <double> >(
                    le.Label == SentimentLabel.Negative ? SentimentLabel.Negative : SentimentLabel.Positive, le.Example)));

            mNegClassifier = CreateModel();
            mNegClassifier.Train(negDataset);

            if (PosBiasCalibration != null || NegBiasCalibration != null)
            {
                var    labeledDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset);
                double?posBias        = Calibrate(true, labeledDataset);
                double?negBias        = Calibrate(false, labeledDataset);
                BiasToPosRate = posBias ?? BiasToPosRate;
                BiasToNegRate = negBias ?? BiasToNegRate;
            }

            mPosSortedScores = mNegSortedScores = null;
            mExampleScores   = dataset.Select(le =>
            {
                Prediction <SentimentLabel> posPrediction = mPosClassifier.Predict(le.Example);
                Prediction <SentimentLabel> negPrediction = mNegClassifier.Predict(le.Example);
                return(new ExampleScore
                {
                    Label = le.Label,
                    PosScore = posPrediction.BestClassLabel == SentimentLabel.Positive ? posPrediction.BestScore : -posPrediction.BestScore,
                    NegScore = negPrediction.BestClassLabel == SentimentLabel.Negative ? -negPrediction.BestScore : negPrediction.BestScore
                });
            }).ToArray();

            UpdateDistrTable();

            IsTrained = true;
        }
Example #20
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckState(BowSpace != null);
            var replDataset = new LabeledDataset <SentimentLabel, SparseVector <double> >();

            foreach (LabeledExample <SentimentLabel, SparseVector <double> > le in dataset)
            {
                SparseVector <double> vector1, vector2;
                Replicate(le.Example, out vector1, out vector2);

                replDataset.Add(new LabeledExample <SentimentLabel, SparseVector <double> >(
                                    le.Label == SentimentLabel.Neutral ? SentimentLabel.Negative : le.Label, vector1));
                replDataset.Add(new LabeledExample <SentimentLabel, SparseVector <double> >(
                                    le.Label == SentimentLabel.Neutral ? SentimentLabel.Positive : le.Label, vector2));
            }

            mClassifier = CreateModel();
            mClassifier.Train(replDataset);

            IsTrained = true;
        }
Example #21
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckNotNull(dataset);
            Preconditions.CheckArgumentRange(TagDistrTable == null || TagDistrTable.NumOfDimensions == 2);

            mBinModel = CreateModel();
            mBinModel.Train(new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Where(le => le.Label != SentimentLabel.Neutral)));

            TagDistrTable = new EnumTagDistrTable <SentimentLabel>(1, BinWidth, -5, 5, SentimentLabel.Exclude)
            {
                CalcDistrFunc = (tagCounts, values, tag) => ((double)tagCounts[tag] + 1) / (tagCounts.Values.Sum() + tagCounts.Count) // Laplace (add-one) smoothing
            };
            foreach (LabeledExample <SentimentLabel, SparseVector <double> > le in dataset)
            {
                Prediction <SentimentLabel> prediction = mBinModel.Predict(le.Example);
                TagDistrTable.AddCount(le.Label, prediction.BestClassLabel == SentimentLabel.Positive ? prediction.BestScore : -prediction.BestScore);
            }
            TagDistrTable.Calculate();

            IsTrained = true;
        }
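
The CalcDistrFunc above is add-one (Laplace) smoothing. A worked instance with two labels and counts {4, 2}:

        double raw      = 4.0 / 6.0;           // ~0.667 without smoothing
        double smoothed = (4.0 + 1) / (6 + 2); // 0.625 = (count + 1) / (total + numLabels)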
Example #22
        public void Train(ILabeledExampleCollection <double, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            LSqrSparseMatrix mat = new LSqrSparseMatrix(dataset.Count);

            double[] rhs     = new double[dataset.Count];
            int      solSize = -1;
            int      i       = 0;

            foreach (LabeledExample <double, SparseVector <double> > labeledExample in dataset)
            {
                if (labeledExample.Example.LastNonEmptyIndex + 1 > solSize)
                {
                    solSize = labeledExample.Example.LastNonEmptyIndex + 1;
                }
                foreach (IdxDat <double> item in labeledExample.Example)
                {
                    mat.InsertValue(i, item.Idx, item.Dat);
                }
                rhs[i++] = labeledExample.Label;
            }
            Utils.ThrowException((mInitSol != null && mInitSol.Length != solSize) ? new ArgumentValueException("InitialSolution") : null);
            LSqrSparseMatrix matT = new LSqrSparseMatrix(solSize);

            i = 0;
            foreach (LabeledExample <double, SparseVector <double> > labeledExample in dataset)
            {
                foreach (IdxDat <double> item in labeledExample.Example)
                {
                    matT.InsertValue(item.Idx, i, item.Dat);
                }
                i++;
            }
            int numIter = mNumIter < 0 ? solSize + dataset.Count + 50 : mNumIter;

            mSol = new ArrayList <double>(LSqrDll.DoLSqr(solSize, mat, matT, mInitSol, rhs, numIter));
            mat.Dispose();
            matT.Dispose();
        }
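
Driving this trainer (a sketch using the dataset types from this file): each example is a row of the design matrix A and each label the matching entry of the right-hand side b in the least-squares problem min ||Ax - b||. The tiny system below encodes the single equation 2 * x0 = 4:

        var row = new SparseVector <double>();
        row.InnerIdx.Add(0);   // column 0
        row.InnerDat.Add(2.0); // coefficient of x0
        var regDataset = new LabeledDataset <double, SparseVector <double> >();
        regDataset.Add(new LabeledExample <double, SparseVector <double> >(4.0, row)); // right-hand side b = 4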
Example #23
        private static SparseMatrix <double> TransposeDataset <LblT>(ILabeledExampleCollection <LblT, BinaryVector> dataset, bool clearDataset)
        {
            SparseMatrix <double> aux = new SparseMatrix <double>();
            int i = 0;

            if (clearDataset)
            {
                foreach (LabeledExample <LblT, BinaryVector> item in dataset)
                {
                    aux[i++] = ModelUtils.ConvertExample <SparseVector <double> >(item.Example);
                    item.Example.Clear();
                }
            }
            else
            {
                foreach (LabeledExample <LblT, BinaryVector> item in dataset)
                {
                    aux[i++] = ModelUtils.ConvertExample <SparseVector <double> >(item.Example);
                }
            }
            return(aux.GetTransposedCopy());
        }
Example #24
        public void Train(ILabeledExampleCollection <LblT, string> dataset)
        {
            Preconditions.CheckState(!IsTrained);
            Preconditions.CheckNotNull(dataset);
            Preconditions.CheckNotNull(BowSpace);
            Preconditions.CheckNotNull(FeatureProcessor);
            Preconditions.CheckNotNull(Model);

            // preprocess the text
            foreach (LabeledExample <LblT, string> le in dataset)
            {
                le.Example = FeatureProcessor.Run(le.Example);
            }

            // bow vectors
            List <SparseVector <double> > bowData = BowSpace is DeltaBowSpace <LblT>
                ? (BowSpace as DeltaBowSpace <LblT>).Initialize(dataset as ILabeledDataset <LblT, string> ?? new LabeledDataset <LblT, string>(dataset))
                : BowSpace.Initialize(dataset.Select(d => d.Example));
            var bowDataset = new LabeledDataset <LblT, SparseVector <double> >();

            for (int i = 0; i < bowData.Count; i++)
            {
                bowDataset.Add(dataset[i].Label, bowData[i]);
            }

            // train
            if (OnTrainModel == null)
            {
                Model.Train(bowDataset);
            }
            else
            {
                OnTrainModel(this, bowDataset);
            }

            IsTrained = true;
        }
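
End-to-end use of this text pipeline might look as follows (a sketch; 'classifier' and its component assignments are placeholders, and BowSpace, FeatureProcessor and Model must be set beforehand, as the preconditions require):

        var textDataset = new LabeledDataset <string, string>();
        textDataset.Add(new LabeledExample <string, string>("positive", "the product works great"));
        textDataset.Add(new LabeledExample <string, string>("negative", "it broke after a day"));
        classifier.Train(textDataset); // 'classifier' is an instance of the class defining Train above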
        public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            mCentroids = new ArrayList <Pair <LblT, SparseVector <double> > >();
            Dictionary <LblT, ArrayList <SparseVector <double> > > tmp = new Dictionary <LblT, ArrayList <SparseVector <double> > >(mLblCmp);

            foreach (LabeledExample <LblT, SparseVector <double> > labeledExample in dataset)
            {
                if (!tmp.ContainsKey(labeledExample.Label))
                {
                    tmp.Add(labeledExample.Label, new ArrayList <SparseVector <double> >(new SparseVector <double>[] { labeledExample.Example }));
                }
                else
                {
                    tmp[labeledExample.Label].Add(labeledExample.Example);
                }
            }
            foreach (KeyValuePair <LblT, ArrayList <SparseVector <double> > > centroidData in tmp)
            {
                SparseVector <double> centroid = ModelUtils.ComputeCentroid(centroidData.Value, mNormalize ? CentroidType.NrmL2 : CentroidType.Avg);
                mCentroids.Add(new Pair <LblT, SparseVector <double> >(centroidData.Key, centroid));
            }
        }
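
Prediction for this centroid model is not shown. A hypothetical sketch that scores each label by the dot product between the example and the label centroid (PredictByCentroids is our name; Pair is assumed to expose First and Second):

        public Prediction <LblT> PredictByCentroids(SparseVector <double> example)
        {
            var exampleDict = new Dictionary <int, double>();
            foreach (IdxDat <double> item in example)
            {
                exampleDict[item.Idx] = item.Dat;
            }
            var prediction = new Prediction <LblT>();
            foreach (Pair <LblT, SparseVector <double> > centroid in mCentroids)
            {
                double score = 0;
                foreach (IdxDat <double> item in centroid.Second)
                {
                    double val;
                    if (exampleDict.TryGetValue(item.Idx, out val))
                    {
                        score += val * item.Dat; // accumulate over shared indices
                    }
                }
                prediction.Inner.Add(new KeyDat <double, LblT>(score, centroid.First));
            }
            prediction.Inner.Sort(DescSort <KeyDat <double, LblT> > .Instance);
            return prediction;
        }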
 void IModel <LblT> .Train(ILabeledExampleCollection <LblT> dataset)
 {
     Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
     Utils.ThrowException(!(dataset is ILabeledExampleCollection <LblT, ExT>) ? new ArgumentTypeException("dataset") : null);
     Train((ILabeledExampleCollection <LblT, ExT>)dataset); // throws ArgumentValueException
 }
Example #27
        public static SparseMatrix <double> Gis <LblT>(ILabeledExampleCollection <LblT, BinaryVector> dataset, int cutOff, int numIter, bool clearDataset, string mtxFileName, ref LblT[] idxToLbl, int numThreads, double allowedDiff, IEqualityComparer <LblT> lblCmp, Logger logger)
        {
            logger.Info("Gis", "Creating observation matrix ...");
            SparseMatrix <double> observations = null;

            if (Utils.VerifyFileNameOpen(mtxFileName))
            {
                BinarySerializer reader = new BinarySerializer(mtxFileName, FileMode.Open);
                idxToLbl     = new ArrayList <LblT>(reader).ToArray();
                observations = new SparseMatrix <double>(reader);
                reader.Close();
            }
            else
            {
                observations = CreateObservationMatrix(dataset, ref idxToLbl, lblCmp, logger);
                if (Utils.VerifyFileNameCreate(mtxFileName))
                {
                    BinarySerializer writer = new BinarySerializer(mtxFileName, FileMode.Create);
                    new ArrayList <LblT>(idxToLbl).Save(writer);
                    observations.Save(writer);
                    writer.Close();
                }
            }
            int numClasses  = observations.GetLastNonEmptyRowIdx() + 1;
            int numExamples = dataset.Count;

            if (cutOff > 0)
            {
                logger.Info("Gis", "Performing cut-off ...");
                observations = CutOff(observations, cutOff);
            }
            logger.Info("Gis", "Preparing structures ...");
            SparseMatrix <double> lambda       = CopyStructure(observations);
            SparseMatrix <double> expectations = CopyStructure(observations);
            double f = GisFindMaxF(dataset);
            SparseMatrix <double> trainMtxTr = TransposeDataset(dataset, clearDataset);

            logger.Info("Gis", "Entering main loop ...");
            double[] oldLambda = null;
            if (allowedDiff > 0)
            {
                oldLambda = new double[lambda.CountValues()];
            }
            for (int i = 0; i < numIter; i++)
            {
                logger.Info("Gis", "Iteration {0} / {1} ...", i + 1, numIter);
                logger.Info("Gis", "Updating expectations ...");
                if (numThreads > 1)
                {
                    UpdateExpectationMatrix(numClasses, numExamples, trainMtxTr, lambda, expectations, numThreads, logger);
                }
                else
                {
                    UpdateExpectationMatrix(numClasses, numExamples, trainMtxTr, lambda, expectations, logger);
                }
                logger.Info("Gis", "Updating lambdas ...");
                GisUpdate(lambda, expectations, observations, f);
                Reset(expectations);
                // check lambda change
                if (allowedDiff > 0)
                {
                    int    j       = 0;
                    double maxDiff = 0;
                    foreach (IdxDat <SparseVector <double> > row in lambda)
                    {
                        foreach (IdxDat <double> item in row.Dat)
                        {
                            double diff = Math.Abs(item.Dat - oldLambda[j]);
                            if (diff > maxDiff)
                            {
                                maxDiff = diff;
                            }
                            oldLambda[j] = item.Dat;
                            j++;
                        }
                    }
                    logger.Info("Gis", "Max lambda diff: {0:0.0000}", maxDiff);
                    if (maxDiff <= allowedDiff)
                    {
                        logger.Info("Gis", "Max lambda diff is small enough. Exiting optimization loop.");
                        break;
                    }
                }
            }
            return(lambda);
        }
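
Calling Gis directly (a sketch mirroring the Train methods earlier in this file; dataset and logger are assumed in scope, inside a generic context with label type LblT): allowedDiff = 0 runs all numIter iterations, while a positive value stops early once no lambda changes by more than that amount.

        LblT[] idxToLbl = null;
        SparseMatrix <double> lambda = MaxEnt.Gis(dataset, /*cutOff=*/ 0, /*numIter=*/ 100, /*clearDataset=*/ false,
                                                  /*mtxFileName=*/ null, ref idxToLbl, /*numThreads=*/ 1,
                                                  /*allowedDiff=*/ 0.01, /*lblCmp=*/ null, logger);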
 public void Train(ILabeledExampleCollection <LblT> dataset)
 {
     Train((ILabeledExampleCollection <LblT, ExT>)dataset); // direct cast: throws InvalidCastException if the example type is not ExT
 }
Example #29
        public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            Dictionary <LblT, CentroidData> centroids = new Dictionary <LblT, CentroidData>(mLblCmp);

            foreach (LabeledExample <LblT, SparseVector <double> > labeledExample in dataset)
            {
                if (!centroids.ContainsKey(labeledExample.Label))
                {
                    CentroidData centroidData = new CentroidData();
                    centroidData.AddToSum(labeledExample.Example);
                    centroids.Add(labeledExample.Label, centroidData);
                }
                else
                {
                    CentroidData centroidData = centroids[labeledExample.Label];
                    centroidData.AddToSum(labeledExample.Example);
                }
            }
            foreach (CentroidData cenData in centroids.Values)
            {
                cenData.UpdateCentroidLen();
            }
            double learnRate = 1;

            double[][]            dotProd = null;
            SparseMatrix <double> dsMtx   = null;

            if (mIterations > 0)
            {
                dotProd = new double[centroids.Count][];
                dsMtx   = ModelUtils.GetTransposedMatrix(ModelUtils.ConvertToUnlabeledDataset(dataset));
            }
            for (int iter = 1; iter <= mIterations; iter++)
            {
                mLogger.Info("Train", "Iteration {0} / {1} ...", iter, mIterations);
                // compute dot products
                mLogger.Info("Train", "Computing dot products ...");
                int j = 0;
                foreach (KeyValuePair <LblT, CentroidData> labeledCentroid in centroids)
                {
                    mLogger.ProgressNormal(Logger.Level.Info, /*sender=*/ this, "Train", "Centroid {0} / {1} ...", j + 1, centroids.Count);
                    SparseVector <double> cenVec = labeledCentroid.Value.GetSparseVector();
                    dotProd[j] = ModelUtils.GetDotProductSimilarity(dsMtx, dataset.Count, cenVec);
                    j++;
                }
                // classify training examples
                mLogger.Info("Train", "Classifying training examples ...");
                int errCount = 0;
                for (int instIdx = 0; instIdx < dataset.Count; instIdx++)
                {
                    mLogger.ProgressFast(Logger.Level.Info, /*sender=*/ this, "Train", "Example {0} / {1} ...", instIdx + 1, dataset.Count);
                    double       maxSim           = double.MinValue;
                    CentroidData assignedCentroid = null;
                    CentroidData actualCentroid   = null;
                    LabeledExample <LblT, SparseVector <double> > labeledExample = dataset[instIdx];
                    SparseVector <double> vec = labeledExample.Example;
                    int cenIdx = 0;
                    foreach (KeyValuePair <LblT, CentroidData> labeledCentroid in centroids)
                    {
                        double sim = dotProd[cenIdx][instIdx];
                        if (sim > maxSim)
                        {
                            maxSim = sim; assignedCentroid = labeledCentroid.Value;
                        }
                        if (labeledCentroid.Key.Equals(labeledExample.Label))
                        {
                            actualCentroid = labeledCentroid.Value;
                        }
                        cenIdx++;
                    }
                    if (assignedCentroid != actualCentroid)
                    {
                        assignedCentroid.AddToDiff(-learnRate, vec);
                        actualCentroid.AddToDiff(learnRate, vec);
                        errCount++;
                    }
                }
                mLogger.Info("Train", "Training set error rate: {0:0.00}%", (double)errCount / (double)dataset.Count * 100.0);
                // update centroids
                int k = 0;
                foreach (CentroidData centroidData in centroids.Values)
                {
                    mLogger.ProgressNormal(Logger.Level.Info, /*sender=*/ this, "Train", "Centroid {0} / {1} ...", ++k, centroids.Count);
                    centroidData.Update(mPositiveValuesOnly);
                    centroidData.UpdateCentroidLen();
                }
                learnRate *= mDamping;
            }
            mCentroidMtxTr = new SparseMatrix <double>();
            mLabels        = new ArrayList <LblT>();
            int rowIdx = 0;

            foreach (KeyValuePair <LblT, CentroidData> labeledCentroid in centroids)
            {
                mCentroidMtxTr[rowIdx++] = labeledCentroid.Value.GetSparseVector();
                mLabels.Add(labeledCentroid.Key);
            }
            mCentroidMtxTr = mCentroidMtxTr.GetTransposedCopy();
        }
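
The damping schedule above shrinks the learn rate geometrically, so after k iterations the rate is mDamping^k. For example:

        double rateAfter10 = Math.Pow(0.8, 10); // with mDamping = 0.8, ~0.107 of the initial rate remains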
 public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
 {
     mClassifier = (SvmMulticlassClassifier <SentimentLabel>)CreateModel();
     mClassifier.Train(dataset);
 }