/// <summary>
/// Trains the model on <paramref name="dataset"/> by running <c>MaxEnt.Gis</c> and
/// storing its result in <c>mLambda</c>.
/// </summary>
/// <param name="dataset">
/// Labeled binary vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, BinaryVector> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    // Drop the reference to the previous model first so that it can be
    // collected while the new one is being built.
    mLambda = null;
    mLambda = MaxEnt.Gis(
        dataset,
        mCutOff,
        mNumIter,
        mMoveData,
        /*mtxFileName=*/ null,
        ref mIdxToLbl,
        mNumThreads,
        /*allowedDiff=*/ 0); // *** allowedDiff
}
/// <summary>
/// Trains every inner model, counts the true labels observed for each combination of
/// inner-model predictions, and then runs the voting step on every voting entry.
/// </summary>
/// <param name="dataset">Training examples; checked with <c>Preconditions.CheckNotNull</c>.</param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Preconditions.CheckNotNull(dataset);
    var trainDataset = new LabeledDataset<LblT, ExT>(dataset);

    // Train the inner models, creating the ones that were not supplied.
    for (int modelIdx = 0; modelIdx < mInnerModels.Length; modelIdx++)
    {
        if (mInnerModels[modelIdx] == null)
        {
            mInnerModels[modelIdx] = CreateModel(modelIdx);
        }
        var trainSet = GetTrainSet(modelIdx, mInnerModels[modelIdx], trainDataset);
        mInnerModels[modelIdx].Train(trainSet);
    }

    // For every training example, look up the entry keyed by the inner models'
    // predicted labels and count the example's true label there.
    foreach (LabeledExample<LblT, ExT> labeled in trainDataset)
    {
        LabeledExample<LblT, ExT> current = labeled; // private copy for the closure
        string key = StringOf(mInnerModels.Select(model => model.Predict(current.Example).BestClassLabel));
        mVotingEntries[key].LabelCounts[labeled.Label]++;
    }

    foreach (VotingEntry votingEntry in mVotingEntries.Values)
    {
        PerformVoting(votingEntry);
    }

    IsTrained = true;
}
/// <summary>
/// Trains a multiclass SVM model through <c>SvmLightLib</c>.
/// </summary>
/// <remarks>
/// Every example is converted to a native feature vector (1-based feature indices,
/// <c>float</c> values, 1-based label ids). The native vectors are deleted again once
/// training has finished, including when vector creation or training throws.
/// </remarks>
/// <param name="dataset">
/// Labeled sparse vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    Dispose();
    int[] trainSet = new int[dataset.Count];
    int numCreated = 0; // number of native vectors that were actually created
    try
    {
        foreach (LabeledExample<LblT, SparseVector<double>> lblEx in dataset)
        {
            SparseVector<double> vec = lblEx.Example;
            int[] idx = new int[vec.Count];
            float[] val = new float[vec.Count];
            for (int i = 0; i < vec.Count; i++)
            {
                idx[i] = vec.InnerIdx[i] + 1; // *** indices are 1-based in SvmLightLib
                val[i] = (float)vec.InnerDat[i]; // *** loss of precision (double -> float)
            }
            int lbl;
            if (!mLblToId.TryGetValue(lblEx.Label, out lbl))
            {
                mLblToId.Add(lblEx.Label, lbl = mLblToId.Count + 1); // *** labels start with 1 in SvmLightLib
                mIdxToLbl.Add(lblEx.Label);
            }
            trainSet[numCreated] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, lbl);
            numCreated++;
        }
        string args = string.Format("-c {0} -e {1}",
            mC.ToString(CultureInfo.InvariantCulture),
            mEps.ToString(CultureInfo.InvariantCulture));
        mModelId = SvmLightLib.TrainMulticlassModel(args, trainSet.Length, trainSet);
    }
    finally
    {
        // delete training vectors (only those that were created)
        for (int k = 0; k < numCreated; k++)
        {
            SvmLightLib.DeleteFeatureVector(trainSet[k]);
        }
    }
}
/// <summary>
/// Builds a fixed prediction from the label distribution of <paramref name="dataset"/>:
/// each distinct label is scored with its relative frequency and the entries are
/// sorted with <c>DescSort</c>.
/// </summary>
/// <param name="dataset">
/// Labeled examples. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    // Count the occurrences of each label (labels compared with mLblCmp).
    Dictionary<LblT, int> labelCounts = new Dictionary<LblT, int>(mLblCmp);
    foreach (LabeledExample<LblT, ExT> example in dataset)
    {
        int seen;
        if (!labelCounts.TryGetValue(example.Label, out seen))
        {
            labelCounts.Add(example.Label, 1);
        }
        else
        {
            labelCounts[example.Label] = seen + 1;
        }
    }

    // Turn the counts into relative frequencies.
    mPrediction = new Prediction<LblT>();
    foreach (KeyValuePair<LblT, int> entry in labelCounts)
    {
        double frequency = (double)entry.Value / (double)dataset.Count;
        mPrediction.Inner.Add(new KeyDat<double, LblT>(frequency, entry.Key));
    }
    mPrediction.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
}
/// <summary>
/// Trains the model by precomputing probabilities from <paramref name="dataset"/>
/// and remembering the number of training examples.
/// </summary>
/// <param name="dataset">
/// Labeled binary vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, BinaryVector> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    PrecomputeProbabilities(dataset);

    // Stored after the precomputation, in the same order as before.
    mDatasetCount = dataset.Count;
}
/// <summary>
/// Trains every inner model, then feeds the inner models' prediction scores for each
/// training example, together with its true label, into <c>mTagDistrTable</c> and
/// calculates the table.
/// </summary>
/// <param name="dataset">Training examples; checked with <c>Preconditions.CheckNotNull</c>.</param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Preconditions.CheckNotNull(dataset);
    var trainDataset = new LabeledDataset<LblT, ExT>(dataset);

    // Train the inner models, creating the ones that were not supplied.
    for (int modelIdx = 0; modelIdx < mInnerModels.Length; modelIdx++)
    {
        if (mInnerModels[modelIdx] == null)
        {
            mInnerModels[modelIdx] = CreateModel(modelIdx);
        }
        var trainSet = GetTrainSet(modelIdx, mInnerModels[modelIdx], trainDataset);
        mInnerModels[modelIdx].Train(trainSet);
    }

    // Record, per example, the score vector produced by the trained inner models.
    foreach (LabeledExample<LblT, ExT> labeled in trainDataset)
    {
        LabeledExample<LblT, ExT> current = labeled; // private copy for the closure
        var predictions = mInnerModels.Select(model => model.Predict(current.Example)).ToArray();
        double[] scores = GetPredictionScores(predictions).ToArray();
        mTagDistrTable.AddCount(labeled.Label, scores);
    }
    mTagDistrTable.Calculate();

    IsTrained = true;
}
/// <summary>
/// Relabels the dataset into two classes (<c>OneLabel</c> for examples whose label
/// equals it, <c>OtherLabel</c> for everything else) and trains <c>mBinaryModel</c>
/// on the result.
/// </summary>
/// <param name="dataset">Training examples to relabel.</param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    var relabeled = dataset.Select(
        le => new LabeledExample<LblT, ExT>(
            le.Label.Equals(OneLabel) ? OneLabel : OtherLabel,
            le.Example));
    var binaryDataset = new LabeledDataset<LblT, ExT>(relabeled);

    mBinaryModel.Train(binaryDataset);
    IsTrained = true;
}
/// <summary>
/// Builds a label-by-feature matrix of counts: row <c>r</c> holds, for the label with
/// index <c>r</c>, how many times each feature index occurs in that label's examples.
/// </summary>
/// <param name="dataset">Labeled binary vectors to count.</param>
/// <param name="idxToLbl">Receives the labels in order of first appearance (row index to label).</param>
/// <returns>The sparse count matrix, one row per distinct label.</returns>
private static SparseMatrix<double> CreateObservationMatrix<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset, ref LblT[] idxToLbl)
{
    // Assign consecutive indices to labels in order of first appearance.
    ArrayList<LblT> labels = new ArrayList<LblT>();
    Dictionary<LblT, int> lblToIdx = new Dictionary<LblT, int>();
    foreach (LabeledExample<LblT, BinaryVector> example in dataset)
    {
        if (lblToIdx.ContainsKey(example.Label))
        {
            continue;
        }
        lblToIdx.Add(example.Label, lblToIdx.Count);
        labels.Add(example.Label);
    }

    // prepare struct for fast computation
    Dictionary<int, int>[] counter = new Dictionary<int, int>[labels.Count];
    for (int row = 0; row < counter.Length; row++)
    {
        counter[row] = new Dictionary<int, int>();
    }

    // count features
    int processed = 0;
    object id = new object();
    foreach (LabeledExample<LblT, BinaryVector> example in dataset)
    {
        mLogger.ProgressFast(id, "CreateObservationMatrix", "{0} / {1}", ++processed, dataset.Count);
        Dictionary<int, int> featureCounts = counter[lblToIdx[example.Label]];
        foreach (int featureIdx in example.Example)
        {
            int current;
            if (!featureCounts.TryGetValue(featureIdx, out current))
            {
                featureCounts.Add(featureIdx, 1);
            }
            else
            {
                featureCounts[featureIdx] = current + 1;
            }
        }
    }

    // create sparse matrix
    SparseMatrix<double> mtx = new SparseMatrix<double>();
    for (int row = 0; row < counter.Length; row++)
    {
        SparseVector<double> rowVec = new SparseVector<double>();
        foreach (KeyValuePair<int, int> entry in counter[row])
        {
            rowVec.InnerIdx.Add(entry.Key);
            rowVec.InnerDat.Add(entry.Value);
        }
        rowVec.Sort(); // entries were appended in dictionary order
        mtx[row] = rowVec;
    }

    idxToLbl = labels.ToArray();
    return mtx;
}
/// <summary>
/// Trains the model on <paramref name="dataset"/>: runs <c>MaxEnt.Gis</c> and stores
/// the result, converted by <c>MaxEnt.PrepareForFastPrediction</c>, in <c>mLambda</c>.
/// </summary>
/// <param name="dataset">
/// Labeled binary vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, BinaryVector> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    // Drop the reference to the previous model first so that it can be
    // collected while the new one is being built.
    mLambda = null;
    mLambda = MaxEnt.PrepareForFastPrediction(
        MaxEnt.Gis(
            dataset,
            mCutOff,
            mNumIter,
            mMoveData,
            /*mtxFileName=*/ null,
            ref mIdxToLbl,
            mNumThreads,
            /*allowedDiff=*/ 0, // *** allowedDiff
            mLblCmp,
            mLogger));
}
/// <summary>
/// Copies the examples of a labeled collection, without their labels, into a new
/// unlabeled dataset, preserving enumeration order.
/// </summary>
/// <param name="dataset">Source collection; a null value is reported through <c>Utils.ThrowException</c>.</param>
/// <returns>A new dataset containing every example of <paramref name="dataset"/>.</returns>
public static UnlabeledDataset<ExT> ConvertToUnlabeledDataset<LblT, ExT>(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));

    UnlabeledDataset<ExT> result = new UnlabeledDataset<ExT>();
    foreach (LabeledExample<LblT, ExT> item in dataset)
    {
        result.Add(item.Example);
    }
    return result;
}
/// <summary>
/// Stores the training data for later use: the transposed example matrix goes into
/// <c>mDatasetMtx</c> and the labels, in dataset order, into <c>mLabels</c>.
/// </summary>
/// <param name="dataset">
/// Labeled sparse vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    mDatasetMtx = ModelUtils.GetTransposedMatrix(
        ModelUtils.ConvertToUnlabeledDataset(dataset));

    mLabels = new ArrayList<LblT>();
    foreach (LabeledExample<LblT, SparseVector<double>> item in dataset)
    {
        mLabels.Add(item.Label);
    }
}
/// <summary>
/// Trains the models in <c>ModelLabels</c> as a cascade: each model except the last is
/// trained on the current dataset, after which the examples carrying that model's label
/// are removed; the last model is trained on whatever remains.
/// </summary>
/// <param name="dataset">Training examples for the first model in the cascade.</param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    int allButLast = ModelLabels.Count() - 1;
    foreach (ModelLabel stage in ModelLabels.Take(allButLast))
    {
        stage.Model.Train(dataset);

        // Private copy for the closure; the filter drops this stage's label.
        ModelLabel trainedStage = stage;
        var remaining = dataset.Where(le => !le.Label.Equals(trainedStage.Label));
        dataset = new LabeledDataset<LblT, ExT>(remaining);
    }

    ModelLabels.Last().Model.Train(dataset);
    IsTrained = true;
}
/// <summary>
/// Trains the wrapped inner model on <paramref name="dataset"/> and marks this model
/// as trained.
/// </summary>
/// <param name="dataset">Training examples, passed unchanged to <c>mInnerModel</c>.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    // Disabled: per-label counts, ordered by descending frequency.
    //
    // mLabelCounts = dataset.GroupBy(le => le.Label)
    //     .OrderByDescending(g => g.Count())
    //     .Select(g => new Tuple<SentimentLabel, int>(g.Key, g.Count()))
    //     .ToArray();

    mInnerModel.Train(dataset);

    IsTrained = true;
}
/// <summary>
/// Returns the largest <c>Example.Count</c> found among the examples of
/// <paramref name="dataset"/>, or 0 when the dataset yields no examples.
/// </summary>
/// <param name="dataset">Labeled binary vectors to scan.</param>
/// <returns>The maximum example count as a <c>double</c>.</returns>
private static double GisFindMaxF<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset)
{
    double largest = 0;
    foreach (LabeledExample<LblT, BinaryVector> example in dataset)
    {
        largest = Math.Max(largest, example.Example.Count);
    }
    return largest;
}
/// <summary>
/// Trains three models via <c>TrainModel</c>, one per sentiment label, each called with
/// that label first and the other two labels after it.
/// </summary>
/// <param name="dataset">Training examples; checked with <c>Preconditions.CheckNotNull</c>.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    Preconditions.CheckNotNull(dataset);

    // Work on a copy built from fresh LabeledExample instances.
    var copies = dataset.Select(
        le => new LabeledExample<SentimentLabel, SparseVector<double>>(le.Label, le.Example));
    var ds = new LabeledDataset<SentimentLabel, SparseVector<double>>(copies);

    mPosModel = TrainModel(ds, SentimentLabel.Positive, SentimentLabel.Negative, SentimentLabel.Neutral);
    mNegModel = TrainModel(ds, SentimentLabel.Negative, SentimentLabel.Positive, SentimentLabel.Neutral);
    mNeuModel = TrainModel(ds, SentimentLabel.Neutral, SentimentLabel.Positive, SentimentLabel.Negative);

    IsTrained = true;
}
/// <summary>
/// Trains a binary SVM model through <c>SvmLightLib</c>.
/// </summary>
/// <remarks>
/// The first distinct label in the dataset is passed to the native library as +1 and the
/// second as -1; a third distinct label is reported as an invalid dataset. Every example
/// is converted to a native feature vector (1-based feature indices, <c>float</c> values).
/// The native vectors are deleted once training has finished, including when vector
/// creation, validation or training throws.
/// </remarks>
/// <param name="dataset">
/// Labeled sparse vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    Dispose();
    int[] trainSet = new int[dataset.Count];
    int numCreated = 0; // number of native vectors that were actually created
    Dictionary<LblT, int> lblToIdx = new Dictionary<LblT, int>(mLblCmp);
    MultiSet<int> lblCount = new MultiSet<int>();
    try
    {
        foreach (LabeledExample<LblT, SparseVector<double>> lblEx in dataset)
        {
            SparseVector<double> vec = lblEx.Example;
            int[] idx = new int[vec.Count];
            float[] val = new float[vec.Count];
            for (int i = 0; i < vec.Count; i++)
            {
                idx[i] = vec.InnerIdx[i] + 1; // native feature indices are shifted by one
                val[i] = (float)vec.InnerDat[i]; // *** cast to float
            }
            int lbl;
            if (!lblToIdx.TryGetValue(lblEx.Label, out lbl))
            {
                lblToIdx.Add(lblEx.Label, lbl = lblToIdx.Count);
                mIdxToLbl.Add(lblEx.Label);
            }
            // Label index 2 means a third distinct label was encountered.
            Utils.ThrowException(lbl == 2 ? new ArgumentValueException("dataset") : null);
            int svmLabel = lbl == 0 ? 1 : -1;
            trainSet[numCreated] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, svmLabel);
            numCreated++;
            lblCount.Add(svmLabel);
        }
        string costFactor = "";
        if (mBiasedCostFunction)
        {
            // Format with the invariant culture, like every other parameter below; the
            // current culture could otherwise produce a decimal comma in the argument string.
            double ratio = (double)lblCount.GetCount(-1) / (double)lblCount.GetCount(1);
            costFactor = "-j " + ratio.ToString(CultureInfo.InvariantCulture);
        }
        string args = string.Format(CultureInfo.InvariantCulture,
            "-v {0} -c {1} -t {2} -g {3} -d {4} -s {5} -r {6} -b {7} -e {8} -# {9} {10} {11}",
            (int)mVerbosityLevel, mC, (int)mKernelType, mKernelParamGamma, mKernelParamD, mKernelParamS,
            mKernelParamC, mBiasedHyperplane ? 1 : 0, mEps, mMaxIter, mCustomParams, costFactor);
        mModelId = SvmLightLib.TrainModel(args, trainSet.Length, trainSet);
    }
    finally
    {
        // delete training vectors (only those that were created)
        for (int k = 0; k < numCreated; k++)
        {
            SvmLightLib.DeleteFeatureVector(trainSet[k]);
        }
    }
}
/// <summary>
/// Trains the binary classifier on the non-neutral examples (unless a classifier is
/// already present) and computes the average signed prediction score separately for
/// the positive and the negative training examples.
/// </summary>
/// <remarks>
/// The signed score is the best score of the prediction, negated when the predicted
/// label is <c>SentimentLabel.Negative</c>. If the dataset contains no positive (or no
/// negative) example, the corresponding average is the result of a 0/0 division.
/// </remarks>
/// <param name="dataset">
/// Training examples; checked with <c>Preconditions.CheckNotNull</c>. Any collection
/// implementation is accepted, as the dataset is only enumerated.
/// </param>
public override sealed void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    Preconditions.CheckNotNull(dataset);

    // Filter the interface directly; no downcast to LabeledDataset is needed for this.
    var trainDataset = new LabeledDataset<SentimentLabel, SparseVector<double>>(
        dataset.Where(le => le.Label != SentimentLabel.Neutral));

    // An already assigned classifier is used as is and is not retrained.
    if (mBinaryClassifier == null)
    {
        mBinaryClassifier = CreateModel();
        mBinaryClassifier.Train(trainDataset);
    }

    IsTrained = true;

    /*Calculate positive and negative average distances*/
    int positiveTweetsNumber = 0;
    int negativeTweetsNumber = 0;
    PosAverageDistance = 0;
    NegAverageDistance = 0;

    foreach (LabeledExample<SentimentLabel, SparseVector<double>> example in trainDataset)
    {
        Prediction<SentimentLabel> prediction = mBinaryClassifier.Predict(example.Example);

        SentimentLabel bestLabelPredicted = prediction.BestClassLabel;
        double bestScorePredicted = bestLabelPredicted == SentimentLabel.Negative
            ? -prediction.BestScore
            : prediction.BestScore;

        SentimentLabel actualLabel = example.Label;
        if (actualLabel == SentimentLabel.Positive)
        {
            PosAverageDistance += bestScorePredicted;
            positiveTweetsNumber++;
        }
        else if (actualLabel == SentimentLabel.Negative)
        {
            NegAverageDistance += bestScorePredicted;
            negativeTweetsNumber++;
        }
    }

    PosAverageDistance = PosAverageDistance / positiveTweetsNumber;
    NegAverageDistance = NegAverageDistance / negativeTweetsNumber;
}
/// <summary>
/// Builds a fixed prediction from the label distribution of <paramref name="dataset"/>:
/// each distinct label is scored with its relative frequency and the entries are
/// sorted with <c>DescSort</c>.
/// </summary>
/// <param name="dataset">
/// Labeled examples. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, ExT> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    // Count the occurrences of each label (labels compared with mLblCmp).
    MultiSet<LblT> labelCounts = new MultiSet<LblT>(mLblCmp);
    foreach (LabeledExample<LblT, ExT> example in dataset)
    {
        labelCounts.Add(example.Label);
    }

    // Turn the counts into relative frequencies.
    mPrediction = new Prediction<LblT>();
    foreach (KeyValuePair<LblT, int> entry in labelCounts)
    {
        double frequency = (double)entry.Value / (double)dataset.Count;
        mPrediction.Inner.Add(new KeyDat<double, LblT>(frequency, entry.Key));
    }
    mPrediction.Inner.Sort(DescSort<KeyDat<double, LblT>>.Instance);
}
/// <summary>
/// Trains a positive-vs-rest and a negative-vs-rest classifier, optionally calibrates
/// the bias rates, scores every training example with both classifiers and updates the
/// distribution table.
/// </summary>
/// <param name="dataset">Training examples; checked with <c>Preconditions.CheckNotNull</c>.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    Preconditions.CheckNotNull(dataset);

    // Positive classifier: everything that is not Positive is relabeled Negative.
    var posExamples = dataset.Select(
        le => new LabeledExample<SentimentLabel, SparseVector<double>>(
            le.Label == SentimentLabel.Positive ? SentimentLabel.Positive : SentimentLabel.Negative,
            le.Example));
    var posDataset = new LabeledDataset<SentimentLabel, SparseVector<double>>(posExamples);
    mPosClassifier = CreateModel();
    mPosClassifier.Train(posDataset);

    // Negative classifier: everything that is not Negative is relabeled Positive.
    var negExamples = dataset.Select(
        le => new LabeledExample<SentimentLabel, SparseVector<double>>(
            le.Label == SentimentLabel.Negative ? SentimentLabel.Negative : SentimentLabel.Positive,
            le.Example));
    var negDataset = new LabeledDataset<SentimentLabel, SparseVector<double>>(negExamples);
    mNegClassifier = CreateModel();
    mNegClassifier.Train(negDataset);

    // Calibration runs only when at least one calibration setting is present.
    if (!(PosBiasCalibration == null && NegBiasCalibration == null))
    {
        var labeledDataset = new LabeledDataset<SentimentLabel, SparseVector<double>>(dataset);
        double? posBias = Calibrate(true, labeledDataset);
        double? negBias = Calibrate(false, labeledDataset);
        // A null calibration result keeps the current rate.
        BiasToPosRate = posBias ?? BiasToPosRate;
        BiasToNegRate = negBias ?? BiasToNegRate;
    }

    mNegSortedScores = null;
    mPosSortedScores = null;

    // Signed scores: positive when the predicted label is Positive, negative otherwise.
    mExampleScores = dataset.Select(le =>
    {
        Prediction<SentimentLabel> posPred = mPosClassifier.Predict(le.Example);
        Prediction<SentimentLabel> negPred = mNegClassifier.Predict(le.Example);
        return new ExampleScore
        {
            Label = le.Label,
            PosScore = posPred.BestClassLabel == SentimentLabel.Positive
                ? posPred.BestScore
                : -posPred.BestScore,
            NegScore = negPred.BestClassLabel == SentimentLabel.Negative
                ? -negPred.BestScore
                : negPred.BestScore
        };
    }).ToArray();

    UpdateDistrTable();

    IsTrained = true;
}
/// <summary>
/// Replicates every example into two vectors and trains the classifier on the
/// resulting dataset. Neutral examples contribute the first vector as Negative and the
/// second as Positive; all other examples keep their label on both vectors.
/// </summary>
/// <param name="dataset">Training examples to replicate.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    Preconditions.CheckState(BowSpace != null);

    var replDataset = new LabeledDataset<SentimentLabel, SparseVector<double>>();
    foreach (LabeledExample<SentimentLabel, SparseVector<double>> source in dataset)
    {
        SparseVector<double> first, second;
        Replicate(source.Example, out first, out second);

        bool isNeutral = source.Label == SentimentLabel.Neutral;
        SentimentLabel firstLabel = isNeutral ? SentimentLabel.Negative : source.Label;
        SentimentLabel secondLabel = isNeutral ? SentimentLabel.Positive : source.Label;

        replDataset.Add(new LabeledExample<SentimentLabel, SparseVector<double>>(firstLabel, first));
        replDataset.Add(new LabeledExample<SentimentLabel, SparseVector<double>>(secondLabel, second));
    }

    mClassifier = CreateModel();
    mClassifier.Train(replDataset);

    IsTrained = true;
}
/// <summary>
/// Trains a binary model on the non-neutral examples and then fills a new
/// one-dimensional tag distribution table with the signed prediction score of every
/// example (neutral ones included), keyed by the example's true label.
/// </summary>
/// <param name="dataset">Training examples; checked with <c>Preconditions.CheckNotNull</c>.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    Preconditions.CheckNotNull(dataset);
    Preconditions.CheckArgumentRange(TagDistrTable == null || TagDistrTable.NumOfDimensions == 2);

    var nonNeutral = dataset.Where(le => le.Label != SentimentLabel.Neutral);
    mBinModel = CreateModel();
    mBinModel.Train(new LabeledDataset<SentimentLabel, SparseVector<double>>(nonNeutral));

    TagDistrTable = new EnumTagDistrTable<SentimentLabel>(1, BinWidth, -5, 5, SentimentLabel.Exclude)
    {
        // use Laplace formula
        CalcDistrFunc = (tagCounts, values, tag) => ((double)tagCounts[tag] + 1) / (tagCounts.Values.Sum() + tagCounts.Count)
    };

    foreach (LabeledExample<SentimentLabel, SparseVector<double>> labeled in dataset)
    {
        Prediction<SentimentLabel> predicted = mBinModel.Predict(labeled.Example);
        // Signed score: negated unless the predicted label is Positive.
        TagDistrTable.AddCount(
            labeled.Label,
            predicted.BestClassLabel == SentimentLabel.Positive ? predicted.BestScore : -predicted.BestScore);
    }
    TagDistrTable.Calculate();

    IsTrained = true;
}
/// <summary>
/// Fits the regression model with <c>LSqrDll.DoLSqr</c>.
/// </summary>
/// <remarks>
/// The examples form the rows of the system matrix and the labels the right-hand side.
/// Both the matrix and its transpose are passed to the solver. The two
/// <c>LSqrSparseMatrix</c> instances are disposed when the method exits, including when
/// the initial-solution check or the solver throws.
/// </remarks>
/// <param name="dataset">
/// Examples with numeric labels. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<double, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    LSqrSparseMatrix mat = new LSqrSparseMatrix(dataset.Count);
    LSqrSparseMatrix matT = null;
    try
    {
        double[] rhs = new double[dataset.Count];
        int solSize = -1; // becomes (largest non-empty feature index + 1)
        int i = 0;
        foreach (LabeledExample<double, SparseVector<double>> labeledExample in dataset)
        {
            if (labeledExample.Example.LastNonEmptyIndex + 1 > solSize)
            {
                solSize = labeledExample.Example.LastNonEmptyIndex + 1;
            }
            foreach (IdxDat<double> item in labeledExample.Example)
            {
                mat.InsertValue(i, item.Idx, item.Dat);
            }
            rhs[i++] = labeledExample.Label;
        }
        // A supplied initial solution must have one entry per solution component.
        Utils.ThrowException((mInitSol != null && mInitSol.Length != solSize) ? new ArgumentValueException("InitialSolution") : null);
        matT = new LSqrSparseMatrix(solSize);
        i = 0;
        foreach (LabeledExample<double, SparseVector<double>> labeledExample in dataset)
        {
            foreach (IdxDat<double> item in labeledExample.Example)
            {
                matT.InsertValue(item.Idx, i, item.Dat);
            }
            i++;
        }
        // A negative mNumIter selects an iteration limit derived from the problem size.
        int numIter = mNumIter < 0 ? solSize + dataset.Count + 50 : mNumIter;
        mSol = new ArrayList<double>(LSqrDll.DoLSqr(solSize, mat, matT, mInitSol, rhs, numIter));
    }
    finally
    {
        mat.Dispose();
        if (matT != null)
        {
            matT.Dispose();
        }
    }
}
/// <summary>
/// Converts every example of <paramref name="dataset"/> to a sparse vector, stores the
/// vectors as consecutive matrix rows and returns the transposed copy of that matrix.
/// </summary>
/// <param name="dataset">Labeled binary vectors to convert.</param>
/// <param name="clearDataset">When true, each example is cleared right after it has been converted.</param>
/// <returns>The transposed copy of the example matrix.</returns>
private static SparseMatrix<double> TransposeDataset<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset, bool clearDataset)
{
    SparseMatrix<double> rows = new SparseMatrix<double>();
    int rowIdx = 0;
    foreach (LabeledExample<LblT, BinaryVector> example in dataset)
    {
        rows[rowIdx] = ModelUtils.ConvertExample<SparseVector<double>>(example.Example);
        rowIdx++;
        if (clearDataset)
        {
            example.Example.Clear();
        }
    }
    return rows.GetTransposedCopy();
}
/// <summary>
/// Trains the text model: runs the feature processor over every example (in place),
/// initializes the bag-of-words space to obtain one vector per example, and trains the
/// model on the labeled vectors.
/// </summary>
/// <remarks>
/// When <c>OnTrainModel</c> is set, it is invoked instead of <c>Model.Train</c>.
/// </remarks>
/// <param name="dataset">Labeled texts; their <c>Example</c> values are overwritten with the processed text.</param>
public void Train(ILabeledExampleCollection<LblT, string> dataset)
{
    Preconditions.CheckState(!IsTrained);
    Preconditions.CheckNotNull(dataset);
    Preconditions.CheckNotNull(BowSpace);
    Preconditions.CheckNotNull(FeatureProcessor);
    Preconditions.CheckNotNull(Model);

    // preprocess the text
    foreach (LabeledExample<LblT, string> labeled in dataset)
    {
        labeled.Example = FeatureProcessor.Run(labeled.Example);
    }

    // bow vectors; a DeltaBowSpace is initialized from the labeled data,
    // any other space from the texts alone
    List<SparseVector<double>> bowData;
    DeltaBowSpace<LblT> deltaBowSpace = BowSpace as DeltaBowSpace<LblT>;
    if (deltaBowSpace != null)
    {
        bowData = deltaBowSpace.Initialize(
            dataset as ILabeledDataset<LblT, string> ?? new LabeledDataset<LblT, string>(dataset));
    }
    else
    {
        bowData = BowSpace.Initialize(dataset.Select(d => d.Example));
    }

    // pair each vector with the label at the same position
    var bowDataset = new LabeledDataset<LblT, SparseVector<double>>();
    for (int pos = 0; pos < bowData.Count; pos++)
    {
        bowDataset.Add(dataset[pos].Label, bowData[pos]);
    }

    // train
    if (OnTrainModel != null)
    {
        OnTrainModel(this, bowDataset);
    }
    else
    {
        Model.Train(bowDataset);
    }

    IsTrained = true;
}
/// <summary>
/// Groups the examples by label and stores one centroid per label in
/// <c>mCentroids</c>. The centroid type is <c>NrmL2</c> when <c>mNormalize</c> is set
/// and <c>Avg</c> otherwise.
/// </summary>
/// <param name="dataset">
/// Labeled sparse vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset.Count != 0 ? null : new ArgumentValueException("dataset"));

    mCentroids = new ArrayList<Pair<LblT, SparseVector<double>>>();

    // Collect the vectors of each label (labels compared with mLblCmp).
    Dictionary<LblT, ArrayList<SparseVector<double>>> groups = new Dictionary<LblT, ArrayList<SparseVector<double>>>(mLblCmp);
    foreach (LabeledExample<LblT, SparseVector<double>> item in dataset)
    {
        ArrayList<SparseVector<double>> members;
        if (groups.TryGetValue(item.Label, out members))
        {
            members.Add(item.Example);
        }
        else
        {
            groups.Add(item.Label, new ArrayList<SparseVector<double>>(new SparseVector<double>[] { item.Example }));
        }
    }

    // One centroid per label.
    CentroidType centroidType = mNormalize ? CentroidType.NrmL2 : CentroidType.Avg;
    foreach (KeyValuePair<LblT, ArrayList<SparseVector<double>>> group in groups)
    {
        SparseVector<double> centroid = ModelUtils.ComputeCentroid(group.Value, centroidType);
        mCentroids.Add(new Pair<LblT, SparseVector<double>>(group.Key, centroid));
    }
}
/// <summary>
/// Untyped entry point: verifies that <paramref name="dataset"/> is a typed labeled
/// example collection and forwards it to the typed <c>Train</c> overload.
/// </summary>
/// <param name="dataset">
/// Training data. A null value or a collection of the wrong type is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
void IModel<LblT>.Train(ILabeledExampleCollection<LblT> dataset)
{
    Utils.ThrowException(dataset != null ? null : new ArgumentNullException("dataset"));
    Utils.ThrowException(dataset is ILabeledExampleCollection<LblT, ExT> ? null : new ArgumentTypeException("dataset"));

    ILabeledExampleCollection<LblT, ExT> typedDataset = (ILabeledExampleCollection<LblT, ExT>)dataset;
    Train(typedDataset); // throws ArgumentValueException
}
/// <summary>
/// Runs the GIS optimization loop over <paramref name="dataset"/> and returns the
/// resulting lambda matrix.
/// </summary>
/// <remarks>
/// The observation matrix is loaded from <paramref name="mtxFileName"/> when
/// <c>Utils.VerifyFileNameOpen</c> accepts it. Otherwise it is created from the dataset
/// and, when <c>Utils.VerifyFileNameCreate</c> accepts the name, saved to that file. The
/// serializer is closed even when reading or writing throws.
/// </remarks>
/// <param name="dataset">Labeled binary vectors used for training.</param>
/// <param name="cutOff">When greater than 0, <c>CutOff</c> is applied to the observation matrix with this value.</param>
/// <param name="numIter">Maximum number of iterations of the main loop.</param>
/// <param name="clearDataset">Passed to <c>TransposeDataset</c>; when true the examples are cleared as they are converted.</param>
/// <param name="mtxFileName">Optional file used to load or store the observation matrix.</param>
/// <param name="idxToLbl">Receives the mapping from row index to label.</param>
/// <param name="numThreads">When greater than 1, the multi-threaded expectation update is used.</param>
/// <param name="allowedDiff">When greater than 0, the loop stops early once the largest absolute lambda change of an iteration is at most this value.</param>
/// <param name="lblCmp">Label comparer passed to <c>CreateObservationMatrix</c>.</param>
/// <param name="logger">Logger that receives progress messages.</param>
/// <returns>The lambda matrix after the last executed iteration.</returns>
public static SparseMatrix<double> Gis<LblT>(ILabeledExampleCollection<LblT, BinaryVector> dataset, int cutOff, int numIter, bool clearDataset, string mtxFileName, ref LblT[] idxToLbl, int numThreads, double allowedDiff, IEqualityComparer<LblT> lblCmp, Logger logger)
{
    logger.Info("Gis", "Creating observation matrix ...");
    SparseMatrix<double> observations = null;
    if (Utils.VerifyFileNameOpen(mtxFileName))
    {
        BinarySerializer reader = new BinarySerializer(mtxFileName, FileMode.Open);
        try
        {
            idxToLbl = new ArrayList<LblT>(reader).ToArray();
            observations = new SparseMatrix<double>(reader);
        }
        finally
        {
            reader.Close();
        }
    }
    else
    {
        observations = CreateObservationMatrix(dataset, ref idxToLbl, lblCmp, logger);
        if (Utils.VerifyFileNameCreate(mtxFileName))
        {
            BinarySerializer writer = new BinarySerializer(mtxFileName, FileMode.Create);
            try
            {
                new ArrayList<LblT>(idxToLbl).Save(writer);
                observations.Save(writer);
            }
            finally
            {
                writer.Close();
            }
        }
    }
    int numClasses = observations.GetLastNonEmptyRowIdx() + 1;
    int numExamples = dataset.Count;
    if (cutOff > 0)
    {
        logger.Info("Gis", "Performing cut-off ...");
        observations = CutOff(observations, cutOff);
    }
    logger.Info("Gis", "Preparing structures ...");
    SparseMatrix<double> lambda = CopyStructure(observations);
    SparseMatrix<double> expectations = CopyStructure(observations);
    // The maximum must be taken before TransposeDataset, which may clear the examples.
    double f = GisFindMaxF(dataset);
    SparseMatrix<double> trainMtxTr = TransposeDataset(dataset, clearDataset);
    logger.Info("Gis", "Entering main loop ...");
    // Previous lambda values, kept only when the convergence check is enabled.
    double[] oldLambda = null;
    if (allowedDiff > 0)
    {
        oldLambda = new double[lambda.CountValues()];
    }
    for (int i = 0; i < numIter; i++)
    {
        logger.Info("Gis", "Iteration {0} / {1} ...", i + 1, numIter);
        logger.Info("Gis", "Updating expectations ...");
        if (numThreads > 1)
        {
            UpdateExpectationMatrix(numClasses, numExamples, trainMtxTr, lambda, expectations, numThreads, logger);
        }
        else
        {
            UpdateExpectationMatrix(numClasses, numExamples, trainMtxTr, lambda, expectations, logger);
        }
        logger.Info("Gis", "Updating lambdas ...");
        GisUpdate(lambda, expectations, observations, f);
        Reset(expectations);
        // check lambda change
        if (allowedDiff > 0)
        {
            int j = 0;
            double maxDiff = 0;
            foreach (IdxDat<SparseVector<double>> row in lambda)
            {
                foreach (IdxDat<double> item in row.Dat)
                {
                    double diff = Math.Abs(item.Dat - oldLambda[j]);
                    if (diff > maxDiff)
                    {
                        maxDiff = diff;
                    }
                    oldLambda[j] = item.Dat;
                    j++;
                }
            }
            logger.Info("Gis", "Max lambda diff: {0:0.0000}", maxDiff);
            if (maxDiff <= allowedDiff)
            {
                logger.Info("Gis", "Max lambda diff is small enough. Exiting optimization loop.");
                break;
            }
        }
    }
    return lambda;
}
/// <summary>
/// Untyped entry point: casts <paramref name="dataset"/> to the typed collection
/// interface and forwards it to the typed <c>Train</c> overload.
/// </summary>
/// <param name="dataset">Training data; must be castable to the typed collection interface.</param>
public void Train(ILabeledExampleCollection<LblT> dataset)
{
    ILabeledExampleCollection<LblT, ExT> typedDataset = (ILabeledExampleCollection<LblT, ExT>)dataset;
    Train(typedDataset);
}
/// <summary>
/// Trains one centroid per label and refines the centroids iteratively.
/// </summary>
/// <remarks>
/// The centroids are initialized from the per-label sums of the examples. In each of the
/// <c>mIterations</c> passes every training example is assigned to the centroid with the
/// highest dot-product similarity. For every misassigned example the example vector is
/// subtracted (scaled by the learning rate) from the assigned centroid and added to the
/// centroid of its true label. The learning rate is multiplied by <c>mDamping</c> after
/// each pass. The final centroids are stored transposed in <c>mCentroidMtxTr</c> and
/// their labels in <c>mLabels</c>.
/// </remarks>
/// <param name="dataset">
/// Labeled sparse vectors. A null or empty collection is reported through
/// <c>Utils.ThrowException</c>.
/// </param>
public void Train(ILabeledExampleCollection<LblT, SparseVector<double>> dataset)
{
    Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
    Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
    // initial centroids: per-label sums (labels compared with mLblCmp)
    Dictionary<LblT, CentroidData> centroids = new Dictionary<LblT, CentroidData>(mLblCmp);
    foreach (LabeledExample<LblT, SparseVector<double>> labeledExample in dataset)
    {
        CentroidData centroidData;
        if (!centroids.TryGetValue(labeledExample.Label, out centroidData))
        {
            centroidData = new CentroidData();
            centroids.Add(labeledExample.Label, centroidData);
        }
        centroidData.AddToSum(labeledExample.Example);
    }
    foreach (CentroidData cenData in centroids.Values)
    {
        cenData.UpdateCentroidLen();
    }
    double learnRate = 1;
    double[][] dotProd = null;
    SparseMatrix<double> dsMtx = null;
    if (mIterations > 0)
    {
        dotProd = new double[centroids.Count][];
        dsMtx = ModelUtils.GetTransposedMatrix(ModelUtils.ConvertToUnlabeledDataset(dataset));
    }
    for (int iter = 1; iter <= mIterations; iter++)
    {
        mLogger.Info("Train", "Iteration {0} / {1} ...", iter, mIterations);
        // compute dot products
        mLogger.Info("Train", "Computing dot products ...");
        int j = 0;
        foreach (KeyValuePair<LblT, CentroidData> labeledCentroid in centroids)
        {
            mLogger.ProgressNormal(Logger.Level.Info, /*sender=*/ this, "Train", "Centroid {0} / {1} ...", j + 1, centroids.Count);
            SparseVector<double> cenVec = labeledCentroid.Value.GetSparseVector();
            dotProd[j] = ModelUtils.GetDotProductSimilarity(dsMtx, dataset.Count, cenVec);
            j++;
        }
        // classify training examples
        mLogger.Info("Train", "Classifying training examples ...");
        int errCount = 0;
        for (int instIdx = 0; instIdx < dataset.Count; instIdx++)
        {
            mLogger.ProgressFast(Logger.Level.Info, /*sender=*/ this, "Train", "Example {0} / {1} ...", instIdx + 1, dataset.Count);
            double maxSim = double.MinValue;
            CentroidData assignedCentroid = null;
            LabeledExample<LblT, SparseVector<double>> labeledExample = dataset[instIdx];
            SparseVector<double> vec = labeledExample.Example;
            // Look the true centroid up through the dictionary so that the same label
            // comparer (mLblCmp) is used as when the centroids were built.
            CentroidData actualCentroid = centroids[labeledExample.Label];
            int cenIdx = 0;
            foreach (KeyValuePair<LblT, CentroidData> labeledCentroid in centroids)
            {
                double sim = dotProd[cenIdx][instIdx];
                if (sim > maxSim)
                {
                    maxSim = sim;
                    assignedCentroid = labeledCentroid.Value;
                }
                cenIdx++;
            }
            if (assignedCentroid != actualCentroid)
            {
                assignedCentroid.AddToDiff(-learnRate, vec);
                actualCentroid.AddToDiff(learnRate, vec);
                errCount++;
            }
        }
        mLogger.Info("Train", "Training set error rate: {0:0.00}%", (double)errCount / (double)dataset.Count * 100.0);
        // update centroids
        int k = 0;
        foreach (CentroidData centroidData in centroids.Values)
        {
            mLogger.ProgressNormal(Logger.Level.Info, /*sender=*/ this, "Train", "Centroid {0} / {1} ...", ++k, centroids.Count);
            centroidData.Update(mPositiveValuesOnly);
            centroidData.UpdateCentroidLen();
        }
        learnRate *= mDamping;
    }
    // store the final centroids (one row each) and transpose the matrix
    mCentroidMtxTr = new SparseMatrix<double>();
    mLabels = new ArrayList<LblT>();
    int rowIdx = 0;
    foreach (KeyValuePair<LblT, CentroidData> labeledCentroid in centroids)
    {
        mCentroidMtxTr[rowIdx++] = labeledCentroid.Value.GetSparseVector();
        mLabels.Add(labeledCentroid.Key);
    }
    mCentroidMtxTr = mCentroidMtxTr.GetTransposedCopy();
}
/// <summary>
/// Creates a new multiclass SVM classifier via <c>CreateModel</c>, stores it in
/// <c>mClassifier</c> and trains it on <paramref name="dataset"/>.
/// </summary>
/// <param name="dataset">Training examples, passed unchanged to the classifier.</param>
public override void Train(ILabeledExampleCollection<SentimentLabel, SparseVector<double>> dataset)
{
    // The created model must be an SvmMulticlassClassifier; the cast fails otherwise.
    mClassifier = (SvmMulticlassClassifier<SentimentLabel>)CreateModel();

    mClassifier.Train(dataset);
}