public override void Run(object[] args)
{
    // Reuse the binary SVM tutorial run to obtain a labeled dataset.
    BinarySvm svmInstance = BinarySvm.RunInstanceNull(args);
    var sparseData = (LabeledDataset<string, SparseVector<double>>)svmInstance.Result["labeled_data"];

    // Naive Bayes operates on binary feature vectors, so convert the dataset first.
    var binaryData = (LabeledDataset<string, BinaryVector>)sparseData.ConvertDataset(typeof(BinaryVector), false);

    // Cross validation via the convenience class: hooks observe training,
    // per-example prediction and per-fold results.
    var crossValidator = new CrossValidator<string, BinaryVector>
    {
        NumFolds = 10,       // default
        IsStratified = true, // default
        ExpName = "",        // default
        Dataset = binaryData,
        OnAfterTrain = (sender, foldN, model, trainSet) =>
        {
            var trained = (NaiveBayesClassifier<string>)model;
            // do stuff after model is trained for a fold...
        },
        OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
        {
            Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}",
                le.Label, prediction.BestClassLabel, prediction.BestScore);
            return true;
        },
        OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
        {
            PerfMatrix<string> foldMatrix =
                sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
            Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
        }
    };
    crossValidator.Models.Add(new NaiveBayesClassifier<string>());

    crossValidator.Run();

    // Aggregate performance over all folds.
    Output.WriteLine("Sum confusion matrix:");
    PerfMatrix<string> sumMatrix =
        crossValidator.PerfData.GetSumPerfMatrix("", crossValidator.GetModelName(0));
    Output.WriteLine(sumMatrix.ToString());
    Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());

    // Per-label precision with standard deviation across folds.
    foreach (string label in crossValidator.PerfData.GetLabels("", crossValidator.GetModelName(0)))
    {
        double stdDev;
        double avgPrecision = crossValidator.PerfData.GetAvg(
            "", crossValidator.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev);
        Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label, avgPrecision, stdDev);
    }
}
// Sweeps the bias of one hyperplane over [BiasLowerBound, BiasUpperBound] in
// steps of BiasStep and returns the bias maximizing OptimizationFunc over the
// resulting confusion matrix. Returns null when no calibration is configured
// for the requested plane. Optionally records every (bias, score) pair.
private double? Calibrate(bool doPosPlane, LabeledDataset<SentimentLabel, SparseVector<double>> dataset)
{
    BiasCalibration calibration = doPosPlane ? PosBiasCalibration : NegBiasCalibration;
    if (calibration == null)
    {
        return null;
    }
    Preconditions.CheckArgument(calibration.BiasStep > 0);
    Preconditions.CheckNotNull(calibration.OptimizationFunc);

    double bestScore = double.MinValue;
    double bestBias = 0;
    List<Tuple<double, double>> pairs =
        calibration.IsSaveBiasScorePairs ? new List<Tuple<double, double>>() : null;

    for (double bias = calibration.BiasLowerBound;
         bias <= calibration.BiasUpperBound;
         bias += calibration.BiasStep)
    {
        // Evaluate the whole dataset at this bias setting; the bias is applied
        // to the positive or the negative plane depending on doPosPlane.
        var matrix = new PerfMatrix<SentimentLabel>(null);
        foreach (LabeledExample<SentimentLabel, SparseVector<double>> le in dataset)
        {
            Prediction<SentimentLabel> prediction =
                PredictInternal(le.Example, doPosPlane ? bias : 0, doPosPlane ? 0 : bias);
            matrix.AddCount(le.Label, prediction.BestClassLabel);
        }

        double score = calibration.OptimizationFunc(matrix);
        if (score > bestScore)
        {
            bestScore = score;
            bestBias = bias;
        }
        if (pairs != null)
        {
            pairs.Add(new Tuple<double, double>(bias, score));
        }
        Console.WriteLine("{0}\t{1:0.000}\t{2:0.000}", doPosPlane, bias, score);
    }

    if (pairs != null)
    {
        calibration.BiasScorePairs = pairs.ToArray();
    }
    return bestBias;
}
public override void Run(object[] args)
{
    // Reuse the binary SVM tutorial run to obtain a labeled dataset.
    BinarySvm svmInstance = BinarySvm.RunInstanceNull(args);
    var sparseData = (LabeledDataset<string, SparseVector<double>>)svmInstance.Result["labeled_data"];

    // Naive Bayes operates on binary feature vectors, so convert the dataset first.
    var binaryData = (LabeledDataset<string, BinaryVector>)sparseData.ConvertDataset(typeof(BinaryVector), false);

    // Cross validation with the task-based validator: model instances are
    // constructed on the fly by the supplied factory delegates.
    var validator = new TaskCrossValidator<string, BinaryVector>(
        new System.Func<IModel<string, BinaryVector>>[]
        {
            () => new NaiveBayesClassifier<string>()
        })
    {
        NumFolds = 10,       // default
        IsStratified = true, // default
        ExpName = "",        // default
        Dataset = binaryData,
        OnAfterTrain = (sender, foldN, model, trainSet) =>
        {
            var trained = (NaiveBayesClassifier<string>)model;
            // do stuff after model is trained for a fold...
        },
        OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
        {
            // Output is shared by worker threads; serialize writes.
            lock (Output)
                Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}",
                    le.Label, prediction.BestClassLabel, prediction.BestScore);
            return true;
        }
    };

    var cores = (int)(Math.Round(Environment.ProcessorCount * 0.9) - 1); // use 90% of cpu cores
    Output.WriteLine("Multi-threaded using {0} cores\n", cores);
    Output.Flush();

    // Using the .NET framework thread pool.
    // Model-level parallelization: each fold task yields per-model subtasks.
    Parallel.ForEach(
        validator.GetFoldAndModelTasks(),
        new ParallelOptions { MaxDegreeOfParallelism = cores },
        foldTask => Parallel.ForEach(
            foldTask(),
            new ParallelOptions { MaxDegreeOfParallelism = cores },
            modelTask => modelTask()
        )
    );

    // Fold-level alternative:
    /*
    Parallel.ForEach(validator.GetFoldTasks(), new ParallelOptions { MaxDegreeOfParallelism = cores }, t => t());
    */

    // For some serious workload better use SmartThreadPool.
    // Requires reference to package https://www.nuget.org/packages/SmartThreadPool.dll/
    var exceptions = new List<Exception>();

    // Model-level parallelization:
    /*
    var threadPool = new SmartThreadPool { MaxThreads = cores };
    foreach (System.Func<Action[]> foldTask in validator.GetFoldAndModelTasks())
    {
        System.Func<Action[]> ft = foldTask;
        threadPool.QueueWorkItem(o =>
        {
            foreach (Action modelTask in ft())
            {
                Action mt = modelTask;
                threadPool.QueueWorkItem(p =>
                {
                    mt();
                    return null;
                }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
            }
            return null;
        }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
    }
    threadPool.WaitForIdle();
    threadPool.Shutdown();
    */

    // Fold-level alternative:
    /*
    var threadPool = new SmartThreadPool { MaxThreads = cores };
    foreach (Action foldTask in validator.GetFoldTasks())
    {
        Action ft = foldTask;
        threadPool.QueueWorkItem(o =>
        {
            ft();
            return null;
        }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
    }
    threadPool.WaitForIdle();
    threadPool.Shutdown();
    */

    // Surface the first failure collected by the (optional) thread-pool paths.
    foreach (Exception exception in exceptions)
    {
        throw new Exception("Error during validation", exception);
    }

    // Aggregate performance over all folds.
    Output.WriteLine("Sum confusion matrix:");
    PerfMatrix<string> sumMatrix = validator.PerfData.GetSumPerfMatrix("", validator.GetModelName(0));
    Output.WriteLine(sumMatrix.ToString());
    Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());

    // Per-label precision with standard deviation across folds.
    foreach (string label in validator.PerfData.GetLabels("", validator.GetModelName(0)))
    {
        double stdDev;
        double avgPrecision = validator.PerfData.GetAvg(
            "", validator.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev);
        Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label, avgPrecision, stdDev);
    }
}
public override void Run(object[] args)
{
    // First optional argument is the fold count (defaults to 10); the
    // remaining arguments are forwarded to the SVM tutorial instance.
    int foldCount = args.Any() ? (int)args[0] : 10;
    args = args.Skip(1).ToArray();

    // Get classifier and labeled data from the base tutorial run.
    BinarySvm svmInstance = BinarySvm.RunInstanceNull(args);
    var classifier = (SvmBinaryClassifier<string>)svmInstance.Result["classifier"];
    var labeledData = (LabeledDataset<string, SparseVector<double>>)svmInstance.Result["labeled_data"];

    bool stratified = true;

    // Manual cross validation: stratification groups examples by label,
    // otherwise shuffle with a fixed seed for reproducibility.
    if (stratified)
    {
        labeledData.GroupLabels(true);
    }
    else
    {
        labeledData.Shuffle(new Random(1));
    }

    var perfData = new PerfData<string>();

    // Print the label distribution of the full dataset.
    foreach (var group in labeledData.GroupBy(le => le.Label))
    {
        Output.WriteLine("total {0} {1}\t {2:0.00}",
            group.Key, group.Count(), (double)group.Count() / labeledData.Count);
    }

    Output.WriteLine("Performing {0}{1}-fold cross validation...",
        stratified ? "stratified " : "", foldCount);

    for (int foldN = 1; foldN <= foldCount; foldN++)
    {
        LabeledDataset<string, SparseVector<double>> testSet;
        LabeledDataset<string, SparseVector<double>> trainSet;
        if (stratified)
        {
            labeledData.SplitForStratifiedCrossValidation(foldCount, foldN, out trainSet, out testSet);
        }
        else
        {
            labeledData.SplitForCrossValidation(foldCount, foldN, out trainSet, out testSet);
        }

        classifier.Train(trainSet);

        // Score every held-out example into this fold's confusion matrix.
        PerfMatrix<string> foldMatrix = perfData.GetPerfMatrix("tutorial", "binary svm", foldN);
        foreach (LabeledExample<string, SparseVector<double>> testExample in testSet)
        {
            Prediction<string> prediction = classifier.Predict(testExample.Example);
            foldMatrix.AddCount(testExample.Label, prediction.BestClassLabel);
        }
        Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
    }

    // Aggregate performance over all folds.
    Output.WriteLine("Sum confusion matrix:");
    PerfMatrix<string> sumMatrix = perfData.GetSumPerfMatrix("tutorial", "binary svm");
    Output.WriteLine(sumMatrix.ToString());
    Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());

    // Demonstrate the various ToString overloads of the summed matrix.
    Output.WriteLine();
    Output.WriteLine(sumMatrix.ToString(new PerfMetric[] { }));
    Output.WriteLine(sumMatrix.ToString(perfData.GetLabels("tutorial", "binary svm"), new OrdinalPerfMetric[] { }));
    Output.WriteLine(sumMatrix.ToString(new ClassPerfMetric[] { }));

    // Per-label precision with standard deviation across folds.
    foreach (string label in perfData.GetLabels("tutorial", "binary svm"))
    {
        double stdDev;
        double avgPrecision = perfData.GetAvg(
            "tutorial", "binary svm", ClassPerfMetric.Precision, label, out stdDev);
        Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label, avgPrecision, stdDev);
    }
}
// Appends one tab-separated result row per model of the given task to the
// shared Writer, emitting the column header first when the underlying stream
// is still empty. No-op when Writer is null.
//
// FIX: the header-presence check (Writer.BaseStream.Length == 0) previously
// ran OUTSIDE the lock guarding writes, so two concurrent callers could both
// observe an empty stream and write duplicate header rows. The check and the
// header write now happen atomically under the Writer lock.
public override void Make(Task task)
{
    if (Writer == null)
    {
        return;
    }

    lock (Writer)
    {
        if (Writer.BaseStream.Length == 0)
        {
            Writer.Write(BuildHeader());
            Writer.Flush(); // ensure the length check by other threads sees the header
        }
    }

    // Row construction needs no lock; only the final write is serialized.
    StringBuilder rows = BuildRows(task);
    lock (Writer)
    {
        Writer.Write(rows);
        Writer.Flush();
    }
}

// Builds the tab-separated header line; column order must match BuildRows.
private StringBuilder BuildHeader()
{
    var sb = new StringBuilder();
    sb.Append("Experiment").Append("\t");
    sb.Append("Task").Append("\t");
    sb.Append("Dataset").Append("\t");
    sb.Append("Volume").Append("\t");
    sb.Append("Filtered Volume").Append("\t");
    sb.Append("Model").Append("\t");
    sb.Append("From").Append("\t");
    sb.Append("To").Append("\t");
    sb.Append("Interval Days").Append("\t");
    sb.Append("Performance Millis").Append("\t");
    foreach (SentimentLabel label in OrderedLabels)
    {
        sb.Append(label).Append(" Actual").Append("\t");
        sb.Append(label).Append(" Predicted").Append("\t");
    }
    foreach (PerfMetric metric in Metrics)
    {
        sb.Append(metric).Append("\t");
    }
    foreach (ClassPerfMetric metric in ClassMetrics)
    {
        foreach (SentimentLabel label in OrderedLabels)
        {
            sb.Append(metric).Append(" - ").Append(label).Append("\t");
        }
    }
    foreach (OrdinalPerfMetric metric in OrdinalMetrics)
    {
        sb.Append(metric).Append("\t");
    }
    foreach (Tuple<string, Func<string>> field in mExtraFields)
    {
        sb.Append(field.Item1).Append("\t");
    }
    sb.AppendLine();
    return sb;
}

// Builds one tab-separated data row per model of the task. Missing
// performance matrices render as "NaN" in every metric column.
private StringBuilder BuildRows(Task task)
{
    var sb = new StringBuilder();
    TaskContext ctx = task.Context;
    for (int i = 0; i < ctx.Models.Length; i++)
    {
        PerfMatrix<SentimentLabel> perfMatrix =
            task.PerfData.GetPerfMatrix(task.ExperimentName, task.Context.GetModelName(i), 1);
        sb.Append(task.ExperimentName ?? "").Append("\t");
        sb.Append(task.Name ?? "").Append("\t");
        sb.Append(ctx.DataSource.Name).Append("\t");
        sb.Append(ctx.DataSource.DataSize).Append("\t");
        sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetSumAll().ToString("d")).Append("\t");
        sb.Append(ctx.GetModelName(i)).Append("\t");
        // NOTE(review): "hh" is the 12-hour clock with no AM/PM designator —
        // "HH" was likely intended; kept as-is to preserve the output format.
        sb.Append(ctx.DataSource.From == null ? "" : ctx.DataSource.From.Value.ToString("yyyy-MM-dd hh:mm:ss")).Append("\t");
        sb.Append(ctx.DataSource.To == null ? "" : ctx.DataSource.To.Value.ToString("yyyy-MM-dd hh:mm:ss")).Append("\t");
        sb.Append(ctx.DataSource.From == null || ctx.DataSource.To == null
            ? "0"
            : (ctx.DataSource.To.Value - ctx.DataSource.From.Value).TotalDays.ToString("f1")).Append("\t");
        sb.Append((task.PerformDuration ?? TimeSpan.Zero).TotalMilliseconds.ToString("f1")).Append("\t");
        if (perfMatrix != null)
        {
            perfMatrix.AddLabels(OrderedLabels); // some ordered metrics require this
        }
        foreach (SentimentLabel label in OrderedLabels)
        {
            sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetActual(label).ToString("d")).Append("\t");
            sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetPredicted(label).ToString("d")).Append("\t");
        }
        foreach (PerfMetric metric in Metrics)
        {
            sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric).ToString("n3")).Append("\t");
        }
        foreach (ClassPerfMetric metric in ClassMetrics)
        {
            foreach (SentimentLabel label in OrderedLabels)
            {
                sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric, label).ToString("n3")).Append("\t");
            }
        }
        foreach (OrdinalPerfMetric metric in OrdinalMetrics)
        {
            sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric, OrderedLabels).ToString("n3")).Append("\t");
        }
        foreach (Tuple<string, Func<string>> field in mExtraFields)
        {
            sb.Append(field.Item2()).Append("\t");
        }
        sb.AppendLine();
    }
    return sb;
}