/// <summary>
/// Runs the Stanford <c>ColumnDataClassifier</c> as a Java process on the training and
/// test files, then scans its combined output/error lines to populate the classifier
/// metadata, dataset sizes, F1 statistics (overall and per k-fold), per-class statistics
/// and individual classification results exposed by this stage.
/// </summary>
/// <param name="options">Optional stage options; not read by this method.</param>
/// <returns>
/// <see cref="StageResult.FAILED"/> if the props file is missing, the Java command fails
/// to start or complete, or a dataset / k-fold line cannot be parsed; otherwise
/// <see cref="StageResult.SUCCESS"/>.
/// </returns>
public override StageResult Train(Dictionary<string, object> options = null)
{
    ClassifierPropsFile = CreatePropsFile(ClassifierProperties);
    if (!ClassifierPropsFile.Exists)
    {
        // Without the props file the Java process cannot be configured; fail now
        // instead of launching a command that is passed a non-existent -prop file.
        Error("Could not find classifier props file {0}.", ClassifierPropsFile.FullName);
        return StageResult.FAILED;
    }
    Debug("Using classifier props file {0}.", ClassifierPropsFile.FullName);

    javaCommand = new JavaCommand(JavaHome, ClassPath, "edu.stanford.nlp.classify.ColumnDataClassifier", "-mx16000m",
        "-trainFile", TrainingFile.FullName, "-testFile", TestFile.FullName, "-prop", ClassifierPropsFile.FullName);
    PrintCommand(javaCommand);
    Task c = javaCommand.Run();
    if (!CheckCommandStartedAndReport(javaCommand))
    {
        return StageResult.FAILED;
    }

    // These prefixes do not change while the output is scanned, so build them once.
    string trainingDatasetReadPrefix = "Reading dataset from {0} ... done".F(TrainingFile.FullName);
    string testDatasetReadPrefix = "Reading dataset from {0} ... done".F(TestFile.FullName);

    ClassifierOutput = new List<string>();
    foreach (string s in javaCommand.GetOutputAndErrorLines())
    {
        // The order of the branches below is significant: the first matching branch wins.
        if (!BuiltClassifier && s.StartsWith("Built this classifier"))
        {
            BuiltClassifier = true;
            Match m = builtClassifierRegex.Match(s);
            if (m.Success)
            {
                ClassifierType = m.Groups[1].Value;
                NumberofFeatures = Int32.Parse(m.Groups[2].Value);
                NumberofClasses = Int32.Parse(m.Groups[3].Value);
                NumberofParameters = Int32.Parse(m.Groups[4].Value);
                Info("Built classifier {0} with {1} features, {2} classes and {3} parameters.", ClassifierType, NumberofFeatures, NumberofClasses, NumberofParameters);
            }
        }
        else if (ClassifierType.IsEmpty() && s.StartsWith("QNMinimizer called on double function"))
        {
            ClassifierType = "BinaryLogisticClassifier";
            Match m = binaryClassiferQNN.Match(s);
            if (m.Success)
            {
                NumberofFeatures = Int32.Parse(m.Groups[1].Value);
                Info("Built classifier {0} with {1} features.", ClassifierType, NumberofFeatures);
            }
            else
            {
                Error("Could not parse BinaryLogisticClassifier output: {0}.", s);
            }
        }
        else if (!ReadTrainingDataset && s.StartsWith(trainingDatasetReadPrefix))
        {
            ReadTrainingDataset = true;
            Match m = readDataSetRegex.Match(s);
            if (m.Success)
            {
                TrainingDataSetItems = Int32.Parse(m.Groups[3].Value);
                Info("{0} items in training dataset read in {1} s.", TrainingDataSetItems, m.Groups[2].Value);
            }
            else
            {
                Error("Could not parse classifier output line: {0}.", s);
                return StageResult.FAILED;
            }
        }
        else if (!ReadTestDataset && s.StartsWith(testDatasetReadPrefix))
        {
            ReadTestDataset = true;
            Match m = readDataSetRegex.Match(s);
            if (m.Success)
            {
                TestDataSetItems = Int32.Parse(m.Groups[3].Value);
                Info("{0} items in test dataset read in {1} s.", TestDataSetItems, m.Groups[2].Value);
            }
            else
            {
                Error("Could not parse classifier output line: {0}.", s);
                return StageResult.FAILED;
            }
        }
        else if (s.StartsWith("### Fold"))
        {
            bool firstFoldLine = !KFoldCrossValidation;
            KFoldCrossValidation = true;
            Match m = kFold.Match(s);
            if (!m.Success)
            {
                // An unparsed fold header would leave KFoldIndex unset (or stale) and the
                // per-fold branches below would then fail or record against the wrong fold.
                Error("Could not parse k-fold output line: {0}.", s);
                return StageResult.FAILED;
            }
            if (firstFoldLine && !KFoldIndex.HasValue)
            {
                // NOTE(review): the per-fold arrays are fixed at 10 entries; a fold index
                // of 10 or more would be out of range - confirm the maximum fold count.
                MicroAveragedF1Folds = new float[10];
                MacroAveragedF1Folds = new float[10];
            }
            KFoldIndex = Int32.Parse(m.Groups[1].Value);
        }
        else if (!KFoldCrossValidation && !MicroAveragedF1.HasValue && s.StartsWith("Accuracy/micro-averaged F1"))
        {
            Match m = f1MicroRegex.Match(s);
            if (m.Success)
            {
                MicroAveragedF1 = Single.Parse(m.Groups[1].Value);
                Info("Micro-averaged F1 = {0}.", MicroAveragedF1);
            }
            else
            {
                Error("Could not parse micro-averaged F1 statistic {0}.", s);
            }
        }
        else if (KFoldCrossValidation && ReadTestDataset && !MicroAveragedF1.HasValue && s.StartsWith("Accuracy/micro-averaged F1"))
        {
            // Overall (non-fold) statistic reported once the test dataset has been read.
            Match m = f1MicroRegex.Match(s);
            if (m.Success)
            {
                MicroAveragedF1 = Single.Parse(m.Groups[1].Value);
                Info("Micro-averaged F1 = {0}.", MicroAveragedF1);
            }
            else
            {
                Error("Could not parse micro-averaged F1 statistic {0}.", s);
            }
        }
        else if (KFoldCrossValidation && s.StartsWith("Accuracy/micro-averaged F1"))
        {
            Match m = f1MicroRegex.Match(s);
            if (m.Success)
            {
                MicroAveragedF1Folds[KFoldIndex.Value] = Single.Parse(m.Groups[1].Value);
                Info("Fold {0} Micro-averaged F1 = {1}.", KFoldIndex.Value, MicroAveragedF1Folds[KFoldIndex.Value]);
            }
            else
            {
                Error("Could not parse micro-averaged F1 statistic {0}.", s);
            }
        }
        else if (!KFoldCrossValidation && !MacroAveragedF1.HasValue && s.StartsWith("Macro-averaged F1"))
        {
            Match m = f1MacroRegex.Match(s);
            if (m.Success)
            {
                MacroAveragedF1 = Single.Parse(m.Groups[1].Value);
                Info("Macro-averaged F1 = {0}.", MacroAveragedF1);
            }
            else
            {
                Error("Could not parse macro-averaged F1 statistic {0}.", s);
            }
        }
        else if (KFoldCrossValidation && ReadTestDataset && !MacroAveragedF1.HasValue && s.StartsWith("Macro-averaged F1"))
        {
            // Overall (non-fold) statistic: store it in MacroAveragedF1, mirroring the
            // micro-averaged branch above, rather than overwriting the last fold's entry.
            Match m = f1MacroRegex.Match(s);
            if (m.Success)
            {
                MacroAveragedF1 = Single.Parse(m.Groups[1].Value);
                Info("Macro-averaged F1 = {0}.\n", MacroAveragedF1);
            }
            else
            {
                Error("Could not parse macro-averaged F1 statistic {0}.", s);
            }
        }
        else if (KFoldCrossValidation && s.StartsWith("Macro-averaged F1"))
        {
            Match m = f1MacroRegex.Match(s);
            if (m.Success)
            {
                MacroAveragedF1Folds[KFoldIndex.Value] = Single.Parse(m.Groups[1].Value);
                Info("Fold {0} Macro-averaged F1 = {1}.\n", KFoldIndex.Value, MacroAveragedF1Folds[KFoldIndex.Value]);
            }
            else
            {
                Error("Could not parse macro-averaged F1 statistic {0}.", s);
            }
        }
        else if (Features == null && s.StartsWith("Built this classifier: 1"))
        {
            // NOTE(review): the first line starting with "Built this classifier" is always
            // consumed by the first branch of this chain, so this branch only ever sees
            // later such lines - confirm that this ordering is intended.
            Features = new Dictionary<string, float>();
            string f = s.Remove(0, "Built this classifier: ".Length);
            foreach (string l in f.Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
            {
                string[] ls = l.Split('=');
                if (ls.Length < 2)
                {
                    // A fragment without '=' has no weight to parse; skip it instead of
                    // failing with an index-out-of-range exception.
                    Error("Could not parse classifier feature: {0}.", l);
                    continue;
                }
                Features.Add(ls[0].Trim(), Single.Parse(ls[1].Trim()));
            }
            Info("Using {0} features.", Features.Count);
        }
        else if (s.StartsWith("Cls"))
        {
            Match m = classStatisticRegex.Match(s);
            if (m.Success)
            {
                ClassStatistic cs = new ClassStatistic()
                {
                    Name = m.Groups[1].Value,
                    TruePositives = Int32.Parse(m.Groups[2].Value),
                    FalsePositives = Int32.Parse(m.Groups[3].Value),
                    TrueNegatives = Int32.Parse(m.Groups[4].Value),
                    Accuracy = Single.Parse(m.Groups[5].Value),
                    Precision = Single.Parse(m.Groups[6].Value),
                    Recall = Single.Parse(m.Groups[7].Value),
                    F1 = Single.Parse(m.Groups[8].Value)
                };
                _ClassStatistics.Add(cs);
                Info(s);
            }
            else
            {
                L.Error("Could not parse class statistic: {0}.", s);
            }
        }
        else
        {
            // Last branch of the chain: run the result regex once and reuse the match.
            Match m = resultRegex.Match(s);
            if (m.Success)
            {
                ClassifierResult cr = new ClassifierResult()
                {
                    GoldAnswer = m.Groups[1].Value,
                    ClassifierAnswer = m.Groups[2].Value,
                    P_GoldAnswer = Single.Parse(m.Groups[3].Value),
                    P_ClAnswer = Single.Parse(m.Groups[4].Value)
                };
                _Results.Add(cr);
            }
        }

        // Every line is retained and echoed at debug level regardless of which branch ran.
        ClassifierOutput.Add(s);
        Debug(s);
    }

    c.Wait();
    if (!CheckCommandSuccessAndReport(javaCommand))
    {
        return StageResult.FAILED;
    }
    if (!KFoldCrossValidation)
    {
        Info("Got {0} class statistics.", _ClassStatistics.Count);
        Info("Got {0} results.", _Results.Count);
    }
    return StageResult.SUCCESS;
}