/// <summary>
/// Scores a learner against the reference solution for a set of test sequences.
/// </summary>
/// <param name="learner">Learner whose <c>CalculateProbability</c> is queried once per test sequence.</param>
/// <param name="testSequences">Test sequences, read by index from 0 to <c>Count - 1</c>.</param>
/// <param name="solutionData">Reference values handed unchanged to the array-based <c>Evaluate</c> overload.</param>
/// <returns>Whatever the array-based <c>Evaluate</c> overload returns for the normalized guesses.</returns>
public static double Evaluate(Learner learner, SequenceData testSequences, double[] solutionData)
{
    int sequenceCount = testSequences.Count;
    double[] rawProbabilities = new double[sequenceCount];

    // Ask the learner for one probability per test sequence, in sequence order.
    int index = 0;
    while (index < sequenceCount)
    {
        rawProbabilities[index] = learner.CalculateProbability(testSequences[index]);
        index++;
    }

    // The raw guesses go through Normalize before being compared with the solution
    // (presumably so they sum to one -- confirm against Normalize).
    double[] normalizedProbabilities = Normalize(rawProbabilities);
    return Evaluate(normalizedProbabilities, solutionData);
}
/// <summary>
/// Trains and scores <paramref name="learner"/> on <paramref name="dataSet"/> <c>numberOfRuns</c> times,
/// logging every run and saving each learned model to disk.
/// </summary>
/// <param name="dataSet">Data set; <c>SplitData(2.0 / 3.0, run)</c> is called on it before every run.</param>
/// <param name="learner">Learner to train. Only the call to <c>Learn</c> is timed.</param>
/// <param name="outputWriter">Receives one human-readable, tab-separated line per run.</param>
/// <param name="csvWriter">Receives one CSV row per run: iteration, run, score, time (seconds), ticks.</param>
/// <param name="iteration">Parameter-iteration index; used only to label CSV rows and model file names.</param>
/// <returns>
/// Four values, in this order: average score, median score, average training time in seconds,
/// median training time in seconds.
/// </returns>
/// <remarks>
/// This is an iterator method: nothing runs until the result is enumerated, and each enumeration
/// repeats all runs. Model files are written below
/// <c>Benchmark_{Name}/DataSet_{number}/Models_{learner}/</c>, relative to the working directory;
/// that directory is not created here (assumed to exist already -- TODO confirm against the caller).
/// </remarks>
public IEnumerable<double> RunLearner(DataSet dataSet, Learner learner, StreamWriter outputWriter, StreamWriter csvWriter, int iteration)
{
    double[] runScores = new double[numberOfRuns];
    double[] runTimes = new double[numberOfRuns];
    double[] runTicks = new double[numberOfRuns];
    Stopwatch watch = new Stopwatch();

    for (int i = 0; i < numberOfRuns; i++)
    {
        Console.WriteLine("Run {0}...", (i + 1));

        // Split the data before starting the clock so that only learning is measured.
        watch.Reset();
        dataSet.SplitData(2.0 / 3.0, i);
        watch.Start();
        learner.Learn(dataSet.TrainingData, dataSet.ValidationData, dataSet.TestData);
        watch.Stop();

        double score = 0.0;
        if (useTestData)
        {
            score = PautomacEvaluator.Evaluate(learner, dataSet.TestData, dataSet.SolutionData);
        }
        else
        {
            // Negated sum over all validation sequences (presumably log-probabilities, given
            // the "Log Likelihood" label written below -- confirm against CalculateProbability).
            foreach (int[] signal in dataSet.ValidationData.GetAll())
            {
                score -= learner.CalculateProbability(signal, true);
            }
        }

        runScores[i] = score;
        runTimes[i] = (watch.ElapsedMilliseconds / 1000.0);
        runTicks[i] = watch.ElapsedTicks;

        outputWriter.WriteLine("Run {0:00}:\t{1:00000000.0000000000}\t{2:000000}\t{3:0000000000000000}", (i + 1), runScores[i], runTimes[i], runTicks[i]);
        outputWriter.Flush();

        // CSV rows are machine-readable, so format them culture-independently: with a
        // comma-decimal culture the numbers would otherwise contain the column separator.
        csvWriter.WriteLine(String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0},{1},{2},{3},{4},", iteration, i, runScores[i], runTimes[i], runTicks[i]));
        csvWriter.Flush();

        // Both model files share one path and differ only in their extension.
        string learnerFileName = learner.Name().ToLowerInvariant().Replace(' ', '_');
        string modelPathWithoutExtension = String.Format(@"Benchmark_{0}/DataSet_{1}/Models_{2}/Iter{3}_Run{4}", Name, dataSet.Number, learnerFileName, iteration, i);

        using (StreamWriter modelWriter = new StreamWriter(modelPathWithoutExtension + ".txt"),
                            modelCSVWriter = new StreamWriter(modelPathWithoutExtension + ".csv"))
        {
            modelWriter.AutoFlush = true;
            modelCSVWriter.AutoFlush = true;

            modelWriter.WriteLine("DataSet {0}", dataSet.Number);
            modelWriter.WriteLine("Learner: {0}", learner.Name());
            modelWriter.WriteLine("{0}: {1:0000.0000000000}", (useTestData ? "PautomaC Score" : "Log Likelihood"), score);
            modelWriter.WriteLine();

            learner.Save(modelWriter, modelCSVWriter);
        }
    }

    // The order of these four values is part of the contract with the caller.
    yield return runScores.Average();
    yield return Median(runScores);
    yield return runTimes.Average();
    yield return Median(runTimes);
}
/// <summary>
/// Benchmarks <paramref name="learner"/> on <paramref name="dataSet"/> for every parameter setting in
/// the range registered for it in <c>learners</c>, and reports the best setting.
/// </summary>
/// <param name="dataSet">Data set to benchmark on; its <c>Number</c> is part of every output path.</param>
/// <param name="learner">Learner to benchmark; it is re-initialised for each parameter setting.</param>
/// <returns>
/// Four values for the best setting, in this order: average score, median score, average runtime,
/// median runtime. "Best" is the lowest median score; on ties the earliest setting wins.
/// </returns>
/// <exception cref="KeyNotFoundException">
/// No parameter setting was benchmarked (the parameter range is empty).
/// </exception>
/// <remarks>
/// This is an iterator method: nothing runs until the result is enumerated, and each enumeration
/// repeats the whole benchmark. Output goes to <c>Benchmark_{Name}/DataSet_{number}/</c> relative
/// to the working directory: a text log, a <c>_SUMMARY.csv</c> and a <c>_RESULTS.csv</c>.
/// </remarks>
public IEnumerable<double> BenchmarkLearner(DataSet dataSet, Learner learner)
{
    Console.WriteLine("Benchmarking Learner {0}...", learner.Name());

    // This sets up a file for intermediate output during learning.
    // The checks are deliberately independent rather than else-if, as in the original.
    string intermediateOutputFileName = String.Format(@"Benchmark_{0}/DataSet_{1}/{2}", Name, dataSet.Number, "intermediate");
    if (learner is Learners.GreedyExtendLearner)
    {
        Learners.GreedyExtendLearner greedyExtendLearner = (Learners.GreedyExtendLearner)learner;
        greedyExtendLearner.SetIntermediateOutputFile(intermediateOutputFileName);
        greedyExtendLearner.SetSolutions(dataSet.SolutionData);
    }
    if (learner is Learners.PadawanLearner)
    {
        ((Learners.PadawanLearner)learner).SetIntermediateOutputFile(intermediateOutputFileName);
    }
    if (learner is Learners.GGLearner)
    {
        ((Learners.GGLearner)learner).SetIntermediateOutputFile(intermediateOutputFileName);
    }

    Dictionary<int, double> parameterAverageScores = new Dictionary<int, double>();
    Dictionary<int, double> parameterMedianScores = new Dictionary<int, double>();
    Dictionary<int, double> parameterAverageRuntimes = new Dictionary<int, double>();
    Dictionary<int, double> parameterMedianRuntimes = new Dictionary<int, double>();

    // -1 means "no iteration has been benchmarked yet".
    int bestIteration = -1;
    LearnerParameters parameters = learners[learner];

    // All three output files share this prefix.
    string outputPathPrefix = String.Format(@"Benchmark_{0}/DataSet_{1}/{2}", Name, dataSet.Number, learner.Name().ToLowerInvariant().Replace(' ', '_'));

    using (StreamWriter outputWriter = new StreamWriter(outputPathPrefix + ".txt"),
                        csvSummaryWriter = new StreamWriter(outputPathPrefix + "_SUMMARY.csv"),
                        csvResultWriter = new StreamWriter(outputPathPrefix + "_RESULTS.csv"))
    {
        outputWriter.AutoFlush = true;
        csvSummaryWriter.AutoFlush = true;
        csvResultWriter.AutoFlush = true;

        outputWriter.WriteLine("DataSet {0}", dataSet.Number);
        outputWriter.WriteLine("Learner: {0}", learner.Name());
        outputWriter.WriteLine();

        // NOTE(review): this header ends with a trailing comma but the summary rows below do not;
        // left unchanged so the file format stays as it was.
        csvSummaryWriter.WriteLine("Iteration,Median Score,Average Score,Median Time,Average Time,");
        csvResultWriter.WriteLine("Iteration,Run,Score,Time,Ticks,");

        for (int i = 0; ((parameters.Minimum + (i * parameters.StepSize)) <= parameters.Maximum); i++)
        {
            Console.WriteLine("Benchmarking Model with {0}...", IterationName(parameters, i));
            outputWriter.WriteLine("{0}:", IterationName(parameters, i));
            outputWriter.WriteLine();

            learner.Initialise(parameters, i);

            // RunLearner yields: average score, median score, average runtime, median runtime.
            double[] results = RunLearner(dataSet, learner, outputWriter, csvResultWriter, i).ToArray();
            parameterAverageScores.Add(i, results[0]);
            parameterMedianScores.Add(i, results[1]);
            parameterAverageRuntimes.Add(i, results[2]);
            parameterMedianRuntimes.Add(i, results[3]);

            // Lower median score is better; the strict comparison keeps the earliest setting on ties.
            if ((bestIteration < 0) || (parameterMedianScores[bestIteration] > parameterMedianScores[i]))
            {
                bestIteration = i;
            }

            // CSV rows are machine-readable, so format them culture-independently: with a
            // comma-decimal culture the numbers would otherwise contain the column separator.
            csvSummaryWriter.WriteLine(String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0},{1},{2},{3},{4}", i, parameterMedianScores[i], parameterAverageScores[i], parameterMedianRuntimes[i], parameterAverageRuntimes[i]));
            csvSummaryWriter.Flush();
            outputWriter.WriteLine();

            // A step size of zero would never advance past the first setting, so run it only once.
            if (parameters.StepSize == 0)
            {
                break;
            }
        }

        outputWriter.WriteLine();
        outputWriter.WriteLine("SUMMARY");
        outputWriter.WriteLine();
        foreach (int iteration in parameterMedianScores.Keys)
        {
            outputWriter.WriteLine("{0}:\t{1:00000000.0000000000}\t{2:00000000.0000000000}\t{3:000000}\t{4:000000}", IterationName(parameters, iteration), parameterMedianScores[iteration], parameterAverageScores[iteration], parameterMedianRuntimes[iteration], parameterAverageRuntimes[iteration]);
        }

        if (bestIteration < 0)
        {
            // With an empty range the lookups below used to fail with an anonymous
            // KeyNotFoundException; the exception type is kept so existing handlers still
            // match, but the message now names the actual cause.
            throw new KeyNotFoundException(String.Format("No parameter setting between {0} and {1} was benchmarked for learner {2}.", parameters.Minimum, parameters.Maximum, learner.Name()));
        }

        outputWriter.WriteLine();
        outputWriter.WriteLine("BEST");
        outputWriter.WriteLine();
        outputWriter.WriteLine("{0}:\t{1:00000000.0000000000}\t{2:00000000.0000000000}\t{3:000000}\t{4:000000}", IterationName(parameters, bestIteration), parameterMedianScores[bestIteration], parameterAverageScores[bestIteration], parameterMedianRuntimes[bestIteration], parameterAverageRuntimes[bestIteration]);
    }

    // The order of these four values is part of the contract with the caller.
    yield return parameterAverageScores[bestIteration];
    yield return parameterMedianScores[bestIteration];
    yield return parameterAverageRuntimes[bestIteration];
    yield return parameterMedianRuntimes[bestIteration];
}