/// <summary>
/// Produces a random forest for the training set CSV referenced by <paramref name="input"/>.
/// When no <c>.wtree</c> file exists next to the CSV, an <c>.arff</c> file is written with
/// <c>WriteArff</c> and <c>RunWeka</c> is invoked to generate the tree file. The forest is
/// then loaded from the tree file with <c>RandomForest.FromWekaFile</c>.
/// </summary>
/// <param name="input">Task input; its <c>TrainingSetCsv</c> path is the base name for the generated files.</param>
/// <returns>The loaded forest together with the training set CSV path and the tree file path.</returns>
/// <exception cref="Exception">
/// Thrown when the tree file still does not exist after the generation attempt. Any exception
/// caught while generating it is attached as <see cref="Exception.InnerException"/>.
/// </exception>
protected override RandomForestFromWekaOutput Perform(RandomForestFromWekaInput input)
{
    s_logger.Debug("RandomForestFromWeka starts");
    try
    {
        var outputTreeFile = $"{input.TrainingSetCsv}.wtree";
        var outputArffFile = $"{input.TrainingSetCsv}.arff";

        // An existing tree file is reused as-is; generation only happens when it is missing.
        if (!File.Exists(outputTreeFile))
        {
            Exception generationFailure = null;
            try
            {
                var arffExitCode = WriteArff(input, outputArffFile);
                s_logger.Debug($"ArffWrite returns exitCode={arffExitCode}");
                if (File.Exists(outputArffFile))
                {
                    var exitCode = RunWeka(input, outputArffFile, outputTreeFile);
                    s_logger.Debug($"RandomForestFromWeka returns exitCode={exitCode}");
                }
                else
                {
                    s_logger.Error($"Could not write arff file for [{input.TrainingSetCsv}]");
                }
            }
            catch (Exception e)
            {
                s_logger.Error(e, "An exception ocurred when writting forest from weka");
                // Remember the root cause so it is not lost if the output turns out to be missing.
                generationFailure = e;
            }

            // Success is defined by the tree file existing, regardless of exit codes or a caught
            // exception. Checked after the try/catch (not inside a finally) so the original
            // failure can travel along as the inner exception.
            if (!File.Exists(outputTreeFile))
            {
                throw new Exception(
                    $"RandomForestFromWeka task failed to write output to [{outputTreeFile}]",
                    generationFailure);
            }
        }

        // and now load the forest
        var forest = RandomForest.FromWekaFile(outputTreeFile);
        return new RandomForestFromWekaOutput(forest, input.TrainingSetCsv, outputTreeFile);
    }
    finally
    {
        s_logger.Debug($"RandomForestFromWeka ends in {Stopwatch.ElapsedMilliseconds}ms");
    }
}
/// <summary>
/// Loads every <c>*.wtree</c> forest found in the input directory and evaluates each one against
/// every <c>*.csv</c> file in that directory whose file name does not contain <c>-sample</c>.
/// Blocks until all forests have been processed.
/// </summary>
/// <param name="arguments">Command arguments; <c>InputDirectory</c> must name an existing directory.</param>
private static void EvaluateForests(Args arguments)
{
    Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
    Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");

    // we have a bunch of forests and we will compare them by classifying against
    // unknown samples. The forests are in weka format (wtree)
    s_logger.Info("Evaluating random forests...");

    // One forest is evaluated at a time (MaxDegreeOfParallelism = 1).
    var treeEvaluationBlock = new ActionBlock<string>(
        treeFile =>
        {
            var classifier = RandomForest.FromWekaFile(treeFile);
            s_logger.Info($"Evaluating forest from [{treeFile}] against universe");

            foreach (var evaluationFile in Directory.EnumerateFiles(arguments.InputDirectory, "*.csv"))
            {
                // Test only the file name: EnumerateFiles returns paths that include the input
                // directory, so a directory containing "-sample" would otherwise exclude every file.
                if (!Path.GetFileName(evaluationFile).Contains("-sample"))
                {
                    classifier.EvaluateOnTrainingSet(evaluationFile, true, false, 0);
                }
            }
        },
        new ExecutionDataflowBlockOptions()
        {
            MaxDegreeOfParallelism = 1
        });

    // post each forest file for evaluation
    foreach (var treeFile in Directory.EnumerateFiles(arguments.InputDirectory, "*.wtree"))
    {
        treeEvaluationBlock.Post(treeFile);
    }

    // signal that no more items will be posted, then block until the queue has drained
    treeEvaluationBlock.Complete();
    treeEvaluationBlock.Completion.Wait();
}