Ejemplo n.º 1
0
 /// <summary>
 /// Produces a random forest for the training set referenced by <paramref name="input"/>.
 /// When the <c>.wtree</c> file next to the training CSV does not exist yet, an <c>.arff</c>
 /// file is written via <c>WriteArff</c> and the tree file is generated via <c>RunWeka</c>;
 /// an already existing tree file is reused as-is. The forest is then loaded from the tree file.
 /// </summary>
 /// <param name="input">
 /// Task input; its <c>TrainingSetCsv</c> value is the base path to which the
 /// <c>.arff</c> and <c>.wtree</c> suffixes are appended.
 /// </param>
 /// <returns>
 /// An output built from the loaded forest, the training CSV path and the tree file path.
 /// </returns>
 /// <exception cref="Exception">
 /// The tree file does not exist after the generation attempt. Any exception caught while
 /// generating it is attached as the inner exception.
 /// </exception>
 protected override RandomForestFromWekaOutput Perform(RandomForestFromWekaInput input)
 {
     s_logger.Debug($"RandomForestFromWeka starts");
     try
     {
         var outputTreeFile = $"{input.TrainingSetCsv}.wtree";
         var outputArffFile = $"{input.TrainingSetCsv}.arff";
         if (!File.Exists(outputTreeFile))
         {
             // Remember what went wrong during generation so the failure below can report its cause.
             Exception generationFailure = null;
             try
             {
                 var arffExitCode = WriteArff(input, outputArffFile);
                 s_logger.Debug($"ArffWrite returns exitCode={arffExitCode}");
                 if (File.Exists(outputArffFile))
                 {
                     var exitCode = RunWeka(input, outputArffFile, outputTreeFile);
                     s_logger.Debug($"RandomForestFromWeka returns exitCode={exitCode}");
                 }
                 else
                 {
                     s_logger.Error($"Could not write arff file for [{input.TrainingSetCsv}]");
                 }
             }
             catch (Exception e)
             {
                 // Best effort: the tree file may still have been produced, so the decision
                 // to fail is taken from the file check below rather than from the exception.
                 s_logger.Error(e, "An exception ocurred when writting forest from weka");
                 generationFailure = e;
             }
             // Checked after the try/catch (not inside a finally) so that no in-flight
             // exception can be replaced by this one.
             if (!File.Exists(outputTreeFile))
             {
                 throw new Exception($"RandomForestFromWeka task failed to write output to [{outputTreeFile}]", generationFailure);
             }
         }
         // The tree file exists at this point (pre-existing or freshly generated): load it.
         var forest = RandomForest.FromWekaFile(outputTreeFile);
         return new RandomForestFromWekaOutput(forest, input.TrainingSetCsv, outputTreeFile);
     }
     finally
     {
         s_logger.Debug($"RandomForestFromWeka ends in {Stopwatch.ElapsedMilliseconds}ms");
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads every Weka forest file (<c>*.wtree</c>) found in the input directory and evaluates
        /// it against each <c>*.csv</c> file in that same directory whose file name does not
        /// contain <c>-sample</c>. Forests are processed one at a time and the method blocks until
        /// all of them have been evaluated.
        /// </summary>
        /// <param name="arguments">
        /// Parsed arguments; <c>InputDirectory</c> must be set and must name an existing directory.
        /// </param>
        private static void EvaluateForests(Args arguments)
        {
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");
            // we have a bunch of forests and we will compare them by classifying against
            // unknown samples. The forests are in weka format (wtree)
            s_logger.Info("Evaluating random forests...");
            // one work item per forest file: load it, then run it over every evaluation csv
            var treeEvaluationBlock = new ActionBlock<string>(
                forestFile =>
                {
                    var classifier = RandomForest.FromWekaFile(forestFile);
                    s_logger.Info($"Evaluating forest from [{forestFile}] against universe");
                    foreach (var evaluationFile in Directory.EnumerateFiles(arguments.InputDirectory, "*.csv"))
                    {
                        // EnumerateFiles yields full paths; test only the file name so that a
                        // "-sample" in the directory path does not exclude every csv.
                        if (Path.GetFileName(evaluationFile).Contains("-sample"))
                        {
                            continue;
                        }
                        classifier.EvaluateOnTrainingSet(evaluationFile, true, false, 0);
                    }
                },
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1
                });

            // queue every forest file found in the input directory
            foreach (var treeFile in Directory.EnumerateFiles(arguments.InputDirectory, "*.wtree"))
            {
                treeEvaluationBlock.Post(treeFile);
            }
            // no more input: let the block drain, then block until it has finished
            treeEvaluationBlock.Complete();
            treeEvaluationBlock.Completion.Wait();
        }