/// <summary>
/// Trains a forest of <c>parameters.NumTrees</c> trees on <paramref name="data"/>,
/// building the trees concurrently on the thread pool.
/// </summary>
/// <param name="data">Training set handed to every tree.</param>
/// <param name="parameters">
/// Hyperparameters; <c>Seed</c> and <c>NumTrees</c> are read here, and the whole object
/// is forwarded to each <see cref="Tree"/>.
/// </param>
public RandomForest(LearningSet data, Hyperparameters parameters) {
    // The master generator is only used to hand out one seed per tree, in loop order.
    Random seeder = new Random(parameters.Seed);
    List<Task<Tree>> pending = new List<Task<Tree>>();

    for (int treeIndex = 0; treeIndex < parameters.NumTrees; treeIndex++) {
        // System.Random is not thread-safe, so every task gets its own generator
        // instead of sharing the master one.
        int treeSeed = seeder.Next();
        Random treeRandom = new Random(treeSeed);
        Task<Tree> growTree = Task.Run(() => new Tree(data, treeRandom, parameters));
        pending.Add(growTree);
    }

    // Read the feature count from the first data point while the trees are being built.
    NumFeatures = data.First().Features.Length;

    // Block until every tree has finished, then collect them in creation order.
    Task<Tree>[] allTasks = pending.ToArray();
    Task.WaitAll(allTasks);
    Trees = allTasks.Select(finished => finished.Result).ToArray();

    // The forest's accuracy is the plain mean of the per-tree accuracies.
    Accuracy = Trees.Average(tree => tree.Accuracy);
}
/// <summary>
/// Builds one decision tree. Every point of <paramref name="data"/> is assigned at random
/// either to the training "bag" or to the held-out "out-of-bag" set; the tree is grown
/// from the bag and its <c>Accuracy</c> is the fraction of out-of-bag points it
/// classifies correctly.
/// </summary>
/// <param name="data">Data set to split into bag and out-of-bag points.</param>
/// <param name="random">
/// Random source used for the split and then passed on to the root <see cref="Node"/>.
/// </param>
/// <param name="parameters">
/// Hyperparameters; <c>OutOfBag</c> is the threshold a uniform draw in [0, 1) is compared
/// against to send a point to the out-of-bag set. The object is also forwarded to the
/// root node.
/// </param>
/// <remarks>
/// When no point ends up out-of-bag (for example <c>OutOfBag</c> is 0 or the data set is
/// very small) the accuracy cannot be measured and is reported as
/// <see cref="double.NaN"/> instead of throwing.
/// </remarks>
public Tree(LearningSet data, Random random, Hyperparameters parameters) {
    List<DataPoint> bag = new List<DataPoint>();
    List<DataPoint> outOfBag = new List<DataPoint>();

    // Exactly one draw per point, in enumeration order.
    foreach (DataPoint point in data) {
        if (random.NextDouble() < parameters.OutOfBag) {
            outOfBag.Add(point);
        } else {
            bag.Add(point);
        }
    }

    Root = new Node(bag, random, parameters);

    // Enumerable.Average throws InvalidOperationException on an empty sequence, which
    // used to abort training whenever the out-of-bag set happened to be empty.
    // NaN marks the accuracy as "not measurable"; note that it propagates through any
    // average taken over tree accuracies.
    if (outOfBag.Count > 0) {
        Accuracy = outOfBag.Average(d => Root.Classify(d) == d.Classification ? 1.0 : 0.0);
    } else {
        Accuracy = double.NaN;
    }
}
/// <summary>
/// Command-line entry point. Parses the options, then either trains a forest from a
/// <c>.csv</c> file (saving it when a <c>.bin</c>/<c>.xml</c> path is also given) or loads
/// a previously serialized forest from a <c>.bin</c>/<c>.xml</c> file. Finally it
/// classifies the feature values supplied on the command line or, when none were
/// supplied, prints the forest's accuracy.
/// </summary>
/// <param name="args">
/// Recognised options, each followed by a value: <c>--num-trees</c>, <c>--max-features</c>,
/// <c>--min-features</c>, <c>--max-depth</c>, <c>--seed</c> (integers) and <c>--oob</c>
/// (floating point). Any other argument is treated as a training file (<c>.csv</c>),
/// a model file (<c>.bin</c>/<c>.xml</c>) or a numeric feature value of the point to
/// classify; anything else is rejected through <c>InvalidArgs</c>.
/// </param>
public static void Main(string[] args) {
    Hyperparameters parameters = new Hyperparameters {
        NumTrees = 10,
        MaxFeatures = -1, // -1 means "not set"; replaced by sqrt(feature count) when training
        MinFeatures = 1,
        MaxDepth = 10,
        Seed = (int)(DateTime.Now.Ticks % int.MaxValue),
        OutOfBag = 0.3
    };
    string trainingFile = null;
    string serializedFile = null;
    List<double> testData = new List<double>();

    // NOTE(review): InvalidArgs is defined outside this block; the code below is written
    // on the assumption that it does not return (exits or throws) - confirm.
    for (int i = 0; i < args.Length; ++i) {
        switch (args[i]) {
            case "--num-trees":
                ReadIntOption(args, ref i, out parameters.NumTrees);
                break;
            case "--max-features":
                ReadIntOption(args, ref i, out parameters.MaxFeatures);
                break;
            case "--min-features":
                ReadIntOption(args, ref i, out parameters.MinFeatures);
                break;
            case "--max-depth":
                ReadIntOption(args, ref i, out parameters.MaxDepth);
                break;
            case "--seed":
                ReadIntOption(args, ref i, out parameters.Seed);
                break;
            case "--oob":
                ReadDoubleOption(args, ref i, out parameters.OutOfBag);
                break;
            default:
                double val;
                // File names are recognised by extension. Ordinal comparison keeps the
                // check case-sensitive and independent of the current culture.
                if (args[i].EndsWith(".csv", StringComparison.Ordinal)) {
                    // At most one training file is accepted.
                    if (trainingFile == null) {
                        trainingFile = args[i];
                    } else {
                        InvalidArgs();
                    }
                } else if (args[i].EndsWith(".bin", StringComparison.Ordinal)
                           || args[i].EndsWith(".xml", StringComparison.Ordinal)) {
                    // At most one model file is accepted.
                    if (serializedFile == null) {
                        serializedFile = args[i];
                    } else {
                        InvalidArgs();
                    }
                } else if (double.TryParse(args[i], out val)) {
                    // Bare numbers are the features of the point to classify.
                    testData.Add(val);
                } else {
                    InvalidArgs();
                }
                break;
        }
    }

    RandomForest forest = null;
    if (trainingFile == null) {
        // No training data: the forest has to come from an existing model file.
        if (serializedFile == null || !File.Exists(serializedFile)) {
            Console.WriteLine("No model source");
            InvalidArgs();
            // Should InvalidArgs ever return, there is no forest to work with; stop here
            // instead of dereferencing null further down.
            return;
        }
        forest = LoadForest(serializedFile);
    } else {
        LearningSet learningSet = new LearningSet(trainingFile);
        if (parameters.MaxFeatures == -1) {
            // Default: square root of the number of features of the first data point.
            parameters.MaxFeatures = (int)Math.Sqrt(learningSet.First().Features.Length);
        }
        forest = new RandomForest(learningSet, parameters);
        if (serializedFile != null) {
            SaveForest(forest, serializedFile);
        }
    }

    if (testData.Count > 0) {
        if (testData.Count == forest.NumFeatures) {
            Console.WriteLine(forest.Classify(new DataPoint { Features = testData.ToArray() }));
        } else {
            Console.WriteLine("Invalid number of features");
            InvalidArgs();
        }
    } else {
        Console.WriteLine("Accuracy: {0}%", forest.Accuracy * 100.0);
    }
}

/// <summary>
/// Reads the integer value that follows the option at <c>args[index]</c>, advancing
/// <paramref name="index"/> onto that value. Calls <c>InvalidArgs</c> when the value is
/// missing or is not an integer.
/// </summary>
private static void ReadIntOption(string[] args, ref int index, out int value) {
    if (index + 1 >= args.Length) {
        InvalidArgs();
    }
    // Parsed with the current culture, as int.TryParse does when no provider is given.
    if (!int.TryParse(args[++index], out value)) {
        InvalidArgs();
    }
}

/// <summary>
/// Reads the floating-point value that follows the option at <c>args[index]</c>,
/// advancing <paramref name="index"/> onto that value. Calls <c>InvalidArgs</c> when the
/// value is missing or is not a number.
/// </summary>
private static void ReadDoubleOption(string[] args, ref int index, out double value) {
    if (index + 1 >= args.Length) {
        InvalidArgs();
    }
    // Parsed with the current culture, as double.TryParse does when no provider is given.
    if (!double.TryParse(args[++index], out value)) {
        InvalidArgs();
    }
}

/// <summary>
/// Deserializes a forest from <paramref name="path"/>: XML when the name ends in
/// <c>.xml</c>, <see cref="BinaryFormatter"/> otherwise.
/// </summary>
private static RandomForest LoadForest(string path) {
    if (path.EndsWith(".xml", StringComparison.Ordinal)) {
        XmlSerializer xmlSerializer = new XmlSerializerFactory().CreateSerializer(typeof(RandomForest));
        using (Stream input = new FileStream(path, FileMode.Open, FileAccess.Read)) {
            return (RandomForest)xmlSerializer.Deserialize(input);
        }
    } else {
        // SECURITY: BinaryFormatter.Deserialize can execute arbitrary code when fed a
        // crafted file. Only load .bin models from trusted sources; prefer the .xml format.
        BinaryFormatter binaryFormatter = new BinaryFormatter();
        using (Stream input = new FileStream(path, FileMode.Open, FileAccess.Read)) {
            return (RandomForest)binaryFormatter.Deserialize(input);
        }
    }
}

/// <summary>
/// Serializes <paramref name="forest"/> to <paramref name="path"/>, creating or
/// overwriting the file: XML when the name ends in <c>.xml</c>,
/// <see cref="BinaryFormatter"/> otherwise.
/// </summary>
private static void SaveForest(RandomForest forest, string path) {
    if (path.EndsWith(".xml", StringComparison.Ordinal)) {
        XmlSerializer xmlSerializer = new XmlSerializerFactory().CreateSerializer(typeof(RandomForest));
        using (Stream output = new FileStream(path, FileMode.Create, FileAccess.Write)) {
            xmlSerializer.Serialize(output, forest);
        }
    } else {
        BinaryFormatter binaryFormatter = new BinaryFormatter();
        using (Stream output = new FileStream(path, FileMode.Create, FileAccess.Write)) {
            binaryFormatter.Serialize(output, forest);
        }
    }
}