예제 #1
0
        /// <summary>
        /// Builds the forest by growing <c>parameters.NumTrees</c> trees concurrently and
        /// then averaging their individual accuracies.
        /// </summary>
        /// <param name="data">Learning set handed to every tree; its first point determines <c>NumFeatures</c>.</param>
        /// <param name="parameters">Supplies the master seed and the tree count, and is forwarded to each tree.</param>
        public RandomForest(LearningSet data, Hyperparameters parameters)
        {
            // A single master generator hands out one seed per tree on the calling thread,
            // so each tree gets a private Random and the seed sequence does not depend on
            // how the tasks end up being scheduled.
            var seedSource = new Random(parameters.Seed);
            var pending    = new List <Task <Tree> >();

            for (int treeIndex = 0; treeIndex < parameters.NumTrees; treeIndex++)
            {
                int treeSeed = seedSource.Next();
                var perTreeRandom = new Random(treeSeed);
                Task <Tree> growTree = Task.Run(() => new Tree(data, perTreeRandom, parameters));
                pending.Add(growTree);
            }

            // Recorded while the trees are still being grown in the background.
            NumFeatures = data.First().Features.Length;

            // Block until every tree has been built, then collect them in submission order.
            Task <Tree>[] running = pending.ToArray();
            Task.WaitAll(running);

            Trees    = running.Select(finished => finished.Result).ToArray();
            Accuracy = Trees.Average(tree => tree.Accuracy);
        }
예제 #2
0
        /// <summary>
        /// Grows one decision tree from a random subset of <paramref name="data"/> and
        /// scores it on the points that were held out of training.
        /// </summary>
        /// <param name="data">Labelled points to divide into a training bag and a held-out set.</param>
        /// <param name="random">Generator used for the split and then passed on to the root node.</param>
        /// <param name="parameters">
        /// Supplies <c>OutOfBag</c>, the threshold each point's random draw is compared against,
        /// and is forwarded to the root node.
        /// </param>
        public Tree(LearningSet data, Random random, Hyperparameters parameters)
        {
            List <DataPoint> bag      = new List <DataPoint>();
            List <DataPoint> outOfBag = new List <DataPoint>();

            // Each point is held out independently when its draw falls below OutOfBag;
            // every other point goes into the training bag.
            foreach (DataPoint point in data)
            {
                if (random.NextDouble() < parameters.OutOfBag)
                {
                    outOfBag.Add(point);
                }
                else
                {
                    bag.Add(point);
                }
            }
            Root = new Node(bag, random, parameters);

            // Enumerable.Average throws InvalidOperationException on an empty sequence,
            // which happens whenever no point was held out (OutOfBag <= 0, or a very small
            // data set). There is nothing to score in that case, so the accuracy is
            // reported as NaN instead of aborting construction.
            Accuracy = outOfBag.Count == 0
                ? double.NaN
                : outOfBag.Average(d => Root.Classify(d) == d.Classification ? 1.0 : 0.0);
        }
예제 #3
0
        /// <summary>
        /// Command-line entry point. Trains a forest from a CSV file or loads a serialized
        /// one, then either classifies the feature values given on the command line or
        /// prints the forest's accuracy.
        /// </summary>
        /// <param name="args">
        /// Recognised arguments: <c>--num-trees</c>, <c>--max-features</c>, <c>--min-features</c>,
        /// <c>--max-depth</c>, <c>--seed</c> (each followed by an integer), <c>--oob</c> (followed
        /// by a number), at most one <c>.csv</c> training file, at most one <c>.bin</c>/<c>.xml</c>
        /// model file, and any number of plain numbers that form the point to classify.
        /// </param>
        public static void Main(string[] args)
        {
            Hyperparameters parameters = new Hyperparameters {
                NumTrees    = 10,
                MaxFeatures = -1, // sentinel: replaced below by sqrt(feature count) when training
                MinFeatures = 1,
                MaxDepth    = 10,
                Seed        = (int)(DateTime.Now.Ticks % int.MaxValue),
                OutOfBag    = 0.3
            };
            string        trainingFile   = null;
            string        serializedFile = null;
            List <double> testData       = new List <double>();

            for (int i = 0; i < args.Length; ++i)
            {
                bool accepted;
                switch (args[i])
                {
                case "--num-trees":
                    accepted = TryReadIntOption(args, ref i, ref parameters.NumTrees);
                    break;

                case "--max-features":
                    accepted = TryReadIntOption(args, ref i, ref parameters.MaxFeatures);
                    break;

                case "--min-features":
                    accepted = TryReadIntOption(args, ref i, ref parameters.MinFeatures);
                    break;

                case "--max-depth":
                    accepted = TryReadIntOption(args, ref i, ref parameters.MaxDepth);
                    break;

                case "--seed":
                    accepted = TryReadIntOption(args, ref i, ref parameters.Seed);
                    break;

                case "--oob":
                    accepted = TryReadDoubleOption(args, ref i, ref parameters.OutOfBag);
                    break;

                default:
                    accepted = TryReadPositional(args[i], ref trainingFile, ref serializedFile, testData);
                    break;
                }
                if (!accepted)
                {
                    // InvalidArgs is defined outside this method; returning afterwards keeps
                    // Main from carrying on with unusable arguments should it ever return.
                    InvalidArgs();
                    return;
                }
            }

            RandomForest forest;
            if (trainingFile == null)
            {
                // No training data: the model has to come from an existing serialized file.
                if (serializedFile == null || !File.Exists(serializedFile))
                {
                    Console.WriteLine("No model source");
                    InvalidArgs();
                    return;
                }
                forest = LoadForest(serializedFile);
            }
            else
            {
                LearningSet learningSet = new LearningSet(trainingFile);
                if (parameters.MaxFeatures == -1)
                {
                    parameters.MaxFeatures = (int)Math.Sqrt(learningSet.First().Features.Length);
                }
                forest = new RandomForest(learningSet, parameters);
                if (serializedFile != null)
                {
                    SaveForest(forest, serializedFile);
                }
            }

            if (testData.Count == 0)
            {
                Console.WriteLine("Accuracy: {0}%", forest.Accuracy * 100.0);
                return;
            }
            if (testData.Count != forest.NumFeatures)
            {
                Console.WriteLine("Invalid number of features");
                InvalidArgs();
                return;
            }
            Console.WriteLine(forest.Classify(new DataPoint {
                Features = testData.ToArray()
            }));
        }

        /// <summary>
        /// Consumes the argument after <c>args[index]</c> as the integer value of an option.
        /// </summary>
        /// <param name="args">The full command line.</param>
        /// <param name="index">Position of the option name; advanced onto the value when one is present.</param>
        /// <param name="target">Receives the parsed value; left untouched when the value is missing.</param>
        /// <returns><c>true</c> when a value was present and parsed as an integer.</returns>
        private static bool TryReadIntOption(string[] args, ref int index, ref int target)
        {
            if (index + 1 >= args.Length)
            {
                return false;
            }
            return int.TryParse(args[++index], out target);
        }

        /// <summary>
        /// Consumes the argument after <c>args[index]</c> as the floating-point value of an option.
        /// </summary>
        /// <param name="args">The full command line.</param>
        /// <param name="index">Position of the option name; advanced onto the value when one is present.</param>
        /// <param name="target">Receives the parsed value; left untouched when the value is missing.</param>
        /// <returns><c>true</c> when a value was present and parsed as a number.</returns>
        private static bool TryReadDoubleOption(string[] args, ref int index, ref double target)
        {
            if (index + 1 >= args.Length)
            {
                return false;
            }
            return double.TryParse(args[++index], out target);
        }

        /// <summary>
        /// Classifies an argument that is not a named option: a training file, a model
        /// file, or one feature value of the point to classify.
        /// </summary>
        /// <param name="arg">The argument text.</param>
        /// <param name="trainingFile">Set when <paramref name="arg"/> ends in <c>.csv</c>; may only be set once.</param>
        /// <param name="serializedFile">Set when <paramref name="arg"/> ends in <c>.bin</c> or <c>.xml</c>; may only be set once.</param>
        /// <param name="testData">Receives <paramref name="arg"/> when it parses as a number.</param>
        /// <returns><c>false</c> for a repeated file argument or text that is none of the above.</returns>
        private static bool TryReadPositional(string arg, ref string trainingFile, ref string serializedFile, List <double> testData)
        {
            // File extensions are identifiers, not linguistic text, so compare them ordinally.
            if (arg.EndsWith(".csv", StringComparison.Ordinal))
            {
                if (trainingFile != null)
                {
                    return false;
                }
                trainingFile = arg;
                return true;
            }
            if (arg.EndsWith(".bin", StringComparison.Ordinal) || arg.EndsWith(".xml", StringComparison.Ordinal))
            {
                if (serializedFile != null)
                {
                    return false;
                }
                serializedFile = arg;
                return true;
            }

            double val;
            if (double.TryParse(arg, out val))
            {
                testData.Add(val);
                return true;
            }
            return false;
        }

        /// <summary>
        /// Reads a forest back from <paramref name="path"/>, as XML when the name ends in
        /// <c>.xml</c> and with <see cref="BinaryFormatter"/> otherwise.
        /// </summary>
        /// <param name="path">Existing model file.</param>
        /// <returns>The deserialized forest.</returns>
        private static RandomForest LoadForest(string path)
        {
            if (path.EndsWith(".xml", StringComparison.Ordinal))
            {
                XmlSerializer serializer = new XmlSerializerFactory().CreateSerializer(typeof(RandomForest));
                using (Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read)) {
                    return (RandomForest)serializer.Deserialize(stream);
                }
            }

            // SECURITY: BinaryFormatter.Deserialize can run attacker-controlled code when given
            // a crafted file and is obsolete in current .NET. Only load .bin models from a
            // trusted source. It is kept here so existing .bin model files remain readable.
            BinaryFormatter formatter = new BinaryFormatter();
            using (Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read)) {
                return (RandomForest)formatter.Deserialize(stream);
            }
        }

        /// <summary>
        /// Writes <paramref name="forest"/> to <paramref name="path"/>, as XML when the name
        /// ends in <c>.xml</c> and with <see cref="BinaryFormatter"/> otherwise. An existing
        /// file is overwritten.
        /// </summary>
        /// <param name="forest">The trained forest to persist.</param>
        /// <param name="path">Destination file.</param>
        private static void SaveForest(RandomForest forest, string path)
        {
            // The serializer is created before the file is opened so that a failure to
            // build it does not truncate an existing model file.
            if (path.EndsWith(".xml", StringComparison.Ordinal))
            {
                XmlSerializer serializer = new XmlSerializerFactory().CreateSerializer(typeof(RandomForest));
                using (Stream stream = new FileStream(path, FileMode.Create, FileAccess.Write)) {
                    serializer.Serialize(stream, forest);
                }
            }
            else
            {
                BinaryFormatter formatter = new BinaryFormatter();
                using (Stream stream = new FileStream(path, FileMode.Create, FileAccess.Write)) {
                    formatter.Serialize(stream, forest);
                }
            }
        }