/// <summary>
/// Endlessly samples random hyperparameter combinations, trains a DecisionForest for
/// each, scores it on a held-out validation split, and logs each result as a CSV row
/// to <paramref name="Filename"/>. Runs until the process is killed; the CSV file is
/// rewritten after every iteration so partial results survive a crash.
/// </summary>
/// <param name="Filename">Path of the CSV results file (overwritten each iteration).</param>
/// <param name="Train">Full dataset; the first N rows train, the remainder validate.</param>
public static void RandomMonteCarlo(string Filename, Dataset Train, int NumberOfTrainingSamplesMin, int NumberOfTrainingSamplesMax, int NumberOfDecisionTreesMin, int NumberOfDecisionTreesMax, int MaxTreeDepthMin, int MaxTreeDepthMax, int SamplesPerTreeMin, int SamplesPerTreeMax)
{
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("numberOfTrainingSamples,numberofDecisionTrees,maxTreeDepth,samplesPerTree,fitness");
    while (true)
    {
        // NOTE: RNG.Next(min, max) — presumably System.Random semantics, so max is
        // exclusive; confirm against the RNG helper if inclusive bounds are intended.
        int numberOfTrainingSamples = RNG.Next(NumberOfTrainingSamplesMin, NumberOfTrainingSamplesMax);
        int numberOfDecisionTrees = RNG.Next(NumberOfDecisionTreesMin, NumberOfDecisionTreesMax);
        int maxTreeDepth = RNG.Next(MaxTreeDepthMin, MaxTreeDepthMax);
        int samplesPerTree = RNG.Next(SamplesPerTreeMin, SamplesPerTreeMax);

        // A tree cannot sample more rows than the training split contains.
        if (samplesPerTree > numberOfTrainingSamples)
        {
            samplesPerTree = numberOfTrainingSamples;
        }

        // FIX: skip draws that leave no validation rows. The original would either
        // throw in GetRange (when numberOfTrainingSamples > Train.Inputs.Count) or
        // produce a 0/0 division (NaN fitness) when the validation split was empty.
        if (numberOfTrainingSamples >= Train.Inputs.Count)
        {
            continue;
        }

        // First numberOfTrainingSamples rows train; everything after them validates.
        Dataset currentTrain = new Dataset();
        currentTrain.Inputs.AddRange(Train.Inputs.GetRange(0, numberOfTrainingSamples));
        currentTrain.Outputs.AddRange(Train.Outputs.GetRange(0, numberOfTrainingSamples));

        Dataset currentValidation = new Dataset();
        currentValidation.Inputs.AddRange(Train.Inputs.GetRange(numberOfTrainingSamples, Train.Inputs.Count - numberOfTrainingSamples));
        // FIX: size the outputs slice from Train.Outputs (the original used
        // Train.Inputs.Count), so a row-count mismatch between the two lists
        // fails loudly here instead of corrupting the split.
        currentValidation.Outputs.AddRange(Train.Outputs.GetRange(numberOfTrainingSamples, Train.Outputs.Count - numberOfTrainingSamples));

        DecisionForest df = new DecisionForest(currentTrain, numberOfDecisionTrees, maxTreeDepth, samplesPerTree);
        float fitness = Fitness(df, currentValidation);

        string line = numberOfTrainingSamples + "," + numberOfDecisionTrees + "," + maxTreeDepth + "," + samplesPerTree + "," + fitness;
        sb.AppendLine(line);
        Console.WriteLine(line);

        // Rewrite the whole log each pass so results persist if the loop is killed.
        using (StreamWriter sw = new StreamWriter(Filename))
        {
            sw.Write(sb.ToString());
        }
    }
}
/// <summary>
/// Classification accuracy of <paramref name="DecisionForest"/> over
/// <paramref name="Validation"/>: the fraction of rows whose predicted class
/// equals the first output column. Exact float equality is used because class
/// labels are integer-valued.
/// </summary>
public static float Fitness(DecisionForest DecisionForest, Dataset Validation)
{
    int hits = 0;
    int total = Validation.Inputs.Count;
    for (int i = 0; i < total; i++)
    {
        float predicted = DecisionForest.Classify(Validation.Inputs[i]);
        if (Validation.Outputs[i][0] == predicted)
        {
            hits++;
        }
    }
    return ((float)hits) / ((float)total);
}
/// <summary>
/// Demo: generates a spiral dataset, trains a small forest on it, classifies a
/// clone of the training inputs, and writes both sets to CSV under ./data/.
/// </summary>
static void spiral()
{
    Dataset train = Dataset.GenerateSpiral(0, 0, 1, 1, 0.1f, 1000, 10, 100);
    train.WriteCSV("./data/spiral_train.csv", true);

    Dataset test = Dataset.CloneInputSet(train);
    DecisionForest forest = new DecisionForest(train, 10, 30, 20);

    // Predict a single-column output for every cloned input row.
    for (int i = 0; i < test.Inputs.Count; i++)
    {
        List<float> prediction = new List<float>();
        prediction.Add(forest.Classify(test.Inputs[i]));
        test.Outputs.Add(prediction);
    }

    test.WriteCSV("./data/spiral_test.csv", true);
}
/// <summary>
/// Loads the OCR train/test CSVs, trains a large forest, classifies every test
/// row (logging progress), and writes the predictions to ./data/ocr_est.csv.
/// </summary>
static void ocr()
{
    OCRDataset train = new OCRDataset();
    train.ReadTrainCSV("./data/ocr_train.csv");

    OCRDataset test = new OCRDataset();
    test.ReadTestCSV("./data/ocr_test.csv");

    DecisionForest forest = new DecisionForest(train, 20000, 1000, 10000);

    int total = test.Inputs.Count;
    for (int i = 0; i < total; i++)
    {
        Console.WriteLine("i: " + i + "/" + total);
        List<float> prediction = new List<float>();
        prediction.Add(forest.Classify(test.Inputs[i]));
        test.Outputs.Add(prediction);
    }

    test.WriteTestCSV("./data/ocr_est.csv");
}
/// <summary>
/// Loads financial data from ./data/validate.win, trains a forest on it, then
/// classifies only the final input row and writes the estimate to ./data/est.win.
/// </summary>
static void fin()
{
    Dataset train = new Dataset();
    train.ReadCSV("./data/validate.win", 2000, false);

    // The test set is just the most recent row of the training data.
    Dataset test = new Dataset();
    test.Inputs.Add(train.Inputs[train.Inputs.Count - 1]);

    DecisionForest forest = new DecisionForest(train, 10, 500, 500);

    int total = test.Inputs.Count;
    for (int i = 0; i < total; i++)
    {
        Console.WriteLine("i: " + i + "/" + total);
        List<float> prediction = new List<float>();
        prediction.Add(forest.Classify(test.Inputs[i]));
        test.Outputs.Add(prediction);
    }

    test.WriteCSV("./data/est.win", false);
}
/// <summary>
/// Exhaustively grid-searches every hyperparameter combination within the given
/// inclusive bounds, trains a DecisionForest for each, scores it on a held-out
/// validation split, and logs each result as a CSV row to <paramref name="Filename"/>.
/// The CSV file is rewritten after every combination so partial results survive a crash.
/// </summary>
/// <param name="Filename">Path of the CSV results file (overwritten each iteration).</param>
/// <param name="Train">Full dataset; the first N rows train, the remainder validate.</param>
public static void IterativeMonteCarlo(string Filename, Dataset Train, int NumberOfTrainingSamplesMin, int NumberOfTrainingSamplesMax, int NumberOfDecisionTreesMin, int NumberOfDecisionTreesMax, int MaxTreeDepthMin, int MaxTreeDepthMax, int SamplesPerTreeMin, int SamplesPerTreeMax)
{
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("numberOfTrainingSamples,numberofDecisionTrees,maxTreeDepth,samplesPerTree,fitness");
    // FIX: the original outer loop iterated numberOfTrainingSamples over
    // NumberOfDecisionTreesMin..NumberOfDecisionTreesMax (copy-paste error),
    // silently ignoring the training-sample bounds. It now uses
    // NumberOfTrainingSamplesMin..NumberOfTrainingSamplesMax as intended.
    for (int numberOfTrainingSamples = NumberOfTrainingSamplesMin; numberOfTrainingSamples <= NumberOfTrainingSamplesMax; numberOfTrainingSamples += 1)
    {
        // FIX: skip sizes that leave no validation rows — the original would either
        // throw in GetRange (numberOfTrainingSamples > Train.Inputs.Count) or
        // produce a 0/0 division (NaN fitness) when the validation split was empty.
        if (numberOfTrainingSamples >= Train.Inputs.Count)
        {
            continue;
        }
        for (int numberOfDecisionTrees = NumberOfDecisionTreesMin; numberOfDecisionTrees <= NumberOfDecisionTreesMax; numberOfDecisionTrees += 1)
        {
            for (int maxTreeDepth = MaxTreeDepthMin; maxTreeDepth <= MaxTreeDepthMax; maxTreeDepth += 1)
            {
                // A tree cannot sample more rows than the training split contains.
                for (int samplesPerTree = SamplesPerTreeMin; samplesPerTree <= SamplesPerTreeMax && samplesPerTree <= numberOfTrainingSamples; samplesPerTree += 1)
                {
                    // First numberOfTrainingSamples rows train; the remainder validate.
                    Dataset currentTrain = new Dataset();
                    currentTrain.Inputs.AddRange(Train.Inputs.GetRange(0, numberOfTrainingSamples));
                    currentTrain.Outputs.AddRange(Train.Outputs.GetRange(0, numberOfTrainingSamples));

                    Dataset currentValidation = new Dataset();
                    currentValidation.Inputs.AddRange(Train.Inputs.GetRange(numberOfTrainingSamples, Train.Inputs.Count - numberOfTrainingSamples));
                    // FIX: size the outputs slice from Train.Outputs (the original used
                    // Train.Inputs.Count), so a row-count mismatch fails loudly here.
                    currentValidation.Outputs.AddRange(Train.Outputs.GetRange(numberOfTrainingSamples, Train.Outputs.Count - numberOfTrainingSamples));

                    DecisionForest df = new DecisionForest(currentTrain, numberOfDecisionTrees, maxTreeDepth, samplesPerTree);
                    float fitness = Fitness(df, currentValidation);

                    string line = numberOfTrainingSamples + "," + numberOfDecisionTrees + "," + maxTreeDepth + "," + samplesPerTree + "," + fitness;
                    sb.AppendLine(line);
                    Console.WriteLine(line);

                    // Rewrite the whole log each pass so results persist if the run is killed.
                    using (StreamWriter sw = new StreamWriter(Filename))
                    {
                        sw.Write(sb.ToString());
                    }
                }
            }
        }
    }
    // Final write so the completed grid is on disk even if the last combination was skipped.
    using (StreamWriter sw = new StreamWriter(Filename))
    {
        sw.Write(sb.ToString());
    }
}