// Randomly samples hyperparameter combinations, evaluating each on a held-out validation
// slice and checkpointing results to a CSV file after every iteration.
public static void RandomMonteCarlo(string Filename, Dataset Train,
    int NumberOfTrainingSamplesMin, int NumberOfTrainingSamplesMax,
    int NumberOfDecisionTreesMin, int NumberOfDecisionTreesMax,
    int MaxTreeDepthMin, int MaxTreeDepthMax,
    int SamplesPerTreeMin, int SamplesPerTreeMax)
{
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("numberOfTrainingSamples,numberOfDecisionTrees,maxTreeDepth,samplesPerTree,fitness");

    // Runs until the process is stopped manually.
    while (true)
    {
        int numberOfTrainingSamples = RNG.Next(NumberOfTrainingSamplesMin, NumberOfTrainingSamplesMax);
        int numberOfDecisionTrees = RNG.Next(NumberOfDecisionTreesMin, NumberOfDecisionTreesMax);
        int maxTreeDepth = RNG.Next(MaxTreeDepthMin, MaxTreeDepthMax);
        int samplesPerTree = RNG.Next(SamplesPerTreeMin, SamplesPerTreeMax);

        // A tree cannot sample more rows than the training slice contains.
        if (samplesPerTree > numberOfTrainingSamples)
        {
            samplesPerTree = numberOfTrainingSamples;
        }

        // First numberOfTrainingSamples rows train the forest; the remaining rows validate it.
        Dataset currentTrain = new Dataset();
        currentTrain.Inputs.AddRange(Train.Inputs.GetRange(0, numberOfTrainingSamples));
        currentTrain.Outputs.AddRange(Train.Outputs.GetRange(0, numberOfTrainingSamples));

        Dataset currentValidation = new Dataset();
        currentValidation.Inputs.AddRange(Train.Inputs.GetRange(numberOfTrainingSamples, Train.Inputs.Count - numberOfTrainingSamples));
        currentValidation.Outputs.AddRange(Train.Outputs.GetRange(numberOfTrainingSamples, Train.Outputs.Count - numberOfTrainingSamples));

        DecisionForest df = new DecisionForest(currentTrain, numberOfDecisionTrees, maxTreeDepth, samplesPerTree);
        float fitness = Fitness(df, currentValidation);

        string line = numberOfTrainingSamples + "," + numberOfDecisionTrees + "," + maxTreeDepth + "," + samplesPerTree + "," + fitness;
        sb.AppendLine(line);
        Console.WriteLine(line);

        // Rewrite the full results file each iteration so progress survives an interrupted run.
        using (StreamWriter sw = new StreamWriter(Filename))
        {
            sw.Write(sb.ToString());
        }
    }
}
// Generates a labelled Archimedean spiral: point p sits at angle RadiusMod * p and radius
// AValue + BValue * angle. The class label advances every OutputCountSwap points and wraps
// back to 0 once ClassCounts labels have been used.
public static Dataset GenerateSpiral(float CenterX, float CenterY, float AValue, float BValue, float RadiusMod, int Points, int ClassCounts, int OutputCountSwap)
{
    Dataset ds = new Dataset();
    int outputCount = 0;
    int outputClass = 0;

    for (int p = 0; p < Points; p++)
    {
        float angle = RadiusMod * p;

        List<float> inputs = new List<float>();
        inputs.Add(CenterX + (AValue + BValue * angle) * (float)Math.Cos(angle));
        inputs.Add(CenterY + (AValue + BValue * angle) * (float)Math.Sin(angle));
        ds.Inputs.Add(inputs);

        List<float> outputs = new List<float>();
        outputs.Add(outputClass);
        ds.Outputs.Add(outputs);

        outputCount++;
        if (outputCount >= OutputCountSwap)
        {
            outputCount = 0;
            outputClass += 1;
            if (outputClass >= ClassCounts)
            {
                outputClass = 0;
            }
        }
    }

    return ds;
}
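// A minimal usage sketch for GenerateSpiral, assuming it is called from the same class.
// The argument values and output path are illustrative assumptions, not taken from the
// original code: a two-class spiral of 1000 points, swapping the label every 10 points,
// saved with the Dataset.WriteCSV helper used elsewhere in this code.
static void GenerateSpiralExample()
{
    // Spiral centred at the origin: radius = 0 + 0.5 * angle, angle step of 0.1 per point.
    Dataset spiral = GenerateSpiral(0f, 0f, 0f, 0.5f, 0.1f, 1000, 2, 10);
    spiral.WriteCSV("./data/spiral.win", false);
}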
// Copies only the input rows; the row lists themselves are shared with the host (shallow copy).
public static Dataset CloneInputSet(Dataset Host)
{
    Dataset clone = new Dataset();
    for (int row = 0; row < Host.Inputs.Count; row++)
    {
        clone.Inputs.Add(Host.Inputs[row]);
    }
    return clone;
}
public static float Fitness(DecisionForest DecisionForest, Dataset Validation)
{
    int correct = 0;
    for (int row = 0; row < Validation.Inputs.Count; row++)
    {
        if (Validation.Outputs[row][0] == DecisionForest.Classify(Validation.Inputs[row]))
        {
            correct++;
        }
    }
    return ((float)correct) / ((float)Validation.Inputs.Count);
}
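// A minimal sketch of using Fitness on its own, assuming it lives in the same class as the
// other static helpers here. The file path, split size, and forest hyperparameters below are
// illustrative assumptions.
static void EvaluateExample()
{
    Dataset train = new Dataset();
    train.ReadCSV("./data/train.win", 2000, false);

    // Hold out the last 200 rows for validation.
    int split = train.Inputs.Count - 200;
    Dataset fitTrain = new Dataset();
    fitTrain.Inputs.AddRange(train.Inputs.GetRange(0, split));
    fitTrain.Outputs.AddRange(train.Outputs.GetRange(0, split));

    Dataset validation = new Dataset();
    validation.Inputs.AddRange(train.Inputs.GetRange(split, train.Inputs.Count - split));
    validation.Outputs.AddRange(train.Outputs.GetRange(split, train.Outputs.Count - split));

    DecisionForest df = new DecisionForest(fitTrain, 20, 10, 500);
    Console.WriteLine("accuracy: " + Fitness(df, validation));
}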
public DecisionTree(Dataset Dataset, int MaxTreeDepth, int CurrentDepth = 0)
{
    if (CurrentDepth == MaxTreeDepth || Dataset.Inputs.Count <= 10)
    {
        DetermineClassification(Dataset);
        return;
    }

    DoBranch(Dataset, MaxTreeDepth, CurrentDepth);
}
// Builds NumberOfDecisionTrees trees, each trained on SamplesPerTree rows drawn from the
// dataset uniformly at random with replacement (bootstrap sampling).
public DecisionForest(Dataset Dataset, int NumberOfDecisionTrees, int MaxTreeDepth, int SamplesPerTree)
{
    DecisionTrees = new List<DecisionTree>();
    for (int tree = 0; tree < NumberOfDecisionTrees; tree++)
    {
        Console.WriteLine("t: " + tree + " / " + NumberOfDecisionTrees);

        Dataset randomSample = new Dataset();
        for (int s = 0; s < SamplesPerTree; s++)
        {
            int randomRow = RNG.Next(Dataset.Inputs.Count);
            randomSample.Inputs.Add(Dataset.Inputs[randomRow]);
            randomSample.Outputs.Add(Dataset.Outputs[randomRow]);
        }

        DecisionTrees.Add(new DecisionTree(randomSample, MaxTreeDepth));
    }
}
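// A minimal end-to-end sketch tying the pieces above together, assuming it sits alongside
// the GenerateSpiral helper: generate a labelled spiral, train a DecisionForest on it, and
// classify a single point. The hyperparameters (25 trees, depth 8, 300 samples per tree)
// and the query point are illustrative assumptions, not tuned values.
static void TrainAndClassifyExample()
{
    Dataset spiral = GenerateSpiral(0f, 0f, 0f, 0.5f, 0.1f, 1000, 2, 10);
    DecisionForest df = new DecisionForest(spiral, 25, 8, 300);

    List<float> point = new List<float> { 1.0f, -0.5f };
    Console.WriteLine("predicted class: " + df.Classify(point));
}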
private void DetermineClassification(Dataset Dataset)
{
    // Count how often each class label appears, then take the most common one (majority vote).
    Dictionary<float, int> classifications = new Dictionary<float, int>();
    for (int row = 0; row < Dataset.Inputs.Count; row++)
    {
        if (classifications.ContainsKey(Dataset.Outputs[row][0]))
        {
            classifications[Dataset.Outputs[row][0]]++;
        }
        else
        {
            classifications[Dataset.Outputs[row][0]] = 1;
        }
    }

    List<KeyValuePair<float, int>> classificationsList = classifications.ToList();
    classificationsList.Sort((a, b) => b.Value.CompareTo(a.Value)); // sort descending by count
    Classification = classificationsList[0].Key;
}
static void fin()
{
    // Loads the training data, builds a forest, classifies the last training row as a
    // single test input, and writes the prediction back out as a CSV.
    Dataset train = new Dataset();
    train.ReadCSV("./data/validate.win", 2000, false);

    Dataset test = new Dataset();
    test.Inputs.Add(train.Inputs[train.Inputs.Count - 1]);

    DecisionForest df = new DecisionForest(train, 10, 500, 500);

    for (int row = 0; row < test.Inputs.Count; row++)
    {
        Console.WriteLine("i: " + row + "/" + test.Inputs.Count);
        List<float> outputs = new List<float>();
        outputs.Add(df.Classify(test.Inputs[row]));
        test.Outputs.Add(outputs);
    }

    test.WriteCSV("./data/est.win", false);
}
public BigDecisionForest(Dataset Dataset, int NumberOfDecisionTrees, int MaxTreeDepth, int SamplesPerTree)
{
    if (!Directory.Exists("./trees/"))
    {
        Directory.CreateDirectory("./trees/");
    }

    BigDecisionTrees = new List<BigDecisionTree>();
    for (int tree = 0; tree < NumberOfDecisionTrees; tree++)
    {
        Console.WriteLine("t: " + tree + " / " + NumberOfDecisionTrees);

        Dataset randomSample = new Dataset();
        for (int s = 0; s < SamplesPerTree; s++)
        {
            int randomRow = RNG.Next(Dataset.Inputs.Count);
            randomSample.Inputs.Add(Dataset.Inputs[randomRow]);
            randomSample.Outputs.Add(Dataset.Outputs[randomRow]);
        }

        BigDecisionTrees.Add(new BigDecisionTree(randomSample, MaxTreeDepth));
    }
}
// Exhaustively sweeps the hyperparameter grid, evaluating every combination on a held-out
// validation slice and checkpointing results to a CSV file after each evaluation.
public static void IterativeMonteCarlo(string Filename, Dataset Train,
    int NumberOfTrainingSamplesMin, int NumberOfTrainingSamplesMax,
    int NumberOfDecisionTreesMin, int NumberOfDecisionTreesMax,
    int MaxTreeDepthMin, int MaxTreeDepthMax,
    int SamplesPerTreeMin, int SamplesPerTreeMax)
{
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("numberOfTrainingSamples,numberOfDecisionTrees,maxTreeDepth,samplesPerTree,fitness");

    for (int numberOfTrainingSamples = NumberOfTrainingSamplesMin; numberOfTrainingSamples <= NumberOfTrainingSamplesMax; numberOfTrainingSamples++)
    {
        for (int numberOfDecisionTrees = NumberOfDecisionTreesMin; numberOfDecisionTrees <= NumberOfDecisionTreesMax; numberOfDecisionTrees++)
        {
            for (int maxTreeDepth = MaxTreeDepthMin; maxTreeDepth <= MaxTreeDepthMax; maxTreeDepth++)
            {
                // A tree cannot sample more rows than the training slice contains.
                for (int samplesPerTree = SamplesPerTreeMin; samplesPerTree <= SamplesPerTreeMax && samplesPerTree <= numberOfTrainingSamples; samplesPerTree++)
                {
                    // First numberOfTrainingSamples rows train the forest; the remaining rows validate it.
                    Dataset currentTrain = new Dataset();
                    currentTrain.Inputs.AddRange(Train.Inputs.GetRange(0, numberOfTrainingSamples));
                    currentTrain.Outputs.AddRange(Train.Outputs.GetRange(0, numberOfTrainingSamples));

                    Dataset currentValidation = new Dataset();
                    currentValidation.Inputs.AddRange(Train.Inputs.GetRange(numberOfTrainingSamples, Train.Inputs.Count - numberOfTrainingSamples));
                    currentValidation.Outputs.AddRange(Train.Outputs.GetRange(numberOfTrainingSamples, Train.Outputs.Count - numberOfTrainingSamples));

                    DecisionForest df = new DecisionForest(currentTrain, numberOfDecisionTrees, maxTreeDepth, samplesPerTree);
                    float fitness = Fitness(df, currentValidation);

                    string line = numberOfTrainingSamples + "," + numberOfDecisionTrees + "," + maxTreeDepth + "," + samplesPerTree + "," + fitness;
                    sb.AppendLine(line);
                    Console.WriteLine(line);

                    // Rewrite the full results file after each evaluation so progress survives an interrupted run.
                    using (StreamWriter sw = new StreamWriter(Filename))
                    {
                        sw.Write(sb.ToString());
                    }
                }
            }
        }
    }

    // Final write once the sweep completes.
    using (StreamWriter sw = new StreamWriter(Filename))
    {
        sw.Write(sb.ToString());
    }
}
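// A minimal sketch of launching the grid sweep, assuming it lives alongside the other static
// helpers. The file paths and parameter ranges below are illustrative assumptions; every
// combination in the ranges is trained and scored, so narrow ranges keep the run tractable.
// RandomMonteCarlo above takes the same arguments and can be launched the same way, but runs
// until the process is stopped.
static void SweepExample()
{
    Dataset train = new Dataset();
    train.ReadCSV("./data/train.win", 2000, false);

    IterativeMonteCarlo("./data/sweep.csv", train,
        500, 1500,   // numberOfTrainingSamples range
        5, 25,       // numberOfDecisionTrees range
        2, 10,       // maxTreeDepth range
        50, 500);    // samplesPerTree range
}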
public BigRegressionTree(Dataset Dataset, int MaxTreeDepth, int CurrentDepth = 0)
{
    Root = new RegressionTree(Dataset, MaxTreeDepth);
    Filename = "./trees/" + Now().ToString() + "." + RNG.Next(0, 65535);
    Root.Write(Filename);
}
private void Split(int Component, float ComponentValue, Dataset Original, out Dataset LeftDataset, out Dataset RightDataset)
{
    LeftDataset = new Dataset();
    RightDataset = new Dataset();

    for (int row = 0; row < Original.Inputs.Count; row++)
    {
        if (Original.Inputs[row][Component] <= ComponentValue)
        {
            LeftDataset.Inputs.Add(Original.Inputs[row]);
            LeftDataset.Outputs.Add(Original.Outputs[row]);
        }
        else
        {
            RightDataset.Inputs.Add(Original.Inputs[row]);
            RightDataset.Outputs.Add(Original.Outputs[row]);
        }
    }
}
private void DoBranch(Dataset Dataset, int MaxTreeDepth, int CurrentDepth)
{
    Dataset leftDataset;
    Dataset rightDataset;
    int timeout = 0;

    do
    {
        // If no non-empty split is found after many attempts, fall back to a leaf node.
        timeout++;
        if (timeout > 1000)
        {
            DetermineClassification(Dataset);
            return;
        }

        // Pick a random input component and a random threshold within that component's range.
        BranchComponent = RNG.Next(Dataset.Inputs[0].Count);

        float componentLow;
        float componentHigh;
        float componentRange;
        Dataset.GetInputComponentRange(BranchComponent, out componentLow, out componentHigh, out componentRange);
        BranchValue = (float)((RNG.NextDouble() * componentRange) + componentLow);

        Split(BranchComponent, BranchValue, Dataset, out leftDataset, out rightDataset);
    } while (leftDataset.Inputs.Count == 0 || rightDataset.Inputs.Count == 0);

    LeftBranch = new DecisionTree(leftDataset, MaxTreeDepth, CurrentDepth + 1);
    RightBranch = new DecisionTree(rightDataset, MaxTreeDepth, CurrentDepth + 1);
}
private void DetermineRegression(Dataset Dataset)
{
    // The leaf's regression output is the per-component mean of the outputs that reach it.
    Regression = new List<float>();
    for (int i = 0; i < Dataset.Outputs[0].Count; i++)
    {
        Regression.Add(0f);
    }

    for (int row = 0; row < Dataset.Outputs.Count; row++)
    {
        for (int i = 0; i < Dataset.Outputs[0].Count; i++)
        {
            Regression[i] += Dataset.Outputs[row][i];
        }
    }

    // Divide by the number of rows (not the number of output components) to get the mean.
    for (int i = 0; i < Dataset.Outputs[0].Count; i++)
    {
        Regression[i] /= ((float)Dataset.Outputs.Count);
    }
}