/// <summary>
/// Parses simple1.txt with attribute auto-detection and checks the number of
/// cases and attributes that come back.
/// </summary>
public void TestAutoDetect()
{
    // No initializer: ParseCSV assigns the list through the out parameter, so a
    // pre-allocated list would be thrown away immediately.
    List<DAttribute> attributes;

    //read in a simple file with 7 examples with 5 attributes
    List<Case> TestData = DRT.ParseCSV(TestPath + @"\simple\simple1.txt", out attributes);

    Assert.AreEqual(7, TestData.Count);
    Assert.AreEqual(5, attributes.Count);
}
/// <summary>
/// Builds an ID3 tree over simple1.txt with 1 passed as the depth argument,
/// checks that a tree was produced, and prints it to the console.
/// </summary>
public void TestDepthLimit()
{
    // Every column of the file takes the values "0" or "1".
    string[] alphabet = new string[] { "0", "1" };

    // The last constructor flag is true only for "VarFinal"
    // (presumably marking the label column -- confirm against DAttribute).
    List<DAttribute> attributes = new List<DAttribute>(5);
    attributes.Add(new DAttribute("X_1", 0, new List<string>(alphabet), false, false));
    attributes.Add(new DAttribute("X_2", 1, new List<string>(alphabet), false, false));
    attributes.Add(new DAttribute("X_3", 2, new List<string>(alphabet), false, false));
    attributes.Add(new DAttribute("X_4", 3, new List<string>(alphabet), false, false));
    attributes.Add(new DAttribute("VarFinal", 4, new List<string>(alphabet), false, true));

    List<Case> TestData = DRT.ParseCSV(attributes.ToArray(), TestPath + @"\simple\simple1.txt");

    ID3_Node Tree = ID3Tools.ID3(attributes, TestData, 1, ID3Tools.EntropyCalucalation.IG);

    // Make the test actually verify something before the tree is dereferenced.
    Assert.IsNotNull(Tree);

    System.Console.WriteLine(Tree.PrintTree(attributes.ToArray()));
}
/// <summary>
/// Trains an ID3 tree on simple1.txt (999 passed as the depth argument),
/// prints it, and checks that the seventh case is classified as 0.
/// </summary>
public void TestSimple()
{
    // The attributes are declared by hand so the printed tree is readable when debugging.
    string[] binaryValues = new string[] { "0", "1" };
    string[] featureNames = new string[] { "X_1", "X_2", "X_3", "X_4" };

    List<DAttribute> attributes = new List<DAttribute>(5);
    for (int id = 0; id < featureNames.Length; id++)
    {
        // Each attribute receives its own copy of the value list.
        attributes.Add(new DAttribute(featureNames[id], id, new List<string>(binaryValues), false, false));
    }
    attributes.Add(new DAttribute("VarFinal", 4, new List<string>(binaryValues), false, true));

    string csvPath = TestPath + @"\simple\simple1.txt";
    List<Case> cases = DRT.ParseCSV(attributes.ToArray(), csvPath);

    ID3_Node root = ID3Tools.ID3(attributes, cases, 999, ID3Tools.EntropyCalucalation.IG);
    string rendered = root.PrintTree(attributes.ToArray());
    System.Console.WriteLine(rendered);

    Assert.AreEqual(0, ID3Tools.TestWithTree(cases[6], root));
}
/// <summary>
/// Trains primal sub-gradient SVM learners on the bank-note data for three
/// values of C, twice over, printing progress, final errors, weights and bias
/// to the console, then waits for input before returning.
/// </summary>
public static void Main()
{
    //Attributes for the data
    // NOTE(review): every attribute is constructed with 1 as its second argument;
    // confirm whether that argument is meant to be a unique per-column id.
    DAttribute[] Attributes = new DAttribute[5];
    Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
    Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
    Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
    Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);
    Attributes[4] = new DAttribute("Genuine", 1, new List<String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

    List<Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
    List<Case> TestBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

    //Convert output to -1,1 as opposed to 0,1
    Case.ColXtoY(TrainBank, 4, 0, -1);
    Case.ColXtoY(TestBank, 4, 0, -1);

    //set up parameters
    const int Epochs = 100;
    double[] C = new double[] { 100.0 / 873.0, 500.0 / 873.0, 700.0 / 873.0 };
    double LearningRate = .2;
    double LearningAdjust = .75;
    int Seed = 1500;

    //begin testing
    Console.WriteLine("Starting primal Subgradient Decent with C = { 100/873, 500/873, 700/873 }");
    Console.WriteLine("=====================================================================================");
    Console.WriteLine("\nUsing NewLR = LR / (1 + LR * T / D) for learning rate. \n");

    //report
    Console.WriteLine("\tBase Learning Rate = " + LearningRate);
    Console.WriteLine("\tNum epochs (T) = " + Epochs);
    Console.WriteLine("\tLearning Rate Adjustment = " + LearningAdjust);

    RunPrimalSvmTrials(C, LearningRate, LearningAdjust, Seed, Epochs, TrainBank, TestBank);

    Console.WriteLine("-------------------------------------------------------------------------------------\n");

    Console.WriteLine("Using NewLR = LR/ (1 + T) for learning rate.\n");
    Console.WriteLine("\tBase Learning Rate = " + LearningRate);
    Console.WriteLine("\tNum epochs (T) = " + Epochs);
    Console.WriteLine("\tLearning Rate Adjustment = " + LearningAdjust);

    // NOTE(review): this pass builds and trains the learners with exactly the same
    // arguments as the first pass; nothing visible here selects the LR / (1 + T)
    // schedule. Confirm whether PGradientEpoch should receive a different argument.
    RunPrimalSvmTrials(C, LearningRate, LearningAdjust, Seed, Epochs, TrainBank, TestBank);

    //let the user read the stuff on screen.
    Console.WriteLine("\n\n\nFinished execution. Hit any key to exit.");
    Console.Read();
}

/// <summary>
/// For each value in <paramref name="tradeoffs"/>, creates an SVMGradient learner,
/// runs <paramref name="epochs"/> single-epoch updates on it, and prints the
/// training error every ten epochs followed by the final training error,
/// testing error, weight vector and bias.
/// </summary>
private static void RunPrimalSvmTrials(double[] tradeoffs, double learningRate, double learningAdjust, int seed, int epochs, List<Case> train, List<Case> test)
{
    foreach (double c in tradeoffs)
    {
        SVMGradient learner = new SVMGradient(c, learningRate, learningAdjust, seed, train);
        Console.WriteLine("\nCreated new primal learner with C = " + c);

        for (int completed = 1; completed <= epochs; completed++)
        {
            // Run the epoch first so the progress line below describes epochs that
            // have really finished (reporting before the update was off by one).
            learner.PGradientEpoch(1);

            if (completed % 10 == 0)
            {
                Console.WriteLine("\tCompleted " + completed + " Epochs.");
                Console.WriteLine("\tTraining error = " + learner.getTrainingError());
            }
        }

        Console.WriteLine("\n\tTraining error = " + learner.getTrainingError());
        Console.WriteLine("\tTesting error = " + learner.getTestError(test));

        double[] weight = learner.getWeight();
        Console.Write("\tWeight = { " + string.Join(", ", weight) + "}\n");
        Console.WriteLine("\tBias = " + learner.getBias());
    }
}
/// <summary>
/// Builds ID3 decision trees for the car data set and the bank data set at
/// increasing depth arguments with three split heuristics (IG, GI, ME), and
/// writes the printed trees together with their training/testing errors to
/// text files under TestPath/RunResults. Each part runs only when its
/// Build* switch is set.
/// </summary>
public static void Main()
{
    // ========= Part 1 ============= //
    if (BuildCarTrees)
    {
        //This is the car example.
        List<DAttribute> attributeCars = new List<DAttribute>(7);

        //while I could auto detect this, it's much easier to read the trees if I name the DataAttributes ahead of time
        //below data descriptions come from data-desc.txt, located near the data for this training data.
        string[] AVariants = new string[] { "vhigh", "high", "med", "low" }; //array of attribute variants to pass in to an attribute
        attributeCars.Add(new DAttribute("buying", 0, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        attributeCars.Add(new DAttribute("maint", 1, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "2", "3", "4", "5more" };
        attributeCars.Add(new DAttribute("doors", 2, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "2", "4", "more" };
        attributeCars.Add(new DAttribute("persons", 3, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "small", "med", "big" };
        attributeCars.Add(new DAttribute("lug_boot", 4, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "low", "med", "high" };
        attributeCars.Add(new DAttribute("safety", 5, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "unacc", "acc", "good", "vgood" };
        attributeCars.Add(new DAttribute("label", 6, new List<string>(AVariants), DAttribute.Type.Categorical, true));

        List<Case> TrainCars = DRT.ParseCSV(attributeCars.ToArray(), TestPath + @"\car\train.csv");
        List<Case> TestCars = DRT.ParseCSV(attributeCars.ToArray(), TestPath + @"\car\test.csv");

        StringBuilder TreeLayout = new StringBuilder();
        AppendDecisionTreeReports(TreeLayout, "Cars", 6, attributeCars, TrainCars, TestCars);

        Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsCars.txt");
        System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsCars.txt", TreeLayout.ToString());
    }

    // ========= Part 2 ============= //
    // bank information
    if (BuildBankTrees)
    {
        List<DAttribute> attributeBank = new List<DAttribute>(17);

        //Once again, could auto detect, but doing so makes the data harder to read. Furthermore, autodetecting doesn't work for filling in missing values.
        //below data descriptions come from data-desc.txt, located near the data for this training data.
        string[] AVariants;

        //age being numeric means that the actual variants will be figured out at run time. The variant will be overwritten when we pull in the testing data.
        attributeBank.Add(new DAttribute("age", 0, null, DAttribute.Type.BinaryNumeric, false));
        AVariants = new string[] { "admin.", "unknown", "unemployed", "management", "housemaid", "entrepreneur", "student", "blue-collar", "self-employed", "retired", "technician", "services" };
        attributeBank.Add(new DAttribute("job", 1, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "married", "divorced", "single" };
        attributeBank.Add(new DAttribute("marital", 2, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "unknown", "secondary", "primary", "tertiary" };
        attributeBank.Add(new DAttribute("education", 3, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "yes", "no" };
        attributeBank.Add(new DAttribute("default", 4, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        attributeBank.Add(new DAttribute("balance", 5, null, DAttribute.Type.BinaryNumeric, false));
        AVariants = new string[] { "yes", "no" };
        attributeBank.Add(new DAttribute("housing", 6, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "yes", "no" };
        attributeBank.Add(new DAttribute("loan", 7, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "unknown", "telephone", "cellular" };
        attributeBank.Add(new DAttribute("contact", 8, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        attributeBank.Add(new DAttribute("day", 9, null, DAttribute.Type.BinaryNumeric, false));
        AVariants = new string[] { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
        attributeBank.Add(new DAttribute("month", 10, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        attributeBank.Add(new DAttribute("duration", 11, null, DAttribute.Type.BinaryNumeric, false));
        attributeBank.Add(new DAttribute("campaign", 12, null, DAttribute.Type.BinaryNumeric, false));
        attributeBank.Add(new DAttribute("pdays", 13, null, DAttribute.Type.BinaryNumeric, false));
        attributeBank.Add(new DAttribute("previous", 14, null, DAttribute.Type.BinaryNumeric, false));
        AVariants = new string[] { "unknown", "other", "failure", "success" }; //If unknown needs to be filled in, remove it from this list.
        attributeBank.Add(new DAttribute("poutcome", 15, new List<string>(AVariants), DAttribute.Type.Categorical, false));
        AVariants = new string[] { "yes", "no" };
        attributeBank.Add(new DAttribute("result", 16, new List<string>(AVariants), DAttribute.Type.Categorical, true));

        if (BuildBankTreeNormal)
        {
            List<Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
            List<Case> TestBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);

            StringBuilder TreeLayout = new StringBuilder();
            AppendDecisionTreeReports(TreeLayout, "Bank", 16, attributeBank, TrainBank, TestBank);

            Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsBankNormal.txt");
            System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankNormal.txt", TreeLayout.ToString());
        }

        if (BuildBankMissingVals)
        {
            //In this case, the "unknown" values in poutcome are treated as missing.
            //"unknown" is left out of the variant list, as the note on the original
            //list instructs; re-adding it made this branch a repeat of the normal run.
            attributeBank[15] = new DAttribute("poutcome", 15, new List<string>(new string[] { "other", "failure", "success" }), DAttribute.Type.Categorical, false);

            //Now we rebuild all the datasets, which will have elements filled in by the majority elements.
            List<Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
            List<Case> TestBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);

            StringBuilder TreeLayout = new StringBuilder();
            AppendDecisionTreeReports(TreeLayout, "Bank", 16, attributeBank, TrainBank, TestBank);

            Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsBankMissingVals.txt");
            System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankMissingVals.txt", TreeLayout.ToString());
        }
    }
}

/// <summary>
/// Builds one ID3 tree per heuristic (IG, then GI, then ME) and per depth
/// argument from 1 to <paramref name="maxDepth"/> inclusive, and appends each
/// tree's training error, testing error and printed layout to
/// <paramref name="layout"/>. A progress line is written to the console after
/// every tree.
/// </summary>
private static void AppendDecisionTreeReports(StringBuilder layout, string dataName, int maxDepth, List<DAttribute> attributes, List<Case> train, List<Case> test)
{
    // Parallel arrays: heuristic, its report title, and its console progress line.
    var heuristics = new[] { ID3Tools.EntropyCalucalation.IG, ID3Tools.EntropyCalucalation.GI, ID3Tools.EntropyCalucalation.ME };
    string[] titles = new string[] { "Information Gain", "Gini Index", "Majority Error" };
    string[] progress = new string[] { "Finished an IG Tree", "Finished a GI Tree", "Finished an ME Tree" };

    for (int h = 0; h < heuristics.Length; h++)
    {
        for (int depth = 1; depth <= maxDepth; depth++)
        {
            ID3_Node Tree = ID3Tools.ID3(attributes, train, depth, heuristics[h]);

            //add the tree to the string builder and prepare to write it to a file.
            double TrainError = ID3Tools.FindTestError(train, attributes, Tree);
            double TestError = ID3Tools.FindTestError(test, attributes, Tree);

            layout.Append(titles[h] + " " + dataName + ", Max Depth of " + depth + ". Test Error = " + TestError + ". TrainError = " + TrainError + " \n \n" + Tree.PrintTree(attributes.ToArray()) + "\n ----------------------------------------------------------------- \n");
            Console.WriteLine(progress[h]);
        }
    }
}
/// <summary>
/// Trains NeuralNet learners of several widths on the bank-note data, printing
/// the training error after every block of epochs and the final training and
/// testing errors, then waits for input before returning.
/// </summary>
public static void Main()
{
    //Attributes for the data
    // NOTE(review): every attribute is constructed with 1 as its second argument;
    // confirm whether that argument is meant to be a unique per-column id.
    DAttribute[] Attributes = new DAttribute[5];
    Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
    Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
    Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
    Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);
    Attributes[4] = new DAttribute("Genuine", 1, new List<String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

    List<Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
    List<Case> TestBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

    //Convert output to -1,1 as opposed to 0,1
    Case.ColXtoY(TrainBank, 4, 0, -1);
    Case.ColXtoY(TestBank, 4, 0, -1);

    //set up parameters
    const int EpochsPerReport = 10;
    const int ReportCount = 20;
    int[] NumNeurons = new int[] { 5, 10, 25, 50, 100 };
    double LearningRate = 1; //arbitrary number
    int Seed = 1500;

    //begin testing
    Console.WriteLine("Testing 3 layer neural nets with X neurons per layer = { 5, 10, 25, 50, 100 }");
    Console.WriteLine("=====================================================================================");
    Console.WriteLine("\nUsing NewLR = Base LR / (1 + Base LR * T / D) for learning rate. \n");

    //report
    Console.WriteLine("\tBase Learning Rate = " + LearningRate);
    // Derived from the loop bounds so the banner matches the epochs actually run
    // (20 reports of 10 epochs each; the banner used to claim 100).
    Console.WriteLine("\tNum epochs (T) = " + (EpochsPerReport * ReportCount));

    for (int j = 0; j < NumNeurons.Length; j++)
    {
        NeuralNet current = new NeuralNet(LearningRate, Seed, 4, NumNeurons[j], 2); //two hidden layers + 1 output, always

        Console.WriteLine("\nCreated new three layer Neural Net with " + NumNeurons[j] + " Neurons per layer.");

        for (int i = 0; i < ReportCount; i++)
        {
            current.runEpochs(EpochsPerReport, TrainBank);

            int epochsDone = (i + 1) * EpochsPerReport;
            Console.WriteLine("\tCompleted " + epochsDone + " Epochs.");
            Console.WriteLine("\tTraining error at " + epochsDone + " epochs = " + current.getError(TrainBank));
        }

        Console.WriteLine("\n\tFinal Training error = " + current.getError(TrainBank));
        Console.WriteLine("\tTesting error \t= " + current.getError(TestBank));
    }

    //let the user read the stuff on screen.
    Console.WriteLine("\n\n\nFinished execution. Hit any key to exit.");
    Console.Read();
}
/// <summary>
/// Runs the ensemble-learning experiments on the bank data set. Depending on
/// the Use* switches it sweeps AdaBoost, bagging and random-forest learners
/// over increasing tree counts, and runs bias/variance studies for bagging and
/// random forests. Every experiment writes CSV files under TestPath/RunResults.
/// </summary>
public static void Main()
{
    List<DAttribute> attributeBank = new List<DAttribute>(17);

    //Once again, could auto detect, but doing so makes the data harder to read. Furthermore, autodetecting doesn't work for filling in missing values.
    //below data descriptions come from data-desc.txt, located near the data for this training data.
    string[] AVariants;

    //age being numeric means that the actual variants will be figured out at run time. The variant will be overwritten when we pull in the testing data.
    attributeBank.Add(new DAttribute("age", 0, null, DAttribute.Type.BinaryNumeric, false));
    AVariants = new string[] { "admin.", "unknown", "unemployed", "management", "housemaid", "entrepreneur", "student", "blue-collar", "self-employed", "retired", "technician", "services" };
    attributeBank.Add(new DAttribute("job", 1, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "married", "divorced", "single" };
    attributeBank.Add(new DAttribute("marital", 2, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "unknown", "secondary", "primary", "tertiary" };
    attributeBank.Add(new DAttribute("education", 3, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "yes", "no" };
    attributeBank.Add(new DAttribute("default", 4, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    attributeBank.Add(new DAttribute("balance", 5, null, DAttribute.Type.BinaryNumeric, false));
    AVariants = new string[] { "yes", "no" };
    attributeBank.Add(new DAttribute("housing", 6, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "yes", "no" };
    attributeBank.Add(new DAttribute("loan", 7, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "unknown", "telephone", "cellular" };
    attributeBank.Add(new DAttribute("contact", 8, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    attributeBank.Add(new DAttribute("day", 9, null, DAttribute.Type.BinaryNumeric, false));
    AVariants = new string[] { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
    attributeBank.Add(new DAttribute("month", 10, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    attributeBank.Add(new DAttribute("duration", 11, null, DAttribute.Type.BinaryNumeric, false));
    attributeBank.Add(new DAttribute("campaign", 12, null, DAttribute.Type.BinaryNumeric, false));
    attributeBank.Add(new DAttribute("pdays", 13, null, DAttribute.Type.BinaryNumeric, false));
    attributeBank.Add(new DAttribute("previous", 14, null, DAttribute.Type.BinaryNumeric, false));
    AVariants = new string[] { "unknown", "other", "failure", "success" }; //If unknown needs to be filled in, remove it from this list.
    attributeBank.Add(new DAttribute("poutcome", 15, new List<string>(AVariants), DAttribute.Type.Categorical, false));
    AVariants = new string[] { "yes", "no" };
    attributeBank.Add(new DAttribute("result", 16, new List<string>(AVariants), DAttribute.Type.Categorical, true));

    List<Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
    List<Case> TestBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);

    if (UseBoost)
    {
        //going to generate a csv file of the ensemble learner's performance
        StringBuilder output = new StringBuilder();
        output.Append("T(rees),Training Error,Testing Error\n");

        EnsembleLearner current = null; //only assigned once the loop below has run
        for (int i = 1; i < NumIterations; i++) //Assignment specifies 1000 iterations
        {
            current = EnsembleTools.AdaBoost(i, TrainBank, attributeBank);
            double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
            double TestingError = current.TestEnsembleClassMass(TestBank, attributeBank);
            Console.WriteLine("Built an AdaBoost Learner with " + i + " Trees.");
            output.Append(i + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file

            //reset the training data by re-reading it instead of copying it
            TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
        }

        string outputTree = BuildPerTreeErrorCsv(current, TrainBank, TestBank, attributeBank);

        Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankBoost.csv");
        System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBoost.csv", output.ToString());
        System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBoostTrees.csv", outputTree);
    }

    if (UseBag)
    {
        //going to generate a csv file of the ensemble learner's performance
        StringBuilder output = new StringBuilder();
        output.Append("T(rees),Training Error,Testing Error\n");

        EnsembleLearner current = null; //only assigned once the loop below has run
        for (int i = 1; i < NumIterations; i++) //Assignment specifies 1000 iterations
        {
            current = EnsembleTools.Bagging(i, TrainBank.Count, true, RNGseed, TrainBank, attributeBank);
            double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
            double TestingError = current.TestEnsembleClassMass(TestBank, attributeBank);
            Console.WriteLine("Built a Bagged Learner with " + i + " Trees.");
            output.Append(i + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
        }

        string outputTree = BuildPerTreeErrorCsv(current, TrainBank, TestBank, attributeBank);

        // The message names the file that is really written (the "Temp" variant).
        Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankBagTemp.csv");
        System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBagTemp.csv", output.ToString());
        System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBagTreesTemp.csv", outputTree);
    }

    if (UseBagBias)
    {
        RunBiasVarianceStudy(
            "Bagged",
            @"/RunResults/ResultsBankBagAnalysis.csv",
            TrainBank,
            attributeBank,
            sample => EnsembleTools.Bagging(1000, 1000, true, RNGseed, sample, attributeBank));
    }

    if (UseRandTrees)
    {
        for (int numAttributes = 2; numAttributes < 7; numAttributes += 2)
        {
            //going to generate a csv file of the ensemble learner's performance
            StringBuilder output = new StringBuilder();
            output.Append("T(rees),Training Error,Testing Error\n");

            EnsembleLearner current = null; //only assigned once the loop below has run
            for (int i = 1; i < NumIterations; i++) //Assignment specifies 1000 iterations
            {
                current = EnsembleTools.RandomForest(i, TrainBank.Count, true, RNGseed, numAttributes, TrainBank, attributeBank);
                double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
                double TestingError = current.TestEnsembleClassMass(TestBank, attributeBank);
                Console.WriteLine("Built a Random Forest Learner with " + i + " Trees.");
                output.Append(i + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
            }

            string output2 = BuildPerTreeErrorCsv(current, TrainBank, TestBank, attributeBank);

            // The message includes the attribute count so it matches the file written.
            Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankRForest" + numAttributes + ".csv");
            System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankRForest" + numAttributes + ".csv", output.ToString());
            System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankRForest" + numAttributes + "Trees.csv", output2);
        }
    }

    if (UseRForestBias)
    {
        RunBiasVarianceStudy(
            "RForest",
            @"/RunResults/ResultsBankRForest4Analysis.csv",
            TrainBank,
            attributeBank,
            sample => EnsembleTools.RandomForest(1000, 1000, true, RNGseed, 4, sample, attributeBank));
    }
}

/// <summary>
/// Returns CSV text with a header row followed by the training and testing
/// error of each of the first NumIterations - 1 trees of
/// <paramref name="learner"/>. The learner is only dereferenced inside the
/// loop, so a null learner is tolerated when there are no trees to report.
/// </summary>
private static string BuildPerTreeErrorCsv(EnsembleLearner learner, List<Case> train, List<Case> test, List<DAttribute> attributes)
{
    StringBuilder csv = new StringBuilder();
    csv.Append("Tree#,Training Error,Testing Error\n");

    for (int i = 0; i < NumIterations - 1; i++)
    {
        ID3_Node node = learner.Trees[i];
        double TrainingError = ID3Tools.FindTestError(train, attributes, node);
        double TestingError = ID3Tools.FindTestError(test, attributes, node);
        int index = i + 1; //tree numbers in the file are 1-based
        csv.Append(index + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
    }

    return csv.ToString();
}

/// <summary>
/// Runs 100 rounds of: draw a 1000-case subset of <paramref name="train"/>,
/// build an ensemble from it with <paramref name="buildEnsemble"/>, and measure
/// on the full training set (a) the fraction of cases mispredicted and (b) the
/// mean squared difference between the prediction and the average label value,
/// for both the ensemble's first tree and the whole ensemble. One CSV row is
/// written per round plus a final row of averages, saved to TestPath +
/// <paramref name="resultFile"/>.
/// </summary>
private static void RunBiasVarianceStudy(string learnerLabel, string resultFile, List<Case> train, List<DAttribute> attributes, Func<List<Case>, EnsembleLearner> buildEnsemble)
{
    const int Rounds = 100;
    const int LabelIndex = 16; //position of the target label in AttributeVals / the attribute list

    Random Gen = new Random(RNGseed);

    double averageResult = 0;
    foreach (Case c in train)
    {
        averageResult += c.AttributeVals[LabelIndex]; //add target label value
    }
    averageResult = averageResult / (double)train.Count;

    double AverageTreeVariance = 0;
    double AverageEnsembleVariance = 0;
    double AverageTreeBias = 0;
    double AverageEnsembleBias = 0;

    StringBuilder output = new StringBuilder();
    output.Append("TreeBias,EnsBias,TreeVar,EnsVar\n");

    for (int i = 1; i <= Rounds; i++)
    {
        // NOTE(review): the original comments described this draw as "without
        // replacement" and the ensemble's internal draw as "allowing duplicates",
        // yet both calls pass true for the corresponding flag -- confirm the
        // flag's meaning against EnsembleTools.
        List<Case> Sample = EnsembleTools.GetRandomSubset(true, 1000, Gen, train);
        EnsembleLearner current = buildEnsemble(Sample);

        //Calculate bias first
        double Bias = 0; //tree
        foreach (Case c in train)
        {
            if (ID3Tools.TestWithTree(c, current.Trees[0]) != c.AttributeVals[LabelIndex]) //Incorrect guess
            {
                Bias += 1;
            }
        }
        Bias = Bias / (double)train.Count;
        output.Append(Bias + ",");
        AverageTreeBias += Bias;

        Bias = 0; //Ensemble
        foreach (Case c in train)
        {
            if (current.TestEnsembleClassificaiton(c, attributes[LabelIndex]) != c.AttributeVals[LabelIndex]) //Incorrect guess
            {
                Bias += 1;
            }
        }
        Bias = Bias / (double)train.Count;
        AverageEnsembleBias += Bias;
        output.Append(Bias + ",");

        //now variance
        double Variance = 0; //tree
        foreach (Case c in train)
        {
            Variance += Math.Pow(ID3Tools.TestWithTree(c, current.Trees[0]) - averageResult, 2);
        }
        Variance = Variance / (double)(train.Count);
        AverageTreeVariance += Variance;
        output.Append(Variance + ",");

        Variance = 0; //ensemble
        foreach (Case c in train)
        {
            Variance += Math.Pow(current.TestEnsembleClassificaiton(c, attributes[LabelIndex]) - averageResult, 2);
        }
        Variance = Variance / (double)(train.Count);
        AverageEnsembleVariance += Variance;
        output.Append(Variance + "\n");

        Console.WriteLine("Completed Bias and Variance calculations for " + learnerLabel + " Learner number " + i);
    }

    AverageTreeVariance = AverageTreeVariance / Rounds;
    AverageTreeBias = AverageTreeBias / Rounds;
    AverageEnsembleVariance = AverageEnsembleVariance / Rounds;
    AverageEnsembleBias = AverageEnsembleBias / Rounds;

    output.Append("FinalVals\n" + AverageTreeBias + "," + AverageEnsembleBias + "," + AverageTreeVariance + "," + AverageEnsembleVariance);
    Console.WriteLine();
    System.IO.File.WriteAllText(TestPath + resultFile, output.ToString());
}
/// <summary>
/// Entry point: loads the bank-note train/test CSV files, trains the Normal, Voted,
/// Averaged and Margin perceptron variants one epoch at a time, logs the train/test
/// error after every epoch, prints each learner's final weights and bias, and writes
/// the collected per-epoch errors to RunResults/Perceptron.csv under TestPath.
/// </summary>
public static void Main()
{
    //Attributes for the data
    // NOTE(review): every attribute receives the same second argument (1). If that argument
    // is an ID / column index it probably should differ per attribute - confirm against DAttribute.
    DAttribute[] Attributes = new DAttribute[5];
    Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
    Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
    Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
    Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);
    Attributes[4] = new DAttribute("Genuine", 1, new List<String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

    List<Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
    List<Case> TestBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

    StringBuilder output = new StringBuilder();

    //start testing here
    // Each learner is constructed immediately before it is trained, in the same order as before.
    RunPerceptronEpochs(
        "Normal",
        "NormalPerceptron \nTrain,Test\n",
        new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Normal),
        TrainBank, TestBank, output);

    RunPerceptronEpochs(
        "Voted",
        "\nVotedPerceptron \nTrain,Test\n",
        new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Voted),
        TrainBank, TestBank, output);

    RunPerceptronEpochs(
        "Averaged",
        "\nAveragedPerceptron \nTrain,Test\n",
        new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Averaged),
        TrainBank, TestBank, output);

    // Fix: the final weight/bias report for this learner used to read from the Normal
    // perceptron (copy-paste slip); it now reports the Margin perceptron itself.
    RunPerceptronEpochs(
        "Margin",
        "\nMarginPerceptron \nTrain,Test\n",
        new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Margin, 6),
        TrainBank, TestBank, output);

    Console.WriteLine("\n\n\n\n\n\n Writing all results to TestingData/RunResults/Perceptron.csv");
    System.IO.File.WriteAllText(TestPath + @"/RunResults/Perceptron.csv", output.ToString());
    Console.Read(); // keep the console window open until a key is pressed
}

/// <summary>
/// Trains one perceptron for 11 single epochs, printing and recording the train/test
/// error after each epoch, then prints the learner's final weight vector and bias.
/// </summary>
/// <param name="label">Short variant name used in the console messages (e.g. "Normal").</param>
/// <param name="csvHeader">Section header appended to <paramref name="output"/> before the per-epoch rows.</param>
/// <param name="learner">The perceptron to train and report on.</param>
/// <param name="trainBank">Cases used to measure training error.</param>
/// <param name="testBank">Cases used to measure testing error.</param>
/// <param name="output">CSV accumulator that receives one "train,test" row per epoch.</param>
private static void RunPerceptronEpochs(
    string label,
    string csvHeader,
    PerceptronLearner learner,
    List<Case> trainBank,
    List<Case> testBank,
    StringBuilder output)
{
    output.Append(csvHeader);

    // Same bounds as the original loops: epochs numbered 1 through 11.
    for (int epoch = 1; epoch < 12; epoch++)
    {
        learner.SingleEpoch(); //do an epoch then test it
        double trainError = learner.GetError(trainBank);
        double testError = learner.GetError(testBank);
        Console.WriteLine("Training error " + label + " Epoch# " + epoch + " = " + trainError);
        Console.WriteLine("Testing error " + label + " Epoch# " + epoch + " = " + testError);
        output.Append(trainError + "," + testError + "\n");
    }

    Console.Write("Final Weight = {");
    foreach (double w in learner.getWeight())
    {
        Console.Write(w + ", ");
    }
    Console.Write("} with a bias of " + learner.getBias() + ". \n");
}