Exemplo n.º 1
0
        // Parses the simple sample file through the ParseCSV overload that hands back the
        // attribute list via an out parameter, then checks how many cases and attributes came back.
        public void TestAutoDetect()
        {
            // simple1.txt is expected to contain 7 examples described by 5 attributes.
            string samplePath = TestPath + @"\simple\simple1.txt";

            // The out parameter is assigned by ParseCSV, so no list needs to be allocated up front.
            List<DAttribute> detectedAttributes;
            List<Case> parsedCases = DRT.ParseCSV(samplePath, out detectedAttributes);

            Assert.AreEqual(7, parsedCases.Count);
            Assert.AreEqual(5, detectedAttributes.Count);
        }
Exemplo n.º 2
0
        // Builds an ID3 tree from simple1.txt with the depth argument set to 1, verifies that a
        // tree was produced, and prints it to the console for manual inspection.
        public void TestDepthLimit()
        {
            // Every attribute in the sample file shares the same two-symbol alphabet.
            string[] alphabet = { "0", "1" };

            // Attributes are declared by hand (rather than auto-detected) so they carry readable names.
            List<DAttribute> attributes = new List<DAttribute>(5)
            {
                new DAttribute("X_1", 0, new List<string>(alphabet), false, false),
                new DAttribute("X_2", 1, new List<string>(alphabet), false, false),
                new DAttribute("X_3", 2, new List<string>(alphabet), false, false),
                new DAttribute("X_4", 3, new List<string>(alphabet), false, false),
                new DAttribute("VarFinal", 4, new List<string>(alphabet), false, true)
            };

            List<Case> TestData = DRT.ParseCSV(attributes.ToArray(), TestPath + @"\simple\simple1.txt");

            // Third argument is the depth passed to ID3; this test uses 1.
            ID3_Node Tree = ID3Tools.ID3(attributes, TestData, 1, ID3Tools.EntropyCalucalation.IG);

            // Fail with a clear assertion message instead of a NullReferenceException on PrintTree.
            Assert.IsNotNull(Tree);

            System.Console.WriteLine(Tree.PrintTree(attributes.ToArray()));
        }
Exemplo n.º 3
0
        // Trains an ID3 tree on simple1.txt with a depth argument of 999, prints the tree, and
        // checks that running the seventh parsed case through it yields 0.
        public void TestSimple()
        {
            // Attributes are spelled out by hand so they show up with readable names when debugging.
            string[] binarySymbols = { "0", "1" };

            List<DAttribute> attributes = new List<DAttribute>(5)
            {
                new DAttribute("X_1", 0, new List<string>(binarySymbols), false, false),
                new DAttribute("X_2", 1, new List<string>(binarySymbols), false, false),
                new DAttribute("X_3", 2, new List<string>(binarySymbols), false, false),
                new DAttribute("X_4", 3, new List<string>(binarySymbols), false, false),
                new DAttribute("VarFinal", 4, new List<string>(binarySymbols), false, true)
            };

            DAttribute[] attributeArray = attributes.ToArray();
            List<Case> cases = DRT.ParseCSV(attributeArray, TestPath + @"\simple\simple1.txt");

            ID3_Node learnedTree = ID3Tools.ID3(attributes, cases, 999, ID3Tools.EntropyCalucalation.IG);

            string renderedTree = learnedTree.PrintTree(attributes.ToArray());
            System.Console.WriteLine(renderedTree);

            Assert.AreEqual(0, ID3Tools.TestWithTree(cases[6], learnedTree));
        }
Exemplo n.º 4
0
        // Entry point: loads the bank-note train/test CSV files, remaps the label column, and
        // trains primal sub-gradient SVM learners for three values of C, printing training error,
        // testing error, the weight vector and the bias for each learner.
        public static void Main()
        {
            //Attributes for the data
            DAttribute[] Attributes = new DAttribute[5];

            // NOTE(review): every attribute is constructed with the same second argument (1), whereas
            // other callers of DAttribute pass a distinct 0-based index — confirm this is intentional.
            Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
            Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
            Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
            Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);

            Attributes[4] = new DAttribute("Genuine", 1, new List<String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

            List<Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
            List<Case> TestBank  = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

            //Convert output to -1,1 as opposed to 0,1
            Case.ColXtoY(TrainBank, 4, 0, -1);
            Case.ColXtoY(TestBank, 4, 0, -1);

            //begin testing
            double[] C = new double[] { 100.0 / 873.0, 500.0 / 873.0, 700.0 / 873.0 };

            Console.WriteLine("Starting primal Subgradient Decent with C = { 100/873, 500/873, 700/873 }");
            Console.WriteLine("=====================================================================================");

            Console.WriteLine("\nUsing NewLR = LR / (1 + LR * T / D) for learning rate. \n");

            //set up parameters
            double LearningRate   = .2;
            double LearningAdjust = .75;
            int    Seed           = 1500;

            //report
            Console.WriteLine("\tBase Learning Rate = " + LearningRate);
            Console.WriteLine("\tNum epochs (T) = 100");
            Console.WriteLine("\tLearning Rate Adjustment = " + LearningAdjust);

            RunPrimalLearners(C, LearningRate, LearningAdjust, Seed, TrainBank, TestBank);

            Console.WriteLine("-------------------------------------------------------------------------------------\n");

            Console.WriteLine("Using NewLR = LR/ (1 + T) for learning rate.\n");
            Console.WriteLine("\tBase Learning Rate = " + LearningRate);
            Console.WriteLine("\tNum epochs (T) = 100");
            Console.WriteLine("\tLearning Rate Adjustment = " + LearningAdjust);

            // NOTE(review): this second experiment constructs and trains the learner with exactly the
            // same arguments and calls as the first one; nothing visible here selects the
            // LR / (1 + T) schedule announced above — confirm how the schedule is meant to be chosen.
            RunPrimalLearners(C, LearningRate, LearningAdjust, Seed, TrainBank, TestBank);

            //let the user read the stuff on screen.
            Console.WriteLine("\n\n\nFinished execution. Hit any key to exit.");

            Console.Read();
        }

        // Trains one SVMGradient learner per entry of C for 100 single-epoch steps, printing the
        // training error after every 10th completed epoch and a final summary (training error,
        // testing error, weights, bias) for each learner.
        private static void RunPrimalLearners(double[] C, double LearningRate, double LearningAdjust, int Seed, List<Case> TrainBank, List<Case> TestBank)
        {
            const int TotalEpochs    = 100;
            const int ReportInterval = 10;

            for (int j = 0; j < C.Length; j++)
            {
                SVMGradient current = new SVMGradient(C[j], LearningRate, LearningAdjust, Seed, TrainBank);
                Console.WriteLine("\nCreated new primal learner with C = " + C[j]);

                for (int epoch = 1; epoch <= TotalEpochs; epoch++)
                {
                    // Run the epoch BEFORE reporting so that "Completed N Epochs" really reflects
                    // N finished epochs (the report used to be printed one epoch early).
                    current.PGradientEpoch(1); //one epoch per call, TotalEpochs calls in total

                    if (epoch % ReportInterval == 0)
                    {
                        Console.WriteLine("\tCompleted " + epoch + " Epochs.");
                        Console.WriteLine("\tTraining error = " + current.getTrainingError());
                    }
                }

                Console.WriteLine("\n\tTraining error = " + current.getTrainingError());
                Console.WriteLine("\tTesting error  = " + current.getTestError(TestBank));

                double[] weight = current.getWeight();
                Console.Write("\tWeight = { " + weight[0]);
                for (int i = 1; i < weight.Length; i++)
                {
                    Console.Write(", " + weight[i]);
                }
                Console.Write("}\n");
                Console.WriteLine("\tBias = " + current.getBias());
            }
        }
Exemplo n.º 5
0
        // Entry point: depending on the BuildCarTrees / BuildBankTrees / BuildBankTreeNormal /
        // BuildBankMissingVals switches, trains ID3 trees on the car and bank data sets for a range
        // of depth limits using three split measures (IG, GI, ME) and writes a text report per run
        // under TestPath/RunResults.
        public static void Main()
        {
            // ========= Part 1 ============= //

            if (BuildCarTrees)
            {
                //This is the car example.
                List<DAttribute> attributeCars = new List<DAttribute>(7);
                //while I could auto detect this, it's much easier to read the trees if I name the DataAttributes ahead of time
                //below data descriptions come from data-desc.txt, located near the data for this training data.
                string[] AVariants = new string[] { "vhigh", "high", "med", "low" }; //array of attribute variants to pass in to an attribute

                attributeCars.Add(new DAttribute("buying", 0, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                attributeCars.Add(new DAttribute("maint", 1, new List<string>(AVariants), DAttribute.Type.Categorical, false));

                AVariants = new string[] { "2", "3", "4", "5more" };
                attributeCars.Add(new DAttribute("doors", 2, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "2", "4", "more" };
                attributeCars.Add(new DAttribute("persons", 3, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "small", "med", "big" };
                attributeCars.Add(new DAttribute("lug_boot", 4, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "low", "med", "high" };
                attributeCars.Add(new DAttribute("safety", 5, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "unacc", "acc", "good", "vgood" };
                attributeCars.Add(new DAttribute("label", 6, new List<string>(AVariants), DAttribute.Type.Categorical, true));

                List<Case> TrainCars = DRT.ParseCSV(attributeCars.ToArray(), TestPath + @"\car\train.csv");
                List<Case> TestCars  = DRT.ParseCSV(attributeCars.ToArray(), TestPath + @"\car\test.csv");

                // Depth limits 1..6 for each of the three split measures.
                string carReport = BuildTreeReport("Cars", attributeCars, TrainCars, TestCars, 6);

                Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsCars.txt");
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsCars.txt", carReport);
            }

            // ========= Part 2 ============= //
            // bank information
            if (BuildBankTrees)
            {
                // 17 attributes are added below (indices 0..16).
                List<DAttribute> attributeBank = new List<DAttribute>(17);
                //Once again, could auto detect, but doing so makes the data harder to read. Furthermore, autodetecting doesn't work for filling in missing values.
                //below data descriptions come from data-desc.txt, located near the data for this training data.

                string[] AVariants;

                //age being numeric means that the actual variants will be figured out at run time. The variant will be overwritten when we pull in the testing data.
                attributeBank.Add(new DAttribute("age", 0, null, DAttribute.Type.BinaryNumeric, false));
                AVariants = new string[] { "admin.", "unknown", "unemployed", "management", "housemaid", "entrepreneur", "student",
                                           "blue-collar", "self-employed", "retired", "technician", "services" };
                attributeBank.Add(new DAttribute("job", 1, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "married", "divorced", "single" };
                attributeBank.Add(new DAttribute("marital", 2, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "unknown", "secondary", "primary", "tertiary" };
                attributeBank.Add(new DAttribute("education", 3, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "yes", "no" };
                attributeBank.Add(new DAttribute("default", 4, new List<string>(AVariants), DAttribute.Type.Categorical, false));

                attributeBank.Add(new DAttribute("balance", 5, null, DAttribute.Type.BinaryNumeric, false));
                AVariants = new string[] { "yes", "no" };
                attributeBank.Add(new DAttribute("housing", 6, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "yes", "no" };
                attributeBank.Add(new DAttribute("loan", 7, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "unknown", "telephone", "cellular" };
                attributeBank.Add(new DAttribute("contact", 8, new List<string>(AVariants), DAttribute.Type.Categorical, false));

                attributeBank.Add(new DAttribute("day", 9, null, DAttribute.Type.BinaryNumeric, false));
                AVariants = new string[] { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
                attributeBank.Add(new DAttribute("month", 10, new List<string>(AVariants), DAttribute.Type.Categorical, false));

                attributeBank.Add(new DAttribute("duration", 11, null, DAttribute.Type.BinaryNumeric, false));

                attributeBank.Add(new DAttribute("campaign", 12, null, DAttribute.Type.BinaryNumeric, false));

                attributeBank.Add(new DAttribute("pdays", 13, null, DAttribute.Type.BinaryNumeric, false));

                attributeBank.Add(new DAttribute("previous", 14, null, DAttribute.Type.BinaryNumeric, false));
                AVariants = new string[] { "unknown", "other", "failure", "success" }; //If unknown needs to be filled in, remove it from this list.
                attributeBank.Add(new DAttribute("poutcome", 15, new List<string>(AVariants), DAttribute.Type.Categorical, false));
                AVariants = new string[] { "yes", "no" };
                attributeBank.Add(new DAttribute("result", 16, new List<string>(AVariants), DAttribute.Type.Categorical, true));

                if (BuildBankTreeNormal)
                {
                    List<Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
                    List<Case> TestBank  = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);

                    // Depth limits 1..16 for each of the three split measures.
                    string normalReport = BuildTreeReport("Bank", attributeBank, TrainBank, TestBank, 16);

                    Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsBankNormal.txt");
                    System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankNormal.txt", normalReport);
                }
                if (BuildBankMissingVals)
                {
                    //In this case, the "unknown" values in poutcome are treated as missing: "unknown" is
                    //dropped from the variant list, as the note on the original list instructs. Previously
                    //the list was rebuilt with "unknown" still present, which made this run identical to
                    //the normal one.
                    // NOTE(review): assumes DRT.ParseCSV fills values absent from the variant list with the
                    // majority value, as the surrounding comments describe — confirm against DRT.
                    attributeBank[15] = new DAttribute("poutcome", 15, new List<string>(new string[] { "other", "failure", "success" }), DAttribute.Type.Categorical, false);

                    //Now we rebuild all the datasets, which will have elements filled in by the majority elements.
                    List<Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
                    List<Case> TestBank  = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);

                    string missingReport = BuildTreeReport("Bank", attributeBank, TrainBank, TestBank, 16);

                    Console.WriteLine("Writing all results to DecisionTree/TestingData/RunResults/ResultsBankMissingVals.txt");
                    System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankMissingVals.txt", missingReport);
                }
            }
        }

        // Builds the full text report for one data set: all depth limits 1..maxDepth with IG,
        // then with GI, then with ME, in that order.
        private static string BuildTreeReport(string dataSetName, List<DAttribute> attributes, List<Case> trainData, List<Case> testData, int maxDepth)
        {
            StringBuilder TreeLayout = new StringBuilder();

            AppendTreeSeries(TreeLayout, "Information Gain " + dataSetName, "Finished an IG Tree", attributes, trainData, testData, maxDepth,
                             depth => ID3Tools.ID3(attributes, trainData, depth, ID3Tools.EntropyCalucalation.IG));

            AppendTreeSeries(TreeLayout, "Gini Index " + dataSetName, "Finished a GI Tree", attributes, trainData, testData, maxDepth,
                             depth => ID3Tools.ID3(attributes, trainData, depth, ID3Tools.EntropyCalucalation.GI));

            AppendTreeSeries(TreeLayout, "Majority Error " + dataSetName, "Finished an ME Tree", attributes, trainData, testData, maxDepth,
                             depth => ID3Tools.ID3(attributes, trainData, depth, ID3Tools.EntropyCalucalation.ME));

            return TreeLayout.ToString();
        }

        // For each depth limit 1..maxDepth: builds a tree via buildTree, measures its error on the
        // training and testing data, appends a titled section (errors + printed tree) to TreeLayout
        // and prints progressMessage to the console.
        private static void AppendTreeSeries(StringBuilder TreeLayout, string title, string progressMessage, List<DAttribute> attributes, List<Case> trainData, List<Case> testData, int maxDepth, Func<int, ID3_Node> buildTree)
        {
            for (int depth = 1; depth <= maxDepth; depth++)
            {
                ID3_Node Tree = buildTree(depth);

                Double TrainError = ID3Tools.FindTestError(trainData, attributes, Tree);
                Double TestError  = ID3Tools.FindTestError(testData, attributes, Tree);

                //add the tree to the string builder so it can be written to a file by the caller.
                TreeLayout.Append(title + ", Max Depth of " + depth + ". Test Error = " + TestError + ". TrainError = " + TrainError + " \n \n" + Tree.PrintTree(attributes.ToArray()) + "\n ----------------------------------------------------------------- \n");
                Console.WriteLine(progressMessage);
            }
        }
Exemplo n.º 6
0
        // Entry point: loads the bank-note train/test CSV files, remaps the label column, and for
        // each hidden-layer width in { 5, 10, 25, 50, 100 } trains a NeuralNet in rounds of epochs,
        // printing the training error after every round and the final training/testing error.
        public static void Main()
        {
            //Attributes for the data
            DAttribute[] Attributes = new DAttribute[5];

            // NOTE(review): every attribute is constructed with the same second argument (1), whereas
            // other callers of DAttribute pass a distinct 0-based index — confirm this is intentional.
            Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
            Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
            Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
            Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);

            Attributes[4] = new DAttribute("Genuine", 1, new List<String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

            List<Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
            List<Case> TestBank  = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

            //Convert output to -1,1 as opposed to 0,1
            Case.ColXtoY(TrainBank, 4, 0, -1);
            Case.ColXtoY(TestBank, 4, 0, -1);

            //begin testing
            int[] NumNeurons = new int[] { 5, 10, 25, 50, 100 };

            // Training schedule: Rounds calls of runEpochs(EpochsPerRound, ...) per network.
            const int EpochsPerRound = 10;
            const int Rounds         = 20;

            Console.WriteLine("Testing 3 layer neural nets with X neurons per layer = { 5, 10, 25, 50, 100 }");
            Console.WriteLine("=====================================================================================");

            Console.WriteLine("\nUsing NewLR = Base LR / (1 + Base LR * T / D) for learning rate. \n");

            //set up parameters
            double LearningRate = 1; //arbitrary number
            int    Seed         = 1500;

            //report
            Console.WriteLine("\tBase Learning Rate = " + LearningRate);
            // Report the number of epochs the loop below really runs (the message used to claim 100
            // while 20 rounds of 10 epochs were executed).
            Console.WriteLine("\tNum epochs (T) = " + (EpochsPerRound * Rounds));

            NeuralNet current; //the network under test, rebuilt for every layer width

            for (int j = 0; j < NumNeurons.Length; j++)
            {
                current = new NeuralNet(LearningRate, Seed, 4, NumNeurons[j], 2); //two hidden layers + 1 output, always
                Console.WriteLine("\nCreated new three layer Neural Net with " + NumNeurons[j] + " Neurons per layer.");

                for (int i = 0; i < Rounds; i++)
                {
                    current.runEpochs(EpochsPerRound, TrainBank); //one round of EpochsPerRound epochs

                    int completedEpochs = (i + 1) * EpochsPerRound;
                    Console.WriteLine("\tCompleted " + completedEpochs + " Epochs.");
                    Console.WriteLine("\tTraining error at " + completedEpochs + " epochs = " + current.getError(TrainBank));
                }

                Console.WriteLine("\n\tFinal Training error = " + current.getError(TrainBank));
                Console.WriteLine("\tTesting error  \t= " + current.getError(TestBank));
            }

            //let the user read the stuff on screen.
            Console.WriteLine("\n\n\nFinished execution. Hit any key to exit.");

            Console.Read();
        }
Exemplo n.º 7
0
        public static void Main()
        {
            List <DAttribute> attributeBank = new List <DAttribute>(7);

            //Once again, could auto detect, but doing so makes the data harder to read. Furthermore, autodetecting doesn't work for filling in missing values.
            //below data descriptions come from data-desc.txt, located near the data for this training data.

            string[] AVariants;

            //age being numeric means that the actual variants will be figured out at run time. The variant will be overwritten when we pull in the testing data.
            attributeBank.Add(new DAttribute("age", 0, null, DAttribute.Type.BinaryNumeric, false));
            AVariants = new string[] { "admin.", "unknown", "unemployed", "management", "housemaid", "entrepreneur", "student",
                                       "blue-collar", "self-employed", "retired", "technician", "services" };
            attributeBank.Add(new DAttribute("job", 1, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "married", "divorced", "single" };
            attributeBank.Add(new DAttribute("marital", 2, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "unknown", "secondary", "primary", "tertiary" };
            attributeBank.Add(new DAttribute("education", 3, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "yes", "no" };
            attributeBank.Add(new DAttribute("default", 4, new List <string>(AVariants), DAttribute.Type.Categorical, false));

            attributeBank.Add(new DAttribute("balance", 5, null, DAttribute.Type.BinaryNumeric, false));
            AVariants = new string[] { "yes", "no" };
            attributeBank.Add(new DAttribute("housing", 6, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "yes", "no" };
            attributeBank.Add(new DAttribute("loan", 7, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "unknown", "telephone", "cellular" };
            attributeBank.Add(new DAttribute("contact", 8, new List <string>(AVariants), DAttribute.Type.Categorical, false));

            attributeBank.Add(new DAttribute("day", 9, null, DAttribute.Type.BinaryNumeric, false));
            AVariants = new string[] { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
            attributeBank.Add(new DAttribute("month", 10, new List <string>(AVariants), DAttribute.Type.Categorical, false));

            attributeBank.Add(new DAttribute("duration", 11, null, DAttribute.Type.BinaryNumeric, false));

            attributeBank.Add(new DAttribute("campaign", 12, null, DAttribute.Type.BinaryNumeric, false));

            attributeBank.Add(new DAttribute("pdays", 13, null, DAttribute.Type.BinaryNumeric, false));

            attributeBank.Add(new DAttribute("previous", 14, null, DAttribute.Type.BinaryNumeric, false));
            AVariants = new string[] { "unknown", "other", "failure", "success" }; //If unknown needs to be filled in, remove it from this list.
            attributeBank.Add(new DAttribute("poutcome", 15, new List <string>(AVariants), DAttribute.Type.Categorical, false));
            AVariants = new string[] { "yes", "no" };
            attributeBank.Add(new DAttribute("result", 16, new List <string>(AVariants), DAttribute.Type.Categorical, true));



            List <Case> TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true);
            List <Case> TestBank  = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\test.csv", false);



            if (UseBoost)
            {
                StringBuilder output     = new StringBuilder();
                StringBuilder outputTree = new StringBuilder();

                output.Append("T(rees),Training Error,Testing Error\n"); //going to generate a csv file the ensemble learner's performance

                EnsembleLearner current = null;                          //initialize. Doesn't matter what to
                for (int i = 1; i < NumIterations; i++)                  //Assignment specifies 1000 iterations
                {
                    current = EnsembleTools.AdaBoost(i, TrainBank, attributeBank);

                    double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
                    double TestingError  = current.TestEnsembleClassMass(TestBank, attributeBank);


                    Console.WriteLine("Built an AdaBoost Learner with " + i + " Trees.");
                    output.Append(i + "," + TrainingError + "," + TestingError + "\n");                     //write a new line for the CSV file
                    TrainBank = DRT.ParseCSV(attributeBank.ToArray(), TestPath + @"\bank\train.csv", true); //reset data since I'm too lazy to copy it
                }

                StringBuilder output2 = new StringBuilder();
                outputTree.Append("Tree#,Training Error,Testing Error\n");
                for (int i = 0; i < NumIterations - 1; i++)
                {
                    ID3_Node node = current.Trees[i];

                    double TrainingError = ID3Tools.FindTestError(TrainBank, attributeBank, node);
                    double TestingError  = ID3Tools.FindTestError(TestBank, attributeBank, node);

                    int index = i + 1;
                    outputTree.Append(index + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
                }

                Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankBoost.csv");
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBoost.csv", output.ToString());
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBoostTrees.csv", outputTree.ToString());
            }

            if (UseBag)
            {
                StringBuilder output     = new StringBuilder();
                StringBuilder outputTree = new StringBuilder();

                output.Append("T(rees),Training Error,Testing Error\n"); //going to generate a csv file the ensemble learner's performance

                EnsembleLearner current = null;                          //initialize. Doesn't matter what to
                for (int i = 1; i < NumIterations; i++)                  //Assignment specifies 1000 iterations
                {
                    current = EnsembleTools.Bagging(i, TrainBank.Count, true, RNGseed, TrainBank, attributeBank);

                    double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
                    double TestingError  = current.TestEnsembleClassMass(TestBank, attributeBank);

                    Console.WriteLine("Built a Bagged Learner with " + i + " Trees.");

                    output.Append(i + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
                }

                StringBuilder output2 = new StringBuilder();
                outputTree.Append("Tree#,Training Error,Testing Error\n");
                for (int i = 0; i < NumIterations - 1; i++)
                {
                    ID3_Node node = current.Trees[i];

                    double TrainingError = ID3Tools.FindTestError(TrainBank, attributeBank, node);
                    double TestingError  = ID3Tools.FindTestError(TestBank, attributeBank, node);

                    int index = i + 1;
                    outputTree.Append(index + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
                }

                Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankBag.csv");
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBagTemp.csv", output.ToString());
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBagTreesTemp.csv", outputTree.ToString());
            }

            if (UseBagBias)
            {
                Random Gen = new Random(RNGseed);

                double averageResult = 0;
                foreach (Case c in TrainBank)
                {
                    averageResult += c.AttributeVals[16]; //add target label value
                }
                averageResult = averageResult / (double)TrainBank.Count;

                double AverageTreeVariance = 0;
                double AverageBagVariance  = 0;

                double AverageTreeBias = 0;
                double AverageBagBias  = 0;

                StringBuilder output = new StringBuilder();
                output.Append("TreeBias,EnsBias,TreeVar,EnsVar\n");

                for (int i = 1; i < 101; i++)
                {
                    List <Case>     Sample  = EnsembleTools.GetRandomSubset(true, 1000, Gen, TrainBank);               //Generate samples without replacement
                    EnsembleLearner current = EnsembleTools.Bagging(1000, 1000, true, RNGseed, Sample, attributeBank); //Generate samples allowing duplicates
                    //Calculate bias first

                    double Bias = 0;//tree
                    foreach (Case c in TrainBank)
                    {
                        // (1 - prediction) ^ 2
                        if (ID3Tools.TestWithTree(c, current.Trees[0]) != c.AttributeVals[16]) //Incorrect guess
                        {
                            Bias += 1;
                        }
                    }
                    Bias = Bias / (double)TrainBank.Count;
                    output.Append(Bias + ",");
                    AverageTreeBias += Bias;

                    Bias = 0;//Ensemble
                    foreach (Case c in TrainBank)
                    {
                        // (1 - prediction) ^ 2
                        if (current.TestEnsembleClassificaiton(c, attributeBank[16]) != c.AttributeVals[16]) //Incorrect guess
                        {
                            Bias += 1;
                        }
                    }
                    Bias            = Bias / (double)TrainBank.Count;
                    AverageBagBias += Bias;
                    output.Append(Bias + ",");

                    //now variance
                    double Variance = 0;//tree
                    foreach (Case c in TrainBank)
                    {
                        Variance += Math.Pow(ID3Tools.TestWithTree(c, current.Trees[0]) - averageResult, 2); //add target label value
                    }
                    Variance = Variance / (double)(TrainBank.Count);

                    AverageTreeVariance += Variance;
                    output.Append(Variance + ",");


                    Variance = 0;//ensemble
                    foreach (Case c in TrainBank)
                    {
                        Variance += Math.Pow(current.TestEnsembleClassificaiton(c, attributeBank[16]) - averageResult, 2); //add target label value
                    }
                    Variance = Variance / (double)(TrainBank.Count);

                    AverageBagVariance += Variance;
                    output.Append(Variance + "\n");

                    Console.WriteLine("Completed Bias and Variance calculations for Bagged Learner number " + i);
                }

                AverageTreeVariance = AverageTreeVariance / 100;
                AverageTreeBias     = AverageTreeBias / 100;
                AverageBagVariance  = AverageBagVariance / 100;
                AverageBagBias      = AverageBagBias / 100;

                output.Append("FinalVals\n" + AverageTreeBias + "," + AverageBagBias + "," + AverageTreeVariance + "," + AverageBagVariance);
                Console.WriteLine();
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankBagAnalysis.csv", output.ToString());
            }

            if (UseRandTrees)
            {
                for (int numAttributes = 2; numAttributes < 7; numAttributes += 2)
                {
                    StringBuilder output  = new StringBuilder();
                    StringBuilder output2 = new StringBuilder();

                    output.Append("T(rees),Training Error,Testing Error\n"); //going to generate a csv file the ensemble learner's performance

                    EnsembleLearner current = null;                          //initialize. Doesn't matter what to
                    for (int i = 1; i < NumIterations; i++)                  //Assignment specifies 1000 iterations
                    {
                        current = EnsembleTools.RandomForest(i, TrainBank.Count, true, RNGseed, numAttributes, TrainBank, attributeBank);

                        double TrainingError = current.TestEnsembleClassMass(TrainBank, attributeBank);
                        double TestingError  = current.TestEnsembleClassMass(TestBank, attributeBank);

                        Console.WriteLine("Built a Random Forest Learner with " + i + " Trees.");

                        output.Append(i + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
                    }

                    output2.Append("Tree#,Training Error,Testing Error\n");
                    for (int i = 0; i < NumIterations - 1; i++)
                    {
                        ID3_Node node = current.Trees[i];

                        double TrainingError = ID3Tools.FindTestError(TrainBank, attributeBank, node);
                        double TestingError  = ID3Tools.FindTestError(TestBank, attributeBank, node);

                        int index = i + 1;
                        output2.Append(index + "," + TrainingError + "," + TestingError + "\n"); //write a new line for the CSV file
                    }

                    Console.WriteLine("Writing all results to Ensemble\\ Learning/TestingData/RunResults/ResultsBankRForest.csv");
                    System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankRForest" + numAttributes + ".csv", output.ToString());
                    System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankRForest" + numAttributes + "Trees.csv", output2.ToString());
                }
            }


            if (UseRForestBias)
            {
                Random Gen = new Random(RNGseed);

                double averageResult = 0;
                foreach (Case c in TrainBank)
                {
                    averageResult += c.AttributeVals[16]; //add target label value
                }
                averageResult = averageResult / (double)TrainBank.Count;

                double AverageTreeVariance    = 0;
                double AverageRForestVariance = 0;

                double AverageTreeBias    = 0;
                double AverageRForestBias = 0;

                StringBuilder output = new StringBuilder();
                output.Append("TreeBias,EnsBias,TreeVar,EnsVar\n");

                for (int i = 1; i < 101; i++)
                {
                    List <Case>     Sample  = EnsembleTools.GetRandomSubset(true, 1000, Gen, TrainBank);                       //Generate samples without replacement
                    EnsembleLearner current = EnsembleTools.RandomForest(1000, 1000, true, RNGseed, 4, Sample, attributeBank); //Generate samples allowing duplicates
                    //Calculate bias first

                    double Bias = 0;//tree
                    foreach (Case c in TrainBank)
                    {
                        // (1 - prediction) ^ 2
                        if (ID3Tools.TestWithTree(c, current.Trees[0]) != c.AttributeVals[16]) //Incorrect guess
                        {
                            Bias += 1;
                        }
                    }
                    Bias = Bias / (double)TrainBank.Count;
                    output.Append(Bias + ",");
                    AverageTreeBias += Bias;

                    Bias = 0;//Ensemble
                    foreach (Case c in TrainBank)
                    {
                        // (1 - prediction) ^ 2
                        if (current.TestEnsembleClassificaiton(c, attributeBank[16]) != c.AttributeVals[16]) //Incorrect guess
                        {
                            Bias += 1;
                        }
                    }
                    Bias = Bias / (double)TrainBank.Count;
                    AverageRForestBias += Bias;
                    output.Append(Bias + ",");

                    //now variance
                    double Variance = 0;//tree
                    foreach (Case c in TrainBank)
                    {
                        Variance += Math.Pow(ID3Tools.TestWithTree(c, current.Trees[0]) - averageResult, 2); //add target label value
                    }
                    Variance = Variance / (double)(TrainBank.Count);

                    AverageTreeVariance += Variance;
                    output.Append(Variance + ",");


                    Variance = 0;//ensemble
                    foreach (Case c in TrainBank)
                    {
                        Variance += Math.Pow(current.TestEnsembleClassificaiton(c, attributeBank[16]) - averageResult, 2); //add target label value
                    }
                    Variance = Variance / (double)(TrainBank.Count);

                    AverageRForestVariance += Variance;
                    output.Append(Variance + "\n");

                    Console.WriteLine("Completed Bias and Variance calculations for RForest Learner number " + i);
                }

                AverageTreeVariance    = AverageTreeVariance / 100;
                AverageTreeBias        = AverageTreeBias / 100;
                AverageRForestVariance = AverageRForestVariance / 100;
                AverageRForestBias     = AverageRForestBias / 100;

                output.Append("FinalVals\n" + AverageTreeBias + "," + AverageRForestBias + "," + AverageTreeVariance + "," + AverageRForestVariance);
                Console.WriteLine();
                System.IO.File.WriteAllText(TestPath + @"/RunResults/ResultsBankRForest4Analysis.csv", output.ToString());
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Trains the Normal, Voted, Averaged and Margin perceptron variants on the bank-note
        /// data set. For each variant it prints the per-epoch training/testing error and the
        /// final weight vector and bias to the console, then writes all per-epoch errors to
        /// RunResults/Perceptron.csv under <c>TestPath</c> and waits for a key press.
        /// </summary>
        public static void Main()
        {
            // Number of epochs run (and reported) for every perceptron variant.
            const int EpochCount = 11;

            //Attributes for the data
            // NOTE(review): every attribute is created with the same second argument (1) and the
            // label column "Genuine" passes false as its last argument - confirm both against the
            // DAttribute constructor.
            DAttribute[] Attributes = new DAttribute[5];

            Attributes[0] = new DAttribute("Varaince", 1, null, DAttribute.Type.Numeric, false);
            Attributes[1] = new DAttribute("Skew", 1, null, DAttribute.Type.Numeric, false);
            Attributes[2] = new DAttribute("Curtosis", 1, null, DAttribute.Type.Numeric, false);
            Attributes[3] = new DAttribute("Entropy", 1, null, DAttribute.Type.Numeric, false);

            Attributes[4] = new DAttribute("Genuine", 1, new List <String>(new String[] { "0", "1" }), DAttribute.Type.Categorical, false);

            List <Case> TrainBank = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\train.csv", false);
            List <Case> TestBank  = DRT.ParseCSV(Attributes, TestPath + @"\bank-note\bank-note\test.csv", false);

            // Accumulates the CSV report: one "Train,Test" section per perceptron variant.
            StringBuilder output = new StringBuilder();

            //start testing here

            // Each learner is constructed immediately before it is trained so that construction
            // and training happen in the same order as the console/CSV sections they produce.
            PerceptronLearner NormalPerceptron = new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Normal);
            RunPerceptronEpochs(NormalPerceptron, "Normal", EpochCount, TrainBank, TestBank, output);

            PerceptronLearner VotedPerceptron = new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Voted);
            RunPerceptronEpochs(VotedPerceptron, "Voted", EpochCount, TrainBank, TestBank, output);

            PerceptronLearner AveragedPerceptron = new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Averaged);
            RunPerceptronEpochs(AveragedPerceptron, "Averaged", EpochCount, TrainBank, TestBank, output);

            // The margin variant takes one extra constructor argument (6).
            PerceptronLearner MarginPerceptron = new PerceptronLearner(10, TrainBank, 1, 1500, PerceptronLearner.PType.Margin, 6);
            RunPerceptronEpochs(MarginPerceptron, "Margin", EpochCount, TrainBank, TestBank, output);

            Console.WriteLine("\n\n\n\n\n\n Writing all results to TestingData/RunResults/Perceptron.csv");
            System.IO.File.WriteAllText(TestPath + @"/RunResults/Perceptron.csv", output.ToString());

            // Keep the console window open until the user presses a key.
            Console.Read();
        }

        /// <summary>
        /// Runs <paramref name="epochs"/> epochs on <paramref name="learner"/>, reporting the
        /// training and testing error after each epoch, then prints the learner's own final
        /// weights and bias.
        /// </summary>
        /// <param name="learner">The perceptron to train; it is advanced with SingleEpoch().</param>
        /// <param name="label">Variant name used in console messages and the CSV section header (e.g. "Normal").</param>
        /// <param name="epochs">How many epochs to run and report.</param>
        /// <param name="trainData">Cases used to compute the training error.</param>
        /// <param name="testData">Cases used to compute the testing error.</param>
        /// <param name="output">CSV report buffer; a new section for this learner is appended to it.</param>
        private static void RunPerceptronEpochs(PerceptronLearner learner, string label, int epochs, List <Case> trainData, List <Case> testData, StringBuilder output)
        {
            // Sections after the first are separated from the previous one by a blank line.
            if (output.Length > 0)
            {
                output.Append("\n");
            }
            output.Append(label + "Perceptron \nTrain,Test\n");

            for (int epoch = 1; epoch <= epochs; epoch++)
            {
                learner.SingleEpoch(); //do an epoch then test it
                double trainError = learner.GetError(trainData);
                double testError  = learner.GetError(testData);
                Console.WriteLine("Training error " + label + " Epoch# " + epoch + " = " + trainError);
                Console.WriteLine("Testing error " + label + " Epoch# " + epoch + " = " + testError);

                output.Append(trainError + "," + testError + "\n");
            }

            // Report the weights of the learner that was just trained (the margin run
            // previously printed the normal perceptron's weights by mistake).
            Console.Write("Final Weight =  {");
            foreach (double d in learner.getWeight())
            {
                Console.Write(d + ", ");
            }

            Console.Write("} with a bias of " + learner.getBias() + ". \n");
        }