Example #1
0
        /// <summary>
        ///   Measures the library decision tree's accuracy on a freshly generated
        ///   testing set, returning the fraction of correctly classified samples.
        /// </summary>
        /// <returns>Accuracy in [0, 1], i.e. 1 minus the zero-one loss.</returns>
        public double DecisionTreeAccuracyPercentageLib()
        {
            // Encode the raw testing table into integer symbols with the shared codebook.
            DataTable testingData = GenerateTestingDataTableLib();
            DataTable encoded = codebook.Apply(testingData);

            // Feature columns that make up the input matrix (all mushroom attributes).
            string[] featureColumns = new string[]
            {
                "CAP SHAPE", "CAP SURFACE", "CAP COLOR",
                "BRUISES", "ODOR", "GILL ATTACHMENT",
                "GILL SPACING", "GILL SIZE", "GILL COLOR",
                "STALK SHAPE", "STALK ROOT", "STALK SURFACE ABOVE RING",
                "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING", "STALK COLOR BELOW RING",
                "VEIL TYPE", "VEIL COLOR", "RING NUMBER",
                "RING TYPE", "SPORE PRINT COLOR", "POPULATION",
                "HABITAT"
            };

            int[][] inputs = DataTableToMatrix(encoded, featureColumns);

            // The label comes back as a one-column matrix; flatten it to a vector.
            int[][] labelMatrix = DataTableToMatrix(encoded, new string[] { "TYPE" });
            int[] expected = new int[labelMatrix.Length];
            for (int row = 0; row < labelMatrix.Length; row++)
            {
                expected[row] = labelMatrix[row][0];
            }

            // Accuracy is the complement of the misclassification (zero-one) rate.
            double error = new ZeroOneLoss(expected).Loss(decisionTreeLib.Decide(inputs));
            return 1 - error;
        }
Example #2
0
        /// <summary>
        ///   Trains an ID3 decision tree on the 2017 intake training workbook and
        ///   prints a per-student advice/conclusion table for the test workbook.
        /// </summary>
        static void Main(string[] args)
        {
            // Indices of the feature columns used both for training and for querying.
            // Kept in one place so the input matrix, the decision variables and the
            // per-row queries cannot drift out of sync.
            int[] featureIndices = { 5, 7, 8, 9, 12, 13, 14, 15, 16, 20, 29, 30, 34 };

            // Create a new reader, opening a given path
            ExcelReader excel     = new ExcelReader("Intake Inf Cohort 2017 - Training Set.xlsx");
            ExcelReader excelTest = new ExcelReader("Intake Inf Cohort 2017 - Test Set.xlsx");

            // Afterwards, we can query the file for all
            // worksheets within the specified workbook:
            string[] sheets     = excel.GetWorksheetList();
            string[] sheetsTest = excelTest.GetWorksheetList();

            // Finally, we can request a specific sheet.
            // BUG FIX: the test worksheet was previously looked up with the
            // *training* workbook's sheet list (sheets[0]); use sheetsTest[0].
            DataTable data     = excel.GetWorksheet(sheets[0]);
            DataTable dataTest = excelTest.GetWorksheet(sheetsTest[0]);

            // Normalize training column names: replace spaces with underscores.
            foreach (DataColumn column in data.Columns)
            {
                column.ColumnName = column.ColumnName.Replace(" ", "_");
            }

            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data);

            // Publish the codebook on the shared property.
            // NOTE(review): assigns to a property literally named "Codification" —
            // presumably a static member of this class; confirm it exists.
            Codification = codebook;

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            // Resolve the feature column names once from the shared index list.
            string[] featureNames = new string[featureIndices.Length];
            for (int i = 0; i < featureIndices.Length; i++)
            {
                featureNames[i] = codebook.Columns[featureIndices[i]].ColumnName;
            }

            int[][] inputs = symbols.ToJagged <int>(featureNames);
            int[] outputs = symbols.ToMatrix <int>(codebook.Columns[6].ColumnName).GetColumn(0);

            // Create a teacher ID3 algorithm. Symbol counts are kept explicit
            // because they intentionally differ per column.
            var id3 = new ID3Learning()
            {
                new DecisionVariable(codebook.Columns[5].ColumnName, 2),
                new DecisionVariable(codebook.Columns[7].ColumnName, codebook.Columns[7].NumberOfSymbols),
                new DecisionVariable(codebook.Columns[8].ColumnName, codebook.Columns[8].NumberOfSymbols),
                new DecisionVariable(codebook.Columns[9].ColumnName, 3),
                new DecisionVariable(codebook.Columns[12].ColumnName, 10),
                new DecisionVariable(codebook.Columns[13].ColumnName, 10),
                new DecisionVariable(codebook.Columns[14].ColumnName, 10),
                new DecisionVariable(codebook.Columns[15].ColumnName, 10),
                new DecisionVariable(codebook.Columns[16].ColumnName, 2),
                new DecisionVariable(codebook.Columns[20].ColumnName, 2),
                new DecisionVariable(codebook.Columns[29].ColumnName, 2),
                new DecisionVariable(codebook.Columns[30].ColumnName, 2),
                new DecisionVariable(codebook.Columns[34].ColumnName, 2),
            };

            // Learn the training instances!
            Accord.MachineLearning.DecisionTrees.DecisionTree tree = id3.Learn(inputs, outputs);

            // Create a console table for display
            ConsoleTable table = new ConsoleTable("Studentnumber", "Advice", "Conclusion");

            // Loop through each row in the test data
            foreach (DataRow row in dataTest.Rows)
            {
                // Columns 0 and 6 are shown in every branch; read them once.
                string studentnumber = row.ItemArray[0].ToString();
                string advice        = row.ItemArray[6].ToString();

                // The tree can be queried for new examples through its Decide
                // method; build the (column name, value) query for this row.
                int[] query = null;

                try
                {
                    string[,] pairs = new string[featureIndices.Length, 2];
                    for (int i = 0; i < featureIndices.Length; i++)
                    {
                        pairs[i, 0] = codebook.Columns[featureIndices[i]].ColumnName;
                        pairs[i, 1] = row.ItemArray[featureIndices[i]].ToString();
                    }

                    query = codebook.Transform(pairs);
                }
                catch (Exception)
                {
                    // Value not present in the codebook: record the student as
                    // undecided ("Twijfel") and move on.
                    table.AddRow(studentnumber, advice, "(Twijfel)");
                    continue;
                }

                // And then predict the label using
                int predicted = tree.Decide(query);

                // Any predictions off are ignored for consistency
                if (predicted != -1)
                {
                    // We can translate it back to strings using the codebook
                    string answer = codebook.Revert("advies", predicted);
                    table.AddRow(studentnumber, advice, answer);
                }
                else
                {
                    // Show the result of skipped students
                    table.AddRow(studentnumber, advice, "(Twijfel)");
                }
            }

            // Write the table in console
            table.Write();

            // Read Key
            Console.ReadKey();
        }
Example #3
0
        /// <summary>
        ///   Classifies a single feature vector with the given tree and converts
        ///   the predicted integer label back to its original string form.
        /// </summary>
        /// <param name="tree">Trained decision tree used for the prediction.</param>
        /// <param name="codebook">Codebook that encoded the result column.</param>
        /// <param name="query">Feature values to classify.</param>
        /// <returns>The decoded string label for the prediction.</returns>
        public string Decide(Accord.MachineLearning.DecisionTrees.DecisionTree tree, Codification codebook, params double[] query)
        {
            // Predict, then map the numeric class index back through the codebook.
            return codebook.Revert(Constants.RESULT_COLUMN_NAME, tree.Decide(query));
        }
        /// <summary>
        ///   Regression test: training data where the same input (0) appears with
        ///   two different labels must not crash C4.5 learning, and the tree must
        ///   still classify every sample deterministically.
        /// </summary>
        public void same_input_different_output()
        {
            // Input 0 occurs twice with conflicting labels (0 and 1).
            double[][] samples =
            {
                new double[] { 1 },
                new double[] { 0 },
                new double[] { 2 },
                new double[] { 3 },
                new double[] { 0 },
            };

            int[] labels = { 11, 0, 22, 33, 1 };

            DecisionVariable[] attributes =
            {
                new DecisionVariable("x", DecisionVariableKind.Continuous)
            };

            DecisionTree tree = new DecisionTree(attributes, 34);
            var teacher = new C45Learning(tree)
            {
                Join = 10,
                MaxHeight = 10
            };

            // Used to throw a System.AggregateException here.
            teacher.Run(samples, labels);

            int[] predicted = tree.Decide(samples);

            // The conflicting duplicate resolves to label 0 for both occurrences.
            int[] expected = { 11, 0, 22, 33, 0 };
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], predicted[i]);
            }
        }
        /// <summary>
        ///   Minimal repro of the conflicting-label case: a single duplicated
        ///   input with two different labels must train without throwing and
        ///   decide to class 0.
        /// </summary>
        public void same_input_different_output_minimal()
        {
            // The same input (0) is labeled both 1 and 0.
            double[][] samples =
            {
                new double[] { 0 },
                new double[] { 0 }
            };

            int[] labels = { 1, 0 };

            DecisionVariable[] attributes =
            {
                new DecisionVariable("x", DecisionVariableKind.Continuous)
            };

            DecisionTree tree = new DecisionTree(attributes, 2);

            // Used to throw a System.AggregateException here.
            new C45Learning(tree).Run(samples, labels);

            Assert.AreEqual(tree.Decide(new[] { 0 }), 0);
        }
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in 
            // which the task is to classify whether the features of an Iris flower 
            // belongs to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal width", DecisionVariableKind.Continuous), 
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);
            
            // And the classification error (mean zero-one loss) can be computed as 
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(tree.Decide(inputs));

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            // Verify the training error, tree dimensions, and that the extracted
            // rule set classifies identically to the tree it was derived from.
            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }