Beispiel #1
0
        /// <summary>
        /// Trains a C4.5 decision tree from string records whose first row is a header.
        /// </summary>
        /// <param name="records">
        /// Jagged table of cells. Row 0 supplies the column names; every later row is
        /// added to the working table as one sample.
        /// </param>
        /// <param name="columnNamesWithoutResult">
        /// Names of the feature columns. Each one is registered with the learner as a
        /// continuous decision variable and extracted as a column of the input matrix.
        /// </param>
        /// <param name="codebook">
        /// Codification used to translate the labels found in the
        /// <c>Constants.RESULT_COLUMN_NAME</c> column into integer class ids.
        /// It is only read here, never reassigned.
        /// </param>
        /// <returns>The decision tree produced by the C4.5 learner.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// Any of the three arguments is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="records"/> is empty or its header row is <c>null</c>.
        /// </exception>
        public Accord.MachineLearning.DecisionTrees.DecisionTree Learn(string[][] records, string[] columnNamesWithoutResult, ref Codification codebook)
        {
            if (records == null)
            {
                throw new System.ArgumentNullException(nameof(records));
            }

            if (columnNamesWithoutResult == null)
            {
                throw new System.ArgumentNullException(nameof(columnNamesWithoutResult));
            }

            if (codebook == null)
            {
                throw new System.ArgumentNullException(nameof(codebook));
            }

            // Without a header row there are no column names to build the table from.
            if (records.Length == 0 || records[0] == null)
            {
                throw new System.ArgumentException("At least a header row is required.", nameof(records));
            }

            DataTable data = new DataTable();

            // Row 0 is the header: one table column per header cell.
            foreach (string columnName in records[0])
            {
                data.Columns.Add(columnName);
            }

            // Every row after the header is a data row.
            for (int rowIndex = 1; rowIndex < records.Length; rowIndex++)
            {
                data.Rows.Add(records[rowIndex]);
            }

            double[][] inputs = data.ToJagged(columnNamesWithoutResult);
            string[]   labels = data.ToArray <string>(Constants.RESULT_COLUMN_NAME);

            // Map the textual class labels onto the integer ids known to the codebook.
            int[] outputs = codebook.Translate(Constants.RESULT_COLUMN_NAME, labels);

            var teacher = new C45Learning();

            // All features are treated as continuous variables.
            foreach (string columnName in columnNamesWithoutResult)
            {
                teacher.Add(new DecisionVariable(columnName, DecisionVariableKind.Continuous));
            }

            return teacher.Learn(inputs, outputs);
        }
        /// <summary>
        /// Loads the Titanic data set from disk, trains a C4.5 decision tree on the
        /// training split and tallies true/false positives/negatives on the testing split.
        /// </summary>
        /// <remarks>
        /// Reads <c>../titanicData.txt</c> (one comma-separated record per line) and
        /// <c>../titanicColumns.txt</c> (comma-separated column names, with the column to
        /// predict last). The raw table is passed through <c>cleanData</c> and then
        /// <c>splitDataForTraining</c> with a ratio of 0.8.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// The columns file lists fewer columns than the positions this constructor reads.
        /// </exception>
        public ClassifierTitanic()
        {
            // Positions of the columns this model consumes, as laid out in the data file.
            const int pclassColumn   = 0;
            const int titleColumn    = 1;
            const int sexColumn      = 4;
            const int ageColumn      = 5;
            const int sibSpColumn    = 6;
            const int parchColumn    = 7;
            const int fareColumn     = 9;
            const int cabinColumn    = 10;
            const int embarkedColumn = 11;
            const int survivedColumn = 12;

            rawData      = new DataTable("Titanic Data");
            trainingData = new DataTable();
            testingData  = new DataTable();

            string filedata = System.IO.File.ReadAllText("../titanicData.txt");

            string[] dataColumns = System.IO.File.ReadAllText("../titanicColumns.txt").Split(',');

            // Strip stray whitespace (e.g. a trailing newline on the last name) so the
            // column names match the literals used further down.
            for (int i = 0; i < dataColumns.Length; i++)
            {
                dataColumns[i] = dataColumns[i].Trim();
            }

            if (dataColumns.Length <= survivedColumn)
            {
                throw new InvalidOperationException(
                    "titanicColumns.txt must list at least " + (survivedColumn + 1) + " columns.");
            }

            // Input columns are to be learned from
            string[] inputColumns = new string[dataColumns.Length - 1];
            Array.Copy(dataColumns, 0, inputColumns, 0, dataColumns.Length - 1);

            // Output is what we are trying to predict
            string outputColumn = dataColumns[dataColumns.Length - 1];

            // Give all three tables the same schema. The training/testing tables are
            // replaced below, but they are still set up here in case the helper methods
            // read these fields -- NOTE(review): confirm and drop if they do not.
            rawData.Columns.Add(inputColumns);
            rawData.Columns.Add(outputColumn);

            trainingData.Columns.Add(inputColumns);
            trainingData.Columns.Add(outputColumn);

            testingData.Columns.Add(inputColumns);
            testingData.Columns.Add(outputColumn);

            // Accept Windows, Unix and old-Mac line endings alike.
            string[] lines = filedata.Split(
                new[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string line in lines)
            {
                rawData.Rows.Add(line.Split(','));
            }

            // Clean up data representation and missing data
            rawData = cleanData(rawData);

            DataTable[] split = splitDataForTraining(rawData, .8, inputColumns, outputColumn);
            trainingData = split[0];
            testingData  = split[1];

            // Encode the training table into integer symbols.
            codebook = new Codification(trainingData);

            DataTable symbols = codebook.Apply(trainingData);

            // NOTE(review): the Age/SibSp/Parch/Fare training inputs are taken from the
            // codified table, while the evaluation loop below parses the raw cell values.
            // If the codebook also encodes those columns, training and evaluation use
            // different encodings -- confirm against cleanData/splitDataForTraining.
            int[][] inputs  = symbols.ToJagged <int>("Pclass", "Title", "Sex", "Age", "SibSp", "Parch", "Fare", "Cabin", "Embarked");
            int[]   outputs = symbols.ToArray <int>("Survived");

            // Ask the codebook for the categorical decision attributes.
            DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

            // Register the variables in the same order as the columns of `inputs`.
            var teacher = new C45Learning();

            teacher.Add(attributes[pclassColumn]);
            teacher.Add(attributes[titleColumn]);
            teacher.Add(attributes[sexColumn]);
            teacher.Add(new DecisionVariable("Age", new DoubleRange(0, 99)));
            teacher.Add(new DecisionVariable("SibSp", new DoubleRange(0, 10)));
            teacher.Add(new DecisionVariable("Parch", new DoubleRange(0, 10)));
            teacher.Add(new DecisionVariable("Fare", new DoubleRange(0, 400)));
            teacher.Add(attributes[cabinColumn]);
            teacher.Add(attributes[embarkedColumn]);

            // Induce a decision tree from the data.
            DecisionTree tree = teacher.Learn(inputs, outputs);

            // The data files are machine-readable, so parse numbers culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Score the tree on the held-out rows.
            foreach (DataRow row in testingData.Rows)
            {
                // Translate the categorical cells into the codebook's symbols.
                int[] symbolCodes = codebook.Transform(new[, ]
                {
                    { "Pclass", row[pclassColumn].ToString() },
                    { "Title", row[titleColumn].ToString() },
                    { "Sex", row[sexColumn].ToString() },
                    { "Cabin", row[cabinColumn].ToString() },
                    { "Embarked", row[embarkedColumn].ToString() }
                });

                // Same column order as the training inputs.
                int[] query =
                {
                    symbolCodes[0],
                    symbolCodes[1],
                    symbolCodes[2],
                    int.Parse(row[ageColumn].ToString(), invariant),
                    int.Parse(row[sibSpColumn].ToString(), invariant),
                    int.Parse(row[parchColumn].ToString(), invariant),
                    int.Parse(row[fareColumn].ToString(), invariant),
                    symbolCodes[3],
                    symbolCodes[4]
                };

                int predictedValue = tree.Decide(query);
                int actualValue    = int.Parse(row[survivedColumn].ToString(), invariant);

                bool isCorrect  = predictedValue == actualValue;
                bool isPositive = actualValue == 1;

                if (isCorrect && isPositive)
                {
                    truePositives++;
                }
                else if (isCorrect)
                {
                    trueNegatives++;
                }
                else if (isPositive)
                {
                    // Actual value was 1 but the tree predicted otherwise.
                    falseNegatives++;
                }
                else
                {
                    falsePositives++;
                }
            }
        }