public Accord.MachineLearning.DecisionTrees.DecisionTree Learn(string[][] records, string[] columnNamesWithoutResult, ref Codification codebook)
{
    // The first record holds the column names; use it to build the table schema.
    DataTable data = new DataTable();
    foreach (var columnName in records[0])
    {
        data.Columns.Add(columnName);
    }

    // Add every record after the header row to the table.
    int rowsAdderCounter = 0;
    foreach (var record in records)
    {
        if (rowsAdderCounter == 0)
        {
            rowsAdderCounter++;
            continue;
        }

        data.Rows.Add(record);
    }

    // Extract the input features and translate the result column into integer class labels.
    double[][] inputs = data.ToJagged(columnNamesWithoutResult);
    string[] labels = data.ToArray<string>(Constants.RESULT_COLUMN_NAME);
    int[] outputs = codebook.Translate(Constants.RESULT_COLUMN_NAME, labels);

    // Treat every input column as a continuous decision variable for C4.5.
    var teacher = new C45Learning();
    foreach (var columnName in columnNamesWithoutResult)
    {
        DecisionVariable decVar = new DecisionVariable(columnName, DecisionVariableKind.Continuous);
        teacher.Add(decVar);
    }

    // Induce the decision tree from the data.
    Accord.MachineLearning.DecisionTrees.DecisionTree tree = teacher.Learn(inputs, outputs);
    return tree;
}
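// A minimal usage sketch for the Learn method above, not part of the original source.
// LearnFromCsv and its path parameter are hypothetical names introduced for illustration;
// Constants.RESULT_COLUMN_NAME and the ref Codification codebook come from the snippet above.
// Requires System.Linq.
public Accord.MachineLearning.DecisionTrees.DecisionTree LearnFromCsv(string path, ref Codification codebook)
{
    // Read a CSV whose first line is the header row, which is the layout Learn expects
    // (records[0] holds the column names).
    string[][] records = System.IO.File.ReadAllLines(path)
        .Select(line => line.Split(','))
        .ToArray();

    // Every column except the result column is treated as an input feature.
    string[] inputColumns = records[0]
        .Where(c => c != Constants.RESULT_COLUMN_NAME)
        .ToArray();

    // The codebook is assumed to already know the result column's symbols,
    // so that Translate can map them to integer class labels inside Learn.
    return Learn(records, inputColumns, ref codebook);
}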
public ClassifierTitanic()
{
    rawData = new DataTable("Titanic Data");
    trainingData = new DataTable();
    testingData = new DataTable();

    string filedata = System.IO.File.ReadAllText("../titanicData.txt");
    string[] dataColumns = System.IO.File.ReadAllText("../titanicColumns.txt").Split(',');

    // Input columns are the features we learn from.
    string[] inputColumns = new string[dataColumns.Length - 1];
    Array.Copy(dataColumns, 0, inputColumns, 0, dataColumns.Length - 1);

    // The output column is what we are trying to predict.
    string outputColumn = dataColumns[dataColumns.Length - 1];

    // Use DataTables as an easy way to store and manipulate the data.
    rawData.Columns.Add(inputColumns);
    rawData.Columns.Add(outputColumn);
    trainingData.Columns.Add(inputColumns);
    trainingData.Columns.Add(outputColumn);
    testingData.Columns.Add(inputColumns);
    testingData.Columns.Add(outputColumn);

    string[] lines = filedata.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var line in lines)
    {
        rawData.Rows.Add(line.Split(','));
    }

    // Clean up the data representation and missing values, then split the rows
    // into training and testing sets (80/20).
    rawData = cleanData(rawData);
    DataTable[] dt = splitDataForTraining(rawData, .8, inputColumns, outputColumn);
    trainingData = dt[0];
    testingData = dt[1];

    // Translate the categorical training data into integer symbols.
    codebook = new Codification(trainingData);
    DataTable symbols = codebook.Apply(trainingData);
    int[][] inputs = symbols.ToJagged<int>("Pclass", "Title", "Sex", "Age", "SibSp", "Parch", "Fare", "Cabin", "Embarked");
    int[] outputs = symbols.ToArray<int>("Survived");

    // We can either specify the decision attributes we want manually,
    // or we can ask the codebook to do it for us:
    DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook, inputColumns);

    // Create a teaching algorithm. Categorical columns use the codebook's attributes
    // (indexed by their position in the raw data), while numeric columns are declared
    // as ranged decision variables.
    var teacher = new C45Learning();
    teacher.Add(attributes[0]);  // Pclass
    teacher.Add(attributes[1]);  // Title
    teacher.Add(attributes[4]);  // Sex
    teacher.Add(new DecisionVariable("Age", new DoubleRange(0, 99)));
    teacher.Add(new DecisionVariable("SibSp", new DoubleRange(0, 10)));
    teacher.Add(new DecisionVariable("Parch", new DoubleRange(0, 10)));
    teacher.Add(new DecisionVariable("Fare", new DoubleRange(0, 400)));
    teacher.Add(attributes[10]); // Cabin
    teacher.Add(attributes[11]); // Embarked

    // ... and induce a decision tree from the data:
    DecisionTree tree = teacher.Learn(inputs, outputs);

    // To get the estimated class labels, we can use:
    int[] predicted = tree.Decide(inputs);

    // Moreover, we may decide to convert our tree to a set of rules:
    DecisionSet rules = tree.ToRules();

    // And using the codebook, we can inspect the tree reasoning:
    string ruleText = rules.ToString(codebook, "Survived", System.Globalization.CultureInfo.InvariantCulture);

    // Evaluate the tree on the held-out testing rows and accumulate a confusion matrix.
    foreach (DataRow d in testingData.Rows)
    {
        // Categorical values must go through the codebook before they can be fed to the tree.
        int[] tempVars = codebook.Transform(new[,]
        {
            { "Pclass", d[0].ToString() },
            { "Title", d[1].ToString() },
            { "Sex", d[4].ToString() },
            { "Cabin", d[10].ToString() },
            { "Embarked", d[11].ToString() }
        });

        // Build the query in the same column order the tree was trained with.
        int[] query =
        {
            tempVars[0], tempVars[1], tempVars[2],
            int.Parse(d[5].ToString()),
            int.Parse(d[6].ToString()),
            int.Parse(d[7].ToString()),
            int.Parse(d[9].ToString()),
            tempVars[3], tempVars[4]
        };

        int predictedValue = tree.Decide(query);
        int actualValue = int.Parse(d[12].ToString());

        if (predictedValue == actualValue)
        {
            if (actualValue == 1) { truePositives++; }
            else { trueNegatives++; }
        }
        else
        {
            if (actualValue == 1) { falseNegatives++; }
            else { falsePositives++; }
        }
    }

    //// The training classification error can be computed as
    //double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

    //// To compute a decision for one of the input points,
    //// such as the 25-th example in the set, we can use
    //int y = tree.Decide(inputs[25]); // should be 1

    //// Alternative: train with the ID3 algorithm instead of C4.5
    //int[][] inputs = symbols.ToJagged<int>("???");
    //int[] outputs = symbols.ToArray<int>("Survived");
    //string[] decisionVariables = { "???" };
    //DecisionVariable[] attributes = DecisionVariable.FromCodebook(codebook, decisionVariables);

    //// Create a teacher ID3 algorithm
    //var id3learning = new ID3Learning(attributes);
    //tree = id3learning.Learn(inputs, outputs);

    //// Compute the training error when predicting training instances
    //double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));
}
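// Hedged follow-up sketch, not part of the original source: one way to turn the confusion-matrix
// counters accumulated in the constructor above into summary metrics. The EvaluationSummary name
// is an assumption for illustration; truePositives, trueNegatives, falsePositives and
// falseNegatives are the class fields incremented while scoring the testing rows.
public string EvaluationSummary()
{
    double total = truePositives + trueNegatives + falsePositives + falseNegatives;

    // Standard binary-classification metrics derived from the confusion matrix.
    double accuracy = (truePositives + trueNegatives) / total;
    double precision = truePositives / (double)(truePositives + falsePositives);
    double recall = truePositives / (double)(truePositives + falseNegatives);

    return string.Format(
        "Accuracy: {0:P1}, Precision: {1:P1}, Recall: {2:P1}",
        accuracy, precision, recall);
}

// The training error on the data the tree was fit on could be computed inside the constructor,
// where the tree, inputs and outputs locals are in scope, as the commented-out lines suggest:
// double trainingError = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));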