/// <summary>
/// Builds a <see cref="DataSet"/> from a comma-separated text file, one example per line.
/// </summary>
/// <param name="filePath">Path of the CSV file to read.</param>
/// <param name="hasClassLabel">
/// When <c>true</c>, the last field of every line is stored as the example's class label
/// instead of being treated as an attribute.
/// </param>
/// <returns>
/// A data set holding one example per non-blank line, plus one <c>DataAttribute</c> per
/// column that records every value seen in that column.
/// </returns>
/// <exception cref="InvalidDataException">
/// A line has a different number of attributes than the first line, or a class label was
/// expected but the line contains no fields.
/// </exception>
public static DataSet ConstructFromCsv(string filePath, bool hasClassLabel)
{
    var set = new DataSet();
    var contents = File.ReadAllText(filePath);

    // Split on both CR and LF so that Windows (CRLF) files do not leave stray "\r"
    // entries behind for blank lines.
    var entries = contents.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    // null means "not determined yet"; using 0 as the sentinel would let a first
    // line with zero attributes go unchecked against later lines.
    int? correctAttrCount = null;

    foreach (var entry in entries)
    {
        // Whitespace-only lines carry no data; skip them instead of creating
        // an example with a single empty attribute.
        if (entry.Trim().Length == 0)
        {
            continue;
        }

        // Empty fields are discarded here, so a line with a missing value ends up
        // with fewer fields than its neighbours.
        var fields = entry.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

        if (hasClassLabel && fields.Length == 0)
        {
            throw new InvalidDataException("Invalid CSV entry, missing class label.");
        }

        // if we have a class label, the last field is not an attribute
        var attrCount = hasClassLabel ? fields.Length - 1 : fields.Length;

        // make sure that all entries have the same number of attributes
        if (correctAttrCount == null)
        {
            correctAttrCount = attrCount;
        }
        else if (correctAttrCount.Value != attrCount)
        {
            throw new InvalidDataException("Invalid CSV entry, wrong number of attributes.");
        }

        var example = new Example();

        // add all attributes to the example and record each value on its column
        for (var i = 0; i < attrCount; i++)
        {
            var value = fields[i].Trim();
            example.Attributes[i] = value;

            // the first line to reach column i creates that column's DataAttribute
            if (set.Attributes.Count == i)
            {
                set.Attributes.Add(new DataAttribute(i));
            }

            set.Attributes[i].Values.Add(value);
        }

        // add class label if we have it
        if (hasClassLabel)
        {
            example.ClassLabel = fields[attrCount].Trim();
        }

        set.Examples.Add(example);
    }

    return set;
}
/// <summary>
/// Walks down the decision tree, at each node following the child that matches the
/// example's value for that node's column, until a node carrying a class label is reached.
/// </summary>
/// <param name="decisionTree">Node at which the walk starts.</param>
/// <param name="testExample">Example whose attribute values select the branch at each node.</param>
/// <returns>The class label of the first labelled node reached.</returns>
private static string GetClassLabel(TreeNode decisionTree, Example testExample)
{
    var node = decisionTree;

    // A node without a class label is an internal node: descend one level.
    while (node.ClassLabel == null)
    {
        var branchKey = testExample.Attributes[node.ColNum];
        node = node.Children[branchKey];
    }

    return node.ClassLabel;
}