/// <summary>
/// Builds a <see cref="DataSet"/> from a comma-separated text file, one example per line.
/// </summary>
/// <param name="filePath">Path of the CSV file to read.</param>
/// <param name="hasClassLabel">
/// When true, the last field of every line is stored as the example's class label
/// instead of being treated as an attribute.
/// </param>
/// <returns>
/// A data set containing one example per non-blank line, plus one attribute entry per
/// column that records every value read for that column.
/// </returns>
/// <exception cref="InvalidDataException">
/// A line has a different number of attributes than the first non-blank line, or a line
/// has no fields at all while a class label is expected.
/// </exception>
public static DataSet ConstructFromCsv(string filePath, bool hasClassLabel)
{
    var set = new DataSet();

    // ReadAllLines splits on "\n", "\r\n" and "\r", so Windows line endings do not
    // leave stray "\r" entries behind.
    var entries = File.ReadAllLines(filePath);

    // null means "no line seen yet"; a plain 0 would be ambiguous with a first line
    // that legitimately has zero attributes.
    int? correctAttrCount = null;

    foreach (var entry in entries)
    {
        // blank or whitespace-only lines carry no data
        if (string.IsNullOrWhiteSpace(entry))
        {
            continue;
        }

        var example = new Example();
        var fields = entry.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

        // a line made only of separators has nothing that could serve as the class label
        if (hasClassLabel && fields.Length == 0)
        {
            throw new InvalidDataException("Invalid CSV entry, missing class label.");
        }

        // if we have a class label, the last field is not an attribute
        var attrCount = hasClassLabel ? fields.Length - 1 : fields.Length;

        // make sure that all entries have the same number of attributes
        if (correctAttrCount == null)
        {
            correctAttrCount = attrCount;
        }
        else if (correctAttrCount.Value != attrCount)
        {
            throw new InvalidDataException("Invalid CSV entry, wrong number of attributes.");
        }

        // add all attributes to example
        for (var i = 0; i < attrCount; i++)
        {
            var value = fields[i].Trim();
            example.Attributes[i] = value;

            // first time this column index is seen: register its attribute
            if (set.Attributes.Count == i)
            {
                set.Attributes.Add(new DataAttribute(i));
            }

            set.Attributes[i].Values.Add(value);
        }

        // add class label if we have it
        if (hasClassLabel)
        {
            example.ClassLabel = fields[attrCount].Trim();
        }

        set.Examples.Add(example);
    }

    return set;
}
/// <summary>
/// Classifies test data examples using decision tree, and outputs results to stdout.
/// </summary>
/// <param name="decisionTree">Tree handed to GetClassLabel for every example.</param>
/// <param name="testData">Data set whose examples are classified and printed.</param>
/// <exception cref="Exception">
/// Anything went wrong while classifying or printing; the original failure is available
/// through <see cref="Exception.InnerException"/>.
/// </exception>
public static void Run(TreeNode decisionTree, DataSet testData)
{
    try
    {
        Console.WriteLine();

        foreach (var example in testData.Examples)
        {
            var classLabel = GetClassLabel(decisionTree, example);

            // print attribute values in key order so the columns line up across examples
            var attrs = example.Attributes.OrderBy(x => x.Key).Select(x => x.Value);

            Console.WriteLine("{0} ==> {1}", string.Join(",", attrs), classLabel);
        }
    }
    catch (Exception ex)
    {
        // Failures here are reported as invalid inputs; keep the underlying exception
        // as the inner exception so the real cause is not lost.
        throw new Exception("Test data cannot be used with decision tree - one of them is invalid.", ex);
    }
}
/// <summary>
/// Builds a decision tree from the supplied training data.
/// </summary>
/// <param name="trainingData">Data set whose examples and attributes are handed to LearnInternal.</param>
/// <returns>The tree node returned by LearnInternal.</returns>
public static TreeNode ConstructDecisionTree(DataSet trainingData)
{
    var examples = trainingData.Examples;
    var attributes = trainingData.Attributes;

    // The third argument always starts out as an empty list; its meaning is defined by LearnInternal.
    var tree = LearnInternal(examples, attributes, new List<Example>());
    return tree;
}