/// <summary>
/// Recursively builds a decision (sub)tree from the supplied training instances.
/// </summary>
/// <param name="instanceList">the training instances that reached this node</param>
/// <param name="attributeList">the attributes that are still available to split on</param>
/// <param name="classAttribute">the attribute that holds the class label</param>
/// <returns>
/// A leaf node (<c>Children == null</c>) labelled with a class value, or an internal node
/// labelled with the name of the chosen split attribute and holding one child per value
/// of that attribute.
/// </returns>
private Node Learn(Instance[] instanceList, Arff.Attribute[] attributeList, Arff.Attribute classAttribute)
{
    // compute the class distribution of all the examples
    var classDistribution = GetClassDistribution(instanceList, classAttribute);
    string[] classesWithExamples = classDistribution.Where(a => a.Value > 0).Select(a => a.Key).ToArray();

    if (1 == classesWithExamples.Length)
    {
        // all examples belong to the same class so we have reached a leaf node
        Logger.Log(LogLevel.Progress, ".");
        return new Node() { Label = classesWithExamples[0], Children = null };
    }

    // OrderBy is a stable sort, so on a tie the last entry with the maximum count wins
    string mostCommonClass = classDistribution.OrderBy(a => a.Value).Last().Key;

    if (null == attributeList || 0 == attributeList.Length)
    {
        // no more attributes to split on
        Logger.Log(LogLevel.Progress, ".");
        return new Node() { Label = mostCommonClass, Children = null };
    }

    var decisionAttribute = GetDecisionAttribute(instanceList, attributeList, classDistribution, classAttribute);
    if (null == decisionAttribute)
    {
        // can't find an attribute to split on that passes the split-termination condition
        Logger.Log(LogLevel.Progress, ".");
        return new Node() { Label = mostCommonClass, Children = null };
    }

    // recursively build the tree
    var root = new Node() { Label = decisionAttribute.Name, Children = new Dictionary<string, Node>() };

    // pre-process the instances: bucket them by their value of the decision attribute
    var instanceGroups = instanceList
        .GroupBy(a => a.Data[decisionAttribute.Name])
        .ToDictionary(g => g.Key, v => v.ToArray());

    // The attributes left for the sub-trees are the same for every branch, so the
    // filtered array is built once instead of once per attribute value.
    // NOTE(review): the array is now shared by all sibling calls; this method only reads
    // it, and GetDecisionAttribute is assumed not to mutate it - confirm.
    Arff.Attribute[] remainingAttributes = attributeList
        .Where(a => !a.Name.Equals(decisionAttribute.Name))
        .ToArray();

    // build the sub-trees
    foreach (string value in decisionAttribute.Values)
    {
        Instance[] subset;
        Node childNode;

        // single dictionary lookup instead of ContainsKey followed by the indexer
        if (instanceGroups.TryGetValue(value, out subset) && subset.Length > 0)
        {
            // build the subtree recursively
            childNode = this.Learn(subset, remainingAttributes, classAttribute);
        }
        else
        {
            // if there are no examples for this value, label the branch
            // with the most common class of the current instances
            childNode = new Node() { Label = mostCommonClass, Children = null };
        }

        root.Children.Add(value, childNode);
    }

    return root;
}
/// <summary>
/// Initializes a new instance of the <see cref="DTClassifier"/> class.
/// </summary>
/// <param name="model">the trained model; the reference is stored as-is in this classifier</param>
public DTClassifier(Node model)
{
    this.model = model;
}