/// <summary>
/// Recursively grows the decision tree beneath <paramref name="node"/> from the given samples.
/// </summary>
/// <remarks>
/// Each call adds children to <paramref name="node"/> according to the first rule that applies:
/// <list type="number">
/// <item><description>No samples: nothing is added.</description></item>
/// <item><description>Every sample has the same class name: one child carrying that class name.</description></item>
/// <item><description>The first sample has no features left: one child carrying the result of
/// <c>MajorityClass</c>, flagged with <c>IsMajority = true</c>.</description></item>
/// <item><description>Otherwise: the feature index returned by <c>ChooseBestFeatureToSplit</c> is used,
/// one child is added per distinct value of that feature, and the method recurses on the
/// subset returned by <c>SplitDataSet</c> for that value.</description></item>
/// </list>
/// </remarks>
/// <param name="dataSet">samples to build the subtree from; may be empty</param>
/// <param name="labels">label name for each feature column; this list is not modified</param>
/// <param name="node">the parent node that receives the generated children</param>
private static void CreateTree(List<DataSetItem> dataSet, List<string> labels, DecisionTreeNode node)
{
    var classList = dataSet.Select(r => r.ClassName).ToList();

    // Nothing to classify. Without this guard the "all classes equal" test below
    // would pass for an empty list and then fail on classList[0].
    if (classList.Count == 0)
    {
        return;
    }

    // Stop when every sample belongs to the same class.
    if (!classList.Distinct().Skip(1).Any())
    {
        node.ChildrenList.Add(new DecisionTreeNode() { ClassName = classList[0] });
        return;
    }

    // No more features to split on: fall back to the most frequent class.
    var firstFeatures = dataSet[0].Features;
    if (firstFeatures == null || firstFeatures.Length == 0)
    {
        node.ChildrenList.Add(new DecisionTreeNode()
        {
            ClassName = MajorityClass(classList),
            IsMajority = true
        });
        return;
    }

    int bestIndex = ChooseBestFeatureToSplit(dataSet);
    string bestFeatLabel = labels[bestIndex];

    // Drop the chosen label from a private copy so the caller's list stays intact.
    // NOTE(review): presumably SplitDataSet removes the same feature column so that
    // label indexes keep lining up with feature indexes in the subsets - confirm.
    var remainingLabels = new List<string>(labels);
    remainingLabels.RemoveAt(bestIndex);

    var uniqueVals = dataSet.Select(r => r.Features[bestIndex]).Distinct().ToList();
    foreach (var featVal in uniqueVals)
    {
        var child = new DecisionTreeNode() { label = bestFeatLabel, FeatValue = featVal };
        node.ChildrenList.Add(child);

        // The recursive call never mutates the list it receives, so every branch
        // can share remainingLabels without another copy.
        CreateTree(SplitDataSet(dataSet, bestIndex, featVal), remainingLabels, child);
    }
}