Example #1
0
        /// <summary>
        /// Recursively grows a decision tree beneath <paramref name="node"/> from the rows in
        /// <paramref name="dataSet"/>.
        /// </summary>
        /// <remarks>
        /// Each call appends children to <c>node.ChildrenList</c>:
        /// a single leaf carrying the class name when every row has the same class;
        /// a single leaf flagged <c>IsMajority</c> when there is no feature left to split on;
        /// otherwise one child per distinct value of the chosen feature, each expanded recursively.
        /// </remarks>
        /// <param name="dataSet">rows to partition; an empty set adds nothing to <paramref name="node"/></param>
        /// <param name="labels">label name for each feature column; this list is not modified</param>
        /// <param name="node">the parent node that receives the generated children</param>
        private static void CreateTree(List <DataSetItem> dataSet, List <string> labels, DecisionTreeNode node)
        {
            var classList = (from r in dataSet select r.ClassName).ToList();

            if (classList.Count == 0)
            {
                // nothing to classify: without this guard classList[0] below would throw
                return;
            }

            if (classList.Distinct().Count() == 1)
            {
                // stop when all classes are equal
                DecisionTreeNode leaf = new DecisionTreeNode()
                {
                    ClassName = classList[0]
                };
                node.ChildrenList.Add(leaf);
                return;
            }

            bool noFeaturesLeft = dataSet[0].Features == null || dataSet[0].Features.Length == 0;

            // NOTE(review): a negative index is treated as "no usable split" so that it cannot
            // reach labels[bestIndex]; confirm against ChooseBestFeatureToSplit's contract.
            int bestIndex = noFeaturesLeft ? -1 : ChooseBestFeatureToSplit(dataSet);

            if (bestIndex < 0)
            {
                // no more features to split on: use the most frequent class as the class name
                DecisionTreeNode leaf = new DecisionTreeNode()
                {
                    ClassName = MajorityClass(classList), IsMajority = true
                };
                node.ChildrenList.Add(leaf);
                return;
            }

            string bestFeatLabel = labels[bestIndex];

            // Work on a private copy so the caller's label list is left untouched.
            List <string> remainingLabels = labels.ToList();
            remainingLabels.RemoveAt(bestIndex);

            var uniqueVals = (from r in dataSet select r.Features[bestIndex]).Distinct().ToList();

            foreach (var featVal in uniqueVals)
            {
                DecisionTreeNode child = new DecisionTreeNode()
                {
                    label = bestFeatLabel, FeatValue = featVal
                };
                node.ChildrenList.Add(child);

                // Safe to share remainingLabels across siblings: CreateTree never mutates its labels argument.
                CreateTree(SplitDataSet(dataSet, bestIndex, featVal), remainingLabels, child);
            }
        }