Beispiel #1
0
        /// <summary>
        /// Searches every (object, feature index) pair of <paramref name="dataSet"/> for the
        /// threshold decision whose split yields the lowest weighted entropy.
        /// </summary>
        /// <param name="dataSet">Objects to split; each object's feature values serve as candidate thresholds.</param>
        /// <param name="splitResult">
        /// Receives the split produced by the returned decision. When no candidate strictly lowers
        /// the entropy of <paramref name="dataSet"/>, this is a <c>NodeSplit</c> built from the whole
        /// data set and an empty array.
        /// </param>
        /// <returns>
        /// The best candidate decision, or an always-true decision when no candidate strictly
        /// improves on the entropy of the unsplit data set.
        /// </returns>
        internal static Decision GetBestDecisionFromDataSet(IReadOnlyCollection<DataSetObject> dataSet, out NodeSplit splitResult)
        {
            // Baseline to beat: the entropy of the data set without any split.
            double lowestEntropy = dataSet.GetEntropy();
            Decision winner = k => true;

            splitResult = new NodeSplit(dataSet, Array.Empty<DataSetObject>());

            foreach (var pivot in dataSet)
            {
                for (int featureIndex = 0; featureIndex < pivot.Features.Length; featureIndex++)
                {
                    // Copy the loop counter so the lambda captures this iteration's value
                    // rather than the shared, mutating loop variable.
                    int capturedIndex = featureIndex;
                    Decision candidate = other => pivot.Features[capturedIndex] < other.Features[capturedIndex];

                    var candidateSplit = dataSet.SplitByDecision(candidate);
                    var candidateEntropy = candidateSplit.GetWeightedEntropy();

                    // Only a strict improvement replaces the current best, so the first
                    // candidate reaching a given entropy wins ties.
                    if (!(candidateEntropy < lowestEntropy))
                    {
                        continue;
                    }

                    lowestEntropy = candidateEntropy;
                    winner = candidate;
                    splitResult = candidateSplit;
                }
            }

            return winner;
        }
Beispiel #2
0
        /// <summary>
        /// Recursively builds the decision-tree item (leaf or inner node) for <paramref name="dataSet"/>.
        /// </summary>
        /// <param name="decision">Decision stored in the created node when the data set is split further.</param>
        /// <param name="dataSet">Objects that reached this point of the tree; must not be empty.</param>
        /// <param name="curSplit">
        /// Split whose <c>LeftPart</c> and <c>RightPart</c> feed the two children
        /// (presumably the split of <paramref name="dataSet"/> by <paramref name="decision"/> — confirm against the caller).
        /// </param>
        /// <param name="levelAvailableCount">Remaining depth budget; a value of 1 forces a leaf.</param>
        /// <returns>
        /// A leaf when all labels agree, when the depth budget is exhausted, or when
        /// <paramref name="curSplit"/> does not actually separate the data; otherwise a node with two
        /// recursively built children.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when <paramref name="dataSet"/> is empty.</exception>
        private static DecisionTreeItem InitializeTreeItem(Decision decision, IReadOnlyCollection<DataSetObject> dataSet, NodeSplit curSplit, int levelAvailableCount)
        {
            var label = dataSet.First().Label;

            // Pure data set: every object carries the same label, nothing left to decide.
            if (dataSet.All(k => k.Label == label))
            {
                return new DecisionTreeLeaf(label);
            }

            // Depth budget exhausted: fall back to the majority class.
            if (levelAvailableCount == 1)
            {
                return new DecisionTreeLeaf(dataSet.GetClassByMaxCount());
            }

            IReadOnlyCollection<DataSetObject> leftPart = curSplit.LeftPart;
            IReadOnlyCollection<DataSetObject> rightPart = curSplit.RightPart;

            // GetBestDecisionFromDataSet falls back to a split with an empty second part when no
            // candidate lowers the entropy (e.g. identical features with different labels).
            // Recursing would call First() on an empty collection and throw, and would re-process
            // the unchanged set on the other side, so stop with a majority-class leaf instead.
            if (leftPart.Count == 0 || rightPart.Count == 0)
            {
                return new DecisionTreeLeaf(dataSet.GetClassByMaxCount());
            }

            levelAvailableCount--;

            var leftDecision = DecisionTreeItem.GetBestDecisionFromDataSet(leftPart, out var leftSplit);
            var leftChild = InitializeTreeItem(leftDecision, leftPart, leftSplit, levelAvailableCount);

            var rightDecision = DecisionTreeItem.GetBestDecisionFromDataSet(rightPart, out var rightSplit);
            var rightChild = InitializeTreeItem(rightDecision, rightPart, rightSplit, levelAvailableCount);

            return new DecisionTreeNode(leftChild, rightChild, decision, dataSet.GetClassByMaxCount());
        }