/// <summary>
/// Tries every (object, feature index) pair in <paramref name="dataSet"/> as a
/// threshold and keeps the candidate whose split reports the lowest value from
/// <c>GetWeightedEntropy()</c>.
/// </summary>
/// <param name="dataSet">Objects to search for a split.</param>
/// <param name="splitResult">
/// The split produced by the returned decision. When no candidate scores strictly
/// below <c>dataSet.GetEntropy()</c>, this is a <see cref="NodeSplit"/> built from the
/// whole data set and an empty array.
/// </param>
/// <returns>
/// The winning decision, or a decision that accepts every object when no candidate
/// scores strictly below the data set's own entropy.
/// </returns>
internal static Decision GetBestDecisionFromDataSet(
    IReadOnlyCollection<DataSetObject> dataSet,
    out NodeSplit splitResult)
{
    // A candidate evaluates to true for objects whose feature value is strictly
    // greater than the pivot's value at the same index.
    Decision BuildThreshold(DataSetObject pivot, int featureIndex) =>
        candidate => pivot.Features[featureIndex] < candidate.Features[featureIndex];

    // Baseline: the unsplit data set. Only strictly better candidates replace it.
    double lowestEntropy = dataSet.GetEntropy();
    Decision winner = k => true;
    splitResult = new NodeSplit(dataSet, Array.Empty<DataSetObject>());

    foreach (var pivot in dataSet)
    {
        for (int featureIndex = 0; featureIndex < pivot.Features.Length; featureIndex++)
        {
            Decision candidateDecision = BuildThreshold(pivot, featureIndex);
            var candidateSplit = dataSet.SplitByDecision(candidateDecision);
            var candidateEntropy = candidateSplit.GetWeightedEntropy();

            // Ties keep the earlier candidate (or the baseline).
            if (!(candidateEntropy < lowestEntropy))
            {
                continue;
            }

            lowestEntropy = candidateEntropy;
            winner = candidateDecision;
            splitResult = candidateSplit;
        }
    }

    return winner;
}
/// <summary>
/// Recursively builds the tree item for <paramref name="dataSet"/>.
/// </summary>
/// <remarks>
/// A leaf is produced when every object carries the same label, when
/// <paramref name="levelAvailableCount"/> equals 1, or when
/// <paramref name="curSplit"/> has an empty part (nothing left to separate).
/// Otherwise an inner node is produced whose children are built from the two
/// parts of <paramref name="curSplit"/>.
/// </remarks>
/// <param name="decision">Decision stored in the node created for this data set.</param>
/// <param name="dataSet">Objects covered by this item; must contain at least one element.</param>
/// <param name="curSplit">Split of <paramref name="dataSet"/> whose parts feed the two children.</param>
/// <param name="levelAvailableCount">
/// Remaining depth budget; a value of 1 forces a leaf. It is decremented by one for
/// each level of recursion.
/// </param>
/// <returns>A leaf or an inner node for <paramref name="dataSet"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>First()</c> when <paramref name="dataSet"/> is empty.
/// </exception>
private static DecisionTreeItem InitializeTreeItem(
    Decision decision,
    IReadOnlyCollection<DataSetObject> dataSet,
    NodeSplit curSplit,
    int levelAvailableCount)
{
    var label = dataSet.First().Label;
    if (dataSet.All(k => k.Label == label))
    {
        return new DecisionTreeLeaf(label);
    }

    if (levelAvailableCount == 1)
    {
        return new DecisionTreeLeaf(dataSet.GetClassByMaxCount());
    }

    IReadOnlyCollection<DataSetObject> leftPart = curSplit.LeftPart;
    IReadOnlyCollection<DataSetObject> rightPart = curSplit.RightPart;

    // A split with an empty part separates nothing. Recursing into the empty part
    // would call First() on an empty collection and throw, so stop with a
    // majority-class leaf instead.
    if (leftPart.Count == 0 || rightPart.Count == 0)
    {
        return new DecisionTreeLeaf(dataSet.GetClassByMaxCount());
    }

    int childLevelCount = levelAvailableCount - 1;

    var leftDecision = DecisionTreeItem.GetBestDecisionFromDataSet(leftPart, out var leftSplit);
    var leftChild = InitializeTreeItem(leftDecision, leftPart, leftSplit, childLevelCount);

    var rightDecision = DecisionTreeItem.GetBestDecisionFromDataSet(rightPart, out var rightSplit);
    var rightChild = InitializeTreeItem(rightDecision, rightPart, rightSplit, childLevelCount);

    return new DecisionTreeNode(leftChild, rightChild, decision, dataSet.GetClassByMaxCount());
}