public void Learn() { var rootMask = new BitArrayX(input.Length, true); Root = new DecisionNode { SubsetMask = new SubsetMask(rootMask, input.Length, output, outputFactorsCount), UsedFactors = new BitArrayX(inputFactorsCount) }; var stack = new Stack <DecisionNode>(); stack.Push(Root); while (stack.Count > 0) { var node = stack.Pop(); SplitNode(node); if (node.IsLeaf) { continue; } stack.Push(node.Present); stack.Push(node.Absent); } }
private static IEnumerable <DecisionNode> GetChildren(DecisionNode node) { if (!node.IsLeaf) { yield return(node.Present); yield return(node.Absent); } }
private string ToString(DecisionNode node) { var factor = node.IsLeaf ? OutputFactors[node.Factor] : InputFactors[node.Factor]; return($"[{indices[node]}] " + (node.IsLeaf ? $"Return {factor}" : $"If {factor} present {indices[node.Present]}, absent {indices[node.Absent]}")); }
private void SplitNode(DecisionNode node) { var mask = node.SubsetMask.Mask; var size = node.SubsetMask.Size; if (node.SubsetMask.EntropyZeroFactor >= 0) { node.IsLeaf = true; node.Factor = node.SubsetMask.EntropyZeroFactor; return; } var bestInformationGain = double.MinValue; var bestFactor = -1; var bestPresentSplitMask = default(SubsetMask); var bestAbsentSplitMask = default(SubsetMask); for (var inputFactor = 0; inputFactor < inputFactorsCount; inputFactor++) { if (node.UsedFactors[inputFactor]) { continue; } var factorPresentMask = new BitArrayX(mask).And(factorPresentMasks[inputFactor]); var factorAbsentMask = new BitArrayX(mask).AndNot(factorPresentMask); var factorPresentCount = factorPresentMask.CountBitSet(); var factorAbsentCount = size - factorPresentCount; var factorPresentSubset = new SubsetMask(factorPresentMask, factorPresentCount, output, outputFactorsCount); var factorAbsentSubset = new SubsetMask(factorAbsentMask, factorAbsentCount, output, outputFactorsCount); var informationGain = node.SubsetMask.Entropy - (factorPresentSubset.Entropy * factorPresentCount / size) - (factorAbsentSubset.Entropy * factorAbsentCount / size); if (informationGain > bestInformationGain) { bestInformationGain = informationGain; bestFactor = inputFactor; bestPresentSplitMask = factorPresentSubset; bestAbsentSplitMask = factorAbsentSubset; } } if (bestFactor == -1) { var counts = new int[outputFactorsCount]; for (var i = 0; i < output.Length; i++) { if (mask[i]) { counts[output[i]]++; } } node.IsLeaf = true; node.Factor = counts.IndexOfMax(); return; } var childUsedFactors = new BitArrayX(node.UsedFactors) { [bestFactor] = true }; node.Factor = bestFactor; node.Present = new DecisionNode { SubsetMask = bestPresentSplitMask, Factor = bestFactor, Parent = node, UsedFactors = childUsedFactors, }; node.Absent = new DecisionNode { SubsetMask = bestAbsentSplitMask, Factor = bestFactor, Parent = node, UsedFactors = childUsedFactors, }; }