/// <summary>
/// Recursively trains a decision (sub)tree depth-first. For the given node, searches
/// <paramref name="numFeatures"/> candidate features (in parallel) for the threshold split that
/// maximizes the decider's score, then partitions <paramref name="data"/> and recurses on both sides.
/// </summary>
/// <param name="node">Node to train in place; becomes a branch node unless training stops here.</param>
/// <param name="data">Training points reaching this node. Sorted in place when T is comparable.</param>
/// <param name="factory">Factory producing candidate features for the deciders.</param>
/// <param name="numFeatures">Number of candidate features to evaluate at this node.</param>
/// <param name="numThresholds">Number of candidate thresholds per feature.</param>
/// <param name="numLabels">Total number of class labels (length of distributions).</param>
/// <param name="labelWeights">Optional per-label weights applied to leaf distributions; may be null.</param>
/// <param name="depth">Current depth, used for logging and the maximum-depth stopping rule.</param>
/// <returns>
/// The trained node: a branch node with children, or a leaf node holding a label distribution
/// when training stops (delta data, insufficient data, or no usable split); null when
/// <paramref name="data"/> is empty.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown if the chosen decider sends every point to one side, which would loop forever.
/// </exception>
private static DecisionTreeNode<T, D> computeDepthFirst(DecisionTreeNode<T, D> node, List<T> data, IFeatureFactory<T, D> factory, int numFeatures, int numThresholds, int numLabels, float[] labelWeights, int depth)
{
    // NOTE: a GC.Collect() call was removed here. Forcing a full collection on every
    // recursive call is a documented anti-pattern (CA1821-adjacent) that stalls all
    // threads at every tree node for no correctness benefit.
    if (data.Count == 0)
    {
        UpdateManager.WriteLine("No data at depth {0}", depth);
        return null;
    }
    // Sorting groups identical/ordered points together; presumably checkDelta and
    // threshold selection benefit from sorted data — only possible when T is comparable.
    if (data[0] is IComparable<T>)
    {
        data.Sort();
    }
    // All points share one label (a "delta function"): emit a pure leaf distribution.
    if (checkDelta(data))
    {
        UpdateManager.WriteLine("Delta function at depth {0}", depth);
        int label = data[0].Label;
        float[] dist = new float[numLabels];
        dist[label] = 1;
        return new DecisionTreeNode<T, D>(dist);
    }

    int dataCount = data.Count;
    Decider<T, D> bestDecider = null;
    float bestScore = float.MinValue;
    float[] bestLeftDistribution = null;
    float[] bestRightDistribution = null;
    // Each worker thread keeps its own best-so-far result in a ThreadLocal (trackAllValues: true
    // so all per-thread winners can be reduced afterwards); this avoids locking in the loop.
    using (ThreadLocal<DecisionResult> results = new ThreadLocal<DecisionResult>(() => new DecisionResult { Score = bestScore }, true))
    {
        Parallel.For(0, numFeatures, i =>
        {
            float[] leftDistribution;
            float[] rightDistribution;
            Decider<T, D> decider = new Decider<T, D>(factory);
            decider.LoadData(data);
            float score = decider.ChooseThreshold(numThresholds, numLabels, labelWeights, out leftDistribution, out rightDistribution);
            if (score > results.Value.Score)
            {
                results.Value = new DecisionResult
                {
                    LeftDistribution = leftDistribution,
                    RightDistribution = rightDistribution,
                    Decider = decider,
                    Score = score
                };
            }
        });
        // Sequential reduction over the per-thread winners.
        foreach (var result in results.Values)
        {
            if (result.Score > bestScore)
            {
                bestLeftDistribution = result.LeftDistribution;
                bestRightDistribution = result.RightDistribution;
                bestDecider = result.Decider;
                bestScore = result.Score;
            }
        }
    }

    // Weighted support of this node's data.
    // NOTE(review): `support` is computed but never used — the stopping test below uses the raw
    // dataCount even when labelWeights is provided. This looks like an unfinished feature;
    // confirm whether the check should be `support < MinimumSupport`.
    float support = 0;
    if (labelWeights != null)
    {
        foreach (T point in data)
        {
            support += labelWeights[point.Label];
        }
    }
    else
    {
        support = dataCount;
    }

    // Stop and emit a leaf when no split improved on the sentinel score or too little data remains.
    if (bestScore == float.MinValue || dataCount < MinimumSupport)
    {
        UpdateManager.WriteLine("Stopping due to lack of data at depth {0}, {1} < {2}", depth, dataCount, MinimumSupport);
        float[] distribution = new float[numLabels];
        for (int i = 0; i < dataCount; i++)
        {
            distribution[data[i].Label]++;
        }
        if (labelWeights != null)
        {
            for (int i = 0; i < distribution.Length; i++)
            {
                distribution[i] *= labelWeights[i];
            }
        }
        return new DecisionTreeNode<T, D>(distribution);
    }

    // At the penultimate depth, children become leaves directly from the split's distributions.
    if (depth == MaximumDepth - 2)
    {
        UpdateManager.WriteLine("Last branch node trained at depth {0}", depth);
        node.Left = new DecisionTreeNode<T, D>(bestLeftDistribution);
        node.Right = new DecisionTreeNode<T, D>(bestRightDistribution);
        node.NodeType = NodeType.Branch;
        node.Decider = bestDecider;
        return node;
    }

    // Partition the data with the winning decider and recurse on both halves.
    Decision[] decisions = bestDecider.Decide(data);
    List<T> leftData = new List<T>();
    List<T> rightData = new List<T>();
    for (int i = 0; i < decisions.Length; i++)
    {
        if (decisions[i] == Decision.Left)
        {
            leftData.Add(data[i]);
        }
        else
        {
            rightData.Add(data[i]);
        }
    }
    // A one-sided split would recurse on the same data forever; this should be unreachable
    // because such a split cannot beat the sentinel score.
    if (leftData.Count == 0 || rightData.Count == 0)
    {
        throw new InvalidOperationException("Best split sends all data to one side; cannot partition for recursion.");
    }
    UpdateManager.WriteLine("Branch node at depth {0} trained.", depth);
    node.Left = computeDepthFirst(new DecisionTreeNode<T, D>(), leftData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
    node.Right = computeDepthFirst(new DecisionTreeNode<T, D>(), rightData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
    node.Decider = bestDecider;
    node.NodeType = NodeType.Branch;
    return node;
}
/// <summary>
/// Creates a branch (internal) node that routes points through <paramref name="decider"/>
/// to one of two child subtrees.
/// </summary>
/// <param name="decider">Learned split used to send each point left or right.</param>
/// <param name="left">Subtree receiving points the decider sends left.</param>
/// <param name="right">Subtree receiving points the decider sends right.</param>
public DecisionTreeNode(Decider<T, D> decider, DecisionTreeNode<T, D> left, DecisionTreeNode<T, D> right)
{
    _decider = decider;
    _left = left;
    _right = right;
    _type = NodeType.Branch;
}