/// <summary>
/// Forwards <paramref name="dist"/> to <paramref name="node"/>'s AddDistribution.
/// A null <paramref name="node"/> is ignored.
/// </summary>
/// <param name="node">The node that receives the distribution; may be null.</param>
/// <param name="dist">The distribution handed to AddDistribution.</param>
public void addParent(DecisionVineNode<T, D> node, float[] dist)
{
    if (node == null)
    {
        return;
    }

    node.AddDistribution(dist);
}
/// <summary>
/// Construct a Decision Vine using the LSearch methodology.
/// </summary>
/// <remarks>
/// The vine is built level by level. Level 0 holds only the root; level <c>i</c> holds
/// <c>min(2^i, maxChildren)</c> nodes. Optimization iterations are only requested for levels
/// whose width has reached <paramref name="maxChildren"/>; narrower levels are trained with
/// zero iterations. Only the nodes of the final level have their distributions normalized here.
/// </remarks>
/// <param name="data">The data to use in training the vine</param>
/// <param name="factory">The feature factory to use when creating decision stumps</param>
/// <param name="numFeatures">The number of potential features to try</param>
/// <param name="numThresholds">The number of thresholds to try per feature</param>
/// <param name="maxChildren">The maximum allowed number of children</param>
/// <param name="maximumDepth">The number of levels in the vine, including the root level; must be at least 1</param>
/// <param name="maxIterations">The number of optimization iterations to perform per level</param>
/// <param name="numLabels">The number of labels found in the data</param>
/// <returns>The Decision Vine</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maximumDepth"/> is less than 1.</exception>
public static DecisionVine<T, D> ConstructUsingLSearch(List<T> data, IFeatureFactory<T, D> factory, int numFeatures, int numThresholds, int maxChildren, int maximumDepth, int maxIterations, int numLabels)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (maximumDepth < 1)
    {
        // levels[0] always holds the root, so at least one level is required.
        throw new ArgumentOutOfRangeException("maximumDepth", maximumDepth, "The vine needs at least one level (the root).");
    }

    UpdateManager.WriteLine("Training Decision Vine with {0} data points...", data.Count);

    DecisionVineNode<T, D> root = new DecisionVineNode<T, D>();
    root.Data = data;
    root.NodeType = NodeType.Branch;
    root.Distribution = data.ComputeDistribution<T, D>(numLabels);

    DecisionVineNode<T, D>[][] levels = new DecisionVineNode<T, D>[maximumDepth][];
    levels[0] = new DecisionVineNode<T, D>[] { root };

    for (int i = 1; i < maximumDepth; i++)
    {
        // 1 << i is only a valid positive int for i <= 30. For i == 31 it is negative and for
        // larger i the shift count wraps (1 << 32 == 1), so beyond that point the width is
        // simply capped at maxChildren, which is what min(2^i, maxChildren) evaluates to.
        int numChildren = i < 31 ? Math.Min(1 << i, maxChildren) : maxChildren;

        // Optimization is skipped while the level is still narrower than maxChildren.
        int numIterations = numChildren < maxChildren ? 0 : maxIterations;

        UpdateManager.WriteLine("Training level {0} with {1} children and {2} optimization iterations...", i, numChildren, numIterations);
        levels[i] = computeLSearchLevel(levels[i - 1], factory, numChildren, numFeatures, numLabels, numThresholds, numIterations);
        UpdateManager.WriteLine("Level {0} complete with entropy {1}", i, computeEntropy(levels[i]));
        UpdateManager.WriteLine("Data distribution: [{0}]", string.Join(",", levels[i].Select(o => o.Data.Count)));
    }

    // Normalize the distributions stored on the nodes of the deepest level.
    foreach (var node in levels[maximumDepth - 1])
    {
        node.Distribution = node.Distribution.Normalize();
    }

    UpdateManager.WriteLine("Complete.");
    return new DecisionVine<T, D>(levels);
}
/// <summary>
/// Forwards <paramref name="dist"/> to <paramref name="node"/>'s RemoveDistribution.
/// A null <paramref name="node"/> is ignored.
/// </summary>
/// <param name="node">The node the distribution is removed from; may be null.</param>
/// <param name="dist">The distribution handed to RemoveDistribution.</param>
public void removeParent(DecisionVineNode<T, D> node, float[] dist)
{
    if (node == null)
    {
        return;
    }

    node.RemoveDistribution(dist);
}
/// <summary>
/// Picks a decider for <paramref name="node"/> and records how it splits the node's data.
/// </summary>
/// <remarks>
/// <paramref name="numFeatures"/> candidates are evaluated in parallel. Each worker thread keeps
/// its own <c>DeciderState</c> and remembers the candidate with the lowest energy it has seen;
/// afterwards the state with the lowest <c>BestEnergy</c> across all threads supplies the decider.
/// The chosen decider is then run over <c>node.Data</c> and the per-label counts of items sent
/// left and right are stored on the node.
/// </remarks>
/// <param name="node">The node to split. Its Decider, LeftCounts and RightCounts are overwritten.</param>
/// <param name="factory">The feature factory passed to every DeciderState and Decider created here.</param>
/// <param name="leftDistribution">Passed through to ChooseThreshold as the left-side distribution.</param>
/// <param name="rightDistribution">Passed through to ChooseThreshold as the right-side distribution.</param>
/// <param name="numFeatures">The number of candidates to evaluate; must be positive.</param>
/// <param name="numLabels">The number of labels; sizes the count arrays.</param>
/// <param name="numThresholds">The number of thresholds handed to ChooseThreshold.</param>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="node"/> has no Data.</exception>
/// <exception cref="InvalidOperationException">No candidate was evaluated, or none was accepted as best.</exception>
private static void findSplit(DecisionVineNode<T, D> node, IFeatureFactory<T, D> factory, float[] leftDistribution, float[] rightDistribution, int numFeatures, int numLabels, int numThresholds)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    if (node.Data == null)
    {
        throw new ArgumentException("The node has no data to split.", "node");
    }

    using (ThreadLocal<DeciderState> results = new ThreadLocal<DeciderState>(() => new DeciderState(factory), true))
    {
        Parallel.For(0, numFeatures, i =>
        {
            // One state per worker thread; no locking is needed while updating it.
            DeciderState state = results.Value;
            state.Current.LoadData(node.Data);
            float energy = state.Current.ChooseThreshold(numThresholds, numLabels, leftDistribution, rightDistribution);
            if (energy < state.BestEnergy)
            {
                state.Best = state.Current;
                state.BestEnergy = energy;

                // The accepted candidate is now owned by Best, so start the next trial on a fresh one.
                state.Current = new Decider<T, D>(factory);
            }
        });

        // results.Values is empty when the loop body never ran (numFeatures <= 0), and Best is
        // still unset when no candidate's energy beat the state's starting BestEnergy.
        DeciderState winner = results.Values.OrderBy(o => o.BestEnergy).FirstOrDefault();
        if (winner == null || winner.Best == null)
        {
            throw new InvalidOperationException("No usable split was found for the node (numFeatures = " + numFeatures + ").");
        }

        node.Decider = winner.Best;
    }

    Decision[] decisions = node.Decider.Decide(node.Data);
    float[] leftCounts = new float[numLabels];
    float[] rightCounts = new float[numLabels];
    for (int i = 0; i < decisions.Length; i++)
    {
        float[] counts = decisions[i] == Decision.Left ? leftCounts : rightCounts;
        counts[node.Data[i].Label] += 1;
    }

    node.LeftCounts = leftCounts;
    node.RightCounts = rightCounts;
}
/// <summary>
/// Builds the next level of the vine beneath <paramref name="parents"/>.
/// </summary>
/// <remarks>
/// The work happens in five steps:
/// 1. Parents are visited in descending order of <c>Data.Count * entropy</c>; each visited parent is
///    split and given freshly created children until <paramref name="numChildren"/> children exist.
/// 2. Any non-leaf parents left over are split and attached to existing children chosen by findBestChild.
/// 3. For <paramref name="numIterations"/> iterations a random non-leaf parent is re-attached and re-split.
/// 4. Every non-leaf parent's data is routed to its left or right child, then the parent's data is released.
/// 5. Each child is marked Leaf or Branch.
/// </remarks>
/// <param name="parents">The nodes of the previous level. Non-leaf parents end up with Data set to null.</param>
/// <param name="factory">The feature factory forwarded to findSplit.</param>
/// <param name="numChildren">The number of child nodes to create for this level.</param>
/// <param name="numFeatures">The number of candidate features forwarded to findSplit.</param>
/// <param name="numLabels">The number of labels; sizes the empty distributions used for initial splits.</param>
/// <param name="numThresholds">The number of thresholds forwarded to findSplit.</param>
/// <param name="numIterations">The number of random re-assignment iterations to run.</param>
/// <returns>The child nodes of the new level, each with Data and NodeType populated.</returns>
private static DecisionVineNode<T, D>[] computeLSearchLevel(DecisionVineNode<T, D>[] parents, IFeatureFactory<T, D> factory, int numChildren, int numFeatures, int numLabels, int numThresholds, int numIterations)
{
    DecisionVineNode<T, D>[] children = new DecisionVineNode<T, D>[numChildren];

    // Step 1: hand out brand-new children greedily, highest-energy parents first.
    // NOTE(review): unlike the later steps, this loop does not skip Leaf parents — confirm that is intended.
    int nextSlot = 0;
    var parentsByEnergy = parents.OrderByDescending(p => p.Data.Count * p.Distribution.CalculateEntropy());
    Queue<DecisionVineNode<T, D>> pending = new Queue<DecisionVineNode<T, D>>(parentsByEnergy);
    UpdateManager.WriteLine("Initializing children using highest-energy parents...");
    while (nextSlot < numChildren)
    {
        DecisionVineNode<T, D> parent = pending.Dequeue();
        findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);

        DecisionVineNode<T, D> leftChild = new DecisionVineNode<T, D>();
        parent.Left = leftChild;
        children[nextSlot] = leftChild;
        leftChild.Index = nextSlot;
        nextSlot++;

        if (nextSlot >= numChildren)
        {
            // The last slot went to the left child, so the right side shares an existing child.
            parent.Right = findBestChild(parents, children, parent.RightCounts);
        }
        else
        {
            DecisionVineNode<T, D> rightChild = new DecisionVineNode<T, D>();
            parent.Right = rightChild;
            children[nextSlot] = rightChild;
            rightChild.Index = nextSlot;
            nextSlot++;
        }
    }

    // Step 2: remaining parents must reuse existing children, since the level is already full.
    if (pending.Count > 0)
    {
        UpdateManager.WriteLine("Adding in parents without children...");
        while (pending.Count > 0)
        {
            DecisionVineNode<T, D> parent = pending.Dequeue();
            if (parent.NodeType == NodeType.Leaf)
            {
                continue;
            }

            findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);
            parent.Left = findBestChild(parents, children, parent.LeftCounts);
            parent.Right = findBestChild(parents, children, parent.RightCounts);
            resplitAgainstAssignedChildren(parent, factory, numFeatures, numLabels, numThresholds);
        }
    }

    // Step 3: randomized optimization of the parent-to-child links on this level.
    UpdateManager.WriteLine("Optimizing...");
    foreach (int iteration in UpdateManager.ProgressEnum(Enumerable.Range(0, numIterations)))
    {
        DecisionVineNode<T, D> candidate = parents.SelectRandom();
        if (candidate.NodeType != NodeType.Leaf)
        {
            // NOTE(review): each link is cleared before findBestChild runs; presumably the lookup
            // must not see the candidate's current link — keep this statement order.
            candidate.Left = null;
            candidate.Left = findBestChild(parents, children, candidate.LeftCounts);
            candidate.Right = null;
            candidate.Right = findBestChild(parents, children, candidate.RightCounts);
            resplitAgainstAssignedChildren(candidate, factory, numFeatures, numLabels, numThresholds);
        }
    }

    UpdateManager.WriteLine(" Done");

    // Step 4: route every non-leaf parent's data to the child its decider selects.
    UpdateManager.WriteLine("Portioning out data to children...");
    foreach (DecisionVineNode<T, D> child in children)
    {
        child.Data = new List<T>();
    }

    foreach (DecisionVineNode<T, D> parent in parents)
    {
        if (parent.NodeType == NodeType.Leaf)
        {
            continue;
        }

        Decision[] decisions = parent.Decider.Decide(parent.Data);
        for (int j = 0; j < decisions.Length; j++)
        {
            DecisionVineNode<T, D> target = decisions[j] == Decision.Left ? parent.Left : parent.Right;
            target.Data.Add(parent.Data[j]);
        }

        // The parent no longer needs its own copy once the data has been handed down.
        parent.Data.Clear();
        parent.Data = null;
    }

    // Step 5: a child becomes a leaf when checkDelta accepts its data or it has too little support.
    foreach (DecisionVineNode<T, D> child in children)
    {
        bool isTerminal = checkDelta(child.Data) || child.Data.Count < MinimumSupport;
        child.NodeType = isTerminal ? NodeType.Leaf : NodeType.Branch;
    }

    return children;
}

/// <summary>
/// Re-splits <paramref name="parent"/> against the distributions of its currently assigned children:
/// the parent's own counts are removed from both children, findSplit is run with the remaining
/// child distributions, and the counts produced by the new split are added back.
/// </summary>
private static void resplitAgainstAssignedChildren(DecisionVineNode<T, D> parent, IFeatureFactory<T, D> factory, int numFeatures, int numLabels, int numThresholds)
{
    parent.Left.RemoveDistribution(parent.LeftCounts);
    parent.Right.RemoveDistribution(parent.RightCounts);
    findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);

    // findSplit has replaced LeftCounts/RightCounts, so these calls add the new counts.
    parent.Left.AddDistribution(parent.LeftCounts);
    parent.Right.AddDistribution(parent.RightCounts);
}