Ejemplo n.º 1
0
 /// <summary>
 /// Forwards <paramref name="dist"/> to <c>AddDistribution</c> on <paramref name="node"/>.
 /// A null node is ignored.
 /// </summary>
 /// <param name="node">The node that should receive the distribution (may be null)</param>
 /// <param name="dist">The distribution values to add</param>
 public void addParent(DecisionVineNode <T, D> node, float[] dist)
 {
     if (node == null)
     {
         // nothing to update
         return;
     }

     node.AddDistribution(dist);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Construct a Decision Vine using the LSearch methodology.
        /// The vine is built level by level: level 0 holds the root (all of the data), and each
        /// subsequent level is produced from the previous one by <c>computeLSearchLevel</c>.
        /// </summary>
        /// <param name="data">The data to use in training the vine</param>
        /// <param name="factory">The feature factory to use when creating decision stumps</param>
        /// <param name="numFeatures">The number of potential features to try</param>
        /// <param name="numThresholds">The number of thresholds to try per feature</param>
        /// <param name="maxChildren">The maximum allowed number of children</param>
        /// <param name="maximumDepth">The maximum depth of the tree (must be at least 1)</param>
        /// <param name="maxIterations">The number of optimization iterations to perform per level; only applied to levels whose width has reached <paramref name="maxChildren"/></param>
        /// <param name="numLabels">The number of labels found in the data</param>
        /// <returns>The Decision Vine</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="maximumDepth"/> is less than 1</exception>
        public static DecisionVine <T, D> ConstructUsingLSearch(List <T> data, IFeatureFactory <T, D> factory, int numFeatures, int numThresholds, int maxChildren, int maximumDepth, int maxIterations, int numLabels)
        {
            if (maximumDepth < 1)
            {
                // Without at least one level there is nowhere to put the root node.
                throw new ArgumentOutOfRangeException("maximumDepth", maximumDepth, "The maximum depth must be at least 1.");
            }

            UpdateManager.WriteLine("Training Decision Vine with {0} data points...", data.Count);
            DecisionVineNode <T, D> root = new DecisionVineNode <T, D>();

            root.Data         = data;
            root.NodeType     = NodeType.Branch;
            root.Distribution = data.ComputeDistribution <T, D>(numLabels);
            DecisionVineNode <T, D>[][] levels = new DecisionVineNode <T, D> [maximumDepth][];
            levels[0] = new DecisionVineNode <T, D>[] { root };
            for (int i = 1; i < maximumDepth; i++)
            {
                // The level width doubles until it hits maxChildren. For int operands the shift
                // count is masked to 5 bits, so 1 << 31 is negative and 1 << 32 wraps back to 1;
                // from depth 31 onwards 2^i always exceeds any int cap, so use maxChildren directly.
                int numChildren   = i < 31 ? Math.Min(1 << i, maxChildren) : maxChildren;
                // Optimization iterations are only run once the level is at full width.
                int numIterations = numChildren < maxChildren ? 0 : maxIterations;
                UpdateManager.WriteLine("Training level {0} with {1} children and {2} optimization iterations...", i, numChildren, numIterations);
                levels[i] = computeLSearchLevel(levels[i - 1], factory, numChildren, numFeatures, numLabels, numThresholds, numIterations);
                UpdateManager.WriteLine("Level {0} complete with entropy {1}", i, computeEntropy(levels[i]));
                UpdateManager.WriteLine("Data distribution: [{0}]", string.Join(",", levels[i].Select(o => o.Data.Count)));
            }

            // Only the nodes of the deepest level have their distributions normalized.
            foreach (var node in levels[maximumDepth - 1])
            {
                node.Distribution = node.Distribution.Normalize();
            }

            UpdateManager.WriteLine("Complete.");

            return(new DecisionVine <T, D>(levels));
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Forwards <paramref name="dist"/> to <c>RemoveDistribution</c> on <paramref name="node"/>.
 /// A null node is ignored.
 /// </summary>
 /// <param name="node">The node the distribution should be removed from (may be null)</param>
 /// <param name="dist">The distribution values to remove</param>
 public void removeParent(DecisionVineNode <T, D> node, float[] dist)
 {
     if (node == null)
     {
         // nothing to update
         return;
     }

     node.RemoveDistribution(dist);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Chooses a decider for <paramref name="node"/> by evaluating <paramref name="numFeatures"/>
        /// candidates in parallel and keeping the one with the lowest energy, then stores the decider
        /// and the per-label counts of the data it sends left and right on the node.
        /// </summary>
        /// <param name="node">The node to split; its Decider, LeftCounts and RightCounts are overwritten</param>
        /// <param name="factory">The feature factory used to create candidate deciders</param>
        /// <param name="leftDistribution">Distribution passed to ChooseThreshold for the left side</param>
        /// <param name="rightDistribution">Distribution passed to ChooseThreshold for the right side</param>
        /// <param name="numFeatures">The number of candidates to evaluate</param>
        /// <param name="numLabels">The number of labels; also the length of the count arrays</param>
        /// <param name="numThresholds">The number of thresholds passed to ChooseThreshold for each candidate</param>
        private static void findSplit(DecisionVineNode <T, D> node, IFeatureFactory <T, D> factory, float[] leftDistribution, float[] rightDistribution, int numFeatures, int numLabels, int numThresholds)
        {
            // trackAllValues is enabled so the per-thread results can be collected via Values afterwards.
            using (ThreadLocal <DeciderState> results = new ThreadLocal <DeciderState>(() => new DeciderState(factory), true))
            {
                Parallel.For(0, numFeatures, i =>
                {
                    // Each thread works on its own DeciderState; fetch it once per iteration.
                    DeciderState state = results.Value;

                    state.Current.LoadData(node.Data);
                    float energy = state.Current.ChooseThreshold(numThresholds, numLabels, leftDistribution, rightDistribution);
                    if (energy < state.BestEnergy)
                    {
                        // Keep the winner and start the next candidate from a fresh decider.
                        state.Best       = state.Current;
                        state.BestEnergy = energy;
                        state.Current    = new Decider <T, D>(factory);
                    }
                });

                // Reduce the per-thread winners to the overall lowest-energy decider.
                // NOTE(review): First() throws if no thread ran (numFeatures <= 0), and Best is only
                // set when a candidate beat the state's initial BestEnergy - confirm both are acceptable.
                node.Decider = results.Values.OrderBy(o => o.BestEnergy).First().Best;

                // Tally, per label, how many data points the chosen decider routes to each side.
                Decision[] decisions   = node.Decider.Decide(node.Data);
                float[]    leftCounts  = new float[numLabels];
                float[]    rightCounts = new float[numLabels];
                for (int i = 0; i < decisions.Length; i++)
                {
                    if (decisions[i] == Decision.Left)
                    {
                        leftCounts[node.Data[i].Label] += 1;
                    }
                    else
                    {
                        rightCounts[node.Data[i].Label] += 1;
                    }
                }
                node.LeftCounts  = leftCounts;
                node.RightCounts = rightCounts;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds one level of the vine from the previous level: creates <paramref name="numChildren"/>
        /// child nodes, points the parents' Left/Right at them, runs <paramref name="numIterations"/>
        /// re-optimization steps, moves each non-leaf parent's data into its children and finally
        /// marks every child as Leaf or Branch.
        /// </summary>
        /// <param name="parents">The nodes of the previous level</param>
        /// <param name="factory">The feature factory handed to findSplit</param>
        /// <param name="numChildren">The number of child nodes to create for this level</param>
        /// <param name="numFeatures">The number of features handed to findSplit</param>
        /// <param name="numLabels">The number of labels; used to size the empty distributions</param>
        /// <param name="numThresholds">The number of thresholds handed to findSplit</param>
        /// <param name="numIterations">The number of optimization iterations to run</param>
        /// <returns>The newly created child nodes</returns>
        private static DecisionVineNode <T, D>[] computeLSearchLevel(DecisionVineNode <T, D>[] parents, IFeatureFactory <T, D> factory, int numChildren, int numFeatures, int numLabels, int numThresholds, int numIterations)
        {
            DecisionVineNode <T, D>[] children = new DecisionVineNode <T, D> [numChildren];

            // assign children in a greedy manner first:
            // parents are visited in descending order of (data count * distribution entropy)
            int index = 0;
            Queue <DecisionVineNode <T, D> > parentQueue = new Queue <DecisionVineNode <T, D> >(parents.OrderByDescending(o => o.Data.Count * o.Distribution.CalculateEntropy()));

            UpdateManager.WriteLine("Initializing children using highest-energy parents...");
            // NOTE(review): Dequeue throws if the queue runs out before numChildren children exist,
            // and unlike the later loops this one does not skip Leaf parents - confirm intended.
            while (index < numChildren)
            {
                var parent = parentQueue.Dequeue();
                // split against empty left/right distributions
                findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);
                children[index]       = parent.Left = new DecisionVineNode <T, D>();
                // the array slot is resolved before index++ runs, so the child's Index equals its slot
                children[index].Index = index++;
                if (index < numChildren)
                {
                    children[index]       = parent.Right = new DecisionVineNode <T, D>();
                    children[index].Index = index++;
                }
                else
                {
                    // no slot left for a new right child: reuse a child picked by findBestChild
                    parent.Right = findBestChild(parents, children, parent.RightCounts);
                }
            }

            if (parentQueue.Any())
            {
                UpdateManager.WriteLine("Adding in parents without children...");
                // we need to start adding nodes in without increasing the number of children
                while (parentQueue.Any())
                {
                    var parent = parentQueue.Dequeue();

                    // leaf parents get no children
                    if (parent.NodeType == NodeType.Leaf)
                    {
                        continue;
                    }

                    // initial split against empty distributions, used to pick the children
                    findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);

                    parent.Left  = findBestChild(parents, children, parent.LeftCounts);
                    parent.Right = findBestChild(parents, children, parent.RightCounts);

                    // take this parent's counts out of the chosen children, re-split against the
                    // children's distributions, then add the counts produced by the new split back in
                    parent.Left.RemoveDistribution(parent.LeftCounts);
                    parent.Right.RemoveDistribution(parent.RightCounts);
                    findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
                    parent.Left.AddDistribution(parent.LeftCounts);
                    parent.Right.AddDistribution(parent.RightCounts);
                }
            }

            UpdateManager.WriteLine("Optimizing...");
            // optimize the nodes on this level: each iteration re-targets and re-splits one
            // randomly selected parent (the loop variable itself is not used)
            foreach (int i in UpdateManager.ProgressEnum(Enumerable.Range(0, numIterations)))
            {
                var parent = parents.SelectRandom();

                // picking a leaf still consumes the iteration
                if (parent.NodeType == NodeType.Leaf)
                {
                    continue;
                }

                // NOTE(review): each link is set to null before being reassigned; presumably the
                // Left/Right setters have side effects that make this necessary - confirm.
                parent.Left  = null;
                parent.Left  = findBestChild(parents, children, parent.LeftCounts);
                parent.Right = null;
                parent.Right = findBestChild(parents, children, parent.RightCounts);

                // same remove / re-split / add-back sequence as used for the queued parents above
                parent.Left.RemoveDistribution(parent.LeftCounts);
                parent.Right.RemoveDistribution(parent.RightCounts);
                findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
                parent.Left.AddDistribution(parent.LeftCounts);
                parent.Right.AddDistribution(parent.RightCounts);
            }
            UpdateManager.WriteLine(" Done");

            UpdateManager.WriteLine("Portioning out data to children...");
            // fill the data: start every child with an empty list
            for (int i = 0; i < children.Length; i++)
            {
                children[i].Data = new List <T>();
            }

            // route each non-leaf parent's data points to its left or right child
            for (int i = 0; i < parents.Length; i++)
            {
                var parent = parents[i];

                if (parent.NodeType == NodeType.Leaf)
                {
                    continue;
                }

                Decision[] decisions = parent.Decider.Decide(parent.Data);
                for (int j = 0; j < decisions.Length; j++)
                {
                    if (decisions[j] == Decision.Left)
                    {
                        parent.Left.Data.Add(parent.Data[j]);
                    }
                    else
                    {
                        parent.Right.Data.Add(parent.Data[j]);
                    }
                }
                // the parent no longer keeps its data once it has been handed down
                parent.Data.Clear();
                parent.Data = null;
            }

            // a child becomes a leaf when checkDelta reports true for its data or when it holds
            // fewer than MinimumSupport data points; otherwise it is a branch
            for (int i = 0; i < children.Length; i++)
            {
                if (checkDelta(children[i].Data) || children[i].Data.Count < MinimumSupport)
                {
                    children[i].NodeType = NodeType.Leaf;
                }
                else
                {
                    children[i].NodeType = NodeType.Branch;
                }
            }

            return(children);
        }