Ejemplo n.º 1
0
        /// <summary>
        /// Builds a new registry that holds every factory of this registry plus
        /// <paramref name="featureFactory"/>. The lookup tables of this instance are copied, not modified.
        /// </summary>
        /// <param name="featureFactory">The factory to register.</param>
        /// <returns>A new <see cref="FeatureRegistry"/> created with the same <c>Default</c> factory.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="featureFactory"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// A factory with the same feature id, or of the same runtime type, is already registered.
        /// </exception>
        public FeatureRegistry Add(IFeatureFactory featureFactory)
        {
            if (featureFactory == null)
            {
                throw new ArgumentNullException(nameof(featureFactory));
            }

            // Reject duplicates by id first, then by concrete type.
            if (_featureFactoriesById.ContainsKey(featureFactory.FeatureId))
            {
                throw new InvalidOperationException("A factory for the given " + nameof(featureFactory.FeatureId) + " already exists.");
            }

            var concreteType = featureFactory.GetType();
            if (_featureFactoriesByType.ContainsKey(concreteType))
            {
                throw new InvalidOperationException("A factory for the given type " + concreteType.FullName + " already exists.");
            }

            // Copy both tables and append the new entry to each copy.
            var byId = new Dictionary <Guid, IFeatureFactory>(_featureFactoriesById);
            byId.Add(featureFactory.FeatureId, featureFactory);

            var byType = new Dictionary <Type, IFeatureFactory>(_featureFactoriesByType);
            byType.Add(concreteType, featureFactory);

            return new FeatureRegistry(byId, byType, Default);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the file browser view model: stores the injected services, prepares the
        /// collection the view binds to, and creates the commands.
        /// </summary>
        public FileBrowserViewModel(IServiceProvider serviceProvider, ILog log, IMessageService messageService,
                                    IOptionsService optionsService, ISolutionProcessor solutionProcessor, IFileTypeResolver fileTypeResolver,
                                    ISearchMatchService searchMatchService, IShellHelperService shellHelperService,
                                    IShellImageService shellImageService, IUtilsService utilsService, IFeatureFactory featureFactory)
            : base(KnownFeature.FileBrowser, serviceProvider)
        {
            // Injected services
            _log = log;
            _messageService = messageService;
            _optionsService = optionsService;
            _solutionProcessor = solutionProcessor;
            _fileTypeResolver = fileTypeResolver;
            _searchMatchService = searchMatchService;
            _shellHelperService = shellHelperService;
            _shellImageService = shellImageService;
            _utilsService = utilsService;
            _featureFactory = featureFactory;

            // The backing collection has to exist before the view binds, otherwise no data is shown.
            // It must be an ObservableCollection.
            _sourceFiles = new ObservableCollection <FileModel>();

            var viewSource = new CollectionViewSource();
            viewSource.Source = _sourceFiles;
            _files = viewSource;

            // Commands
            ShowFilesCommand = new RelayCommand(_messageService, OnShowAllFiles);
            OpenCodeBrowserAllCommand = new RelayCommand(_messageService, OnOpenCodeBrowserAll, OnCanOpenCodeBrowser);
            OpenCodeBrowserClassesCommand = new RelayCommand(_messageService, OnOpenCodeBrowserClasses, OnCanOpenCodeBrowser);
            OpenCodeBrowserMethodsCommand = new RelayCommand(_messageService, OnOpenCodeBrowserMethods, OnCanOpenCodeBrowser);
            OpenCodeBrowserPropertiesCommand = new RelayCommand(_messageService, OnOpenCodeBrowserProperties, OnCanOpenCodeBrowser);
            OpenFilesCommand = new RelayCommand(_messageService, OnOpenFiles, OnCanOpenFiles);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Initializes a registry from already-built lookup tables. The dictionaries are stored
        /// as given (no copy is made here).
        /// </summary>
        /// <param name="featureFactoriesById">Factories keyed by <see cref="Guid"/>.</param>
        /// <param name="featureFactoriesByType">Factories keyed by <see cref="Type"/>.</param>
        /// <param name="defaultFeatureFactory">The factory exposed through <c>Default</c>.</param>
        private FeatureRegistry(IDictionary <Guid, IFeatureFactory> featureFactoriesById, IDictionary <Type, IFeatureFactory> featureFactoriesByType, IFeatureFactory defaultFeatureFactory)
        {
            this._featureFactoriesByType = featureFactoriesByType;
            this._featureFactoriesById = featureFactoriesById;
            this.Default = defaultFeatureFactory;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains a Decision Vine one level at a time using the LSearch methodology.
        /// </summary>
        /// <param name="data">The data to use in training the vine</param>
        /// <param name="factory">The feature factory to use when creating decision stumps</param>
        /// <param name="numFeatures">The number of potential features to try</param>
        /// <param name="numThresholds">The number of thresholds to try per feature</param>
        /// <param name="maxChildren">The maximum allowed number of children per level</param>
        /// <param name="maximumDepth">The number of levels to build, including the root level</param>
        /// <param name="maxIterations">The number of optimization iterations to perform per level; only applied once a level has reached <paramref name="maxChildren"/> nodes</param>
        /// <param name="numLabels">The number of labels found in the data</param>
        /// <returns>The Decision Vine built from all computed levels</returns>
        public static DecisionVine <T, D> ConstructUsingLSearch(List <T> data, IFeatureFactory <T, D> factory, int numFeatures, int numThresholds, int maxChildren, int maximumDepth, int maxIterations, int numLabels)
        {
            UpdateManager.WriteLine("Training Decision Vine with {0} data points...", data.Count);

            // The root owns the whole data set and forms level 0 on its own.
            var rootNode = new DecisionVineNode <T, D>
            {
                Data         = data,
                NodeType     = NodeType.Branch,
                Distribution = data.ComputeDistribution <T, D>(numLabels)
            };

            var levels = new DecisionVineNode <T, D> [maximumDepth][];
            levels[0] = new[] { rootNode };

            int depth = 1;
            while (depth < maximumDepth)
            {
                // A level doubles in width until it is capped by maxChildren.
                int width = Math.Min(1 << depth, maxChildren);

                // Optimization iterations are only spent on levels that hit the cap.
                int iterations = maxIterations;
                if (width < maxChildren)
                {
                    iterations = 0;
                }

                UpdateManager.WriteLine("Training level {0} with {1} children and {2} optimization iterations...", depth, width, iterations);

                var current = computeLSearchLevel(levels[depth - 1], factory, width, numFeatures, numLabels, numThresholds, iterations);
                levels[depth] = current;

                UpdateManager.WriteLine("Level {0} complete with entropy {1}", depth, computeEntropy(current));
                UpdateManager.WriteLine("Data distribution: [{0}]", string.Join(",", current.Select(o => o.Data.Count)));

                depth++;
            }

            // Normalize the distributions of the nodes on the last level.
            var deepest = levels[maximumDepth - 1];
            for (int n = 0; n < deepest.Length; n++)
            {
                deepest[n].Distribution = deepest[n].Distribution.Normalize();
            }

            UpdateManager.WriteLine("Complete.");

            return new DecisionVine <T, D>(levels);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains a classification forest on labelled 2D data points.
        /// </summary>
        /// <typeparam name="F">The feature response type.</typeparam>
        /// <param name="trainingData">Labelled, two-dimensional training points without target values.</param>
        /// <param name="featureFactory">Factory handed to the classification training context.</param>
        /// <param name="TrainingParameters">Parameters forwarded to the forest trainer.</param>
        /// <returns>The trained forest.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="trainingData"/> is not 2D, has no labels, or has target values.
        /// </exception>
        public static Forest <F, HistogramAggregator> Train <F>(
            DataPointCollection trainingData,
            IFeatureFactory <F> featureFactory,
            TrainingParameters TrainingParameters) where F : IFeatureResponse
        {
            // ArgumentException (rather than bare Exception) identifies these as caller errors.
            // It derives from Exception and is built with the message only, so existing
            // catch blocks and the exception's Message text are unaffected.
            if (trainingData.Dimensions != 2)
            {
                throw new ArgumentException("Training data points must be 2D.");
            }
            if (trainingData.HasLabels == false)
            {
                throw new ArgumentException("Training data points must be labelled.");
            }
            if (trainingData.HasTargetValues == true)
            {
                throw new ArgumentException("Training data points should not have target values.");
            }

            Console.WriteLine("Running training...");

            // No seed is supplied, so repeated runs are not guaranteed to produce the same forest.
            Random random = new Random();
            ITrainingContext <F, HistogramAggregator> classificationContext =
                new ClassificationTrainingContext <F>(trainingData.CountClasses(), featureFactory, random);

            var forest = ForestTrainer <F, HistogramAggregator> .TrainForest(
                random,
                TrainingParameters,
                classificationContext,
                trainingData);

            return(forest);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Trains a decision forest from <paramref name="splits"/> based on the provided parameters using the breadth first algorithm.
        /// </summary>
        /// <param name="numTrees">Number of trees in the forest (at most 256, because each tree is labelled with a byte index)</param>
        /// <param name="splits">Data splits to use when training the tree. Tree <c>i</c> is trained on split <c>i % splits.Length</c>.</param>
        /// <param name="factory">The feature factory</param>
        /// <param name="numFeatures">The number of features to try for each node</param>
        /// <param name="numThresholds">The number of thresholds to try for each node</param>
        /// <param name="labelNames">The names for the labels</param>
        /// <param name="labelWeights">An array of weights for each label</param>
        /// <param name="threshold">The threshold to use to determine a "good" feature test</param>
        /// <returns>The trained forest</returns>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="numTrees"/> is greater than 256.</exception>
        public static DecisionForest <T, D> ComputeBreadthFirst(
            int numTrees,
            List <T>[] splits,
            IFeatureFactory <T, D> factory,
            int numFeatures,
            int numThresholds,
            string[] labelNames,
            float[] labelWeights,
            float threshold
            )
        {
            // The tree index is passed to SetTreeLabel as a byte. A byte loop counter wraps from
            // 255 back to 0, so 256 or more trees used to loop forever; reject counts that do not fit.
            if (numTrees > byte.MaxValue + 1)
            {
                throw new System.ArgumentOutOfRangeException("numTrees", numTrees, "At most 256 trees are supported because tree indices are stored as bytes.");
            }

            int numLabels = labelNames.Length;

            DecisionTree <T, D>[] trees = new DecisionTree <T, D> [numTrees];
            int count = 0;

            for (int i = 0; i < numTrees; i++)
            {
                UpdateManager.WriteLine(string.Format("Training tree {0} of {1}...", i + 1, numTrees));

                // Splits are reused round-robin when there are more trees than splits.
                int split = i % splits.Length;
                trees[i] = DecisionTree <T, D> .ComputeBreadthFirst(splits[split], factory, numFeatures, numThresholds, numLabels, labelWeights, threshold);

                // The running count returned by SetTreeLabel is fed into the next tree.
                count = trees[i].SetTreeLabel((byte)i, count);
                UpdateManager.WriteLine("\ndone");
            }
            UpdateManager.WriteLine("Training complete");

            return(new DecisionForest <T, D>(trees, labelNames));
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Creates an instance of this class, filled with the given <paramref name="features"/>.
 /// The factory of the first feature becomes the factory of the collection.
 /// </summary>
 /// <param name="features">The features to add to the collection; must contain at least one element</param>
 /// <exception cref="ArgumentNullException"><paramref name="features"/> is <c>null</c>.</exception>
 /// <exception cref="InvalidOperationException"><paramref name="features"/> contains no elements.</exception>
 public FeatureCollection(IEnumerable <IFeature <T> > features)
 {
     if (features == null)
     {
         throw new ArgumentNullException("features");
     }

     Name = "FC" + Guid.NewGuid();

     // Walk the sequence exactly once: calling First() and then foreach enumerated it twice,
     // which re-runs lazy queries and fails on sequences that can only be consumed once.
     bool isFirst = true;
     foreach (var feature in features)
     {
         if (isFirst)
         {
             // The factory is taken from the first feature before anything is added.
             _factory = (IFeatureFactory <T>)feature.Factory;
             isFirst  = false;
         }
         Add(feature);
     }

     if (isFirst)
     {
         // Same exception type the former First() call raised for an empty sequence.
         throw new InvalidOperationException("The features sequence contains no elements.");
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Builds a random cluster tree from the provided data.
 /// </summary>
 /// <param name="data">The data to use when computing the tree</param>
 /// <param name="factory">The factory which generates the random features</param>
 /// <param name="numFeatures">The number of features to try at each level</param>
 /// <param name="numThresholds">Number of test thresholds to try</param>
 /// <param name="min_rd">The minimum relative density of a node (as a stopping condition)</param>
 /// <param name="min_y">The minimum node size as a fraction of the total point count; it is multiplied by <c>data.Count</c> and truncated to an integer (used as a stopping condition)</param>
 /// <param name="maxDepth">The maximum depth of the tree</param>
 /// <returns>A tree wrapping the root produced by the recursive computation</returns>
 public static RandomClusterTree <T> Compute(
     List <T> data,
     IFeatureFactory <T, float[]> factory,
     int numFeatures,
     int numThresholds,
     float min_rd,
     float min_y,
     byte maxDepth)
 {
     int totalPoints = data.Count;

     // Turn the fractional minimum into an absolute point count.
     int minimumPoints = (int)(min_y * totalPoints);

     // The literal 0 is passed in the position just before maxDepth.
     var root = compute(data, totalPoints, factory, numFeatures, numThresholds, min_rd, minimumPoints, 0, maxDepth);

     return new RandomClusterTree <T>(root);
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a CLTree from the provided data.
        /// </summary>
        /// <param name="data">The data to use in constructing the tree</param>
        /// <param name="factory">The factory used to create features</param>
        /// <param name="numFeatures">The number of features to use; also the dimensionality of the bounding hyperrectangle</param>
        /// <returns>A CLTree built from a split over all data indices</returns>
        public static CLTree <T> Compute(List <T> data, IFeatureFactory <T, float[]> factory, int numFeatures)
        {
            var bounds = new Hyperrectangle <float>(numFeatures, float.MaxValue, float.MinValue);

            // Fills the static _featureValues / _buildFeatures tables used below.
            fillFeatureValues(factory, numFeatures, data);

            // Tighten the bounds to the observed range of each feature.
            int dimension = 0;
            while (dimension < numFeatures)
            {
                bounds.MinimumBound[dimension] = _featureValues[dimension].Min();
                bounds.MaximumBound[dimension] = _featureValues[dimension].Max();
                dimension++;
            }

            // The root split starts with every data index.
            var allIndices = Enumerable.Range(0, data.Count).ToList();
            var root       = split(allIndices, data.Count, bounds);

            return new CLTree <T>(root, _buildFeatures);
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates <paramref name="numFeatures"/> features from <paramref name="factory"/> and caches, in static
 /// fields, each feature together with its computed value for every item of <paramref name="data"/>.
 /// </summary>
 private static void fillFeatureValues(IFeatureFactory <T, float[]> factory, int numFeatures, List <T> data)
 {
     _numFeatures   = numFeatures;
     _buildFeatures = new IFeature <T, float[]> [numFeatures];
     _featureValues = new float[numFeatures][];

     for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
     {
         IFeature <T, float[]> feature = factory.Create();

         // Evaluate the feature on each data item, in list order.
         var values = new float[data.Count];
         int slot   = 0;
         foreach (T item in data)
         {
             values[slot++] = feature.Compute(item);
         }

         _featureValues[featureIndex] = values;
         _buildFeatures[featureIndex] = feature;
     }
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Constructs a new decision tree using the breadth-first method.  This method will attempt to split each leaf node in the tree with each step, and will stop when
        /// <see cref="F:MaximumDepth" /> is reached or it is unable to split any leaf nodes.  If no nodes split in a step, it will try again <see cref="F:NumberOfTries" /> times, and then
        /// stop.  A node will only be split if the resulting entropy gain is above <paramref name="threshold"/>.
        /// </summary>
        /// <remarks>
        /// The static building flag is set for the duration of training and is always cleared afterwards,
        /// including when training throws.
        /// </remarks>
        /// <param name="data">The data to use when constructing the tree</param>
        /// <param name="factory">The feature factory to use for producing sample feature test for each node</param>
        /// <param name="numFeatures">The number of sample feature tests to try</param>
        /// <param name="numThresholds">The number of test thresholds to try with each test</param>
        /// <param name="numLabels">The number of possible labels for a point</param>
        /// <param name="labelWeights">The weights for each label</param>
        /// <param name="threshold">Threshold used to determine a good feature test</param>
        /// <returns>A new decision tree</returns>
        public static DecisionTree <T, D> ComputeBreadthFirst
        (
            List <T> data,
            IFeatureFactory <T, D> factory,
            int numFeatures,
            int numThresholds,
            int numLabels,
            float[] labelWeights,
            float threshold
        )
        {
            DecisionTreeNode <T, D> root;

            _isBuilding = true;
            try
            {
                root = computeBreadthFirst(threshold, data, factory, numFeatures, numThresholds, numLabels, labelWeights);
            }
            finally
            {
                // Without the finally, an exception during training left the flag stuck at true
                // for every later use of this type.
                _isBuilding = false;
            }

            // As before, the tree object is created after the flag has been cleared.
            return(new DecisionTree <T, D>(root, labelWeights, numLabels));
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Creates a registry whose only registered factory is <paramref name="featureFactory"/>;
        /// the same factory is also passed as the registry's default factory.
        /// </summary>
        /// <param name="featureFactory">The factory to register and use as default.</param>
        /// <returns>A new <see cref="FeatureRegistry"/> containing just that factory.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="featureFactory"/> is <c>null</c>.</exception>
        public static FeatureRegistry WithDefault(IFeatureFactory featureFactory)
        {
            if (featureFactory == null)
            {
                throw new ArgumentNullException(nameof(featureFactory));
            }

            // One entry per lookup table: by feature id and by concrete factory type.
            var byId = new Dictionary <Guid, IFeatureFactory>();
            byId.Add(featureFactory.FeatureId, featureFactory);

            var byType = new Dictionary <Type, IFeatureFactory>();
            byType.Add(featureFactory.GetType(), featureFactory);

            return new FeatureRegistry(byId, byType, featureFactory);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Generates features starting from a first one at position (1, 1), then repeatedly takes a
        /// feature from the queue and tries to attach a new one to it until <c>FeatureCount</c> features
        /// exist or the queue runs empty. Results are collected in <c>AllFeatures</c>.
        /// </summary>
        private void GenerateFeatures()
        {
            AllFeatures       = new List <IFeature>();
            _currFeatureCount = 0;
            // start top left
            FeaturePosition position     = new FeaturePosition(1, 1, Direction.None);
            Vector3Int      size         = RoomSize.GetRandomValue();
            IFeature        firstFeature = null;

            StartPos = position + new Vector3Int(size.x / 2, size.y / 2, 0);

            // The result of the first creation used to be ignored: on failure a null feature was
            // queued and then dereferenced by the loop below. Stop instead when nothing was created.
            bool created = _factories[0].TryCreateFeature(position, size.ToVector2Int(), Tilemap, ref firstFeature);
            if (!created || firstFeature == null)
            {
                Debug.LogWarning($"could not create the initial feature at {position.ToString()} of size {size}");
                return;
            }

            _featureQueue.Enqueue(firstFeature);
            AllFeatures.Add(firstFeature);

            _currFeatureCount++;

            while (_featureQueue.Count > 0 && _currFeatureCount < FeatureCount)
            {
                var lastFeature = _featureQueue.Dequeue();

                position = lastFeature.GetNewFeaturePosition();
                size     = RoomSize.GetRandomValue();
                Debug.Log($"feature at {position.ToString()} of size {size}");

                // Pick one of the available factories at random for the next feature.
                IFeatureFactory factory    = _factories[Random.Range(0, _factories.Count)];
                IFeature        newFeature = null;
                if (factory.TryCreateFeature(position, size.ToVector2Int(), Tilemap, ref newFeature))
                {
                    lastFeature.AddExit(position);
                    AllFeatures.Add(newFeature);

                    _featureQueue.Enqueue(newFeature);
                    _currFeatureCount++;
                }

                // Keep the parent in rotation for as long as it reports that it can make another feature.
                if (lastFeature.CanMakeNewFeature())
                {
                    _featureQueue.Enqueue(lastFeature);
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Trains a decision forest from <paramref name="splits"/> based on the provided parameters using the depth first algorithm.
        /// </summary>
        /// <param name="numTrees">Number of trees in the forest (at most 256, because each tree is labelled with a byte index)</param>
        /// <param name="splits">Data splits to use when training the tree. Tree <c>i</c> is trained on split <c>i % splits.Length</c>. The splits are sorted in place when their elements implement <see cref="IComparable{T}"/>.</param>
        /// <param name="factory">The feature factory</param>
        /// <param name="numFeatures">The number of features to try for each node</param>
        /// <param name="numThresholds">The number of thresholds to try for each node</param>
        /// <param name="labelNames">The names for the labels</param>
        /// <param name="labelWeights">An array of weights for each label</param>
        /// <returns>The trained forest</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numTrees"/> is greater than 256.</exception>
        public static DecisionForest <T, D> ComputeDepthFirst(
            int numTrees,
            List <T>[] splits,
            IFeatureFactory <T, D> factory,
            int numFeatures,
            int numThresholds,
            string[] labelNames,
            float[] labelWeights
            )
        {
            // Tree indices are narrowed to byte. With more than 256 trees the cast wrapped around,
            // so early trees were overwritten and the remaining array slots stayed null.
            if (numTrees > byte.MaxValue + 1)
            {
                throw new ArgumentOutOfRangeException("numTrees", numTrees, "At most 256 trees are supported because tree indices are stored as bytes.");
            }

            int numLabels = labelNames.Length;

            DecisionTree <T, D>[] trees = new DecisionTree <T, D> [numTrees];
            int count = 0;

            // The first element of the first split decides whether the splits are sorted.
            if (splits[0][0] is IComparable <T> )
            {
                foreach (var split in splits)
                {
                    split.Sort();
                }
            }
            for (int i = 0; i < numTrees; i++)
            {
                // Splits are reused round-robin when there are more trees than splits.
                int split = i % splits.Length;
                UpdateManager.WriteLine(string.Format("Training tree {0} of {1}...", i + 1, numTrees));
                trees[i] = DecisionTree <T, D> .ComputeDepthFirst(splits[split], factory, numFeatures, numThresholds, numLabels, labelWeights);

                trees[i].LabelCount = labelNames.Length;
                // The running count returned by SetTreeLabel is fed into the next tree.
                count = trees[i].SetTreeLabel((byte)i, count);
                UpdateManager.WriteLine("\ndone");
            }
            UpdateManager.WriteLine("Training complete");

            return(new DecisionForest <T, D>(trees, labelNames));
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Evaluates <paramref name="numFeatures"/> candidate deciders for <paramref name="node"/> in parallel,
        /// stores the one with the lowest energy in <c>node.Decider</c>, and records how many points of each
        /// label that decider sends left and right in <c>node.LeftCounts</c> / <c>node.RightCounts</c>.
        /// </summary>
        /// <param name="node">The node to split; its <c>Data</c> is read, its decider and counts are written</param>
        /// <param name="factory">The feature factory used to create deciders</param>
        /// <param name="leftDistribution">Passed through to <c>ChooseThreshold</c></param>
        /// <param name="rightDistribution">Passed through to <c>ChooseThreshold</c></param>
        /// <param name="numFeatures">The number of parallel evaluation iterations</param>
        /// <param name="numLabels">The number of labels; sizes the left/right count arrays</param>
        /// <param name="numThresholds">The number of thresholds to try per evaluation</param>
        private static void findSplit(DecisionVineNode <T, D> node, IFeatureFactory <T, D> factory, float[] leftDistribution, float[] rightDistribution, int numFeatures, int numLabels, int numThresholds)
        {
            // NOTE(review): dataCount is not used anywhere below.
            int dataCount = node.Data.Count;

            // One DeciderState per worker thread; the second argument (true) tracks all values so
            // that results.Values can be read after the loop.
            using (ThreadLocal <DeciderState> results = new ThreadLocal <DeciderState>(() => new DeciderState(factory), true))
            {
                Parallel.For(0, numFeatures, i =>
                {
                    results.Value.Current.LoadData(node.Data);
                    float energy = results.Value.Current.ChooseThreshold(numThresholds, numLabels, leftDistribution, rightDistribution);
                    if (energy < results.Value.BestEnergy)
                    {
                        // Promote the current decider to this thread's best and start over with a new one.
                        // When the energy is not better, the same Current decider is reused next iteration.
                        results.Value.Best       = results.Value.Current;
                        results.Value.BestEnergy = energy;
                        results.Value.Current    = new Decider <T, D>(factory);
                    }
                });
                // Take the best decider of the thread state with the lowest energy.
                // NOTE(review): First() throws if no thread state was created (e.g. numFeatures == 0), and
                // Best looks like it stays unset when no iteration improved on the initial energy - confirm.
                node.Decider = results.Values.OrderBy(o => o.BestEnergy).First().Best;

                // Count, per label, how many points the chosen decider sends to each side.
                Decision[] decisions   = node.Decider.Decide(node.Data);
                float[]    leftCounts  = new float[numLabels];
                float[]    rightCounts = new float[numLabels];
                for (int i = 0; i < decisions.Length; i++)
                {
                    if (decisions[i] == Decision.Left)
                    {
                        leftCounts[node.Data[i].Label] += 1;
                    }
                    else
                    {
                        // Every decision other than Left is counted as right.
                        rightCounts[node.Data[i].Label] += 1;
                    }
                }
                node.LeftCounts  = leftCounts;
                node.RightCounts = rightCounts;
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Grows a decision tree breadth first. Open nodes are kept as <c>SplitCandidate</c>s in a queue; each pass
        /// evaluates <paramref name="numFeatures"/> deciders against every splittable candidate, splits the
        /// candidates for which an acceptable decider was found, and finally builds the tree from the collected
        /// deciders and the remaining (leaf) candidates.
        /// </summary>
        /// <param name="threshold">Minimum gain required to accept a split; lowered step by step after passes that add no nodes</param>
        /// <param name="data">The training data</param>
        /// <param name="factory">The feature factory used to create deciders</param>
        /// <param name="numFeatures">The number of deciders to try per pass</param>
        /// <param name="numThresholds">The number of thresholds to try per decider</param>
        /// <param name="numLabels">The number of possible labels</param>
        /// <param name="labelWeights">The weights for each label</param>
        /// <returns>The root node produced by <c>buildTree</c></returns>
        private static DecisionTreeNode <T, D> computeBreadthFirst(float threshold, List <T> data, IFeatureFactory <T, D> factory, int numFeatures, int numThresholds, int numLabels, float[] labelWeights)
        {
            string id = "DecisionTree.ComputeBreadthFirst";
            Queue <SplitCandidate> candidates = new Queue <SplitCandidate>();
            // The root candidate has index 1 and level 0 and covers every data index.
            SplitCandidate         start      = new SplitCandidate(new List <int>(), 1, 0);

            for (int i = 0; i < data.Count; i++)
            {
                start.Indices.Add(i);
            }
            start.Entropy = calculateEntropy(data, start.Indices, labelWeights, numLabels);
            start.Support = calculateSupport(data, start.Indices, labelWeights);
            candidates.Enqueue(start);
            bool changed = true;

            float[] leftDistribution, rightDistribution;
            int     tries     = (int)_numberOfTries;
            // Amount by which the threshold is lowered after each pass that added no nodes.
            float   increment = threshold / tries;
            // Accepted deciders, keyed by the index of the node they split.
            Dictionary <int, Decider <T, D> > deciders = new Dictionary <int, Decider <T, D> >();

            // Each iteration is one pass over all open candidates. A pass that adds no nodes uses up one try.
            while (tries > 0)
            {
                if (!changed)
                {
                    // The previous pass split nothing: relax the threshold before trying again.
                    threshold -= increment;
                    UpdateManager.WriteLine("Decreasing threshold to {0}", threshold);
                }
                // NOTE(review): a garbage collection is forced on every pass - confirm this is still wanted.
                GC.Collect();
                int count = candidates.Count;
                // Phase 1: rotate the queue once and give every splittable candidate its scratch buffers.
                for (int i = 0; i < count; i++)
                {
                    SplitCandidate candidate = candidates.Dequeue();

                    // Candidates that must not be split are put back untouched.
                    if (candidate.Delta)
                    {
                        candidates.Enqueue(candidate);
                        continue;
                    }
                    if (MaximumDepth > 0 && candidate.Level >= MaximumDepth - 1)
                    {
                        candidates.Enqueue(candidate);
                        continue;
                    }
                    if (candidate.Support < MinimumSupport)
                    {
                        candidates.Enqueue(candidate);
                        continue;
                    }
                    int dataCount = candidate.Indices.Count;
                    if (candidate.Values == null)
                    {
                        candidate.Values = new float[dataCount];
                    }
                    if (candidate.Labels == null)
                    {
                        candidate.Labels = new int[dataCount];
                    }
                    if (candidate.Weights == null)
                    {
                        candidate.Weights = new float[dataCount];
                    }

                    candidates.Enqueue(candidate);
                }
                // Phase 2: try numFeatures deciders; each one is applied to the whole data set once
                // and then evaluated against every splittable candidate.
                float bestGain = float.MinValue;
                for (int k = 0; k < numFeatures; k++)
                {
                    UpdateManager.RaiseProgress(k, numFeatures);
                    Decider <T, D> decider = new Decider <T, D>(factory);
                    decider.ApplyFeature(data);
                    for (int i = 0; i < count; i++)
                    {
                        SplitCandidate candidate = candidates.Dequeue();
                        // Same three skip conditions as in phase 1, so the scratch buffers exist below.
                        if (MaximumDepth > 0 && candidate.Level >= MaximumDepth - 1)
                        {
                            candidates.Enqueue(candidate);
                            continue;
                        }
                        if (candidate.Delta)
                        {
                            candidates.Enqueue(candidate);
                            continue;
                        }
                        if (candidate.Support < MinimumSupport)
                        {
                            candidates.Enqueue(candidate);
                            continue;
                        }
                        List <int> indices   = candidate.Indices;
                        int        dataCount = indices.Count;
                        // Copy the per-point feature value, label and weight into the candidate's buffers.
                        for (int j = 0; j < dataCount; j++)
                        {
                            T point = data[indices[j]];
                            candidate.Values[j]  = point.FeatureValue;
                            candidate.Labels[j]  = point.Label;
                            candidate.Weights[j] = point.Weight;
                        }
                        decider.SetData(candidate.Values, candidate.Weights, candidate.Labels);
                        // NOTE(review): presumably ChooseThreshold returns the negated entropy after the split,
                        // making the sum an entropy gain - confirm against Decider.
                        float gain = candidate.Entropy + decider.ChooseThreshold(numThresholds, numLabels, labelWeights, out leftDistribution, out rightDistribution);
                        bestGain = Math.Max(gain, bestGain);
                        // Accept the decider if it beats the threshold (or the node is still above the minimum
                        // depth) and improves on the best gain recorded for this candidate so far.
                        if ((gain > threshold || candidate.Level < MinimumDepth) && gain > candidate.EntropyGain)
                        {
                            candidate.EntropyGain = gain;
                            candidate.Decider     = new Decider <T, D>(decider.Feature, decider.Threshold);
                        }
                        candidates.Enqueue(candidate);
                    }
                }
                UpdateManager.WriteLine(id, "\rNodes Added:");
                // Phase 3: replace every candidate that received a decider with its two children.
                changed = false;
                for (int i = 0; i < count; i++)
                {
                    SplitCandidate candidate = candidates.Dequeue();
                    if (candidate.Decider == null)
                    {
                        // No acceptable split found: the candidate stays open.
                        candidates.Enqueue(candidate);
                        continue;
                    }
                    changed = true;
                    List <int> indices   = candidate.Indices;
                    int        dataCount = candidate.Indices.Count;
                    List <T>   points    = new List <T>();
                    for (int j = 0; j < dataCount; j++)
                    {
                        points.Add(data[indices[j]]);
                    }
                    // Partition the candidate's data indices by the decision made for each point.
                    Decision[] decisions = candidate.Decider.Decide(points);
                    List <int> left      = new List <int>();
                    List <int> right     = new List <int>();
                    for (int j = 0; j < dataCount; j++)
                    {
                        if (decisions[j] == Decision.Left)
                        {
                            left.Add(indices[j]);
                        }
                        else
                        {
                            right.Add(indices[j]);
                        }
                    }
                    // Children of node n get indices 2n (left) and 2n + 1 (right), one level deeper.
                    SplitCandidate leftCandidate  = new SplitCandidate(left, 2 * candidate.Index, candidate.Level + 1);
                    SplitCandidate rightCandidate = new SplitCandidate(right, 2 * candidate.Index + 1, candidate.Level + 1);
                    leftCandidate.Entropy  = calculateEntropy(data, left, labelWeights, numLabels);
                    leftCandidate.Support  = calculateSupport(data, left, labelWeights);
                    leftCandidate.Delta    = calculateDelta(data, left);
                    rightCandidate.Entropy = calculateEntropy(data, right, labelWeights, numLabels);
                    rightCandidate.Support = calculateSupport(data, right, labelWeights);
                    rightCandidate.Delta   = calculateDelta(data, right);
                    UpdateManager.WriteLine(id, "{3:00000}:{0:0.000}|{1:0.000} {2:0.000} {4}", leftCandidate.Support / candidate.Support, rightCandidate.Support / candidate.Support, candidate.EntropyGain, candidate.Index, candidate.Decider);
                    deciders[candidate.Index] = candidate.Decider;
                    // The split candidate itself is not re-queued; only its children are.
                    candidates.Enqueue(leftCandidate);
                    candidates.Enqueue(rightCandidate);
                }
                if (!changed)
                {
                    UpdateManager.WriteLine("No new nodes added, best entropy gain was {0}", bestGain);
                    tries--;
                }
                // bestGain was never updated, i.e. no candidate was evaluated in this pass: nothing left to split.
                if (bestGain == float.MinValue)
                {
                    break;
                }
            }
            // Every candidate still in the queue becomes a leaf; remember its data indices by node index.
            Dictionary <int, List <int> > leafIndices = new Dictionary <int, List <int> >();

            while (candidates.Count > 0)
            {
                SplitCandidate candidate = candidates.Dequeue();
                leafIndices[candidate.Index] = candidate.Indices;
            }
            // Build the tree starting at node index 1 (the root candidate's index).
            return(buildTree(new DecisionTreeNode <T, D>(), 1, deciders, leafIndices, data, numLabels, labelWeights));
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Creates a state whose best energy starts at <see cref="float.MaxValue"/> and whose current
 /// decider is newly created from <paramref name="factory"/>.
 /// </summary>
 /// <param name="factory">The feature factory passed to the new decider</param>
 public DeciderState(IFeatureFactory <T, D> factory)
 {
     this.BestEnergy = float.MaxValue;
     this.Current    = new Decider <T, D>(factory);
 }
Ejemplo n.º 18
0
        private static DecisionVineNode <T, D>[] computeLSearchLevel(DecisionVineNode <T, D>[] parents, IFeatureFactory <T, D> factory, int numChildren, int numFeatures, int numLabels, int numThresholds, int numIterations)
        {
            DecisionVineNode <T, D>[] children = new DecisionVineNode <T, D> [numChildren];

            // assign children in a greedy manner first
            int index = 0;
            Queue <DecisionVineNode <T, D> > parentQueue = new Queue <DecisionVineNode <T, D> >(parents.OrderByDescending(o => o.Data.Count * o.Distribution.CalculateEntropy()));

            UpdateManager.WriteLine("Initializing children using highest-energy parents...");
            while (index < numChildren)
            {
                var parent = parentQueue.Dequeue();
                findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);
                children[index]       = parent.Left = new DecisionVineNode <T, D>();
                children[index].Index = index++;
                if (index < numChildren)
                {
                    children[index]       = parent.Right = new DecisionVineNode <T, D>();
                    children[index].Index = index++;
                }
                else
                {
                    parent.Right = findBestChild(parents, children, parent.RightCounts);
                }
            }

            if (parentQueue.Any())
            {
                UpdateManager.WriteLine("Adding in parents without children...");
                // we need to start adding nodes in without increasing the number of children
                while (parentQueue.Any())
                {
                    var parent = parentQueue.Dequeue();

                    if (parent.NodeType == NodeType.Leaf)
                    {
                        continue;
                    }

                    findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);

                    parent.Left  = findBestChild(parents, children, parent.LeftCounts);
                    parent.Right = findBestChild(parents, children, parent.RightCounts);

                    parent.Left.RemoveDistribution(parent.LeftCounts);
                    parent.Right.RemoveDistribution(parent.RightCounts);
                    findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
                    parent.Left.AddDistribution(parent.LeftCounts);
                    parent.Right.AddDistribution(parent.RightCounts);
                }
            }

            UpdateManager.WriteLine("Optimizing...");
            // optimize the nodes on this level
            foreach (int i in UpdateManager.ProgressEnum(Enumerable.Range(0, numIterations)))
            {
                var parent = parents.SelectRandom();

                if (parent.NodeType == NodeType.Leaf)
                {
                    continue;
                }

                parent.Left  = null;
                parent.Left  = findBestChild(parents, children, parent.LeftCounts);
                parent.Right = null;
                parent.Right = findBestChild(parents, children, parent.RightCounts);

                parent.Left.RemoveDistribution(parent.LeftCounts);
                parent.Right.RemoveDistribution(parent.RightCounts);
                findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
                parent.Left.AddDistribution(parent.LeftCounts);
                parent.Right.AddDistribution(parent.RightCounts);
            }
            UpdateManager.WriteLine(" Done");

            UpdateManager.WriteLine("Portioning out data to children...");
            // fill the data
            for (int i = 0; i < children.Length; i++)
            {
                children[i].Data = new List <T>();
            }

            for (int i = 0; i < parents.Length; i++)
            {
                var parent = parents[i];

                if (parent.NodeType == NodeType.Leaf)
                {
                    continue;
                }

                Decision[] decisions = parent.Decider.Decide(parent.Data);
                for (int j = 0; j < decisions.Length; j++)
                {
                    if (decisions[j] == Decision.Left)
                    {
                        parent.Left.Data.Add(parent.Data[j]);
                    }
                    else
                    {
                        parent.Right.Data.Add(parent.Data[j]);
                    }
                }
                parent.Data.Clear();
                parent.Data = null;
            }

            for (int i = 0; i < children.Length; i++)
            {
                if (checkDelta(children[i].Data) || children[i].Data.Count < MinimumSupport)
                {
                    children[i].NodeType = NodeType.Leaf;
                }
                else
                {
                    children[i].NodeType = NodeType.Branch;
                }
            }

            return(children);
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Initializes a feature collection that only has its feature factory set.
 /// </summary>
 /// <param name="factory">The feature factory stored by this collection.</param>
 public FeatureCollection(IFeatureFactory <T> factory)
 {
     this._factory = factory;
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Recursively trains the subtree rooted at <paramref name="node"/> on <paramref name="data"/>,
        /// descending into the left child before the right child.
        /// </summary>
        /// <param name="node">Node that is configured as a branch when a usable split is found.</param>
        /// <param name="data">Training points for this node; sorted in place when the points implement <see cref="IComparable{T}"/>.</param>
        /// <param name="factory">Factory handed to every candidate <c>Decider</c>.</param>
        /// <param name="numFeatures">Number of candidate deciders evaluated (in parallel) for this node.</param>
        /// <param name="numThresholds">Forwarded to <c>Decider.ChooseThreshold</c>.</param>
        /// <param name="numLabels">Length of the label distributions that are created.</param>
        /// <param name="labelWeights">Optional per-label weights; may be null.</param>
        /// <param name="depth">Depth of <paramref name="node"/>; used for logging and the maximum-depth check.</param>
        /// <returns>
        /// null when <paramref name="data"/> is empty; a new node built from a label distribution when
        /// <c>checkDelta</c> reports true, when no candidate scored above <see cref="float.MinValue"/>, or when
        /// there are fewer than <c>MinimumSupport</c> points; otherwise <paramref name="node"/> configured as a branch.
        /// </returns>
        /// <exception cref="InvalidOperationException">The selected decider sends every point to the same side.</exception>
        private static DecisionTreeNode <T, D> computeDepthFirst(DecisionTreeNode <T, D> node, List <T> data, IFeatureFactory <T, D> factory, int numFeatures, int numThresholds, int numLabels, float[] labelWeights, int depth)
        {
            // NOTE(review): a collection is forced on every recursive call, presumably to cap memory
            // while training on large data sets -- confirm it is still needed.
            GC.Collect();
            if (data.Count == 0)
            {
                UpdateManager.WriteLine("No data at depth {0}", depth);
                return(null);
            }
            if (data[0] is IComparable <T> )
            {
                data.Sort();
            }
            if (checkDelta(data))
            {
                // All probability mass goes to the label of the first point.
                UpdateManager.WriteLine("Delta function at depth {0}", depth);
                int     label = data[0].Label;
                float[] dist  = new float[numLabels];
                dist[label] = 1;
                return(new DecisionTreeNode <T, D>(dist));
            }
            int            dataCount   = data.Count;
            Decider <T, D> bestDecider = null;
            float          bestScore   = float.MinValue;

            float[] bestLeftDistribution  = null;
            float[] bestRightDistribution = null;
            // Each worker thread keeps its own best candidate; the per-thread winners are merged afterwards.
            using (ThreadLocal <DecisionResult> results = new ThreadLocal <DecisionResult>(() => new DecisionResult {
                Score = bestScore
            }, true))
            {
                Parallel.For(0, numFeatures, i =>
                {
                    float[] leftDistribution;
                    float[] rightDistribution;
                    Decider <T, D> decider = new Decider <T, D>(factory);
                    decider.LoadData(data);
                    float score = decider.ChooseThreshold(numThresholds, numLabels, labelWeights, out leftDistribution, out rightDistribution);
                    if (score > results.Value.Score)
                    {
                        results.Value = new DecisionResult {
                            LeftDistribution = leftDistribution, RightDistribution = rightDistribution, Decider = decider, Score = score
                        };
                    }
                });
                foreach (var result in results.Values)
                {
                    if (result.Score > bestScore)
                    {
                        bestLeftDistribution  = result.LeftDistribution;
                        bestRightDistribution = result.RightDistribution;
                        bestDecider           = result.Decider;
                        bestScore             = result.Score;
                    }
                }
            }

            // NOTE(review): the minimum-support test uses the raw point count, not a label-weighted sum.
            // A weighted "support" value used to be computed here but was never read, so it was removed.
            if (bestScore == float.MinValue || dataCount < MinimumSupport)
            {
                UpdateManager.WriteLine("Stopping due to lack of data at depth {0}, {1} < {2}", depth, dataCount, MinimumSupport);
                float[] distribution = new float[numLabels];
                for (int i = 0; i < dataCount; i++)
                {
                    distribution[data[i].Label]++;
                }
                if (labelWeights != null)
                {
                    for (int i = 0; i < distribution.Length; i++)
                    {
                        distribution[i] *= labelWeights[i];
                    }
                }
                return(new DecisionTreeNode <T, D>(distribution));
            }
            if (depth == MaximumDepth - 2)
            {
                // Deepest allowed branch: its children are built directly from the split distributions.
                UpdateManager.WriteLine("Last branch node trained at depth {0}", depth);
                node.Left     = new DecisionTreeNode <T, D>(bestLeftDistribution);
                node.Right    = new DecisionTreeNode <T, D>(bestRightDistribution);
                node.NodeType = NodeType.Branch;
                node.Decider  = bestDecider;
                return(node);
            }
            Decision[] decisions = bestDecider.Decide(data);
            List <T>   leftData  = new List <T>();
            List <T>   rightData = new List <T>();

            for (int i = 0; i < decisions.Length; i++)
            {
                if (decisions[i] == Decision.Left)
                {
                    leftData.Add(data[i]);
                }
                else
                {
                    rightData.Add(data[i]);
                }
            }
            if (leftData.Count == 0 || rightData.Count == 0)
            {
                throw new InvalidOperationException("The selected decider sent all " + dataCount + " points to one side at depth " + depth + "; the data was not split.");
            }
            UpdateManager.WriteLine("Branch node at depth {0} trained.", depth);
            node.Left     = computeDepthFirst(new DecisionTreeNode <T, D>(), leftData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
            node.Right    = computeDepthFirst(new DecisionTreeNode <T, D>(), rightData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
            node.Decider  = bestDecider;
            node.NodeType = NodeType.Branch;
            return(node);
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Copy constructor: builds a feature collection from an existing <paramref name="collection"/>.
 /// </summary>
 /// <param name="collection">
 /// The collection passed to the base constructor; its <c>Name</c> and <c>Factory</c> are taken over by the new instance.
 /// </param>
 protected FeatureCollection(FeatureCollection <T> collection)
     : base(collection)
 {
     this.Name = collection.Name;

     // The source exposes its factory through a less specific type, so it is cast back here.
     IFeatureFactory <T> sourceFactory = (IFeatureFactory <T>)collection.Factory;
     this._factory = sourceFactory;
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Creates a training context holding the class count, the feature factory and the random source.
 /// </summary>
 /// <param name="nClasses">Number of classes.</param>
 /// <param name="featureFactory">Feature factory stored by the context.</param>
 /// <param name="random">Random number generator stored by the context.</param>
 public ClassificationTrainingContext(int nClasses, IFeatureFactory <F> featureFactory, Random random)
 {
     this.nClasses_       = nClasses;
     this.featureFactory_ = featureFactory;
     this.random_         = random;
 }
Ejemplo n.º 23
0
 /// <summary>
 /// Registers <paramref name="factory"/> as one of the factories this combination factory can pick from.
 /// </summary>
 /// <param name="factory">The factory to register.</param>
 public void AddFactory(IFeatureFactory <T, D> factory)
 {
     this._factories.Add(factory);
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Constructor used for Autofac initialization; stores the injected factories.
 /// </summary>
 /// <param name="geometryBuilderFactory">Geometry builder factory kept by the parser.</param>
 /// <param name="featureFactory">Feature factory kept by the parser.</param>
 public S122DataParser(IGeometryBuilderFactory geometryBuilderFactory, IFeatureFactory featureFactory)
 {
     this._geometryBuilderFactory = geometryBuilderFactory;
     this._featureFactory         = featureFactory;
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Creates a decider around a feature freshly produced by <paramref name="factory"/>.
 /// Delegates to the two-argument constructor with <c>factory.Create()</c> and <c>0</c>.
 /// </summary>
 /// <param name="factory">The factory whose <c>Create()</c> result becomes this decider's feature.</param>
 public Decider(IFeatureFactory <T, D> factory)
     : this(factory.Create(), 0)
 {
     // All initialization happens in the delegated constructor.
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Recursively builds a subtree over <paramref name="data"/>, choosing at each node the best-scoring
        /// split among <paramref name="numFeatures"/> freshly created features.
        /// </summary>
        /// <param name="data">Points that reach this node.</param>
        /// <param name="NCount">Count passed to <c>findBestSplit</c>; raised to <c>data.Count</c> when smaller.</param>
        /// <param name="factory">Factory used to create each candidate feature.</param>
        /// <param name="numFeatures">Number of candidate features tried at this node.</param>
        /// <param name="numThresholds">Forwarded to <c>findBestSplit</c>.</param>
        /// <param name="min_rd">Forwarded to <c>findBestSplit</c>.</param>
        /// <param name="min_y">Forwarded to <c>findBestSplit</c>.</param>
        /// <param name="currentDepth">Depth of the node being built.</param>
        /// <param name="maxDepth">A leaf is returned once <paramref name="currentDepth"/> equals <c>maxDepth - 1</c>.</param>
        /// <returns>
        /// A leaf node when the depth limit is reached, when no candidate split scored above
        /// <see cref="float.MinValue"/>, or when the best split leaves one side empty; otherwise a branch node
        /// with recursively built children.
        /// </returns>
        private static Node compute(List <T> data, int NCount, IFeatureFactory <T, float[]> factory, int numFeatures, int numThresholds, float min_rd, int min_y, int currentDepth, int maxDepth)
        {
            if (currentDepth == maxDepth - 1)
            {
                return new Node {
                    NodeType = NodeType.Leaf
                };
            }

            int YCount = data.Count;

            if (NCount < YCount)
            {
                NCount = YCount;
            }

            Split best = new Split {
                Score = float.MinValue
            };

            for (int i = 0; i < numFeatures; i++)
            {
                IFeature <T, float[]> feature = factory.Create();
                var featureValues             = from point in data
                                                select feature.Compute(point);

                Split split = findBestSplit(featureValues.OrderBy(o => o).ToArray(), min_rd, min_y, NCount, numThresholds);
                if (split.Score > best.Score)
                {
                    best         = split;
                    best.Feature = feature;
                }
            }
            // No candidate beat the initial score, so there is nothing to split on.
            if (best.Feature == null)
            {
                return new Node {
                    NodeType = NodeType.Leaf
                };
            }

            Node node = new Node {
                NodeType = NodeType.Branch, Feature = best.Feature, Threshold = best.Threshold
            };
            List <T> left  = new List <T>();
            List <T> right = new List <T>();

            foreach (T point in data)
            {
                if (best.Feature.Compute(point) < best.Threshold)
                {
                    left.Add(point);
                }
                else
                {
                    right.Add(point);
                }
            }

            // A split that leaves one side empty separates nothing; stop here.
            if (left.Count == 0 || right.Count == 0)
            {
                return new Node {
                    NodeType = NodeType.Leaf
                };
            }

            UpdateManager.WriteLine("{0}:{1} {2}|{3} {4}", currentDepth, best.Score, left.Count, right.Count, best.Feature);

            node.Left  = compute(left, best.NLeft, factory, numFeatures, numThresholds, min_rd, min_y, currentDepth + 1, maxDepth);
            node.Right = compute(right, NCount - best.NLeft, factory, numFeatures, numThresholds, min_rd, min_y, currentDepth + 1, maxDepth);
            return(node);
        }
    }
}
Ejemplo n.º 27
0
 /// <summary>
 /// Builds a CLTree from <paramref name="data"/>.
 /// </summary>
 /// <param name="data">The points the tree is built from</param>
 /// <param name="factory">Factory used to create the feature dimensions</param>
 /// <param name="numFeatures">How many features to use</param>
 /// <param name="bounds">Bounds of the data</param>
 /// <returns>The constructed CLTree</returns>
 public static CLTree <T> Compute(List <T> data, IFeatureFactory <T, float[]> factory, int numFeatures, Hyperrectangle <float> bounds)
 {
     fillFeatureValues(factory, numFeatures, data);

     // Splitting starts from the index of every point in the data set.
     int        pointCount = data.Count;
     List <int> allIndices = Enumerable.Range(0, pointCount).ToList();
     var        root       = split(allIndices, pointCount, bounds);

     return new CLTree <T>(root, _buildFeatures);
 }
Ejemplo n.º 28
0
 /// <summary>
 /// Takes <paramref name="factory"/> out of the set of factories this combination factory picks from.
 /// </summary>
 /// <param name="factory">The factory to take out.</param>
 public void RemoveFactory(IFeatureFactory <T, D> factory)
 {
     this._factories.Remove(factory);
 }