public FeatureRegistry Add(IFeatureFactory featureFactory)
{
    if (featureFactory == null)
    {
        throw new ArgumentNullException(nameof(featureFactory));
    }
    if (_featureFactoriesById.ContainsKey(featureFactory.FeatureId))
    {
        throw new InvalidOperationException("A factory for the given " + nameof(featureFactory.FeatureId) + " already exists.");
    }

    var featureFactoryType = featureFactory.GetType();
    if (_featureFactoriesByType.ContainsKey(featureFactoryType))
    {
        throw new InvalidOperationException("A factory for the given type " + featureFactoryType.FullName + " already exists.");
    }

    // Copy-on-write: build new dictionaries so existing registry instances stay immutable.
    var featureFactoriesById = new Dictionary<Guid, IFeatureFactory>(_featureFactoriesById)
    {
        { featureFactory.FeatureId, featureFactory }
    };
    var featureFactoriesByType = new Dictionary<Type, IFeatureFactory>(_featureFactoriesByType)
    {
        { featureFactoryType, featureFactory }
    };

    return new FeatureRegistry(featureFactoriesById, featureFactoriesByType, Default);
}
public FileBrowserViewModel(
    IServiceProvider serviceProvider,
    ILog log,
    IMessageService messageService,
    IOptionsService optionsService,
    ISolutionProcessor solutionProcessor,
    IFileTypeResolver fileTypeResolver,
    ISearchMatchService searchMatchService,
    IShellHelperService shellHelperService,
    IShellImageService shellImageService,
    IUtilsService utilsService,
    IFeatureFactory featureFactory) : base(KnownFeature.FileBrowser, serviceProvider)
{
    _log = log;
    _messageService = messageService;
    _optionsService = optionsService;
    _solutionProcessor = solutionProcessor;
    _fileTypeResolver = fileTypeResolver;
    _searchMatchService = searchMatchService;
    _shellHelperService = shellHelperService;
    _shellImageService = shellImageService;
    _utilsService = utilsService;
    _featureFactory = featureFactory;

    // Source files must be set up in the constructor or the view won't show any binding data.
    _sourceFiles = new ObservableCollection<FileModel>();
    _files = new CollectionViewSource { Source = _sourceFiles }; // must be ObservableCollection

    this.ShowFilesCommand = new RelayCommand(_messageService, OnShowAllFiles);
    this.OpenCodeBrowserAllCommand = new RelayCommand(_messageService, OnOpenCodeBrowserAll, OnCanOpenCodeBrowser);
    this.OpenCodeBrowserClassesCommand = new RelayCommand(_messageService, OnOpenCodeBrowserClasses, OnCanOpenCodeBrowser);
    this.OpenCodeBrowserMethodsCommand = new RelayCommand(_messageService, OnOpenCodeBrowserMethods, OnCanOpenCodeBrowser);
    this.OpenCodeBrowserPropertiesCommand = new RelayCommand(_messageService, OnOpenCodeBrowserProperties, OnCanOpenCodeBrowser);
    this.OpenFilesCommand = new RelayCommand(_messageService, OnOpenFiles, OnCanOpenFiles);
}
private FeatureRegistry(
    IDictionary<Guid, IFeatureFactory> featureFactoriesById,
    IDictionary<Type, IFeatureFactory> featureFactoriesByType,
    IFeatureFactory defaultFeatureFactory)
{
    _featureFactoriesById = featureFactoriesById;
    _featureFactoriesByType = featureFactoriesByType;
    Default = defaultFeatureFactory;
}
/// <summary>
/// Constructs a decision vine using the LSearch methodology.
/// </summary>
/// <param name="data">The data to use in training the vine</param>
/// <param name="factory">The feature factory to use when creating decision stumps</param>
/// <param name="numFeatures">The number of potential features to try</param>
/// <param name="numThresholds">The number of thresholds to try per feature</param>
/// <param name="maxChildren">The maximum allowed number of children per level</param>
/// <param name="maximumDepth">The maximum depth of the vine</param>
/// <param name="maxIterations">The number of optimization iterations to perform per level</param>
/// <param name="numLabels">The number of labels found in the data</param>
/// <returns>The decision vine</returns>
public static DecisionVine<T, D> ConstructUsingLSearch(List<T> data, IFeatureFactory<T, D> factory, int numFeatures, int numThresholds, int maxChildren, int maximumDepth, int maxIterations, int numLabels)
{
    UpdateManager.WriteLine("Training Decision Vine with {0} data points...", data.Count);
    DecisionVineNode<T, D> root = new DecisionVineNode<T, D>();
    root.Data = data;
    root.NodeType = NodeType.Branch;
    root.Distribution = data.ComputeDistribution<T, D>(numLabels);

    DecisionVineNode<T, D>[][] levels = new DecisionVineNode<T, D>[maximumDepth][];
    levels[0] = new DecisionVineNode<T, D>[] { root };
    for (int i = 1; i < maximumDepth; i++)
    {
        // A level is a vine (bounded width) once 2^i exceeds maxChildren; only then is it optimized.
        int numChildren = Math.Min(1 << i, maxChildren);
        int numIterations = numChildren < maxChildren ? 0 : maxIterations;
        UpdateManager.WriteLine("Training level {0} with {1} children and {2} optimization iterations...", i, numChildren, numIterations);
        levels[i] = computeLSearchLevel(levels[i - 1], factory, numChildren, numFeatures, numLabels, numThresholds, numIterations);
        UpdateManager.WriteLine("Level {0} complete with entropy {1}", i, computeEntropy(levels[i]));
        UpdateManager.WriteLine("Data distribution: [{0}]", string.Join(",", levels[i].Select(o => o.Data.Count)));
    }

    // Normalize the leaf distributions on the final level.
    foreach (var node in levels[maximumDepth - 1])
    {
        node.Distribution = node.Distribution.Normalize();
    }
    UpdateManager.WriteLine("Complete.");
    return new DecisionVine<T, D>(levels);
}
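// A minimal usage sketch for ConstructUsingLSearch. `MySample`, `MyFeatureData`, and
// `MyFeatureFactory` are hypothetical stand-ins for a concrete data point type and an
// IFeatureFactory<T, D> implementation from the host codebase; the parameter values are
// illustrative only, not recommended defaults.
public static DecisionVine<MySample, MyFeatureData> TrainVine(List<MySample> trainingData, int numLabels)
{
    IFeatureFactory<MySample, MyFeatureData> factory = new MyFeatureFactory();
    return DecisionVine<MySample, MyFeatureData>.ConstructUsingLSearch(
        trainingData,
        factory,
        numFeatures: 400,     // candidate features sampled per split
        numThresholds: 10,    // candidate thresholds per feature
        maxChildren: 64,      // width cap that makes this a vine rather than a tree
        maximumDepth: 12,
        maxIterations: 1000,  // LSearch optimization passes per capped level
        numLabels: numLabels);
}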
public static Forest<F, HistogramAggregator> Train<F>(
    DataPointCollection trainingData,
    IFeatureFactory<F> featureFactory,
    TrainingParameters trainingParameters) where F : IFeatureResponse
{
    if (trainingData.Dimensions != 2)
    {
        throw new Exception("Training data points must be 2D.");
    }
    if (trainingData.HasLabels == false)
    {
        throw new Exception("Training data points must be labelled.");
    }
    if (trainingData.HasTargetValues == true)
    {
        throw new Exception("Training data points should not have target values.");
    }

    Console.WriteLine("Running training...");

    Random random = new Random();
    ITrainingContext<F, HistogramAggregator> classificationContext =
        new ClassificationTrainingContext<F>(trainingData.CountClasses(), featureFactory, random);
    var forest = ForestTrainer<F, HistogramAggregator>.TrainForest(
        random, trainingParameters, classificationContext, trainingData);

    return forest;
}
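// Hedged sketch of a caller for Train<F>. `MyFeatureResponse` and `MyFeatureFactory` stand in
// for a concrete IFeatureResponse implementation and its IFeatureFactory<F>; LoadTrainingData
// and the TrainingParameters members shown are assumptions about typical knobs, not confirmed API.
DataPointCollection trainingData = LoadTrainingData(); // assumed helper
var parameters = new TrainingParameters { NumberOfTrees = 10, MaxDecisionLevels = 12 };
Forest<MyFeatureResponse, HistogramAggregator> forest =
    Train(trainingData, new MyFeatureFactory(), parameters);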
/// <summary>
/// Trains a decision forest from <paramref name="splits"/> based on the provided parameters using the breadth-first algorithm.
/// </summary>
/// <param name="numTrees">Number of trees in the forest</param>
/// <param name="splits">Data splits to use when training the trees</param>
/// <param name="factory">The feature factory</param>
/// <param name="numFeatures">The number of features to try for each node</param>
/// <param name="numThresholds">The number of thresholds to try for each node</param>
/// <param name="labelNames">The names for the labels</param>
/// <param name="labelWeights">An array of weights for each label</param>
/// <param name="threshold">The threshold used to determine a "good" feature test</param>
/// <returns>The trained forest</returns>
public static DecisionForest<T, D> ComputeBreadthFirst(
    int numTrees,
    List<T>[] splits,
    IFeatureFactory<T, D> factory,
    int numFeatures,
    int numThresholds,
    string[] labelNames,
    float[] labelWeights,
    float threshold)
{
    int numLabels = labelNames.Length;
    DecisionTree<T, D>[] trees = new DecisionTree<T, D>[numTrees];
    int count = 0;
    for (byte i = 0; i < numTrees; i++)
    {
        UpdateManager.WriteLine(string.Format("Training tree {0} of {1}...", i + 1, numTrees));
        // Cycle through the data splits so each tree trains on a different subset.
        int split = i % splits.Length;
        trees[i] = DecisionTree<T, D>.ComputeBreadthFirst(splits[split], factory, numFeatures, numThresholds, numLabels, labelWeights, threshold);
        count = trees[i].SetTreeLabel(i, count);
        UpdateManager.WriteLine("\ndone");
    }
    UpdateManager.WriteLine("Training complete");
    return new DecisionForest<T, D>(trees, labelNames);
}
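// Hedged usage sketch for the breadth-first forest trainer. `MySample`, `MyFeatureData`,
// `MyFeatureFactory`, and SplitData are hypothetical placeholders; here the data is split
// into as many folds as there are trees, though any split count >= 1 works since splits are cycled.
List<MySample>[] splits = SplitData(allSamples, 5); // assumed partitioning helper
DecisionForest<MySample, MyFeatureData> forest = DecisionForest<MySample, MyFeatureData>.ComputeBreadthFirst(
    numTrees: 5,
    splits: splits,
    factory: new MyFeatureFactory(),
    numFeatures: 200,
    numThresholds: 10,
    labelNames: new[] { "background", "foreground" },
    labelWeights: new[] { 1f, 1f },
    threshold: 0.01f); // minimum entropy gain for a split to be accepted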
/// <summary>
/// Creates an instance of this class, filled with the given <paramref name="features"/>.
/// </summary>
/// <param name="features">The features to add to the collection</param>
public FeatureCollection(IEnumerable<IFeature<T>> features)
{
    Name = "FC" + Guid.NewGuid();
    // All features are assumed to come from the same factory; take it from the first.
    _factory = (IFeatureFactory<T>)features.First().Factory;
    foreach (var feature in features)
    {
        Add(feature);
    }
}
/// <summary>
/// Computes the tree from the provided data.
/// </summary>
/// <param name="data">The data to use when computing the tree</param>
/// <param name="factory">The factory which generates the random features</param>
/// <param name="numFeatures">The number of features to try at each level</param>
/// <param name="numThresholds">Number of test thresholds to try</param>
/// <param name="min_rd">The minimum relative density of a node (used as a stopping condition)</param>
/// <param name="min_y">The minimum number of points in a node, as a fraction of the total data points (used as a stopping condition)</param>
/// <param name="maxDepth">The maximum depth of the tree</param>
/// <returns>The computed cluster tree</returns>
public static RandomClusterTree<T> Compute(
    List<T> data,
    IFeatureFactory<T, float[]> factory,
    int numFeatures,
    int numThresholds,
    float min_rd,
    float min_y,
    byte maxDepth)
{
    // min_y is supplied as a fraction and converted to an absolute point count here.
    return new RandomClusterTree<T>(
        compute(data, data.Count, factory, numFeatures, numThresholds, min_rd, (int)(min_y * data.Count), 0, maxDepth));
}
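// Illustrative call; `MySample` and `MyVectorFeatureFactory` are hypothetical placeholders
// for a data type and an IFeatureFactory<T, float[]> from the host codebase.
RandomClusterTree<MySample> tree = RandomClusterTree<MySample>.Compute(
    samples,
    new MyVectorFeatureFactory(),
    numFeatures: 50,
    numThresholds: 10,
    min_rd: 0.1f,  // minimum relative density before a node becomes a leaf
    min_y: 0.01f,  // leaf if a node holds fewer than 1% of all points
    maxDepth: 10);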
/// <summary>
/// Constructs a CLTree from the provided data.
/// </summary>
/// <param name="data">The data to use in constructing the tree</param>
/// <param name="factory">The factory used to create features</param>
/// <param name="numFeatures">The number of features to use</param>
/// <returns>A CLTree</returns>
public static CLTree<T> Compute(List<T> data, IFeatureFactory<T, float[]> factory, int numFeatures)
{
    // Compute the bounding hyperrectangle of the data in feature space.
    Hyperrectangle<float> bounds = new Hyperrectangle<float>(numFeatures, float.MaxValue, float.MinValue);
    fillFeatureValues(factory, numFeatures, data);
    for (int i = 0; i < numFeatures; i++)
    {
        bounds.MinimumBound[i] = _featureValues[i].Min();
        bounds.MaximumBound[i] = _featureValues[i].Max();
    }
    return new CLTree<T>(split(Enumerable.Range(0, data.Count).ToList(), data.Count, bounds), _buildFeatures);
}
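// Sketch of building a CLTree when the data bounds are not known up front; the overload further
// below accepts a precomputed Hyperrectangle<float> instead. `MySample` and
// `MyVectorFeatureFactory` are hypothetical placeholders.
CLTree<MySample> clTree = CLTree<MySample>.Compute(samples, new MyVectorFeatureFactory(), numFeatures: 20);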
private static void fillFeatureValues(IFeatureFactory<T, float[]> factory, int numFeatures, List<T> data)
{
    _numFeatures = numFeatures;
    _buildFeatures = new IFeature<T, float[]>[numFeatures];
    _featureValues = new float[numFeatures][];
    for (int i = 0; i < numFeatures; i++)
    {
        IFeature<T, float[]> feature = factory.Create();
        _featureValues[i] = data.Select(o => feature.Compute(o)).ToArray();
        _buildFeatures[i] = feature;
    }
}
/// <summary>
/// Constructs a new decision tree using the breadth-first method. This method attempts to split every leaf
/// node in the tree at each step, and stops when <see cref="F:MaximumDepth"/> is reached or no leaf node can
/// be split. If no nodes split in a step, it retries up to <see cref="F:NumberOfTries"/> times (lowering the
/// threshold each time) and then stops. A node is only split if the resulting entropy gain is above
/// <paramref name="threshold"/>.
/// </summary>
/// <param name="data">The data to use when constructing the tree</param>
/// <param name="factory">The feature factory used to produce candidate feature tests for each node</param>
/// <param name="numFeatures">The number of candidate feature tests to try</param>
/// <param name="numThresholds">The number of test thresholds to try with each test</param>
/// <param name="numLabels">The number of possible labels for a point</param>
/// <param name="labelWeights">The weights for each label</param>
/// <param name="threshold">Threshold used to determine a good feature test</param>
/// <returns>A new decision tree</returns>
public static DecisionTree<T, D> ComputeBreadthFirst(
    List<T> data,
    IFeatureFactory<T, D> factory,
    int numFeatures,
    int numThresholds,
    int numLabels,
    float[] labelWeights,
    float threshold)
{
    _isBuilding = true;
    DecisionTreeNode<T, D> root = computeBreadthFirst(threshold, data, factory, numFeatures, numThresholds, numLabels, labelWeights);
    _isBuilding = false;
    return new DecisionTree<T, D>(root, labelWeights, numLabels);
}
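// Minimal sketch of training a single tree directly; placeholders are hypothetical as above.
// In this codebase the tree trainer is normally invoked through DecisionForest<T, D>.ComputeBreadthFirst.
DecisionTree<MySample, MyFeatureData> singleTree = DecisionTree<MySample, MyFeatureData>.ComputeBreadthFirst(
    samples,
    new MyFeatureFactory(),
    numFeatures: 200,
    numThresholds: 10,
    numLabels: 2,
    labelWeights: new[] { 1f, 1f },
    threshold: 0.01f);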
public static FeatureRegistry WithDefault(IFeatureFactory featureFactory)
{
    if (featureFactory == null)
    {
        throw new ArgumentNullException(nameof(featureFactory));
    }

    var featureFactoriesById = new Dictionary<Guid, IFeatureFactory>
    {
        { featureFactory.FeatureId, featureFactory }
    };
    var featureFactoriesByType = new Dictionary<Type, IFeatureFactory>
    {
        { featureFactory.GetType(), featureFactory }
    };

    return new FeatureRegistry(featureFactoriesById, featureFactoriesByType, featureFactory);
}
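// Sketch of composing a registry from the two members above. Because Add returns a new
// FeatureRegistry rather than mutating the receiver, calls chain naturally; FooFactory and
// BarFactory are hypothetical IFeatureFactory implementations.
FeatureRegistry registry = FeatureRegistry
    .WithDefault(new FooFactory())
    .Add(new BarFactory());
// Adding a factory with a duplicate FeatureId or runtime type throws InvalidOperationException.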
private void GenerateFeatures()
{
    AllFeatures = new List<IFeature>();
    _currFeatureCount = 0;

    // Start top left.
    FeaturePosition position = new FeaturePosition(1, 1, Direction.None);
    Vector3Int size = RoomSize.GetRandomValue();
    IFeature tempFeature = null;
    StartPos = position + new Vector3Int(size.x / 2, size.y / 2, 0);
    _factories[0].TryCreateFeature(position, size.ToVector2Int(), Tilemap, ref tempFeature);
    _featureQueue.Enqueue(tempFeature);
    AllFeatures.Add(tempFeature);
    _currFeatureCount++;

    while (_featureQueue.Count > 0 && _currFeatureCount < FeatureCount)
    {
        var lastFeature = _featureQueue.Dequeue();
        position = lastFeature.GetNewFeaturePosition();
        size = RoomSize.GetRandomValue();
        Debug.Log($"feature at {position} of size {size}");

        // Pick a random factory; the new feature is only kept if it can actually be placed.
        IFeatureFactory factory = _factories[Random.Range(0, _factories.Count)];
        IFeature newFeature = null;
        if (factory.TryCreateFeature(position, size.ToVector2Int(), Tilemap, ref newFeature))
        {
            lastFeature.AddExit(position);
            AllFeatures.Add(newFeature);
            _featureQueue.Enqueue(newFeature);
            _currFeatureCount++;
        }

        // Re-queue the source feature if it can still spawn more neighbors.
        if (lastFeature.CanMakeNewFeature())
        {
            _featureQueue.Enqueue(lastFeature);
        }
    }
}
/// <summary>
/// Trains a decision forest from <paramref name="splits"/> based on the provided parameters using the depth-first algorithm.
/// </summary>
/// <param name="numTrees">Number of trees in the forest</param>
/// <param name="splits">Data splits to use when training the trees</param>
/// <param name="factory">The feature factory</param>
/// <param name="numFeatures">The number of features to try for each node</param>
/// <param name="numThresholds">The number of thresholds to try for each node</param>
/// <param name="labelNames">The names for the labels</param>
/// <param name="labelWeights">An array of weights for each label</param>
/// <returns>The trained forest</returns>
public static DecisionForest<T, D> ComputeDepthFirst(
    int numTrees,
    List<T>[] splits,
    IFeatureFactory<T, D> factory,
    int numFeatures,
    int numThresholds,
    string[] labelNames,
    float[] labelWeights)
{
    int numLabels = labelNames.Length;
    DecisionTree<T, D>[] trees = new DecisionTree<T, D>[numTrees];
    int count = 0;
    var indices = Enumerable.Range(0, numTrees).Select(o => (byte)o);

    // Pre-sort the splits once if the data points are comparable.
    if (splits[0][0] is IComparable<T>)
    {
        foreach (var split in splits)
        {
            split.Sort();
        }
    }

    foreach (var i in indices)
    {
        int split = i % splits.Length;
        UpdateManager.WriteLine(string.Format("Training tree {0} of {1}...", i + 1, numTrees));
        trees[i] = DecisionTree<T, D>.ComputeDepthFirst(splits[split], factory, numFeatures, numThresholds, numLabels, labelWeights);
        trees[i].LabelCount = labelNames.Length;
        count = trees[i].SetTreeLabel(i, count);
        UpdateManager.WriteLine("\ndone");
    }
    UpdateManager.WriteLine("Training complete");
    return new DecisionForest<T, D>(trees, labelNames);
}
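// The depth-first variant takes the same arguments as ComputeBreadthFirst minus the entropy-gain
// threshold; the breadth-first usage sketch above applies otherwise (placeholders hypothetical,
// `splits` as in that sketch).
var dfForest = DecisionForest<MySample, MyFeatureData>.ComputeDepthFirst(
    5, splits, new MyFeatureFactory(), 200, 10, new[] { "background", "foreground" }, new[] { 1f, 1f });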
private static void findSplit(DecisionVineNode<T, D> node, IFeatureFactory<T, D> factory, float[] leftDistribution, float[] rightDistribution, int numFeatures, int numLabels, int numThresholds)
{
    int dataCount = node.Data.Count;
    using (ThreadLocal<DeciderState> results = new ThreadLocal<DeciderState>(() => new DeciderState(factory), true))
    {
        // Evaluate candidate features in parallel, each thread tracking its own lowest-energy decider.
        Parallel.For(0, numFeatures, i =>
        {
            results.Value.Current.LoadData(node.Data);
            float energy = results.Value.Current.ChooseThreshold(numThresholds, numLabels, leftDistribution, rightDistribution);
            if (energy < results.Value.BestEnergy)
            {
                results.Value.Best = results.Value.Current;
                results.Value.BestEnergy = energy;
                results.Value.Current = new Decider<T, D>(factory);
            }
        });
        node.Decider = results.Values.OrderBy(o => o.BestEnergy).First().Best;

        // Tally the per-label counts that the chosen decider sends left and right.
        Decision[] decisions = node.Decider.Decide(node.Data);
        float[] leftCounts = new float[numLabels];
        float[] rightCounts = new float[numLabels];
        for (int i = 0; i < decisions.Length; i++)
        {
            if (decisions[i] == Decision.Left)
            {
                leftCounts[node.Data[i].Label] += 1;
            }
            else
            {
                rightCounts[node.Data[i].Label] += 1;
            }
        }
        node.LeftCounts = leftCounts;
        node.RightCounts = rightCounts;
    }
}
private static DecisionTreeNode<T, D> computeBreadthFirst(float threshold, List<T> data, IFeatureFactory<T, D> factory, int numFeatures, int numThresholds, int numLabels, float[] labelWeights)
{
    string id = "DecisionTree.ComputeBreadthFirst";
    Queue<SplitCandidate> candidates = new Queue<SplitCandidate>();
    SplitCandidate start = new SplitCandidate(new List<int>(), 1, 0);
    for (int i = 0; i < data.Count; i++)
    {
        start.Indices.Add(i);
    }
    start.Entropy = calculateEntropy(data, start.Indices, labelWeights, numLabels);
    start.Support = calculateSupport(data, start.Indices, labelWeights);
    candidates.Enqueue(start);

    bool changed = true;
    float[] leftDistribution, rightDistribution;
    int tries = (int)_numberOfTries;
    float increment = threshold / tries;
    Dictionary<int, Decider<T, D>> deciders = new Dictionary<int, Decider<T, D>>();
    while (tries > 0)
    {
        // If the last pass added no nodes, relax the gain threshold before retrying.
        if (!changed)
        {
            threshold -= increment;
            UpdateManager.WriteLine("Decreasing threshold to {0}", threshold);
        }
        GC.Collect();

        // Allocate scratch buffers for every candidate that is still splittable.
        int count = candidates.Count;
        for (int i = 0; i < count; i++)
        {
            SplitCandidate candidate = candidates.Dequeue();
            if (candidate.Delta)
            {
                candidates.Enqueue(candidate);
                continue;
            }
            if (MaximumDepth > 0 && candidate.Level >= MaximumDepth - 1)
            {
                candidates.Enqueue(candidate);
                continue;
            }
            if (candidate.Support < MinimumSupport)
            {
                candidates.Enqueue(candidate);
                continue;
            }
            int dataCount = candidate.Indices.Count;
            if (candidate.Values == null)
            {
                candidate.Values = new float[dataCount];
            }
            if (candidate.Labels == null)
            {
                candidate.Labels = new int[dataCount];
            }
            if (candidate.Weights == null)
            {
                candidate.Weights = new float[dataCount];
            }
            candidates.Enqueue(candidate);
        }

        // Try each sampled feature against every splittable candidate, keeping the best decider per candidate.
        float bestGain = float.MinValue;
        for (int k = 0; k < numFeatures; k++)
        {
            UpdateManager.RaiseProgress(k, numFeatures);
            Decider<T, D> decider = new Decider<T, D>(factory);
            decider.ApplyFeature(data);
            for (int i = 0; i < count; i++)
            {
                SplitCandidate candidate = candidates.Dequeue();
                if (MaximumDepth > 0 && candidate.Level >= MaximumDepth - 1)
                {
                    candidates.Enqueue(candidate);
                    continue;
                }
                if (candidate.Delta)
                {
                    candidates.Enqueue(candidate);
                    continue;
                }
                if (candidate.Support < MinimumSupport)
                {
                    candidates.Enqueue(candidate);
                    continue;
                }
                List<int> indices = candidate.Indices;
                int dataCount = indices.Count;
                for (int j = 0; j < dataCount; j++)
                {
                    T point = data[indices[j]];
                    candidate.Values[j] = point.FeatureValue;
                    candidate.Labels[j] = point.Label;
                    candidate.Weights[j] = point.Weight;
                }
                decider.SetData(candidate.Values, candidate.Weights, candidate.Labels);
                float gain = candidate.Entropy + decider.ChooseThreshold(numThresholds, numLabels, labelWeights, out leftDistribution, out rightDistribution);
                bestGain = Math.Max(gain, bestGain);
                if ((gain > threshold || candidate.Level < MinimumDepth) && gain > candidate.EntropyGain)
                {
                    candidate.EntropyGain = gain;
                    candidate.Decider = new Decider<T, D>(decider.Feature, decider.Threshold);
                }
                candidates.Enqueue(candidate);
            }
        }

        UpdateManager.WriteLine(id, "\rNodes Added:");
        changed = false;
        for (int i = 0; i < count; i++)
        {
            SplitCandidate candidate = candidates.Dequeue();
            if (candidate.Decider == null)
            {
                candidates.Enqueue(candidate);
                continue;
            }
            changed = true;
            List<int> indices = candidate.Indices;
            int dataCount = candidate.Indices.Count;
            List<T> points = new List<T>();
            for (int j = 0; j < dataCount; j++)
            {
                points.Add(data[indices[j]]);
            }

            // Split the candidate's points with its best decider and enqueue the two children.
            Decision[] decisions = candidate.Decider.Decide(points);
            List<int> left = new List<int>();
            List<int> right = new List<int>();
            for (int j = 0; j < dataCount; j++)
            {
                if (decisions[j] == Decision.Left)
                {
                    left.Add(indices[j]);
                }
                else
                {
                    right.Add(indices[j]);
                }
            }
            SplitCandidate leftCandidate = new SplitCandidate(left, 2 * candidate.Index, candidate.Level + 1);
            SplitCandidate rightCandidate = new SplitCandidate(right, 2 * candidate.Index + 1, candidate.Level + 1);
            leftCandidate.Entropy = calculateEntropy(data, left, labelWeights, numLabels);
            leftCandidate.Support = calculateSupport(data, left, labelWeights);
            leftCandidate.Delta = calculateDelta(data, left);
            rightCandidate.Entropy = calculateEntropy(data, right, labelWeights, numLabels);
            rightCandidate.Support = calculateSupport(data, right, labelWeights);
            rightCandidate.Delta = calculateDelta(data, right);
            UpdateManager.WriteLine(id, "{3:00000}:{0:0.000}|{1:0.000} {2:0.000} {4}", leftCandidate.Support / candidate.Support, rightCandidate.Support / candidate.Support, candidate.EntropyGain, candidate.Index, candidate.Decider);
            deciders[candidate.Index] = candidate.Decider;
            candidates.Enqueue(leftCandidate);
            candidates.Enqueue(rightCandidate);
        }

        if (!changed)
        {
            UpdateManager.WriteLine("No new nodes added, best entropy gain was {0}", bestGain);
            tries--;
        }
        if (bestGain == float.MinValue)
        {
            break;
        }
    }

    // Any remaining candidates become leaves; build the final tree from the recorded deciders.
    Dictionary<int, List<int>> leafIndices = new Dictionary<int, List<int>>();
    while (candidates.Count > 0)
    {
        SplitCandidate candidate = candidates.Dequeue();
        leafIndices[candidate.Index] = candidate.Indices;
    }
    return buildTree(new DecisionTreeNode<T, D>(), 1, deciders, leafIndices, data, numLabels, labelWeights);
}
public DeciderState(IFeatureFactory<T, D> factory)
{
    Current = new Decider<T, D>(factory);
    BestEnergy = float.MaxValue;
}
private static DecisionVineNode<T, D>[] computeLSearchLevel(DecisionVineNode<T, D>[] parents, IFeatureFactory<T, D> factory, int numChildren, int numFeatures, int numLabels, int numThresholds, int numIterations)
{
    DecisionVineNode<T, D>[] children = new DecisionVineNode<T, D>[numChildren];

    // Assign children in a greedy manner first, starting with the highest-energy parents.
    int index = 0;
    Queue<DecisionVineNode<T, D>> parentQueue = new Queue<DecisionVineNode<T, D>>(parents.OrderByDescending(o => o.Data.Count * o.Distribution.CalculateEntropy()));
    UpdateManager.WriteLine("Initializing children using highest-energy parents...");
    while (index < numChildren)
    {
        var parent = parentQueue.Dequeue();
        findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);
        children[index] = parent.Left = new DecisionVineNode<T, D>();
        children[index].Index = index++;
        if (index < numChildren)
        {
            children[index] = parent.Right = new DecisionVineNode<T, D>();
            children[index].Index = index++;
        }
        else
        {
            parent.Right = findBestChild(parents, children, parent.RightCounts);
        }
    }

    if (parentQueue.Any())
    {
        UpdateManager.WriteLine("Adding in parents without children...");
        // We need to start adding nodes in without increasing the number of children.
        while (parentQueue.Any())
        {
            var parent = parentQueue.Dequeue();
            if (parent.NodeType == NodeType.Leaf)
            {
                continue;
            }
            findSplit(parent, factory, new float[numLabels], new float[numLabels], numFeatures, numLabels, numThresholds);
            parent.Left = findBestChild(parents, children, parent.LeftCounts);
            parent.Right = findBestChild(parents, children, parent.RightCounts);
            parent.Left.RemoveDistribution(parent.LeftCounts);
            parent.Right.RemoveDistribution(parent.RightCounts);
            findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
            parent.Left.AddDistribution(parent.LeftCounts);
            parent.Right.AddDistribution(parent.RightCounts);
        }
    }

    UpdateManager.WriteLine("Optimizing...");
    // Optimize the nodes on this level by repeatedly re-assigning a random parent's children.
    foreach (int i in UpdateManager.ProgressEnum(Enumerable.Range(0, numIterations)))
    {
        var parent = parents.SelectRandom();
        if (parent.NodeType == NodeType.Leaf)
        {
            continue;
        }
        parent.Left = null;
        parent.Left = findBestChild(parents, children, parent.LeftCounts);
        parent.Right = null;
        parent.Right = findBestChild(parents, children, parent.RightCounts);
        parent.Left.RemoveDistribution(parent.LeftCounts);
        parent.Right.RemoveDistribution(parent.RightCounts);
        findSplit(parent, factory, parent.Left.Distribution, parent.Right.Distribution, numFeatures, numLabels, numThresholds);
        parent.Left.AddDistribution(parent.LeftCounts);
        parent.Right.AddDistribution(parent.RightCounts);
    }
    UpdateManager.WriteLine(" Done");

    UpdateManager.WriteLine("Portioning out data to children...");
    // Fill the data.
    for (int i = 0; i < children.Length; i++)
    {
        children[i].Data = new List<T>();
    }
    for (int i = 0; i < parents.Length; i++)
    {
        var parent = parents[i];
        if (parent.NodeType == NodeType.Leaf)
        {
            continue;
        }
        Decision[] decisions = parent.Decider.Decide(parent.Data);
        for (int j = 0; j < decisions.Length; j++)
        {
            if (decisions[j] == Decision.Left)
            {
                parent.Left.Data.Add(parent.Data[j]);
            }
            else
            {
                parent.Right.Data.Add(parent.Data[j]);
            }
        }
        parent.Data.Clear();
        parent.Data = null;
    }

    // A child becomes a leaf if its data is pure or falls below the minimum support.
    for (int i = 0; i < children.Length; i++)
    {
        if (checkDelta(children[i].Data) || children[i].Data.Count < MinimumSupport)
        {
            children[i].NodeType = NodeType.Leaf;
        }
        else
        {
            children[i].NodeType = NodeType.Branch;
        }
    }
    return children;
}
/// <summary>
/// Creates an instance of this class, initialized only with the <paramref name="factory"/>.
/// </summary>
/// <param name="factory">The feature factory</param>
public FeatureCollection(IFeatureFactory<T> factory)
{
    _factory = factory;
}
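// Sketch of the two construction paths for FeatureCollection. `MySample`, `MyFeatureFactory`,
// `featureA`, and `featureB` are hypothetical placeholders; per the enumerable constructor above,
// all features passed in must share the same Factory.
var emptyCollection = new FeatureCollection<MySample>(new MyFeatureFactory());
var filledCollection = new FeatureCollection<MySample>(new[] { featureA, featureB });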
private static DecisionTreeNode<T, D> computeDepthFirst(DecisionTreeNode<T, D> node, List<T> data, IFeatureFactory<T, D> factory, int numFeatures, int numThresholds, int numLabels, float[] labelWeights, int depth)
{
    GC.Collect();
    if (data.Count == 0)
    {
        UpdateManager.WriteLine("No data at depth {0}", depth);
        return null;
    }
    if (data[0] is IComparable<T>)
    {
        data.Sort();
    }

    // If all points share one label, return a leaf with a delta distribution.
    if (checkDelta(data))
    {
        UpdateManager.WriteLine("Delta function at depth {0}", depth);
        int label = data[0].Label;
        float[] dist = new float[numLabels];
        dist[label] = 1;
        return new DecisionTreeNode<T, D>(dist);
    }

    int dataCount = data.Count;
    Decider<T, D> bestDecider = null;
    float bestScore = float.MinValue;
    float[] bestLeftDistribution = null;
    float[] bestRightDistribution = null;

    // Evaluate candidate features in parallel, keeping each thread's best result.
    using (ThreadLocal<DecisionResult> results = new ThreadLocal<DecisionResult>(() => new DecisionResult { Score = bestScore }, true))
    {
        Parallel.For(0, numFeatures, i =>
        {
            float[] leftDistribution;
            float[] rightDistribution;
            Decider<T, D> decider = new Decider<T, D>(factory);
            decider.LoadData(data);
            float score = decider.ChooseThreshold(numThresholds, numLabels, labelWeights, out leftDistribution, out rightDistribution);
            if (score > results.Value.Score)
            {
                results.Value = new DecisionResult
                {
                    LeftDistribution = leftDistribution,
                    RightDistribution = rightDistribution,
                    Decider = decider,
                    Score = score
                };
            }
        });
        foreach (var result in results.Values)
        {
            if (result.Score > bestScore)
            {
                bestLeftDistribution = result.LeftDistribution;
                bestRightDistribution = result.RightDistribution;
                bestDecider = result.Decider;
                bestScore = result.Score;
            }
        }
    }

    float support = 0;
    if (labelWeights != null)
    {
        foreach (T point in data)
        {
            support += labelWeights[point.Label];
        }
    }
    else
    {
        support = dataCount;
    }

    if (bestScore == float.MinValue || dataCount < MinimumSupport)
    {
        UpdateManager.WriteLine("Stopping due to lack of data at depth {0}, {1} < {2}", depth, dataCount, MinimumSupport);
        float[] distribution = new float[numLabels];
        for (int i = 0; i < dataCount; i++)
        {
            distribution[data[i].Label]++;
        }
        if (labelWeights != null)
        {
            for (int i = 0; i < distribution.Length; i++)
            {
                distribution[i] *= labelWeights[i];
            }
        }
        return new DecisionTreeNode<T, D>(distribution);
    }

    if (depth == MaximumDepth - 2)
    {
        UpdateManager.WriteLine("Last branch node trained at depth {0}", depth);
        node.Left = new DecisionTreeNode<T, D>(bestLeftDistribution);
        node.Right = new DecisionTreeNode<T, D>(bestRightDistribution);
        node.NodeType = NodeType.Branch;
        node.Decider = bestDecider;
        return node;
    }

    Decision[] decisions = bestDecider.Decide(data);
    List<T> leftData = new List<T>();
    List<T> rightData = new List<T>();
    for (int i = 0; i < decisions.Length; i++)
    {
        if (decisions[i] == Decision.Left)
        {
            leftData.Add(data[i]);
        }
        else
        {
            rightData.Add(data[i]);
        }
    }
    if (leftData.Count == 0 || rightData.Count == 0)
    {
        throw new Exception("Best decider produced an empty split.");
    }
    UpdateManager.WriteLine("Branch node at depth {0} trained.", depth);
    node.Left = computeDepthFirst(new DecisionTreeNode<T, D>(), leftData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
    node.Right = computeDepthFirst(new DecisionTreeNode<T, D>(), rightData, factory, numFeatures, numThresholds, numLabels, labelWeights, depth + 1);
    node.Decider = bestDecider;
    node.NodeType = NodeType.Branch;
    return node;
}
/// <summary>
/// Creates a new feature collection based on the provided <paramref name="collection"/>.
/// </summary>
/// <param name="collection">The collection to copy</param>
protected FeatureCollection(FeatureCollection<T> collection) : base(collection)
{
    Name = collection.Name;
    _factory = (IFeatureFactory<T>)collection.Factory;
}
public ClassificationTrainingContext(int nClasses, IFeatureFactory<F> featureFactory, Random random)
{
    nClasses_ = nClasses;
    featureFactory_ = featureFactory;
    random_ = random;
}
/// <summary>
/// Adds <paramref name="factory"/> to the list of factories this combination factory will choose from.
/// </summary>
/// <param name="factory">Factory to add</param>
public void AddFactory(IFeatureFactory<T, D> factory)
{
    _factories.Add(factory);
}
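// Sketch of composing a combination factory from several feature generators. The
// CombinationFeatureFactory name, its parameterless constructor, and the concrete factories are
// assumptions for illustration; only AddFactory/RemoveFactory are documented in this listing.
var combo = new CombinationFeatureFactory<MySample, MyFeatureData>();
combo.AddFactory(new ColorFeatureFactory());   // hypothetical concrete factory
combo.AddFactory(new TextureFeatureFactory()); // hypothetical concrete factory
// Presumably each Create() call delegates to one of the registered factories at random.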
/// <summary>
/// For Autofac initialization.
/// </summary>
public S122DataParser(IGeometryBuilderFactory geometryBuilderFactory, IFeatureFactory featureFactory)
{
    _geometryBuilderFactory = geometryBuilderFactory;
    _featureFactory = featureFactory;
}
/// <summary>
/// Constructor.
/// </summary>
/// <param name="factory">The factory to use to create the feature for this decider</param>
public Decider(IFeatureFactory<T, D> factory) : this(factory.Create(), 0)
{
}
private static Node compute(List<T> data, int NCount, IFeatureFactory<T, float[]> factory, int numFeatures, int numThresholds, float min_rd, int min_y, int currentDepth, int maxDepth)
{
    if (currentDepth == maxDepth - 1)
    {
        return new Node { NodeType = NodeType.Leaf };
    }

    int YCount = data.Count;
    if (NCount < YCount)
    {
        NCount = YCount;
    }

    // Sample candidate features and keep the split with the best score.
    Split best = new Split { Score = float.MinValue };
    for (int i = 0; i < numFeatures; i++)
    {
        IFeature<T, float[]> feature = factory.Create();
        var featureValues = from point in data select feature.Compute(point);
        Split split = findBestSplit(featureValues.OrderBy(o => o).ToArray(), min_rd, min_y, NCount, numThresholds);
        if (split.Score > best.Score)
        {
            best = split;
            best.Feature = feature;
        }
    }
    if (best.Feature == null)
    {
        return new Node { NodeType = NodeType.Leaf };
    }

    Node node = new Node { NodeType = NodeType.Branch, Feature = best.Feature, Threshold = best.Threshold };
    List<T> left = new List<T>();
    List<T> right = new List<T>();
    foreach (T point in data)
    {
        if (best.Feature.Compute(point) < best.Threshold)
        {
            left.Add(point);
        }
        else
        {
            right.Add(point);
        }
    }
    if (left.Count == 0 || right.Count == 0)
    {
        return new Node { NodeType = NodeType.Leaf };
    }

    UpdateManager.WriteLine("{0}:{1} {2}|{3} {4}", currentDepth, best.Score, left.Count, right.Count, best.Feature);
    node.Left = compute(left, best.NLeft, factory, numFeatures, numThresholds, min_rd, min_y, currentDepth + 1, maxDepth);
    node.Right = compute(right, NCount - best.NLeft, factory, numFeatures, numThresholds, min_rd, min_y, currentDepth + 1, maxDepth);
    return node;
}
/// <summary>
/// Constructs a CLTree from the provided data.
/// </summary>
/// <param name="data">The data to use in construction</param>
/// <param name="factory">A factory to create feature dimensions</param>
/// <param name="numFeatures">The number of features to use</param>
/// <param name="bounds">The bounds of the data</param>
/// <returns>A CLTree</returns>
public static CLTree<T> Compute(List<T> data, IFeatureFactory<T, float[]> factory, int numFeatures, Hyperrectangle<float> bounds)
{
    fillFeatureValues(factory, numFeatures, data);
    return new CLTree<T>(split(Enumerable.Range(0, data.Count).ToList(), data.Count, bounds), _buildFeatures);
}
/// <summary>
/// Removes <paramref name="factory"/> from the list of factories this combination factory chooses from.
/// </summary>
/// <param name="factory">Factory to remove</param>
public void RemoveFactory(IFeatureFactory<T, D> factory)
{
    _factories.Remove(factory);
}