Esempio n. 1
0
 /// <summary>
 /// Offers a candidate split to this selector. The candidate is considered only while
 /// fewer than <c>_whichBetterToFind</c> entries are stored or its gain exceeds
 /// <c>minStoredValue</c>, and it is stored only if <c>CanAcceptChildSelector</c> accepts it.
 /// </summary>
 /// <param name="currentGain">Gain value associated with the candidate split.</param>
 /// <param name="splitIterator">Iterator positioned on the candidate; supplies the child selector and the distribution to clone.</param>
 /// <param name="level">Level value forwarded to <c>CanAcceptChildSelector</c>.</param>
 /// <returns><c>true</c> when the candidate passed both checks and was added to the list; otherwise <c>false</c>.</returns>
 public bool EvaluateThis(double currentGain, ISplitIterator splitIterator, int level)
 {
     // Cheap pre-filter: skip the candidate when the list is full and the gain
     // does not exceed the stored threshold.
     bool worthConsidering = list.Count < _whichBetterToFind || currentGain > minStoredValue;
     if (!worthConsidering)
     {
         return false;
     }

     IChildSelector candidateSelector = splitIterator.CreateCurrentChildSelector();
     if (!CanAcceptChildSelector(candidateSelector, level))
     {
         return false;
     }

     // The distribution is cloned so the stored entry does not alias the iterator's current array.
     var entry = Tuple.Create(currentGain, candidateSelector,
                              splitIterator.CurrentDistribution.CloneArray());
     list.Add(entry);
     list.Sort(comparer);

     // Keep at most _whichBetterToFind entries: drop the one that sorted just past the limit.
     // Presumably comparer orders best-first, so this is the weakest entry (verify against comparer).
     if (list.Count > _whichBetterToFind)
     {
         list.RemoveAt(_whichBetterToFind);
     }

     // Refresh the threshold from the last retained position.
     int lastKeptIndex = Math.Min(_whichBetterToFind - 1, list.Count - 1);
     minStoredValue = list[lastKeptIndex].Item1;
     return true;
 }
Esempio n. 2
0
        /// <summary>
        /// Repeatedly picks the pending node with the highest validity index, searches for a
        /// winning split for it, attaches the resulting children, and registers the children
        /// that qualify as new pending nodes. Stops when no pending node can be picked, when no
        /// winning split is found, or when <paramref name="leafCount"/> reaches
        /// <c>4 * ClusterCount</c>.
        /// </summary>
        /// <param name="validityIndexByNode">Pending nodes mapped to their validity index; the expanded node is removed and qualifying children are added.</param>
        /// <param name="model">Model whose features are offered to <c>OnSelectingFeaturesToConsider</c> and passed to the split iterator provider.</param>
        /// <param name="instancesByNode">Instances associated with each pending node; updated in step with <paramref name="validityIndexByNode"/>.</param>
        /// <param name="classFeature">Feature excluded from the split search and passed to the split iterator provider.</param>
        /// <param name="levelByNode">Level of each pending node; children are registered at the parent's level plus one.</param>
        /// <param name="currentContext">Context list handed to <c>OnSplitEvaluation</c>; a per-child entry is pushed and removed while that child is evaluated.</param>
        /// <param name="leafCount">Counter incremented once for every node that gets split.</param>
        /// <exception cref="InvalidOperationException">The split iterator provider returned <c>null</c> for a feature.</exception>
        private void FillNode(ref Dictionary<IDecisionTreeNode, double> validityIndexByNode, InstanceModel model, ref Dictionary<IDecisionTreeNode, IEnumerable<Tuple<Instance, double>>> instancesByNode,
                              Feature classFeature, ref Dictionary<IDecisionTreeNode, int> levelByNode, List<SelectorContext> currentContext, ref int leafCount)
        {
            // Each pass of this loop expands one pending node.
            do
            {
                // Pick the pending node with the strictly greatest validity index.
                // Entries equal to Double.MinValue (or NaN) are never picked.
                IDecisionTreeNode selected    = null;
                double            topValidity = Double.MinValue;
                foreach (var pending in validityIndexByNode)
                {
                    if (pending.Value > topValidity)
                    {
                        topValidity = pending.Value;
                        selected    = pending.Key;
                    }
                }

                if (selected == null)
                {
                    return;
                }

                int depth            = levelByNode[selected];
                var pendingInstances = instancesByNode[selected];

                // Number of top candidates the selector keeps; defaults to 1 without a handler.
                int candidatesToKeep = OnSelectingWhichBetterSplit != null
                    ? OnSelectingWhichBetterSplit(selected, depth)
                    : 1;

                WiningSplitSelector splitPicker = new WiningSplitSelector(candidatesToKeep) { CanAcceptChildSelector = this.CanAcceptChildSelector };

                // First pass: offer every qualifying candidate split of the selected node to the selector.
                foreach (var candidateFeature in OnSelectingFeaturesToConsider(model.Features, depth))
                {
                    if (candidateFeature == classFeature)
                    {
                        continue;
                    }

                    ISplitIterator nodeIterator = SplitIteratorProvider.GetSplitIterator(model, candidateFeature, classFeature);
                    if (nodeIterator == null)
                    {
                        throw new InvalidOperationException(string.Format("Undefined iterator for feature {0}", candidateFeature));
                    }

                    nodeIterator.Initialize(candidateFeature, pendingInstances);
                    while (nodeIterator.FindNext())
                    {
                        double nodeGain = DistributionEvaluator.Evaluate(selected.Data, nodeIterator.CurrentDistribution);

                        // While fewer than ClusterCount splits have been made, candidates are
                        // considered regardless of the minimal gain.
                        bool worthOffering = nodeGain > MinimalSplitGain || leafCount < ClusterCount;
                        if (!worthOffering)
                        {
                            continue;
                        }

                        if (OnSplitEvaluation != null)
                        {
                            OnSplitEvaluation(selected, nodeIterator, currentContext);
                        }
                        splitPicker.EvaluateThis(nodeGain, nodeIterator, depth);
                    }
                }

                if (!splitPicker.IsWinner())
                {
                    return;
                }

                // Attach the winning split and partition the node's instances among its children.
                IChildSelector winner = splitPicker.WinningSelector;
                selected.ChildSelector = winner;
                selected.Children      = new IDecisionTreeNode[winner.ChildrenCount];
                var instancesPerChild  = childrenInstanceCreator.CreateChildrenInstances(pendingInstances, winner, double.MinValue);

                for (int childIndex = 0; childIndex < winner.ChildrenCount; childIndex++)
                {
                    var child = new DecisionTreeNode { Parent = selected };
                    selected.Children[childIndex] = child;
                    child.Data = splitPicker.WinningDistribution[childIndex];

                    // A context entry for this child is only tracked when a split-evaluation handler exists.
                    SelectorContext childContext = null;
                    if (OnSplitEvaluation != null)
                    {
                        childContext = new SelectorContext { Index = childIndex, Selector = selected.ChildSelector };
                        currentContext.Add(childContext);
                    }

                    // Second pass: find the best gain obtainable over this child's instances.
                    // NOTE(review): the gain is evaluated against the parent's Data (not child.Data), the
                    // handler receives the parent node, and features are selected with the parent's
                    // level. Confirm this is intended.
                    double childBestValidity = double.MinValue;
                    foreach (var childFeature in OnSelectingFeaturesToConsider(model.Features, depth))
                    {
                        if (childFeature == classFeature)
                        {
                            continue;
                        }

                        ISplitIterator childIterator = SplitIteratorProvider.GetSplitIterator(model, childFeature, classFeature);
                        if (childIterator == null)
                        {
                            throw new InvalidOperationException(string.Format("Undefined iterator for feature {0}", childFeature));
                        }

                        childIterator.Initialize(childFeature, instancesPerChild[childIndex]);
                        while (childIterator.FindNext())
                        {
                            double childGain = DistributionEvaluator.Evaluate(selected.Data, childIterator.CurrentDistribution);
                            if (childGain > childBestValidity)
                            {
                                if (OnSplitEvaluation != null)
                                {
                                    OnSplitEvaluation(selected, childIterator, currentContext);
                                }

                                childBestValidity = childGain;
                            }
                        }
                    }

                    // Register the child as pending when it beats its parent's index, or
                    // unconditionally while fewer than ClusterCount splits have been made.
                    if (childBestValidity > validityIndexByNode[selected] || leafCount < ClusterCount)
                    {
                        validityIndexByNode.Add(child, childBestValidity);
                        instancesByNode.Add(child, instancesPerChild[childIndex]);
                        levelByNode.Add(child, depth + 1);
                    }

                    if (OnSplitEvaluation != null)
                    {
                        currentContext.Remove(childContext);
                    }
                }

                // The expanded node is no longer pending.
                validityIndexByNode.Remove(selected);
                instancesByNode.Remove(selected);
                levelByNode.Remove(selected);
                leafCount++;
            }
            while (leafCount < 4 * ClusterCount);
        }