/// <summary>
/// Offers the split currently exposed by <paramref name="splitIterator"/> to this selector
/// and stores it when it qualifies.
/// </summary>
/// <param name="currentGain">Gain value associated with the candidate split.</param>
/// <param name="splitIterator">Iterator positioned on the candidate; supplies the child selector and the current distribution.</param>
/// <param name="level">Level forwarded to <c>CanAcceptChildSelector</c>.</param>
/// <returns><c>true</c> when the candidate was stored; <c>false</c> when it was rejected.</returns>
public bool EvaluateThis(double currentGain, ISplitIterator splitIterator, int level)
{
    // A candidate is only considered while fewer than _whichBetterToFind entries are stored,
    // or when its gain is strictly greater than the stored threshold.
    bool hasRoom = list.Count < _whichBetterToFind;
    if (!hasRoom && !(currentGain > minStoredValue))
    {
        return false;
    }

    IChildSelector candidateSelector = splitIterator.CreateCurrentChildSelector();
    if (!CanAcceptChildSelector(candidateSelector, level))
    {
        return false;
    }

    // The distribution is cloned before being stored together with the gain and selector.
    list.Add(Tuple.Create(currentGain, candidateSelector, splitIterator.CurrentDistribution.CloneArray()));

    // Ordering is defined by `comparer` (presumably best-first -- confirm against its definition).
    list.Sort(comparer);

    // Once the list exceeds the requested size, drop the entry sitting at position _whichBetterToFind.
    if (list.Count > _whichBetterToFind)
    {
        list.RemoveAt(_whichBetterToFind);
    }

    // Refresh the threshold from the last slot that is still within the requested size.
    int thresholdIndex = Math.Min(_whichBetterToFind - 1, list.Count - 1);
    minStoredValue = list[thresholdIndex].Item1;
    return true;
}
/// <summary>
/// Expands one pending node: picks the node with the highest validity index, searches for its
/// winning split, creates the children, registers the children that qualify as new pending
/// nodes, and then recurses while <c>leafCount &lt; 4 * ClusterCount</c>.
/// </summary>
/// <param name="validityIndexByNode">Pending nodes mapped to the value used to choose which one is expanded next.</param>
/// <param name="model">Supplies <c>Features</c> and is handed to <c>SplitIteratorProvider</c>.</param>
/// <param name="instancesByNode">Instances associated with each pending node.</param>
/// <param name="classFeature">Feature that is skipped when enumerating candidate splits.</param>
/// <param name="levelByNode">Level recorded for each pending node; children are stored at <c>level + 1</c>.</param>
/// <param name="currentContext">Context list forwarded to <c>OnSplitEvaluation</c>; a per-child entry is added and removed while that child is examined.</param>
/// <param name="leafCount">Counter incremented by one after every successful split.</param>
/// <exception cref="InvalidOperationException">No split iterator is available for a feature.</exception>
private void FillNode(ref Dictionary<IDecisionTreeNode, double> validityIndexByNode, InstanceModel model, ref Dictionary<IDecisionTreeNode, IEnumerable<Tuple<Instance, double>>> instancesByNode, Feature classFeature, ref Dictionary<IDecisionTreeNode, int> levelByNode, List<SelectorContext> currentContext, ref int leafCount)
{
    // Select the pending node with the strictly highest index. Because the comparison starts
    // from Double.MinValue and is strict, entries equal to Double.MinValue are never chosen.
    IDecisionTreeNode node = null;
    double highestIndex = Double.MinValue;
    foreach (KeyValuePair<IDecisionTreeNode, double> pending in validityIndexByNode)
    {
        if (highestIndex < pending.Value)
        {
            highestIndex = pending.Value;
            node = pending.Key;
        }
    }

    if (node == null)
    {
        return;
    }

    int level = levelByNode[node];
    var instances = instancesByNode[node];

    // Defaults to a single best split unless the hook supplies another count.
    int whichBetterToFind = OnSelectingWhichBetterSplit != null
        ? OnSelectingWhichBetterSplit(node, level)
        : 1;

    WiningSplitSelector splitSelector = new WiningSplitSelector(whichBetterToFind)
    {
        CanAcceptChildSelector = this.CanAcceptChildSelector,
    };

    // Phase 1: offer every candidate split of the chosen node to the selector.
    foreach (var feature in OnSelectingFeaturesToConsider(model.Features, level))
    {
        if (feature != classFeature)
        {
            ISplitIterator iterator = SplitIteratorProvider.GetSplitIterator(model, feature, classFeature);
            if (iterator == null)
            {
                throw new InvalidOperationException(string.Format("Undefined iterator for feature {0}", feature));
            }

            iterator.Initialize(feature, instances);
            while (iterator.FindNext())
            {
                double gain = DistributionEvaluator.Evaluate(node.Data, iterator.CurrentDistribution);

                // Low-gain splits are still accepted while fewer than ClusterCount leaves exist.
                if (gain > MinimalSplitGain || leafCount < ClusterCount)
                {
                    if (OnSplitEvaluation != null)
                    {
                        OnSplitEvaluation(node, iterator, currentContext);
                    }

                    splitSelector.EvaluateThis(gain, iterator, level);
                }
            }
        }
    }

    if (!splitSelector.IsWinner())
    {
        return;
    }

    // Phase 2: materialise the winning split on the node.
    IChildSelector winningSelector = splitSelector.WinningSelector;
    node.ChildSelector = winningSelector;
    node.Children = new IDecisionTreeNode[winningSelector.ChildrenCount];
    var instancesPerChild = childrenInstanceCreator.CreateChildrenInstances(instances, winningSelector, double.MinValue);

    for (int childIndex = 0; childIndex < winningSelector.ChildrenCount; childIndex++)
    {
        var child = new DecisionTreeNode { Parent = node };
        node.Children[childIndex] = child;
        child.Data = splitSelector.WinningDistribution[childIndex];

        // The child's context entry only exists while split-evaluation listeners are attached.
        SelectorContext childContext = null;
        if (OnSplitEvaluation != null)
        {
            childContext = new SelectorContext
            {
                Index = childIndex,
                Selector = node.ChildSelector,
            };
            currentContext.Add(childContext);
        }

        // Phase 3: look ahead for the best gain obtainable from this child's instances.
        // NOTE(review): the evaluation and the callback below use `node` (the parent), not
        // `child` -- confirm this is intended.
        double bestChildIndex = double.MinValue;
        foreach (var feature in OnSelectingFeaturesToConsider(model.Features, level))
        {
            if (feature != classFeature)
            {
                ISplitIterator iterator = SplitIteratorProvider.GetSplitIterator(model, feature, classFeature);
                if (iterator == null)
                {
                    throw new InvalidOperationException(string.Format("Undefined iterator for feature {0}", feature));
                }

                iterator.Initialize(feature, instancesPerChild[childIndex]);
                while (iterator.FindNext())
                {
                    double gain = DistributionEvaluator.Evaluate(node.Data, iterator.CurrentDistribution);
                    if (gain > bestChildIndex)
                    {
                        if (OnSplitEvaluation != null)
                        {
                            OnSplitEvaluation(node, iterator, currentContext);
                        }

                        bestChildIndex = gain;
                    }
                }
            }
        }

        // A child becomes pending when it improves on its parent's index, or unconditionally
        // while fewer than ClusterCount leaves exist.
        if (bestChildIndex > validityIndexByNode[node] || leafCount < ClusterCount)
        {
            validityIndexByNode.Add(child, bestChildIndex);
            instancesByNode.Add(child, instancesPerChild[childIndex]);
            levelByNode.Add(child, level + 1);
        }

        if (OnSplitEvaluation != null)
        {
            currentContext.Remove(childContext);
        }
    }

    // The expanded node is no longer pending.
    validityIndexByNode.Remove(node);
    instancesByNode.Remove(node);
    levelByNode.Remove(node);

    // The counter grows by one per split, independent of how many children were created.
    leafCount++;

    if (leafCount < 4 * ClusterCount)
    {
        FillNode(ref validityIndexByNode, model, ref instancesByNode, classFeature, ref levelByNode, currentContext, ref leafCount);
    }
}