/// <summary>
/// Searches for the best split using a brute force approach on all unique threshold values.
/// The implementation assumes that the features and targets have been sorted
/// together using the features as sort criteria.
/// </summary>
/// <param name="impurityCalculator">Calculator that is reset to <paramref name="parentInterval"/> and then
/// moved to each candidate split index to obtain leaf weights, impurity improvement and child impurities.</param>
/// <param name="feature">Feature values; only indices inside <paramref name="parentInterval"/> are read.</param>
/// <param name="targets">Target values sorted alongside <paramref name="feature"/>.
/// Not read directly by this method.</param>
/// <param name="parentInterval">Half-open index range [FromInclusive, ToExclusive) of the node being split.</param>
/// <param name="parentImpurity">Impurity of the parent node, forwarded to the impurity calculator.</param>
/// <returns>The best split found. If no candidate yields an improvement greater than zero,
/// the result carries split index -1 and 0.0 for all other values.</returns>
public SplitResult FindBestSplit(IImpurityCalculator impurityCalculator, double[] feature, double[] targets,
    Interval1D parentInterval, double parentImpurity)
{
    var bestSplitIndex = -1;
    var bestThreshold = 0.0;
    var bestImpurityImprovement = 0.0;
    var bestImpurityLeft = 0.0;
    var bestImpurityRight = 0.0;

    var firstIndex = parentInterval.FromInclusive;
    var prevValue = feature[firstIndex];

    impurityCalculator.UpdateInterval(parentInterval);

    for (int j = firstIndex + 1; j < parentInterval.ToExclusive; j++)
    {
        var currentValue = feature[j];

        // A split is only possible between two different feature values.
        if (prevValue != currentValue)
        {
            var leftSize = (double)(j - parentInterval.FromInclusive);
            var rightSize = (double)(parentInterval.ToExclusive - j);

            if (Math.Min(leftSize, rightSize) >= m_minimumSplitSize)
            {
                impurityCalculator.UpdateIndex(j);

                var leafTooLight = (impurityCalculator.WeightedLeft < m_minimumLeafWeight) ||
                    (impurityCalculator.WeightedRight < m_minimumLeafWeight);

                // Deliberately a guarded block rather than 'continue': skipping the rest of the
                // iteration would leave prevValue stale, and the next index would then be treated
                // as a value change even when it sits between two identical feature values.
                if (!leafTooLight)
                {
                    var improvement = impurityCalculator.ImpurityImprovement(parentImpurity);

                    if (improvement > bestImpurityImprovement)
                    {
                        var childImpurities = impurityCalculator.ChildImpurities(); // could be avoided

                        bestImpurityImprovement = improvement;
                        // Threshold is the midpoint between the two neighbouring distinct values.
                        bestThreshold = (currentValue + prevValue) * 0.5;
                        bestSplitIndex = j;
                        bestImpurityLeft = childImpurities.Left;
                        bestImpurityRight = childImpurities.Right;
                    }
                }
            }
        }

        // Always advance, whichever checks above rejected the candidate.
        prevValue = currentValue;
    }

    return new SplitResult(bestSplitIndex, bestThreshold,
        bestImpurityImprovement, bestImpurityLeft, bestImpurityRight);
}
/// <summary>
/// Searches for a split using a single threshold obtained from RandomThreshold for the
/// minimum and maximum feature value found inside the parent interval. The split index is
/// the first index whose feature value is greater than the threshold while both sides
/// contain at least the minimum split size.
/// NOTE(review): taking the first index above the threshold only gives a consistent partition
/// if the feature values are sorted ascending within the interval - confirm with the caller.
/// </summary>
/// <param name="impurityCalculator">Calculator that is reset to <paramref name="parentInterval"/> and
/// moved to the chosen split index to obtain the impurity improvement and child impurities.</param>
/// <param name="feature">Feature values; only indices inside <paramref name="parentInterval"/> are read.</param>
/// <param name="targets">Target values belonging to <paramref name="feature"/>.
/// Not read directly by this method.</param>
/// <param name="parentInterval">Half-open index range [FromInclusive, ToExclusive) of the node being split.</param>
/// <param name="parentImpurity">Impurity of the parent node, forwarded to the impurity calculator.</param>
/// <returns><c>SplitResult.Initial()</c> when the interval is empty or holds a single distinct value;
/// otherwise a result with the drawn threshold and, if a valid index was found, the split index,
/// impurity improvement and child impurities (split index -1 and zeros when none was found).</returns>
public SplitResult FindBestSplit(IImpurityCalculator impurityCalculator, double[] feature, double[] targets,
    Interval1D parentInterval, double parentImpurity)
{
    var min = double.MaxValue;
    var max = double.MinValue;

    for (int i = parentInterval.FromInclusive; i < parentInterval.ToExclusive; i++)
    {
        var value = feature[i];

        // Two independent comparisons: with an 'else if' the first element would only ever
        // update min, leaving max unseeded for single-element or non-ascending intervals.
        if (value < min)
        {
            min = value;
        }

        if (value > max)
        {
            max = value;
        }
    }

    // min == max: the feature is constant in the interval.
    // min > max: no value was seen (empty interval), so the sentinels are still in place.
    if (min >= max)
    {
        return SplitResult.Initial();
    }

    var threshold = RandomThreshold(min, max);

    // A threshold equal to max could never be exceeded by any feature value; fall back to min.
    if (threshold == max)
    {
        threshold = min;
    }

    var splitIndex = -1;
    var impurityImprovement = 0.0;
    var impurityLeft = 0.0;
    var impurityRight = 0.0;

    for (int i = parentInterval.FromInclusive; i < parentInterval.ToExclusive; i++)
    {
        var leftSize = (double)(i - parentInterval.FromInclusive);
        var rightSize = (double)(parentInterval.ToExclusive - i);

        if (feature[i] > threshold && Math.Min(leftSize, rightSize) >= m_minimumSplitSize)
        {
            splitIndex = i;

            impurityCalculator.UpdateInterval(parentInterval);
            impurityCalculator.UpdateIndex(i);
            impurityImprovement = impurityCalculator.ImpurityImprovement(parentImpurity);

            var childImpurities = impurityCalculator.ChildImpurities();
            impurityLeft = childImpurities.Left;
            impurityRight = childImpurities.Right;

            // Only the first qualifying index is evaluated.
            break;
        }
    }

    return new SplitResult(splitIndex, threshold,
        impurityImprovement, impurityLeft, impurityRight);
}