/// <summary>
/// Scans every column of <paramref name="baseData"/> except the dependent one and
/// returns the split whose reported quality is the highest.
/// </summary>
/// <param name="baseData">Data frame whose columns are evaluated as split candidates.</param>
/// <param name="dependentFeatureName">Name of the dependent column; it is excluded from the candidates.</param>
/// <param name="splitQualityChecker">Supplies the initial entropy and is forwarded to the split evaluators.</param>
/// <param name="alreadyUsedAttributesInfo">Forwarded to the categorical split evaluation only.</param>
/// <returns>
/// The best split found, or <c>null</c> when no candidate reported a quality
/// strictly greater than negative infinity (for example, when there are no candidate columns).
/// </returns>
public ISplittingResult SelectBestSplit(
    IDataFrame baseData,
    string dependentFeatureName,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    double startingEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);
    var candidateColumns = baseData.ColumnNames.Except(new[] { dependentFeatureName });

    ISplittingResult winner = null;
    double winnerQuality = double.NegativeInfinity;

    foreach (var column in candidateColumns)
    {
        if (!baseData.GetColumnType(column).TypeIsNumeric())
        {
            // Non-numeric column: the evaluation also receives the best quality seen so far.
            var categoricalOutcome = EvaluateCategoricalSplit(
                baseData,
                dependentFeatureName,
                column,
                winnerQuality,
                startingEntropy,
                splitQualityChecker,
                alreadyUsedAttributesInfo);

            // Item3 carries the quality; Item2 and Item1 are the inputs for building the split object.
            if (categoricalOutcome.Item3 > winnerQuality)
            {
                winner = BuildBestSplitObject(categoricalOutcome.Item2, categoricalOutcome.Item1);
                winnerQuality = categoricalOutcome.Item3;
            }

            continue;
        }

        // TODO: add checking for the already used attributes
        var numericOutcome = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
            baseData,
            dependentFeatureName,
            column,
            splitQualityChecker,
            BinaryNumericDataSplitter,
            startingEntropy);

        // Item1 is the split itself, Item2 its quality; only a strictly better quality replaces the winner.
        if (numericOutcome.Item2 > winnerQuality)
        {
            winnerQuality = numericOutcome.Item2;
            winner = numericOutcome.Item1;
        }
    }

    return winner;
}
/// <summary>
/// Evaluates each column of <paramref name="baseData"/> other than the dependent one
/// and picks the split with the highest reported quality.
/// </summary>
/// <param name="baseData">Data frame whose columns are evaluated as split candidates.</param>
/// <param name="dependentFeatureName">Name of the dependent column; it is excluded from the candidates.</param>
/// <param name="splitQualityChecker">Supplies the initial entropy and is forwarded to the split evaluators.</param>
/// <param name="alreadyUsedAttributesInfo">Forwarded to the categorical split evaluation only.</param>
/// <returns>
/// The best split found, or <c>null</c> when no candidate reported a quality
/// strictly greater than negative infinity (for example, when there are no candidate columns).
/// </returns>
public ISplittingResult SelectBestSplit(
    IDataFrame baseData,
    string dependentFeatureName,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    double startingEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);
    var candidateColumns = baseData.ColumnNames.Except(new[] { dependentFeatureName });

    ISplittingResult winner = null;
    double winnerQuality = double.NegativeInfinity;

    foreach (var column in candidateColumns)
    {
        bool columnIsNumeric = baseData.GetColumnType(column).TypeIsNumeric();
        if (!columnIsNumeric)
        {
            // Non-numeric column: the evaluation also receives the best quality seen so far.
            var categoricalOutcome = EvaluateCategoricalSplit(
                baseData,
                dependentFeatureName,
                column,
                winnerQuality,
                startingEntropy,
                splitQualityChecker,
                alreadyUsedAttributesInfo);

            // Item3 carries the quality; Item2 and Item1 are the inputs for building the split object.
            if (categoricalOutcome.Item3 > winnerQuality)
            {
                winner = BuildBestSplitObject(categoricalOutcome.Item2, categoricalOutcome.Item1);
                winnerQuality = categoricalOutcome.Item3;
            }

            continue;
        }

        // TODO: add checking for the already used attributes
        var numericOutcome = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
            baseData,
            dependentFeatureName,
            column,
            splitQualityChecker,
            BinaryNumericDataSplitter,
            startingEntropy);

        // Item1 is the split itself, Item2 its quality; only a strictly better quality replaces the winner.
        if (numericOutcome.Item2 > winnerQuality)
        {
            winnerQuality = numericOutcome.Item2;
            winner = numericOutcome.Item1;
        }
    }

    return winner;
}
/// <summary>
/// Tells whether recursive building should stop for <paramref name="dataFrame"/>.
/// </summary>
/// <param name="dataFrame">Data frame holding the dependent column.</param>
/// <param name="dependentFeatureName">Name of the dependent column to inspect.</param>
/// <returns>
/// <c>true</c> when the dependent column is not of a numeric type and holds exactly
/// one distinct value; otherwise <c>false</c>.
/// </returns>
protected static bool ShouldStopRecusrsiveBuilding(IDataFrame dataFrame, string dependentFeatureName)
{
    // A numeric dependent column never triggers the stop condition.
    if (dataFrame.GetColumnType(dependentFeatureName).IsNumericType())
    {
        return false;
    }

    var distinctValues = dataFrame.GetColumnVector<object>(dependentFeatureName).DataItems.Distinct();
    return distinctValues.Count() == 1;
}
/// <summary>
/// Reports whether recursive building should stop for <paramref name="dataFrame"/>.
/// </summary>
/// <param name="dataFrame">Data frame holding the dependent column.</param>
/// <param name="dependentFeatureName">Name of the dependent column to inspect.</param>
/// <returns>
/// <c>true</c> when the dependent column is not of a numeric type and holds exactly
/// one distinct value; otherwise <c>false</c>.
/// </returns>
protected static bool ShouldStopRecusrsiveBuilding(IDataFrame dataFrame, string dependentFeatureName)
{
    bool dependentIsNumeric = dataFrame.GetColumnType(dependentFeatureName).IsNumericType();
    if (dependentIsNumeric)
    {
        // A numeric dependent column never triggers the stop condition.
        return false;
    }

    int distinctValueCount = dataFrame
        .GetColumnVector<object>(dependentFeatureName)
        .DataItems
        .Distinct()
        .Count();

    return distinctValueCount == 1;
}