// Verifies that the brute-force and the dynamic-programming numeric split finders agree
// when splitting the mixed-attribute weather data on "Temperature" with "Play" as the
// dependent feature: same splitting feature name, same splitting value, same Item2.
public void SelectBestNumericSplitPoint_CategoricalVariable()
{
    // Given
    var weatherDataNumeric = TestDataBuilder.ReadWeatherDataWithMixedAttributes();
    var initialEntropy = categoricalBinarySplitQualityChecker.GetInitialEntropy(weatherDataNumeric, "Play");

    // When
    var bruteForceBestSplit = binaryNumericBestSplitPointSelector.FindBestSplitPoint(
        weatherDataNumeric,
        "Play",
        "Temperature",
        categoricalBinarySplitQualityChecker,
        binaryNumericDataSplitter,
        initialEntropy);
    var dynamicProgrammingBestSplit = dynamicProgrammingBestNumericSplitFinder.FindBestSplitPoint(
        weatherDataNumeric,
        "Play",
        "Temperature",
        categoricalBinarySplitQualityChecker,
        binaryNumericDataSplitter,
        initialEntropy);

    // Then
    Assert.AreEqual(
        bruteForceBestSplit.Item1.SplittingFeatureName,
        dynamicProgrammingBestSplit.Item1.SplittingFeatureName);

    // Direct casts rather than `as`: if either finder returns something that is not an
    // IBinarySplittingResult, the test fails with an InvalidCastException that names the
    // offending type instead of an opaque NullReferenceException on the member access.
    var bruteForceBinarySplit = (IBinarySplittingResult)bruteForceBestSplit.Item1;
    var dynamicProgrammingBinarySplit = (IBinarySplittingResult)dynamicProgrammingBestSplit.Item1;
    Assert.AreEqual(bruteForceBinarySplit.SplittingValue, dynamicProgrammingBinarySplit.SplittingValue);

    Assert.AreEqual(bruteForceBestSplit.Item2, dynamicProgrammingBestSplit.Item2);
}
/// <summary>
/// Evaluates every column of <paramref name="baseData"/> except
/// <paramref name="dependentFeatureName"/> as a split candidate and returns the split
/// with the highest quality.
/// </summary>
/// <param name="baseData">Data frame whose columns are evaluated as split candidates.</param>
/// <param name="dependentFeatureName">
/// Column excluded from the candidates and forwarded to the evaluators as the dependent feature.
/// </param>
/// <param name="splitQualityChecker">
/// Supplies the initial entropy and is forwarded to the numeric and categorical evaluators.
/// </param>
/// <param name="alreadyUsedAttributesInfo">
/// Forwarded to the categorical evaluation only; numeric columns do not consult it.
/// </param>
/// <returns>
/// The best split found, or <c>null</c> when no candidate's quality exceeds negative infinity
/// (for example when there are no candidate columns). Ties keep the earlier candidate.
/// </returns>
public ISplittingResult SelectBestSplit(
    IDataFrame baseData,
    string dependentFeatureName,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    ISplittingResult winner = null;
    double winnerQuality = double.NegativeInfinity;
    double initialEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);

    var candidateColumns = baseData.ColumnNames.Except(new[] { dependentFeatureName });
    foreach (var candidateColumn in candidateColumns)
    {
        var isNumeric = baseData.GetColumnType(candidateColumn).TypeIsNumeric();
        if (!isNumeric)
        {
            // Categorical path: the current best quality is handed to the evaluator.
            var categoricalCandidate = EvaluateCategoricalSplit(
                baseData,
                dependentFeatureName,
                candidateColumn,
                winnerQuality,
                initialEntropy,
                splitQualityChecker,
                alreadyUsedAttributesInfo);

            if (categoricalCandidate.Item3 > winnerQuality)
            {
                winner = BuildBestSplitObject(categoricalCandidate.Item2, categoricalCandidate.Item1);
                winnerQuality = categoricalCandidate.Item3;
            }

            continue;
        }

        // TODO: add checking for the already used attributes (not done for numeric columns yet).
        var numericCandidate = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
            baseData,
            dependentFeatureName,
            candidateColumn,
            splitQualityChecker,
            BinaryNumericDataSplitter,
            initialEntropy);

        if (numericCandidate.Item2 > winnerQuality)
        {
            winnerQuality = numericCandidate.Item2;
            winner = numericCandidate.Item1;
        }
    }

    return winner;
}
/// <summary>
/// Scans all columns of <paramref name="baseData"/> other than
/// <paramref name="dependentFeatureName"/> and picks the split whose quality is strictly
/// greater than every split seen before it.
/// </summary>
/// <param name="baseData">Data frame providing the column names, column types and data to split.</param>
/// <param name="dependentFeatureName">
/// Name of the column that is skipped as a candidate and passed on as the dependent feature.
/// </param>
/// <param name="splitQualityChecker">
/// Used to compute the initial entropy once, then passed to each per-column evaluation.
/// </param>
/// <param name="alreadyUsedAttributesInfo">
/// Passed to the categorical evaluation; the numeric evaluation does not receive it.
/// </param>
/// <returns>
/// The highest-quality split, or <c>null</c> if no evaluated column produced a quality above
/// negative infinity.
/// </returns>
public ISplittingResult SelectBestSplit(
    IDataFrame baseData,
    string dependentFeatureName,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    ISplittingResult chosenSplit = null;
    double chosenQuality = double.NegativeInfinity;
    double initialEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);
    var excludedColumns = new[] { dependentFeatureName };

    foreach (var columnName in baseData.ColumnNames.Except(excludedColumns))
    {
        var columnType = baseData.GetColumnType(columnName);
        if (columnType.TypeIsNumeric())
        {
            // TODO: add checking for the already used attributes (numeric columns skip it for now).
            var numericResult = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
                baseData,
                dependentFeatureName,
                columnName,
                splitQualityChecker,
                BinaryNumericDataSplitter,
                initialEntropy);

            var numericQuality = numericResult.Item2;
            if (numericQuality > chosenQuality)
            {
                chosenQuality = numericQuality;
                chosenSplit = numericResult.Item1;
            }
        }
        else
        {
            // The running best quality is supplied to the categorical evaluator.
            var categoricalResult = EvaluateCategoricalSplit(
                baseData,
                dependentFeatureName,
                columnName,
                chosenQuality,
                initialEntropy,
                splitQualityChecker,
                alreadyUsedAttributesInfo);

            var categoricalQuality = categoricalResult.Item3;
            if (categoricalQuality > chosenQuality)
            {
                chosenSplit = BuildBestSplitObject(categoricalResult.Item2, categoricalResult.Item1);
                chosenQuality = categoricalQuality;
            }
        }
    }

    return chosenSplit;
}