Beispiel #1
0
        /// <summary>
        /// Checks that the dynamic-programming numeric split finder and the brute-force
        /// numeric split selector agree on the weather data set: same splitting feature,
        /// same splitting value and same split quality when splitting "Temperature"
        /// with "Play" as the dependent feature.
        /// </summary>
        public void SelectBestNumericSplitPoint_CategoricalVariable()
        {
            const string dependentFeatureName = "Play";
            const string numericFeatureName   = "Temperature";

            // Given
            var weatherDataNumeric = TestDataBuilder.ReadWeatherDataWithMixedAttributes();
            var initialEntropy     = categoricalBinarySplitQualityChecker.GetInitialEntropy(weatherDataNumeric, dependentFeatureName);

            // When
            var bruteForceBestSplit = binaryNumericBestSplitPointSelector.FindBestSplitPoint(
                weatherDataNumeric,
                dependentFeatureName,
                numericFeatureName,
                categoricalBinarySplitQualityChecker,
                binaryNumericDataSplitter,
                initialEntropy);
            var dynamicProgrammingBestSplit = dynamicProgrammingBestNumericSplitFinder.FindBestSplitPoint(
                weatherDataNumeric,
                dependentFeatureName,
                numericFeatureName,
                categoricalBinarySplitQualityChecker,
                binaryNumericDataSplitter,
                initialEntropy);

            // Then
            Assert.AreEqual(bruteForceBestSplit.Item1.SplittingFeatureName, dynamicProgrammingBestSplit.Item1.SplittingFeatureName);

            // Direct casts instead of `as`: a result of an unexpected type now fails with a
            // descriptive InvalidCastException rather than an opaque NullReferenceException.
            var bruteForceBinaryResult         = (IBinarySplittingResult)bruteForceBestSplit.Item1;
            var dynamicProgrammingBinaryResult = (IBinarySplittingResult)dynamicProgrammingBestSplit.Item1;
            Assert.AreEqual(bruteForceBinaryResult.SplittingValue, dynamicProgrammingBinaryResult.SplittingValue);

            Assert.AreEqual(bruteForceBestSplit.Item2, dynamicProgrammingBestSplit.Item2);
        }
Beispiel #2
0
        /// <summary>
        /// Scans every column of <paramref name="baseData"/> except the dependent feature and
        /// returns the split with the highest quality found.
        /// </summary>
        /// <param name="baseData">Data frame whose columns are evaluated as split candidates.</param>
        /// <param name="dependentFeatureName">Name of the column being predicted; it is excluded from the candidates.</param>
        /// <param name="splitQualityChecker">Supplies the initial entropy and is forwarded to the per-attribute evaluators.</param>
        /// <param name="alreadyUsedAttributesInfo">Forwarded to the categorical evaluation only; the numeric path does not consult it yet.</param>
        /// <returns>
        /// The best split found, or <c>null</c> when no candidate reports a quality strictly
        /// greater than negative infinity (including when there are no candidate columns).
        /// </returns>
        public ISplittingResult SelectBestSplit(
            IDataFrame baseData,
            string dependentFeatureName,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
        {
            double entropyBeforeSplit = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);

            ISplittingResult winner        = null;
            double           winnerQuality = double.NegativeInfinity;

            foreach (var candidateColumn in baseData.ColumnNames.Except(new[] { dependentFeatureName }))
            {
                if (baseData.GetColumnType(candidateColumn).TypeIsNumeric())
                {
                    // TODO: add checking for the already used attributes
                    var numericCandidate = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
                        baseData,
                        dependentFeatureName,
                        candidateColumn,
                        splitQualityChecker,
                        BinaryNumericDataSplitter,
                        entropyBeforeSplit);

                    // Item1 = split description, Item2 = its quality.
                    if (numericCandidate.Item2 > winnerQuality)
                    {
                        winnerQuality = numericCandidate.Item2;
                        winner        = numericCandidate.Item1;
                    }

                    continue;
                }

                // Non-numeric column: the current best quality is handed to the evaluator.
                var categoricalCandidate = EvaluateCategoricalSplit(
                    baseData,
                    dependentFeatureName,
                    candidateColumn,
                    winnerQuality,
                    entropyBeforeSplit,
                    splitQualityChecker,
                    alreadyUsedAttributesInfo);

                // Item3 carries the quality; the split object is only built for a strict improvement.
                if (categoricalCandidate.Item3 > winnerQuality)
                {
                    winner        = BuildBestSplitObject(categoricalCandidate.Item2, categoricalCandidate.Item1);
                    winnerQuality = categoricalCandidate.Item3;
                }
            }

            return winner;
        }
        /// <summary>
        /// Picks the highest-quality split over all columns of <paramref name="baseData"/>
        /// other than <paramref name="dependentFeatureName"/>.
        /// </summary>
        /// <param name="baseData">Data frame providing the candidate columns.</param>
        /// <param name="dependentFeatureName">Target column; never considered as a splitting attribute.</param>
        /// <param name="splitQualityChecker">Used to compute the initial entropy and passed on to the split evaluators.</param>
        /// <param name="alreadyUsedAttributesInfo">Passed to the categorical evaluator; not used for numeric columns.</param>
        /// <returns>
        /// The split with the strictly greatest quality seen, or <c>null</c> if none exceeded
        /// the initial negative-infinity threshold.
        /// </returns>
        public ISplittingResult SelectBestSplit(
            IDataFrame baseData,
            string dependentFeatureName,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
        {
            ISplittingResult chosenSplit = null;
            double chosenQuality = double.NegativeInfinity;
            double startingEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);

            foreach (var columnName in baseData.ColumnNames.Except(new[] { dependentFeatureName }))
            {
                bool columnIsNumeric = baseData.GetColumnType(columnName).TypeIsNumeric();

                if (!columnIsNumeric)
                {
                    // Categorical path: receives the best quality found so far.
                    var categoricalOutcome = EvaluateCategoricalSplit(
                        baseData,
                        dependentFeatureName,
                        columnName,
                        chosenQuality,
                        startingEntropy,
                        splitQualityChecker,
                        alreadyUsedAttributesInfo);

                    if (categoricalOutcome.Item3 > chosenQuality)
                    {
                        // Build the split object only when it actually replaces the current best.
                        chosenSplit = BuildBestSplitObject(categoricalOutcome.Item2, categoricalOutcome.Item1);
                        chosenQuality = categoricalOutcome.Item3;
                    }
                }
                else
                {
                    // TODO: add checking for the already used attributes
                    var numericOutcome = BinaryNumericBestSplitingPointSelector.FindBestSplitPoint(
                        baseData,
                        dependentFeatureName,
                        columnName,
                        splitQualityChecker,
                        BinaryNumericDataSplitter,
                        startingEntropy);

                    if (numericOutcome.Item2 > chosenQuality)
                    {
                        chosenQuality = numericOutcome.Item2;
                        chosenSplit = numericOutcome.Item1;
                    }
                }
            }

            return chosenSplit;
        }