Ejemplo n.º 1
0
        /// <summary>
        /// Evaluates a split of <paramref name="dataToSplit"/> on the categorical feature
        /// <paramref name="splittingFeatureName"/> and reports its quality.
        /// </summary>
        /// <param name="dataToSplit">Data frame to partition.</param>
        /// <param name="dependentFeatureName">Name of the dependent (target) feature.</param>
        /// <param name="splittingFeatureName">Name of the feature the split is made on.</param>
        /// <param name="bestSplitQualitySoFar">Not used by this override.</param>
        /// <param name="initialEntropy">Value forwarded to the quality checker.</param>
        /// <param name="splitQualityChecker">Computes the quality of the produced split.</param>
        /// <param name="alreadyUsedAttributesInfo">Tells whether the feature has already been used.</param>
        /// <returns>
        /// A tuple of (partitions, split parameters, split quality). When the feature was already used,
        /// or the splitter produced exactly one partition, the partitions list is empty and the quality
        /// is <see cref="double.NegativeInfinity"/>.
        /// </returns>
        protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
            IDataFrame dataToSplit,
            string dependentFeatureName,
            string splittingFeatureName,
            double bestSplitQualitySoFar,
            double initialEntropy,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
        {
            IList<ISplittedData> resultingPartitions;
            ISplittingParams resultingParams;

            // Stays at negative infinity unless a usable split is actually scored below.
            double resultingQuality = double.NegativeInfinity;

            if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsed(splittingFeatureName))
            {
                // Feature already consumed: report "no split" without touching the data.
                resultingPartitions = new List<ISplittedData>();
                resultingParams = new SplittingParams(splittingFeatureName, dependentFeatureName);
            }
            else
            {
                var rowsBeforeSplit = dataToSplit.RowCount;
                var featureSplitParams = new SplittingParams(splittingFeatureName, dependentFeatureName);
                var partitions = CategoricalDataSplitter.SplitData(dataToSplit, featureSplitParams);

                resultingParams = featureSplitParams;

                if (partitions.Count == 1)
                {
                    // A single partition does not separate anything, so it is reported as "no split".
                    resultingPartitions = new List<ISplittedData>();
                }
                else
                {
                    resultingPartitions = partitions;
                    resultingQuality = splitQualityChecker.CalculateSplitQuality(
                        initialEntropy,
                        rowsBeforeSplit,
                        partitions,
                        dependentFeatureName);
                }
            }

            return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                resultingPartitions,
                resultingParams,
                resultingQuality);
        }
        /// <summary>
        /// Tries a binary split of <paramref name="dataToSplit"/> for each distinct value of
        /// <paramref name="splittingFeatureName"/> that has not already been used, and returns the
        /// candidate with the highest quality.
        /// </summary>
        /// <param name="dataToSplit">Data frame to partition.</param>
        /// <param name="dependentFeatureName">Name of the dependent (target) feature.</param>
        /// <param name="splittingFeatureName">Name of the feature whose values are tried as split points.</param>
        /// <param name="bestSplitQualitySoFar">Not used by this override.</param>
        /// <param name="initialEntropy">Value forwarded to the quality checker.</param>
        /// <param name="splitQualityChecker">Computes the quality of each candidate split.</param>
        /// <param name="alredyUsedAttributesInfo">Tells whether a (feature, value) pair has already been used.</param>
        /// <returns>
        /// A tuple of (partitions, split parameters, split quality) for the best candidate.
        /// If a candidate produces exactly one partition, evaluation stops and an empty partition list
        /// with <see cref="double.NegativeInfinity"/> is returned for that candidate's parameters.
        /// If no candidate is accepted at all (no values, all values already used, or no quality above
        /// negative infinity), the partition list is empty (never null), the parameters are null and the
        /// quality is <see cref="double.NegativeInfinity"/>.
        /// </returns>
        protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
            IDataFrame dataToSplit,
            string dependentFeatureName,
            string splittingFeatureName,
            double bestSplitQualitySoFar,
            double initialEntropy,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alredyUsedAttributesInfo)
        {
            var totalRowsCount = dataToSplit.RowCount;
            var uniqueFeatureValues = dataToSplit.GetColumnVector(splittingFeatureName).Distinct();
            double locallyBestSplitQuality = double.NegativeInfinity;
            IBinarySplittingParams localBestSplitParams = null;
            IList<ISplittedData> locallyBestSplitData = null;

            foreach (var featureValue in uniqueFeatureValues)
            {
                // Skip values that were already consumed for this feature.
                if (alredyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(splittingFeatureName, featureValue))
                {
                    continue;
                }

                var binarySplitParams = new BinarySplittingParams(splittingFeatureName, featureValue, dependentFeatureName);
                var splittedData = CategoricalDataSplitter.SplitData(dataToSplit, binarySplitParams);
                if (splittedData.Count == 1)
                {
                    // A single partition does not separate anything; report "no split" immediately.
                    return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                        new List<ISplittedData>(),
                        binarySplitParams,
                        double.NegativeInfinity);
                }

                var splitQuality = splitQualityChecker.CalculateSplitQuality(
                    initialEntropy,
                    totalRowsCount,
                    splittedData,
                    dependentFeatureName);
                if (splitQuality > locallyBestSplitQuality)
                {
                    locallyBestSplitQuality = splitQuality;
                    locallyBestSplitData = splittedData;
                    localBestSplitParams = binarySplitParams;
                }
            }

            // When no candidate was accepted the data list was never assigned; hand back an empty list
            // (as the degenerate-split branch above does) instead of leaking a null collection.
            return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                locallyBestSplitData ?? new List<ISplittedData>(),
                localBestSplitParams,
                locallyBestSplitQuality);
        }