/// <summary>
        /// Tries a binary split of <paramref name="dataToSplit"/> on every distinct value of the
        /// <paramref name="splittingFeatureName"/> column that has not been used yet and keeps the
        /// candidate with the highest split quality.
        /// </summary>
        /// <param name="dataToSplit">Data frame whose rows are being partitioned.</param>
        /// <param name="dependentFeatureName">Name of the dependent column, passed to the split params and the quality checker.</param>
        /// <param name="splittingFeatureName">Name of the column whose distinct values are the split candidates.</param>
        /// <param name="bestSplitQualitySoFar">Not read by this implementation.</param>
        /// <param name="initialEntropy">Forwarded unchanged to <paramref name="splitQualityChecker"/>.</param>
        /// <param name="splitQualityChecker">Scores each candidate split.</param>
        /// <param name="alredyUsedAttributesInfo">Consulted to skip feature/value pairs that were already used.</param>
        /// <returns>
        /// A tuple of (split data, split params, split quality) for the best candidate. When a candidate
        /// produces exactly one partition the method returns at once with an empty list, that candidate's
        /// params and <see cref="double.NegativeInfinity"/>. When no candidate beats
        /// <see cref="double.NegativeInfinity"/> the first two items are <c>null</c>.
        /// </returns>
        protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
            IDataFrame dataToSplit,
            string dependentFeatureName,
            string splittingFeatureName,
            double bestSplitQualitySoFar,
            double initialEntropy,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alredyUsedAttributesInfo)
        {
            var rowCount = dataToSplit.RowCount;
            var candidateValues = dataToSplit.GetColumnVector(splittingFeatureName).Distinct();

            var bestQuality = double.NegativeInfinity;
            IBinarySplittingParams bestParams = null;
            IList<ISplittedData> bestPartitions = null;

            foreach (var candidateValue in candidateValues)
            {
                // Skip values that were already used for this feature.
                if (alredyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(splittingFeatureName, candidateValue))
                {
                    continue;
                }

                var candidateParams = new BinarySplittingParams(splittingFeatureName, candidateValue, dependentFeatureName);
                var partitions = CategoricalDataSplitter.SplitData(dataToSplit, candidateParams);

                // A single partition aborts the whole evaluation for this feature.
                if (partitions.Count == 1)
                {
                    return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
                        new List<ISplittedData>(),
                        candidateParams,
                        double.NegativeInfinity);
                }

                var quality = splitQualityChecker.CalculateSplitQuality(
                    initialEntropy,
                    rowCount,
                    partitions,
                    dependentFeatureName);

                if (quality > bestQuality)
                {
                    bestParams = candidateParams;
                    bestPartitions = partitions;
                    bestQuality = quality;
                }
            }

            return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
                bestPartitions,
                bestParams,
                bestQuality);
        }
        /// <summary>
        /// Evaluates one binary categorical split per distinct, not-yet-used value of the
        /// <paramref name="splittingFeatureName"/> column and reports the highest-quality one.
        /// </summary>
        /// <param name="dataToSplit">Data frame to partition.</param>
        /// <param name="dependentFeatureName">Dependent column name; handed to the split params and to the quality checker.</param>
        /// <param name="splittingFeatureName">Column whose distinct values are tried as split values.</param>
        /// <param name="bestSplitQualitySoFar">Unused by this implementation.</param>
        /// <param name="initialEntropy">Passed through to <paramref name="splitQualityChecker"/>.</param>
        /// <param name="splitQualityChecker">Computes the quality of every candidate split.</param>
        /// <param name="alredyUsedAttributesInfo">Tells which feature/value pairs must be skipped because they were already used.</param>
        /// <returns>
        /// (split data, split params, quality) of the best candidate. If any candidate yields exactly one
        /// partition, the result is returned immediately as (empty list, that candidate's params,
        /// <see cref="double.NegativeInfinity"/>). If nothing scores above
        /// <see cref="double.NegativeInfinity"/>, the data and params items are <c>null</c>.
        /// </returns>
        protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
            IDataFrame dataToSplit,
            string dependentFeatureName,
            string splittingFeatureName,
            double bestSplitQualitySoFar,
            double initialEntropy,
            ISplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alredyUsedAttributesInfo)
        {
            var rowsInFrame = dataToSplit.RowCount;
            var distinctValues = dataToSplit.GetColumnVector(splittingFeatureName).Distinct();

            IList<ISplittedData> winningData = null;
            IBinarySplittingParams winningParams = null;
            double winningQuality = double.NegativeInfinity;

            foreach (var value in distinctValues)
            {
                var wasUsed = alredyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(splittingFeatureName, value);
                if (wasUsed)
                {
                    continue;
                }

                var currentParams = new BinarySplittingParams(splittingFeatureName, value, dependentFeatureName);
                var currentData = CategoricalDataSplitter.SplitData(dataToSplit, currentParams);

                // Exactly one partition: stop right away and report "no usable split".
                if (currentData.Count == 1)
                {
                    IList<ISplittedData> nothing = new List<ISplittedData>();
                    return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                        nothing,
                        currentParams,
                        double.NegativeInfinity);
                }

                var currentQuality = splitQualityChecker.CalculateSplitQuality(
                    initialEntropy,
                    rowsInFrame,
                    currentData,
                    dependentFeatureName);

                if (currentQuality > winningQuality)
                {
                    winningQuality = currentQuality;
                    winningData = currentData;
                    winningParams = currentParams;
                }
            }

            return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                winningData,
                winningParams,
                winningQuality);
        }
        /// <summary>
        /// Searches every column of <paramref name="baseData"/> except
        /// <paramref name="dependentFeatureName"/> for the binary numeric threshold with the highest
        /// split quality, then splits <paramref name="baseData"/> on that threshold.
        /// </summary>
        /// <param name="baseData">Data frame to split.</param>
        /// <param name="dependentFeatureName">Name of the column holding the dependent values.</param>
        /// <param name="splitQualityChecker">Supplies the initial entropy and scores each candidate split.</param>
        /// <param name="alreadyUsedAttributesInfo">Consulted to skip feature/threshold pairs that were already used.</param>
        /// <returns>
        /// A <see cref="BinarySplittingResult"/> for the best threshold, or <c>null</c> when no candidate
        /// scored above <see cref="double.NegativeInfinity"/> (including when there were no candidates).
        /// </returns>
        public ISplittingResult SelectBestSplit(
            IDataFrame baseData,
            string dependentFeatureName,
            INumericalSplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
        {
            var bestSplitQuality = double.NegativeInfinity;
            var initialEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);
            Tuple<string, double> bestSplit = null;

            var featureColumns = baseData.ColumnNames.Except(new[] { dependentFeatureName });

            foreach (var feature in featureColumns)
            {
                // Pair each feature value with the dependent value of the same row, sorted by feature value.
                var dataOrderedByFeature =
                    baseData.GetNumericColumnVector(feature)
                        .Select((rowVal, idx) => new Tuple<double, double>(rowVal, (double)baseData[idx, dependentFeatureName].FeatureValue))
                        .OrderBy(tpl => tpl.Item1)
                        .ToList();
                var dependentFeatureValuesOrdered = dataOrderedByFeature.Select(elem => elem.Item2).ToList();

                // Walk adjacent pairs (i - 1, i). The loop must reach the LAST index: stopping one short
                // would never evaluate the threshold between the two largest distinct feature values.
                // Starting at 1 also means an empty or single-row column simply yields no candidates.
                for (int i = 1; i < dataOrderedByFeature.Count; i++)
                {
                    var previousFeatureValue = dataOrderedByFeature[i - 1].Item1;
                    var currentFeatureValue = dataOrderedByFeature[i].Item1;

                    // Only a change between sorted neighbours defines a threshold (exact comparison intended).
                    if (currentFeatureValue == previousFeatureValue)
                    {
                        continue;
                    }

                    var splitPoint = (currentFeatureValue + previousFeatureValue) / 2.0;
                    if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(feature, splitPoint))
                    {
                        continue;
                    }

                    // Rows [0, i) lie below the threshold, rows [i, Count) above it.
                    var dependentValsBelow = dependentFeatureValuesOrdered.Take(i).ToList();
                    var dependentValsAbove = dependentFeatureValuesOrdered.Skip(i).ToList();
                    var splitQuality = splitQualityChecker.CalculateSplitQuality(
                        initialEntropy,
                        baseData.RowCount,
                        new[] { dependentValsBelow, dependentValsAbove });

                    if (splitQuality > bestSplitQuality)
                    {
                        bestSplitQuality = splitQuality;
                        bestSplit = new Tuple<string, double>(feature, splitPoint);
                    }
                }
            }

            if (bestSplit == null)
            {
                return null;
            }

            var splittedData = binaryNumericDataSplitter.SplitData(
                baseData,
                new BinarySplittingParams(bestSplit.Item1, bestSplit.Item2, dependentFeatureName));

            return new BinarySplittingResult(true, bestSplit.Item1, splittedData, bestSplit.Item2);
        }
        /// <summary>
        /// Searches every column of <paramref name="baseData"/> except
        /// <paramref name="dependentFeatureName"/> for the binary numeric threshold with the highest
        /// split quality, then splits <paramref name="baseData"/> on that threshold.
        /// </summary>
        /// <param name="baseData">Data frame to split.</param>
        /// <param name="dependentFeatureName">Name of the column holding the dependent values.</param>
        /// <param name="splitQualityChecker">Supplies the initial entropy and scores each candidate split.</param>
        /// <param name="alreadyUsedAttributesInfo">Consulted to skip feature/threshold pairs that were already used.</param>
        /// <returns>
        /// A <see cref="BinarySplittingResult"/> for the best threshold, or <c>null</c> when no candidate
        /// scored above <see cref="double.NegativeInfinity"/> (including when there were no candidates).
        /// </returns>
        public ISplittingResult SelectBestSplit(
            IDataFrame baseData,
            string dependentFeatureName,
            INumericalSplitQualityChecker splitQualityChecker,
            IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
        {
            var bestSplitQuality = double.NegativeInfinity;
            var initialEntropy = splitQualityChecker.GetInitialEntropy(baseData, dependentFeatureName);
            Tuple<string, double> bestSplit = null;

            var featureColumns = baseData.ColumnNames.Except(new[] { dependentFeatureName });

            foreach (var feature in featureColumns)
            {
                // Pair each feature value with the dependent value of the same row, sorted by feature value.
                var dataOrderedByFeature =
                    baseData.GetNumericColumnVector(feature)
                        .Select((rowVal, idx) => new Tuple<double, double>(rowVal, (double)baseData[idx, dependentFeatureName].FeatureValue))
                        .OrderBy(tpl => tpl.Item1)
                        .ToList();
                var dependentFeatureValuesOrdered = dataOrderedByFeature.Select(elem => elem.Item2).ToList();

                // Walk adjacent pairs (i - 1, i). The loop must reach the LAST index: stopping one short
                // would never evaluate the threshold between the two largest distinct feature values.
                // Starting at 1 also means an empty or single-row column simply yields no candidates.
                for (int i = 1; i < dataOrderedByFeature.Count; i++)
                {
                    var previousFeatureValue = dataOrderedByFeature[i - 1].Item1;
                    var currentFeatureValue = dataOrderedByFeature[i].Item1;

                    // Only a change between sorted neighbours defines a threshold (exact comparison intended).
                    if (currentFeatureValue == previousFeatureValue)
                    {
                        continue;
                    }

                    var splitPoint = (currentFeatureValue + previousFeatureValue) / 2.0;
                    if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(feature, splitPoint))
                    {
                        continue;
                    }

                    // Rows [0, i) lie below the threshold, rows [i, Count) above it.
                    var dependentValsBelow = dependentFeatureValuesOrdered.Take(i).ToList();
                    var dependentValsAbove = dependentFeatureValuesOrdered.Skip(i).ToList();
                    var splitQuality = splitQualityChecker.CalculateSplitQuality(
                        initialEntropy,
                        baseData.RowCount,
                        new[] { dependentValsBelow, dependentValsAbove });

                    if (splitQuality > bestSplitQuality)
                    {
                        bestSplitQuality = splitQuality;
                        bestSplit = new Tuple<string, double>(feature, splitPoint);
                    }
                }
            }

            if (bestSplit == null)
            {
                return null;
            }

            var splittedData = binaryNumericDataSplitter.SplitData(
                baseData,
                new BinarySplittingParams(bestSplit.Item1, bestSplit.Item2, dependentFeatureName));

            return new BinarySplittingResult(true, bestSplit.Item1, splittedData, bestSplit.Item2);
        }