/// <summary>
/// Evaluates a split of <paramref name="dataToSplit"/> on the categorical feature
/// <paramref name="splittingFeatureName"/> and reports the resulting partitions,
/// the parameters used, and the split quality.
/// </summary>
/// <param name="dataToSplit">Data frame to be partitioned.</param>
/// <param name="dependentFeatureName">Name of the dependent feature, forwarded to the splitting params and the quality checker.</param>
/// <param name="splittingFeatureName">Name of the categorical feature to split on.</param>
/// <param name="bestSplitQualitySoFar">Not read by this implementation.</param>
/// <param name="initialEntropy">Forwarded unchanged to <paramref name="splitQualityChecker"/>.</param>
/// <param name="splitQualityChecker">Computes the quality of the produced partitions.</param>
/// <param name="alreadyUsedAttributesInfo">Queried to find out whether the splitting feature was already used.</param>
/// <returns>
/// A tuple of (partitions, splitting params, quality). When the feature was already used,
/// or when the splitter yields exactly one partition, the partitions list is empty and the
/// quality is <see cref="double.NegativeInfinity"/>.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    // Guard: a feature that was already used is reported as an unusable split.
    if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsed(splittingFeatureName))
    {
        IList<ISplittedData> noPartitions = new List<ISplittedData>();
        ISplittingParams skippedParams = new SplittingParams(splittingFeatureName, dependentFeatureName);
        return Tuple.Create(noPartitions, skippedParams, double.NegativeInfinity);
    }

    var rowCount = dataToSplit.RowCount;
    var parameters = new SplittingParams(splittingFeatureName, dependentFeatureName);
    var partitions = CategoricalDataSplitter.SplitData(dataToSplit, parameters);

    // A single partition means the split did not separate anything.
    if (partitions.Count == 1)
    {
        return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            parameters,
            double.NegativeInfinity);
    }

    var quality = splitQualityChecker.CalculateSplitQuality(
        initialEntropy,
        rowCount,
        partitions,
        dependentFeatureName);

    return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(partitions, parameters, quality);
}
/// <summary>
/// Scores a categorical split of <paramref name="dataToSplit"/> on
/// <paramref name="splittingFeatureName"/>.
/// </summary>
/// <param name="dataToSplit">Data frame handed to the categorical splitter.</param>
/// <param name="dependentFeatureName">Dependent feature name; passed to the splitting params and to the quality checker.</param>
/// <param name="splittingFeatureName">Feature whose values drive the split.</param>
/// <param name="bestSplitQualitySoFar">Unused in this override.</param>
/// <param name="initialEntropy">Passed through to the quality checker.</param>
/// <param name="splitQualityChecker">Calculates the split quality from the partitions.</param>
/// <param name="alreadyUsedAttributesInfo">Tells whether the splitting feature has been used before.</param>
/// <returns>
/// (partitions, params, quality). An empty partition list paired with
/// <see cref="double.NegativeInfinity"/> is returned when the feature was already used
/// or the splitter produced exactly one partition.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    bool featureAlreadyUsed = alreadyUsedAttributesInfo.WasAttributeAlreadyUsed(splittingFeatureName);
    if (featureAlreadyUsed)
    {
        // Nothing to evaluate: signal "worst possible" quality with no partitions.
        return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            new SplittingParams(splittingFeatureName, dependentFeatureName),
            double.NegativeInfinity);
    }

    var rowsTotal = dataToSplit.RowCount;
    var paramsForSplit = new SplittingParams(splittingFeatureName, dependentFeatureName);
    var subsets = CategoricalDataSplitter.SplitData(dataToSplit, paramsForSplit);

    // Only the selected branch is evaluated, so the quality checker is not
    // invoked when the split degenerates to a single subset.
    return subsets.Count == 1
        ? new Tuple<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            paramsForSplit,
            double.NegativeInfinity)
        : new Tuple<IList<ISplittedData>, ISplittingParams, double>(
            subsets,
            paramsForSplit,
            splitQualityChecker.CalculateSplitQuality(initialEntropy, rowsTotal, subsets, dependentFeatureName));
}
/// <summary>
/// Searches the distinct values of <paramref name="splittingFeatureName"/> for the binary
/// split with the highest quality.
/// </summary>
/// <param name="dataToSplit">Data frame whose column supplies the candidate values and which is split for each candidate.</param>
/// <param name="dependentFeatureName">Dependent feature name; passed to the binary splitting params and the quality checker.</param>
/// <param name="splittingFeatureName">Categorical feature whose distinct values are tried one by one.</param>
/// <param name="bestSplitQualitySoFar">Not read by this implementation.</param>
/// <param name="initialEntropy">Forwarded unchanged to <paramref name="splitQualityChecker"/>.</param>
/// <param name="splitQualityChecker">Computes the quality of each candidate split.</param>
/// <param name="alredyUsedAttributesInfo">Queried per value; values already used with this feature are skipped.</param>
/// <returns>
/// A tuple of (partitions, splitting params, quality) for the best candidate found.
/// If any candidate split yields exactly one partition, the scan stops at once and the
/// result is an empty list, that candidate's params and <see cref="double.NegativeInfinity"/>.
/// If no candidate scores above negative infinity (for example when every value was
/// skipped), the first two items are <c>null</c> and the quality is negative infinity.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alredyUsedAttributesInfo)
{
    var rowCount = dataToSplit.RowCount;
    var candidateValues = dataToSplit.GetColumnVector(splittingFeatureName).Distinct();

    double topQuality = double.NegativeInfinity;
    IBinarySplittingParams topParams = null;
    IList<ISplittedData> topPartitions = null;

    foreach (var candidate in candidateValues)
    {
        // Skip feature/value pairs that were already used.
        if (alredyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(splittingFeatureName, candidate))
        {
            continue;
        }

        var candidateParams = new BinarySplittingParams(splittingFeatureName, candidate, dependentFeatureName);
        var partitions = CategoricalDataSplitter.SplitData(dataToSplit, candidateParams);

        // A single partition aborts the whole search, discarding earlier candidates.
        if (partitions.Count == 1)
        {
            return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
                new List<ISplittedData>(),
                candidateParams,
                double.NegativeInfinity);
        }

        var quality = splitQualityChecker.CalculateSplitQuality(
            initialEntropy,
            rowCount,
            partitions,
            dependentFeatureName);

        // Written as a negated '>' so a NaN quality is rejected, like a plain '>' test.
        if (!(quality > topQuality))
        {
            continue;
        }

        topQuality = quality;
        topPartitions = partitions;
        topParams = candidateParams;
    }

    return new Tuple<IList<ISplittedData>, ISplittingParams, double>(topPartitions, topParams, topQuality);
}
/// <summary>
/// Tries a binary split for every distinct value of <paramref name="splittingFeatureName"/>
/// and keeps the one with the greatest quality.
/// </summary>
/// <param name="dataToSplit">Source data frame; provides the candidate values and is split per candidate.</param>
/// <param name="dependentFeatureName">Dependent feature name handed to the binary params and the quality checker.</param>
/// <param name="splittingFeatureName">Feature whose distinct values are the split candidates.</param>
/// <param name="bestSplitQualitySoFar">Unused in this override.</param>
/// <param name="initialEntropy">Passed through to the quality checker.</param>
/// <param name="splitQualityChecker">Scores each candidate split.</param>
/// <param name="alredyUsedAttributesInfo">Consulted for each value so that already-used feature/value pairs are not evaluated.</param>
/// <returns>
/// (partitions, params, quality) of the best-scoring candidate. A candidate whose split has
/// exactly one partition ends the search immediately with an empty list, that candidate's
/// params and <see cref="double.NegativeInfinity"/>. When no candidate is ever selected,
/// the partitions and params are <c>null</c> and the quality is negative infinity.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alredyUsedAttributesInfo)
{
    var totalRows = dataToSplit.RowCount;
    var distinctValues = dataToSplit.GetColumnVector(splittingFeatureName).Distinct();

    IList<ISplittedData> winnerData = null;
    IBinarySplittingParams winnerParams = null;
    double winnerQuality = double.NegativeInfinity;

    foreach (var value in distinctValues)
    {
        bool valueAlreadyUsed =
            alredyUsedAttributesInfo.WasAttributeAlreadyUsedWithValue(splittingFeatureName, value);
        if (valueAlreadyUsed)
        {
            continue;
        }

        var currentParams = new BinarySplittingParams(splittingFeatureName, value, dependentFeatureName);
        var currentData = CategoricalDataSplitter.SplitData(dataToSplit, currentParams);

        if (currentData.Count == 1)
        {
            // Degenerate split: stop here and report it as unusable.
            return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
                new List<ISplittedData>(),
                currentParams,
                double.NegativeInfinity);
        }

        var currentQuality = splitQualityChecker.CalculateSplitQuality(
            initialEntropy,
            totalRows,
            currentData,
            dependentFeatureName);

        // Strict comparison: ties keep the earlier candidate.
        if (winnerQuality < currentQuality)
        {
            winnerQuality = currentQuality;
            winnerData = currentData;
            winnerParams = currentParams;
        }
    }

    return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(winnerData, winnerParams, winnerQuality);
}