/// <summary>
/// Evaluates splitting <paramref name="dataToSplit"/> on the categorical feature
/// <paramref name="splittingFeatureName"/> and reports the resulting partitions
/// together with their split quality.
/// </summary>
/// <param name="dataToSplit">Data frame to be partitioned.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) feature.</param>
/// <param name="splittingFeatureName">Name of the categorical feature to split on.</param>
/// <param name="bestSplitQualitySoFar">Not used by this override.</param>
/// <param name="initialEntropy">Value forwarded to the quality checker as the initial entropy.</param>
/// <param name="splitQualityChecker">Component that scores the produced split.</param>
/// <param name="alreadyUsedAttributesInfo">Tracks which attributes were already used for splitting.</param>
/// <returns>
/// A tuple of (partitions, splitting parameters, split quality). When the feature was
/// already used, or when the split produces fewer than two partitions, the partition list
/// is empty and the quality is <see cref="double.NegativeInfinity"/>.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    // A feature that has already been used is reported as the worst possible split.
    if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsed(splittingFeatureName))
    {
        return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            new SplittingParams(splittingFeatureName, dependentFeatureName),
            double.NegativeInfinity);
    }

    var totalRowsCount = dataToSplit.RowCount;
    var splitParams = new SplittingParams(splittingFeatureName, dependentFeatureName);
    var splitData = CategoricalDataSplitter.SplitData(dataToSplit, splitParams);

    // Zero or one partition does not separate the data at all, so there is nothing
    // meaningful to score (the original only rejected exactly one partition).
    if (splitData.Count <= 1)
    {
        return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            splitParams,
            double.NegativeInfinity);
    }

    var splitQuality = splitQualityChecker.CalculateSplitQuality(
        initialEntropy,
        totalRowsCount,
        splitData,
        dependentFeatureName);

    return Tuple.Create<IList<ISplittedData>, ISplittingParams, double>(
        splitData,
        splitParams,
        splitQuality);
}
/// <summary>
/// Scores a candidate categorical split of <paramref name="dataToSplit"/> on
/// <paramref name="splittingFeatureName"/>.
/// </summary>
/// <param name="dataToSplit">Data frame whose rows are partitioned.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) feature.</param>
/// <param name="splittingFeatureName">Name of the categorical feature used for splitting.</param>
/// <param name="bestSplitQualitySoFar">Ignored by this override.</param>
/// <param name="initialEntropy">Passed through to the quality checker as the initial entropy.</param>
/// <param name="splitQualityChecker">Calculates the quality of the produced split.</param>
/// <param name="alreadyUsedAttributesInfo">Knows which attributes have already been split on.</param>
/// <returns>
/// (partitions, splitting parameters, quality). The partitions are empty and the quality is
/// <see cref="double.NegativeInfinity"/> when the feature was already used or the split
/// yields fewer than two partitions.
/// </returns>
protected override Tuple<IList<ISplittedData>, ISplittingParams, double> EvaluateCategoricalSplit(
    IDataFrame dataToSplit,
    string dependentFeatureName,
    string splittingFeatureName,
    double bestSplitQualitySoFar,
    double initialEntropy,
    ISplitQualityChecker splitQualityChecker,
    IAlredyUsedAttributesInfo alreadyUsedAttributesInfo)
{
    // Never split on the same attribute twice: report it as an unusable candidate.
    if (alreadyUsedAttributesInfo.WasAttributeAlreadyUsed(splittingFeatureName))
    {
        return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            new SplittingParams(splittingFeatureName, dependentFeatureName),
            double.NegativeInfinity);
    }

    var rowCount = dataToSplit.RowCount;
    var parameters = new SplittingParams(splittingFeatureName, dependentFeatureName);
    var partitions = CategoricalDataSplitter.SplitData(dataToSplit, parameters);

    // Fewer than two partitions (including none at all) means the data was not
    // actually divided; previously only the single-partition case was rejected.
    if (partitions.Count < 2)
    {
        return new Tuple<IList<ISplittedData>, ISplittingParams, double>(
            new List<ISplittedData>(),
            parameters,
            double.NegativeInfinity);
    }

    var quality = splitQualityChecker.CalculateSplitQuality(
        initialEntropy,
        rowCount,
        partitions,
        dependentFeatureName);

    return new Tuple<IList<ISplittedData>, ISplittingParams, double>(partitions, parameters, quality);
}
/// <summary>
/// Checks that splitting the test data on "Outlook" (dependent feature "Play") yields one
/// partition per expected outlook value, each with the expected number of rows.
/// </summary>
public void PerformMultiValueDiscreteDataSplit()
{
    // Given
    var weatherData = TestDataBuilder.ReadWeatherDataWithCategoricalAttributes();
    var outlookSplitParams = new SplittingParams("Outlook", "Play");
    var rowsPerOutlook = new Dictionary<object, int>
    {
        { "Sunny", 5 },
        { "Overcast", 4 },
        { "Rainy", 5 }
    };

    // When
    var partitions = MultiValueDiscreteDataSplitter.SplitData(weatherData, outlookSplitParams);

    // Then
    // One partition per distinct expected value.
    Assert.AreEqual(rowsPerOutlook.Count, partitions.Count);

    foreach (var partition in partitions)
    {
        // Both the partition's data frame and its link must report the same row count.
        var expectedRows = rowsPerOutlook[partition.SplitLink.TestResult];
        Assert.AreEqual(expectedRows, partition.SplittedDataFrame.RowCount);
        Assert.AreEqual(expectedRows, partition.SplitLink.InstancesCount);
    }
}