/// <summary>
/// Picks the position, within the frequency-table list built from
/// <paramref name="data"/>, of the table whose <c>Gain</c> is largest.
/// </summary>
/// <param name="data">Bag whose <c>dataList</c> is turned into frequency tables.</param>
/// <returns>
/// The loop index of the first table with the strictly largest gain above zero,
/// or 0 when no table has a gain greater than zero.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <c>FrequencyTable.GetFrequencyTables</c> returned <c>null</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The frequency-table list is empty (raised by <c>First()</c>).
/// </exception>
private static int HighestGainAttribute(DataBag data)
{
    var tableList = FrequencyTable.GetFrequencyTables(data.dataList);
    if (tableList == null)
    {
        throw new ArgumentNullException(nameof(tableList));
    }

    // Only the first (Attributes.Count - 1) tables are examined.
    // NOTE(review): presumably the last attribute is the class label - confirm.
    var candidateCount = DataSet.Attributes.Count - 1;

    // The base entropy is derived from the first table's totals.
    var firstTable = tableList.First();
    var baseEntropy = Entropy(firstTable.AllRowsLeft, firstTable.QualifierCount);

    // Start from index 0 with gain 0.0 so that only a strictly larger gain
    // replaces the current best; ties keep the earlier index.
    var bestIndex = 0;
    var bestGain = 0.0;
    for (var index = 0; index < candidateCount; index++)
    {
        var gain = Gain(baseEntropy, tableList[index]);
        if (gain > bestGain)
        {
            bestIndex = index;
            bestGain = gain;
        }
    }

    return bestIndex;
}
/// <summary>
/// Computes the gain of the attribute described by <paramref name="table"/>:
/// the supplied entropy minus, for every value of the attribute, that value's
/// share of the remaining rows multiplied by the entropy of that value's rows.
/// </summary>
/// <param name="entropy">Entropy to subtract the weighted per-value entropies from.</param>
/// <param name="table">Frequency table of the attribute being evaluated.</param>
/// <returns>The entropy left after all weighted per-value entropies are subtracted.</returns>
private static double Gain(double entropy, FrequencyTable table)
{
    var totalRows = table.AllRowsLeft;
    var valueCount = DataSet.Attributes[table.AttributeIndex].ValueCount;

    // Subtract term by term (rather than summing first) so the
    // floating-point result matches a sequential reduction exactly.
    var remaining = entropy;
    for (var valueIndex = 0; valueIndex < valueCount; valueIndex++)
    {
        var qualifierSum = table.ValueQualifierSum(valueIndex);
        var rowsWithValue = table.AttributeRowCount(valueIndex);

        // Weight = fraction of the remaining rows that carry this attribute value.
        remaining -= ((double)rowsWithValue / totalRows) * Entropy(rowsWithValue, qualifierSum);
    }

    return remaining;
}