Example #1
0
        /// <summary>
        /// Picks the attribute index whose <c>Gain</c> is largest for the given data.
        /// </summary>
        /// <param name="data">Bag whose <c>dataList</c> is used to build the frequency tables.</param>
        /// <returns>
        /// Index of the first attribute with the strictly largest gain; 0 when no
        /// attribute has a gain greater than zero.
        /// </returns>
        /// <exception cref="ArgumentNullException">The frequency tables could not be built (null result).</exception>
        private static int HighestGainAttribute(DataBag data)
        {
            var tableList = FrequencyTable.GetFrequencyTables(data.dataList);
            if (tableList == null)
            {
                throw new ArgumentNullException(nameof(tableList));
            }

            // The last entry of DataSet.Attributes is excluded from the candidates.
            var candidateCount = DataSet.Attributes.Count - 1;

            // The base entropy is derived from the first table's totals.
            var firstTable  = tableList.First();
            var baseEntropy = Entropy(firstTable.AllRowsLeft, firstTable.QualifierCount);

            // Start at index 0 / gain 0 so only a strictly positive gain replaces the default.
            var bestIndex = 0;
            var bestGain  = 0.0;

            for (var index = 0; index < candidateCount; index++)
            {
                var gain = Gain(baseEntropy, tableList[index]);
                if (gain > bestGain)
                {
                    bestIndex = index;
                    bestGain  = gain;
                }
            }

            return bestIndex;
        }
Example #2
0
        /// <summary>
        /// Computes the gain of splitting on the attribute described by <paramref name="table"/>:
        /// the starting entropy minus the row-weighted entropy of each attribute value.
        /// </summary>
        /// <param name="entropy">Entropy before the split.</param>
        /// <param name="table">Frequency table of the attribute being evaluated.</param>
        /// <returns>
        /// <paramref name="entropy"/> reduced by, for every attribute value that has rows,
        /// (rows with that value / all rows left) * Entropy(rows with that value, qualifier sum).
        /// </returns>
        private static double Gain(double entropy, FrequencyTable table)
        {
            var allInstancesLeftCount = table.AllRowsLeft;
            var attributeValueCount   = DataSet.Attributes[table.AttributeIndex].ValueCount;

            // Accumulate into a local instead of overwriting the parameter.
            var gain = entropy;

            for (var value = 0; value < attributeValueCount; value++)
            {
                var attributesRowAmount = table.AttributeRowCount(value);

                // A value with no rows has weight 0 and contributes nothing to the sum.
                // Skipping it also avoids calling Entropy with a zero row count.
                // NOTE(review): Entropy is not visible here; if it divides by the row
                // count it would yield NaN, and 0 * NaN would poison the whole gain.
                if (attributesRowAmount == 0)
                {
                    continue;
                }

                var qualifierCount = table.ValueQualifierSum(value);
                var weight         = (double)attributesRowAmount / allInstancesLeftCount;

                gain -= weight * Entropy(attributesRowAmount, qualifierCount);
            }

            return gain;
        }