/// <summary>
/// Selects the attribute index whose frequency table produces the largest gain
/// for the instances held in <paramref name="data"/>.
/// </summary>
/// <param name="data">Bag whose <c>dataList</c> is turned into frequency tables.</param>
/// <returns>
/// The index of the best-scoring attribute, or <c>0</c> when no attribute has a
/// gain strictly greater than zero.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <c>FrequencyTable.GetFrequencyTables</c> returned <c>null</c>.
/// </exception>
private static int HighestGainAttribute(DataBag data)
{
    var tableList = FrequencyTable.GetFrequencyTables(data.dataList);
    if (tableList == null)
    {
        throw new ArgumentNullException(nameof(tableList));
    }

    // The last entry of DataSet.Attributes is never a candidate
    // (presumably it is the qualifier/class column - confirm against DataSet).
    var candidateCount = DataSet.Attributes.Count - 1;

    // The baseline entropy is derived from the first table only.
    var firstTable = tableList.First();
    var baseEntropy = Entropy(firstTable.AllRowsLeft, firstTable.QualifierCount);

    // Start at (index 0, gain 0): a candidate replaces the current best only
    // when its gain is strictly greater, so ties keep the earliest index.
    var bestIndex = 0;
    double bestGain = 0.0;
    for (var index = 0; index < candidateCount; index++)
    {
        var gain = Gain(baseEntropy, tableList[index]);
        if (gain > bestGain)
        {
            bestIndex = index;
            bestGain = gain;
        }
    }

    return bestIndex;
}
/// <summary>
/// Recursively grows the tree below <paramref name="n"/> from the instances in
/// <paramref name="data"/>.
/// </summary>
/// <remarks>
/// When <c>data.IsAtomic</c> is true the node becomes a leaf and receives
/// <c>data.HighestQualifierCount()</c> as its qualifier. Otherwise the node is
/// assigned the attribute chosen by <c>HighestGainAttribute</c>, and one child is
/// created and processed for every value index of that attribute.
/// <c>DataSet.MaxLevel</c> is raised to the deepest level visited.
/// </remarks>
/// <param name="n">Node to populate; its <c>Paths</c> collection receives the children.</param>
/// <param name="data">Instances that reach this node.</param>
/// <param name="level">Depth of <paramref name="n"/>; the initial call uses the default of 1.</param>
public static void ID3(Node n, DataBag data, int level = 1)
{
    // Track the deepest level reached so far.
    if (level > DataSet.MaxLevel)
    {
        DataSet.MaxLevel = level;
    }

    n.IsLeaf = data.IsAtomic;
    if (n.IsLeaf)
    {
        n.Qualifier = data.HighestQualifierCount();
        return;
    }

    n.Attribute = HighestGainAttribute(data);

    // One branch per value index of the chosen attribute.
    var edge = 0;
    while (edge < DataSet.Attributes[n.Attribute].ValueCount)
    {
        var child = new Node(n.Attribute);
        child.OriginEdge = edge;

        var subset = new DataBag(data, n.Attribute, edge);

        n.Paths.Add(child);
        ID3(child, subset, level + 1);

        edge++;
    }
}
/// <summary>
/// Creates a bag holding only the instances of <paramref name="oldBag"/> whose
/// <c>Data[attribute]</c> equals <paramref name="value"/>.
/// </summary>
/// <param name="oldBag">Bag to filter; it is not modified.</param>
/// <param name="attribute">Index into each instance's <c>Data</c>.</param>
/// <param name="value">Value the selected attribute must equal for an instance to be kept.</param>
public DataBag(DataBag oldBag, int attribute, int value)
{
    var matching =
        from row in oldBag.dataList
        where row.Data[attribute] == value
        select row;

    // Materialize once so the new bag owns its own list.
    dataList = matching.ToList();

    // Splitting on an attribute leaves one fewer attribute than the parent bag had.
    AttributesLeft = oldBag.AttributesLeft - 1;
}