/// <summary>
/// Information gain obtained by splitting a set of examples on an attribute:
/// the entropy of the whole set minus the entropy that remains after the split.
/// </summary>
/// <param name="examples">Examples to evaluate the split on.</param>
/// <param name="attr">Attribute used to divide the examples.</param>
/// <returns><c>Entropy(examples) - Remainder(examples, attr)</c>.</returns>
private static double Importance(ICollection<Example> examples, DataAttribute attr)
{
    double entropyBeforeSplit = Entropy(examples);
    double entropyAfterSplit = Remainder(examples, attr);

    return entropyBeforeSplit - entropyAfterSplit;
}
/// <summary>
/// Entropy remaining after the example set is divided by an attribute.
/// </summary>
/// <param name="examples">Examples to partition.</param>
/// <param name="attr">Attribute whose <c>ColNum</c> selects the value to partition on.</param>
/// <returns>
/// Sum over all partitions of (partition size / total size) * entropy of the partition.
/// </returns>
private static double Remainder(ICollection<Example> examples, DataAttribute attr)
{
    // Group the examples by the value they hold in the attribute's column.
    var partitions = examples.ToLookup(example => example.Attributes[attr.ColNum]);

    double weightedEntropy = 0.0;
    foreach (var partition in partitions)
    {
        var members = partition.ToList();

        // Weight each partition's entropy by the fraction of examples it contains.
        double fraction = (double)members.Count / examples.Count;
        double partitionEntropy = Entropy(members);

        weightedEntropy += fraction * partitionEntropy;
    }

    return weightedEntropy;
}