/// <summary>
/// Selects the attribute this node asks about and builds one child per observed value of it,
/// writing the LaTeX working for every step to <c>Working</c>.
/// </summary>
/// <remarks>
/// In order, the method: prints the current data subset as a <c>tabular</c>; prints the entropy
/// of the answers in <c>Data</c>; prints the remainder and gain for each attribute in
/// <c>DecidableAttributes</c>; stores the attribute with the largest gain in
/// <c>QuestionAttribute</c>; and fills <c>Answers</c> and <c>Children</c> from the values that
/// attribute takes in <c>Data</c>. A value whose rows number exactly one, or all share the same
/// answer, gets an <c>AnswerTreeNode</c>; any other value gets a <c>QuestionTreeNode</c> built
/// without the chosen attribute.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// <c>DecidableAttributes</c> yields no attributes, so there is nothing to split on.
/// </exception>
private void Compute()
{
    // Materialised once: the column list is walked for the column spec, the header and every row.
    var tableAttributes = Schema.Attributes
        .Where(a => !a.IsQueryable)
        .Concat(DecidableAttributes)
        .ToList();

    // --- Current data subset, rendered as a LaTeX table -------------------------------------
    Working.Print("Current data subset:");
    Working.Print("");
    Working.Printf(@"\begin{{tabular}}{{{0}l}}", "l ".Repeat(tableAttributes.Count));
    Working.Printf(@" {0}{1} \\",
        String.Join(" & ", tableAttributes.Select(a => String.Format(@"\textbf{{{0}}}", a.Name))),
        @" & \textbf{Answer}");
    foreach (var datum in Data)
    {
        Working.Printf(@" {0} & \textbf{{{1}}} \\",
            String.Join(" & ", tableAttributes.Select(a => datum[a])),
            datum.Answer);
    }
    Working.Print(@"\end{tabular}");
    Working.Print("");

    // --- Entropy of the answers -------------------------------------------------------------
    // Materialised once: the answer counts are read four times below.
    var entropyData = Data
        .GroupBy(d => d.Answer)
        .Select(g => new { Answer = g.Key, Count = (double)g.Count() })
        .ToList();

    Working.Printf("Entropy calculation: {0}.",
        String.Join(", ", entropyData.Select(i => String.Format(
            @"\texttt{{{0}}} occurs $ {1} $ time{2}",
            i.Answer, i.Count, i.Count == 1 ? "" : "s"))));

    var entropy = Entropy(entropyData.Select(ans => ans.Count));
    Working.Printf("$$ {0}={1:0.######} $$",
        EntropyWorking(entropyData.Select(e => (int)e.Count), (int)entropyData.Sum(d => d.Count)),
        entropy);

    // --- Remainder and gain for each candidate attribute -------------------------------------
    // Hoisted: the total does not change while the attributes are being evaluated.
    var totalCount = Data.Count();

    Dictionary<Attribute, double> attributeGains = new Dictionary<Attribute, double>();
    foreach (Attribute attribute in DecidableAttributes)
    {
        double remainder = Remainder(Data, attribute);
        double gain = entropy - remainder;

        // Each entry keeps its grouping so the per-value answer counts can be taken from the
        // group's own rows instead of re-filtering all of Data for every value.
        var remainderData = Data
            .GroupBy(d => d[attribute])
            .Select(g => new { Value = g.Key, Count = g.Count(), Rows = g })
            .ToList();

        Working.Printf(@"Remainder calculation for \texttt{{{1}}} as follows. " +
            @"Number of occurrences for each value of \texttt{{{1}}}: {0}.",
            String.Join(", ", remainderData.Select(i => String.Format(
                @"\texttt{{{0}}} occurs $ {1} $ time{2}",
                i.Value, i.Count, i.Count == 1 ? "" : "s"))),
            attribute.Name);

        Working.Printf("$$ Remainder({2})={0}={1:0.######} $$",
            String.Join("+", remainderData.Select(g => String.Format(
                @"\frac{{{0}}}{{{1}}}\left({2}\right)",
                g.Count,
                totalCount,
                EntropyWorking(
                    g.Rows
                        .GroupBy(d => d.Answer)
                        .Select(g2 => g2.Count()),
                    g.Count)))),
            remainder,
            attribute.Name);

        Working.Printf("Hence, $ Gain({0}) = H - Remainder({0}) = {1:0.######} $.",
            attribute.Name, gain);
        Working.Print("");

        attributeGains.Add(attribute, gain);
    }

    // Without candidates First() would fail with an opaque "Sequence contains no elements";
    // fail with the same exception type but say what actually went wrong.
    if (attributeGains.Count == 0)
    {
        throw new InvalidOperationException(
            "Cannot choose a question attribute: there are no decidable attributes to split on.");
    }

    // --- Pick the attribute with the largest gain --------------------------------------------
    var questionAttributeGain = attributeGains
        .OrderByDescending(kvp => kvp.Value)
        .First();

    Working.Printf(@"The information gain from \texttt{{{0}}} is the largest, at $ {1:0.######} $ bits - " +
        @"therefore, this attribute should form the next decision.",
        questionAttributeGain.Key, questionAttributeGain.Value);

    QuestionAttribute = questionAttributeGain.Key;

    // --- Build one child per observed value of the chosen attribute --------------------------
    Children = new Dictionary<string, ITreeNode>();

    // Materialised once: the groups feed both Answers and the child-building loop.
    var byBest = Data
        .GroupBy(d => d[QuestionAttribute])
        .ToList();

    Answers = byBest
        .Select(g => g.Key)
        .ToArray();

    foreach (var group in byBest)
    {
        Working.Print("");
        Working.Printf(@"Assume \texttt{{{0}}} was chosen for the attribute \texttt{{{1}}}.",
            group.Key, QuestionAttribute);

        // A single row, or rows that all share one answer, need no further question.
        bool isDecided = group.Count() == 1 || group.AllEqual(v => v.Answer);

        ITreeNode child;
        if (isDecided)
        {
            child = new AnswerTreeNode(Schema, group.First().Answer, group, Level + 1);
        }
        else
        {
            child = new QuestionTreeNode(
                Schema,
                group,
                DecidableAttributes.Where(a => a != QuestionAttribute).ToArray(),
                Level + 1,
                KnownValues.Add(QuestionAttribute, group.Key));
        }

        Children.Add(group.Key, child);
    }

    Working.Print("");
    Working.Printf(@"This accounts for every possibility of the attribute \texttt{{{0}}} " +
        "at this level of the decision tree.", QuestionAttribute);
}