/// <summary>
/// Creates a node of the decision tree that declares a single answer for the
/// data subset that reached it, and records the explanation in <c>Working</c>.
/// </summary>
/// <param name="schema">Schema whose <c>AnswerValidator</c> is used to check <paramref name="answer"/>.</param>
/// <param name="answer">The answer this node declares; stored in <c>Answer</c>.</param>
/// <param name="data">The data subset for this node; every element must have <paramref name="answer"/> as its answer.</param>
/// <param name="level">Value stored in <c>Level</c>.</param>
/// <exception cref="InvalidOperationException">
/// The schema's validator rejects <paramref name="answer"/>, or an element of
/// <paramref name="data"/> has a different answer.
/// </exception>
public AnswerTreeNode(DatumSchema schema, string answer, Datum[] data, int level)
{
    Level = level;

    // Guard: refuse an answer the schema does not accept.
    if (!schema.AnswerValidator(answer))
    {
        throw new InvalidOperationException(String.Format(
            "The answer {0} is not valid with the given schema.", answer));
    }

    Answer = answer;

    // Every datum handed to this node has to agree with the declared answer.
    foreach (Datum datum in data)
    {
        if (datum.Answer != Answer)
        {
            throw new InvalidOperationException(String.Format(
                "The datum {0} does not have the answer {1}.", datum, Answer));
        }
    }

    Data = data;

    string subsetListing = String.Join(", ", Data.Select(d => d.ToString()));
    Working.Printf("Every element in the current data subset (\\{{{0}\\}}) now " +
                   "all share the same answer of \\texttt{{{1}}}. Hence, " +
                   "that is the answer that should be declared given this " +
                   "node of the decision tree has been reached.",
                   subsetListing,
                   Answer);
}
Example #2
0
        /// <summary>
        /// Picks the decidable attribute with the largest information gain for the
        /// current data subset, splits the data on that attribute and builds one child
        /// node per observed value. Every step is written to <c>Working</c> as LaTeX.
        /// </summary>
        /// <remarks>
        /// Assigns <c>QuestionAttribute</c>, <c>Children</c> and <c>Answers</c>.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// <c>DecidableAttributes</c> is empty, so no attribute is available to split on.
        /// </exception>
        private void Compute()
        {
            // Materialised once: this sequence is walked for the column spec, the
            // header row and again for every data row.
            var tableAttributes = Schema.Attributes
                                  .Where(a => !a.IsQueryable)
                                  .Concat(DecidableAttributes)
                                  .ToArray();

            // --- Table of the current data subset ---------------------------------
            Working.Print("Current data subset:");
            Working.Print("");
            Working.Printf(@"\begin{{tabular}}{{{0}l}}", "l ".Repeat(tableAttributes.Length));
            Working.Printf(@"  {0}{1} \\",
                           String.Join(" & ", tableAttributes
                                       .Select(a => String.Format(@"\textbf{{{0}}}", a.Name))),
                           @" & \textbf{Answer}");
            foreach (var datum in Data)
            {
                Working.Printf(@"  {0} & \textbf{{{1}}} \\",
                               String.Join(" & ", tableAttributes
                                           .Select(a => datum[a])),
                               datum.Answer);
            }
            Working.Print(@"\end{tabular}");
            Working.Print("");

            // --- Entropy of the answers in the subset -----------------------------
            // Materialised once: used for the listing, the entropy and its working.
            var entropyData = Data
                              .GroupBy(d => d.Answer)
                              .Select(g => new { Answer = g.Key, Count = (double)g.Count() })
                              .ToArray();

            Working.Printf("Entropy calculation: {0}.",
                           String.Join(", ",
                                       entropyData
                                       .Select(i => String.Format(@"\texttt{{{0}}} occurs $ {1} $ time{2}",
                                                                  i.Answer,
                                                                  i.Count,
                                                                  i.Count == 1 ? "" : "s"))));
            var entropy = Entropy(entropyData
                                  .Select(ans => ans.Count));

            Working.Printf("$$ {0}={1:0.######} $$",
                           EntropyWorking(entropyData.Select(e => (int)e.Count), (int)entropyData.Sum(d => d.Count)),
                           entropy);

            // --- Remainder and gain for each candidate attribute ------------------
            var attributeGains = new Dictionary<Attribute, double>();

            // Loop-invariant denominator of every remainder fraction.
            int dataCount = Data.Count();

            foreach (Attribute attribute in DecidableAttributes)
            {
                double remainder = Remainder(Data, attribute);
                double gain = entropy - remainder;

                // Group once per attribute; each grouping keeps its own rows, so the
                // per-value working below does not have to re-filter the whole subset.
                var valueGroups = Data
                                  .GroupBy(d => d[attribute])
                                  .ToArray();

                Working.Printf(@"Remainder calculation for \texttt{{{1}}} as follows. " +
                               @"Number of occurrences for each value of \texttt{{{1}}}: {0}.",
                               String.Join(", ",
                                           valueGroups
                                           .Select(g => String.Format(@"\texttt{{{0}}} occurs $ {1} $ time{2}",
                                                                      g.Key,
                                                                      g.Count(),
                                                                      g.Count() == 1 ? "" : "s"))),
                               attribute.Name);
                Working.Printf("$$ Remainder({2})={0}={1:0.######} $$",
                               String.Join("+", valueGroups
                                           .Select(g => String.Format(@"\frac{{{0}}}{{{1}}}\left({2}\right)",
                                                                      g.Count(),
                                                                      dataCount,
                                                                      EntropyWorking(
                                                                          g.GroupBy(d => d.Answer)
                                                                           .Select(g2 => g2.Count()),
                                                                          g.Count())))),
                               remainder,
                               attribute.Name);
                Working.Printf("Hence, $ Gain({0}) = H - Remainder({0}) = {1:0.######} $.",
                               attribute.Name,
                               gain);
                Working.Print("");
                attributeGains.Add(attribute, gain);
            }

            // --- Choose the attribute to ask about --------------------------------
            // Without this check First() below fails with the generic
            // "Sequence contains no elements"; same exception type, clearer cause.
            if (attributeGains.Count == 0)
            {
                throw new InvalidOperationException(
                    "Cannot choose a question attribute: there are no decidable attributes " +
                    "left for the current data subset.");
            }

            var questionAttributeGain = attributeGains
                                        .OrderByDescending(kvp => kvp.Value)
                                        .First();

            Working.Printf(@"The information gain from \texttt{{{0}}} is the largest, at $ {1:0.######} $ bits - " +
                           @"therefore, this attribute should form the next decision.",
                           questionAttributeGain.Key,
                           questionAttributeGain.Value);
            QuestionAttribute = questionAttributeGain.Key;

            // --- Split on the chosen attribute and build the children -------------
            Children = new Dictionary<string, ITreeNode>();

            // Materialised once: read for Answers and again to build the children.
            var byBest = Data
                         .GroupBy(d => d[QuestionAttribute])
                         .ToArray();

            Answers = byBest
                      .Select(g => g.Key)
                      .ToArray();

            foreach (var group in byBest)
            {
                Working.Print("");
                Working.Printf(@"Assume \texttt{{{0}}} was chosen for the attribute \texttt{{{1}}}.",
                               group.Key,
                               QuestionAttribute);

                // A group whose members all share one answer becomes an answer node;
                // anything else is split further on the remaining attributes.
                ITreeNode child;
                if (group.Count() == 1 || group.AllEqual(v => v.Answer))
                {
                    child = new AnswerTreeNode(Schema, group.First().Answer, group, Level + 1);
                }
                else
                {
                    child = new QuestionTreeNode(Schema, group,
                                                 DecidableAttributes.Where(a => a != QuestionAttribute).ToArray(),
                                                 Level + 1,
                                                 KnownValues.Add(QuestionAttribute, group.Key));
                }
                Children.Add(group.Key, child);
            }
            Working.Print("");
            Working.Printf(@"This accounts for every possibility of the attribute \texttt{{{0}}} " +
                           "at this level of the decision tree.",
                           QuestionAttribute);
        }