Esempio n. 1
0
        /// <summary>
        /// Renders the subtree rooted at <paramref name="tree"/> as text.
        /// A leaf becomes one line listing each result as <c>{'key': count}</c>;
        /// an inner node becomes a <c>column:value?</c> line followed by its
        /// true ("T->") and false ("F->") branches.
        /// </summary>
        /// <param name="tree">Node to render.</param>
        /// <param name="indent">Prefix written before the branch markers of this node;
        /// each nested level receives one more tab.</param>
        /// <returns>A new builder holding the rendered text.</returns>
        public static StringBuilder PrintTree(DecisionNode tree, string indent = "")
        {
            StringBuilder text = new StringBuilder();

            // Leaf: dump the result counts on a single line and stop.
            if (tree.Results != null)
            {
                foreach (var entry in tree.Results)
                {
                    text.Append("{'").Append(entry.Key).Append("': ").Append(entry.Value).Append("}");
                }
                text.Append("\n");
                return(text);
            }

            // Inner node: the split criteria first ...
            text.Append(tree.Column).Append(":").Append(tree.Value).Append("?").Append("\n");

            // ... then both branches, one indentation level deeper.
            string deeper = indent + "\t";

            text.Append(indent).Append("T->");
            text.Append(PrintTree(tree.NextTrueNode, deeper));

            text.Append(indent).Append("F->");
            text.Append(PrintTree(tree.NextFalseNode, deeper));

            return(text);
        }
Esempio n. 2
0
 /// <summary>
 /// Initializes a decision tree node. Every argument is optional and is stored
 /// unchanged in the property of the same name.
 /// </summary>
 /// <param name="column">Stored in <c>Column</c>; defaults to -1.</param>
 /// <param name="value">Stored in <c>Value</c>; defaults to null.</param>
 /// <param name="results">Stored in <c>Results</c>; defaults to null.</param>
 /// <param name="nextTrueNode">Stored in <c>NextTrueNode</c>; defaults to null.</param>
 /// <param name="nextFalseNode">Stored in <c>NextFalseNode</c>; defaults to null.</param>
 public DecisionNode(int column = -1, string value = null, Dictionary <string, double> results = null,
                     DecisionNode nextTrueNode = null, DecisionNode nextFalseNode = null)
 {
     // Split criteria
     this.Column = column;
     this.Value  = value;

     // Leaf payload
     this.Results = results;

     // Child branches
     this.NextTrueNode  = nextTrueNode;
     this.NextFalseNode = nextFalseNode;
 }
Esempio n. 3
0
 /// <summary>
 /// Creates a decision tree node, copying each argument into the property
 /// with the matching name. All arguments may be omitted.
 /// </summary>
 /// <param name="column">Value for <c>Column</c> (default -1).</param>
 /// <param name="value">Value for <c>Value</c> (default null).</param>
 /// <param name="results">Value for <c>Results</c> (default null).</param>
 /// <param name="nextTrueNode">Value for <c>NextTrueNode</c> (default null).</param>
 /// <param name="nextFalseNode">Value for <c>NextFalseNode</c> (default null).</param>
 public DecisionNode(int column = -1, string value = null, Dictionary<string, double> results = null,
                     DecisionNode nextTrueNode = null, DecisionNode nextFalseNode = null)
 {
     // Criteria tested at this node
     this.Column = column;
     this.Value = value;

     // Outcome counts
     this.Results = results;

     // Branches
     this.NextTrueNode = nextTrueNode;
     this.NextFalseNode = nextFalseNode;
 }
        /// <summary>
        /// Loads "zoo.txt", builds and partially prunes a decision tree from it,
        /// draws the tree and shows the list box with the animal type legend.
        /// </summary>
        private void zoo_example()
        {
            Headers = new string[] {
                "Hair", "Feathers", "Eggs", "Milk", "Airborne", "Aquatic",
                "Predator", "Toothed", "Backbone", "Breathes", "Venomous", "Fins", "Legs", "Tail",
                "Domestic", "Catsize"
            };
            Result = "Animal type";

            var treeRows     = new List <string[]>();   // rows without the first column, used to build the tree
            var databaseRows = new List <string[]>();   // complete rows, handed to UpdateDataBaseZoo

            string[] fileLines = File.ReadAllLines(@"zoo.txt");
            foreach (string fileLine in fileLines)
            {
                string[] fields    = fileLine.Split(',');
                var      converted = new List <string>();

                for (int index = 0; index < fields.Length; ++index)
                {
                    // Columns 0, 13 and 17 are kept verbatim; everything else goes through BoolParser.
                    bool keepVerbatim = index == 0 || index == 13 || index == 17;
                    converted.Add(keepVerbatim ? fields[index] : BoolParser.Parse(fields[index]).ToString());
                }

                databaseRows.Add(converted.ToArray());

                // The tree is built without the first column.
                converted.RemoveAt(0);
                treeRows.Add(converted.ToArray());
            }

            UpdateDataBaseZoo(databaseRows);
            tcMain.Clear();
            tree = DecisionTreeTools.BuildTree(treeRows);

            // Only two hand-picked subtrees are pruned.
            DecisionNode falseThenTrue = tree.NextFalseNode.NextTrueNode;
            DecisionTreeTools.Prune(falseThenTrue.NextFalseNode.NextTrueNode, 1.01);
            DecisionTreeTools.Prune(falseThenTrue.NextTrueNode.NextFalseNode, 1.01);
            PrintTree(tree);

            // Legend: animal type number -- (count) members.
            string[] legend =
            {
                "1 -- (41) aardvark, antelope, bear, boar, buffalo, calf, cavy, cheetah, deer, dolphin, elephant, fruitbat, giraffe, girl, goat, gorilla, hamster, hare, leopard, lion, lynx, mink, mole, mongoose, opossum, oryx, platypus, polecat, pony, porpoise, puma, pussycat, raccoon, reindeer, seal, sealion, squirrel, vampire, vole, wallaby,wolf ",
                "2 -- (20) chicken, crow, dove, duck, flamingo, gull, hawk, kiwi, lark, ostrich, parakeet, penguin, pheasant, rhea, skimmer, skua, sparrow, swan, vulture, wren ",
                "3 -- (5) pitviper, seasnake, slowworm, tortoise, tuatara",
                "4 -- (13) bass, carp, catfish, chub, dogfish, haddock, herring, pike, piranha, seahorse, sole, stingray, tuna ",
                "5 -- (4) frog, frog, newt, toad ",
                "6 -- (8) flea, gnat, honeybee, housefly, ladybird, moth, termite, wasp ",
                "7 -- (10) clam, crab, crayfish, lobster, octopus, scorpion, seawasp, slug, starfish, worm"
            };

            listBoxZoo.Items.Clear();
            foreach (string legendLine in legend)
            {
                listBoxZoo.Items.Add(legendLine);
            }

            listBoxZoo.Visibility = Visibility.Visible;
        }
        /// <summary>
        /// Adds the subtree rooted at <paramref name="tree"/> to the tcMain tree control.
        /// Each node is shown as a label: leaves list their result keys,
        /// inner nodes show the tested header and value.
        /// </summary>
        /// <param name="tree">Decision node to draw.</param>
        /// <param name="tnControl">Parent node in the control, or null to add a root.</param>
        private void PrintTree(DecisionNode tree, TreeNode tnControl = null)
        {
            bool isLeaf = tree.Results != null;

            Label caption = new Label();
            caption.BorderBrush     = Brushes.White;
            caption.BorderThickness = new Thickness(0.5);

            if (isLeaf)
            {
                // Leaves list every result key and are drawn in green.
                StringBuilder text = new StringBuilder();
                foreach (var entry in tree.Results)
                {
                    text.Append("{").Append(Result).Append(": '").Append(entry.Key).Append("'}");
                }
                caption.Content    = text.ToString();
                caption.Foreground = Brushes.YellowGreen;
            }
            else
            {
                // Inner nodes show the question asked at this split, in orange.
                caption.Content    = Headers[tree.Column] + " : " + tree.Value + " ?";
                caption.Foreground = Brushes.Orange;
            }

            TreeNode added;
            if (tnControl != null)
            {
                added = tcMain.AddNode(caption, tnControl);
            }
            else
            {
                added = tcMain.AddRoot(caption);
            }

            if (isLeaf)
            {
                return;
            }

            // The false branch is added before the true branch.
            PrintTree(tree.NextFalseNode, added);
            PrintTree(tree.NextTrueNode, added);
        }
Esempio n. 6
0
        /// <summary>
        /// When this function is called on the root node, it will traverse all the way down the
        /// tree to the nodes that only have leaf nodes as children. It will create a combined list
        /// of results from both of the leaves and will test the entropy. If the change in entropy is
        /// less than the mingain parameter, the leaves will be deleted and all their results moved
        /// to their parent node. The combined node then becomes a possible candidate for
        /// deletion and merging with another node.
        /// </summary>
        /// <param name="tree">Root of the subtree to prune. A node without both branches is left untouched.</param>
        /// <param name="mingain">Minimum entropy reduction a split must achieve to be kept.</param>
        public static void Prune(DecisionNode tree, double mingain)
        {
            // A leaf (or an empty node) has no branches to merge; the original
            // dereferenced the missing children and threw a NullReferenceException.
            if (tree == null || tree.NextTrueNode == null || tree.NextFalseNode == null)
            {
                return;
            }

            // If the branches aren't leaves, then prune them
            if (tree.NextTrueNode.Results == null)
            {
                Prune(tree.NextTrueNode, mingain);
            }
            if (tree.NextFalseNode.Results == null)
            {
                Prune(tree.NextFalseNode, mingain);
            }

            // Only a node whose two branches are (now) leaves can be merged
            if (tree.NextTrueNode.Results == null || tree.NextFalseNode.Results == null)
            {
                return;
            }

            // Rebuild one single-column row per counted result for each branch
            Set tb = new Set(), fb = new Set();
            foreach (var s in tree.NextTrueNode.Results)
            {
                for (int i = 0; i < s.Value; i++)
                {
                    tb.Add(new string[] { s.Key });
                }
            }

            foreach (var s in tree.NextFalseNode.Results)
            {
                for (int i = 0; i < s.Value; i++)
                {
                    fb.Add(new string[] { s.Key });
                }
            }

            // Every row is a distinct array instance, so Union keeps all of them.
            var merged = tb.Union(fb).ToList();

            // Test the reduction in entropy: combined entropy minus the AVERAGE of the
            // two branch entropies. The original wrote "Entropy(tb) + Entropy(fb) / 2",
            // which halves only the false branch because of operator precedence.
            double delta = Entropy(merged) - (Entropy(tb) + Entropy(fb)) / 2;
            if (delta < mingain)
            {
                // Merge the branches
                tree.NextTrueNode  = null;
                tree.NextFalseNode = null;
                tree.Results       = UniqueCounts(merged);
            }
        }
        /// <summary>
        /// Loads the tab-separated "decision_tree_example.txt", builds a decision tree
        /// from it, draws the tree, updates the data view and hides the zoo list box.
        /// </summary>
        private void simple_example()
        {
            Headers = new string[] { "Refferer", "Location", "Read FAQ", "Pages viewed" };
            Result  = "Service";

            // One row per line, one column per tab-separated field.
            string[] fileLines = File.ReadAllLines(@"decision_tree_example.txt");
            var      rows      = new List <string[]>(fileLines.Length);
            for (int i = 0; i < fileLines.Length; i++)
            {
                rows.Add(fileLines[i].Split('\t'));
            }

            tcMain.Clear();
            tree = DecisionTreeTools.BuildTree(rows);
            PrintTree(tree);
            UpdateDataBaseSimpleExample(rows);

            listBoxZoo.Visibility = Visibility.Hidden;
        }
Esempio n. 8
0
        /// <summary>
        /// Takes a new observation and classifies it according to the decision tree
        /// by walking from <paramref name="tree"/> down to a leaf.
        /// </summary>
        /// <param name="observation">Values of the observation, indexed by each node's Column.</param>
        /// <param name="tree">Node at which classification starts.</param>
        /// <returns>The Results dictionary of the leaf that was reached.</returns>
        public static Dictionary <string, double> Classify(string[] observation, DecisionNode tree)
        {
            DecisionNode node = tree;

            while (node.Results == null)
            {
                string data = observation[node.Column];
                bool   takeTrueBranch;

                // Compare numerically only when BOTH the observed value and the node's
                // split value are numbers. The original converted the split value
                // unconditionally and threw a FormatException when it was not numeric.
                double observed, threshold;
                if (double.TryParse(data, out observed) && double.TryParse(node.Value, out threshold))
                {
                    takeTrueBranch = observed >= threshold;
                }
                else
                {
                    takeTrueBranch = data == node.Value;
                }

                node = takeTrueBranch ? node.NextTrueNode : node.NextFalseNode;
            }

            return(node.Results);
        }
Esempio n. 9
0
        /// <summary>
        /// Classifies an observation like Classify does. The only difference is that, if the
        /// piece of data tested at a node is missing (null), the results for each branch are
        /// calculated and then combined with their respective weightings.
        /// </summary>
        /// <param name="observation">Values of the observation, indexed by each node's Column; null marks a missing value.</param>
        /// <param name="tree">Node at which classification starts.</param>
        /// <returns>
        /// The Results of the reached leaf, or, where a value was missing, a new dictionary
        /// holding the sum of both branches' results scaled by each branch's share of the total count.
        /// </returns>
        public static Dictionary <string, double> MissingDataClassify(string[] observation, DecisionNode tree)
        {
            if (tree.Results != null)
            {
                return(tree.Results);
            }

            string v = observation[tree.Column];

            if (v == null)
            {
                // Missing value: follow both branches and weight each by its share of the counts
                var    tr     = MissingDataClassify(observation, tree.NextTrueNode);
                var    fr     = MissingDataClassify(observation, tree.NextFalseNode);
                double tcount = tr.Values.Sum();
                double fcount = fr.Values.Sum();
                double total  = tcount + fcount;
                double tw     = tcount / total;
                double fw     = fcount / total;

                Dictionary <string, double> result = new Dictionary <string, double>();
                double                      current;

                foreach (var i in tr)
                {
                    // TryGetValue leaves current at 0 for a key that is not present yet
                    result.TryGetValue(i.Key, out current);
                    result[i.Key] = current + i.Value * tw;
                }
                foreach (var i in fr)
                {
                    // Add to the true-branch contribution. The original assigned here,
                    // discarding the true-branch share of every key present in both branches.
                    result.TryGetValue(i.Key, out current);
                    result[i.Key] = current + i.Value * fw;
                }
                return(result);
            }

            DecisionNode branch;

            // Compare numerically only when both sides are numbers; the original threw a
            // FormatException when the node's split value was not numeric.
            double observed, threshold;
            if (double.TryParse(v, out observed) && double.TryParse(tree.Value, out threshold))
            {
                branch = observed >= threshold ? tree.NextTrueNode : tree.NextFalseNode;
            }
            else
            {
                branch = v == tree.Value ? tree.NextTrueNode : tree.NextFalseNode;
            }
            return(MissingDataClassify(observation, branch));
        }
Esempio n. 10
0
        /// <summary>
        /// Takes a new observation and classifies it according to the decision tree.
        /// </summary>
        /// <param name="observation">Values of the observation, indexed by each node's Column.</param>
        /// <param name="tree">Node at which classification starts.</param>
        /// <returns>The Results dictionary of the leaf that was reached.</returns>
        public static Dictionary<string, double> Classify(string[] observation, DecisionNode tree)
        {
            // A leaf carries the answer
            if (tree.Results != null)
            {
                return tree.Results;
            }

            string data = observation[tree.Column];
            DecisionNode branch;

            // Use the numeric test only when the observed value AND the node's split value
            // both parse as numbers. The original called Convert.ToDouble on the split value
            // unconditionally, which throws a FormatException for a non-numeric split value.
            double observed;
            double threshold;
            if (double.TryParse(data, out observed) && double.TryParse(tree.Value, out threshold))
            {
                branch = observed >= threshold ? tree.NextTrueNode : tree.NextFalseNode;
            }
            else
            {
                branch = data == tree.Value ? tree.NextTrueNode : tree.NextFalseNode;
            }

            return Classify(observation, branch);
        }
Esempio n. 11
0
        /// <summary>
        /// Draws the subtree rooted at <paramref name="tree"/> in the tcMain tree control,
        /// one label per decision node.
        /// </summary>
        /// <param name="tree">Decision node to draw.</param>
        /// <param name="tnControl">Parent node in the control; null adds the label as a root.</param>
        private void PrintTree(DecisionNode tree, TreeNode tnControl = null)
        {
            Label nodeLabel = new Label();
            nodeLabel.BorderBrush = Brushes.White;
            nodeLabel.BorderThickness = new Thickness(0.5);

            bool leaf = tree.Results != null;
            if (!leaf)
            {
                // Inner node: orange label with the header and value being tested.
                nodeLabel.Foreground = Brushes.Orange;
                nodeLabel.Content = Headers[tree.Column] + " : " + tree.Value + " ?";
            }
            else
            {
                // Leaf: green label listing every result key.
                StringBuilder keys = new StringBuilder();
                foreach (var pair in tree.Results)
                {
                    keys.Append("{");
                    keys.Append(Result);
                    keys.Append(": '");
                    keys.Append(pair.Key);
                    keys.Append("'}");
                }
                nodeLabel.Content = keys.ToString();
                nodeLabel.Foreground = Brushes.YellowGreen;
            }

            TreeNode created;
            if (tnControl == null)
            {
                created = tcMain.AddRoot(nodeLabel);
            }
            else
            {
                created = tcMain.AddNode(nodeLabel, tnControl);
            }

            if (!leaf)
            {
                // False branch first, then true branch.
                PrintTree(tree.NextFalseNode, created);
                PrintTree(tree.NextTrueNode, created);
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Reads "decision_tree_example.txt" (tab-separated), builds and draws the decision
        /// tree for it, refreshes the data view and hides the zoo list box.
        /// </summary>
        private void simple_example()
        {
            Headers = new string[]{"Refferer", "Location", "Read FAQ", "Pages viewed"};
            Result = "Service";

            // Split every line of the file into its columns.
            var rows = new List<string[]>();
            string[] content = File.ReadAllLines(@"decision_tree_example.txt");
            for (int lineNo = 0; lineNo < content.Length; ++lineNo)
            {
                string[] columns = content[lineNo].Split('\t');
                rows.Add(columns);
            }

            tcMain.Clear();
            tree = DecisionTreeTools.BuildTree(rows);
            PrintTree(tree);
            UpdateDataBaseSimpleExample(rows);

            listBoxZoo.Visibility = Visibility.Hidden;
        }
Esempio n. 13
0
        /// <summary>
        /// Reads "zoo.txt", builds a decision tree from it, prunes two fixed subtrees,
        /// draws the result and shows the animal type legend in the list box.
        /// </summary>
        private void zoo_example()
        {
            Headers = new string[]{
                                        "Hair", "Feathers", "Eggs", "Milk", "Airborne", "Aquatic",
                                        "Predator", "Toothed", "Backbone", "Breathes", "Venomous", "Fins", "Legs", "Tail",
                                        "Domestic", "Catsize"
                                    };
            Result = "Animal type";

            var withFirstColumn = new List<string[]>();    // passed to UpdateDataBaseZoo
            var withoutFirstColumn = new List<string[]>(); // passed to BuildTree

            foreach (string record in File.ReadAllLines(@"zoo.txt"))
            {
                string[] cells = record.Split(',');
                var parsed = new List<string>();

                for (int c = 0; c < cells.Length; c++)
                {
                    switch (c)
                    {
                        // These three columns are copied as they are.
                        case 0:
                        case 13:
                        case 17:
                            parsed.Add(cells[c]);
                            break;

                        // All remaining columns are run through BoolParser.
                        default:
                            parsed.Add(BoolParser.Parse(cells[c]).ToString());
                            break;
                    }
                }

                withFirstColumn.Add(parsed.ToArray());
                parsed.RemoveAt(0);
                withoutFirstColumn.Add(parsed.ToArray());
            }

            UpdateDataBaseZoo(withFirstColumn);
            tcMain.Clear();
            tree = DecisionTreeTools.BuildTree(withoutFirstColumn);

            // Prune two hand-picked subtrees only.
            const double minGain = 1.01;
            DecisionTreeTools.Prune(tree.NextFalseNode.NextTrueNode.NextFalseNode.NextTrueNode, minGain);
            DecisionTreeTools.Prune(tree.NextFalseNode.NextTrueNode.NextTrueNode.NextFalseNode, minGain);
            PrintTree(tree);

            // Legend lines: type number -- (member count) member names.
            var legend = new string[]
            {
                "1 -- (41) aardvark, antelope, bear, boar, buffalo, calf, cavy, cheetah, deer, dolphin, elephant, fruitbat, giraffe, girl, goat, gorilla, hamster, hare, leopard, lion, lynx, mink, mole, mongoose, opossum, oryx, platypus, polecat, pony, porpoise, puma, pussycat, raccoon, reindeer, seal, sealion, squirrel, vampire, vole, wallaby,wolf ",
                "2 -- (20) chicken, crow, dove, duck, flamingo, gull, hawk, kiwi, lark, ostrich, parakeet, penguin, pheasant, rhea, skimmer, skua, sparrow, swan, vulture, wren ",
                "3 -- (5) pitviper, seasnake, slowworm, tortoise, tuatara",
                "4 -- (13) bass, carp, catfish, chub, dogfish, haddock, herring, pike, piranha, seahorse, sole, stingray, tuna ",
                "5 -- (4) frog, frog, newt, toad ",
                "6 -- (8) flea, gnat, honeybee, housefly, ladybird, moth, termite, wasp ",
                "7 -- (10) clam, crab, crayfish, lobster, octopus, scorpion, seawasp, slug, starfish, worm"
            };

            listBoxZoo.Items.Clear();
            for (int k = 0; k < legend.Length; k++)
            {
                listBoxZoo.Items.Add(legend[k]);
            }

            listBoxZoo.Visibility = Visibility.Visible;
        }
Esempio n. 14
0
        /// <summary>
        /// Classifies an observation like Classify does. The only difference is that, if the
        /// important piece of data is missing (null), the results for each branch are calculated
        /// and then combined with their respective weightings.
        /// </summary>
        /// <param name="observation">Values of the observation, indexed by each node's Column; null marks a missing value.</param>
        /// <param name="tree">Node at which classification starts.</param>
        /// <returns>
        /// The Results of the reached leaf, or, where a value was missing, a new dictionary with
        /// the sum of both branches' results, each scaled by that branch's share of the total count.
        /// </returns>
        public static Dictionary<string, double> MissingDataClassify(string[] observation, DecisionNode tree)
        {
            if (tree.Results != null)
            {
                return tree.Results;
            }

            string v = observation[tree.Column];

            if (v != null)
            {
                DecisionNode branch;

                // Numeric comparison needs both the observed value and the split value to be
                // numbers; the original threw a FormatException on a non-numeric split value.
                double observed;
                double threshold;
                if (double.TryParse(v, out observed) && double.TryParse(tree.Value, out threshold))
                {
                    branch = observed >= threshold ? tree.NextTrueNode : tree.NextFalseNode;
                }
                else
                {
                    branch = v == tree.Value ? tree.NextTrueNode : tree.NextFalseNode;
                }
                return MissingDataClassify(observation, branch);
            }

            // The value is missing: evaluate both branches and weight them by their counts
            var tr = MissingDataClassify(observation, tree.NextTrueNode);
            var fr = MissingDataClassify(observation, tree.NextFalseNode);
            double tcount = tr.Values.Sum();
            double fcount = fr.Values.Sum();
            double tw = tcount / (tcount + fcount);
            double fw = fcount / (tcount + fcount);

            Dictionary<string, double> result = new Dictionary<string, double>();
            foreach (var i in tr)
            {
                double soFar;
                result.TryGetValue(i.Key, out soFar);   // 0 when the key is new
                result[i.Key] = soFar + i.Value * tw;
            }
            foreach (var i in fr)
            {
                // Accumulate instead of assigning: the original overwrote the weighted
                // true-branch count of every key that occurs in both branches.
                double soFar;
                result.TryGetValue(i.Key, out soFar);
                result[i.Key] = soFar + i.Value * fw;
            }
            return result;
        }
Esempio n. 15
0
        /// <summary>
        /// When this function is called on the root node, it will traverse all the way down the
        /// tree to the nodes that only have leaf nodes as children. It will create a combined list
        /// of results from both of the leaves and will test the entropy. If the change in entropy is
        /// less than the mingain parameter, the leaves will be deleted and all their results moved
        /// to their parent node. The combined node then becomes a possible candidate for
        /// deletion and merging with another node.
        /// </summary>
        /// <param name="tree">Root of the subtree to prune. Nodes lacking either branch are ignored.</param>
        /// <param name="mingain">Minimum entropy reduction required to keep a split.</param>
        public static void Prune(DecisionNode tree, double mingain)
        {
            // Nothing to merge on a leaf or an empty node. Without this guard the
            // child dereferences below throw a NullReferenceException.
            if (tree == null || tree.NextTrueNode == null || tree.NextFalseNode == null)
            {
                return;
            }

            // If the branches aren't leaves, then prune them
            if (tree.NextTrueNode.Results == null)
            {
                Prune(tree.NextTrueNode, mingain);
            }
            if (tree.NextFalseNode.Results == null)
            {
                Prune(tree.NextFalseNode, mingain);
            }

            // If both the subbranches are now leaves, see if they should be merged
            if (tree.NextTrueNode.Results != null && tree.NextFalseNode.Results != null)
            {
                // Build a combined dataset: one single-column row per counted result
                Set tb = new Set(), fb = new Set();
                foreach (var s in tree.NextTrueNode.Results)
                {
                    for (int i = 0; i < s.Value; i++)
                    {
                        tb.Add(new string[] { s.Key });
                    }
                }

                foreach (var s in tree.NextFalseNode.Results)
                {
                    for (int i = 0; i < s.Value; i++)
                    {
                        fb.Add(new string[] { s.Key });
                    }
                }

                // All rows are distinct array instances, so Union drops none of them.
                var combined = tb.Union(fb).ToList();

                // Test the reduction in entropy against the mean entropy of the two branches.
                // The sum must be parenthesized before dividing; "a + b / 2" only halves b.
                double delta = Entropy(combined) - (Entropy(tb) + Entropy(fb)) / 2;
                if (delta < mingain)
                {
                    // Merge the branches
                    tree.NextTrueNode = null;
                    tree.NextFalseNode = null;
                    tree.Results = UniqueCounts(combined);
                }
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Builds the tree by choosing the best dividing criteria for the current set
        /// and recursing into the two resulting subsets.
        /// </summary>
        /// <param name="rows">Rows to split. The last element of each row is not used as a split column.</param>
        /// <param name="mode">Metric used to score a set of rows; applied at every level of the tree.</param>
        /// <returns>
        /// An empty node when <paramref name="rows"/> is empty, a leaf holding UniqueCounts of the rows
        /// when no split yields a positive gain, otherwise an inner node with both branches.
        /// </returns>
        public static DecisionNode BuildTree(Set rows, MeasuringMetric mode = MeasuringMetric.Entropy)
        {
            if (rows.Count == 0)
            {
                return(new DecisionNode());
            }

            // Pick the scoring function for the requested metric (Entropy is the fallback)
            Fun scoreCalc;
            switch (mode)
            {
            case MeasuringMetric.GiniImpurity:
                scoreCalc = GiniImpurity;
                break;

            case MeasuringMetric.Variance:
                scoreCalc = Variance;
                break;

            default:
                scoreCalc = Entropy;
                break;
            }

            double currentScore = scoreCalc(rows);

            // Set up some variables to track the best criteria
            double     bestGain     = 0;
            Pair       bestCriteria = new Pair();
            DividedSet bestSets     = new DividedSet();

            int columnCount = rows[0].Length - 1;

            for (int col = 0; col < columnCount; ++col)
            {
                // Generate the list of different values in this column, in first-seen order.
                // The hash set makes the duplicate check constant time instead of a list scan.
                List <string>    columnValues = new List <string>();
                HashSet <string> seen         = new HashSet <string>();

                foreach (var row in rows)
                {
                    if (seen.Add(row[col]))
                    {
                        columnValues.Add(row[col]);
                    }
                }

                // Now try dividing the rows up for each value in this column
                foreach (var value in columnValues)
                {
                    DividedSet divSet = DivideSet(rows, col, value);

                    // Information gain
                    double p    = (double)divSet.Key.Count / rows.Count;
                    double gain = currentScore - p * scoreCalc(divSet.Key) - (1 - p) * scoreCalc(divSet.Value);

                    if (gain > bestGain && divSet.Key.Count > 0 && divSet.Value.Count > 0)
                    {
                        bestGain     = gain;
                        bestCriteria = new Pair(col, value);
                        bestSets     = divSet;
                    }
                }
            }

            // No useful split: this node becomes a leaf
            if (bestGain <= 0)
            {
                return(new DecisionNode(results: UniqueCounts(rows)));
            }

            // Create the subbranches. The metric is passed on explicitly; the original omitted it,
            // so every level below the root silently fell back to Entropy.
            DecisionNode trueBranch  = BuildTree(bestSets.Key, mode);
            DecisionNode falseBranch = BuildTree(bestSets.Value, mode);
            return(new DecisionNode(column: bestCriteria.Key, value: bestCriteria.Value, nextTrueNode: trueBranch,
                                    nextFalseNode: falseBranch));
        }
Esempio n. 17
0
        /// <summary>
        /// Produces a textual dump of the subtree rooted at <paramref name="tree"/>.
        /// Leaves are written as their result counts, inner nodes as
        /// <c>column:value?</c> followed by the "T->" and "F->" branches.
        /// </summary>
        /// <param name="tree">Node to dump.</param>
        /// <param name="indent">Text placed in front of this node's branch markers;
        /// children get an extra tab.</param>
        /// <returns>A new builder containing the dump.</returns>
        public static StringBuilder PrintTree(DecisionNode tree, string indent = "")
        {
            StringBuilder output = new StringBuilder();

            bool isLeaf = tree.Results != null;
            if (!isLeaf)
            {
                // Criteria line
                output.Append(tree.Column);
                output.Append(":");
                output.Append(tree.Value);
                output.Append("?\n");

                // True branch, then false branch, each one level deeper
                string childIndent = indent + "\t";
                output.Append(indent).Append("T->").Append(PrintTree(tree.NextTrueNode, childIndent));
                output.Append(indent).Append("F->").Append(PrintTree(tree.NextFalseNode, childIndent));
                return output;
            }

            // Leaf: all result counts on one line
            foreach (var pair in tree.Results)
            {
                output.Append("{'");
                output.Append(pair.Key);
                output.Append("': ");
                output.Append(pair.Value);
                output.Append("}");
            }
            output.Append("\n");
            return output;
        }