/// <summary>
/// Renders a decision tree as text. A leaf is written as its result counts on one
/// line; an inner node is written as "column:value?" followed by its true ("T->")
/// and false ("F->") subtrees.
/// </summary>
/// <param name="tree">Root of the (sub)tree to render.</param>
/// <param name="indent">Prefix written in front of this node's branch markers.</param>
/// <returns>A builder holding the text of the whole subtree.</returns>
public static StringBuilder PrintTree(DecisionNode tree, string indent = "")
{
    StringBuilder output = new StringBuilder();

    // Leaf: dump every (label, count) pair and terminate the line.
    if (tree.Results != null)
    {
        foreach (var entry in tree.Results)
        {
            output.Append("{'").Append(entry.Key).Append("': ").Append(entry.Value).Append("}");
        }

        output.Append("\n");
        return output;
    }

    // Inner node: the split criterion first ...
    output.Append(tree.Column).Append(":").Append(tree.Value).Append("?").Append("\n");

    // ... then both branches, each rendered one tab deeper.
    string childIndent = indent + "\t";
    output.Append(indent).Append("T->");
    output.Append(PrintTree(tree.NextTrueNode, childIndent));
    output.Append(indent).Append("F->");
    output.Append(PrintTree(tree.NextFalseNode, childIndent));

    return output;
}
/// <summary>
/// Initializes a tree node. Every argument is optional, so a caller names only the
/// parts that apply to the node being created.
/// </summary>
/// <param name="column">Stored in <c>Column</c>; defaults to -1.</param>
/// <param name="value">Stored in <c>Value</c>.</param>
/// <param name="results">Stored in <c>Results</c>.</param>
/// <param name="nextTrueNode">Stored in <c>NextTrueNode</c>.</param>
/// <param name="nextFalseNode">Stored in <c>NextFalseNode</c>.</param>
public DecisionNode(
    int column = -1,
    string value = null,
    Dictionary<string, double> results = null,
    DecisionNode nextTrueNode = null,
    DecisionNode nextFalseNode = null)
{
    // Split criterion.
    this.Column = column;
    this.Value = value;

    // Child links.
    this.NextTrueNode = nextTrueNode;
    this.NextFalseNode = nextFalseNode;

    // Result counts.
    this.Results = results;
}
/// <summary>
/// Creates a decision-tree node from whichever parts the caller supplies; anything
/// left out keeps its default (-1 for the column, null for everything else).
/// </summary>
/// <param name="column">Value for the <c>Column</c> property.</param>
/// <param name="value">Value for the <c>Value</c> property.</param>
/// <param name="results">Value for the <c>Results</c> property.</param>
/// <param name="nextTrueNode">Value for the <c>NextTrueNode</c> property.</param>
/// <param name="nextFalseNode">Value for the <c>NextFalseNode</c> property.</param>
public DecisionNode(int column = -1, string value = null,
                    Dictionary<string, double> results = null,
                    DecisionNode nextTrueNode = null, DecisionNode nextFalseNode = null)
{
    this.Results = results;

    this.Column = column;
    this.Value = value;

    this.NextTrueNode = nextTrueNode;
    this.NextFalseNode = nextFalseNode;
}
/// <summary>
/// Loads the zoo data set from "zoo.txt", builds a decision tree from it, prunes two
/// hand-picked subtrees, draws the tree and shows the class legend in listBoxZoo.
/// </summary>
private void zoo_example()
{
    // Positions inside one comma-separated record that are kept as raw text.
    const int FirstColumn = 0;   // dropped from the training rows (presumably the animal name)
    const int LegsColumn = 13;   // lines up with the "Legs" header once column 0 is removed
    const int LastColumn = 17;   // presumably the animal type that the tree predicts
    const double PruneMinGain = 1.01;

    string path = @"zoo.txt";
    Headers = new string[]
    {
        "Hair", "Feathers", "Eggs", "Milk", "Airborne", "Aquatic", "Predator", "Toothed",
        "Backbone", "Breathes", "Venomous", "Fins", "Legs", "Tail", "Domestic", "Catsize"
    };
    Result = "Animal type";

    List<string[]> rows = new List<string[]>();
    List<string[]> dummy = new List<string[]>();
    foreach (var animalData in File.ReadAllLines(path))
    {
        // A blank line holds no record; parsing it would add a zero-column row to
        // the training set.
        if (string.IsNullOrWhiteSpace(animalData))
        {
            continue;
        }

        string[] line = animalData.Split(',');
        List<string> data = new List<string>(line.Length);
        for (int i = 0; i < line.Length; ++i)
        {
            if (i == FirstColumn || i == LegsColumn || i == LastColumn)
            {
                data.Add(line[i]);
            }
            else
            {
                // Every other field is normalised through BoolParser.
                data.Add(BoolParser.Parse(line[i]).ToString());
            }
        }

        // The full record goes to UpdateDataBaseZoo; the tree is trained without column 0.
        dummy.Add(data.ToArray());
        data.RemoveAt(0);
        rows.Add(data.ToArray());
    }

    UpdateDataBaseZoo(dummy);
    tcMain.Clear();
    tree = DecisionTreeTools.BuildTree(rows);

    // The two pruned subtrees were chosen by hand for this data set. Walk to them
    // defensively so a differently shaped tree skips the pruning instead of
    // crashing with a NullReferenceException; leaves are skipped as well because
    // Prune dereferences both children of the node it is given.
    DecisionNode pivot = tree.NextFalseNode == null ? null : tree.NextFalseNode.NextTrueNode;
    if (pivot != null)
    {
        DecisionNode firstTarget = pivot.NextFalseNode == null ? null : pivot.NextFalseNode.NextTrueNode;
        if (firstTarget != null && firstTarget.Results == null)
        {
            DecisionTreeTools.Prune(firstTarget, PruneMinGain);
        }

        DecisionNode secondTarget = pivot.NextTrueNode == null ? null : pivot.NextTrueNode.NextFalseNode;
        if (secondTarget != null && secondTarget.Results == null)
        {
            DecisionTreeTools.Prune(secondTarget, PruneMinGain);
        }
    }

    PrintTree(tree);

    // Legend: class number -- (member count) members.
    listBoxZoo.Items.Clear();
    listBoxZoo.Items.Add(
        "1 -- (41) aardvark, antelope, bear, boar, buffalo, calf, cavy, cheetah, deer, dolphin, elephant, fruitbat, giraffe, girl, goat, gorilla, hamster, hare, leopard, lion, lynx, mink, mole, mongoose, opossum, oryx, platypus, polecat, pony, porpoise, puma, pussycat, raccoon, reindeer, seal, sealion, squirrel, vampire, vole, wallaby,wolf ");
    listBoxZoo.Items.Add(
        "2 -- (20) chicken, crow, dove, duck, flamingo, gull, hawk, kiwi, lark, ostrich, parakeet, penguin, pheasant, rhea, skimmer, skua, sparrow, swan, vulture, wren ");
    listBoxZoo.Items.Add("3 -- (5) pitviper, seasnake, slowworm, tortoise, tuatara");
    listBoxZoo.Items.Add(
        "4 -- (13) bass, carp, catfish, chub, dogfish, haddock, herring, pike, piranha, seahorse, sole, stingray, tuna ");
    listBoxZoo.Items.Add("5 -- (4) frog, frog, newt, toad ");
    listBoxZoo.Items.Add("6 -- (8) flea, gnat, honeybee, housefly, ladybird, moth, termite, wasp ");
    // This literal was split by a raw line break, which a regular string literal
    // cannot contain; it is one line again.
    listBoxZoo.Items.Add(
        "7 -- (10) clam, crab, crayfish, lobster, octopus, scorpion, seawasp, slug, starfish, worm");
    listBoxZoo.Visibility = Visibility.Visible;
}
/// <summary>
/// Adds <paramref name="tree"/> and, recursively, all of its descendants to the
/// tcMain tree control, one label per node.
/// </summary>
/// <param name="tree">Node to display.</param>
/// <param name="tnControl">Visual parent of the new label; null adds the label as a root.</param>
private void PrintTree(DecisionNode tree, TreeNode tnControl = null)
{
    bool isLeaf = tree.Results != null;

    Label label = new Label
    {
        BorderBrush = Brushes.White,
        BorderThickness = new Thickness(0.5)
    };

    if (isLeaf)
    {
        // Leaf caption: one "{<Result>: '<label>'}" group per result entry.
        StringBuilder caption = new StringBuilder();
        foreach (var entry in tree.Results)
        {
            caption.Append("{").Append(Result).Append(": '").Append(entry.Key).Append("'}");
        }

        label.Content = caption.ToString();
        label.Foreground = Brushes.YellowGreen;
    }
    else
    {
        // Inner-node caption: the question asked at this split.
        label.Content = Headers[tree.Column] + " : " + tree.Value + " ?";
        label.Foreground = Brushes.Orange;
    }

    TreeNode tnSubtreeRoot;
    if (tnControl != null)
    {
        tnSubtreeRoot = tcMain.AddNode(label, tnControl);
    }
    else
    {
        tnSubtreeRoot = tcMain.AddRoot(label);
    }

    if (!isLeaf)
    {
        // The false branch is added before the true branch.
        PrintTree(tree.NextFalseNode, tnSubtreeRoot);
        PrintTree(tree.NextTrueNode, tnSubtreeRoot);
    }
}
/// <summary>
/// Prunes a tree bottom-up. The call first descends into every branch that is not
/// a leaf. Once both children of a node are leaves, their results are pooled and
/// the entropy of the pooled set is compared with the average entropy of the two
/// leaves; if the difference is below <paramref name="mingain"/> the leaves are
/// removed and their pooled counts become the results of this node, which then is
/// a merge candidate for its own parent.
/// </summary>
/// <param name="tree">Root of the subtree to prune. A leaf, a node without both
/// children, or null is left untouched.</param>
/// <param name="mingain">Minimum entropy reduction a split must provide to be kept.</param>
public static void Prune(DecisionNode tree, double mingain)
{
    // Nothing to merge below a leaf; without this guard the child dereferences
    // below throw a NullReferenceException.
    if (tree == null || tree.NextTrueNode == null || tree.NextFalseNode == null)
    {
        return;
    }

    // If the branches aren't leaves, prune them first.
    if (tree.NextTrueNode.Results == null)
    {
        Prune(tree.NextTrueNode, mingain);
    }

    if (tree.NextFalseNode.Results == null)
    {
        Prune(tree.NextFalseNode, mingain);
    }

    // Only a node whose children are both leaves (possibly after the recursion
    // above merged them) can be collapsed.
    if (tree.NextTrueNode.Results == null || tree.NextFalseNode.Results == null)
    {
        return;
    }

    // Rebuild one single-column row per counted result so Entropy and
    // UniqueCounts can be reused on the pooled data.
    Set tb = new Set(), fb = new Set();
    foreach (var s in tree.NextTrueNode.Results)
    {
        for (int i = 0; i < s.Value; i++)
        {
            tb.Add(new string[] { s.Key });
        }
    }

    foreach (var s in tree.NextFalseNode.Results)
    {
        for (int i = 0; i < s.Value; i++)
        {
            fb.Add(new string[] { s.Key });
        }
    }

    // Concat rather than Union: every row must be kept, duplicates included.
    // Built once and shared by the entropy test and the merge.
    var combined = tb.Concat(fb).ToList();

    // Reduction in entropy achieved by the split. The parentheses matter: the
    // previous form "Entropy(tb) + Entropy(fb) / 2" halved only the false branch.
    double delta = Entropy(combined) - (Entropy(tb) + Entropy(fb)) / 2;
    if (delta < mingain)
    {
        // Merge the branches into this node.
        tree.NextTrueNode = null;
        tree.NextFalseNode = null;
        tree.Results = UniqueCounts(combined);
    }
}
/// <summary>
/// Loads the tab-separated sample from "decision_tree_example.txt", builds a
/// decision tree from it, draws the tree, forwards the rows to
/// UpdateDataBaseSimpleExample and hides the zoo legend.
/// </summary>
private void simple_example()
{
    string path = @"decision_tree_example.txt";

    Headers = new string[] { "Refferer", "Location", "Read FAQ", "Pages viewed" };
    Result = "Service";

    // One row per line of the file, one field per tab-separated token.
    string[] lines = File.ReadAllLines(path);
    List<string[]> rows = new List<string[]>();
    for (int i = 0; i < lines.Length; ++i)
    {
        rows.Add(lines[i].Split('\t'));
    }

    tcMain.Clear();
    tree = DecisionTreeTools.BuildTree(rows);
    PrintTree(tree);

    UpdateDataBaseSimpleExample(rows);
    listBoxZoo.Visibility = Visibility.Hidden;
}
/// <summary>
/// Classifies an observation by walking the decision tree from
/// <paramref name="tree"/> down to a leaf.
/// </summary>
/// <param name="observation">Field values of the observation, indexed by column.</param>
/// <param name="tree">Root of the decision tree.</param>
/// <returns>The <c>Results</c> dictionary of the leaf that was reached.</returns>
public static Dictionary<string, double> Classify(string[] observation, DecisionNode tree)
{
    DecisionNode node = tree;

    // A node with results is a leaf; keep descending until one is reached.
    while (node.Results == null)
    {
        string data = observation[node.Column];

        double observed;
        double threshold;
        bool takeTrueBranch;

        // Compare numerically only when BOTH sides are numbers. Converting the
        // node value unconditionally threw a FormatException whenever a numeric
        // observation met a textual split value.
        // NOTE(review): keep this rule in step with the one used to divide rows
        // while the tree is built.
        if (double.TryParse(data, out observed) && double.TryParse(node.Value, out threshold))
        {
            takeTrueBranch = observed >= threshold;
        }
        else
        {
            takeTrueBranch = data == node.Value;
        }

        node = takeTrueBranch ? node.NextTrueNode : node.NextFalseNode;
    }

    return node.Results;
}
/// <summary>
/// Classifies an observation like <c>Classify</c>, but tolerates missing fields.
/// When the field tested by a node is null, both branches are followed and their
/// results are combined, each weighted by its share of the total count.
/// </summary>
/// <param name="observation">Field values of the observation; null marks a missing value.</param>
/// <param name="tree">Root of the decision tree.</param>
/// <returns>The results of the reached leaf, or the weighted combination of both
/// branches wherever a tested field was missing.</returns>
public static Dictionary<string, double> MissingDataClassify(string[] observation, DecisionNode tree)
{
    if (tree.Results != null)
    {
        return tree.Results;
    }

    string v = observation[tree.Column];
    if (v == null)
    {
        // The tested value is unknown: evaluate both branches.
        var tr = MissingDataClassify(observation, tree.NextTrueNode);
        var fr = MissingDataClassify(observation, tree.NextFalseNode);

        double tcount = tr.Values.Sum();
        double fcount = fr.Values.Sum();
        double total = tcount + fcount;
        double tw = tcount / total;
        double fw = fcount / total;

        Dictionary<string, double> result = new Dictionary<string, double>();
        foreach (var i in tr)
        {
            result[i.Key] = i.Value * tw;
        }

        foreach (var i in fr)
        {
            // A label present in both branches must accumulate; assigning here
            // used to throw away the true-branch contribution.
            double soFar;
            result.TryGetValue(i.Key, out soFar); // stays 0 when the key is new
            result[i.Key] = soFar + i.Value * fw;
        }

        return result;
    }

    double observed;
    double threshold;
    DecisionNode branch;

    // Compare numerically only when BOTH sides are numbers. Converting the node
    // value unconditionally threw a FormatException whenever a numeric
    // observation met a textual split value.
    if (double.TryParse(v, out observed) && double.TryParse(tree.Value, out threshold))
    {
        branch = observed >= threshold ? tree.NextTrueNode : tree.NextFalseNode;
    }
    else
    {
        branch = v == tree.Value ? tree.NextTrueNode : tree.NextFalseNode;
    }

    return MissingDataClassify(observation, branch);
}
/// <summary>
/// Takes a new observation and classifies it according to the decision tree.
/// </summary>
/// <param name="observation">Field values of the observation, indexed by column.</param>
/// <param name="tree">Node at which classification starts.</param>
/// <returns>The <c>Results</c> dictionary of the leaf the observation ends up in.</returns>
public static Dictionary<string, double> Classify(string[] observation, DecisionNode tree)
{
    // A node that carries results is a leaf: classification is finished.
    if (tree.Results != null)
    {
        return tree.Results;
    }

    string data = observation[tree.Column];

    double observed;
    double threshold;
    bool matches;

    // Use the numeric ">=" test only when the observation AND the node value
    // parse as numbers; otherwise fall back to string equality. The previous
    // unconditional Convert.ToDouble(tree.Value) threw a FormatException for a
    // numeric observation tested against a textual node value.
    // NOTE(review): keep this rule in step with the one used to divide rows
    // while the tree is built.
    if (double.TryParse(data, out observed) && double.TryParse(tree.Value, out threshold))
    {
        matches = observed >= threshold;
    }
    else
    {
        matches = data == tree.Value;
    }

    return Classify(observation, matches ? tree.NextTrueNode : tree.NextFalseNode);
}
/// <summary>
/// Draws a decision (sub)tree in the tcMain control: a label is created for
/// <paramref name="tree"/>, attached under <paramref name="tnControl"/> (or as a
/// root when that is null), and the same is done for both children of an inner node.
/// </summary>
/// <param name="tree">Node to draw.</param>
/// <param name="tnControl">Control node to attach to; null for a root.</param>
private void PrintTree(DecisionNode tree, TreeNode tnControl = null)
{
    TreeNode tnSubtreeRoot;

    Label nodeLabel = new Label();
    nodeLabel.Foreground = Brushes.Orange;
    nodeLabel.BorderThickness = new Thickness(0.5);
    nodeLabel.BorderBrush = Brushes.White;

    // Leaf: show the predicted value(s) in a different colour and stop.
    if (tree.Results != null)
    {
        StringBuilder text = new StringBuilder();
        foreach (var pair in tree.Results)
        {
            text.Append("{" + Result + ": '");
            text.Append(pair.Key);
            text.Append("'}");
        }

        nodeLabel.Content = text.ToString();
        nodeLabel.Foreground = Brushes.YellowGreen;

        if (tnControl != null)
        {
            tnSubtreeRoot = tcMain.AddNode(nodeLabel, tnControl);
        }
        else
        {
            tnSubtreeRoot = tcMain.AddRoot(nodeLabel);
        }

        return;
    }

    // Inner node: show the question, then hang both branches below it.
    nodeLabel.Content = Headers[tree.Column] + " : " + tree.Value + " ?";

    if (tnControl != null)
    {
        tnSubtreeRoot = tcMain.AddNode(nodeLabel, tnControl);
    }
    else
    {
        tnSubtreeRoot = tcMain.AddRoot(nodeLabel);
    }

    PrintTree(tree.NextFalseNode, tnSubtreeRoot);
    PrintTree(tree.NextTrueNode, tnSubtreeRoot);
}
/// <summary>
/// Runs the small demo: reads "decision_tree_example.txt" (one tab-separated
/// record per line), builds and draws the decision tree, passes the rows on to
/// UpdateDataBaseSimpleExample and hides listBoxZoo.
/// </summary>
private void simple_example()
{
    string path = @"decision_tree_example.txt";
    Headers = new string[] { "Refferer", "Location", "Read FAQ", "Pages viewed" };
    Result = "Service";

    // Split every line of the file into its tab-separated fields.
    List<string[]> rows = new List<string[]>();
    foreach (string record in File.ReadAllLines(path))
    {
        string[] fields = record.Split('\t');
        rows.Add(fields);
    }

    // Replace whatever tree is currently displayed.
    tcMain.Clear();
    tree = DecisionTreeTools.BuildTree(rows);
    PrintTree(tree);

    UpdateDataBaseSimpleExample(rows);
    listBoxZoo.Visibility = Visibility.Hidden;
}
/// <summary>
/// Runs the zoo demo: reads "zoo.txt" (one comma-separated record per line),
/// builds a decision tree, prunes two hand-picked subtrees, draws the result and
/// shows the class legend in listBoxZoo.
/// </summary>
private void zoo_example()
{
    // Fields that are kept verbatim; every other field goes through BoolParser.
    const int FirstColumn = 0;   // removed before training (presumably the animal name)
    const int LegsColumn = 13;   // matches the "Legs" header once column 0 is removed
    const int LastColumn = 17;   // presumably the animal type the tree predicts
    const double PruneMinGain = 1.01;

    string path = @"zoo.txt";
    Headers = new string[]
    {
        "Hair", "Feathers", "Eggs", "Milk", "Airborne", "Aquatic", "Predator", "Toothed",
        "Backbone", "Breathes", "Venomous", "Fins", "Legs", "Tail", "Domestic", "Catsize"
    };
    Result = "Animal type";

    List<string[]> rows = new List<string[]>();
    List<string[]> dummy = new List<string[]>();
    foreach (var animalData in File.ReadAllLines(path))
    {
        // Skip blank lines: they would otherwise end up as zero-column training rows.
        if (string.IsNullOrWhiteSpace(animalData))
        {
            continue;
        }

        string[] line = animalData.Split(',');
        List<string> data = new List<string>(line.Length);
        for (int i = 0; i < line.Length; ++i)
        {
            bool keepRaw = i == FirstColumn || i == LegsColumn || i == LastColumn;
            data.Add(keepRaw ? line[i] : BoolParser.Parse(line[i]).ToString());
        }

        // UpdateDataBaseZoo receives the complete record; the tree is trained on
        // the record without its first column.
        dummy.Add(data.ToArray());
        data.RemoveAt(0);
        rows.Add(data.ToArray());
    }

    UpdateDataBaseZoo(dummy);
    tcMain.Clear();
    tree = DecisionTreeTools.BuildTree(rows);

    // Both prune targets sit four levels deep on paths chosen by hand for this
    // data set. Each step is checked so that a tree of another shape skips the
    // pruning instead of failing with a NullReferenceException. Leaves are
    // skipped too, because Prune dereferences both children of its argument.
    DecisionNode pivot = tree.NextFalseNode == null ? null : tree.NextFalseNode.NextTrueNode;
    if (pivot != null)
    {
        DecisionNode firstTarget = pivot.NextFalseNode == null ? null : pivot.NextFalseNode.NextTrueNode;
        if (firstTarget != null && firstTarget.Results == null)
        {
            DecisionTreeTools.Prune(firstTarget, PruneMinGain);
        }

        DecisionNode secondTarget = pivot.NextTrueNode == null ? null : pivot.NextTrueNode.NextFalseNode;
        if (secondTarget != null && secondTarget.Results == null)
        {
            DecisionTreeTools.Prune(secondTarget, PruneMinGain);
        }
    }

    PrintTree(tree);

    // Legend: class number -- (member count) members.
    listBoxZoo.Items.Clear();
    listBoxZoo.Items.Add(
        "1 -- (41) aardvark, antelope, bear, boar, buffalo, calf, cavy, cheetah, deer, dolphin, elephant, fruitbat, giraffe, girl, goat, gorilla, hamster, hare, leopard, lion, lynx, mink, mole, mongoose, opossum, oryx, platypus, polecat, pony, porpoise, puma, pussycat, raccoon, reindeer, seal, sealion, squirrel, vampire, vole, wallaby,wolf ");
    listBoxZoo.Items.Add(
        "2 -- (20) chicken, crow, dove, duck, flamingo, gull, hawk, kiwi, lark, ostrich, parakeet, penguin, pheasant, rhea, skimmer, skua, sparrow, swan, vulture, wren ");
    listBoxZoo.Items.Add("3 -- (5) pitviper, seasnake, slowworm, tortoise, tuatara");
    listBoxZoo.Items.Add(
        "4 -- (13) bass, carp, catfish, chub, dogfish, haddock, herring, pike, piranha, seahorse, sole, stingray, tuna ");
    listBoxZoo.Items.Add("5 -- (4) frog, frog, newt, toad ");
    listBoxZoo.Items.Add("6 -- (8) flea, gnat, honeybee, housefly, ladybird, moth, termite, wasp ");
    // This literal was broken by a raw line break, which a regular string literal
    // cannot contain; it is a single line again.
    listBoxZoo.Items.Add(
        "7 -- (10) clam, crab, crayfish, lobster, octopus, scorpion, seawasp, slug, starfish, worm");
    listBoxZoo.Visibility = Visibility.Visible;
}
/// <summary>
/// Works like <c>Classify</c>, except that a missing (null) value in the column a
/// node tests does not pick one branch: the results of both branches are
/// calculated and combined, each scaled by that branch's share of the total count.
/// </summary>
/// <param name="observation">Field values of the observation; null marks a missing value.</param>
/// <param name="tree">Node at which classification starts.</param>
/// <returns>The leaf results, or the weighted combination of both branches at
/// every node whose tested value was missing.</returns>
public static Dictionary<string, double> MissingDataClassify(string[] observation, DecisionNode tree)
{
    if (tree.Results != null)
    {
        return tree.Results;
    }

    string v = observation[tree.Column];

    if (v != null)
    {
        double observed;
        double threshold;
        bool matches;

        // Numeric ">=" only when the observation AND the node value are numbers;
        // otherwise string equality. Converting the node value unconditionally
        // threw a FormatException for a numeric observation tested against a
        // textual node value.
        if (double.TryParse(v, out observed) && double.TryParse(tree.Value, out threshold))
        {
            matches = observed >= threshold;
        }
        else
        {
            matches = v == tree.Value;
        }

        return MissingDataClassify(observation, matches ? tree.NextTrueNode : tree.NextFalseNode);
    }

    // Value missing: follow both branches and weight each by its total count.
    var tr = MissingDataClassify(observation, tree.NextTrueNode);
    var fr = MissingDataClassify(observation, tree.NextFalseNode);

    double tcount = tr.Values.Sum();
    double fcount = fr.Values.Sum();
    double tw = tcount / (tcount + fcount);
    double fw = fcount / (tcount + fcount);

    Dictionary<string, double> result = new Dictionary<string, double>();
    foreach (var i in tr)
    {
        result[i.Key] = i.Value * tw;
    }

    foreach (var i in fr)
    {
        // Add to whatever the true branch contributed for the same label;
        // overwriting it (the previous behaviour) lost that contribution.
        double soFar;
        result.TryGetValue(i.Key, out soFar); // remains 0 for a label not seen yet
        result[i.Key] = soFar + i.Value * fw;
    }

    return result;
}
/// <summary>
/// Builds a decision tree recursively by choosing, for the current set of rows,
/// the (column, value) split with the highest gain under the selected metric.
/// The last column of every row is not considered as a split candidate.
/// </summary>
/// <param name="rows">Rows to build the (sub)tree from.</param>
/// <param name="mode">Metric used to score a set of rows; it is applied to the
/// whole tree, including all subtrees.</param>
/// <returns>An inner node holding the best split and its two subtrees, a leaf
/// holding the result counts when no split yields a positive gain, or an empty
/// node when <paramref name="rows"/> is empty.</returns>
public static DecisionNode BuildTree(Set rows, MeasuringMetric mode = MeasuringMetric.Entropy)
{
    Fun scoreCalc = Entropy;
    switch (mode)
    {
        case MeasuringMetric.Entropy:
            scoreCalc = Entropy;
            break;
        case MeasuringMetric.GiniImpurity:
            scoreCalc = GiniImpurity;
            break;
        case MeasuringMetric.Variance:
            scoreCalc = Variance;
            break;
    }

    if (rows.Count == 0)
    {
        return new DecisionNode();
    }

    double currentScore = scoreCalc(rows);

    // Best split found so far.
    double bestGain = 0;
    Pair bestCriteria = new Pair();
    DividedSet bestSets = new DividedSet();

    int columnCount = rows[0].Length - 1;
    for (int col = 0; col < columnCount; ++col)
    {
        // Distinct values of this column in first-seen order. The set gives
        // constant-time membership tests; the list keeps the order, which decides
        // ties because only a strictly larger gain replaces the current best.
        HashSet<string> seen = new HashSet<string>();
        List<string> columnValues = new List<string>();
        foreach (var row in rows)
        {
            if (seen.Add(row[col]))
            {
                columnValues.Add(row[col]);
            }
        }

        // Try dividing the rows on every value of this column.
        foreach (var value in columnValues)
        {
            DividedSet divSet = DivideSet(rows, col, value);

            // A split that leaves one side empty can never be chosen, so it is
            // not scored at all.
            if (divSet.Key.Count == 0 || divSet.Value.Count == 0)
            {
                continue;
            }

            // Information gain: score before the split minus the weighted
            // scores of the two parts.
            double p = (double)divSet.Key.Count / rows.Count;
            double gain = currentScore - p * scoreCalc(divSet.Key) - (1 - p) * scoreCalc(divSet.Value);
            if (gain > bestGain)
            {
                bestGain = gain;
                bestCriteria = new Pair(col, value);
                bestSets = divSet;
            }
        }
    }

    // Create the sub-branches. The metric has to be passed on: without it every
    // subtree fell back to the default (Entropy) whatever the caller asked for.
    if (bestGain > 0)
    {
        DecisionNode trueBranch = BuildTree(bestSets.Key, mode);
        DecisionNode falseBranch = BuildTree(bestSets.Value, mode);
        return new DecisionNode(column: bestCriteria.Key, value: bestCriteria.Value,
                                nextTrueNode: trueBranch, nextFalseNode: falseBranch);
    }

    // No useful split: this node becomes a leaf.
    return new DecisionNode(results: UniqueCounts(rows));
}
/// <summary>
/// Produces a plain-text picture of a decision tree. An inner node becomes the
/// line "column:value?" followed by its "T->" and "F->" branches; a leaf becomes
/// one line listing its result counts.
/// </summary>
/// <param name="tree">Node to print.</param>
/// <param name="indent">Text put in front of the branch markers of this node;
/// children receive it extended by one tab.</param>
/// <returns>The text for this node and everything below it.</returns>
public static StringBuilder PrintTree(DecisionNode tree, string indent = "")
{
    StringBuilder text = new StringBuilder();
    bool isInnerNode = tree.Results == null;

    if (isInnerNode)
    {
        // The criteria tested at this node.
        text.Append(tree.Column);
        text.Append(":");
        text.Append(tree.Value);
        text.Append("?");
        text.Append("\n");

        // The branches, true first.
        string deeper = indent + "\t";

        text.Append(indent);
        text.Append("T->");
        text.Append(PrintTree(tree.NextTrueNode, deeper));

        text.Append(indent);
        text.Append("F->");
        text.Append(PrintTree(tree.NextFalseNode, deeper));
    }
    else
    {
        // Leaf: one "{'label': count}" group per result.
        foreach (var pair in tree.Results)
        {
            text.Append("{'");
            text.Append(pair.Key);
            text.Append("': ");
            text.Append(pair.Value);
            text.Append("}");
        }

        text.Append("\n");
    }

    return text;
}