/// <summary>
/// Appends a textual rendering of the tree rooted at <paramref name="node"/> to
/// <paramref name="execution"/>. Every printed branch produces one line of the form
/// "level + attribute name + ':' + branch value", followed by the rendering of that
/// branch's subtree with one extra "-" appended to the level prefix.
/// </summary>
/// <param name="level">Prefix string indicating the current depth</param>
/// <param name="execution">Output StringBuilder that receives the lines</param>
/// <param name="node">Current TreeNode</param>
private static void PrintTree(string level, StringBuilder execution, TreeNode node)
{
    int index = 0;
    bool printedBranch = false;

    // Walk the unfiltered child list: the child at position i belongs to
    // Values[i] (MountTree adds exactly one child per value, in order).
    // Filtering the "Failure" children out first and counting afterwards would
    // shift the value label of every branch that follows a skipped child.
    foreach (TreeNode child in node.Childs)
    {
        // failure branches are not printed, but they still consume a position
        if (child.Attribute.Name != "Failure")
        {
            execution.AppendLine(level + node.Attribute.Name + ":" + node.Attribute.Values[index]);

            // render the subtree one level deeper
            PrintTree(level + "-", execution, child);
            printedBranch = true;
        }

        index++;
    }

    // A node without any printable child (a leaf, or a node whose children are
    // all failures) is rendered as a single line carrying its first value.
    if (!printedBranch)
    {
        execution.AppendLine(level + node.Attribute.Name + ":" + node.Attribute.Values[0]);
    }
}
/// <summary>
/// Classifies one data row by walking the tree from <paramref name="node"/> down to a
/// leaf and reports whether the leaf's value equals the row's actual categorical value.
/// </summary>
/// <param name="node">Current TreeNode</param>
/// <param name="line">Line of data to be tested</param>
/// <param name="categoricalAttribute">The categorical attribute</param>
/// <returns>
/// true when the leaf reached holds the same value as the row's categorical column;
/// false when it differs, when the walk ends on a node whose attribute index is
/// negative, or when no branch of a node matches the row's value.
/// </returns>
private static bool TestTree(TreeNode node, string[] line, shared.Attribute categoricalAttribute)
{
    // a node named after the categorical attribute is a leaf: compare the classification
    if (node.Attribute.Name == categoricalAttribute.Name)
    {
        return node.Attribute.Values.First().Equals(line[categoricalAttribute.Index].Trim());
    }

    int index = 0;
    bool branchFound = false;

    if (node.Attribute.Type == typeof(double))
    {
        // continuous attribute: the row value is parsed once, not once per token
        double rowValue = double.Parse(line[node.Attribute.Index]);

        foreach (string item in node.Attribute.Values)
        {
            if (item.IndexOf('<') > -1)
            {
                // "<threshold" token
                if (rowValue < double.Parse(item.Replace("<", "")))
                {
                    branchFound = true;
                    break;
                }
            }
            else
            {
                // lower-bound token; '=' and '>' are stripped separately, the same
                // way MountTree does it, so both "=>" and ">=" spellings parse
                if (rowValue >= double.Parse(item.Replace("=", "").Replace(">", "")))
                {
                    branchFound = true;
                    break;
                }
            }

            index++;
        }
    }
    else
    {
        // a negative index marks a node that is not backed by a data column
        // (MountTree builds its "Failure" node with index -1)
        if (node.Attribute.Index < 0)
        {
            return false;
        }

        // domain attribute: look for the value that equals the row's value
        string rowValue = line[node.Attribute.Index].Trim();

        foreach (string item in node.Attribute.Values)
        {
            if (item.Trim() == rowValue)
            {
                branchFound = true;
                break;
            }

            index++;
        }
    }

    // No branch matches (e.g. a value never seen while building the tree):
    // the row cannot be classified, so report a miss instead of indexing
    // past the end of the child list.
    if (!branchFound)
    {
        return false;
    }

    // continue the walk in the matching child
    return TestTree(node.Childs[index], line, categoricalAttribute);
}
/// <summary>
/// Recursively builds a decision tree for <paramref name="data"/>.
/// </summary>
/// <param name="attributeList">Attributes still available for splitting</param>
/// <param name="categoricalAttribute">The categorical attribute (the one being predicted)</param>
/// <param name="data">Rows of data used to build this subtree</param>
/// <returns>
/// The root of the subtree: a "Failure" node when <paramref name="data"/> is empty, a
/// leaf carrying the categorical value when all rows share it, the node built from
/// GetMostFrequentValue when no attribute is left, otherwise a node for the attribute
/// chosen by GetAttributeLargestGain with one child per attribute value.
/// </returns>
private static TreeNode MountTree(shared.Attribute[] attributeList, shared.Attribute categoricalAttribute, string[][] data)
{
    // if data is empty, return a single node with value failure
    if (data.Length == 0)
    {
        return new TreeNode(new shared.Attribute(typeof(string), "Failure", -1, new string[] { "Failure" }));
    }

    // GroupBy is deferred; materialize it once so the rows are not regrouped
    // for the count and again for reading the key
    var groupingData = data.GroupBy(m => m[categoricalAttribute.Index]).ToArray();

    /*
     * if data consists of records all with the same value for
     * the categorical attribute,
     * return a single node with that value
     */
    if (groupingData.Length == 1)
    {
        return new TreeNode(new shared.Attribute(
            categoricalAttribute.Type,
            categoricalAttribute.Name,
            categoricalAttribute.Index,
            new string[] { groupingData[0].Key }));
    }

    /*
     * if attributeList is empty, then return a single node with as value
     * the most frequent of the values of the categorical attribute
     * that are found in the records of data
     */
    if (attributeList.Length == 0)
    {
        return new TreeNode(GetMostFrequentValue(categoricalAttribute, data));
    }

    /*
     * Let bestAttribute be the attribute with the largest gain
     * among attributes in attributeList
     */
    shared.Attribute bestAttribute = GetAttributeLargestGain(attributeList, categoricalAttribute, data);

    // creates a root
    TreeNode root = new TreeNode(bestAttribute);

    // the chosen attribute is no longer available further down the tree
    string bestName = bestAttribute.Name.Trim();
    shared.Attribute[] newAttributeList = attributeList
        .Where(m => m.Name.Trim() != bestName)
        .ToArray();

    bool isContinuous = root.Attribute.Type == typeof(double);

    // one child per attribute value, added in value order
    foreach (string value in root.Attribute.Values)
    {
        string[][] newData;

        if (isContinuous)
        {
            // continuous range: the threshold is parsed once per value
            // instead of once per data row
            if (value.IndexOf('<') > -1)
            {
                double upperBound = double.Parse(value.Trim().Replace("<", ""));
                newData = data
                    .Where(m => double.Parse(m[bestAttribute.Index].Trim()) < upperBound)
                    .ToArray();
            }
            else
            {
                double lowerBound = double.Parse(value.Trim().Replace("=", "").Replace(">", ""));
                newData = data
                    .Where(m => double.Parse(m[bestAttribute.Index].Trim()) >= lowerBound)
                    .ToArray();
            }
        }
        else
        {
            // domain attribute: keep the rows holding exactly this value
            string expected = value.Trim();
            newData = data
                .Where(m => m[bestAttribute.Index].Trim() == expected)
                .ToArray();
        }

        // calls again for the subset; an empty subset yields a "Failure" child
        root.Childs.Add(MountTree(newAttributeList, categoricalAttribute, newData));
    }

    return root;
}