//public string relation = "eq";

/// <summary>
/// Creates a tree node bound to the given attribute and pre-allocates its child slots.
/// Both child collections receive one empty (null) slot, and both relation collections
/// one "eq" entry, per possible value of the attribute.
/// </summary>
/// <param name="attribute">Attribute the node is connected to; may be null.</param>
public TreeNode(TreeAttribute attribute)
{
    // Without an attribute, or without a value list, the node gets exactly one slot.
    int slotCount = 1;
    if (attribute != null && attribute.PossibleValues != null)
    {
        slotCount = attribute.PossibleValues.Count;
    }

    _children = new TreeNodeCollection();
    _children2 = new TreeNodeCollection();
    _childRelations = new PossibleValueRelationCollection();
    _childRelations2 = new PossibleValueRelationCollection();

    for (int slot = 0; slot < slotCount; slot++)
    {
        _children.Add(null);
        _children2.Add(null);
        _childRelations.Add("eq");
        _childRelations2.Add("eq");
    }

    _attribute = attribute;
}
/// <summary>
/// Scans the samples whose numeric value for the attribute is less than or equal to
/// the given threshold and counts how many of them match the reference outcome.
/// </summary>
/// <param name="samples">DataTable with samples</param>
/// <param name="attribute">Attribute whose column is parsed as a number</param>
/// <param name="value">Threshold, as text; parsed with double.Parse</param>
/// <param name="positives">Number of selected rows whose target equals the reference value</param>
/// <param name="negatives">Number of selected rows whose target differs from the reference value</param>
private void getValuesToAttributeContinous(DataTable samples, TreeAttribute attribute, string value, out int positives, out int negatives)
{
    positives = 0;
    negatives = 0;

    double threshold = double.Parse(value);
    // The value returned by getUniformRefValue is what counts as "positive" here.
    string referenceOutcome = getUniformRefValue(samples, mTargetAttribute);

    foreach (DataRow sample in samples.Rows)
    {
        // TODO: Figure out if this is correct - it looks bad
        double cell = double.Parse((string)sample[attribute.AttributeName]);

        // Rows above the threshold are not counted at all.
        if (!(cell <= threshold))
        {
            continue;
        }

        string outcome = sample[mTargetAttribute].ToString().Trim().ToLower();
        if (outcome == referenceOutcome)
        {
            positives++;
        }
        else
        {
            negatives++;
        }
    }
}
/// <summary>
/// Computes the gain obtained by splitting the samples on an attribute.
/// </summary>
/// <param name="samples">DataTable with samples</param>
/// <param name="attribute">Attribute to be evaluated</param>
/// <returns>
/// The set entropy plus the (negative) weighted entropy term: the best single
/// threshold term for a continuous attribute, the sum over all values otherwise.
/// </returns>
private double gain(DataTable samples, TreeAttribute attribute)
{
    PossibleValueCollection candidates = attribute.PossibleValues;
    int positives;
    int negatives;

    if (!isContinousSet(candidates))
    {
        double weightedSum = 0.0;
        for (int idx = 0; idx < candidates.Count; idx++)
        {
            getValuesToAttribute(samples, attribute, candidates[idx], out positives, out negatives);

            // does it really split? A value covering every sample contributes nothing.
            int covered = positives + negatives;
            if (mTotal - covered <= 0)
            {
                continue;
            }

            double entropy = getCalculatedEntropy(positives, negatives);
            weightedSum -= (double)covered / mTotal * entropy;
        }

        return mEntropySet + weightedSum;
    }

    // Continuous attribute: keep the term of the best possible split threshold.
    // NOTE(review): only the "<=" side of each threshold is scored here.
    double bestTerm = -9999999.0;
    for (int idx = 0; idx < candidates.Count; idx++)
    {
        getValuesToAttributeContinous(samples, attribute, candidates[idx], out positives, out negatives);

        double entropy = getCalculatedEntropy(positives, negatives);
        double term = -((double)(positives + negatives) / mTotal * entropy);
        if (term > bestTerm)
        {
            bestTerm = term;
        }
    }

    return mEntropySet + bestTerm;
}
/// <summary>
/// Picks the attribute with the highest gain for the given samples.
/// </summary>
/// <param name="samples">DataTable with samples</param>
/// <param name="attributes">Candidate attributes</param>
/// <returns>
/// The first attribute with the strictly highest gain, or null when no
/// attribute has a gain above the -9999999.0 starting value.
/// </returns>
private TreeAttribute getBestAttribute(DataTable samples, TreeAttributeCollection attributes)
{
    TreeAttribute winner = null;
    double winningGain = -9999999.0;

    foreach (TreeAttribute candidate in attributes)
    {
        double candidateGain = gain(samples, candidate);

        // Strict comparison: on a tie the earlier attribute is kept.
        if (!(candidateGain > winningGain))
        {
            continue;
        }

        winningGain = candidateGain;
        winner = candidate;
    }

    return winner;
}
/// <summary>
/// Builds one attribute per column of this table, using the column name and the
/// values returned by GetValuesFromColumn for it.
/// </summary>
/// <returns>
/// The collected attributes, without duplicates and without the column whose
/// trimmed, upper-cased name is "RESULT".
/// </returns>
public TreeAttributeCollection GetValidAttributeCollection()
{
    TreeAttributeCollection validAttributes = new TreeAttributeCollection();

    foreach (DataColumn column in this.Columns)
    {
        string columnName = column.ColumnName;
        TreeAttribute candidate = new TreeAttribute(columnName, GetValuesFromColumn(columnName));

        // Keep the attribute only if it is new and is not the result column.
        if (!validAttributes.ContainsAttribute(candidate)
            && !(candidate.AttributeName.ToUpper().Trim() == "RESULT"))
        {
            validAttributes.Add(candidate);
        }
    }

    return validAttributes;
}
/// <summary>
/// Scans the sample table for rows holding a given value of an attribute and counts
/// how many of those rows have a positive and how many a negative result.
/// </summary>
/// <param name="samples">DataTable with samples</param>
/// <param name="attribute">Attribute to be searched</param>
/// <param name="value">Value of the attribute that selects a row</param>
/// <param name="positives">Number of selected rows whose target, trimmed and upper-cased, is "TRUE"</param>
/// <param name="negatives">Number of selected rows with any other target value</param>
private void getValuesToAttribute0(DataTable samples, TreeAttribute attribute, string value, out int positives, out int negatives)
{
    positives = 0;
    negatives = 0;

    foreach (DataRow sample in samples.Rows)
    {
        // TODO: Figure out if this is correct - it looks bad
        string cell = (string)sample[attribute.AttributeName];
        if (!string.Equals(cell, value))
        {
            continue;
        }

        bool isPositive = sample[mTargetAttribute].ToString().Trim().ToUpper() == "TRUE";
        if (isPositive)
        {
            positives++;
        }
        else
        {
            negatives++;
        }
    }
}
/// <summary>
/// Sets up a decision tree based on the samples submitted.
/// </summary>
/// <param name="samples">Table with the samples the tree is built from</param>
/// <param name="targetAttribute">Name of the table column that holds the outcome of each sample</param>
/// <param name="attributes">Attributes that may still be used to split the samples</param>
/// <returns>The root of the decision tree built for the samples</returns>
private TreeNode internalMountTree(DataTable samples, string targetAttribute, TreeAttributeCollection attributes)
{
    if (allSamplesAreUniform(samples, targetAttribute) == true)
    {
        return new TreeNode(new OutcomeTreeAttribute(getUniformRefValue(samples, targetAttribute)));
    }

    if (attributes.Count == 0)
    {
        return new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
    }

    // State read by gain() and the counting helpers while choosing the split.
    mTotal = samples.Rows.Count;
    mTargetAttribute = targetAttribute;
    mTotalPositives = countTotalPositives(samples);
    mEntropySet = getCalculatedEntropy(mTotalPositives, mTotal - mTotalPositives);

    TreeAttribute bestAttribute = getBestAttribute(samples, attributes);
    TreeNode root = new TreeNode(bestAttribute);
    if (bestAttribute == null)
    {
        return root;
    }

    // Bracket-quoted so that column names with spaces or operators stay valid in a filter.
    string columnRef = quoteFilterColumnName(bestAttribute.AttributeName);

    if (isContinousSet(bestAttribute.PossibleValues))
    {
        string value = bestSplitValue(samples, bestAttribute);

        // NOTE(review): an empty side still replaces this whole node with a most-common-value
        // leaf. This is kept unchanged because the split attribute is handed back to the
        // children, so continuing with the other (full) side could recurse on the same
        // samples and attributes - confirm before changing.

        // First below then above
        DataTable below = selectPartition(
            samples,
            columnRef + " <= " + value,
            bestAttribute.AttributeName + " <= " + "" + value + "");
        if (below.Rows.Count == 0)
        {
            return new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
        }

        DecisionTree belowBuilder = new DecisionTree();
        TreeNode belowNode = belowBuilder.mountTree(below, targetAttribute, childAttributes(attributes, bestAttribute, true));
        root.AddTreeNode(belowNode, value, "leq");

        DataTable above = selectPartition(
            samples,
            columnRef + " > " + value,
            bestAttribute.AttributeName + " > " + "" + value + "");
        if (above.Rows.Count == 0)
        {
            return new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
        }

        DecisionTree aboveBuilder = new DecisionTree();
        TreeNode aboveNode = aboveBuilder.mountTree(above, targetAttribute, childAttributes(attributes, bestAttribute, true));
        root.AddTreeNode(aboveNode, value, "gt");
    }
    else
    {
        foreach (string value in bestAttribute.PossibleValues)
        {
            // Select all elements with this value of the attribute.
            DataTable partition = selectPartition(samples, columnRef + " = " + quoteFilterStringLiteral(value), null);

            TreeNode childNode;
            if (partition.Rows.Count == 0)
            {
                // No sample has this value: hang a most-common-value leaf under this branch
                // only. Returning here instead would throw away the root and every branch
                // already built for the other values.
                childNode = new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
            }
            else
            {
                DecisionTree builder = new DecisionTree();
                childNode = builder.mountTree(partition, targetAttribute, childAttributes(attributes, bestAttribute, false));
            }

            root.AddTreeNode(childNode, value, "eq");
        }
    }

    return root;
}

/// <summary>
/// Copies the rows of the samples that match a filter into a new table with the same schema.
/// </summary>
/// <param name="samples">Table to select from</param>
/// <param name="filter">DataTable.Select filter expression</param>
/// <param name="traceLabel">When not null, every copied row is echoed to the console under this label</param>
/// <returns>A new table holding copies of the matching rows</returns>
private static DataTable selectPartition(DataTable samples, string filter, string traceLabel)
{
    // Clone() copies the structure only, so the partition starts without rows.
    DataTable partition = samples.Clone();

    foreach (DataRow row in samples.Select(filter))
    {
        partition.Rows.Add(row.ItemArray);

        if (traceLabel == null)
        {
            continue;
        }

        Console.WriteLine(" SPLIT {0} ROW:", traceLabel);
        foreach (DataColumn column in samples.Columns)
        {
            Console.WriteLine(" " + row[column]);
        }
    }

    return partition;
}

/// <summary>
/// Creates the attribute list handed to a child node: every attribute except the one just split on.
/// </summary>
/// <param name="attributes">Attributes available at the current node</param>
/// <param name="splitAttribute">Attribute the current node splits on</param>
/// <param name="recycleSplitAttribute">
/// When true, the split attribute is appended again provided at least one other attribute is left
/// </param>
/// <returns>A new collection; the input collection is not modified</returns>
private static TreeAttributeCollection childAttributes(TreeAttributeCollection attributes, TreeAttribute splitAttribute, bool recycleSplitAttribute)
{
    TreeAttributeCollection remaining = new TreeAttributeCollection();
    for (int i = 0; i < attributes.Count; i++)
    {
        if (attributes[i].AttributeName != splitAttribute.AttributeName)
        {
            remaining.Add(attributes[i]);
        }
    }

    // Recycle the best continuous attribute if there are others.
    if (recycleSplitAttribute && remaining.Count > 0)
    {
        remaining.Add(splitAttribute);
    }

    return remaining;
}

/// <summary>
/// Wraps a column name in square brackets for use in a filter expression,
/// escaping the backslash and closing-bracket characters inside it.
/// </summary>
private static string quoteFilterColumnName(string columnName)
{
    return "[" + columnName.Replace("\\", "\\\\").Replace("]", "\\]") + "]";
}

/// <summary>
/// Turns a value into a single-quoted filter-expression literal, doubling embedded single quotes.
/// </summary>
private static string quoteFilterStringLiteral(string value)
{
    if (value == null)
    {
        return "''";
    }

    return "'" + value.Replace("'", "''") + "'";
}