/// <summary>
/// Builds a decision tree for the "explosive" target attribute from the given
/// source and returns its printed representation.
/// </summary>
/// <param name="sourceFile">
/// Value stored in <c>_sourceFile</c> and handed to the <c>RawDataSource</c> constructor.
/// </param>
/// <returns>
/// The string produced by <c>PrintNode</c> for the tree's root node, called with an
/// empty string as its second argument.
/// </returns>
public string GetTree(string sourceFile)
{
    _sourceFile = sourceFile;

    // Load the samples and derive the candidate attributes from them.
    RawDataSource dataSource = new RawDataSource(_sourceFile);
    TreeAttributeCollection candidateAttributes = dataSource.GetValidAttributeCollection();

    // Grow the tree against the fixed "explosive" target.
    DecisionTree tree = new DecisionTree();
    TreeNode rootNode = tree.mountTree(dataSource, "explosive", candidateAttributes);

    return PrintNode(rootNode, "");
}
/// <summary>
/// Builds the collection of attributes available for splitting: one
/// <c>TreeAttribute</c> per column of this table, skipping attributes already
/// present in the result and the "explosive" target column.
/// </summary>
/// <returns>
/// A new <c>TreeAttributeCollection</c>; each entry is created from a column name
/// and the result of <c>GetValuesFromColumn</c> for that column.
/// </returns>
public TreeAttributeCollection GetValidAttributeCollection()
{
    TreeAttributeCollection returnCollection = new TreeAttributeCollection();

    foreach (DataColumn column in this.Columns)
    {
        TreeAttribute currentAttribute =
            new TreeAttribute(column.ColumnName, GetValuesFromColumn(column.ColumnName));

        // Ordinal, case-insensitive match: a culture-sensitive ToUpper() can map
        // 'i' to a dotted capital (e.g. under tr-TR), in which case the target
        // column would no longer be recognised and would leak into the result.
        bool isTargetColumn = string.Equals(
            currentAttribute.AttributeName.Trim(),
            "EXPLOSIVE",
            System.StringComparison.OrdinalIgnoreCase);

        if (returnCollection.ContainsAttribute(currentAttribute) || isTargetColumn)
        {
            continue;
        }

        returnCollection.Add(currentAttribute);
    }

    return returnCollection;
}
/// <summary>
/// Picks the attribute with the largest gain over <paramref name="samples"/>.
/// </summary>
/// <param name="samples">Table forwarded to <c>gain</c> for every candidate.</param>
/// <param name="attributes">Candidate attributes to evaluate.</param>
/// <returns>
/// The first attribute reaching the highest gain that is strictly greater than
/// zero, or <c>null</c> when no attribute has a gain above zero.
/// </returns>
private TreeAttribute getBestAttribute(DataTable samples, TreeAttributeCollection attributes)
{
    TreeAttribute best = null;
    double bestGain = 0.0;

    foreach (TreeAttribute candidate in attributes)
    {
        double candidateGain = gain(samples, candidate);

        // Only a strictly larger gain replaces the current best; ties keep the
        // earlier attribute and a NaN gain is never selected.
        if (!(candidateGain > bestGain))
        {
            continue;
        }

        bestGain = candidateGain;
        best = candidate;
    }

    return best;
}
/// <summary>
/// Entry point for tree construction: stores the samples in <c>sampleData</c>
/// and delegates to <c>buildTree</c>.
/// </summary>
/// <param name="samples">Sample table to build the tree from.</param>
/// <param name="targetAttribute">Target attribute name, forwarded unchanged.</param>
/// <param name="attributes">Candidate attributes, forwarded unchanged.</param>
/// <returns>The node returned by <c>buildTree</c>.</returns>
public TreeNode mountTree(DataTable samples, string targetAttribute, TreeAttributeCollection attributes)
{
    sampleData = samples;

    TreeNode tree = buildTree(sampleData, targetAttribute, attributes);
    return tree;
}
/// <summary>
/// Recursively builds the decision (sub)tree for <paramref name="samples"/>.
/// </summary>
/// <param name="samples">
/// Rows to classify. When <c>null</c>, a single node wrapping
/// <paramref name="targetAttribute"/> is returned.
/// </param>
/// <param name="targetAttribute">Name of the target attribute, passed to the helper methods.</param>
/// <param name="attributes">Attributes still available for splitting at this level.</param>
/// <returns>
/// A leaf holding <c>true</c>/<c>false</c> when <c>allSamplesArePositive</c> /
/// <c>allSamplesAreNegative</c> report so; a leaf holding the most common value when
/// no attributes remain; otherwise a node for the best attribute with one child per
/// possible value. If <c>getBestAttribute</c> returns <c>null</c>, a childless node
/// wrapping <c>null</c> is returned.
/// </returns>
private TreeNode buildTree(DataTable samples, string targetAttribute, TreeAttributeCollection attributes)
{
    if (samples == null)
    {
        return new TreeNode(new OutcomeTreeAttribute(targetAttribute));
    }

    if (allSamplesArePositive(samples, targetAttribute))
    {
        return new TreeNode(new OutcomeTreeAttribute(true));
    }

    if (allSamplesAreNegative(samples, targetAttribute))
    {
        return new TreeNode(new OutcomeTreeAttribute(false));
    }

    if (attributes.Count == 0)
    {
        return new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
    }

    // Set-level statistics are kept in instance fields.
    // NOTE(review): presumably read by gain()/getBestAttribute — confirm.
    mTotal = samples.Rows.Count;
    mTargetAttribute = targetAttribute;
    mTotalPositives = countTotalPositives(samples);
    mEntropySet = calculateEntropy(mTotalPositives, mTotal - mTotalPositives);

    TreeAttribute bestAttribute = getBestAttribute(samples, attributes);
    TreeNode root = new TreeNode(bestAttribute);

    if (bestAttribute == null)
    {
        // No attribute was selected; keep the original contract of returning the bare node.
        return root;
    }

    // The attributes left for the children are the same for every branch,
    // so compute them once instead of once per value.
    TreeAttributeCollection remainingAttributes = new TreeAttributeCollection();
    for (int i = 0; i < attributes.Count; i++)
    {
        if (attributes[i].AttributeName != bestAttribute.AttributeName)
        {
            remainingAttributes.Add(attributes[i]);
        }
    }

    // Bracket the column name (escaping '\' and ']') so names containing spaces
    // or special characters still form a valid filter expression.
    string escapedColumn = bestAttribute.AttributeName.Replace("\\", "\\\\").Replace("]", "\\]");

    DataTable aSample = samples.Clone();

    foreach (string value in bestAttribute.PossibleValues)
    {
        aSample.Rows.Clear();

        // Double embedded apostrophes so the value stays a single string literal.
        string escapedValue = (value ?? string.Empty).Replace("'", "''");
        DataRow[] rows = samples.Select("[" + escapedColumn + "] = '" + escapedValue + "'");

        foreach (DataRow row in rows)
        {
            aSample.Rows.Add(row.ItemArray);
        }

        if (aSample.Rows.Count == 0)
        {
            // No sample carries this value: attach a leaf with the most common
            // outcome of the parent set and keep processing the other values.
            // (Returning here would discard the branches built so far, and the
            // empty subset has no most common value of its own.)
            TreeNode fallbackLeaf =
                new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
            root.AddTreeNode(fallbackLeaf, value);
        }
        else
        {
            // Recurse through a fresh DecisionTree instance, as the original did.
            // NOTE(review): presumably so this instance's cached fields are not
            // overwritten by the recursion — confirm.
            DecisionTree dc3 = new DecisionTree();
            TreeNode childNode = dc3.mountTree(aSample, targetAttribute, remainingAttributes);
            root.AddTreeNode(childNode, value);
        }
    }

    return root;
}