/// <summary>
/// Console entry point. Prompts for a training file and a test file, builds
/// several ID3 trees (one per round, each from the table returned by
/// GetRandomSamples), runs every tree against the test file through
/// TraverseMain, and finally hands the collected results to
/// CalcuateMajorityVotes.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Number of trees built. The result table gets one column per round, and the
    // round index is passed to TraverseMain, so both must use the same value.
    const int rounds = 10;

    DataTable result = new DataTable();
    for (int column = 0; column < rounds; column++)
    {
        result.Columns.Add(column.ToString(), typeof(string));
    }

    // Console.ReadLine returns null when the input stream is exhausted (e.g.
    // redirected stdin); fall back to an empty name instead of dereferencing null.
    Console.WriteLine("Enter the training file name");
    string trainfile = (Console.ReadLine() ?? string.Empty).Trim();
    Console.WriteLine("Enter the test file name");
    string testfile = (Console.ReadLine() ?? string.Empty).Trim();

    ArrayList attributenames = getAttributeNames(trainfile);
    DataTable samples = getDataTable(trainfile, attributenames);

    // One (empty) result row is created per training sample.
    // NOTE(review): the rows are sized from the training table although the results
    // come from the test file - confirm this is intended.
    for (int row = 0; row < samples.Rows.Count; row++)
    {
        result.Rows.Add();
    }

    string[] names = (string[])attributenames.ToArray(typeof(string));

    for (int round = 0; round < rounds; round++)
    {
        DataTable random = GetRandomSamples(samples);
        Attribute[] attributes = getList(random, names);
        DecisionID3 id3 = new DecisionID3();
        TreeNode root = id3.MainTree(random, "result", attributes, trainfile, attributenames);
        // Presumably TraverseMain records this tree's predictions under column `round`.
        TraverseMain(root, testfile, attributenames, "Test File", ref result, round);
    }

    CalcuateMajorityVotes(ref result, testfile, attributenames);

    // Block until the user presses Enter so the console output stays visible.
    Console.ReadLine();
}
/// <summary>
/// Recursively constructs the decision (sub)tree for the given samples.
/// </summary>
/// <param name="samples">Tuples that reached this node.</param>
/// <param name="resultLabel">Identifier of the class label, forwarded to the tuple tests and to getMostCommonValue (presumably the label column name).</param>
/// <param name="attributes">Attributes still available for splitting at this node.</param>
/// <param name="filename">Training file; re-read through getDataTable when the local samples cannot decide a label.</param>
/// <param name="attributenames">Attribute names passed to getDataTable together with <paramref name="filename"/>.</param>
/// <returns>
/// A leaf labelled "1" or "0" when the class tests succeed, a leaf labelled with a
/// most-common value when no split is possible, or otherwise a node for the chosen
/// splitting attribute with one child per attribute value.
/// </returns>
private TreeNode constructTree(DataTable samples, string resultLabel, Attribute[] attributes, string filename, ArrayList attributenames)
{
    // All tuples belong to class label 1.
    if (tuplePositiveTest(samples, resultLabel))
    {
        return new TreeNode(new Attribute("1"));
    }

    // All tuples belong to class label 0.
    if (tupleNegativeTest(samples, resultLabel))
    {
        return new TreeNode(new Attribute("0"));
    }

    TotalTuples = samples.Rows.Count;
    resultClass = resultLabel;
    TotalPositives = countPositiveClass(samples);
    int mnegative = TotalTuples - TotalPositives;

    // No attributes left to split on: label the leaf with the most common value.
    // With unequal class counts the local samples decide; on a tie the whole training
    // file is consulted instead. (Both branches used to test for a tie, which left the
    // second one unreachable; this now mirrors the bestAttribute == null handling below.)
    if (attributes.Length == 0)
    {
        if (TotalPositives != mnegative)
        {
            return new TreeNode(new Attribute(getMostCommonValue(samples, resultLabel)));
        }

        return new TreeNode(new Attribute(getMostCommonValue(getDataTable(filename, attributenames), resultLabel)));
    }

    // No tuples reached this node: fall back to the whole training file.
    if (samples.Rows.Count == 0)
    {
        return new TreeNode(new Attribute(getMostCommonValue(getDataTable(filename, attributenames), resultLabel)));
    }

    Entropy = calcEntropy(TotalPositives, TotalTuples - TotalPositives);

    // Find the best attribute to split on.
    Attribute bestAttribute = getSplittingAttribute(samples, attributes);
    if (bestAttribute == null)
    {
        if (TotalPositives == mnegative)
        {
            return new TreeNode(new Attribute(getMostCommonValue(getDataTable(filename, attributenames), resultLabel)));
        }

        return new TreeNode(new Attribute(getMostCommonValue(samples, resultLabel)));
    }

    TreeNode root = new TreeNode(bestAttribute);
    DataTable aSample = samples.Clone();
    bestAttribute.postives = TotalPositives;
    bestAttribute.negatives = mnegative;

    // Attributes left for the children: everything except the one chosen here.
    // The list does not depend on the attribute value, so it is built once.
    ArrayList remaining = new ArrayList(attributes.Length);
    for (int i = 0; i < attributes.Length; i++)
    {
        if (attributes[i].AttributeName != bestAttribute.AttributeName)
        {
            remaining.Add(attributes[i]);
        }
    }

    // In a DataTable filter expression a bracketed column name must have '\' and ']'
    // escaped with a backslash; bracketing keeps names containing spaces or operators valid.
    string column = "[" + bestAttribute.AttributeName.Replace("\\", "\\\\").Replace("]", "\\]") + "]";

    // Split on every possible value of the chosen attribute.
    foreach (string value in bestAttribute.values)
    {
        // Single quotes inside a quoted filter literal are escaped by doubling them.
        string literal = (value ?? string.Empty).Replace("'", "''");

        aSample.Rows.Clear();
        DataRow[] rows = samples.Select(column + " = '" + literal + "'");
        foreach (DataRow row in rows)
        {
            aSample.Rows.Add(row.ItemArray);
        }

        // A fresh DecisionID3 instance is used per child, as before, so the child's
        // bookkeeping fields are separate from this instance's; each child also gets
        // its own attribute array.
        DecisionID3 dc3 = new DecisionID3();
        TreeNode child = dc3.MainTree(aSample, resultLabel, (Attribute[])remaining.ToArray(typeof(Attribute)), filename, attributenames);
        root.AddNode(child, value);
    }

    return root;
}