Ejemplo n.º 1
0
        /*
         * Recursively builds the decision (sub)tree for the given samples.
         *
         * samples        - tuples to build the (sub)tree from.
         * resultLabel    - name of the class-label column; stored in resultClass.
         * attributes     - candidate attributes still available for splitting.
         * filename       - passed to getDataTable together with attributenames when a
         *                  class tie (or an empty sample set) has to be resolved
         *                  (presumably reloads the complete data set - confirm against getDataTable).
         * attributenames - see filename.
         *
         * Returns a leaf labelled "1" or "0" when the positive / negative test succeeds,
         * a leaf labelled with getMostCommonValue(...) when no further split is possible,
         * otherwise a node for the best splitting attribute with one child per attribute value.
         *
         * Side effects: overwrites the TotalTuples, resultClass, TotalPositives and Entropy
         * members and records the positive/negative counts on the chosen attribute.
         */
        private TreeNode constructTree(DataTable samples, string resultLabel, Attribute[] attributes, string filename, ArrayList attributenames)
        {
            /* All tuples belong to class label 1 */
            if (tuplePositiveTest(samples, resultLabel))
            {
                return new TreeNode(new Attribute("1"));
            }

            /* All tuples belong to class label 0 */
            if (tupleNegativeTest(samples, resultLabel))
            {
                return new TreeNode(new Attribute("0"));
            }

            TotalTuples = samples.Rows.Count;
            resultClass = resultLabel;
            TotalPositives = countPositiveClass(samples);
            int mnegative = TotalTuples - TotalPositives;
            bool classesTied = TotalPositives == mnegative;

            /* No attribute left to split on: fall back to a majority-vote leaf.
               (Previously both branches tested for a tie, so the non-tied case was never handled here.) */
            if (attributes.Length == 0)
            {
                return buildMajorityLeaf(samples, classesTied, resultLabel, filename, attributenames);
            }

            /* No tuples reached this branch: vote on the table returned by getDataTable */
            if (samples.Rows.Count == 0)
            {
                return new TreeNode(new Attribute(getMostCommonValue(getDataTable(filename, attributenames), resultLabel)));
            }

            Entropy = calcEntropy(TotalPositives, TotalTuples - TotalPositives);

            /* To find the best attribute */
            Attribute bestAttribute = getSplittingAttribute(samples, attributes);
            if (bestAttribute == null)
            {
                return buildMajorityLeaf(samples, classesTied, resultLabel, filename, attributenames);
            }

            TreeNode root = new TreeNode(bestAttribute);
            bestAttribute.postives = TotalPositives;
            bestAttribute.negatives = mnegative;

            /* The attributes left for the children are the same for every value, so collect them once */
            ArrayList remainingAttributes = new ArrayList(attributes.Length);
            foreach (Attribute candidate in attributes)
            {
                if (candidate.AttributeName != bestAttribute.AttributeName)
                {
                    remainingAttributes.Add(candidate);
                }
            }

            DataTable aSample = samples.Clone();

            /* Loop through all possible attribute values to split based on the above best attribute obtained */
            foreach (string value in bestAttribute.values)
            {
                aSample.Rows.Clear();

                /* Single quotes inside the value must be doubled, otherwise the filter expression is malformed */
                string escapedValue = (value ?? string.Empty).Replace("'", "''");
                DataRow[] rows = samples.Select(bestAttribute.AttributeName + " = '" + escapedValue + "'");

                foreach (DataRow row in rows)
                {
                    aSample.Rows.Add(row.ItemArray);
                }

                /* Each child is built by its own DecisionID3 instance and gets its own attribute array */
                DecisionID3 dc3 = new DecisionID3();
                TreeNode child = dc3.MainTree(aSample, resultLabel, (Attribute[])remainingAttributes.ToArray(typeof(Attribute)), filename, attributenames);
                root.AddNode(child, value);
            }

            return root;
        }

        /* Builds a leaf labelled with getMostCommonValue: computed over the local samples when the
           classes are not tied, or over the table returned by getDataTable when they are tied. */
        private TreeNode buildMajorityLeaf(DataTable samples, bool classesTied, string resultLabel, string filename, ArrayList attributenames)
        {
            DataTable votingTable = classesTied ? getDataTable(filename, attributenames) : samples;
            return new TreeNode(new Attribute(getMostCommonValue(votingTable, resultLabel)));
        }