예제 #1
0
        //public string relation = "eq";

        /// <summary>
        /// Creates a tree node bound to the given attribute and pre-allocates its child slots.
        /// </summary>
        /// <param name="attribute">
        /// Attribute the node is connected to. When it is null, or exposes no PossibleValues
        /// collection, exactly one child slot is allocated.
        /// </param>
        public TreeNode(TreeAttribute attribute)
        {
            // One slot per possible value; a single slot when there is no value list at all.
            bool hasValueList = attribute != null && attribute.PossibleValues != null;
            int  slotCount    = hasValueList ? attribute.PossibleValues.Count : 1;

            _children        = new TreeNodeCollection();
            _children2       = new TreeNodeCollection();
            _childRelations  = new PossibleValueRelationCollection();
            _childRelations2 = new PossibleValueRelationCollection();

            // Every slot starts without a child node and with the "eq" relation.
            for (int slot = 0; slot < slotCount; slot++)
            {
                _children.Add(null);
                _children2.Add(null);
                _childRelations.Add("eq");
                _childRelations2.Add("eq");
            }

            _attribute = attribute;
        }
예제 #2
0
        /// <summary>
        /// Scans the samples and counts the rows whose value for a continuous attribute is less than
        /// or equal to a threshold, split by whether the row's target value equals the reference
        /// value returned by getUniformRefValue for the target column.
        /// </summary>
        /// <param name="samples">DataTable with the samples to scan.</param>
        /// <param name="attribute">Attribute whose column is compared against the threshold.</param>
        /// <param name="value">Threshold, as text accepted by double.Parse.</param>
        /// <param name="positives">Receives the number of rows at or below the threshold whose target value equals the reference value.</param>
        /// <param name="negatives">Receives the number of rows at or below the threshold whose target value differs from it.</param>
        private void getValuesToAttributeContinous(DataTable samples, TreeAttribute attribute, string value, out int positives, out int negatives)
        {
            positives = 0;
            negatives = 0;
            double threshold = double.Parse(value);

            // NOTE(review): the comparison below lower-cases and trims the row value, so this
            // presumably expects getUniformRefValue to return a trimmed, lower-case string — confirm.
            string positiveRef = getUniformRefValue(samples, mTargetAttribute);

            foreach (DataRow aRow in samples.Rows)
            {
                // Read the cell through ToString() instead of a hard (string) cast: the indexer
                // returns object, and the cast threw InvalidCastException for non-string columns.
                // Text that is not a number still raises FormatException from double.Parse.
                double rowValue = double.Parse(aRow[attribute.AttributeName].ToString());
                if (rowValue <= threshold)
                {
                    if (aRow[mTargetAttribute].ToString().Trim().ToLower() == positiveRef)
                    {
                        positives++;
                    }
                    else
                    {
                        negatives++;
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Computes the gain obtained by splitting the samples on an attribute.
        /// </summary>
        /// <param name="samples">DataTable with the samples being evaluated.</param>
        /// <param name="attribute">Attribute whose gain is calculated.</param>
        /// <returns>
        /// mEntropySet plus the negated, weighted entropy term: the largest single term over all
        /// candidate values for a continuous attribute, otherwise the sum of the terms of every
        /// value that leaves at least one sample uncovered.
        /// </returns>
        private double gain(DataTable samples, TreeAttribute attribute)
        {
            PossibleValueCollection candidates = attribute.PossibleValues;
            int matching;
            int nonMatching;

            if (!isContinousSet(candidates))
            {
                double weightedSum = 0.0;

                for (int idx = 0; idx < candidates.Count; idx++)
                {
                    getValuesToAttribute(samples, attribute, candidates[idx], out matching, out nonMatching);

                    // A value that covers every sample does not split the set, so it adds nothing.
                    int uncovered = mTotal - (matching + nonMatching);
                    if (uncovered <= 0)
                    {
                        continue;
                    }

                    double valueEntropy = getCalculatedEntropy(matching, nonMatching);
                    weightedSum += -(double)(matching + nonMatching) / mTotal * valueEntropy;
                }
                return mEntropySet + weightedSum;
            }

            // Continuous attribute: every candidate value is tried as a threshold and only the
            // best (largest) term is kept.
            double bestTerm = -9999999.0;

            for (int idx = 0; idx < candidates.Count; idx++)
            {
                getValuesToAttributeContinous(samples, attribute, candidates[idx], out matching, out nonMatching);

                double thresholdEntropy = getCalculatedEntropy(matching, nonMatching);
                double term = -(double)(matching + nonMatching) / mTotal * thresholdEntropy;
                if (term > bestTerm)
                {
                    bestTerm = term;
                }
            }
            return mEntropySet + bestTerm;
        }
예제 #4
0
        /// <summary>
        /// Picks the attribute with the highest gain for the given samples.
        /// </summary>
        /// <param name="samples">DataTable with the samples the gain is computed on.</param>
        /// <param name="attributes">Candidate attributes.</param>
        /// <returns>
        /// The attribute with the highest gain (the earliest one wins a tie), or null when no
        /// attribute has a gain above -9999999.0, e.g. when the collection is empty.
        /// </returns>
        private TreeAttribute getBestAttribute(DataTable samples, TreeAttributeCollection attributes)
        {
            TreeAttribute winner      = null;
            double        winningGain = -9999999.0;

            for (int idx = 0; idx < attributes.Count; idx++)
            {
                TreeAttribute candidate     = attributes[idx];
                double        candidateGain = gain(samples, candidate);

                // Strictly greater: a later attribute with an equal gain does not replace the winner.
                if (candidateGain > winningGain)
                {
                    winner      = candidate;
                    winningGain = candidateGain;
                }
            }
            return winner;
        }
예제 #5
0
        /// <summary>
        /// Builds one TreeAttribute per column of this table, leaving out attributes the result
        /// already contains and the attribute whose upper-cased, trimmed name is "RESULT".
        /// </summary>
        /// <returns>A new collection holding the accepted attributes in column order.</returns>
        public TreeAttributeCollection GetValidAttributeCollection()
        {
            TreeAttributeCollection validAttributes = new TreeAttributeCollection();

            foreach (DataColumn column in this.Columns)
            {
                string        columnName = column.ColumnName;
                TreeAttribute candidate  = new TreeAttribute(columnName, GetValuesFromColumn(columnName));

                // Accept only attributes that are new and are not the result column.
                if (!validAttributes.ContainsAttribute(candidate) && candidate.AttributeName.ToUpper().Trim() != "RESULT")
                {
                    validAttributes.Add(candidate);
                }
            }
            return validAttributes;
        }
예제 #6
0
        /// <summary>
        /// Scans the sample table and counts the rows whose attribute equals the given value,
        /// split by whether the row's target column reads "TRUE" (after trimming and upper-casing).
        /// </summary>
        /// <param name="samples">DataTable with samples.</param>
        /// <param name="attribute">Attribute whose column is inspected.</param>
        /// <param name="value">Attribute value a row must have to be counted.</param>
        /// <param name="positives">Receives the number of matching rows whose target value is "TRUE".</param>
        /// <param name="negatives">Receives the number of matching rows with any other target value.</param>
        private void getValuesToAttribute0(DataTable samples, TreeAttribute attribute, string value, out int positives, out int negatives)
        {
            positives = 0;
            negatives = 0;

            foreach (DataRow aRow in samples.Rows)
            {
                // Compare the cell through ToString() instead of a hard (string) cast: the indexer
                // returns object, and the cast threw InvalidCastException for non-string columns.
                if (aRow[attribute.AttributeName].ToString() != value)
                {
                    continue;
                }

                if (aRow[mTargetAttribute].ToString().Trim().ToUpper() == "TRUE")
                {
                    positives++;
                }
                else
                {
                    negatives++;
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Recursively builds the decision tree for the submitted samples.
        /// </summary>
        /// <param name="samples">Table with the samples used to build the tree.</param>
        /// <param name="targetAttribute">Name of the column that holds the outcome of each sample.</param>
        /// <param name="attributes">Attributes still available for splitting the samples.</param>
        /// <returns>The root of the decision tree built for the samples.</returns>
        private TreeNode internalMountTree(DataTable samples, string targetAttribute, TreeAttributeCollection attributes)
        {
            // All samples share one outcome: the node is a leaf carrying that outcome.
            if (allSamplesAreUniform(samples, targetAttribute))
            {
                return new TreeNode(new OutcomeTreeAttribute(getUniformRefValue(samples, targetAttribute)));
            }

            // Nothing left to split on: fall back to the most common outcome.
            if (attributes.Count == 0)
            {
                return mostCommonOutcomeLeaf(samples, targetAttribute);
            }

            // gain() and the counting helpers read these fields, so they are set before
            // getBestAttribute() is called.
            mTotal           = samples.Rows.Count;
            mTargetAttribute = targetAttribute;
            mTotalPositives  = countTotalPositives(samples);
            mEntropySet      = getCalculatedEntropy(mTotalPositives, mTotal - mTotalPositives);

            TreeAttribute bestAttribute = getBestAttribute(samples, attributes);
            TreeNode      root          = new TreeNode(bestAttribute);

            if (bestAttribute == null)
            {
                return root;
            }

            string column = quoteColumnName(bestAttribute.AttributeName);

            if (isContinousSet(bestAttribute.PossibleValues))
            {
                string splitValue = bestSplitValue(samples, bestAttribute);

                // First the rows at or below the split value, then the rows above it.
                string[] operators = { "<=", ">" };
                string[] relations = { "leq", "gt" };

                for (int side = 0; side < operators.Length; side++)
                {
                    string    filter = column + " " + operators[side] + " " + splitValue;
                    DataTable subset = selectMatchingSamples(samples, filter, true);

                    // NOTE(review): an empty side ends the whole call with a leaf and discards any
                    // child already attached to root. Kept as-is so existing trees do not change —
                    // confirm whether a leaf child was intended instead.
                    if (subset.Rows.Count == 0)
                    {
                        return mostCommonOutcomeLeaf(samples, targetAttribute);
                    }

                    TreeAttributeCollection childAttributes = buildChildAttributes(attributes, bestAttribute, true);
                    root.AddTreeNode(mountSubtree(subset, targetAttribute, childAttributes), splitValue, relations[side]);
                }
            }
            else
            {
                foreach (string value in bestAttribute.PossibleValues)
                {
                    // Select all samples that have this value for the best attribute.
                    string    filter = column + " = " + quoteStringLiteral(value);
                    DataTable subset = selectMatchingSamples(samples, filter, false);

                    // NOTE(review): a value without samples ends the whole call with a leaf and
                    // discards the children built for earlier values. Kept as-is so existing trees
                    // do not change — confirm whether a leaf child was intended instead.
                    if (subset.Rows.Count == 0)
                    {
                        return mostCommonOutcomeLeaf(samples, targetAttribute);
                    }

                    TreeAttributeCollection childAttributes = buildChildAttributes(attributes, bestAttribute, false);
                    root.AddTreeNode(mountSubtree(subset, targetAttribute, childAttributes), value, "eq");
                }
            }

            return root;
        }

        /// <summary>
        /// Creates a leaf node carrying the most common target value of the samples.
        /// </summary>
        private TreeNode mostCommonOutcomeLeaf(DataTable samples, string targetAttribute)
        {
            return new TreeNode(new OutcomeTreeAttribute(getMostCommonValue(samples, targetAttribute)));
        }

        /// <summary>
        /// Mounts the subtree for one branch using a separate DecisionTree instance.
        /// </summary>
        private static TreeNode mountSubtree(DataTable subset, string targetAttribute, TreeAttributeCollection childAttributes)
        {
            // NOTE(review): a new instance per branch presumably keeps the recursion from
            // overwriting the statistics cached in this instance's fields — confirm.
            DecisionTree branchBuilder = new DecisionTree();
            return branchBuilder.mountTree(subset, targetAttribute, childAttributes);
        }

        /// <summary>
        /// Copies the rows of the samples that satisfy the filter into a new table with the same
        /// schema, optionally writing every copied row to the console.
        /// </summary>
        private static DataTable selectMatchingSamples(DataTable samples, string filter, bool trace)
        {
            DataTable subset = samples.Clone();

            foreach (DataRow row in samples.Select(filter))
            {
                subset.Rows.Add(row.ItemArray);

                if (trace)
                {
                    Console.WriteLine(" SPLIT {0} ROW:", filter);
                    foreach (DataColumn myCol in samples.Columns)
                    {
                        Console.WriteLine("  " + row[myCol]);
                    }
                }
            }
            return subset;
        }

        /// <summary>
        /// Builds the attribute list handed to a child: every attribute except the best one,
        /// optionally re-adding the best attribute at the end when other attributes remain.
        /// </summary>
        private static TreeAttributeCollection buildChildAttributes(TreeAttributeCollection attributes, TreeAttribute bestAttribute, bool recycleBest)
        {
            TreeAttributeCollection remaining = new TreeAttributeCollection();

            for (int i = 0; i < attributes.Count; i++)
            {
                if (attributes[i].AttributeName != bestAttribute.AttributeName)
                {
                    remaining.Add(attributes[i]);
                }
            }

            // A continuous attribute is recycled only if there are other attributes left.
            if (recycleBest && remaining.Count > 0)
            {
                remaining.Add(bestAttribute);
            }
            return remaining;
        }

        /// <summary>
        /// Wraps a column name in brackets for a DataTable filter expression, escaping the
        /// characters that are special inside brackets.
        /// </summary>
        private static string quoteColumnName(string columnName)
        {
            return "[" + columnName.Replace("\\", "\\\\").Replace("]", "\\]") + "]";
        }

        /// <summary>
        /// Turns a value into a single-quoted literal for a DataTable filter expression,
        /// doubling any single quote it contains.
        /// </summary>
        private static string quoteStringLiteral(string value)
        {
            return "'" + (value ?? "").Replace("'", "''") + "'";
        }