Пример #1
0
        /// <summary>
        /// Recursively builds a decision (sub)tree for the given records.
        /// </summary>
        /// <remarks>
        /// Leaf cases, checked in this order:
        /// (1) no records: a node whose attribute is named "Failure";
        /// (2) every record has the same value in the categorical column: a node carrying that value;
        /// (3) no candidate attributes remain: a node built from GetMostFrequentValue.
        /// Otherwise the attribute returned by GetAttributeLargestGain becomes the root,
        /// and one child is built per entry of that attribute's Values from the records
        /// matching the entry.
        /// </remarks>
        /// <param name="attributeList">Candidate attributes still available for splitting</param>
        /// <param name="categoricalAttribute">Attribute whose values label the leaves (the target column)</param>
        /// <param name="data">Records; each row is indexed by an attribute's Index</param>
        /// <returns>Root node of the (sub)tree built from <paramref name="data"/></returns>
        private static TreeNode MountTree(shared.Attribute[] attributeList, 
                                            shared.Attribute categoricalAttribute, 
                                            string[][] data)
        {
            // No records reached this branch: return a single "Failure" node.
            if (data.Length == 0)
            {
                return new TreeNode(new shared.Attribute(typeof(string), "Failure", -1, new string[] { "Failure" }));
            }

            // Distinct values of the categorical column, materialized once so the
            // query is not re-executed for the count and again for the first value.
            string[] classValues = data
                .Select(m => m[categoricalAttribute.Index])
                .Distinct()
                .ToArray();

            // All records agree on the categorical value: return a leaf with that value.
            if (classValues.Length == 1)
            {
                return new TreeNode(new shared.Attribute(
                    categoricalAttribute.Type,
                    categoricalAttribute.Name,
                    categoricalAttribute.Index,
                    new string[] { classValues[0] }));
            }

            // Nothing left to split on: fall back to the most frequent categorical
            // value found in the records.
            if (attributeList.Length == 0)
            {
                return new TreeNode(GetMostFrequentValue(categoricalAttribute, data));
            }

            // Split on the attribute with the largest gain among the candidates.
            shared.Attribute bestAttribute
                = GetAttributeLargestGain(attributeList, categoricalAttribute, data);

            TreeNode root = new TreeNode(bestAttribute);

            // The chosen attribute is removed (matched by trimmed name) from the
            // candidates passed down to the children.
            string bestName = bestAttribute.Name.Trim();
            shared.Attribute[] newAttributeList = attributeList
                .Where(m => m.Name.Trim() != bestName)
                .ToArray();

            // An attribute typed as double is split by range; anything else by exact value.
            bool isContinuous = root.Attribute.Type == typeof(double);

            foreach (string value in root.Attribute.Values)
            {
                string[][] newData;

                if (isContinuous)
                {
                    // A value containing '<' selects rows below the threshold; any other
                    // value has its '=' and '>' characters stripped and selects rows at
                    // or above the threshold.
                    bool isLowerRange = value.IndexOf('<') > -1;
                    string thresholdText = isLowerRange
                        ? value.Trim().Replace("<", "")
                        : value.Trim().Replace("=", "").Replace(">", "");

                    // Parsed once per value instead of once per row.
                    // NOTE(review): double.Parse uses the current culture here, as the
                    // original did - confirm the data and range labels are written with
                    // the same culture before switching to an invariant one.
                    double threshold = double.Parse(thresholdText);

                    newData = isLowerRange
                        ? data.Where(m => double.Parse(m[bestAttribute.Index].Trim()) < threshold).ToArray()
                        : data.Where(m => double.Parse(m[bestAttribute.Index].Trim()) >= threshold).ToArray();
                }
                else
                {
                    // Domain attribute: keep rows whose trimmed cell equals the trimmed value.
                    string wanted = value.Trim();
                    newData = data.Where(m => m[bestAttribute.Index].Trim() == wanted).ToArray();
                }

                // Recurse on the subset; an empty subset yields a "Failure" child.
                root.Childs.Add(MountTree(newAttributeList, categoricalAttribute, newData));
            }

            return root;
        }