コード例 #1
0
ファイル: ID3Learner.cs プロジェクト: mihnearo/decisionTrees
        /// <summary>
        /// Recursively builds the decision (sub-)tree for the given set of instances.
        /// </summary>
        /// <param name="instanceList">The training instances that reach this node.</param>
        /// <param name="attributeList">
        /// The attributes still available for splitting. May be null or empty, in which case a
        /// leaf labelled with the most common class is returned.
        /// </param>
        /// <param name="classAttribute">The attribute that holds the class label.</param>
        /// <returns>
        /// Either a leaf node (<c>Children == null</c>) whose label is a class value, or an inner
        /// node whose label is the name of the attribute chosen for the split and whose children
        /// are keyed by every value declared for that attribute.
        /// </returns>
        private Node Learn(Instance[] instanceList, Arff.Attribute[] attributeList, Arff.Attribute classAttribute)
        {
            // compute the class distribution (class value -> number of examples) of all the examples
            var classDistribution = GetClassDistribution(instanceList, classAttribute);

            string[] classesWithExamples = classDistribution.Where(a => a.Value > 0).Select(a => a.Key).ToArray();
            if (1 == classesWithExamples.Length)
            {
                // all examples belong to the same class so we have reached a leaf node
                Logger.Log(LogLevel.Progress, ".");
                return new Node() { Label = classesWithExamples[0], Children = null };
            }

            // OrderBy is a stable sort, so on a tie the class enumerated last among the
            // maxima wins; keep this exact expression to preserve the tie-breaking.
            string mostCommonClass = classDistribution.OrderBy(a => a.Value).Last().Key;
            if (null == attributeList
                || 0 == attributeList.Length)
            {
                // no more attributes to split on
                Logger.Log(LogLevel.Progress, ".");
                return new Node() { Label = mostCommonClass, Children = null };
            }

            var decisionAttribute = GetDecisionAttribute(instanceList, attributeList, classDistribution, classAttribute);
            if (null == decisionAttribute)
            {
                // can't find an attribute to split on that passes the split-termination condition
                Logger.Log(LogLevel.Progress, ".");
                return new Node() { Label = mostCommonClass, Children = null };
            }

            // recursively build the tree
            var root = new Node()
            {
                Label = decisionAttribute.Name,
                Children = new Dictionary<string, Node>()
            };

            // pre-process the instances: bucket them by their value for the chosen attribute
            var instanceGroups = instanceList
                .GroupBy(a => a.Data[decisionAttribute.Name])
                .ToDictionary(g => g.Key, v => v.ToArray());

            // The attributes left for the sub-trees are the same for every branch, so the
            // filtered array is built once instead of once per attribute value. It is only
            // read by the recursive calls visible here, so sharing it between siblings is safe.
            Arff.Attribute[] remainingAttributes = attributeList
                .Where(a => !a.Name.Equals(decisionAttribute.Name))
                .ToArray();

            // build the sub-trees, one per value declared for the chosen attribute
            foreach (string value in decisionAttribute.Values)
            {
                Instance[] branchInstances;

                if (!instanceGroups.TryGetValue(value, out branchInstances)
                    || 0 == branchInstances.Length)
                {
                    // if there are no examples for the node value, assign the
                    // label of most instances to the value branch
                    root.Children.Add(value, new Node() { Label = mostCommonClass, Children = null });
                    continue;
                }

                // build the subtree recursively
                root.Children.Add(value, this.Learn(branchInstances, remainingAttributes, classAttribute));
            }

            return root;
        }
コード例 #2
0
ファイル: ID3Learner.cs プロジェクト: mihnearo/decisionTrees
        /// <summary>
        /// Selects the attribute to split on: among the attributes that pass the chi-square
        /// split test, the one with the highest information gain (or gain ratio when
        /// <c>useGainRatio</c> is set).
        /// </summary>
        /// <param name="instanceList">The instances reaching the node being split.</param>
        /// <param name="attributeList">The candidate attributes.</param>
        /// <param name="classDistribution">Number of instances per class value for <paramref name="instanceList"/>.</param>
        /// <param name="classAttribute">The attribute that holds the class label.</param>
        /// <returns>
        /// The best attribute, or <c>null</c> when no candidate passes the chi-square test or
        /// yields a usable gain.
        /// </returns>
        private Arff.Attribute GetDecisionAttribute(Instance[] instanceList, Arff.Attribute[] attributeList, Dictionary<string, int> classDistribution, Arff.Attribute classAttribute)
        {
            Arff.Attribute decisionAttribute = null;
            double decisionAttributeGain = double.MinValue;

            // compute entropy of the whole instance set
            double entropy = ComputeEntropy(instanceList.Length, classDistribution);

            // evaluate each candidate attribute
            foreach (Arff.Attribute attribute in attributeList)
            {
                // group the instances by their values for the attribute being evaluated
                var groupedInstanceList = instanceList.GroupBy(i => i.Data[attribute.Name]);
                if (!this.handleUnknownAsValue)
                {
                    // unknown values do not form a branch of their own
                    groupedInstanceList = groupedInstanceList.Where(g => !g.Key.Equals(Instances.UnknownValue));
                }

                // GroupBy/Where are deferred: without materializing, every helper below would
                // re-run the grouping over all instances. Evaluate the query exactly once.
                groupedInstanceList = groupedInstanceList.ToArray();

                // compute the chi-squared statistic for the data
                double dataChiSquared = ComputeAttributeChiSquared(groupedInstanceList, instanceList.Length, classDistribution, classAttribute);
                double criticalChiSquared = ChiSquare.CriticalChiSquareValue(1.0f - this.splitStoppingConfidenceLevel, attribute.Values.Length - 1);
                Logger.Log(LogLevel.Info, "Chi-Square test for [{0}]: data=[{1}] critical=[{2}]", attribute.Name, dataChiSquared, criticalChiSquared);

                if (dataChiSquared < criticalChiSquared)
                {
                    // attribute did not pass chi-square split test
                    continue;
                }

                // compute the attribute entropy
                double attributeEntropy = ComputeAttributeEntropy(groupedInstanceList, instanceList.Length, classAttribute);

                // compute the gain
                double attributeGain = entropy - attributeEntropy;

                if (this.useGainRatio)
                {
                    // compute the gain ratio
                    double splitInfo = this.ComputeAttributeSplitInfo(groupedInstanceList, instanceList.Length);

                    if (0.0 == splitInfo || double.IsNaN(splitInfo))
                    {
                        // The ratio is undefined. Dividing anyway would give NaN (never selected)
                        // or +/-Infinity; +Infinity would beat every real candidate even though
                        // the split carries no information, so skip the attribute explicitly.
                        continue;
                    }

                    attributeGain = attributeGain / splitInfo;
                }

                // check the attribute; strict comparison keeps the first of equally good attributes
                if (decisionAttributeGain < attributeGain)
                {
                    // found a better attribute
                    decisionAttribute = attribute;
                    decisionAttributeGain = attributeGain;
                }
            }

            if (null != decisionAttribute)
            {
                Logger.Log(LogLevel.Info, "Selected Attribute - name=[{0}] gain=[{1}].", decisionAttribute.Name, decisionAttributeGain);
            }
            else
            {
                Logger.Log(LogLevel.Info, "No relevant attribute found.");
            }

            return decisionAttribute;
        }