Exemple #1
0
        /// <summary>
        /// Classifies <paramref name="item"/> by walking <paramref name="tree"/> from the given
        /// node downwards until a node whose <c>Category</c> is not null is reached, then stores
        /// that category in <c>result</c>.
        /// </summary>
        /// <param name="tree">Decision-tree node to start the walk from.</param>
        /// <param name="item">
        /// Hashtable indexed by each visited node's <c>SplitAttribute</c>; the looked-up value
        /// must be convertible to <see cref="double"/>.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tree"/> or <paramref name="item"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="item"/> has no value for an attribute the tree splits on.
        /// </exception>
        public Classifier(BuildDecisionTree tree, Hashtable item)
        {
            if (tree == null)
            {
                throw new ArgumentNullException(nameof(tree));
            }

            if (item == null)
            {
                throw new ArgumentNullException(nameof(item));
            }

            // A node without a category is an inner node: pick a branch and keep descending.
            while (tree.Category == null)
            {
                object rawValue = item[tree.SplitAttribute];

                // Hashtable returns null for a missing key. Convert.ToDouble(null) would turn
                // that into 0 and silently pick a branch, so reject it explicitly.
                if (rawValue == null)
                {
                    throw new ArgumentException(
                        "Item has no value for split attribute '" + tree.SplitAttribute + "'.",
                        nameof(item));
                }

                // Convert instead of unboxing: a direct (double) cast throws
                // InvalidCastException for boxed ints or numeric strings, while the tree
                // builder reads the same values through Convert.ToDouble.
                double itemValue = Convert.ToDouble(rawValue);

                // Values at or above the pivot follow the "match" branch.
                tree = itemValue >= tree.SplitPivot
                    ? tree.MatchSubTree
                    : tree.NotMatchSubTree;
            }

            result = tree.Category;
        }
Exemple #2
0
        /// <summary>
        /// Builds one decision-tree node from <paramref name="trainingSet"/>.
        /// The node becomes a leaf (only <c>category</c> is assigned) when the depth budget is
        /// used up, the set has at most <c>minItemsCount</c> items, its entropy is at or below
        /// <c>entropyThreshold</c>, or no candidate split yields a positive information gain.
        /// Otherwise the split with the highest gain is stored and both subtrees are built
        /// recursively with <paramref name="maxTreeDepth"/> reduced by one.
        /// </summary>
        /// <param name="trainingSet">
        /// Training items; every value stored under a key other than <c>categoryAttribute</c>
        /// must be convertible to <see cref="double"/>.
        /// </param>
        /// <param name="maxTreeDepth">Remaining depth budget; 0 forces a leaf.</param>
        /// <exception cref="ArgumentNullException"><paramref name="trainingSet"/> is null.</exception>
        public BuildDecisionTree(Hashtable[] trainingSet, int maxTreeDepth)
        {
            if (trainingSet == null)
            {
                throw new ArgumentNullException(nameof(trainingSet));
            }

            // Stop when the depth budget is exhausted or the set is too small to split.
            if (maxTreeDepth == 0 || trainingSet.Length <= minItemsCount)
            {
                category = MostFrequentValue(trainingSet, categoryAttribute);
                return;
            }

            double initialEntropy = Entropy(trainingSet, categoryAttribute);

            // Stop when the set is already homogeneous enough.
            if (initialEntropy <= entropyThreshold)
            {
                category = MostFrequentValue(trainingSet, categoryAttribute);
                return;
            }

            // Best split found so far; bestMatch stays null until some split has positive gain.
            double      bestGain      = 0;
            object      bestAttribute = null;
            double      bestPivot     = 0;
            Hashtable[] bestMatch     = null;
            Hashtable[] bestNotMatch  = null;

            // Attribute/pivot pairs that were already evaluated (set gives O(1) lookups).
            HashSet<string> alreadyChecked = new HashSet<string>();

            foreach (Hashtable instance in trainingSet)
            {
                // Every attribute value of every instance is a candidate pivot.
                foreach (object attribute in instance.Keys)
                {
                    if (Convert.ToString(attribute) == categoryAttribute)
                    {
                        continue;
                    }

                    double pivot = Convert.ToDouble(instance[attribute]);

                    // HashSet.Add returns false when the pair was evaluated before.
                    if (!alreadyChecked.Add(attribute + Convert.ToString(pivot)))
                    {
                        continue;
                    }

                    Hashtable   currentSplit = Split(trainingSet, attribute, pivot);
                    Hashtable[] matched      = (Hashtable[])currentSplit["match"];
                    Hashtable[] notMatched   = (Hashtable[])currentSplit["notMatch"];

                    double matchEntropy    = Entropy(matched, categoryAttribute);
                    double notMatchEntropy = Entropy(notMatched, categoryAttribute);

                    // Size-weighted average entropy of the two subsets.
                    double newEntropy =
                        (matchEntropy * matched.Length + notMatchEntropy * notMatched.Length)
                        / trainingSet.Length;

                    double currentGain = initialEntropy - newEntropy;

                    if (currentGain > bestGain)
                    {
                        bestGain      = currentGain;
                        bestAttribute = attribute;
                        bestPivot     = pivot;
                        bestMatch     = matched;
                        bestNotMatch  = notMatched;
                    }
                }
            }

            // No split improved on the initial entropy: make this node a leaf and stop.
            // Without the return the code below would recurse into a null subset.
            if (bestMatch == null)
            {
                category = MostFrequentValue(trainingSet, categoryAttribute);
                return;
            }

            // Building subtrees
            int childDepth = maxTreeDepth - 1;
            matchSubTree    = new BuildDecisionTree(bestMatch, childDepth);
            notMatchSubTree = new BuildDecisionTree(bestNotMatch, childDepth);

            splitAttribute   = Convert.ToString(bestAttribute);
            splitPivot       = bestPivot;
            matchedLength    = bestMatch.Length;
            notMatchedLength = bestNotMatch.Length;
        }