/// <summary>
/// Classifies <paramref name="item"/> by descending <paramref name="tree"/> until a node
/// with a non-null <c>Category</c> is reached, then stores that category in <c>result</c>.
/// </summary>
/// <param name="tree">Root node of the decision tree to walk.</param>
/// <param name="item">
/// Table mapping attribute names to values. Values are read with
/// <see cref="Convert.ToDouble(object)"/>, so any numeric (or numeric-string) value is accepted.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="tree"/> or <paramref name="item"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="item"/> has no value for an attribute the tree splits on.
/// </exception>
public Classifier(BuildDecisionTree tree, Hashtable item)
{
    if (tree == null)
    {
        throw new ArgumentNullException("tree");
    }

    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    // A node without a category is an internal node: pick a branch and keep descending.
    while (tree.Category == null)
    {
        object rawValue = item[tree.SplitAttribute];
        if (rawValue == null)
        {
            throw new ArgumentException(
                "Item has no value for split attribute '" + tree.SplitAttribute + "'.",
                "item");
        }

        // Convert rather than unbox: a direct (double) cast on a boxed int/float/string
        // throws InvalidCastException, while the tree builder accepts those via Convert.ToDouble.
        double itemValue = Convert.ToDouble(rawValue);

        tree = itemValue >= tree.SplitPivot
            ? tree.MatchSubTree
            : tree.NotMatchSubTree;
    }

    result = tree.Category;
}
/// <summary>
/// Builds one node of a decision tree from <paramref name="trainingSet"/>.
/// The node becomes a leaf (only <c>category</c> is set) when the depth budget is exhausted,
/// the set has no more than <c>minItemsCount</c> items, its entropy is at or below
/// <c>entropyThreshold</c>, or no candidate split yields a positive information gain.
/// Otherwise the split with the highest gain is chosen and both subtrees are built recursively.
/// </summary>
/// <param name="trainingSet">
/// Training items; each is a table of attribute name to value. Every attribute other than
/// <c>categoryAttribute</c> must be convertible with <see cref="Convert.ToDouble(object)"/>.
/// </param>
/// <param name="maxTreeDepth">Remaining depth budget; a value of 0 forces a leaf.</param>
public BuildDecisionTree(Hashtable[] trainingSet, int maxTreeDepth)
{
    // Stop when maxTreeDepth / minItemsCount is reached.
    if (maxTreeDepth == 0 || trainingSet.Length <= minItemsCount)
    {
        category = MostFrequentValue(trainingSet, categoryAttribute);
        return;
    }

    double initialEntropy = Entropy(trainingSet, categoryAttribute);

    // Stop when entropyThreshold is reached.
    if (initialEntropy <= entropyThreshold)
    {
        category = MostFrequentValue(trainingSet, categoryAttribute);
        return;
    }

    // Best split found so far. bestMatch stays null until some split has a gain > 0.
    Hashtable[] bestMatch = null;
    Hashtable[] bestNotMatch = null;
    object bestAttribute = null;
    double bestPivot = 0;
    double bestGain = 0;

    // (attribute, pivot) pairs that were already evaluated; a set keeps the lookup O(1).
    HashSet<string> alreadyChecked = new HashSet<string>();

    foreach (Hashtable instance in trainingSet)
    {
        // Every attribute value of every instance is tried as a pivot.
        foreach (object attribute in instance.Keys)
        {
            if (Convert.ToString(attribute) == categoryAttribute)
            {
                continue;
            }

            double pivot = Convert.ToDouble(instance[attribute]);

            // The separator keeps the key unambiguous: without it "a1" + "2" and
            // "a" + "12" collide and a valid candidate would be skipped.
            string attributePivot = Convert.ToString(attribute) + "|" + Convert.ToString(pivot);
            if (!alreadyChecked.Add(attributePivot))
            {
                continue;
            }

            // Splitting the training set on this attribute/pivot.
            Hashtable currentSplit = Split(trainingSet, attribute, pivot);
            Hashtable[] match = (Hashtable[])currentSplit["match"];
            Hashtable[] notMatch = (Hashtable[])currentSplit["notMatch"];

            // Entropy after the split is the size-weighted average of the subset entropies.
            double newEntropy =
                (Entropy(match, categoryAttribute) * match.Length
                 + Entropy(notMatch, categoryAttribute) * notMatch.Length)
                / trainingSet.Length;

            // Information gain of this split.
            double currentGain = initialEntropy - newEntropy;

            if (currentGain > bestGain)
            {
                bestMatch = match;
                bestNotMatch = notMatch;
                bestAttribute = attribute;
                bestPivot = pivot;
                bestGain = currentGain;
            }
        }
    }

    if (bestMatch == null)
    {
        // No split improves on the initial entropy, so this node is a leaf.
        // Returning here is essential: there are no subsets to recurse into.
        category = MostFrequentValue(trainingSet, categoryAttribute);
        return;
    }

    // Building subtrees with one level less of depth budget.
    int subTreeDepth = maxTreeDepth - 1;
    matchSubTree = new BuildDecisionTree(bestMatch, subTreeDepth);
    notMatchSubTree = new BuildDecisionTree(bestNotMatch, subTreeDepth);

    splitAttribute = Convert.ToString(bestAttribute);
    splitPivot = bestPivot;
    matchedLength = bestMatch.Length;
    notMatchedLength = bestNotMatch.Length;
}