/// <summary>
/// Creates a fresh node in slot <paramref name="id"/> of <c>root.Children</c> and wires it up:
/// its parent is <paramref name="root"/>, its data is the subset returned by <c>GetSubsetOf</c>
/// for <c>root.Data</c> / <paramref name="bestAttributeId"/> / <paramref name="id"/>, and its
/// <c>TestAttributeValue</c> is <paramref name="id"/>.
/// </summary>
/// <param name="root">Node whose <c>Children</c> array receives the new node; the array must already exist.</param>
/// <param name="bestAttributeId">Attribute id used to build the subset parameters.</param>
/// <param name="id">Index into <c>root.Children</c>; also passed to <c>GetSubsetOf</c> and stored as the test attribute value.</param>
private static void AssignNewTreeNode(ITreeNode root, int bestAttributeId, int id)
{
    // Store first, then configure through a local reference to the stored node.
    root.Children[id] = TreeNode.CreateIt();
    var child = root.Children[id];

    child.Parent = root;
    child.Data = GetSubsetOf(SymbolicDomainDataParams.CreateIt(root.Data, bestAttributeId), id);
    child.TestAttributeValue = id;
}
/// <summary>
/// Scans attribute ids <c>0 .. categoryId - 1</c>, skipping those for which
/// <c>IsAttributeAlreadyUsed</c> returns true, and picks the attribute whose value from
/// <c>CalculateGainFactor</c> is the largest one exceeding the initial threshold of 0.0.
/// </summary>
/// <param name="root">Node whose <c>Data</c> is partitioned per attribute value.</param>
/// <param name="categoriesCount">Passed through to <c>Entropy.CalculateEntropy</c>.</param>
/// <param name="categoryId">Id of the category attribute; also the exclusive upper bound of the scan.</param>
/// <returns>The id of the selected attribute, or -1 when no attribute was selected.</returns>
private int CalculateBestAttributeId(ITreeNode root, int categoriesCount, int categoryId)
{
    var winnerId = -1;
    var winnerGain = 0.0;

    for (var attributeId = 0; attributeId < categoryId; attributeId++)
    {
        if (IsAttributeAlreadyUsed(root, attributeId))
        {
            continue;
        }

        var subsetEntropies = new List<double>();
        var subsetSizes = new List<int>();
        var symbolicValuesCount = DomainTree.Domain[attributeId].Count;

        for (var symbolicValue = 0; symbolicValue < symbolicValuesCount; symbolicValue++)
        {
            var subset = GetSubsetOf(SymbolicDomainDataParams.CreateIt(root.Data, attributeId), symbolicValue);

            // Every subset contributes a size, but only non-empty subsets contribute an entropy.
            // NOTE(review): the two lists can therefore differ in length — confirm that
            // CalculateGainFactor expects this.
            subsetSizes.Add(subset.Count);
            if (subset.Count != 0)
            {
                var subsetParams = SymbolicDomainDataParams.CreateIt(subset, categoryId);
                subsetEntropies.Add(Entropy.CalculateEntropy(subsetParams, categoriesCount));
            }
        }

        var gain = CalculateGainFactor(root, subsetSizes, subsetEntropies);

        // Kept as a negated "<=" (not rewritten to ">") so the comparison result is the same
        // for every possible operand, including a NaN gain.
        var replacesWinner = !(gain <= winnerGain);
        if (replacesWinner)
        {
            winnerGain = gain;
            winnerId = attributeId;
        }
    }

    return winnerId;
}
/// <summary>
/// Classifies <paramref name="data"/> starting at <paramref name="evaluatedNode"/>.
/// For a leaf, the symbolic values of the last attribute found in the node's data are mapped
/// to their domain entries and handed to the choice strategy. Otherwise the method recurses
/// into every child for which <c>AreAttributeSymbolicValuesEqual</c> holds and lets the choice
/// strategy pick among the children's results.
/// </summary>
/// <param name="data">The record being classified.</param>
/// <param name="evaluatedNode">The tree node currently being evaluated.</param>
/// <returns>The value selected by <c>_choiceStrategy.MakeChoice</c>.</returns>
private string ClassifyInternal(IList<string> data, ITreeNode evaluatedNode)
{
    // The output attribute is the last entry of the attribute list.
    var outputAttributeId = DomainTree.Attributes.Count - 1;

    if (!evaluatedNode.IsLeaf())
    {
        // Both sequences are left lazy; the choice strategy drives their enumeration.
        var matchingChildren = evaluatedNode.Children
            .Where((child, position) => AreAttributeSymbolicValuesEqual(evaluatedNode, position, data));
        var childResults = matchingChildren.Select(child => ClassifyInternal(data, child));

        return _choiceStrategy.MakeChoice(childResults);
    }

    var leafParams = SymbolicDomainDataParams.CreateIt(evaluatedNode.Data, outputAttributeId);
    var leafValues = DomainTree.GetAllSymbolicValuesOfAttribute(leafParams);
    var candidates = leafValues.Select(valueId => DomainTree.Domain[outputAttributeId][valueId]);

    return _choiceStrategy.MakeChoice(candidates);
}
/// <summary>
/// Writes the subtree rooted at <paramref name="node"/> to the console as nested
/// <c>if( attribute == "value") { ... } else ...</c> text.
/// A leaf prints the last attribute's name together with the first symbolic value found in
/// the node's data, or the text <c>null</c> when there are no values.
/// </summary>
/// <param name="domainTree">Supplies attribute names and domain values for printing.</param>
/// <param name="node">Root of the subtree to print.</param>
/// <param name="tab">Prefix written before each line; a tab character is appended per nesting level.</param>
private static void PrintTree(IDomainTree domainTree, ITreeNode node, String tab = "")
{
    var outputAttributeId = domainTree.Attributes.Count - 1;

    if (!node.IsLeaf())
    {
        var childCount = node.Children.Length;
        var lastIndex = childCount - 1;

        for (var index = 0; index < childCount; index++)
        {
            Console.WriteLine(tab + "if( " + domainTree.Attributes[node.TestAttribute] + " == \"" + domainTree.Domain[node.TestAttribute][index] + "\") {");
            PrintTree(domainTree, node.Children[index], tab + "\t");

            if (index == lastIndex)
            {
                // Last branch: close the chain and end the line.
                Console.WriteLine(tab + "}");
            }
            else
            {
                // More branches follow: stay on the same line so the next "if(" continues it.
                Console.Write(tab + "} else ");
            }
        }

        return;
    }

    var leafValues = domainTree.GetAllSymbolicValuesOfAttribute(SymbolicDomainDataParams.CreateIt(node.Data, outputAttributeId));

    if (leafValues.Length != 0)
    {
        // Only the first value is printed, even when several are present.
        Console.WriteLine("{0}\t{1} = \"{2}\";", tab, domainTree.Attributes[outputAttributeId], domainTree.Domain[outputAttributeId][leafValues[0]]);
    }
    else
    {
        Console.WriteLine("{0}\t{1} = \"null\";", tab, domainTree.Attributes[outputAttributeId]);
    }
}
/// <summary>
/// Recursively grows the decision tree below <paramref name="root"/>.
/// The node's entropy is computed and stored; if it is within a small tolerance of zero, or
/// if <c>CalculateBestAttributeId</c> returns -1, the node is left without children.
/// Otherwise the node is split on the selected attribute: one child is created per value in
/// that attribute's domain, and each child is then built in turn.
/// </summary>
/// <param name="root">Node to expand; its <c>Entropy</c>, <c>TestAttribute</c> and <c>Children</c> may be assigned.</param>
public void BuildDecisionTreeFrom(ITreeNode root)
{
    // Entropy magnitudes below this are treated as zero.
    const double entropyTolerance = 0.000001;

    // The category attribute is the last entry of the attribute list.
    var categoryId = DomainTree.Attributes.Count - 1;
    var categoriesCount = DomainTree.Domain[categoryId].Count;

    root.Entropy = Entropy.CalculateEntropy(SymbolicDomainDataParams.CreateIt(root.Data, categoryId), categoriesCount);
    if (Math.Abs(root.Entropy) < entropyTolerance)
    {
        return;
    }

    var bestAttributeId = CalculateBestAttributeId(root, categoriesCount, categoryId);
    if (bestAttributeId == -1)
    {
        return;
    }

    var branchCount = DomainTree.Domain[bestAttributeId].Count;
    root.TestAttribute = bestAttributeId;
    root.Children = new TreeNode[branchCount];

    // First pass: create every child before any recursion starts.
    for (var branch = 0; branch < branchCount; branch++)
    {
        AssignNewTreeNode(root, bestAttributeId, branch);
    }

    // Second pass: expand each child in index order.
    var children = root.Children;
    for (var position = 0; position < children.Length; position++)
    {
        BuildDecisionTreeFrom(children[position]);
    }
}