/// <summary>
        /// Creates the child node stored at index <paramref name="id"/> of
        /// <paramref name="root"/>'s children and initialises it.
        /// </summary>
        /// <param name="root">Node whose <c>Children</c> slot is filled; it also becomes the child's parent.</param>
        /// <param name="bestAttributeId">Attribute id used to build the parameters passed to <c>GetSubsetOf</c>.</param>
        /// <param name="id">Index of the child slot; also passed to <c>GetSubsetOf</c> and stored as the child's <c>TestAttributeValue</c>.</param>
        private static void AssignNewTreeNode(ITreeNode root, int bestAttributeId, int id)
        {
            // Put the fresh node into its slot first, then configure it through a local reference.
            root.Children[id] = TreeNode.CreateIt();

            var child = root.Children[id];
            child.Parent = root;

            // The child's data is the subset of the parent's data selected for this attribute/value pair.
            var subsetParams = SymbolicDomainDataParams.CreateIt(root.Data, bestAttributeId);
            child.Data = GetSubsetOf(subsetParams, id);

            child.TestAttributeValue = id;
        }
        /// <summary>
        /// Scans attribute ids <c>0 .. categoryId - 1</c> and picks the one with the
        /// highest gain factor as reported by <c>CalculateGainFactor</c>.
        /// </summary>
        /// <param name="root">Node whose data is being split.</param>
        /// <param name="categoriesCount">Number of categories, forwarded to <c>Entropy.CalculateEntropy</c>.</param>
        /// <param name="categoryId">Id of the category attribute; also the exclusive upper bound of the scanned attribute ids.</param>
        /// <returns>
        /// The id of the selected attribute, or <c>-1</c> when no candidate produced a gain
        /// factor that beat the initial value of <c>0.0</c>.
        /// </returns>
        private int CalculateBestAttributeId(ITreeNode root, int categoriesCount, int categoryId)
        {
            var winnerId   = -1;
            var winnerGain = 0.0;

            for (var candidateId = 0; candidateId < categoryId; candidateId++)
            {
                if (IsAttributeAlreadyUsed(root, candidateId))
                {
                    continue;
                }

                var subsetEntropies = new List<double>();
                var subsetSizes     = new List<int>();

                var valueCount = DomainTree.Domain[candidateId].Count;

                for (var value = 0; value < valueCount; value++)
                {
                    var splitParams = SymbolicDomainDataParams.CreateIt(root.Data, candidateId);
                    var subset      = GetSubsetOf(splitParams, value);

                    // NOTE(review): the size is recorded for every value, but an entropy only for
                    // non-empty subsets, so the two lists can differ in length - confirm that
                    // CalculateGainFactor expects this.
                    var subsetSize = subset.Count;
                    subsetSizes.Add(subsetSize);

                    if (subsetSize != 0)
                    {
                        var categoryParams = SymbolicDomainDataParams.CreateIt(subset, categoryId);
                        subsetEntropies.Add(Entropy.CalculateEntropy(categoryParams, categoriesCount));
                    }
                }

                var candidateGain = CalculateGainFactor(root, subsetSizes, subsetEntropies);

                // Kept as a "not better -> skip" test so the comparison semantics
                // (ties and NaN included) stay exactly as they are.
                if (candidateGain <= winnerGain)
                {
                    continue;
                }

                winnerGain = candidateGain;
                winnerId   = candidateId;
            }

            return winnerId;
        }
        /// <summary>
        /// Recursively classifies <paramref name="data"/> starting at <paramref name="evaluatedNode"/>.
        /// </summary>
        /// <param name="data">Values of the sample being classified, passed to <c>AreAttributeSymbolicValuesEqual</c>.</param>
        /// <param name="evaluatedNode">Node currently being evaluated.</param>
        /// <returns>The value selected by <c>_choiceStrategy.MakeChoice</c> from the candidate categories.</returns>
        private string ClassifyInternal(IList<string> data, ITreeNode evaluatedNode)
        {
            // The output attribute is the last one in the attribute list.
            var outputAttributeId = DomainTree.Attributes.Count - 1;

            if (!evaluatedNode.IsLeaf())
            {
                // Follow every child accepted by AreAttributeSymbolicValuesEqual for its index;
                // both sequences stay lazy and are only enumerated by the choice strategy.
                var matchingChildren = evaluatedNode.Children
                                                    .Where((child, childIndex) => AreAttributeSymbolicValuesEqual(evaluatedNode, childIndex, data));
                var childResults = matchingChildren.Select(child => ClassifyInternal(data, child));

                return _choiceStrategy.MakeChoice(childResults);
            }

            // Leaf: choose among the output-attribute values present in the node's data.
            var leafParams     = SymbolicDomainDataParams.CreateIt(evaluatedNode.Data, outputAttributeId);
            var symbolicValues = DomainTree.GetAllSymbolicValuesOfAttribute(leafParams);
            var candidates     = symbolicValues.Select(symbolicValue => DomainTree.Domain[outputAttributeId][symbolicValue]);

            return _choiceStrategy.MakeChoice(candidates);
        }
        /// <summary>
        /// Writes the subtree rooted at <paramref name="node"/> to the console as a nested
        /// <c>if / else</c> chain, recursing into each child with one more tab of indentation.
        /// </summary>
        /// <param name="domainTree">Source of the attribute names and domain values that are printed.</param>
        /// <param name="node">Root of the subtree to print.</param>
        /// <param name="tab">Indentation prefix for the current depth.</param>
        private static void PrintTree(IDomainTree domainTree, ITreeNode node, String tab = "")
        {
            // The output attribute is the last one in the attribute list.
            var outputAttributeId = domainTree.Attributes.Count - 1;

            if (!node.IsLeaf())
            {
                var childCount = node.Children.Length;
                var nestedTab  = tab + "\t";

                for (var childIndex = 0; childIndex < childCount; childIndex++)
                {
                    var branchHeader = tab + "if( " + domainTree.Attributes[node.TestAttribute] + " == \"" +
                                       domainTree.Domain[node.TestAttribute][childIndex] + "\") {";
                    Console.WriteLine(branchHeader);

                    PrintTree(domainTree, node.Children[childIndex], nestedTab);

                    // The last branch closes the chain; every other one continues on the same line.
                    if (childIndex == childCount - 1)
                    {
                        Console.WriteLine(tab + "}");
                    }
                    else
                    {
                        Console.Write(tab + "} else ");
                    }
                }

                return;
            }

            // Leaf: print the first output value found in the node's data, or "null" when there is none.
            var leafParams = SymbolicDomainDataParams.CreateIt(node.Data, outputAttributeId);
            var values     = domainTree.GetAllSymbolicValuesOfAttribute(leafParams);

            if (values.Length != 0)
            {
                Console.WriteLine("{0}\t{1} = \"{2}\";", tab, domainTree.Attributes[outputAttributeId],
                                  domainTree.Domain[outputAttributeId][values[0]]);
                return;
            }

            Console.WriteLine("{0}\t{1} = \"null\";", tab, domainTree.Attributes[outputAttributeId]);
        }
        /// <summary>
        /// Recursively grows the decision tree below <paramref name="root"/>.
        /// Stores the node's entropy, and stops when that entropy is within a small tolerance
        /// of zero or when <c>CalculateBestAttributeId</c> returns <c>-1</c>; otherwise creates
        /// one child per value of the chosen attribute and recurses into each child.
        /// </summary>
        /// <param name="root">Node to expand; its <c>Entropy</c>, <c>TestAttribute</c> and <c>Children</c> are written.</param>
        public void BuildDecisionTreeFrom(ITreeNode root)
        {
            // Entropies closer to zero than this are treated as zero.
            const double entropyTolerance = 0.000001;

            // The category attribute is the last one in the attribute list.
            var categoryId      = DomainTree.Attributes.Count - 1;
            var categoriesCount = DomainTree.Domain[categoryId].Count;

            root.Entropy = Entropy.CalculateEntropy(SymbolicDomainDataParams.CreateIt(root.Data, categoryId),
                                                    categoriesCount);

            if (Math.Abs(root.Entropy) < entropyTolerance)
            {
                return;
            }

            var bestAttributeId = CalculateBestAttributeId(root, categoriesCount, categoryId);

            if (bestAttributeId == -1)
            {
                return;
            }

            var branchCount = DomainTree.Domain[bestAttributeId].Count;

            root.TestAttribute = bestAttributeId;
            root.Children      = new TreeNode[branchCount];

            // Create every child before recursing into any of them.
            for (var branch = 0; branch < branchCount; branch++)
            {
                AssignNewTreeNode(root, bestAttributeId, branch);
            }

            var children = root.Children;

            for (var childIndex = 0; childIndex < children.Length; childIndex++)
            {
                BuildDecisionTreeFrom(children[childIndex]);
            }
        }