コード例 #1
        /// <summary>
        /// Reconstructs a decision tree from a flat array of nodes. Element 0 must be
        /// an <c>AnchorNode</c>; the element after it is attached under this tree's own
        /// anchor, and the sons of each processed node are read from the array slots
        /// that follow, in order.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines (braces, the opening
        /// <c>do</c>, the statements that fill <c>queue</c> and <c>sons</c>, and the body
        /// of the <c>LeafNode</c> branch). The comments describe only what is visible.
        /// </remarks>
        /// <param name="attrSet">Attribute set, forwarded to the base constructor.</param>
        /// <param name="goalAttr">Goal attribute, forwarded to the base constructor.</param>
        /// <param name="nodes">Flattened nodes; the first one must be an <c>AnchorNode</c>.</param>
        public TestDecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr, Node[] nodes)
            : base(attrSet, goalAttr)
            // Index of the most recently consumed element of nodeArray.
            int  cur = 0;
            Node curNode;

            // Copy of the input array.
            // NOTE(review): no null/empty check on nodes is visible before index 0 is read.
            Node[] nodeArray = nodes.ToArray();

            // The first element must be the anchor node.
            // NOTE(review): the exception is thrown with an empty message.
            if (!(nodeArray[0] is AnchorNode))
                throw new ArgumentException("");

            // The supplied anchor is only type-checked; this tree gets a fresh anchor of its own.
            this._anchor = new AnchorNode(this);

            // Pre-increment: the element right after the anchor (index 1) is handled first.
            curNode = nodeArray[++cur];

            curNode.Father = this._anchor;

            // Nodes waiting to be processed by the loop that ends at "while (queue.Count > 0)".
            Queue <Node> queue = new Queue <Node>();


                curNode = queue.Dequeue();

                // The dequeued node reports how many sons it has; that many array slots are consumed next.
                int nbSons = curNode.NumOfSons();

                List <Node> sons = new List <Node>();

                for (int i = 0; i < nbSons; i++)
                    Node son = nodeArray[++cur];


                    //update the son

                // A test node is rebuilt from its weight and its test together with the sons list.
                if (curNode is TestNode)
                    TestNode tn = new TestNode(curNode.Weight, (curNode as TestNode).Test, sons);

                    curNode = tn;
                else if (curNode is LeafNode)

                // Point every collected son back at the node that now owns it.
                foreach (Node n in sons)
                    n.Father = curNode;
            }while (queue.Count > 0);
コード例 #2
        /// <summary>
        /// Creates a symbolic test on <paramref name="attr"/> that keeps its own array copy
        /// of the supplied values.
        /// </summary>
        /// <param name="attr">Attribute the test applies to; forwarded to the base constructor.</param>
        /// <param name="values">Values stored by this test; enumerated once and copied into an array.</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public SymbolicTest(SymbolicAttribute attr, IEnumerable<KnownSymbolicValue> values)
            : base(attr)
        {
            if (values == null)
            {
                // Name the offending argument so the failure can be diagnosed from the message alone.
                throw new ArgumentNullException("values");
            }

            // Materialize once so later changes to the caller's sequence do not affect this test.
            _values = values.ToArray();
        }
コード例 #3
        /// <summary>
        /// Creates an empty decision tree object: a fresh anchor node is created for this
        /// tree and the attribute set and goal attribute are stored.
        /// </summary>
        /// <param name="attrSet">Attribute set stored by the tree; must not be null.</param>
        /// <param name="goalAttr">Goal attribute stored by the tree; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="attrSet"/> or <paramref name="goalAttr"/> is null.
        /// </exception>
        public DecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr)
        {
            // Checked separately so the exception identifies which argument was null.
            if (attrSet == null)
            {
                throw new ArgumentNullException("attrSet");
            }

            if (goalAttr == null)
            {
                throw new ArgumentNullException("goalAttr");
            }

            // Same assignment order as before: the anchor is created first.
            _anchor        = new AnchorNode(this);
            _attributeSet  = attrSet;
            _goalAttribute = goalAttr;
        }
コード例 #4
 /// <summary>
 /// Dispatches to the symbolic or the numerical <c>BestSplitTest</c> overload according
 /// to the runtime type of <paramref name="testAttribute"/>.
 /// </summary>
 /// <param name="testAttribute">Attribute to test; a SymbolicAttribute or a NumericalAttribute.</param>
 /// <param name="goalAttribute">Passed through unchanged to the selected overload.</param>
 /// <returns>The result of the selected overload.</returns>
 /// <exception cref="ArgumentException">The attribute is of neither supported type.</exception>
 public TestScore BestSplitTest(Attribute testAttribute, SymbolicAttribute goalAttribute)
 {
     // Symbolic attributes are checked first, as before.
     if (testAttribute is SymbolicAttribute)
     {
         SymbolicAttribute symbolic = (SymbolicAttribute)testAttribute;

         return BestSplitTest(symbolic, goalAttribute);
     }

     if (testAttribute is NumericalAttribute)
     {
         NumericalAttribute numerical = (NumericalAttribute)testAttribute;

         return BestSplitTest(numerical, goalAttribute);
     }

     // Reached for any other attribute type, including a null reference.
     throw new ArgumentException("Unknow attribute type.");
 }
コード例 #5
        /// <summary>
        /// Initializes the builder: stores the learning set, the candidate test attributes
        /// and the goal attribute, and creates the initial <c>LearningDecisionTree</c> over
        /// the learning set's own attribute set.
        /// </summary>
        /// <param name="learningItemSet">Items to learn from; must be non-null and contain at least one item.</param>
        /// <param name="testAttributeSet">Attributes stored as the candidate test attributes.</param>
        /// <param name="goalAttribute">Attribute stored as the goal and passed to the tree.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="learningItemSet"/> is null or contains no item.
        /// </exception>
        public SimpleDecisionTreeBuilder(ItemSet learningItemSet, AttributeSet testAttributeSet, SymbolicAttribute goalAttribute)
        {
            if (learningItemSet == null)
            {
                throw new ArgumentNullException("learningItemSet");
            }

            if (learningItemSet.NumOfItems() == 0)
            {
                // Exception type kept as ArgumentNullException so existing catch blocks still match;
                // the message makes clear the set was empty rather than null.
                throw new ArgumentNullException("learningItemSet", "The learning item set must contain at least one item.");
            }

            this._learningSet      = learningItemSet;
            this._testAttributeSet = testAttributeSet;
            this._goalAttribute    = goalAttribute;

            // The tree is built over the attribute set carried by the learning set itself.
            this._tree = new LearningDecisionTree(learningItemSet.AttrSet, goalAttribute, learningItemSet);
        }
コード例 #6
        /// <summary>
        /// Returns the distribution of goal values. This distribution is represented by an array,
        /// its i-th element is proportional to the weight of the i-th goal value.
        /// The sum of the elements of this array is equal to 1.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the bodies of the two null
        /// checks, the <c>else</c> in front of the cast, and the final return are not visible.
        /// </remarks>
        /// <returns>An array describing the goal value distribution associated to this node.</returns>
        public override double[] GetGoalValueDistribution()
            WeightedItemSet itemSet;
            DecisionTree    dt = base.Tree();

            // Guard for a missing tree or learning set (the statement executed in that case is not visible).
            if (dt == null || _learningSet == null)

            // Work on a weighted set: wrap a plain item set, reuse one that is already weighted.
            if (!(_learningSet is WeightedItemSet))
                itemSet = new WeightedItemSet(_learningSet);
                itemSet = (WeightedItemSet)_learningSet;

            SymbolicAttribute goalAttr = dt.GoalAttribute;

            // Guard for a tree without goal attribute (the statement executed in that case is not visible).
            if (goalAttr == null)

            //Accumulate, per goal value, the weight of the items of the learning set
            double[] frequencies = new double[goalAttr.NumOfValues];

            for (int i = 0; i < itemSet.NumOfItems(); i++)
                // The goal value's IntValue is used directly as the index into frequencies.
                // NOTE(review): the cast assumes the goal value of every item is known, and
                // IndexOf(goalAttr) is re-evaluated on every iteration.
                int id = ((KnownSymbolicValue)(itemSet.Items[i].ValueOf(itemSet.AttrSet.IndexOf(goalAttr)))).IntValue;
                frequencies[id] += itemSet.GetWeight(i);

            // Normalize by itemSet.Size() (presumably the total weight of the set - TODO confirm).
            for (int i = 0; i < frequencies.Length; i++)
                frequencies[i] /= itemSet.Size();

コード例 #7
        /// <summary>
        /// Computes the entropy of the set regarding a given symbolic attribute.
        /// The value is stored in <c>_entropy</c> together with the attribute it was computed
        /// for, and is recomputed only when <c>_entropy</c> is negative or the attribute differs.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the statement that updates
        /// <c>frequencies</c> from <c>sv</c> and the final return are not visible.
        /// </remarks>
        /// <param name="attr">the attribute against which to compute the entropy.</param>
        /// <returns>Presumably the stored entropy value (the return statement is not visible).</returns>
        /// <exception cref="ArgumentException"><paramref name="attr"/> is not contained in this set's attribute set.</exception>
        public virtual double CalEntropy(SymbolicAttribute attr)
            if (!_attributeSet.Contains(attr))
                throw new ArgumentException("Unknown attribute");

            // Short-circuit: _entropyAttribute is only dereferenced when _entropy is non-negative.
            if (this._entropy < 0.0d || !_entropyAttribute.Equals(attr))
                // One counter per possible value of attr.
                double[] frequencies = new double[attr.NumOfValues];

                for (int i = 0; i < _items.Count; i++)
                    // The cast assumes every item has a known symbolic value for attr.
                    KnownSymbolicValue sv = (KnownSymbolicValue)(_items[i].ValueOf(_attributeSet.IndexOf(attr)));

                // Remember both the value and the attribute it belongs to.
                this._entropy     = Entropy.CalEntropy(frequencies);
                _entropyAttribute = attr;

コード例 #8
        /// <summary>
        /// Finds the best split test on <paramref name="testAttr"/> while accounting for items
        /// whose value for that attribute is unknown: the search is delegated to
        /// <c>knownItems</c> and the resulting score is scaled by <c>nbKnown / Items.Count</c>.
        /// When no item has a known value, a placeholder test with score 0 is returned.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the body of the first loop
        /// (presumably counting and collecting the known items) and the <c>else</c> keywords
        /// are not visible.
        /// </remarks>
        /// <param name="testAttr">Attribute to build the test on (symbolic or numerical).</param>
        /// <param name="goalAttr">Goal attribute passed on to the search over the known items.</param>
        /// <returns>The test and its score.</returns>
        protected TestScore BestSplitTest(Attribute testAttr, SymbolicAttribute goalAttr)
            ItemSet knownItems = new ItemSet(_attributeSet);
            int     nbKnown    = 0;

            // Select the items whose value for testAttr is not unknown.
            foreach (Item it in _items)
                if (!it.ValueOf(_attributeSet, testAttr).IsUnknown())

            if (nbKnown == 0)
            { //No Information can be gained from this test
                Test test;

                if (testAttr is SymbolicAttribute)
                { //Symbolic test
                    // Placeholder test on the single value 0.
                    test = new SymbolicTest((SymbolicAttribute)testAttr,
                                            new KnownSymbolicValue[] { new KnownSymbolicValue(0) });
                { //Numerical test
                    // Placeholder test with threshold 0.
                    test = new NumericalTest((NumericalAttribute)testAttr, 0.0d);

                return(new TestScore(test, 0.0d));
                // Otherwise search among the known items only.
                TestScore knownTestScore = knownItems.BestSplitTest(testAttr, goalAttr);

                // Scale the score by the fraction of items with a known value; the (double) cast
                // binds to nbKnown, so the division is done in floating point.
                return(new TestScore(knownTestScore.Test, knownTestScore.Score * (double)nbKnown / Items.Count));
コード例 #9
        /// <summary>
        /// This method computes the entropy of the set regarding a given symbolic attribute.
        /// The frequency of each value of this attribute is counted according to the weights.
        /// The value of this attribute must be known for all the items of this set.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the final return is not visible.
        /// </remarks>
        /// <param name="attr">the attribute against which to compute the entropy.</param>
        /// <returns>entropy</returns>
        /// <exception cref="ArgumentException"><paramref name="attr"/> is not contained in this set's attribute set.</exception>
        public override double CalEntropy(SymbolicAttribute attr)
            if (!_attributeSet.Contains(attr))
                throw new ArgumentException("Unknown attribute");

            // Recompute only when _entropy is negative or was computed for another attribute;
            // the short-circuit keeps _entropyAttribute from being dereferenced in the first case.
            if (_entropy < 0.0d || !_entropyAttribute.Equals(attr))
                // One accumulator per possible value of attr.
                double[] freqs = new double[attr.NumOfValues];

                for (int i = 0; i < Items.Count; i++)
                    // NOTE(review): "as" yields null for a value that is not a KnownSymbolicValue,
                    // and sv is dereferenced on the next line without a null check.
                    KnownSymbolicValue sv = Items[i].ValueOf(_attributeSet, attr) as KnownSymbolicValue;
                    // Each item contributes its weight instead of a unit count.
                    freqs[sv.IntValue] += _weights[i];

                // Remember both the value and the attribute it belongs to.
                _entropy          = Biotracker.Signature.DT.Entropy.CalEntropy(freqs);
                _entropyAttribute = attr;

コード例 #10
        /// <summary>
        /// Finds the best splitting test involving a numerical attribute: candidate thresholds
        /// are the midpoints between consecutive distinct test values, and each candidate is
        /// scored as the entropy of the whole set minus the weighted entropies of both sides.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; an ordering step for
        /// <c>tgv</c> and the per-item updates of <c>freqLower</c>, <c>freqHigher</c> and their
        /// sums inside the scan loop are not visible.
        /// </remarks>
        /// <param name="testAttr">Numerical attribute the threshold test is built on.</param>
        /// <param name="goalAttr">Symbolic attribute whose entropy the split should reduce.</param>
        /// <returns>A <c>NumericalTest</c> with the best threshold found, and its score.</returns>
        private TestScore BestSplitTest(NumericalAttribute testAttr, SymbolicAttribute goalAttr)
            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            //frequencyLower (frequencyHigher) counts the number of items lower
            //(higher) than the threshold for each goal value.  In the beginning,
            //frequencyLower is zeroed because the threshold is chosen small.
            double[] freqLower  = new double[goalNbVal];
            double[] freqHigher = new double[goalNbVal];

            // Initialize freqHigher[gvi] from the size of the subset at index 1 of the split on
            // "goal is gvi" (presumably the items matching the test - TODO confirm).
            for (int gvi = 0; gvi < goalNbVal; gvi++)
                SymbolicTest valTest = new SymbolicTest(goalAttr,
                                                        new KnownSymbolicValue[] { new KnownSymbolicValue(gvi) });

                freqHigher[gvi] = Split(valTest).ElementAt(1).Size();

            //Those two variables hold sum of the elements of the corresponding array.
            double freqLowerSum  = 0.0d;
            double freqHigherSum = (double)_items.Count;

            // One (test value, goal value) pair per item; the casts assume both values are known.
            List <TestGoalValue> tgv = new List <TestGoalValue>();

            for (int i = 0; i < _items.Count; i++)
                double testVal = ((KnownNumericalValue)(this._items[i].ValueOf(testIndex))).Value;
                int    goalVal = ((KnownSymbolicValue)(this._items[i].ValueOf(goalIndex))).IntValue;
                tgv.Add(new TestGoalValue(testVal, goalVal));


            // tgv[0] is read unconditionally, so an empty item set fails here.
            int    goalValue, goalValueNew = tgv[0].GoalValue;
            double testValue, testValueNew = tgv[0].TestValue;

            double bestScore     = 0.0d;
            double bestThreshold = testValueNew;

            // Walk over consecutive pairs: (testValue, goalValue) is the previous entry,
            // (testValueNew, goalValueNew) the current one.
            for (int i = 1; i < _items.Count; i++)
                testValue    = testValueNew;
                goalValue    = goalValueNew;
                testValueNew = tgv[i].TestValue;
                goalValueNew = tgv[i].GoalValue;


                // A threshold can only separate two entries whose test values differ.
                if (testValue != testValueNew)
                    // Entropy of the whole set minus the size-weighted entropies of the two sides.
                    double score = CalEntropy(goalAttr)
                                   - (freqLowerSum / _items.Count) * Entropy.CalEntropy(freqLower)
                                   - (freqHigherSum / _items.Count) * Entropy.CalEntropy(freqHigher);

                    // Strict comparison: the first threshold reaching a given score is kept.
                    if (score > bestScore)
                        bestScore     = score;
                        // Midpoint between the two neighbouring test values.
                        bestThreshold = (testValue + testValueNew) / 2.0d;

            return(new TestScore(new NumericalTest(testAttr, bestThreshold), bestScore));
コード例 #11
        /// <summary>
        /// Finds the best splitting test involving a Symbolic attribute. The single best test
        /// value is found first; further values are then added to the group one at a time, and
        /// the prefix of that ordering with the highest recorded score becomes the test.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the statements that fill the
        /// frequency tables, <c>remainTestValueIndexes</c>, <c>orderedValueIndex</c> and
        /// <c>orderedScores</c> are not visible. The comments describe only what is visible.
        /// </remarks>
        /// <param name="testAttr">Symbolic attribute for test</param>
        /// <param name="goalAttr">Symbolic attribute whose entropy the split should reduce.</param>
        /// <returns>A <c>SymbolicTest</c> on the selected group of values, and its score.</returns>
        protected TestScore BestSplitTest(SymbolicAttribute testAttr, SymbolicAttribute goalAttr)
            int testNbVal = testAttr.NumOfValues;
            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            //freqMatch[tvi][gvi] is the number of items that has a value equal to tvi for their 'test'
            //attribute and value equal to 'gvi' for their 'goal' attribute.
            //freqMatchSum[tvi] is the sum of the frequencyMatch[tvi][gvi] elements (for all gvi).
            double[][] freqMatch = new double[testNbVal][];
            for (int i = 0; i < testNbVal; i++)
                freqMatch[i] = new double[goalNbVal];
            double[] freqMatchSum = new double[testNbVal];

            //Identically for the items that do not have tvi as a test attribute value.
            double[][] freqNoMatch = new double[testNbVal][];
            for (int i = 0; i < testNbVal; i++)
                freqNoMatch[i] = new double[goalNbVal];

            double[] freqNoMatchSum = new double[testNbVal];

            // Classify every item by its test value and goal value; the casts assume both are known.
            for (int i = 0; i < _items.Count; i++)
                int testVal = ((KnownSymbolicValue)(_items[i].ValueOf(testIndex))).IntValue;
                int goalVal = ((KnownSymbolicValue)(_items[i].ValueOf(goalIndex))).IntValue;

                for (int tvi = 0; tvi < testNbVal; tvi++)
                    if (testVal == tvi)

            // -1 is below any score compared against it here, so the first candidate always wins.
            double bestScore = -1.0d;
            int    bestValue = -1;

            // Score each single-value test: entropy of the whole set minus the size-weighted
            // entropies of the matching and non-matching items.
            for (int tvi = 0; tvi < testNbVal; tvi++)
                double score = CalEntropy(goalAttr)
                               - ((freqMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqMatch[tvi]))
                               - ((freqNoMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqNoMatch[tvi]));

                if (score > bestScore)
                    bestScore = score;
                    bestValue = tvi;

            //Group the attribute values one by one
            List <int> remainTestValueIndexes = new List <int>();

            for (int i = 0; i < testNbVal; i++)

            // Running tables for the group built so far: "match" starts empty, "no match" starts
            // with every item (match + no-match counts of value 0 cover the whole set).
            double[] remainingFreqMatch   = new double[goalNbVal];
            double[] remainingFreqNoMatch = new double[goalNbVal];

            for (int gvi = 0; gvi < goalNbVal; gvi++)
                remainingFreqNoMatch[gvi] = freqMatch[0][gvi] + freqNoMatch[0][gvi];

            double remainingFreqMatchSum   = 0.0d;
            double remainingFreqNoMatchSum = (double)(_items.Count);

            // Values in the order they were selected, and the score recorded at each step.
            List <int>    orderedValueIndex = new List <int>();
            List <double> orderedScores     = new List <double>();


            //Remove values until only one is left
            while (remainTestValueIndexes.Count >= 2)
                //Update remaining Frequency.. arrays according to the last test attribute value removed.

                // Move the counts of the last selected value from the "no match" side to the "match" side.
                for (int gvi = 0; gvi < goalNbVal; gvi++)
                    remainingFreqMatch[gvi]   += freqMatch[bestValue][gvi];
                    remainingFreqNoMatch[gvi] -= freqMatch[bestValue][gvi];

                remainingFreqMatchSum   += freqMatchSum[bestValue];
                remainingFreqNoMatchSum -= freqMatchSum[bestValue];

                bestScore = -1.0d;

                //Find the next best test attribute value
                for (int i = 0; i < remainTestValueIndexes.Count; i++)
                    int tvi = remainTestValueIndexes[i];

                    // Tables obtained if value tvi were added to the current group.
                    double[] thisFreqMatch      = new double[goalNbVal];
                    double[] thisFreqNoMatch    = new double[goalNbVal];
                    double   thisFreqMatchSum   = 0.0d;
                    double   thisFreqNoMatchSum = 0.0d;

                    for (int gvi = 0; gvi < goalNbVal; gvi++)
                        thisFreqMatch[gvi]   = freqMatch[tvi][gvi] + remainingFreqMatch[gvi];
                        thisFreqNoMatch[gvi] = remainingFreqNoMatch[gvi] - freqMatch[tvi][gvi];
                    thisFreqMatchSum   = freqMatchSum[tvi] + remainingFreqMatchSum;
                    thisFreqNoMatchSum = remainingFreqNoMatchSum - freqMatchSum[tvi];

                    // Same score formula as for the single-value tests above.
                    double score = CalEntropy(goalAttr)
                                   - ((thisFreqMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqMatch))
                                   - ((thisFreqNoMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqNoMatch));

                    if (score > bestScore)
                        bestScore = score;
                        bestValue = tvi;


            // Pick the step with the highest recorded score; ties keep the earliest step.
            bestScore = -1.0d;
            int bestIndex = 0;

            for (int i = 0; i < orderedScores.Count; i++)
                double score = orderedScores[i];

                if (score > bestScore)
                    bestScore = score;
                    bestIndex = i;

            // The test accepts the first bestIndex + 1 values of the selection order.
            KnownSymbolicValue[] testValueIndexes = new KnownSymbolicValue[bestIndex + 1];
            for (int i = 0; i <= bestIndex; i++)
                int val = orderedValueIndex[i];
                testValueIndexes[i] = new KnownSymbolicValue(val);

            return(new TestScore(new SymbolicTest(testAttr, testValueIndexes), bestScore));
コード例 #12
        /// <summary>
        /// Finds the test on each attribute performing the best split for finding the value of a 'goal'
        /// attribute.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines; the final return is not visible.
        /// </remarks>
        /// <param name="candidateAttributes">Attributes to evaluate; must be non-null and non-empty.</param>
        /// <param name="goalAttribute">Goal attribute passed to each per-attribute search; must be non-null.</param>
        /// <returns>Presumably the collected scores, one per candidate attribute (the return statement is not visible).</returns>
        /// <exception cref="ArgumentNullException">
        /// An argument is null, or <paramref name="candidateAttributes"/> is empty.
        /// </exception>
        public IEnumerable <TestScore> BestSplitTests(AttributeSet candidateAttributes, SymbolicAttribute goalAttribute)
            // NOTE(review): the empty-set case also raises ArgumentNullException, and no parameter name is given.
            if (candidateAttributes == null || goalAttribute == null || candidateAttributes.Size() == 0)
                throw new ArgumentNullException();

            List <TestScore> bestScores = new List <TestScore>();

            // Materialize the attributes once before iterating.
            List <Attribute> attributes = candidateAttributes.GetAttributes().ToList();

            // One best test per candidate attribute, in the order the attributes are enumerated.
            foreach (Attribute attr in attributes)
                bestScores.Add(BestSplitTest(attr, goalAttribute));

コード例 #13
 /// <summary>
 /// Picks, among the best tests computed for each candidate attribute, the one that is
 /// the maximum according to the default ordering of <c>TestScore</c>.
 /// </summary>
 /// <param name="candidateAttributes">The set of attributes that may be tested.</param>
 /// <param name="goalAttribute">The attribute to be guessed using the test.</param>
 /// <returns>The maximum of the per-attribute results returned by <c>BestSplitTests</c>.</returns>
 public TestScore BestSplitTest(AttributeSet candidateAttributes, SymbolicAttribute goalAttribute)
 {
     // Argument validation is left to BestSplitTests, exactly as before.
     IEnumerable<TestScore> perAttributeScores = BestSplitTests(candidateAttributes, goalAttribute);

     // NOTE(review): presumably relies on TestScore being comparable - confirm on the type.
     return perAttributeScores.Max<TestScore>();
 }
コード例 #14
 /// <summary>
 /// Builds a learning decision tree and replaces its root with a
 /// <c>LearningOpenNode</c> created from the learning set.
 /// </summary>
 /// <param name="attrSet">Attribute set, forwarded to the base constructor.</param>
 /// <param name="goalAttr">Goal attribute, forwarded to the base constructor.</param>
 /// <param name="learnignSet">Learning set handed to the new open node.</param>
 public LearningDecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr, ItemSet learnignSet)
     : base(attrSet, goalAttr)
 {
     // Same evaluation order as the single-expression form: root first, then the new node.
     var currentRoot = Root();
     LearningOpenNode openNode = new LearningOpenNode(0, learnignSet);

     currentRoot.Replace(openNode);
 }