/// <summary>
        /// Reconstructs a decision tree from a flat array of nodes.
        /// The first element must be an <c>AnchorNode</c>, the second element becomes the root,
        /// and the remaining elements are consumed in breadth-first order: every node taken from
        /// the work queue claims the next <c>NumOfSons()</c> entries of the array as its sons.
        /// </summary>
        /// <param name="attrSet">Attribute set, forwarded to the base constructor.</param>
        /// <param name="goalAttr">Goal attribute, forwarded to the base constructor.</param>
        /// <param name="nodes">Flattened nodes: anchor first, then the root, then the descendants.</param>
        /// <exception cref="ArgumentNullException"><paramref name="nodes"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="nodes"/> does not start with an anchor node, has no root, or holds fewer
        /// nodes than the son counts require.
        /// </exception>
        public TestDecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr, Node[] nodes)
            : base(attrSet, goalAttr)
        {
            if (nodes == null)
            {
                throw new ArgumentNullException("nodes");
            }

            if (nodes.Length < 2 || !(nodes[0] is AnchorNode))
            {
                throw new ArgumentException(
                          "The node array must start with an anchor node followed by the root node.", "nodes");
            }

            // Index of the last array element consumed so far (0 is the serialized anchor).
            int cur = 0;

            this._anchor = new AnchorNode(this);

            Node curNode = nodes[++cur];
            this._anchor.Replace(curNode);
            curNode.Father = this._anchor;

            Queue <Node> queue = new Queue <Node>();
            queue.Enqueue(curNode);

            while (queue.Count > 0)
            {
                curNode = queue.Dequeue();

                int nbSons = curNode.NumOfSons();

                // Fail with a descriptive error instead of an IndexOutOfRangeException
                // when the array is shorter than the tree structure announces.
                if (cur + nbSons > nodes.Length - 1)
                {
                    throw new ArgumentException(
                              "The node array holds fewer nodes than the tree structure requires.", "nodes");
                }

                List <Node> sons = new List <Node>();

                for (int i = 0; i < nbSons; i++)
                {
                    Node son = nodes[++cur];

                    sons.Add(son);
                    queue.Enqueue(son);
                }

                if (curNode is TestNode)
                {
                    // Rebuild the test node so that it references the sons just read.
                    TestNode rebuilt = new TestNode(curNode.Weight, ((TestNode)curNode).Test, sons);
                    curNode.Replace(rebuilt);

                    curNode = rebuilt;
                }
                else if (curNode is LeafNode)
                {
                    // NOTE(review): replaces the leaf with itself; kept as in the original
                    // because the side effects of Replace are not visible here.
                    curNode.Replace((LeafNode)curNode);
                }

                foreach (Node son in sons)
                {
                    son.Father = curNode;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a symbolic test on an attribute from a sequence of known symbolic values.
        /// </summary>
        /// <param name="attr">The tested attribute, forwarded to the base constructor.</param>
        /// <param name="values">The values of this test; they are copied into a private array.</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public SymbolicTest(SymbolicAttribute attr, IEnumerable <KnownSymbolicValue> values)
            : base(attr)
        {
            if (values == null)
            {
                // Name the offending parameter so the failure is diagnosable.
                throw new ArgumentNullException("values");
            }

            _values = values.ToArray();
        }
        /// <summary>
        /// Creates an empty decision tree object: a fresh anchor node plus the given
        /// attribute set and goal attribute.
        /// </summary>
        /// <param name="attrSet">The attribute set stored by the tree.</param>
        /// <param name="goalAttr">The goal attribute stored by the tree.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="attrSet"/> or <paramref name="goalAttr"/> is null.
        /// </exception>
        public DecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr)
        {
            // Checked separately so the exception names the argument that was null.
            if (attrSet == null)
            {
                throw new ArgumentNullException("attrSet");
            }

            if (goalAttr == null)
            {
                throw new ArgumentNullException("goalAttr");
            }

            _anchor        = new AnchorNode(this);
            _attributeSet  = attrSet;
            _goalAttribute = goalAttr;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Dispatches to the symbolic or the numerical <c>BestSplitTest</c> overload according to
 /// the runtime type of <paramref name="testAttribute"/>.
 /// </summary>
 /// <param name="testAttribute">The attribute to test; must be symbolic or numerical.</param>
 /// <param name="goalAttribute">The goal attribute, passed through to the selected overload.</param>
 /// <returns>The result of the selected overload.</returns>
 /// <exception cref="ArgumentException">
 /// <paramref name="testAttribute"/> is neither a <c>SymbolicAttribute</c> nor a
 /// <c>NumericalAttribute</c> (this includes null).
 /// </exception>
 public TestScore BestSplitTest(Attribute testAttribute, SymbolicAttribute goalAttribute)
 {
     bool isSymbolic = testAttribute is SymbolicAttribute;

     // Reject anything that matches neither supported attribute kind up front.
     if (!isSymbolic && !(testAttribute is NumericalAttribute))
     {
         throw new ArgumentException("Unknow attribute type.");
     }

     return isSymbolic
            ? BestSplitTest((SymbolicAttribute)testAttribute, goalAttribute)
            : BestSplitTest((NumericalAttribute)testAttribute, goalAttribute);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Creates a builder over a learning item set and prepares a new
        /// <c>LearningDecisionTree</c> built from that set's attribute set, the goal attribute
        /// and the learning items.
        /// </summary>
        /// <param name="learningItemSet">The items to learn from; must be non-null and non-empty.</param>
        /// <param name="testAttributeSet">The attribute set stored as the builder's test attributes.</param>
        /// <param name="goalAttribute">The goal attribute stored by the builder and given to the tree.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="learningItemSet"/> is null or contains no items.
        /// </exception>
        public SimpleDecisionTreeBuilder(ItemSet learningItemSet, AttributeSet testAttributeSet, SymbolicAttribute goalAttribute)
        {
            if (learningItemSet == null)
            {
                throw new ArgumentNullException("learningItemSet");
            }

            if (learningItemSet.NumOfItems() == 0)
            {
                // The exception type is kept for callers that already catch it;
                // only the parameter name and message are new.
                throw new ArgumentNullException("learningItemSet",
                                                "The learning item set must contain at least one item.");
            }

            this._learningSet      = learningItemSet;
            this._testAttributeSet = testAttributeSet;
            this._goalAttribute    = goalAttribute;

            this._tree = new LearningDecisionTree(learningItemSet.AttrSet, goalAttribute, learningItemSet);
        }
        /// <summary>
        /// Returns the distribution of goal values over the learning set of this node.
        /// The i-th element of the returned array is the accumulated weight of the items whose
        /// goal value is i, divided by the size of the (weighted) item set.
        /// </summary>
        /// <returns>
        /// An array describing the goal value distribution associated to this node, or null when
        /// the node has no tree, no learning set, or the tree has no goal attribute.
        /// </returns>
        public override double[] GetGoalValueDistribution()
        {
            DecisionTree dt = base.Tree();

            if (dt == null || _learningSet == null)
            {
                return(null);
            }

            // Reuse the learning set when it is already weighted, otherwise wrap it.
            WeightedItemSet itemSet = _learningSet as WeightedItemSet ?? new WeightedItemSet(_learningSet);

            SymbolicAttribute goalAttr = dt.GoalAttribute;

            if (goalAttr == null)
            {
                return(null);
            }

            // Loop-invariant lookups, evaluated once instead of once per iteration.
            int    goalIndex = itemSet.AttrSet.IndexOf(goalAttr);
            int    nbItems   = itemSet.NumOfItems();
            double totalSize = itemSet.Size();

            // Accumulate the weight carried by each goal value.
            double[] frequencies = new double[goalAttr.NumOfValues];

            for (int i = 0; i < nbItems; i++)
            {
                int id = ((KnownSymbolicValue)(itemSet.Items[i].ValueOf(goalIndex))).IntValue;
                frequencies[id] += itemSet.GetWeight(i);
            }

            // NOTE(review): if Size() can be 0 the entries become NaN, exactly as before — confirm
            // whether an empty learning set can reach this point.
            for (int i = 0; i < frequencies.Length; i++)
            {
                frequencies[i] /= totalSize;
            }

            return(frequencies);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Computes the entropy of the set regarding a given symbolic attribute.
        /// The result is cached and reused while the same attribute is requested again.
        /// </summary>
        /// <param name="attr">the attribute against which to compute the entropy.</param>
        /// <returns>The entropy of the per-value item counts of <paramref name="attr"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="attr"/> is not contained in the attribute set of this item set.
        /// </exception>
        public virtual double CalEntropy(SymbolicAttribute attr)
        {
            if (!_attributeSet.Contains(attr))
            {
                throw new ArgumentException("Unknown attribute");
            }

            // A negative cached entropy means "not computed yet".
            if (this._entropy < 0.0d || !_entropyAttribute.Equals(attr))
            {
                double[] frequencies = new double[attr.NumOfValues];

                // The attribute's position does not change during the scan: look it up once.
                int attrIndex = _attributeSet.IndexOf(attr);

                for (int i = 0; i < _items.Count; i++)
                {
                    KnownSymbolicValue sv = (KnownSymbolicValue)(_items[i].ValueOf(attrIndex));
                    frequencies[sv.IntValue]++;
                }

                this._entropy     = Entropy.CalEntropy(frequencies);
                _entropyAttribute = attr;
            }

            return(_entropy);
        }
        /// <summary>
        /// Finds the best split test on an attribute while ignoring the items whose value for
        /// that attribute is unknown. The score found on the known items is scaled by the
        /// fraction of items with a known value.
        /// </summary>
        /// <param name="testAttr">The attribute to test.</param>
        /// <param name="goalAttr">The goal attribute.</param>
        /// <returns>
        /// The scaled best test, or a placeholder test with score 0 when no item has a known value.
        /// </returns>
        protected TestScore BestSplitTest(Attribute testAttr, SymbolicAttribute goalAttr)
        {
            ItemSet itemsWithValue = new ItemSet(_attributeSet);
            int     knownCount     = 0;

            foreach (Item candidate in _items)
            {
                if (candidate.ValueOf(_attributeSet, testAttr).IsUnknown())
                {
                    continue;
                }

                itemsWithValue.Add(candidate);
                knownCount++;
            }

            if (knownCount != 0)
            {
                TestScore partial = itemsWithValue.BestSplitTest(testAttr, goalAttr);

                return(new TestScore(partial.Test, partial.Score * (double)knownCount / Items.Count));
            }

            // No information can be gained from this test: hand back a placeholder of the right kind.
            Test placeholder = testAttr is SymbolicAttribute
                               ? (Test) new SymbolicTest((SymbolicAttribute)testAttr,
                                                         new KnownSymbolicValue[] { new KnownSymbolicValue(0) })
                               : new NumericalTest((NumericalAttribute)testAttr, 0.0d);

            return(new TestScore(placeholder, 0.0d));
        }
        /// <summary>
        /// This method computes the entropy of the set regarding a given symbolic attribute.
        /// The frequency of each value of this attribute is counted according to the weights.
        /// The value of this attribute must be known for all the items of this set.
        /// </summary>
        /// <param name="attr">the attribute against which to compute the entropy.</param>
        /// <returns>entropy</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="attr"/> is not contained in the attribute set of this item set.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// An item does not hold a known symbolic value for <paramref name="attr"/>.
        /// </exception>
        public override double CalEntropy(SymbolicAttribute attr)
        {
            if (!_attributeSet.Contains(attr))
            {
                throw new ArgumentException("Unknown attribute");
            }

            // A negative cached entropy means "not computed yet".
            if (_entropy < 0.0d || !_entropyAttribute.Equals(attr))
            {
                double[] freqs = new double[attr.NumOfValues];

                for (int i = 0; i < Items.Count; i++)
                {
                    KnownSymbolicValue sv = Items[i].ValueOf(_attributeSet, attr) as KnownSymbolicValue;

                    if (sv == null)
                    {
                        // Report the broken precondition explicitly instead of failing with a
                        // NullReferenceException on the next line.
                        throw new InvalidOperationException(
                                  "Cannot compute the entropy: item " + i + " has no known symbolic value for the attribute.");
                    }

                    freqs[sv.IntValue] += _weights[i];
                }

                _entropy          = Biotracker.Signature.DT.Entropy.CalEntropy(freqs);
                _entropyAttribute = attr;
            }

            return(this._entropy);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Finds the best splitting test involving a numerical attribute.
        /// The (test value, goal value) pairs of all items are sorted and scanned once; each
        /// boundary between two different consecutive test values is scored as the entropy of the
        /// goal attribute minus the size-weighted entropies of the lower and higher parts. The
        /// threshold of the best boundary is the midpoint of the two neighbouring test values.
        /// </summary>
        /// <param name="testAttr">The numerical attribute to split on.</param>
        /// <param name="goalAttr">The goal attribute.</param>
        /// <returns>
        /// The best threshold test and its score; a test with threshold 0 and score 0 when the
        /// item set is empty.
        /// </returns>
        private TestScore BestSplitTest(NumericalAttribute testAttr, SymbolicAttribute goalAttr)
        {
            // Nothing to split: return the same zero-information test that is used elsewhere in
            // this file for the "no known value" case, instead of failing on tgv[0] below.
            if (_items.Count == 0)
            {
                return(new TestScore(new NumericalTest(testAttr, 0.0d), 0.0d));
            }

            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            //frequencyLower (frequencyHigher) counts the number of items lower
            //(higher) than the threshold for each goal value.  In the beginning,
            //frequencyLower is zeroed because the threshold is chosen small.
            double[] freqLower  = new double[goalNbVal];
            double[] freqHigher = new double[goalNbVal];

            for (int gvi = 0; gvi < goalNbVal; gvi++)
            {
                SymbolicTest valTest = new SymbolicTest(goalAttr,
                                                        new KnownSymbolicValue[] { new KnownSymbolicValue(gvi) });

                // NOTE(review): element 1 of the split is presumably the subset whose goal value
                // equals gvi — confirm against Split().
                freqHigher[gvi] = Split(valTest).ElementAt(1).Size();
            }

            //Those two variables hold sum of the elements of the corresponding array.
            double freqLowerSum  = 0.0d;
            double freqHigherSum = (double)_items.Count;

            List <TestGoalValue> tgv = new List <TestGoalValue>();

            for (int i = 0; i < _items.Count; i++)
            {
                double testVal = ((KnownNumericalValue)(this._items[i].ValueOf(testIndex))).Value;
                int    goalVal = ((KnownSymbolicValue)(this._items[i].ValueOf(goalIndex))).IntValue;
                tgv.Add(new TestGoalValue(testVal, goalVal));
            }

            // Uses the default ordering of TestGoalValue (presumably by test value — confirm).
            tgv.Sort();

            int    goalValue, goalValueNew = tgv[0].GoalValue;
            double testValue, testValueNew = tgv[0].TestValue;

            double bestScore     = 0.0d;
            double bestThreshold = testValueNew;

            for (int i = 1; i < _items.Count; i++)
            {
                testValue    = testValueNew;
                goalValue    = goalValueNew;
                testValueNew = tgv[i].TestValue;
                goalValueNew = tgv[i].GoalValue;

                // The previous item moves from the "higher" side to the "lower" side.
                freqLower[goalValue]++;
                freqLowerSum++;
                freqHigher[goalValue]--;
                freqHigherSum--;

                // A threshold can only separate items whose test values actually differ.
                if (testValue != testValueNew)
                {
                    double score = CalEntropy(goalAttr)
                                   - (freqLowerSum / _items.Count) * Entropy.CalEntropy(freqLower)
                                   - (freqHigherSum / _items.Count) * Entropy.CalEntropy(freqHigher);

                    if (score > bestScore)
                    {
                        bestScore     = score;
                        bestThreshold = (testValue + testValueNew) / 2.0d;
                    }
                }
            }

            return(new TestScore(new NumericalTest(testAttr, bestThreshold), bestScore));
        }
Exemplo n.º 11
0
        /// <summary>
        /// Finds the best splitting test involving a Symbolic attribute.
        /// First the single test value giving the highest score is selected; then the remaining
        /// values are added to the group greedily, one at a time, each step picking the value
        /// whose addition gives the highest score. The returned test holds the prefix of that
        /// ordered list which reached the highest score.
        /// A score is the entropy of the goal attribute minus the size-weighted entropies of
        /// the matching and non-matching parts.
        /// </summary>
        /// <param name="testAttr">Symbolic attribute for test</param>
        /// <param name="goalAttr">The goal attribute.</param>
        /// <returns>
        /// The best symbolic test and its score; a test on value 0 with score 0 when the item
        /// set is empty.
        /// </returns>
        protected TestScore BestSplitTest(SymbolicAttribute testAttr, SymbolicAttribute goalAttr)
        {
            // Nothing to split: return the same zero-information test that is used elsewhere in
            // this file for the "no known value" case, instead of dividing by zero below.
            if (_items.Count == 0)
            {
                return(new TestScore(new SymbolicTest(testAttr,
                                                      new KnownSymbolicValue[] { new KnownSymbolicValue(0) }),
                                     0.0d));
            }

            int testNbVal = testAttr.NumOfValues;
            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            //freqMatch[tvi][gvi] is the number of items that has a value equal to tvi for their 'test'
            //attribute and value equal to 'gvi' for their 'goal' attribute.
            //freqMatchSum[tvi] is the sum of the frequencyMatch[tvi][gvi] elements (for all gvi).
            double[][] freqMatch = new double[testNbVal][];
            //Identically for the items that do not have tvi as a test attribute value.
            double[][] freqNoMatch = new double[testNbVal][];

            for (int i = 0; i < testNbVal; i++)
            {
                freqMatch[i]   = new double[goalNbVal];
                freqNoMatch[i] = new double[goalNbVal];
            }

            double[] freqMatchSum   = new double[testNbVal];
            double[] freqNoMatchSum = new double[testNbVal];

            for (int i = 0; i < _items.Count; i++)
            {
                int testVal = ((KnownSymbolicValue)(_items[i].ValueOf(testIndex))).IntValue;
                int goalVal = ((KnownSymbolicValue)(_items[i].ValueOf(goalIndex))).IntValue;

                for (int tvi = 0; tvi < testNbVal; tvi++)
                {
                    if (testVal == tvi)
                    {
                        freqMatch[tvi][goalVal]++;
                        freqMatchSum[tvi]++;
                    }
                    else
                    {
                        freqNoMatch[tvi][goalVal]++;
                        freqNoMatchSum[tvi]++;
                    }
                }
            }

            // The goal entropy is the same for every candidate: compute it once.
            double goalEntropy = CalEntropy(goalAttr);

            // Step 1: best single test value.
            double bestScore = -1.0d;
            int    bestValue = -1;

            for (int tvi = 0; tvi < testNbVal; tvi++)
            {
                double score = goalEntropy
                               - ((freqMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqMatch[tvi]))
                               - ((freqNoMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqNoMatch[tvi]));

                if (score > bestScore)
                {
                    bestScore = score;
                    bestValue = tvi;
                }
            }

            //Group the attribute values one by one
            List <int> remainTestValueIndexes = new List <int>();

            for (int i = 0; i < testNbVal; i++)
            {
                remainTestValueIndexes.Add(i);
            }

            // remainingFreqMatch: per-goal-value counts of the items whose test value is already
            // in the group. remainingFreqNoMatch starts as the total count per goal value.
            double[] remainingFreqMatch   = new double[goalNbVal];
            double[] remainingFreqNoMatch = new double[goalNbVal];

            for (int gvi = 0; gvi < goalNbVal; gvi++)
            {
                remainingFreqNoMatch[gvi] = freqMatch[0][gvi] + freqNoMatch[0][gvi];
            }

            double remainingFreqMatchSum   = 0.0d;
            double remainingFreqNoMatchSum = (double)(_items.Count);

            // orderedValueIndex[k] is the k-th value added to the group and orderedScores[k]
            // the score of the group made of the first k+1 values.
            List <int>    orderedValueIndex = new List <int>();
            List <double> orderedScores     = new List <double>();

            orderedValueIndex.Add(bestValue);
            orderedScores.Add(bestScore);

            //Remove values until only one is left
            while (remainTestValueIndexes.Count >= 2)
            {
                //Update remaining Frequency.. arrays according to the last test attribute value removed.
                remainTestValueIndexes.Remove(bestValue);

                for (int gvi = 0; gvi < goalNbVal; gvi++)
                {
                    remainingFreqMatch[gvi]   += freqMatch[bestValue][gvi];
                    remainingFreqNoMatch[gvi] -= freqMatch[bestValue][gvi];
                }

                remainingFreqMatchSum   += freqMatchSum[bestValue];
                remainingFreqNoMatchSum -= freqMatchSum[bestValue];

                double nextScore = -1.0d;
                int    nextValue = -1;

                //Find the next best test attribute value
                for (int i = 0; i < remainTestValueIndexes.Count; i++)
                {
                    int tvi = remainTestValueIndexes[i];

                    double[] thisFreqMatch   = new double[goalNbVal];
                    double[] thisFreqNoMatch = new double[goalNbVal];

                    for (int gvi = 0; gvi < goalNbVal; gvi++)
                    {
                        thisFreqMatch[gvi]   = freqMatch[tvi][gvi] + remainingFreqMatch[gvi];
                        thisFreqNoMatch[gvi] = remainingFreqNoMatch[gvi] - freqMatch[tvi][gvi];
                    }

                    double thisFreqMatchSum   = freqMatchSum[tvi] + remainingFreqMatchSum;
                    double thisFreqNoMatchSum = remainingFreqNoMatchSum - freqMatchSum[tvi];

                    double score = goalEntropy
                                   - ((thisFreqMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqMatch))
                                   - ((thisFreqNoMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqNoMatch));

                    if (score > nextScore)
                    {
                        nextScore = score;
                        nextValue = tvi;
                    }
                }

                if (nextValue < 0)
                {
                    // No candidate produced a usable score (e.g. NaN). Stop here: carrying on would
                    // try to remove an already removed value and never terminate.
                    break;
                }

                bestScore = nextScore;
                bestValue = nextValue;

                // Record every grouping step. These two lines used to sit after the loop, so only
                // the last step was stored and the prefix returned below did not match its score.
                orderedScores.Add(bestScore);
                orderedValueIndex.Add(bestValue);
            }

            // Pick the prefix of the ordered values that reached the highest score.
            bestScore = -1.0d;
            int bestIndex = 0;

            for (int i = 0; i < orderedScores.Count; i++)
            {
                double score = orderedScores[i];

                if (score > bestScore)
                {
                    bestScore = score;
                    bestIndex = i;
                }
            }

            KnownSymbolicValue[] testValueIndexes = new KnownSymbolicValue[bestIndex + 1];
            for (int i = 0; i <= bestIndex; i++)
            {
                int val = orderedValueIndex[i];
                testValueIndexes[i] = new KnownSymbolicValue(val);
            }

            return(new TestScore(new SymbolicTest(testAttr, testValueIndexes), bestScore));
        }
Exemplo n.º 12
0
        /// <summary>
        /// Computes, for every candidate attribute, the test performing the best split for
        /// finding the value of a 'goal' attribute.
        /// </summary>
        /// <param name="candidateAttributes">The attributes to evaluate; must be non-null and non-empty.</param>
        /// <param name="goalAttribute">The goal attribute; must be non-null.</param>
        /// <returns>One <c>TestScore</c> per candidate attribute, in the order of the attributes.</returns>
        /// <exception cref="ArgumentNullException">
        /// An argument is null or <paramref name="candidateAttributes"/> is empty.
        /// </exception>
        public IEnumerable <TestScore> BestSplitTests(AttributeSet candidateAttributes, SymbolicAttribute goalAttribute)
        {
            bool hasWork = candidateAttributes != null &&
                           goalAttribute != null &&
                           candidateAttributes.Size() != 0;

            if (!hasWork)
            {
                throw new ArgumentNullException();
            }

            // Snapshot the attributes first, then score each one eagerly.
            List <Attribute> snapshot = candidateAttributes.GetAttributes().ToList();

            List <TestScore> scores = snapshot
                                      .Select(candidate => BestSplitTest(candidate, goalAttribute))
                                      .ToList();

            return(scores);
        }
Exemplo n.º 13
0
 /// <summary>
 /// Returns the highest-ranked test among the per-attribute best split tests computed by
 /// <c>BestSplitTests</c> for finding the value of a 'goal' attribute.
 /// </summary>
 /// <param name="candidateAttributes">The set of attributes defining which attributes can be tested</param>
 /// <param name="goalAttribute">The attribute to guess using the test</param>
 /// <returns>The maximum <c>TestScore</c> according to the ordering used by <c>Max</c>.</returns>
 public TestScore BestSplitTest(AttributeSet candidateAttributes, SymbolicAttribute goalAttribute)
 {
     var perAttributeBest = BestSplitTests(candidateAttributes, goalAttribute);

     return(perAttributeBest.Max <TestScore>());
 }
Exemplo n.º 14
0
 /// <summary>
 /// Creates a learning decision tree whose root is replaced by a single
 /// <c>LearningOpenNode</c> built from the learning set.
 /// </summary>
 /// <param name="attrSet">Attribute set, forwarded to the base constructor.</param>
 /// <param name="goalAttr">Goal attribute, forwarded to the base constructor.</param>
 /// <param name="learnignSet">The learning set handed to the initial open node.</param>
 public LearningDecisionTree(AttributeSet attrSet, SymbolicAttribute goalAttr, ItemSet learnignSet)
     : base(attrSet, goalAttr)
 {
     var currentRoot = Root();

     currentRoot.Replace(new LearningOpenNode(0, learnignSet));
 }