Пример #1
0
        /// <summary>
        /// Evaluates this membership test on a symbolic value by comparing it, through
        /// <c>Equals</c>, with each admitted value held in <c>_values</c>.
        /// </summary>
        /// <param name="val">The symbolic value to look for among the admitted values.</param>
        /// <returns>1 if at least one admitted value equals <paramref name="val"/>; otherwise 0.</returns>
        public int Perform(KnownSymbolicValue val)
        {
            bool admitted = false;

            foreach (KnownSymbolicValue candidate in _values)
            {
                if (candidate.Equals(val))
                {
                    // The first match settles the outcome; the rest need not be inspected.
                    admitted = true;
                    break;
                }
            }

            return admitted ? 1 : 0;
        }
Пример #2
0
        /// <summary>
        /// Computes the entropy of the set regarding a given symbolic attribute.
        /// The result is cached in <c>_entropy</c> together with the attribute it was computed for
        /// (<c>_entropyAttribute</c>); it is recomputed when the cached value is negative or when
        /// a different attribute is requested.
        /// </summary>
        /// <param name="attr">The attribute against which to compute the entropy.</param>
        /// <returns>
        /// The value returned by <c>Entropy.CalEntropy</c> for the per-value item counts of
        /// <paramref name="attr"/>, or the cached value from a previous call for the same attribute.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="attr"/> is not contained in the attribute set of this item set.
        /// </exception>
        public virtual double CalEntropy(SymbolicAttribute attr)
        {
            if (!_attributeSet.Contains(attr))
            {
                throw new ArgumentException("Unknown attribute");
            }

            if (this._entropy < 0.0d || !_entropyAttribute.Equals(attr))
            {
                double[] frequencies = new double[attr.NumOfValues];

                // The attribute's position does not depend on the item, so it is resolved once
                // here instead of once per item inside the loop.
                int attrIndex = _attributeSet.IndexOf(attr);

                for (int i = 0; i < _items.Count; i++)
                {
                    // The direct cast fails with InvalidCastException when the item's value
                    // is not a KnownSymbolicValue.
                    KnownSymbolicValue sv = (KnownSymbolicValue)(_items[i].ValueOf(attrIndex));
                    frequencies[sv.IntValue]++;
                }

                this._entropy     = Entropy.CalEntropy(frequencies);
                _entropyAttribute = attr;
            }

            return _entropy;
        }
        /// <summary>
        /// Computes the entropy of the set regarding a given symbolic attribute.
        /// The frequency of each value of this attribute is accumulated from the item weights
        /// (<c>_weights[i]</c> for item <c>i</c>) instead of counting each item as one.
        /// The value of this attribute must be known for all the items of this set.
        /// The result is cached and reused while the same attribute is requested and the cached
        /// value is not negative.
        /// </summary>
        /// <param name="attr">The attribute against which to compute the entropy.</param>
        /// <returns>The weighted entropy of the set with respect to <paramref name="attr"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="attr"/> is not contained in the attribute set of this item set.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// An item's value for <paramref name="attr"/> is not a <c>KnownSymbolicValue</c>.
        /// </exception>
        public override double CalEntropy(SymbolicAttribute attr)
        {
            if (!_attributeSet.Contains(attr))
            {
                throw new ArgumentException("Unknown attribute");
            }

            if (_entropy < 0.0d || !_entropyAttribute.Equals(attr))
            {
                double[] freqs = new double[attr.NumOfValues];

                for (int i = 0; i < Items.Count; i++)
                {
                    KnownSymbolicValue sv = Items[i].ValueOf(_attributeSet, attr) as KnownSymbolicValue;

                    // 'as' yields null for a missing or non-known value; fail with a clear
                    // message instead of a bare NullReferenceException on sv.IntValue.
                    if (object.ReferenceEquals(sv, null))
                    {
                        throw new InvalidOperationException(
                            "The value of the attribute is not a known symbolic value for item " + i + ".");
                    }

                    freqs[sv.IntValue] += _weights[i];
                }

                _entropy          = Biotracker.Signature.DT.Entropy.CalEntropy(freqs);
                _entropyAttribute = attr;
            }

            return this._entropy;
        }
Пример #4
0
        /// <summary>
        /// Finds the best splitting test involving a symbolic attribute.
        /// The resulting test checks whether an item's value for <paramref name="testAttr"/>
        /// belongs to a group of values. The group starts with the single best-scoring value and
        /// is then grown greedily, one value at a time; the best-scoring prefix of that growth
        /// order is the group that is returned.
        /// </summary>
        /// <remarks>
        /// The score of a candidate group is <c>CalEntropy(goalAttr)</c> minus the entropies of the
        /// goal values among the matching and the non-matching items, each weighted by the
        /// fraction of items falling in that part.
        /// </remarks>
        /// <param name="testAttr">Symbolic attribute the test is built on.</param>
        /// <param name="goalAttr">Symbolic attribute whose value distribution the split is scored against.</param>
        /// <returns>
        /// A <c>TestScore</c> holding a <c>SymbolicTest</c> on <paramref name="testAttr"/> with the
        /// selected group of values, together with the score of that group.
        /// </returns>
        protected TestScore BestSplitTest(SymbolicAttribute testAttr, SymbolicAttribute goalAttr)
        {
            int testNbVal = testAttr.NumOfValues;
            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            // freqMatch[tvi][gvi] is the number of items whose 'test' attribute equals tvi and
            // whose 'goal' attribute equals gvi; freqMatchSum[tvi] is the sum over all gvi.
            // freqNoMatch / freqNoMatchSum hold the same counts for the items whose 'test'
            // attribute differs from tvi.
            double[][] freqMatch   = new double[testNbVal][];
            double[][] freqNoMatch = new double[testNbVal][];
            for (int i = 0; i < testNbVal; i++)
            {
                freqMatch[i]   = new double[goalNbVal];
                freqNoMatch[i] = new double[goalNbVal];
            }

            double[] freqMatchSum   = new double[testNbVal];
            double[] freqNoMatchSum = new double[testNbVal];

            for (int i = 0; i < _items.Count; i++)
            {
                int testVal = ((KnownSymbolicValue)(_items[i].ValueOf(testIndex))).IntValue;
                int goalVal = ((KnownSymbolicValue)(_items[i].ValueOf(goalIndex))).IntValue;

                for (int tvi = 0; tvi < testNbVal; tvi++)
                {
                    if (testVal == tvi)
                    {
                        freqMatch[tvi][goalVal]++;
                        freqMatchSum[tvi]++;
                    }
                    else
                    {
                        freqNoMatch[tvi][goalVal]++;
                        freqNoMatchSum[tvi]++;
                    }
                }
            }

            // The goal entropy is the same for every candidate, so it is evaluated once.
            double goalEntropy = CalEntropy(goalAttr);

            // Step 1: score every single-value test and keep the best one.
            double bestScore = -1.0d;
            int    bestValue = -1;

            for (int tvi = 0; tvi < testNbVal; tvi++)
            {
                double score = goalEntropy
                               - ((freqMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqMatch[tvi]))
                               - ((freqNoMatchSum[tvi] / _items.Count) * Entropy.CalEntropy(freqNoMatch[tvi]));

                if (score > bestScore)
                {
                    bestScore = score;
                    bestValue = tvi;
                }
            }

            // Step 2: group the attribute values one by one.
            List<int> remainTestValueIndexes = new List<int>(testNbVal);
            for (int i = 0; i < testNbVal; i++)
            {
                remainTestValueIndexes.Add(i);
            }

            // Goal-value counts of the items inside (Match) and outside (NoMatch) the group built
            // so far. The group starts empty, so NoMatch starts with the counts over all items:
            // every item is counted in exactly one of freqMatch[0] / freqNoMatch[0].
            double[] remainingFreqMatch   = new double[goalNbVal];
            double[] remainingFreqNoMatch = new double[goalNbVal];
            for (int gvi = 0; gvi < goalNbVal; gvi++)
            {
                remainingFreqNoMatch[gvi] = freqMatch[0][gvi] + freqNoMatch[0][gvi];
            }

            double remainingFreqMatchSum   = 0.0d;
            double remainingFreqNoMatchSum = (double)(_items.Count);

            // orderedValueIndex[k] is the value added to the group at step k and orderedScores[k]
            // is the score of the group made of orderedValueIndex[0..k].
            List<int>    orderedValueIndex = new List<int>();
            List<double> orderedScores     = new List<double>();

            orderedValueIndex.Add(bestValue);
            orderedScores.Add(bestScore);

            // Remove values until only one is left.
            while (remainTestValueIndexes.Count >= 2)
            {
                // Move the last selected value from the remaining values into the group.
                // List<int>.Remove(int) removes by value, not by position.
                remainTestValueIndexes.Remove(bestValue);

                for (int gvi = 0; gvi < goalNbVal; gvi++)
                {
                    remainingFreqMatch[gvi]   += freqMatch[bestValue][gvi];
                    remainingFreqNoMatch[gvi] -= freqMatch[bestValue][gvi];
                }

                remainingFreqMatchSum   += freqMatchSum[bestValue];
                remainingFreqNoMatchSum -= freqMatchSum[bestValue];

                // Find the remaining value whose addition to the group scores best.
                double nextScore = -1.0d;
                int    nextValue = -1;

                for (int i = 0; i < remainTestValueIndexes.Count; i++)
                {
                    int tvi = remainTestValueIndexes[i];

                    double[] thisFreqMatch   = new double[goalNbVal];
                    double[] thisFreqNoMatch = new double[goalNbVal];

                    for (int gvi = 0; gvi < goalNbVal; gvi++)
                    {
                        thisFreqMatch[gvi]   = freqMatch[tvi][gvi] + remainingFreqMatch[gvi];
                        thisFreqNoMatch[gvi] = remainingFreqNoMatch[gvi] - freqMatch[tvi][gvi];
                    }

                    double thisFreqMatchSum   = freqMatchSum[tvi] + remainingFreqMatchSum;
                    double thisFreqNoMatchSum = remainingFreqNoMatchSum - freqMatchSum[tvi];

                    double score = goalEntropy
                                   - ((thisFreqMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqMatch))
                                   - ((thisFreqNoMatchSum / _items.Count) * Entropy.CalEntropy(thisFreqNoMatch));

                    if (score > nextScore)
                    {
                        nextScore = score;
                        nextValue = tvi;
                    }
                }

                if (nextValue < 0)
                {
                    // No remaining value produced a score above -1 (for instance every score
                    // was NaN). Continuing would re-process the previous value: nothing would
                    // be removed from the remaining list and the loop could not make progress.
                    break;
                }

                bestScore = nextScore;
                bestValue = nextValue;

                // Record every step of the growth order, not only the last one, so that each
                // recorded score corresponds exactly to the prefix of values recorded with it.
                orderedScores.Add(bestScore);
                orderedValueIndex.Add(bestValue);
            }

            // Step 3: select the best-scoring prefix of the growth order
            // (the earliest one wins on ties because the comparison is strict).
            double selectedScore = -1.0d;
            int    bestIndex     = 0;

            for (int i = 0; i < orderedScores.Count; i++)
            {
                if (orderedScores[i] > selectedScore)
                {
                    selectedScore = orderedScores[i];
                    bestIndex     = i;
                }
            }

            KnownSymbolicValue[] testValueIndexes = new KnownSymbolicValue[bestIndex + 1];
            for (int i = 0; i <= bestIndex; i++)
            {
                testValueIndexes[i] = new KnownSymbolicValue(orderedValueIndex[i]);
            }

            return new TestScore(new SymbolicTest(testAttr, testValueIndexes), selectedScore);
        }