/// <summary>
        /// Scores the best split on <paramref name="testAttr"/> using only the items whose value
        /// for that attribute is known, then scales that score by the share of items that were known.
        /// </summary>
        /// <param name="testAttr">Attribute the candidate test is built on.</param>
        /// <param name="goalAttr">Symbolic attribute the split is evaluated against.</param>
        /// <returns>
        /// The best test found on the known items together with its scaled score, or a placeholder
        /// test with a score of 0 when no item has a known value for <paramref name="testAttr"/>.
        /// </returns>
        protected TestScore BestSplitTest(Attribute testAttr, SymbolicAttribute goalAttr)
        {
            ItemSet itemsWithValue = new ItemSet(_attributeSet);
            int     knownCount     = 0;

            // Keep only the items that actually carry a value for the tested attribute.
            foreach (Item candidate in _items)
            {
                if (candidate.ValueOf(_attributeSet, testAttr).IsUnknown())
                {
                    continue;
                }

                itemsWithValue.Add(candidate);
                knownCount++;
            }

            if (knownCount != 0)
            {
                TestScore bestOnKnown = itemsWithValue.BestSplitTest(testAttr, goalAttr);

                // Scale the score by the fraction of items the test could be evaluated on.
                return new TestScore(bestOnKnown.Test, bestOnKnown.Score * (double)knownCount / Items.Count);
            }

            // No information can be gained from this test: hand back a placeholder of the
            // matching kind (symbolic or numerical) with a zero score.
            bool isSymbolic = testAttr is SymbolicAttribute;
            Test placeholder = isSymbolic
                ? (Test)new SymbolicTest((SymbolicAttribute)testAttr,
                                         new KnownSymbolicValue[] { new KnownSymbolicValue(0) })
                : new NumericalTest((NumericalAttribute)testAttr, 0.0d);

            return new TestScore(placeholder, 0.0d);
        }
// Example #2
        /// <summary>
        /// Finds the best splitting test involving a numerical attribute.
        /// </summary>
        /// <remarks>
        /// The items are sorted by their value of <paramref name="testAttr"/>. The midpoint between
        /// each pair of consecutive, distinct values is tried as a threshold and scored as
        /// <c>CalEntropy(goalAttr)</c> minus the size-weighted <c>Entropy.CalEntropy</c> of the
        /// goal-value counts below and above the threshold. The highest-scoring threshold wins.
        /// Every item's test value is cast to <c>KnownNumericalValue</c> and its goal value to
        /// <c>KnownSymbolicValue</c>; an item holding any other kind of value makes the cast fail.
        /// </remarks>
        /// <param name="testAttr">Numerical attribute a threshold is searched for.</param>
        /// <param name="goalAttr">Symbolic attribute whose values the split should separate.</param>
        /// <returns>
        /// A numerical test on the best threshold found and its score. When no threshold improves
        /// on a score of 0, the threshold is the smallest test value and the score is 0. For an
        /// empty item set, a test with threshold 0 and score 0 is returned.
        /// </returns>
        private TestScore BestSplitTest(NumericalAttribute testAttr, SymbolicAttribute goalAttr)
        {
            if (_items.Count == 0)
            {
                // Nothing to sort or split; reading the first sorted pair below would throw.
                return new TestScore(new NumericalTest(testAttr, 0.0d), 0.0d);
            }

            int testIndex = _attributeSet.IndexOf(testAttr);
            int goalNbVal = goalAttr.NumOfValues;
            int goalIndex = _attributeSet.IndexOf(goalAttr);

            //freqLower (freqHigher) counts the number of items lower
            //(higher) than the threshold for each goal value.  In the beginning,
            //freqLower is zeroed because the threshold is chosen small.
            double[] freqLower  = new double[goalNbVal];
            double[] freqHigher = new double[goalNbVal];

            for (int gvi = 0; gvi < goalNbVal; gvi++)
            {
                SymbolicTest valTest = new SymbolicTest(goalAttr,
                                                        new KnownSymbolicValue[] { new KnownSymbolicValue(gvi) });

                // Second partition of the split on "goal == gvi"; its size seeds the
                // "higher" count for that goal value.
                freqHigher[gvi] = Split(valTest).ElementAt(1).Size();
            }

            //Those two variables hold sum of the elements of the corresponding array.
            double freqLowerSum  = 0.0d;
            double freqHigherSum = (double)_items.Count;

            // Pair every item's test value with its goal value so they can be sorted together.
            List <TestGoalValue> tgv = new List <TestGoalValue>(_items.Count);

            for (int i = 0; i < _items.Count; i++)
            {
                double testVal = ((KnownNumericalValue)(this._items[i].ValueOf(testIndex))).Value;
                int    goalVal = ((KnownSymbolicValue)(this._items[i].ValueOf(goalIndex))).IntValue;
                tgv.Add(new TestGoalValue(testVal, goalVal));
            }

            tgv.Sort();

            int    goalValue, goalValueNew = tgv[0].GoalValue;
            double testValue, testValueNew = tgv[0].TestValue;

            double bestScore     = 0.0d;
            double bestThreshold = testValueNew;

            // Nothing in the loop below changes the inputs of this call, so it is
            // evaluated once instead of once per candidate threshold.
            double setEntropy = CalEntropy(goalAttr);

            for (int i = 1; i < _items.Count; i++)
            {
                testValue    = testValueNew;
                goalValue    = goalValueNew;
                testValueNew = tgv[i].TestValue;
                goalValueNew = tgv[i].GoalValue;

                // Move the previous item from the "higher" side to the "lower" side.
                freqLower[goalValue]++;
                freqLowerSum++;
                freqHigher[goalValue]--;
                freqHigherSum--;

                // A threshold can only sit between two distinct values. The list is sorted,
                // so equal values are adjacent and exact comparison is intended here.
                if (testValue != testValueNew)
                {
                    double score = setEntropy
                                   - (freqLowerSum / _items.Count) * Entropy.CalEntropy(freqLower)
                                   - (freqHigherSum / _items.Count) * Entropy.CalEntropy(freqHigher);

                    if (score > bestScore)
                    {
                        bestScore     = score;
                        bestThreshold = (testValue + testValueNew) / 2.0d;
                    }
                }
            }

            return new TestScore(new NumericalTest(testAttr, bestThreshold), bestScore);
        }