/// <summary>
/// Finds the best splitting test on <paramref name="testAttr"/> using only the items
/// whose value for that attribute is known.
/// </summary>
/// <remarks>
/// The score obtained on the known items is multiplied by the number of known items
/// and divided by the total number of items, so an attribute that is often unknown
/// is penalised proportionally.
/// </remarks>
/// <param name="testAttr">The attribute the test is built on.</param>
/// <param name="goalAttr">The symbolic attribute the split is scored against.</param>
/// <returns>
/// The test found on the known items together with its scaled score. When no item
/// has a known value for <paramref name="testAttr"/>, a placeholder test (symbolic
/// value 0, or numerical threshold 0.0) with a score of 0.0 is returned.
/// </returns>
protected TestScore BestSplitTest(Attribute testAttr, SymbolicAttribute goalAttr)
{
    // Gather the items that actually carry a value for the tested attribute.
    ItemSet itemsWithValue = new ItemSet(_attributeSet);
    int knownCount = 0;
    foreach (Item candidate in _items)
    {
        if (candidate.ValueOf(_attributeSet, testAttr).IsUnknown())
        {
            continue;
        }

        itemsWithValue.Add(candidate);
        knownCount++;
    }

    if (knownCount != 0)
    {
        // NOTE(review): this call is bound using the static type Attribute of testAttr;
        // confirm which BestSplitTest implementation on ItemSet it is meant to reach.
        TestScore scoreOnKnown = itemsWithValue.BestSplitTest(testAttr, goalAttr);

        // Weight the score by the share of items for which the attribute is known.
        return new TestScore(scoreOnKnown.Test, scoreOnKnown.Score * (double)knownCount / Items.Count);
    }

    // No information can be gained from this test: hand back a zero-score placeholder
    // whose concrete kind matches the kind of the tested attribute.
    Test placeholder = testAttr is SymbolicAttribute
        ? (Test)new SymbolicTest((SymbolicAttribute)testAttr, new KnownSymbolicValue[] { new KnownSymbolicValue(0) })
        : new NumericalTest((NumericalAttribute)testAttr, 0.0d);

    return new TestScore(placeholder, 0.0d);
}
/// <summary>
/// Finds the best splitting test involving a numerical attribute.
/// </summary>
/// <remarks>
/// The items are sorted by their value of <paramref name="testAttr"/>. Each midpoint
/// between two consecutive distinct values is tried as a threshold and scored as the
/// entropy of the goal attribute over the whole set minus the size-weighted entropies
/// of the "lower" and "higher" partitions. Only a strictly positive score replaces
/// the initial candidate (smallest test value, score 0.0).
/// Every item is cast to a known numerical value for <paramref name="testAttr"/> and a
/// known symbolic value for <paramref name="goalAttr"/>; the casts fail otherwise.
/// </remarks>
/// <param name="testAttr">The numerical attribute to find a threshold for.</param>
/// <param name="goalAttr">The symbolic attribute the split is scored against.</param>
/// <returns>
/// The best threshold test and its score. For an empty item set, a test with
/// threshold 0.0 and a score of 0.0 is returned.
/// </returns>
private TestScore BestSplitTest(NumericalAttribute testAttr, SymbolicAttribute goalAttr)
{
    int itemCount = _items.Count;
    if (itemCount == 0)
    {
        // Nothing to sort or split: return the same zero-score placeholder used when
        // no item has a known value, instead of failing on tgv[0] below.
        return new TestScore(new NumericalTest(testAttr, 0.0d), 0.0d);
    }

    int testIndex = _attributeSet.IndexOf(testAttr);
    int goalNbVal = goalAttr.NumOfValues;
    int goalIndex = _attributeSet.IndexOf(goalAttr);

    // freqLower (freqHigher) counts the number of items lower (higher) than the
    // threshold for each goal value. In the beginning freqLower is all zeros because
    // the threshold starts below every item.
    double[] freqLower = new double[goalNbVal];
    double[] freqHigher = new double[goalNbVal];
    for (int gvi = 0; gvi < goalNbVal; gvi++)
    {
        // NOTE(review): presumably element 1 of the split is the subset whose goal
        // value equals gvi — confirm against Split/SymbolicTest.
        SymbolicTest valTest = new SymbolicTest(goalAttr, new KnownSymbolicValue[] { new KnownSymbolicValue(gvi) });
        freqHigher[gvi] = Split(valTest).ElementAt(1).Size();
    }

    // These two variables hold the sum of the elements of the corresponding array.
    double freqLowerSum = 0.0d;
    double freqHigherSum = (double)itemCount;

    // Pair each item's test value with its goal value, then order the pairs using
    // TestGoalValue's own comparison.
    List<TestGoalValue> tgv = new List<TestGoalValue>(itemCount);
    for (int i = 0; i < itemCount; i++)
    {
        double testVal = ((KnownNumericalValue)(this._items[i].ValueOf(testIndex))).Value;
        int goalVal = ((KnownSymbolicValue)(this._items[i].ValueOf(goalIndex))).IntValue;
        tgv.Add(new TestGoalValue(testVal, goalVal));
    }
    tgv.Sort();

    // The entropy of the goal over the whole set does not depend on the threshold,
    // so it is evaluated once rather than for every candidate.
    double totalEntropy = CalEntropy(goalAttr);

    double bestScore = 0.0d;
    double bestThreshold = tgv[0].TestValue;

    for (int i = 1; i < itemCount; i++)
    {
        double testValue = tgv[i - 1].TestValue;
        int goalValue = tgv[i - 1].GoalValue;
        double testValueNew = tgv[i].TestValue;

        // Move the previous item from the "higher" side to the "lower" side.
        freqLower[goalValue]++;
        freqLowerSum++;
        freqHigher[goalValue]--;
        freqHigherSum--;

        // A threshold can only be placed between two distinct values; the exact
        // comparison is intentional because the values come from the sorted list.
        if (testValue != testValueNew)
        {
            double score = totalEntropy
                           - (freqLowerSum / itemCount) * Entropy.CalEntropy(freqLower)
                           - (freqHigherSum / itemCount) * Entropy.CalEntropy(freqHigher);
            if (score > bestScore)
            {
                bestScore = score;
                bestThreshold = (testValue + testValueNew) / 2.0d;
            }
        }
    }

    return new TestScore(new NumericalTest(testAttr, bestThreshold), bestScore);
}