/// <summary>
/// Creates a test on a numerical attribute and records the threshold it compares against.
/// </summary>
/// <param name="attr">The numerical attribute this test applies to; must not be null.</param>
/// <param name="thresh">The threshold value stored for this test.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="attr"/> is null.</exception>
public NumericalTest(NumericalAttribute attr, double thresh) : base(attr)
{
    // The base constructor has already run with the same argument by the time
    // this check executes; the guard protects this class's own invariants.
    if (attr == null)
    {
        // Name the offending parameter so the failure is self-describing.
        throw new ArgumentNullException("attr");
    }

    this._threshold = thresh;
}
/// <summary>
/// Searches for the threshold on a numerical attribute that yields the highest-scoring split
/// of the current items with respect to a symbolic goal attribute.
/// </summary>
/// <remarks>
/// Every item contributes a (test value, goal value) pair. The pairs are sorted and walked in
/// order; a candidate threshold is placed halfway between each two consecutive pairs whose test
/// values differ. A candidate's score is <c>CalEntropy(goalAttr)</c> minus the entropies of the
/// lower and higher groups, each weighted by that group's share of the items. When no candidate
/// scores above zero, the result keeps the first sorted test value as threshold and a score of zero.
/// </remarks>
/// <param name="testAttr">The numerical attribute whose values supply the candidate thresholds.</param>
/// <param name="goalAttr">The symbolic attribute whose value counts are scored on each side.</param>
/// <returns>
/// A <see cref="TestScore"/> holding a <see cref="NumericalTest"/> on <paramref name="testAttr"/>
/// with the best threshold found, together with that threshold's score.
/// </returns>
private TestScore BestSplitTest(NumericalAttribute testAttr, SymbolicAttribute goalAttr)
{
    int testColumn = _attributeSet.IndexOf(testAttr);
    int goalValueCount = goalAttr.NumOfValues;
    int goalColumn = _attributeSet.IndexOf(goalAttr);

    // Per-goal-value item counts on each side of the moving threshold. The threshold
    // starts below every item, so the "below" side begins empty.
    double[] belowCounts = new double[goalValueCount];
    double[] aboveCounts = new double[goalValueCount];
    for (int g = 0; g < goalValueCount; g++)
    {
        KnownSymbolicValue[] wanted = { new KnownSymbolicValue(g) };
        SymbolicTest goalTest = new SymbolicTest(goalAttr, wanted);

        // NOTE(review): presumably the second subset returned by Split holds the items
        // whose goal value equals g - confirm against Split's contract.
        aboveCounts[g] = Split(goalTest).ElementAt(1).Size();
    }

    // Running totals of the two count arrays.
    double belowTotal = 0.0d;
    double aboveTotal = (double)_items.Count;

    List<TestGoalValue> pairs = new List<TestGoalValue>();
    for (int i = 0; i < _items.Count; i++)
    {
        var item = this._items[i];
        double numeric = ((KnownNumericalValue)item.ValueOf(testColumn)).Value;
        int symbol = ((KnownSymbolicValue)item.ValueOf(goalColumn)).IntValue;
        pairs.Add(new TestGoalValue(numeric, symbol));
    }

    // Ordering is whatever TestGoalValue's own comparison defines
    // (presumably ascending test value - TODO confirm).
    pairs.Sort();

    double topScore = 0.0d;
    double topThreshold = pairs[0].TestValue;

    for (int i = 1; i < pairs.Count; i++)
    {
        TestGoalValue previous = pairs[i - 1];
        TestGoalValue current = pairs[i];

        // The threshold has just moved past 'previous': shift it to the lower side.
        int movedGoal = previous.GoalValue;
        belowCounts[movedGoal] += 1.0d;
        belowTotal += 1.0d;
        aboveCounts[movedGoal] -= 1.0d;
        aboveTotal -= 1.0d;

        // A threshold between two identical test values cannot separate them.
        if (previous.TestValue == current.TestValue)
        {
            continue;
        }

        double wholeSetEntropy = CalEntropy(goalAttr);
        double belowTerm = (belowTotal / _items.Count) * Entropy.CalEntropy(belowCounts);
        double aboveTerm = (aboveTotal / _items.Count) * Entropy.CalEntropy(aboveCounts);
        double candidateScore = wholeSetEntropy - belowTerm - aboveTerm;

        if (candidateScore > topScore)
        {
            topScore = candidateScore;
            topThreshold = (previous.TestValue + current.TestValue) / 2.0d;
        }
    }

    NumericalTest winner = new NumericalTest(testAttr, topThreshold);
    return new TestScore(winner, topScore);
}