/// <summary>
/// Evaluates this test on a value: the test succeeds when the value is one of
/// the values admitted by the test.
/// </summary>
/// <param name="val">The value to look up among the admitted values.</param>
/// <returns>1 when <paramref name="val"/> equals one of the admitted values, 0 otherwise.</returns>
public int Perform(KnownSymbolicValue val)
{
    int outcome = 0;

    foreach (KnownSymbolicValue admitted in _values)
    {
        if (!admitted.Equals(val))
        {
            continue;
        }

        // First match settles the result; no need to scan further.
        outcome = 1;
        break;
    }

    return outcome;
}
/// <summary>
/// Computes the entropy of the set regarding a given symbolic attribute.
/// The result is cached: it is recomputed only when no entropy has been computed yet
/// (cached value is negative) or when the cached value was computed for another attribute.
/// </summary>
/// <param name="attr">The attribute against which to compute the entropy.</param>
/// <returns>The entropy of the value distribution of <paramref name="attr"/> over the items of this set.</returns>
/// <exception cref="ArgumentException"><paramref name="attr"/> is not part of this set's attribute set.</exception>
public virtual double CalEntropy(SymbolicAttribute attr)
{
    if (!_attributeSet.Contains(attr))
    {
        throw new ArgumentException("Unknown attribute");
    }

    if (this._entropy < 0.0d || !_entropyAttribute.Equals(attr))
    {
        // The attribute's position does not change while counting, so look it up once
        // instead of once per item.
        int attrIndex = _attributeSet.IndexOf(attr);

        // frequencies[v] counts the items whose value for 'attr' has integer code v.
        double[] frequencies = new double[attr.NumOfValues];
        for (int i = 0; i < _items.Count; i++)
        {
            KnownSymbolicValue sv = (KnownSymbolicValue)(_items[i].ValueOf(attrIndex));
            frequencies[sv.IntValue]++;
        }

        this._entropy = Entropy.CalEntropy(frequencies);
        _entropyAttribute = attr;
    }

    return(_entropy);
}
/// <summary>
/// Computes the entropy of the set regarding a given symbolic attribute.
/// The frequency of each value of this attribute is accumulated from the item weights
/// rather than by counting items.
/// The value of this attribute must be known for all the items of this set.
/// The result is cached: it is recomputed only when no entropy has been computed yet
/// (cached value is negative) or when the cached value was computed for another attribute.
/// </summary>
/// <param name="attr">The attribute against which to compute the entropy.</param>
/// <returns>The entropy of the weighted value distribution of <paramref name="attr"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="attr"/> is not part of this set's attribute set.</exception>
/// <exception cref="InvalidCastException">An item's value for <paramref name="attr"/> is not a <c>KnownSymbolicValue</c>.</exception>
public override double CalEntropy(SymbolicAttribute attr)
{
    if (!_attributeSet.Contains(attr))
    {
        throw new ArgumentException("Unknown attribute");
    }

    if (_entropy < 0.0d || !_entropyAttribute.Equals(attr))
    {
        // freqs[v] is the total weight of the items whose value for 'attr' has integer code v.
        double[] freqs = new double[attr.NumOfValues];
        for (int i = 0; i < Items.Count; i++)
        {
            // Direct cast instead of 'as': a value of the wrong type fails right here with an
            // InvalidCastException instead of a NullReferenceException on the next line.
            KnownSymbolicValue sv = (KnownSymbolicValue)Items[i].ValueOf(_attributeSet, attr);
            freqs[sv.IntValue] += _weights[i];
        }

        _entropy = Biotracker.Signature.DT.Entropy.CalEntropy(freqs);
        _entropyAttribute = attr;
    }

    return(this._entropy);
}
/// <summary>
/// Finds the best splitting test involving a symbolic attribute.
/// </summary>
/// <remarks>
/// A candidate test is a set of values of <paramref name="testAttr"/>; it splits the items into
/// those whose test value belongs to the set ("match") and the others ("no match").
/// The score of a candidate is the entropy of <paramref name="goalAttr"/> over the whole set
/// minus the entropies of the goal attribute in the two parts, each weighted by the fraction
/// of items in that part.
/// The search is greedy: it starts with the single best value, then repeatedly adds the value
/// that gives the best score for the enlarged set, and finally keeps the best-scoring set
/// met along the way.
/// </remarks>
/// <param name="testAttr">Symbolic attribute the test is built on.</param>
/// <param name="goalAttr">Symbolic attribute whose entropy the split should reduce.</param>
/// <returns>The selected test over <paramref name="testAttr"/> together with its score.</returns>
protected TestScore BestSplitTest(SymbolicAttribute testAttr, SymbolicAttribute goalAttr)
{
    int testNbVal = testAttr.NumOfValues;
    int testIndex = _attributeSet.IndexOf(testAttr);
    int goalNbVal = goalAttr.NumOfValues;
    int goalIndex = _attributeSet.IndexOf(goalAttr);

    // freqMatch[tvi][gvi] is the number of items that have a value equal to tvi for their 'test'
    // attribute and a value equal to gvi for their 'goal' attribute.
    // freqMatchSum[tvi] is the sum of the freqMatch[tvi][gvi] elements (for all gvi).
    // freqNoMatch / freqNoMatchSum hold the same counts for the items that do NOT have tvi
    // as their test attribute value.
    double[][] freqMatch = new double[testNbVal][];
    double[][] freqNoMatch = new double[testNbVal][];
    for (int i = 0; i < testNbVal; i++)
    {
        freqMatch[i] = new double[goalNbVal];
        freqNoMatch[i] = new double[goalNbVal];
    }
    double[] freqMatchSum = new double[testNbVal];
    double[] freqNoMatchSum = new double[testNbVal];

    for (int i = 0; i < _items.Count; i++)
    {
        int testVal = ((KnownSymbolicValue)(_items[i].ValueOf(testIndex))).IntValue;
        int goalVal = ((KnownSymbolicValue)(_items[i].ValueOf(goalIndex))).IntValue;

        for (int tvi = 0; tvi < testNbVal; tvi++)
        {
            if (testVal == tvi)
            {
                freqMatch[tvi][goalVal]++;
                freqMatchSum[tvi]++;
            }
            else
            {
                freqNoMatch[tvi][goalVal]++;
                freqNoMatchSum[tvi]++;
            }
        }
    }

    // Both are constant for the rest of the method; compute them once.
    double goalEntropy = CalEntropy(goalAttr);
    double itemCount = _items.Count;

    // Step 1: best test made of a single value.
    double bestScore = -1.0d;
    int bestValue = -1;
    for (int tvi = 0; tvi < testNbVal; tvi++)
    {
        double score = goalEntropy
                       - ((freqMatchSum[tvi] / itemCount) * Entropy.CalEntropy(freqMatch[tvi]))
                       - ((freqNoMatchSum[tvi] / itemCount) * Entropy.CalEntropy(freqNoMatch[tvi]));

        if (score > bestScore)
        {
            bestScore = score;
            bestValue = tvi;
        }
    }

    // Step 2: group the attribute values one by one.
    List<int> remainTestValueIndexes = new List<int>();
    for (int i = 0; i < testNbVal; i++)
    {
        remainTestValueIndexes.Add(i);
    }

    // remainingFreqMatch / remainingFreqNoMatch describe the split induced by the values grouped
    // so far. Nothing is grouped yet, so every item is on the "no match" side: for any tvi,
    // freqMatch[tvi][gvi] + freqNoMatch[tvi][gvi] is the total count of goal value gvi.
    double[] remainingFreqMatch = new double[goalNbVal];
    double[] remainingFreqNoMatch = new double[goalNbVal];
    for (int gvi = 0; gvi < goalNbVal; gvi++)
    {
        remainingFreqNoMatch[gvi] = freqMatch[0][gvi] + freqNoMatch[0][gvi];
    }
    double remainingFreqMatchSum = 0.0d;
    double remainingFreqNoMatchSum = (double)(_items.Count);

    // orderedValueIndex[k] is the (k+1)-th value added to the group and orderedScores[k] is the
    // score of the test made of the first k+1 values.
    List<int> orderedValueIndex = new List<int>();
    List<double> orderedScores = new List<double>();
    orderedValueIndex.Add(bestValue);
    orderedScores.Add(bestScore);

    // Remove values until only one is left.
    while (remainTestValueIndexes.Count >= 2)
    {
        // Update the remaining frequency arrays according to the last test attribute value removed.
        // List<int>.Remove(int) removes by value, which is what is wanted here.
        remainTestValueIndexes.Remove(bestValue);
        for (int gvi = 0; gvi < goalNbVal; gvi++)
        {
            remainingFreqMatch[gvi] += freqMatch[bestValue][gvi];
            remainingFreqNoMatch[gvi] -= freqMatch[bestValue][gvi];
        }
        remainingFreqMatchSum += freqMatchSum[bestValue];
        remainingFreqNoMatchSum -= freqMatchSum[bestValue];

        bestScore = -1.0d;

        // Find the next best test attribute value to add to the group.
        for (int i = 0; i < remainTestValueIndexes.Count; i++)
        {
            int tvi = remainTestValueIndexes[i];

            double[] thisFreqMatch = new double[goalNbVal];
            double[] thisFreqNoMatch = new double[goalNbVal];
            for (int gvi = 0; gvi < goalNbVal; gvi++)
            {
                thisFreqMatch[gvi] = freqMatch[tvi][gvi] + remainingFreqMatch[gvi];
                thisFreqNoMatch[gvi] = remainingFreqNoMatch[gvi] - freqMatch[tvi][gvi];
            }
            double thisFreqMatchSum = freqMatchSum[tvi] + remainingFreqMatchSum;
            double thisFreqNoMatchSum = remainingFreqNoMatchSum - freqMatchSum[tvi];

            double score = goalEntropy
                           - ((thisFreqMatchSum / itemCount) * Entropy.CalEntropy(thisFreqMatch))
                           - ((thisFreqNoMatchSum / itemCount) * Entropy.CalEntropy(thisFreqNoMatch));

            if (score > bestScore)
            {
                bestScore = score;
                bestValue = tvi;
            }
        }

        // Record the value added in this round together with the score of the enlarged group.
        // This has to happen on every round: the result is built from a prefix of
        // orderedValueIndex, so each position must hold the group that produced the score
        // stored at the same position.
        orderedScores.Add(bestScore);
        orderedValueIndex.Add(bestValue);
    }

    // Step 3: keep the group (prefix of the ordered values) with the highest score.
    bestScore = -1.0d;
    int bestIndex = 0;
    for (int i = 0; i < orderedScores.Count; i++)
    {
        double score = orderedScores[i];
        if (score > bestScore)
        {
            bestScore = score;
            bestIndex = i;
        }
    }

    KnownSymbolicValue[] testValueIndexes = new KnownSymbolicValue[bestIndex + 1];
    for (int i = 0; i <= bestIndex; i++)
    {
        int val = orderedValueIndex[i];
        testValueIndexes[i] = new KnownSymbolicValue(val);
    }

    return(new TestScore(new SymbolicTest(testAttr, testValueIndexes), bestScore));
}