getMostFrequentValueForIndex_Generic(
    double[][] filteredData,
    int index,
    Boolean checkMissingValue)
{
    // Returns the value that occurs most often in filteredData[index].
    //   filteredData      - jagged array; only the row at position `index` is read.
    //   index             - position of the row whose values are counted.
    //   checkMissingValue - when true, the _missingValue entry is dropped from the
    //                       counts before the most frequent value is chosen.
    // InformationGain.Frequency is used here as a value -> occurrence-count map.
    Dictionary<double, long> valueCounts = InformationGain.Frequency(filteredData[index]);

    if (checkMissingValue)
    {
        // Do not count missing values. Dictionary.Remove simply returns false
        // when the key is absent, so no prior ContainsKey check is needed.
        valueCounts.Remove(_missingValue);
    }

    // Fold over the entries keeping the one with the larger count. On equal
    // counts the later-enumerated entry wins. Aggregate throws
    // InvalidOperationException when no entries are left.
    KeyValuePair<double, long> mostFrequent = valueCounts.Aggregate(
        (best, candidate) => best.Value > candidate.Value ? best : candidate);

    return mostFrequent.Key;
}
public void Statistics_EntropyShannon()
{
    // The Shannon entropy of the shared fixture `ta` must fall inside the
    // closed interval [2.17, 2.19].
    const double lowerBound = 2.17;
    const double upperBound = 2.19;

    double entropy = InformationGain.EntropyShannon(ta);

    bool withinBounds = lowerBound <= entropy && entropy <= upperBound;
    Assert.IsTrue(withinBounds);
}
public void Statistics_Frequency()
{
    // Verifies the occurrence counts that InformationGain.Frequency reports
    // for the shared fixture `ta`: value 2 -> 5, value 3 -> 4, value 4 -> 4.
    Dictionary<double, long> freqs = InformationGain.Frequency(ta);

    // Assert.AreEqual takes (expected, actual). The expected count goes first
    // so that a failure message reports the two values the right way round.
    // The literals are typed as long to match the dictionary's value type.
    Assert.AreEqual(5L, freqs[2]);
    Assert.AreEqual(4L, freqs[3]);
    Assert.AreEqual(4L, freqs[4]);
}
/// <summary>
/// Reports whether the Shannon entropy of the target-attribute row of
/// <paramref name="filteredData"/> is exactly zero.
/// </summary>
/// <param name="filteredData">
/// Jagged data array; only the row at <c>_indexTargetAttribute</c> is read.
/// </param>
/// <returns>
/// <c>true</c> when the computed entropy equals 0, otherwise <c>false</c>.
/// </returns>
protected bool isTargetDataSame(double[][] filteredData)
{
    double[] targetValues = filteredData[_indexTargetAttribute];

    // Exact comparison is intentional: the result is compared against 0
    // without any tolerance.
    return InformationGain.EntropyShannon(targetValues) == 0;
}
public void Statistics_EntropyShannon_ProvideFreq()
{
    // Same range check as the array-based entropy test, but the entropy is
    // computed from a frequency table built first with
    // InformationGain.Frequency.
    const double lowerBound = 2.17;
    const double upperBound = 2.19;

    Dictionary<double, long> valueCounts = InformationGain.Frequency(ta);
    double entropy = InformationGain.EntropyShannon(valueCounts);

    bool withinBounds = lowerBound <= entropy && entropy <= upperBound;
    Assert.IsTrue(withinBounds);
}
splitDataOnUnivariateCriterion(
    double[][] data)
{
    // Picks the attribute (an entry of `data` other than the target row) whose
    // split yields the largest information gain:
    //     gain = entropy(target) - sum over values v of (|S_v| / |S|) * entropy(S_v)
    // where S_v is the array returned by getFilteredTargetValues for value v.
    // Returns a SplittedAttributeData holding the winning attribute index, its
    // frequency table and the gain. If no attribute qualifies, the result keeps
    // SplittingCriteriaValue == double.NegativeInfinity.
    SplittedAttributeData best = new SplittedAttributeData();
    best.SplittingCriteriaValue = double.NegativeInfinity;

    double targetEntropy = getEntropyOfTargetAttribute(data);

    for (int column = 0; column < data.Length; column++)
    {
        // Skip entries with no data and the target attribute itself.
        if (data[column] == null || column == _indexTargetAttribute)
        {
            continue;
        }

        Dictionary<double, long> valueCounts = InformationGain.Frequency(data[column]);

        double weightedEntropy = 0;
        foreach (double attributeValue in valueCounts.Keys)
        {
            // Missing values do not contribute to the weighted entropy.
            if (attributeValue == _missingValue)
            {
                continue;
            }

            double[] targetSubset = getFilteredTargetValues(data, column, attributeValue);
            double subsetEntropy = InformationGain.EntropyShannon(targetSubset);

            // Weight = share of the target entries that fall into this subset.
            double weight = (double)targetSubset.Length
                            / (double)data[_indexTargetAttribute].Length;
            weightedEntropy += weight * subsetEntropy;
        }

        double gain = targetEntropy - weightedEntropy;

        // Strict '>' keeps the earliest attribute when gains are equal.
        if (gain > best.SplittingCriteriaValue)
        {
            best.Freqs = valueCounts;
            best.AttributeIndex = column;
            best.SplittingCriteriaValue = gain;
        }
    }

    return best;
}
public void InformationGainReturnsCorrectForHumidityFactor()
{
    // Expects an information gain of 0.151 (+/- 0.001) for the "Weather" column
    // of the shared fixture `data`.
    // NOTE(review): the test name says "Humidity" but the column queried is
    // "Weather" - confirm the fixture's column naming.
    const double expectedGain = 0.151;
    const double tolerance = 0.001;

    var gain = InformationGain.Calculate(data, "Weather");

    Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
}
public void InformationGainReturnsCorrectForOutlookFactor()
{
    // Expects an information gain of 0.246 (+/- 0.001) for the "Timing" column
    // of the shared fixture `data`.
    // NOTE(review): the test name says "Outlook" but the column queried is
    // "Timing" - confirm the fixture's column naming.
    const double expectedGain = 0.246;
    const double tolerance = 0.001;

    var gain = InformationGain.Calculate(data, "Timing");

    Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
}
public void InformationGainReturnsCorrectForWindFactor()
{
    // Expects an information gain of 0.048 (+/- 0.001) for the "Wind" column
    // of the shared fixture `data`.
    const double expectedGain = 0.048;
    const double tolerance = 0.001;

    var gain = InformationGain.Calculate(data, "Wind");

    Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
}
/// <summary>
/// Computes the Shannon entropy of the target-attribute row of
/// <paramref name="data"/>.
/// </summary>
/// <param name="data">
/// Jagged data array; only the row at <c>_indexTargetAttribute</c> is read.
/// </param>
/// <returns>
/// The value returned by <c>InformationGain.EntropyShannon</c> for that row.
/// </returns>
protected double getEntropyOfTargetAttribute(double[][] data)
{
    double[] targetValues = data[_indexTargetAttribute];
    return InformationGain.EntropyShannon(targetValues);
}