Esempio n. 1
0
        getMostFrequentValueForIndex_Generic(
            double[][] filteredData,
            int index,
            Boolean checkMissingValue)
        {
            // Frequency table for the requested entry of filteredData
            // (distinct value -> count, as produced by InformationGain.Frequency).
            Dictionary<double, long> counts =
                InformationGain.Frequency(filteredData[index]);

            if (checkMissingValue)
            {
                // Missing values must not compete for "most frequent".
                // Dictionary.Remove simply returns false when the key is
                // absent, so no prior ContainsKey lookup is needed.
                counts.Remove(_missingValue);
            }

            // Fold over the entries keeping the one with the larger count.
            // On a tie the later entry wins; an empty table makes Aggregate
            // throw InvalidOperationException.
            KeyValuePair<double, long> winner = counts.Aggregate(
                (best, candidate) => best.Value > candidate.Value ? best : candidate);

            return winner.Key;
        }
Esempio n. 2
0
        public void Statistics_EntropyShannon()
        {
            // The entropy of the shared fixture `ta` must fall inside the
            // closed interval [2.17, 2.19].
            const double lowerBound = 2.17;
            const double upperBound = 2.19;

            double entropy = InformationGain.EntropyShannon(ta);

            bool withinBounds = entropy >= lowerBound && entropy <= upperBound;
            Assert.IsTrue(withinBounds);
        }
Esempio n. 3
0
        public void Statistics_Frequency()
        {
            // Build the frequency table (value -> count) for the shared fixture `ta`.
            Dictionary<double, long> freqs =
                InformationGain.Frequency(ta);

            // Expected value goes first, actual second, so a failing assertion
            // reports the two values the right way round. The literals are
            // long to match the dictionary's value type exactly.
            Assert.AreEqual(5L, freqs[2]);
            Assert.AreEqual(4L, freqs[3]);
            Assert.AreEqual(4L, freqs[4]);
        }
Esempio n. 4
0
        protected bool isTargetDataSame(double[][] filteredData)
        {
            // Returns true exactly when the Shannon entropy of the target
            // attribute's values is zero, false otherwise.
            double[] targetColumn = filteredData[_indexTargetAttribute];
            double targetEntropy = InformationGain.EntropyShannon(targetColumn);

            return targetEntropy == 0;
        }
Esempio n. 5
0
        public void Statistics_EntropyShannon_ProvideFreq()
        {
            // Same bounds check as the array-based entropy test, but the
            // entropy is computed from a frequency table built beforehand.
            const double lowerBound = 2.17;
            const double upperBound = 2.19;

            Dictionary<double, long> frequencyTable = InformationGain.Frequency(ta);
            double entropy = InformationGain.EntropyShannon(frequencyTable);

            bool withinBounds = entropy >= lowerBound && entropy <= upperBound;
            Assert.IsTrue(withinBounds);
        }
Esempio n. 6
0
        splitDataOnUnivariateCriterion(
            double[][] data)
        {
            // Picks the attribute whose split yields the highest information
            // gain with respect to the target attribute and returns its index,
            // its frequency table and the gain itself.
            //
            // data is indexed by attribute first (data[attribute][row]); null
            // entries and the target attribute itself are never candidates.

            // Start below every real gain so the first candidate always wins.
            SplittedAttributeData ed = new SplittedAttributeData();
            ed.SplittingCriteriaValue = double.NegativeInfinity;

            // Entropy of the unsplit target attribute.
            double entropyS = getEntropyOfTargetAttribute(data);

            // Loop-invariant denominator for the per-value weights.
            double totalRows = data[_indexTargetAttribute].Length;

            // Copy into a double so the Equals call below always binds to
            // double.Equals(double).
            double missingValue = _missingValue;

            for (int idxCol = 0; idxCol < data.Length; idxCol++)
            {
                // Do not compute when data is not present or for the target itself.
                if (data[idxCol] == null || idxCol == _indexTargetAttribute)
                {
                    continue;
                }

                Dictionary<double, long> freqs =
                    InformationGain.Frequency(data[idxCol]);

                // Weighted sum of the target entropy over every distinct
                // (non-missing) value of this attribute.
                double entropySum = 0;
                foreach (double key in freqs.Keys)
                {
                    // double.Equals treats NaN as equal to NaN, unlike the
                    // != operator, so a NaN missing-value marker is skipped
                    // here just as a dictionary-key lookup would find it.
                    if (key.Equals(missingValue))
                    {
                        continue;
                    }

                    double[] filteredTargetData =
                        getFilteredTargetValues(data, idxCol, key);

                    double entropySv = InformationGain.EntropyShannon(filteredTargetData);
                    entropySum += ((double)filteredTargetData.Length / totalRows)
                                  * entropySv;
                }

                // Information gain of splitting on this attribute.
                double infoGain = entropyS - entropySum;
                if (infoGain > ed.SplittingCriteriaValue)
                {
                    ed.Freqs                  = freqs;
                    ed.AttributeIndex         = idxCol;
                    ed.SplittingCriteriaValue = infoGain;
                }
            }

            return ed;
        }
Esempio n. 7
0
        public void InformationGainReturnsCorrectForHumidityFactor()
        {
            // Value InformationGain.Calculate must return for the "Weather"
            // attribute, and the tolerance allowed around it.
            const double expectedGain = 0.151;
            const double tolerance = 0.001;

            var gain = InformationGain.Calculate(data, "Weather");

            Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
        }
Esempio n. 8
0
        public void InformationGainReturnsCorrectForOutlookFactor()
        {
            // Value InformationGain.Calculate must return for the "Timing"
            // attribute, and the tolerance allowed around it.
            const double expectedGain = 0.246;
            const double tolerance = 0.001;

            var gain = InformationGain.Calculate(data, "Timing");

            Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
        }
Esempio n. 9
0
        public void InformationGainReturnsCorrectForWindFactor()
        {
            // Value InformationGain.Calculate must return for the "Wind"
            // attribute, and the tolerance allowed around it.
            const double expectedGain = 0.048;
            const double tolerance = 0.001;

            var gain = InformationGain.Calculate(data, "Wind");

            Assert.That(gain, Is.EqualTo(expectedGain).Within(tolerance));
        }
Esempio n. 10
0
 /// <summary>
 /// Computes the Shannon entropy of the target attribute's values.
 /// </summary>
 /// <param name="data">
 /// Jagged array indexed by attribute; the entry at the target attribute
 /// index is the one that is measured.
 /// </param>
 /// <returns>
 /// The value returned by <c>InformationGain.EntropyShannon</c> for the
 /// target attribute's values.
 /// </returns>
 protected double getEntropyOfTargetAttribute(double[][] data)
 {
     double[] targetColumn = data[_indexTargetAttribute];

     return InformationGain.EntropyShannon(targetColumn);
 }