Ejemplo n.º 1
0
        getMostFrequentValueForIndex_Generic(
            double[][] filteredData,
            int index,
            Boolean checkMissingValue)
        {
            // Returns the key with the largest count in the frequency table that
            // InformationGain.Frequency builds for filteredData[index].
            //
            // filteredData      : jagged array; only filteredData[index] is read.
            // index             : position of the array to analyse.
            // checkMissingValue : when true, the entry keyed by _missingValue is
            //                     dropped before the maximum is searched.
            //
            // Throws InvalidOperationException when no candidate value remains.
            Dictionary <double, long> freqs =
                InformationGain.Frequency(filteredData[index]);

            //Do not count missing values
            if (checkMissingValue)
            {
                // Remove is a no-op (returns false) when the key is absent, so no
                // ContainsKey pre-check is required.
                freqs.Remove(_missingValue);
            }

            // Aggregate without a seed throws on an empty sequence; fail with the
            // same exception type but a message that names the actual problem.
            if (freqs.Count == 0)
            {
                throw new InvalidOperationException(
                          "Cannot determine the most frequent value: no values remain at index " +
                          index + ".");
            }

            //Aggregate is the LINQ name for the commonly known functional concept Fold.
            //On equal counts the entry enumerated later is kept (strict '>' comparison).
            var max = freqs.Aggregate((l, r) => l.Value > r.Value ? l : r).Key;

            return(max);
        }
Ejemplo n.º 2
0
        // Checks that InformationGain.Frequency reports the expected number of
        // occurrences for the values 2, 3 and 4 in the shared fixture 'ta'.
        public void Statistics_Frequency()
        {
            Dictionary <double, long> freqs =
                InformationGain.Frequency(ta);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // failure messages report the right value as "expected". The long
            // literals match the dictionary's value type.
            Assert.AreEqual(5L, freqs[2]);
            Assert.AreEqual(4L, freqs[3]);
            Assert.AreEqual(4L, freqs[4]);
        }
Ejemplo n.º 3
0
        // Checks that the Shannon entropy computed from the frequency table of the
        // shared fixture 'ta' lies within the closed interval [2.17, 2.19].
        public void Statistics_EntropyShannon_ProvideFreq()
        {
            const double lowerBound = 2.17;
            const double upperBound = 2.19;

            Dictionary <double, long> frequencies = InformationGain.Frequency(ta);
            double entropy = InformationGain.EntropyShannon(frequencies);

            bool withinBounds = entropy >= lowerBound && entropy <= upperBound;

            Assert.IsTrue(withinBounds);
        }
Ejemplo n.º 4
0
        splitDataOnUnivariateCriterion(
            double[][] data)
        {
            // Picks the column with the highest information gain relative to the
            // target attribute.
            //
            // data : jagged array indexed by column; null columns and the column at
            //        _indexTargetAttribute are skipped.
            //
            // Returns a SplittedAttributeData holding the winning column index, its
            // frequency table and its information gain. When no column qualifies,
            // SplittingCriteriaValue stays at double.NegativeInfinity.
            SplittedAttributeData ed = new SplittedAttributeData();

            ed.SplittingCriteriaValue = double.NegativeInfinity;

            double entropyS = getEntropyOfTargetAttribute(data);

            // data is an array: Length avoids the LINQ Count() extension call.
            for (int idxCol = 0; idxCol < data.Length; idxCol++)
            {
                //Do not compute when data not present, nor for the target itself
                if (data[idxCol] == null || idxCol == _indexTargetAttribute)
                {
                    continue;
                }

                Dictionary <double, long> freqs =
                    InformationGain.Frequency(data[idxCol]);

                // Weighted sum of the target entropy over each distinct value of
                // this column.
                double entropySum = 0;

                foreach (double key in freqs.Keys)
                {
                    // Equals (unlike the != operator) treats NaN as equal to NaN,
                    // which matches how the dictionary compares its keys; a NaN
                    // marker would otherwise never be filtered out here.
                    // NOTE(review): the actual value of _missingValue is not
                    // visible in this block - confirm.
                    if (key.Equals(_missingValue))
                    {
                        continue;
                    }

                    double[] filteredTargetData =
                        getFilteredTargetValues(data, idxCol, key);

                    double entropySv = InformationGain.EntropyShannon(filteredTargetData);

                    // Weight = subset size / length of the target column.
                    entropySum += ((double)filteredTargetData.Length /
                                   (double)data[_indexTargetAttribute].Length)
                                  * entropySv;
                }

                //Compute InfoGain
                double infoGain = entropyS - entropySum;

                // Strict '>' keeps the earliest column when gains are equal.
                if (infoGain > ed.SplittingCriteriaValue)
                {
                    ed.Freqs                  = freqs;
                    ed.AttributeIndex         = idxCol;
                    ed.SplittingCriteriaValue = infoGain;
                }
            }

            return(ed);
        }