예제 #1
0
        public void Statistics_EntropyShannon()
        {
            // Shannon entropy computed directly from `ta` (declared outside
            // this method) must fall inside the closed range [2.17, 2.19].
            double entropy = InformationGain.EntropyShannon(ta);
            bool withinExpectedRange = entropy >= 2.17 && entropy <= 2.19;

            Assert.IsTrue(withinExpectedRange);
        }
예제 #2
0
파일: BuildBase.cs 프로젝트: Dasmic/MLLib
        /// <summary>
        /// Reports whether the Shannon entropy of the target-attribute column
        /// of <paramref name="filteredData"/> is exactly zero.
        /// </summary>
        /// <param name="filteredData">
        /// Jagged array indexed first by attribute; the entry at
        /// <c>_indexTargetAttribute</c> is the one that gets measured.
        /// </param>
        /// <returns>
        /// <c>true</c> when the computed entropy equals 0, otherwise <c>false</c>.
        /// </returns>
        protected bool isTargetDataSame(double[][] filteredData)
        {
            double[] targetColumn = filteredData[_indexTargetAttribute];

            // NOTE(review): exact comparison against 0 is intentional here;
            // presumably zero entropy means every target value is identical.
            return InformationGain.EntropyShannon(targetColumn) == 0;
        }
예제 #3
0
        public void Statistics_EntropyShannon_ProvideFreq()
        {
            // Build the frequency table from `ta` first, then pass it to the
            // EntropyShannon overload that accepts a frequency dictionary.
            Dictionary<double, long> frequencies = InformationGain.Frequency(ta);
            double entropy = InformationGain.EntropyShannon(frequencies);

            // The result must fall inside the closed range [2.17, 2.19].
            Assert.IsTrue(entropy >= 2.17 && entropy <= 2.19);
        }
예제 #4
0
        splitDataOnUnivariateCriterion(
            double[][] data)
        {
            // Scans every non-null, non-target column of `data`, computes
            //   infoGain = entropy(target) - sum over values v of
            //              (|target rows selected for v| / |target|) * entropy(selected rows)
            // and returns the column (with its frequency table) whose infoGain
            // is the largest. If no column qualifies, the returned object keeps
            // SplittingCriteriaValue == double.NegativeInfinity.
            SplittedAttributeData best = new SplittedAttributeData();
            best.SplittingCriteriaValue = double.NegativeInfinity;

            double entropyS = getEntropyOfTargetAttribute(data);

            // Length (not LINQ Count()) is the idiomatic size of an array.
            for (int idxCol = 0; idxCol < data.Length; idxCol++)
            {
                // Skip columns with no data and the target column itself.
                if (data[idxCol] == null || idxCol == _indexTargetAttribute)
                {
                    continue;
                }

                Dictionary<double, long> freqs =
                    InformationGain.Frequency(data[idxCol]);

                // Weighted entropy of the target over each distinct value
                // (dictionary key) found in this column.
                double entropySum = 0;
                foreach (double key in freqs.Keys)
                {
                    // Missing values do not contribute to the weighted sum.
                    if (key == _missingValue)
                    {
                        continue;
                    }

                    // presumably the target values of rows where column
                    // idxCol equals key - verify against getFilteredTargetValues
                    double[] filteredTargetData =
                        getFilteredTargetValues(data, idxCol, key);

                    double entropySv =
                        InformationGain.EntropyShannon(filteredTargetData);
                    double weight = (double)filteredTargetData.Length /
                                    (double)data[_indexTargetAttribute].Length;

                    entropySum += weight * entropySv;
                }

                double infoGain = entropyS - entropySum;

                // Strict '>' keeps the first column seen when gains tie.
                if (infoGain > best.SplittingCriteriaValue)
                {
                    best.Freqs                  = freqs;
                    best.AttributeIndex         = idxCol;
                    best.SplittingCriteriaValue = infoGain;
                }
            }

            return best;
        }
예제 #5
0
파일: BuildBase.cs 프로젝트: Dasmic/MLLib
 /// <summary>
 /// Computes the Shannon entropy of the target-attribute column of
 /// <paramref name="data"/>.
 /// </summary>
 /// <param name="data">
 /// Jagged array indexed first by attribute; only the entry at
 /// <c>_indexTargetAttribute</c> is read.
 /// </param>
 /// <returns>
 /// The value returned by <c>InformationGain.EntropyShannon</c> for the
 /// target column.
 /// </returns>
 protected double getEntropyOfTargetAttribute(double[][] data)
 {
     double[] targetColumn = data[_indexTargetAttribute];

     return InformationGain.EntropyShannon(targetColumn);
 }