Beispiel #1
0
        /// <summary>
        /// Calculates the Gini value of a discrete attribute: the smallest weighted
        /// Gini found over the binary splits "attribute value is in the subset" versus
        /// "attribute value is not in the subset", taken over the candidate subsets of
        /// the attribute's distinct values.
        /// </summary>
        /// <param name="attributes">an array of discrete attribute values, one per sample</param>
        /// <param name="classNumbers">the class number of each sample</param>
        /// <returns>
        /// the smallest weighted Gini value found, or 1.0 when no candidate split
        /// produced a value below 1.0 (for example when the attribute has fewer than
        /// two distinct values)
        /// </returns>
        public double CalcAttributeGini(string[] attributes, byte[] classNumbers)
        {
            IList <string> uniqValues;
            IList <int>    samplesCount;

            GetUniqValuesSamples <string>(attributes, out uniqValues, out samplesCount);

            // 1.0 acts as the "no split found yet" starting value; only strictly
            // smaller sums replace it.
            double bestGini = 1.0;
            long   total    = classNumbers.Length;

            // The generator takes a concrete List; fall back to a copy instead of
            // handing it null when the helper returned some other IList implementation.
            List <string> valueList = uniqValues as List <string> ?? new List <string>(uniqValues);

            CombinationGenerator <string> cg = new CombinationGenerator <string>();
            IEnumerable <List <string> >  potentialCombinations = cg.ProduceWithoutRecursion(valueList);

            foreach (List <string> combination in potentialCombinations)
            {
                // Only proper, non-empty subsets describe a real binary split: the empty
                // set and the full set both leave one side of the split without samples.
                // NOTE(review): the previous condition required Count == uniqValues.Count,
                // which admitted only the full set - confirm no caller relied on that.
                if (combination.Count == 0 || combination.Count == uniqValues.Count)
                {
                    continue;
                }

                double inGini    = calcWeightedSubsetGini(attributes, classNumbers, combination, true, total);
                double notInGini = calcWeightedSubsetGini(attributes, classNumbers, combination, false, total);
                double splitGini = inGini + notInGini;

                if (splitGini < bestGini)
                {
                    bestGini = splitGini;
                }
            }
            return(bestGini);
        }

        /// <summary>
        /// Computes the Gini value of one side of a binary split, weighted by the
        /// fraction of all samples that fall on that side.
        /// </summary>
        /// <param name="attributes">an array of discrete attribute values, one per sample</param>
        /// <param name="classNumbers">the class number of each sample</param>
        /// <param name="combination">the attribute values that define the split</param>
        /// <param name="contained">
        /// flag forwarded to splitVectorByContainage; presumably true selects the samples
        /// whose value is in <paramref name="combination"/> and false the remaining ones
        /// </param>
        /// <param name="total">the total number of samples, used as the weight denominator</param>
        /// <returns>the weighted Gini value of the selected side</returns>
        private double calcWeightedSubsetGini(string[] attributes, byte[] classNumbers, List <string> combination, bool contained, long total)
        {
            IList <byte> subsetClass;
            splitVectorByContainage(attributes, classNumbers, combination, contained, out subsetClass);
            byte[] subsetClassArray = convertList2Array(subsetClass);

            IList <byte> uniqClass;
            IList <int>  uniqClassSamples;
            GetUniqValuesSamples(subsetClassArray, out uniqClass, out uniqClassSamples);

            double gini = getGiniValuefromProbability(uniqClassSamples);

            // The per-class counts add up to the number of samples on this side.
            long subTotal = 0;
            foreach (int count in uniqClassSamples)
            {
                subTotal += count;
            }

            return(gini * ((double)subTotal / total));
        }