예제 #1
0
        /// <summary>
        /// Computes the gain obtained by splitting <paramref name="dataSet"/> on the
        /// property named <paramref name="pFieldName"/>, using the members of the enum
        /// <typeparamref name="E"/> as the candidate categories.
        /// The result is <c>UnitTest1.Entropy</c> of the survived fraction of the whole
        /// set, minus the sum over categories of (category share of the set) multiplied
        /// by <c>UnitTest1.Entropy</c> of the survived fraction inside that category.
        /// </summary>
        /// <typeparam name="E">
        /// Enum type whose values are compared against the property value.
        /// <c>Enum.GetValues</c> throws <c>ArgumentException</c> if it is not an enum.
        /// </typeparam>
        /// <param name="pFieldName">Property name handed to <c>UnitTest1.GetPropValue</c>.</param>
        /// <param name="dataSet">Rows to evaluate; a row counts as positive when <c>Survived == 1</c>.</param>
        /// <returns>
        /// Parent value minus the weighted child values, or 0 when
        /// <paramref name="dataSet"/> is empty.
        /// </returns>
        /// <remarks>
        /// Rows whose property value is not a member of <typeparamref name="E"/> (including
        /// null) still count towards the total but contribute to no category.
        /// </remarks>
        public double GI <E>(string pFieldName, List <Passenger> dataSet)
            where E : struct, IComparable, IFormattable, IConvertible
        {
            double total = dataSet.Count;

            // With no rows every ratio below would be 0/0 (NaN); there is nothing to split,
            // so report zero gain instead of feeding NaN into Entropy.
            if (total == 0)
            {
                return(0);
            }

            // Read the property once per row and bucket the rows by its value, instead of
            // calling GetPropValue twice per row for every enum member. A lookup accepts
            // null keys, so rows with a null property value no longer throw.
            var rowsByValue = dataSet.ToLookup(row => (object)UnitTest1.GetPropValue(row, pFieldName));

            double wEC = 0;

            // Enum.GetValues repeats a value once per aliased member name; Distinct keeps
            // such a category from being added to the weighted sum more than once.
            foreach (E en in Enum.GetValues(typeof(E)).Cast <E>().Distinct())
            {
                // Indexing a lookup with an absent key yields an empty sequence.
                var subset = rowsByValue[en];

                double t = subset.Count();

                if (t == 0)
                {
                    // Category not present in the data: contributes nothing.
                    continue;
                }

                double cCYes = subset.Count(row => row.Survived == 1);

                // Child value weighted by the share of rows that fall into the category.
                wEC += UnitTest1.Entropy(cCYes / t) * (t / total);
            }

            double cYes = dataSet.Count(row => row.Survived == 1);

            return(UnitTest1.Entropy(cYes / total) - wEC);
        }
예제 #2
0
        /// <summary>
        /// Computes the gain obtained by splitting <paramref name="dataSet"/> on the
        /// property named <paramref name="pFieldName"/>, using every distinct value of
        /// that property found in the data as a category.
        /// The result is <c>UnitTest1.Entropy</c> of the survived fraction of the whole
        /// set, minus the sum over categories of (category share of the set) multiplied
        /// by <c>UnitTest1.Entropy</c> of the survived fraction inside that category.
        /// </summary>
        /// <param name="pFieldName">Property name handed to <c>UnitTest1.GetPropValue</c>.</param>
        /// <param name="dataSet">Rows to evaluate; a row counts as positive when <c>Survived == 1</c>.</param>
        /// <returns>
        /// Parent value minus the weighted child values, or 0 when
        /// <paramref name="dataSet"/> is empty.
        /// </returns>
        public double GI(string pFieldName, List <Passenger> dataSet)
        {
            double total = dataSet.Count;

            // With no rows the parent ratio would be 0/0 (NaN); there is nothing to split,
            // so report zero gain instead of feeding NaN into Entropy.
            if (total == 0)
            {
                return(0);
            }

            double wEC = 0;

            // One pass buckets the rows by property value, replacing two full scans (each
            // calling GetPropValue) per distinct value. Groups come out in first-occurrence
            // order, the same order Distinct produced, and a null property value forms its
            // own group rather than throwing on a null receiver.
            foreach (var subset in dataSet.GroupBy(row => (object)UnitTest1.GetPropValue(row, pFieldName)))
            {
                // A group always holds at least one row, so t is never zero here.
                double t     = subset.Count();
                double cCYes = subset.Count(row => row.Survived == 1);

                // Child value weighted by the share of rows that fall into the category.
                wEC += UnitTest1.Entropy(cCYes / t) * (t / total);
            }

            double cYes = dataSet.Count(row => row.Survived == 1);

            return(UnitTest1.Entropy(cYes / total) - wEC);
        }