/// <summary>
/// Scores how well the field <paramref name="pFieldName"/> separates survivors
/// (<c>Survived == 1</c>) from non-survivors: the entropy of the whole data set
/// minus the size-weighted entropy of each partition obtained by splitting the
/// rows on the values of the enum <typeparamref name="E"/>.
/// </summary>
/// <typeparam name="E">
/// Enum type of the field. The constraints cannot enforce "is an enum";
/// <see cref="Enum.GetValues(Type)"/> throws <see cref="ArgumentException"/> for other types.
/// </typeparam>
/// <param name="pFieldName">Name of the <c>Passenger</c> member, read through <c>UnitTest1.GetPropValue</c>.</param>
/// <param name="dataSet">Rows to evaluate.</param>
/// <returns>
/// Parent entropy minus weighted child entropy, or <c>0</c> when <paramref name="dataSet"/> is empty.
/// </returns>
/// <remarks>
/// NOTE(review): <c>UnitTest1.Entropy</c> is not visible here; it is presumably the
/// binary entropy of a probability in [0, 1] - confirm.
/// </remarks>
public double GI<E>(string pFieldName, List<Passenger> dataSet) where E : struct, IComparable, IFormattable, IConvertible
{
    double total = dataSet.Count;
    if (total == 0)
    {
        // An empty set would otherwise evaluate 0.0 / 0.0 (NaN) and feed it to Entropy.
        return 0;
    }

    // Read the field once per row; the value does not depend on which enum member is being counted.
    var rows = dataSet
        .Select(x => new { Value = UnitTest1.GetPropValue(x, pFieldName), Survived = x.Survived == 1 })
        .ToList();

    double wEC = 0;

    // Enum.GetValues repeats a value when several members share it; Distinct keeps
    // each partition from being counted (and weighted) more than once.
    foreach (E en in Enum.GetValues(typeof(E)).Cast<E>().Distinct())
    {
        object boxed = en;
        double t = 0;
        double cCYes = 0;

        foreach (var row in rows)
        {
            // Static Equals tolerates a null field value instead of throwing.
            if (!object.Equals(row.Value, boxed))
            {
                continue;
            }

            t++;
            if (row.Survived)
            {
                cCYes++;
            }
        }

        if (t == 0)
        {
            // Enum value absent from the data: contributes nothing and would divide by zero.
            continue;
        }

        wEC += UnitTest1.Entropy(cCYes / t) * (t / total);
    }

    double cYes = rows.Count(r => r.Survived);
    double eP = UnitTest1.Entropy(cYes / total);

    return eP - wEC;
}
/// <summary>
/// Scores how well the field <paramref name="pFieldName"/> separates survivors
/// (<c>Survived == 1</c>) from non-survivors: the entropy of the whole data set
/// minus the size-weighted entropy of each partition obtained by grouping the
/// rows on the distinct values the field takes in <paramref name="dataSet"/>.
/// </summary>
/// <param name="pFieldName">Name of the <c>Passenger</c> member, read through <c>UnitTest1.GetPropValue</c>.</param>
/// <param name="dataSet">Rows to evaluate.</param>
/// <returns>
/// Parent entropy minus weighted child entropy, or <c>0</c> when <paramref name="dataSet"/> is empty.
/// </returns>
/// <remarks>
/// NOTE(review): <c>UnitTest1.Entropy</c> is not visible here; it is presumably the
/// binary entropy of a probability in [0, 1] - confirm.
/// </remarks>
public double GI(string pFieldName, List<Passenger> dataSet)
{
    double total = dataSet.Count;
    if (total == 0)
    {
        // An empty set would otherwise evaluate 0.0 / 0.0 (NaN) and feed it to Entropy.
        return 0;
    }

    // One pass: GroupBy reads the field once per row, accepts null keys, and yields
    // groups in first-seen order, so no per-value rescans of the list are needed.
    var partitions = dataSet
        .GroupBy(x => UnitTest1.GetPropValue(x, pFieldName))
        .Select(g => new
        {
            Size = (double)g.Count(),
            Yes = (double)g.Count(x => x.Survived == 1)
        });

    double wEC = 0;
    foreach (var part in partitions)
    {
        // Every group holds at least one row, so Size is never zero here.
        wEC += UnitTest1.Entropy(part.Yes / part.Size) * (part.Size / total);
    }

    double cYes = dataSet.Count(x => x.Survived == 1);
    double eP = UnitTest1.Entropy(cYes / total);

    return eP - wEC;
}