/// <summary>
/// Calculates the segmented conditional impurity of y | x.
/// When ranges are stipulated, x is broken up into one segment per range, so
/// P(X = x_r) becomes a range probability rather than a fixed-value probability.
/// The result is SUM_r [ p_r * H(Y | X in r) ], where p_r is the fraction of x that
/// falls inside range r (Min inclusive, Max exclusive).
/// The ranges used, ordered by their Min, are stored in the Segments member and
/// Discrete is set to false.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or <paramref name="y"/> is null.</exception>
/// <param name="y">Target impurity.</param>
/// <param name="x">Conditioned impurity.</param>
/// <param name="ranges">Segments over x to condition upon.</param>
/// <returns>Segmented conditional impurity measure.</returns>
public double SegmentedConditional(Vector y, Vector x, IEnumerable<Range> ranges)
{
    // Both vectors are dereferenced below, so either one being null is fatal.
    if (x == null || y == null)
    {
        throw new InvalidOperationException("x and y do not exist!");
    }

    double result = 0;          // aggregated sum
    double count = x.Count();   // total items in list

    Segments = ranges.OrderBy(r => r.Min).ToArray();
    Discrete = false;

    // For each range calculate the conditional impurity and aggregate the results.
    foreach (Range range in Segments)
    {
        // Indices of x that fall inside [Min, Max).
        var s = x.Indices(d => d >= range.Min && d < range.Max);

        var members = s.Count();

        // An empty segment carries zero probability mass and contributes nothing.
        // Skipping it avoids asking for the impurity of an empty slice and avoids
        // a 0 / 0 probability when x itself is empty.
        if (members == 0)
        {
            continue;
        }

        // Slice probability.
        double p = (double)members / count;

        // Impurity of (y | x in range).
        double h = Calculate(y.Slice(s));

        result += p * h;
    }

    return result;
}
/// <summary>
/// Calculates the conditional impurity of y | x.
/// The result is the average of H(Y | X = x) over every distinct value x takes,
/// each weighted by the fraction of entries in x equal to that value.
/// One degenerate range (Min == Max) per distinct value, in ascending order, is
/// stored in the Segments member and Discrete is set to true.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or <paramref name="y"/> is null.</exception>
/// <param name="y">Target impurity</param>
/// <param name="x">Conditioned impurity</param>
/// <returns>Conditional impurity measure</returns>
public double Conditional(Vector y, Vector x)
{
    // Both vectors are dereferenced below, so either one being null is fatal.
    if (x == null || y == null)
    {
        throw new InvalidOperationException("x and y do not exist!");
    }

    double result = 0;          // aggregated sum
    double count = x.Count();   // total items in list

    // Distinct values to split on. Materialised once because the sequence is
    // consumed twice below (building Segments and driving the loop).
    var values = x.Distinct().OrderBy(z => z).ToArray();

    Segments = values.Select(z => Range.Make(z, z)).ToArray();
    Discrete = true;

    // For each distinct value calculate the conditional impurity and aggregate.
    foreach (var value in values)
    {
        // Indices of x equal to the current value.
        var s = x.Indices(d => d == value);

        // Slice probability.
        double p = (double)s.Count() / count;

        // Impurity of (y | x = value).
        double h = Calculate(y.Slice(s));

        result += p * h;
    }

    return result;
}
/// <summary>Finds the mode (most frequently occurring value) of a vector.</summary>
/// <remarks>
/// When several values share the highest frequency, the one whose first occurrence
/// comes earliest in <paramref name="source"/> is returned. Values are grouped with the
/// default equality comparer, so NaN entries are counted together as one value.
/// </remarks>
/// <param name="source">The source to act on.</param>
/// <returns>The most frequent value, or -1 when <paramref name="source"/> has no elements.</returns>
public static double Mode(this Vector source)
{
    double mode = -1;
    var count = -1;

    // Each group already holds its own members, so its size is the frequency;
    // there is no need to rescan the whole source for every distinct value.
    foreach (var g in source.GroupBy(v => v))
    {
        var occurrences = g.Count();

        // Strictly greater keeps the earliest value on ties.
        if (occurrences > count)
        {
            count = occurrences;
            mode = g.Key;
        }
    }

    return mode;
}
/// <summary>
/// Calculates the Classification Error of x: one minus the relative frequency of
/// the most common value in x.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null or contains no elements.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure.</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double length = x.Count();

    // There is no most-common value in an empty vector. Fail with an explicit
    // message instead of the generic "sequence contains no elements" from Max().
    if (length == 0)
    {
        throw new InvalidOperationException("x is empty!");
    }

    // Single pass: group equal values and take the size of the largest group,
    // rather than rescanning x once per distinct value.
    var largest = x.GroupBy(v => v).Max(g => g.Count());

    return 1 - largest / length;
}
/// <summary>
/// Calculates the Shannon Entropy of x: the sum over every distinct value of
/// -p * log2(p), where p is that value's relative frequency in x.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure; 0 when x contains no elements.</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double length = x.Count();

    // Single pass: group equal values and derive each probability from the group
    // size, rather than rescanning x once per distinct value. Every group has at
    // least one member, so p is never 0 and log2(p) is always finite.
    var px = x.GroupBy(v => v).Select(g => g.Count() / length);

    return px.Select(p => -1 * p * Math.Log(p, 2)).Sum();
}
/// <summary>
/// Calculates the Gini Index of x: one minus the sum of squared relative
/// frequencies of every distinct value in x.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure; 1 when x contains no elements (the sum of squares is then 0).</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double length = x.Count();

    // Single pass: group equal values and derive each probability from the group
    // size, rather than rescanning x once per distinct value.
    var px = x.GroupBy(v => v).Select(g => g.Count() / length);

    return 1 - px.Select(p => p * p).Sum();
}