/// <summary>
/// Calculates the segmented conditional impurity of y | x.
/// When ranges (r) are supplied, x is broken up into |r| segments, so P(X = x_r) becomes a range
/// probability rather than a fixed-value probability. The average over H(Y|X = x) therefore
/// becomes SUM_r [ p_r * H(Y|X = x_r) ], where p_r is the fraction of x falling inside range r.
/// The ranges used for the split (ordered by their Min) are stored in the Segments member, and
/// Discrete is set to false.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when either x or y is null.</exception>
/// <exception cref="ArgumentNullException">Thrown when ranges is null.</exception>
/// <param name="y">Target vector whose impurity is measured within each segment.</param>
/// <param name="x">Conditioning vector; its values decide which segment each item belongs to.</param>
/// <param name="ranges">Segments over x to condition upon. Each range is treated as half-open:
/// a value d belongs to it when Min &lt;= d and d &lt; Max.</param>
/// <returns>Segmented conditional impurity measure.</returns>
public double SegmentedConditional(Vector y, Vector x, IEnumerable<Range> ranges)
{
    // Either vector missing makes the computation impossible. The original test used &&,
    // which let a single null argument through to a NullReferenceException further down.
    if (x == null || y == null)
        throw new InvalidOperationException("x and y do not exist!");

    // Same exception type OrderBy would raise, but reported against the right parameter name.
    if (ranges == null)
        throw new ArgumentNullException("ranges");

    double result = 0;          // aggregated sum
    double count = x.Count();   // total items in list (kept as double for the division below)

    Segments = ranges.OrderBy(r => r.Min).ToArray();
    Discrete = false;

    // for each range calculate conditional impurity and aggregate results
    foreach (Range range in Segments)
    {
        // indices of x that fall inside [range.Min, range.Max)
        var s = x.Indices(d => d >= range.Min && d < range.Max);

        // slice probability
        double p = (double)s.Count() / count;

        // impurity of (y | x_i)
        double h = Calculate(y.Slice(s));

        // weighted contribution of this segment
        result += p * h;
    }

    return result;
}
/// <summary>
/// Calculates the conditional impurity of y | x.
/// R(Y|X) is the average of H(Y|X = x) over all distinct values X takes, each weighted by the
/// fraction of items in x equal to that value. The distinct values (in ascending order) are
/// stored in the Segments member as single-value ranges, and Discrete is set to true.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when either x or y is null.</exception>
/// <param name="y">Target vector whose impurity is measured within each slice.</param>
/// <param name="x">Conditioning vector; items are grouped by exact value equality.</param>
/// <returns>Conditional impurity measure.</returns>
public double Conditional(Vector y, Vector x)
{
    // Either vector missing makes the computation impossible. The original test used &&,
    // which let a single null argument through to a NullReferenceException further down.
    if (x == null || y == null)
        throw new InvalidOperationException("x and y do not exist!");

    double result = 0;          // aggregated sum
    double count = x.Count();   // total items in list (kept as double for the division below)

    // Distinct values to split on. Materialized once: the sequence is consumed twice below
    // (Segments and the loop) and a deferred query would redo Distinct + OrderBy each time.
    var values = x.Distinct().OrderBy(z => z).ToArray();

    Segments = values.Select(z => Range.Make(z, z)).ToArray();
    Discrete = true;

    // for each distinct value calculate conditional impurity and aggregate results
    foreach (var i in values)
    {
        // indices of x holding exactly this value (exact equality is intended: the
        // candidates come from x itself)
        var s = x.Indices(d => d == i);

        // slice probability
        double p = (double)s.Count() / count;

        // impurity of (y | x_i)
        double h = Calculate(y.Slice(s));

        // weighted contribution of this value
        result += p * h;
    }

    return result;
}