Пример #1
0
        /// <summary>
        /// Calculates the segmented conditional impurity of y | x. When ranges (r) are stipulated, X is
        /// broken up into |r| segments, so P(X = x_r) becomes a range probability rather than a
        /// fixed-value probability. In essence the average over H(Y|X = x) becomes
        /// SUM_r [ p_r * H(Y|X = x_r) ]. The ranges used for the split, ordered by their minimum,
        /// are stored in the Segments member and Discrete is set to false.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or <paramref name="y"/> is null.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="ranges"/> is null.</exception>
        /// <param name="y">Target vector; its impurity is calculated within each segment.</param>
        /// <param name="x">Conditioning vector; its values decide which segment each index belongs to.</param>
        /// <param name="ranges">Segments over x to condition upon. Each range selects values in [Min, Max).</param>
        /// <returns>Segmented conditional impurity measure.</returns>
        public double SegmentedConditional(Vector y, Vector x, IEnumerable<Range> ranges)
        {
            // Either vector missing makes the calculation impossible; the original
            // check only fired when both were null and let a single null through.
            if (x == null || y == null)
                throw new InvalidOperationException("x and y do not exist!");

            if (ranges == null)
                throw new ArgumentNullException("ranges");

            double result = 0,          // aggregated sum
                   count = x.Count();   // total items in list

            Segments = ranges.OrderBy(r => r.Min).ToArray();
            Discrete = false;

            // for each range calculate the conditional
            // impurity and aggregate the weighted results
            foreach (Range range in Segments)
            {
                // indices of x falling inside the half-open range [Min, Max)
                var s = x.Indices(d => d >= range.Min && d < range.Max);

                // an empty segment has probability 0 and contributes nothing;
                // skipping it also avoids 0/0 when x itself is empty and avoids
                // asking for the impurity of an empty slice
                int size = s.Count();
                if (size == 0)
                    continue;

                // slice probability
                double p = size / count;
                // impurity of (y | x_i)
                double h = Calculate(y.Slice(s));
                // sum up
                result += p * h;
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Calculates conditional impurity of y | x.
        /// R(Y|X) is the average of H(Y|X = x) over all distinct values
        /// X takes, each weighted by the fraction of items in x equal to that value.
        /// The distinct values, in ascending order, are stored as single-value ranges
        /// in the Segments member and Discrete is set to true.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or <paramref name="y"/> is null.</exception>
        /// <param name="y">Target vector; its impurity is calculated within each slice.</param>
        /// <param name="x">Conditioning vector; its distinct values define the slices.</param>
        /// <returns>Conditional impurity measure</returns>
        public double Conditional(Vector y, Vector x)
        {
            // Either vector missing makes the calculation impossible; the original
            // check only fired when both were null and let a single null through.
            if (x == null || y == null)
                throw new InvalidOperationException("x and y do not exist!");

            double result = 0,          // aggregated sum
                   count = x.Count();   // total items in list

            // distinct values to split on; materialized once because they are
            // used both to build Segments and to drive the loop below
            var values = x.Distinct().OrderBy(z => z).ToArray();

            Segments = values.Select(z => Range.Make(z, z)).ToArray();
            Discrete = true;

            // for each distinct value
            // calculate conditional impurity
            // and aggregate results
            foreach (var value in values)
            {
                // indices of x equal to the current value
                var s = x.Indices(d => d == value);
                // slice probability
                double p = (double)s.Count() / count;
                // impurity of (y | x_i)
                double h = Calculate(y.Slice(s));
                // sum up
                result += p * h;
            }

            return result;
        }