Esempio n. 1
0
        /// <summary>
        /// Calculates the segmented conditional impurity of y | x. X is broken up into one segment
        /// per supplied range, so P(X = x_r) becomes a range probability rather than a fixed-value
        /// probability. The result is SUM_r [ p_r * H(Y | X in r) ], where p_r is the fraction of
        /// x values falling inside range r (Min inclusive, Max exclusive). The ranges, ordered by
        /// Min, are stored in the Segments member and Discrete is set to false.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or
        /// <paramref name="y"/> is null.</exception>
        /// <param name="y">Target values whose impurity is measured.</param>
        /// <param name="x">Values being conditioned upon.</param>
        /// <param name="ranges">Ranges over x to condition upon.</param>
        /// <returns>Segmented conditional impurity measure.</returns>
        public double SegmentedConditional(Vector y, Vector x, IEnumerable<Range> ranges)
        {
            // Both vectors are dereferenced below, so either one being null is fatal.
            // (The previous guard used && and only fired when both were null.)
            if (x == null || y == null)
            {
                throw new InvalidOperationException("x and y do not exist!");
            }

            double result = 0;          // aggregated sum
            double count = x.Count();   // total items in list

            Segments = ranges.OrderBy(r => r.Min).ToArray();
            Discrete = false;

            // for each range calculate the conditional impurity and aggregate
            foreach (Range range in Segments)
            {
                // indices of x that fall in [Min, Max)
                var s = x.Indices(d => d >= range.Min && d < range.Max);

                // slice probability
                double p = (double)s.Count() / count;

                // impurity of (y | x in range)
                double h = Calculate(y.Slice(s));

                result += p * h;
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Calculates the conditional impurity of y | x.
        /// R(Y|X) is the average of H(Y|X = x) over all distinct values X takes, each weighted
        /// by the fraction of x equal to that value. One zero-width range per distinct value
        /// (in ascending order) is stored in the Segments member and Discrete is set to true.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> or
        /// <paramref name="y"/> is null.</exception>
        /// <param name="y">Target values whose impurity is measured.</param>
        /// <param name="x">Values being conditioned upon.</param>
        /// <returns>Conditional impurity measure.</returns>
        public double Conditional(Vector y, Vector x)
        {
            // Both vectors are dereferenced below, so either one being null is fatal.
            // (The previous guard used && and only fired when both were null.)
            if (x == null || y == null)
            {
                throw new InvalidOperationException("x and y do not exist!");
            }

            double result = 0;          // aggregated sum
            double count = x.Count();   // total items in list

            // Distinct values to split on; materialized once because they are
            // consumed twice (Segments and the loop below).
            var values = x.Distinct().OrderBy(z => z).ToArray();

            Segments = values.Select(z => Range.Make(z, z)).ToArray();
            Discrete = true;

            // for each distinct value calculate the conditional impurity and aggregate
            foreach (var value in values)
            {
                // indices of x holding exactly this value
                var s = x.Indices(d => d == value);

                // slice probability
                double p = (double)s.Count() / count;

                // impurity of (y | x = value)
                double h = Calculate(y.Slice(s));

                result += p * h;
            }

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// A Vector extension method that returns the mode (most frequently occurring value)
        /// of the given source. When several values share the highest count, the one that
        /// appears first in the source wins. An empty source yields -1.
        /// </summary>
        /// <param name="source">The source to act on.</param>
        /// <returns>The most frequent value, or -1 when the source has no elements.</returns>
        public static double Mode(this Vector source)
        {
            double mode  = -1;
            var    count = -1;

            // Each group already holds its own members, so its size is the
            // occurrence count; no need to re-scan the whole source per key.
            foreach (var group in source.GroupBy(v => v))
            {
                var occurrences = group.Count();

                // strict '>' keeps the first value encountered on ties
                if (occurrences > count)
                {
                    count = occurrences;
                    mode  = group.Key;
                }
            }

            return mode;
        }
Esempio n. 4
0
        /// <summary>
        /// Calculates the Classification Error of x: one minus the relative frequency of
        /// the most common value.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null,
        /// or (from the maximum over an empty sequence) when x has no elements.</exception>
        /// <param name="x">The list in question.</param>
        /// <returns>Impurity measure.</returns>
        public override double Calculate(Vector x)
        {
            if (x == null)
            {
                throw new InvalidOperationException("x does not exist!");
            }

            double length = x.Count();

            // Relative frequency of each distinct value, counted in a single grouping
            // pass instead of re-scanning x once per distinct value.
            var frequencies = x.GroupBy(v => v).Select(g => g.Count() / length);

            return 1 - frequencies.Max();
        }
Esempio n. 5
0
        /// <summary>
        /// Calculates the Shannon Entropy of x: the sum over distinct values of
        /// -p * log2(p), where p is the value's relative frequency. An x with no
        /// elements yields 0 (the sum of no terms).
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null.</exception>
        /// <param name="x">The list in question.</param>
        /// <returns>Impurity measure.</returns>
        public override double Calculate(Vector x)
        {
            if (x == null)
            {
                throw new InvalidOperationException("x does not exist!");
            }

            double length = x.Count();

            // Relative frequency of each distinct value, counted in a single grouping
            // pass instead of re-scanning x once per distinct value.
            var px = x.GroupBy(v => v).Select(g => g.Count() / length);

            return px.Select(p => -1 * p * Math.Log(p, 2)).Sum();
        }
Esempio n. 6
0
        /// <summary>
        /// Calculates the Gini Index of x: one minus the sum of squared relative
        /// frequencies of its distinct values. An x with no elements yields 1
        /// (one minus the sum of no terms).
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is null.</exception>
        /// <param name="x">The list in question.</param>
        /// <returns>Impurity measure.</returns>
        public override double Calculate(Vector x)
        {
            if (x == null)
            {
                throw new InvalidOperationException("x does not exist!");
            }

            double length = x.Count();

            // Relative frequency of each distinct value, counted in a single grouping
            // pass instead of re-scanning x once per distinct value.
            var px = x.GroupBy(v => v).Select(g => g.Count() / length);

            return 1 - px.Select(p => p * p).Sum();
        }