示例#1
0
        /// <summary>
        /// Builds a <see cref="RatioValue"/> from numerators and denominators that
        /// are paired by index.
        /// </summary>
        /// <param name="numerators">Numerator values</param>
        /// <param name="denominators">Denominator values, one per numerator</param>
        /// <returns>
        /// Null when the lists are empty; a value constructed from the single quotient
        /// when there is exactly one pair; otherwise a value whose Ratio is the sum of
        /// numerators over the sum of denominators, whose StdDev is the
        /// denominator-weighted standard deviation of the per-pair ratios, and whose
        /// DotProduct is the result of Angle between numerators and denominators.
        /// </returns>
        /// <exception cref="ArgumentException">The two lists differ in length</exception>
        public static RatioValue Calculate(IList<double> numerators, IList<double> denominators)
        {
            int pairCount = numerators.Count;
            if (pairCount != denominators.Count)
                throw new ArgumentException();

            switch (pairCount)
            {
                case 0:
                    return null;
                case 1:
                    return new RatioValue(numerators.First()/denominators.First());
            }

            var numeratorStats = new Statistics(numerators);
            var denominatorStats = new Statistics(denominators);
            var pairRatios = new Statistics(numerators.Select((numerator, i) => numerator/denominators[i]));

            // Sum of numerators over sum of denominators. This equals the mean of
            // the per-pair ratios when each ratio is weighted by its denominator.
            double overallRatio = numeratorStats.Sum()/denominatorStats.Sum();

            // Keep the value inside the float range before narrowing it.
            double boundedBelow = Math.Max(float.MinValue, overallRatio);
            float ratioAsFloat = (float) Math.Min(float.MaxValue, boundedBelow);

            return new RatioValue
            {
                Ratio = ratioAsFloat,
                StdDev = (float) pairRatios.StdDev(denominatorStats),
                DotProduct = (float) numeratorStats.Angle(denominatorStats),
            };
        }
示例#2
0
 /// <summary>
 /// Slope (the a term) of the linear regression function y = a*x + b
 /// for the supplied Y and X values.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>
 /// The covariance of y and x divided by the squared standard deviation of x,
 /// or NaN if the calculation throws
 /// </returns>
 public static double ATerm2(Statistics y, Statistics x)
 {
     double slope;
     try
     {
         double covariance = Covariance(y, x);
         double varianceX = Math.Pow(x.StdDev(), 2);
         slope = covariance / varianceX;
     }
     catch (Exception)
     {
         // Any failure is reported to the caller as "no value".
         slope = double.NaN;
     }
     return slope;
 }
示例#3
0
        /// <summary>
        /// Chooses the lambda (p value cut-off) from <c>PiZeroLambdas</c> whose Pi-zero
        /// estimates, taken over repeated random draws from the current set, stay
        /// closest to the minimum Pi-zero found on the full set.
        /// </summary>
        /// <returns>The candidate lambda with the smallest accumulated squared error</returns>
        public double CalcPiZeroLambda()
        {
            // As in Storey and Tibshirani 2003 calculate Pi-zero across a range of
            // p value cut-offs.
            var lambdas = PiZeroLambdas.ToArray();
            var piZeros = PiZeros(lambdas);
            double minPi0 = piZeros.Min();

            // Because the spline fitting described in Storey and Tibshirani 2003
            // is non-trivial to implement in C#, the method in use in Percolator
            // is used instead.

            // Find the lambda level closest to the minimum with enough precision
            // by testing sets of p values drawn at random from the current set.
            double[] arrayMse = new double[lambdas.Length];
            int numDraw = Math.Min(Length, RANDOM_DRAWS_MAX);
            var rand = new Random(0);   // Use a fixed random seed value for reproducible results
            for (int r = 0; r < RANDOM_CYCLE_COUNT; r++)
            {
                // Create an array of p-values randomly drawn from the current set
                var statBoot = new Statistics(RandomDraw(rand).Take(numDraw));
                piZeros = statBoot.PiZeros(lambdas);
                for (int i = 0; i < lambdas.Length; ++i)
                {
                    double pi0Boot = piZeros[i];
                    // Accumulate the squared distance from the full-set minimum;
                    // only the relative size across lambdas matters, so the sum
                    // is not divided by the cycle count.
                    arrayMse[i] += (pi0Boot - minPi0) * (pi0Boot - minPi0);
                }
            }

            // Use the original estimate for the lambda that produced
            // the minimum mean-squared error for the random draw iterations.
            // The minimum is found once here; evaluating it inside the predicate
            // would rescan the whole array for every element tested.
            double minMse = arrayMse.Min();
            int iMin = arrayMse.IndexOf(v => v == minMse);
            return lambdas[iMin];
        }
示例#4
0
 /// <summary>
 /// Y-intercept (the b term) of the linear regression function
 /// y = a*x + b for the supplied Y and X values.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>The mean of y minus the slope multiplied by the mean of x</returns>
 public static double BTerm2(Statistics y, Statistics x)
 {
     double meanY = y.Mean();
     double slope = ATerm2(y, x);
     double meanX = x.Mean();
     return meanY - slope*meanX;
 }
示例#5
0
 /// <summary>
 /// Y-intercept (the b term) of the linear regression function y = a*x + b,
 /// where this set of numbers provides the Y values and another set
 /// provides the X values.
 /// </summary>
 /// <param name="x">X values</param>
 /// <returns>The b term of y = a*x + b</returns>
 public double BTerm2(Statistics x)
 {
     // This set plays the role of Y in the static overload.
     Statistics y = this;
     return BTerm2(y, x);
 }
示例#6
0
 /// <summary>
 /// Weighted mean of this set of numbers: the sum of each value times its
 /// weight, divided by the sum of the weights.
 /// See:
 /// http://en.wikipedia.org/wiki/Weighted_mean
 /// </summary>
 /// <param name="weights">The weights, matched to the values by index</param>
 /// <returns>
 /// The weighted mean, or NaN if the calculation throws (for example when
 /// there are fewer weights than values)
 /// </returns>
 public double Mean(Statistics weights)
 {
     try
     {
         double weightedSum = 0;
         int i = 0;
         foreach (double value in _list)
         {
             weightedSum += value * weights._list[i];
             i++;
         }
         return weightedSum / weights.Sum();
     }
     catch (Exception)
     {
         return double.NaN;
     }
 }
示例#7
0
        /// <summary>
        /// Variance of this set of numbers measured from its weighted mean.
        /// See:
        /// http://en.wikipedia.org/wiki/Weighted_mean
        /// </summary>
        /// <param name="weights">The weights, matched to the values by index</param>
        /// <returns>
        /// Zero when the set holds fewer than two values; otherwise the variance
        /// from the weighted mean, or NaN if the calculation throws
        /// </returns>
        public double Variance(Statistics weights)
        {
            int count = _list.Length;
            if (count < 2)
                return 0;

            try
            {
                // Sum of weight * value^2 over all values.
                double weightedSquares = 0;
                int i = 0;
                while (i < count)
                {
                    weightedSquares += weights._list[i] * Math.Pow(_list[i], 2);
                    i++;
                }

                double scaledSquares = weightedSquares/weights.Mean();
                double weightedMean = Mean(weights);
                return (scaledSquares - count*Math.Pow(weightedMean, 2)) / (count - 1);
            }
            catch (Exception)
            {
                return double.NaN;
            }
        }
示例#8
0
 /// <summary>
 /// Covariance of this set of numbers with another set.
 /// </summary>
 /// <param name="s">Second set of numbers</param>
 /// <returns>The result of the static Covariance overload for (this, s)</returns>
 public double Covariance(Statistics s)
 {
     // This set is the first argument of the static overload.
     Statistics first = this;
     return Covariance(first, s);
 }
示例#9
0
 /// <summary>
 /// The c term of the quadratic regression function (y = a*x^2 + b*x + c),
 /// where this set of numbers provides the Y values and another set
 /// provides the X values.
 /// </summary>
 /// <param name="x">X values</param>
 /// <returns>The c term of y = a*x^2 + b*x + c</returns>
 public double CTerm3(Statistics x)
 {
     // This set plays the role of Y in the static overload.
     Statistics y = this;
     return CTerm3(y, x);
 }
示例#10
0
 /// <summary>
 /// Correlation coefficient between two sets of numbers.
 /// </summary>
 /// <param name="s1">First set of numbers</param>
 /// <param name="s2">Second set of numbers</param>
 /// <returns>
 /// The covariance of the two sets divided by the product of their standard
 /// deviations, or NaN if the calculation throws
 /// </returns>
 public static double R(Statistics s1, Statistics s2)
 {
     try
     {
         double covariance = Covariance(s1, s2);
         double spreadProduct = s1.StdDev()*s2.StdDev();
         return covariance/spreadProduct;
     }
     catch (Exception)
     {
         return double.NaN;
     }
 }
示例#11
0
        /// <summary>
        /// Residuals of the Y values from the linear regression line fitted
        /// to the supplied Y and X values.
        /// </summary>
        /// <param name="y">Y values</param>
        /// <param name="x">X values</param>
        /// <returns>
        /// A set holding, for each index up to the length of x, the observed Y value
        /// minus the value predicted by the fitted line
        /// </returns>
        public static Statistics Residuals(Statistics y, Statistics x)
        {
            double slope = ATerm2(y, x);
            double intercept = BTerm2(y, x);

            var differences = new List<double>();
            int i = 0;
            while (i < x.Length)
            {
                double observed = y._list[i];
                double predicted = slope*x._list[i] + intercept;
                differences.Add(observed - predicted);
                i++;
            }
            return new Statistics(differences);
        }
示例#12
0
        /// <summary>
        /// Covariance between two sets of numbers.
        /// </summary>
        /// <param name="s1">First set of numbers</param>
        /// <param name="s2">Second set of numbers</param>
        /// <returns>
        /// The sum of pairwise products minus n times the product of the means,
        /// divided by n - 1; NaN when the sets differ in length or the
        /// calculation throws
        /// </returns>
        public static double Covariance(Statistics s1, Statistics s2)
        {
            try
            {
                int count = s1.Length;
                if (count != s2.Length)
                    return double.NaN;

                double sumProducts = 0;
                for (int i = 0; i < count; i++)
                {
                    double left = s1._list[i];
                    double right = s2._list[i];
                    sumProducts += left*right;
                }

                double scaledMeans = count*s1.Mean()*s2.Mean();
                return (sumProducts - scaledMeans)/(count - 1);
            }
            catch (Exception)
            {
                return double.NaN;
            }
        }
示例#13
0
 /// <summary>
 /// Y-intercept (the b term) of the linear regression function
 /// y = a*x + b for the supplied Y and X values.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>The y-intercept, as computed by BTerm2</returns>
 public static double Intercept(Statistics y, Statistics x)
 {
     // Intercept is another name for the b term.
     double bTerm = BTerm2(y, x);
     return bTerm;
 }
示例#14
0
        /// <summary>
        /// Recomputes the ranks of this set so that entries whose value is zero are
        /// placed according to their rank in another set. Returns the ranks unchanged
        /// when this set contains no zero.
        /// </summary>
        /// <param name="ranks">Current ranks of this set, one per value</param>
        /// <param name="sOther">The other set of numbers, matched by index</param>
        /// <param name="ranksOther">Ranks of the other set, one per value</param>
        /// <returns>The original ranks, or a newly computed rank array</returns>
        private int[] FixZeroRanks(int[] ranks, Statistics sOther, int[] ranksOther)
        {
            // Nothing to fix when there are no zero values.
            if (!_list.Contains(0))
                return ranks;

            // Start from a copy of the current ranks as doubles.
            var listNewValues = new List<double>();
            foreach (int rank in ranks)
                listNewValues.Add(rank);

            // Pairs of (rank in the other set, index) for zero values in this set
            // whose counterpart in the other set is not zero.
            var listRankOtherIndices = new List<KeyValuePair<int, int>>();
            for (int i = 0; i < _list.Length; i++)
            {
                // Look for zero scores
                if (_list[i] == 0)
                {
                    // If the other is also zero, just match the rankings.
                    // Otherwise, save this index to determine its new rank.
                    if (sOther._list[i] == 0)
                        listNewValues[i] = ranksOther[i];
                    else
                        listRankOtherIndices.Add(new KeyValuePair<int, int>(ranksOther[i], i));
                }
            }
            // Sort by the rank in the other set
            listRankOtherIndices.Sort((p1, p2) => Comparer<int>.Default.Compare(p1.Key, p2.Key));
            // Make the highest ranked in the other set have the lowest rank in this set.
            // Values are handed out counting down from Length + (number of saved indices),
            // so the entry with the smallest rank number in the other set gets the largest value.
            int rankNew = Length + listRankOtherIndices.Count;
            foreach (var pair in listRankOtherIndices)
                listNewValues[pair.Value] = rankNew--;

            // Finally convert ranks to values by reversing numeric order
            for (int i = 0; i < listNewValues.Count; i++)
                listNewValues[i] = -listNewValues[i];
            // And re-rank
            // NOTE(review): the meaning of the 'true' argument to Rank is not visible here - confirm.
            return new Statistics(listNewValues).Rank(true);
        }
示例#15
0
 /// <summary>
 /// Standard deviation of the Y values about a linear regression line.
 /// <para>
 /// Described at:
 /// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
 /// </para>
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>
 /// The square root of the sum of squared residuals divided by
 /// (number of residuals - 2)
 /// </returns>
 private static double StdDevY(Statistics y, Statistics x)
 {
     var errors = Residuals(y, x)._list;
     double sumSquaredErrors = errors.Sum(error => Math.Pow(error, 2));
     int degreesOfFreedom = errors.Length - 2;
     return Math.Sqrt(sumSquaredErrors / degreesOfFreedom);
 }
示例#16
0
 /// <summary>
 /// Computes a Z value for each number in another set, using the mean and
 /// standard deviation of this set of numbers.
 /// </summary>
 /// <param name="s">Another set of numbers</param>
 /// <returns>A set holding the Z value of each number in the other set</returns>
 public Statistics Z(Statistics s)
 {
     // Both parameters come from this set, not from the set being scored.
     double center = Mean();
     double spread = StdDev();
     var scores = s._list.Select(value => Z(value, center, spread));
     return new Statistics(scores);
 }
示例#17
0
 /// <summary>
 /// Costa Soares correlation coefficient between this set of numbers and
 /// another, with the rank limit set to int.MaxValue.
 /// </summary>
 /// <param name="s">Second set of numbers</param>
 /// <returns>Correlation coefficient</returns>
 public double CostaSoares(Statistics s)
 {
     // The largest possible limit is passed to the two-argument overload.
     const int noRankLimit = int.MaxValue;
     return CostaSoares(s, noRankLimit);
 }
示例#18
0
 /// <summary>
 /// Slope (the a term) of the linear regression function y = a*x + b
 /// for the supplied Y and X values.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>The slope, as computed by ATerm2</returns>
 public static double Slope(Statistics y, Statistics x)
 {
     // Slope is another name for the a term.
     double aTerm = ATerm2(y, x);
     return aTerm;
 }
示例#19
0
        /// <summary>
        /// Calculates a Costa Soares correlation coefficient between this and
        /// another set of numbers.
        /// </summary>
        /// <param name="s">Second set of numbers</param>
        /// <param name="limitRank">
        /// A pair contributes only when at least one of its two ranks is less than
        /// or equal to this value; pairs whose ranks both exceed it are left out
        /// </param>
        /// <returns>Correlation coefficient, or NaN when the sets differ in length</returns>
        public double CostaSoares(Statistics s, int limitRank)
        {
            if (Length != s.Length)
                return double.NaN;

            int n = Length;

            int[] a = Rank(true);
            int[] b = s.Rank(true);

            // Adjust the ranks of zero values in each set against the other.
            // The second call deliberately sees the already adjusted ranks in a.
            a = FixZeroRanks(a, s, b);
            b = s.FixZeroRanks(b, this, a);

            double total = 0;

            for (int i = 0; i < n; i++)
            {
                // Squared rank difference, weighted by how near the top both ranks are.
                if (a[i] <= limitRank || b[i] <= limitRank)
                    total += Math.Pow(a[i] - b[i], 2) * ((n - a[i] + 1) + (n - b[i] + 1));
            }

            // Powers of n are computed in floating point. Multiplying n * n as int
            // wraps silently once n exceeds 46340, which corrupts the normalization.
            double n1 = n;
            double n2 = n1 * n1;
            double n3 = n1 * n2;
            double n4 = n1 * n3;
            total *= 6.0 / (n4 + n3 - n2 - n1);
            total = 1 - total;

            return total;
        }
示例#20
0
 /// <summary>
 /// Standard error of the a term (slope) of a linear
 /// regression function y = a*x + b.
 /// <para>
 /// Described at:
 /// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
 /// </para>
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>
 /// StdDevY divided by the square root of x.VarianceTotal(), or NaN if the
 /// calculation throws
 /// </returns>
 public static double StdErrATerm2(Statistics y, Statistics x)
 {
     try
     {
         double stdDevY = StdDevY(y, x);
         double rootVarianceTotalX = Math.Sqrt(x.VarianceTotal());
         return stdDevY / rootVarianceTotalX;
     }
     catch (Exception)
     {
         return double.NaN;
     }
 }
示例#21
0
        /// <summary>
        /// Cross-correlation of this set of numbers with another set of the same
        /// length, for every delay from -Length to +Length inclusive.
        /// </summary>
        /// <param name="s">The other set of numbers</param>
        /// <param name="normalize">
        /// True to subtract each set's mean and divide by the square root of the
        /// product of the two sums of squared deviations
        /// </param>
        /// <returns>
        /// A dictionary from delay to correlation value, or null when the two sets
        /// differ in length. With normalization, every entry is zero when the
        /// normalizing denominator is not greater than zero.
        /// </returns>
        public Dictionary<int, double> CrossCorrelation(Statistics s, bool normalize)
        {
            if (Length != s.Length)
                return null;

            double meanThis = Mean();
            double meanOther = s.Mean();
            int count = Length; // read once; the loops below use it heavily
            var correlations = new Dictionary<int, double>(1 + (2 * count));
            double scale = 1; // multiplier applied to every raw sum

            if (normalize)
            {
                double squaresThis = _list.Sum(v => (v - meanThis)*(v - meanThis));
                double squaresOther = s._list.Sum(v => (v - meanOther)*(v - meanOther));
                double denominator = Math.Sqrt(squaresThis*squaresOther);
                if (!(denominator > 0))
                {
                    // No usable spread in at least one set: report zero at every delay.
                    for (int lag = -count; lag <= count; lag++)
                        correlations.Add(lag, 0);
                    return correlations;
                }
                // Multiplying by the reciprocal avoids a division per delay.
                scale = 1.0/denominator;
            }

            for (int lag = -count; lag <= count; lag++)
            {
                // Both i and i + lag must fall within [0, count).
                int first = Math.Max(0, -lag);
                int end = Math.Min(count, count - lag);

                double sum = 0;
                for (int i = first; i < end; i++)
                {
                    double left = _list[i];
                    double right = s._list[i + lag];
                    sum += normalize
                        ? (left - meanThis) * (right - meanOther)
                        : left * right;
                }

                correlations.Add(lag, sum * scale);
            }
            return correlations;
        }
示例#22
0
 /// <summary>
 /// Normalized contrast angle dot-product, or 1 - angle/90, between two vectors,
 /// which is more sensitive for small vectors than cos(angle).
 /// </summary>
 /// <param name="s">The other vector</param>
 /// <returns>
 /// The result of Angle for the two vectors, converted by
 /// AngleToNormalizedContrastAngle
 /// </returns>
 public double NormalizedContrastAngle(Statistics s)
 {
     double cosAngle = Angle(s);
     return AngleToNormalizedContrastAngle(cosAngle);
 }
示例#23
0
 /// <summary>
 /// Y-intercept (the b term) of the linear regression function y = a*x + b,
 /// where this set of numbers provides the Y values and another set
 /// provides the X values.
 /// </summary>
 /// <param name="x">X values</param>
 /// <returns>The y-intercept, as computed by BTerm2</returns>
 public double Intercept(Statistics x)
 {
     // Intercept is another name for the b term.
     double bTerm = BTerm2(x);
     return bTerm;
 }
示例#24
0
        /// <summary>
        /// Normalized contrast angle dot-product, or 1 - angle/90, between two vectors
        /// after NormalizeUnit has been applied to each of them.
        /// </summary>
        /// <param name="s">The other vector</param>
        /// <returns>Normalized contrast angle dot-product of the normalized vectors</returns>
        public double NormalizedContrastAngleUnitVector(Statistics s)
        {
            // Normalize this vector, then the other, then compare the two results.
            return NormalizeUnit().NormalizedContrastAngle(s.NormalizeUnit());
        }
示例#25
0
 /// <summary>
 /// Standard error of the b term (y-intercept) of a linear
 /// regression function y = a*x + b.
 /// <para>
 /// Described at:
 /// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
 /// </para>
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>
 /// StdDevY multiplied by the square root of
 /// x.SumOfSquares() / (number of X values * x.VarianceTotal()),
 /// or NaN if the calculation throws
 /// </returns>
 public static double StdErrBTerm2(Statistics y, Statistics x)
 {
     try
     {
         double stdDevY = StdDevY(y, x);
         double spreadRatio = x.SumOfSquares()/(x._list.Length*x.VarianceTotal());
         return stdDevY*Math.Sqrt(spreadRatio);
     }
     catch (Exception)
     {
         return double.NaN;
     }
 }
示例#26
0
 /// <summary>
 /// Standard error of the b term (y-intercept) of a linear regression
 /// function y = a*x + b, where this set of numbers provides the Y values
 /// and another set provides the X values.
 /// </summary>
 /// <param name="x">X values</param>
 /// <returns>Standard error of the b term</returns>
 public double StdErrBTerm2(Statistics x)
 {
     // This set plays the role of Y in the static overload.
     Statistics y = this;
     return StdErrBTerm2(y, x);
 }
示例#27
0
        /// <summary>
        /// Normalized contrast angle dot-product, or 1 - angle/90, between two vectors
        /// after every value in each vector is replaced by its square root.
        /// </summary>
        /// <param name="s">The other vector</param>
        /// <returns>Normalized contrast angle dot-product of the square-rooted vectors</returns>
        public double NormalizedContrastAngleSqrt(Statistics s)
        {
            var rootsThis = new Statistics(_list.Select(value => Math.Sqrt(value)));
            var rootsOther = new Statistics(s._list.Select(value => Math.Sqrt(value)));

            double contrastAngle = rootsThis.NormalizedContrastAngle(rootsOther);
            return contrastAngle;
        }
示例#28
0
        /// <summary>
        /// Calculates the dot-product or cos(angle) between two vectors.
        /// See:
        /// http://en.wikipedia.org/wiki/Dot_product
        /// </summary>
        /// <param name="s">The other vector</param>
        /// <returns>
        /// The sum of pairwise products divided by the square root of the product of
        /// the two sums of squares, limited to the range [-1, 1]. NaN when the vectors
        /// differ in length; 1 when both sums of squares are zero; 0 when exactly one
        /// of them is zero.
        /// </returns>
        public double Angle(Statistics s)
        {
            if (Length != s.Length)
                return double.NaN;

            double sumCross = 0;
            double sumLeft = 0;
            double sumRight = 0;

            for (int i = 0, len = Length; i < len; i++)
            {
                double left = _list[i];
                double right = s._list[i];

                sumCross += left*right;
                sumLeft += left*left;
                sumRight += right*right;
            }

            // Avoid dividing by zero
            if (sumLeft == 0 || sumRight == 0)
                return sumLeft == 0 && sumRight == 0 ? 1 : 0;

            // Rounding error can push the quotient slightly beyond 1 or slightly
            // below -1. A cosine cannot lie outside [-1, 1], so limit both ends.
            // A NaN quotient passes through Math.Min and Math.Max unchanged.
            double cosine = sumCross/Math.Sqrt(sumLeft*sumRight);
            return Math.Max(-1.0, Math.Min(1.0, cosine));
        }
示例#29
0
        /// <summary>
        /// The a term of the quadratic regression function (y = a*x^2 + b*x + c)
        /// for the supplied Y and X values, obtained as a ratio of two 3x3
        /// determinants built from power sums of the data.
        /// </summary>
        /// <param name="y">Y values</param>
        /// <param name="x">X values</param>
        /// <returns>The a term of y = a*x^2 + b*x + c</returns>
        /// <exception cref="InvalidOperationException">Fewer than three X values</exception>
        public static double ATerm3(Statistics y, Statistics x)
        {
            if (x.Length < 3)
                throw new InvalidOperationException("Insufficient pairs of co-ordinates"); // Not L10N

            var xs = x._list;

            // Power sums of x alone.
            double sumX4 = xs.Sum(v => v*v*v*v);
            double sumX3 = xs.Sum(v => v*v*v);
            double sumX2 = xs.Sum(v => v*v);
            double sumX = x.Sum();
            double pairCount = x.Length; // sum of x^0, i.e. the number of entries

            // Sums that mix x and y.
            double sumX2Y = xs.Select((v, i) => v*v*y._list[i]).Sum();
            double sumXY = xs.Select((v, i) => v*y._list[i]).Sum();
            double sumY = y.Sum();

            // The three 2x2 minors are shared by the numerator and denominator.
            double minor1 = sumX2 * pairCount - sumX * sumX;
            double minor2 = sumX3 * pairCount - sumX * sumX2;
            double minor3 = sumX3 * sumX - sumX2 * sumX2;

            // a = Da/D
            double numerator = sumX2Y * minor1 - sumXY * minor2 + sumY * minor3;
            double denominator = sumX4 * minor1 - sumX3 * minor2 + sumX2 * minor3;
            return numerator / denominator;
        }
示例#30
0
        /// <summary>
        /// Dot-product or cos(angle) between two vectors after every value in
        /// each vector is replaced by its square root.
        /// </summary>
        /// <param name="s">The other vector</param>
        /// <returns>The result of Angle for the two square-rooted vectors</returns>
        public double AngleSqrt(Statistics s)
        {
            var rootsThis = new Statistics(_list.Select(value => Math.Sqrt(value)));
            var rootsOther = new Statistics(s._list.Select(value => Math.Sqrt(value)));

            double cosAngle = rootsThis.Angle(rootsOther);
            return cosAngle;
        }