/// <summary>
/// Builds a <see cref="RatioValue"/> describing the ratio between two parallel
/// lists of numerators and denominators.
/// </summary>
/// <param name="numerators">Numerator values</param>
/// <param name="denominators">Denominator values, one for each numerator</param>
/// <returns>
/// Null when the lists are empty; a value built from the single quotient when
/// they hold exactly one pair; otherwise a value holding the ratio of the sums,
/// the standard deviation of the individual ratios calculated against the
/// denominators, and the dot-product of the two lists.
/// </returns>
/// <exception cref="ArgumentException">The two lists differ in length</exception>
public static RatioValue Calculate(IList<double> numerators, IList<double> denominators)
{
    int count = numerators.Count;
    if (count != denominators.Count)
    {
        throw new ArgumentException();
    }
    if (count == 0)
    {
        return null;
    }
    if (count == 1)
    {
        // A single pair has no spread to measure, so only the quotient is kept
        return new RatioValue(numerators[0]/denominators[0]);
    }

    var statsNumerators = new Statistics(numerators);
    var statsDenominators = new Statistics(denominators);
    var pairwiseRatios = numerators.Select((numerator, i) => numerator/denominators[i]);
    var ratios = new Statistics(pairwiseRatios);

    // The mean ratio is the average of "ratios" weighted by "statsDenominators",
    // which equals the sum of the numerators divided by the sum of the denominators.
    double meanRatio = statsNumerators.Sum()/statsDenominators.Sum();

    // Keep the value inside the range a float can represent before narrowing.
    double boundedBelow = Math.Max(float.MinValue, meanRatio);
    double bounded = Math.Min(float.MaxValue, boundedBelow);
    float meanRatioFloat = (float) bounded;

    double stdDev = ratios.StdDev(statsDenominators);
    double dotProduct = statsNumerators.Angle(statsDenominators);
    return new RatioValue
    {
        Ratio = meanRatioFloat,
        StdDev = (float) stdDev,
        DotProduct = (float) dotProduct,
    };
}
/// <summary>
/// Calculates the a term (slope) of the linear regression function (y = a*x + b)
/// as the covariance of Y and X divided by the squared standard deviation of X.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The a term of y = a*x + b, or NaN if the calculation throws</returns>
public static double ATerm2(Statistics y, Statistics x)
{
    try
    {
        double covariance = Covariance(y, x);
        double stdDevX = x.StdDev();
        return covariance / Math.Pow(stdDevX, 2);
    }
    catch (Exception)
    {
        // Any failure in the calculation is reported as "not a number"
        return double.NaN;
    }
}
/// <summary>
/// Chooses the p value cut-off (lambda) to use when estimating Pi-zero.
/// Pi-zero is calculated for every candidate lambda, and then again for a number
/// of random draws from the current set; the lambda whose random-draw estimates
/// stay closest (in summed squared error) to the minimum Pi-zero is returned.
/// </summary>
/// <returns>The candidate lambda with the lowest accumulated squared error</returns>
public double CalcPiZeroLambda()
{
    // As in Storey and Tibshirani 2003 calculate Pi-zero across a range of
    // p value cut-offs.
    var lambdas = PiZeroLambdas.ToArray();
    var piZeros = PiZeros(lambdas);
    double minPi0 = piZeros.Min();

    // Because the spline fitting described in Storey and Tibshirani 2003
    // is non-trivial to implement in C#, the method in use in Percolator
    // is used instead.

    // Find the lambda level closest to the minimum with enough precision
    // by testing sets of p values drawn at random from the current set.
    double[] arrayMse = new double[lambdas.Length];
    int numDraw = Math.Min(Length, RANDOM_DRAWS_MAX);
    var rand = new Random(0); // Use a fixed random seed value for reproducible results
    for (int r = 0; r < RANDOM_CYCLE_COUNT; r++)
    {
        // Create an array of p-values randomly drawn from the current set
        var statBoot = new Statistics(RandomDraw(rand).Take(numDraw));
        piZeros = statBoot.PiZeros(lambdas);
        for (int i = 0; i < lambdas.Length; ++i)
        {
            // Accumulate the squared error against the original minimum.
            double error = piZeros[i] - minPi0;
            arrayMse[i] += error * error;
        }
    }

    // Use the original estimate for the lambda that produced
    // the minimum mean-squared error for the random draw iterations.
    // The minimum is computed once, outside the predicate, so that the array
    // is not rescanned for every element. Exact equality is intended here:
    // the minimum is itself one of the array elements.
    double minMse = arrayMse.Min();
    int iMin = arrayMse.IndexOf(v => v == minMse);
    return lambdas[iMin];
}
/// <summary>
/// Calculates the b term (y-intercept) of the linear regression function
/// (y = a*x + b): the mean of Y minus the slope times the mean of X.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The b coefficient of y = a*x + b</returns>
public static double BTerm2(Statistics y, Statistics x)
{
    double meanY = y.Mean();
    double slope = ATerm2(y, x);
    double meanX = x.Mean();
    return meanY - slope*meanX;
}
/// <summary>
/// Calculates the b term (y-intercept) of the linear regression function
/// (y = a*x + b), treating this set of numbers as the Y values.
/// Forwards to the static two-argument overload.
/// </summary>
/// <param name="x">X values</param>
/// <returns>The b coefficient of y = a*x + b</returns>
public double BTerm2(Statistics x) => BTerm2(this, x);
/// <summary>
/// Calculates the weighted mean of the set of numbers: the sum of each value
/// multiplied by its weight, divided by the sum of the weights.
/// See:
/// http://en.wikipedia.org/wiki/Weighted_mean
/// </summary>
/// <param name="weights">The weights, matched to the values by position</param>
/// <returns>Weighted mean, or NaN if the calculation throws</returns>
public double Mean(Statistics weights)
{
    try
    {
        double weightedSum = 0;
        int position = 0;
        foreach (double value in _list)
        {
            weightedSum += value * weights._list[position];
            position++;
        }
        return weightedSum / weights.Sum();
    }
    catch (Exception)
    {
        // E.g. fewer weights than values: report "not a number" instead of throwing
        return double.NaN;
    }
}
/// <summary>
/// Calculates the variance of the set of numbers around its weighted mean.
/// See:
/// http://en.wikipedia.org/wiki/Weighted_mean
/// </summary>
/// <param name="weights">The weights, matched to the values by position</param>
/// <returns>
/// Variance from the weighted mean; 0 when there are fewer than two values;
/// NaN if the calculation throws
/// </returns>
public double Variance(Statistics weights)
{
    int count = _list.Length;
    if (count < 2)
    {
        return 0;
    }

    try
    {
        // Weighted sum of the squared values
        double weightedSquares = 0;
        for (int i = 0; i < count; i++)
        {
            weightedSquares += weights._list[i] * Math.Pow(_list[i], 2);
        }

        double meanWeight = weights.Mean();
        double weightedMean = Mean(weights);
        double numerator = weightedSquares/meanWeight - count*Math.Pow(weightedMean, 2);
        return numerator / (count - 1);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the covariance between this set of numbers and another.
/// Forwards to the static two-argument overload with this set first.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <returns>Covariance</returns>
public double Covariance(Statistics s) => Covariance(this, s);
/// <summary>
/// Calculates the c term of the quadratic regression function
/// (y = a*x^2 + b*x + c), treating this set of numbers as the Y values.
/// Forwards to the static two-argument overload.
/// </summary>
/// <param name="x">X values</param>
/// <returns>The c term of y = a*x^2 + b*x + c</returns>
public double CTerm3(Statistics x) => CTerm3(this, x);
/// <summary>
/// Calculates the correlation coefficient between two sets of numbers:
/// their covariance divided by the product of their standard deviations.
/// </summary>
/// <param name="s1">First set of numbers</param>
/// <param name="s2">Second set of numbers</param>
/// <returns>Correlation coefficient, or NaN if the calculation throws</returns>
public static double R(Statistics s1, Statistics s2)
{
    try
    {
        double covariance = Covariance(s1, s2);
        double spread = s1.StdDev()*s2.StdDev();
        return covariance/spread;
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the residuals of the linear regression function (y = a*x + b):
/// for each X value, the observed Y value minus the Y value predicted by the fit.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values; one residual is produced for each of these</param>
/// <returns>A set of residuals</returns>
public static Statistics Residuals(Statistics y, Statistics x)
{
    double slope = ATerm2(y, x);
    double intercept = BTerm2(y, x);

    int count = x.Length;
    var residuals = new List<double>(count);
    for (int i = 0; i < count; i++)
    {
        double predicted = slope*x._list[i] + intercept;
        residuals.Add(y._list[i] - predicted);
    }
    return new Statistics(residuals);
}
/// <summary>
/// Calculates the covariance between two sets of numbers, using the
/// sum of products minus n times the product of the means, divided by n - 1.
/// </summary>
/// <param name="s1">First set of numbers</param>
/// <param name="s2">Second set of numbers</param>
/// <returns>
/// Covariance, or NaN when the sets differ in length or the calculation throws
/// </returns>
public static double Covariance(Statistics s1, Statistics s2)
{
    try
    {
        int len = s1.Length;
        if (len != s2.Length)
        {
            return double.NaN;
        }

        double sumProducts = 0;
        for (int i = 0; i < len; i++)
        {
            sumProducts += s1._list[i]*s2._list[i];
        }

        double scaledMeans = len*s1.Mean()*s2.Mean();
        return (sumProducts - scaledMeans)/(len - 1);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the y-intercept (b term) of the linear regression function
/// (y = a*x + b) given the Y and X values. Alias for the static BTerm2.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The y-intercept</returns>
public static double Intercept(Statistics y, Statistics x) => BTerm2(y, x);
/// <summary>
/// Re-assigns the ranks of zero values in this set using the ranking of another
/// set. A zero that is also zero in the other set takes the other set's rank;
/// the remaining zeros receive new rank values above the set length, handed out
/// according to their order in the other set. The adjusted values are then
/// negated and ranked again.
/// </summary>
/// <param name="ranks">Current ranks of this set</param>
/// <param name="sOther">The other set of numbers</param>
/// <param name="ranksOther">Ranks of the other set</param>
/// <returns>The original ranks if this set contains no zero, otherwise new ranks</returns>
private int[] FixZeroRanks(int[] ranks, Statistics sOther, int[] ranksOther)
{
    // Nothing to adjust when no value is zero
    if (!_list.Contains(0))
    {
        return ranks;
    }

    // Work on a copy of the ranks, held as doubles so they can be ranked again
    var adjusted = ranks.Select(rank => (double) rank).ToList();

    // (rank in other set, index) for zeros that are not zero in the other set
    var unmatchedZeros = new List<KeyValuePair<int, int>>();
    for (int i = 0; i < _list.Length; i++)
    {
        if (_list[i] != 0)
        {
            continue;
        }

        if (sOther._list[i] == 0)
        {
            // Zero in both sets: just match the rankings
            adjusted[i] = ranksOther[i];
        }
        else
        {
            // Remember this index so its new rank can be determined below
            unmatchedZeros.Add(new KeyValuePair<int, int>(ranksOther[i], i));
        }
    }

    // Order by the rank in the other set
    unmatchedZeros.Sort((left, right) => left.Key.CompareTo(right.Key));

    // Hand out descending values, starting above the set length, in that order
    int nextRank = Length + unmatchedZeros.Count;
    foreach (var entry in unmatchedZeros)
    {
        adjusted[entry.Value] = nextRank;
        nextRank--;
    }

    // Convert ranks to values by reversing numeric order, then re-rank
    var reversed = adjusted.Select(value => -value);
    return new Statistics(reversed).Rank(true);
}
/// <summary>
/// Standard deviation of Y for a linear regression: the square root of the
/// sum of squared residuals divided by the number of residuals minus two.
/// <para>
/// Described at:
/// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
/// </para>
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The standard deviation in the y values for the linear regression</returns>
private static double StdDevY(Statistics y, Statistics x)
{
    var residualValues = Residuals(y, x)._list;
    double sumOfSquares = residualValues.Sum(residual => Math.Pow(residual, 2));
    int degreesOfFreedom = residualValues.Length - 2;
    return Math.Sqrt(sumOfSquares / degreesOfFreedom);
}
/// <summary>
/// Computes a Z value for each number in another set, measured against the
/// mean and standard deviation of this set. Each value is converted by the
/// three-argument Z overload.
/// </summary>
/// <param name="s">Another set of numbers</param>
/// <returns>The Z value of each number in the other set</returns>
public Statistics Z(Statistics s)
{
    // Take the reference mean and deviation from this set, once
    double mean = Mean();
    double stdev = StdDev();

    var scores = s._list.Select(value => Z(value, mean, stdev));
    return new Statistics(scores);
}
/// <summary>
/// Calculates a Costa Soares correlation coefficient between this and
/// another set of numbers, with the rank limit set to <see cref="int.MaxValue"/>.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <returns>Correlation coefficient</returns>
public double CostaSoares(Statistics s) => CostaSoares(s, int.MaxValue);
/// <summary>
/// Calculates the slope (a term) of the linear regression function
/// (y = a*x + b) given the Y and X values. Alias for the static ATerm2.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The slope</returns>
public static double Slope(Statistics y, Statistics x) => ATerm2(y, x);
/// <summary>
/// Calculates a Costa Soares correlation coefficient between this and
/// another set of numbers. Both sets are ranked, the ranks of zero values are
/// adjusted against the other set, and the weighted squared rank differences
/// are summed and normalized by n^4 + n^3 - n^2 - n.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <param name="limitRank">
/// A pair contributes only when at least one of its two ranks is less than or
/// equal to this limit; pairs where both ranks exceed it are excluded
/// </param>
/// <returns>Correlation coefficient, or NaN when the sets differ in length</returns>
public double CostaSoares(Statistics s, int limitRank)
{
    if (Length != s.Length)
        return double.NaN;

    int n = Length;

    int[] a = Rank(true);
    int[] b = s.Rank(true);

    // The second adjustment deliberately sees the already adjusted ranks of this set
    a = FixZeroRanks(a, s, b);
    b = s.FixZeroRanks(b, this, a);

    double total = 0;
    for (int i = 0; i < n; i++)
    {
        if (a[i] <= limitRank || b[i] <= limitRank)
            total += Math.Pow(a[i] - b[i], 2) * ((n - a[i] + 1) + (n - b[i] + 1));
    }

    // Compute the powers of n in double precision. Multiplying n * n as int
    // silently wraps around for n > 46340 and would corrupt the normalization.
    double count = n;
    double n2 = count * count;
    double n3 = count * n2;
    double n4 = count * n3;
    total *= 6.0 / (n4 + n3 - n2 - count);
    total = 1 - total;

    return total;
}
/// <summary>
/// Standard error for the a term (slope) of a linear regression function
/// y = a*x + b: the standard deviation of Y for the regression divided by the
/// square root of the total variance of X.
/// <para>
/// Described at:
/// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
/// </para>
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>Standard error of the a term, or NaN if the calculation throws</returns>
public static double StdErrATerm2(Statistics y, Statistics x)
{
    try
    {
        double stdDevY = StdDevY(y, x);
        double varianceTotalX = x.VarianceTotal();
        return stdDevY / Math.Sqrt(varianceTotalX);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the cross-correlation between this set of numbers and another
/// for every delay from -Length to +Length inclusive.
/// </summary>
/// <param name="s">The other set of numbers, which must have the same length</param>
/// <param name="normalize">
/// True to subtract the means and divide by the square root of the product of
/// the two sums of squared deviations; false to sum the raw products
/// </param>
/// <returns>
/// A dictionary from delay to correlation value, or null when the sets differ
/// in length. When normalizing and the denominator is not positive, every
/// delay maps to zero.
/// </returns>
public Dictionary<int, double> CrossCorrelation(Statistics s, bool normalize)
{
    if (Length != s.Length)
        return null;

    int count = Length; // cached - profiling showed a surprising cost for repeated access
    double meanThis = Mean();
    double meanOther = s.Mean();
    var correlations = new Dictionary<int, double>(1 + (2 * count));

    // Multiplier applied to every sum: 1/denominator when normalizing, else 1
    double scale = 1;

    // Normalized cross-correlation = subtract the mean and divide by the standard deviation
    if (normalize)
    {
        double sumSquaresThis = 0;
        foreach (double value in _list)
            sumSquaresThis += (value - meanThis)*(value - meanThis);

        double sumSquaresOther = 0;
        foreach (double value in s._list)
            sumSquaresOther += (value - meanOther)*(value - meanOther);

        // sigma_1 * sigma_2 * n
        double denominator = Math.Sqrt(sumSquaresThis*sumSquaresOther);
        if (!(denominator > 0))
        {
            // At least one of the sets has no spread around its mean
            for (int delay = -count; delay <= count; delay++)
                correlations.Add(delay, 0);
            return correlations;
        }

        // For speed, multiply by the inverse rather than divide each time
        scale = 1.0/denominator;
    }

    for (int delay = -count; delay <= count; delay++)
    {
        // Both i and i + delay must stay inside [0, count)
        int first = Math.Max(0, -delay);
        int end = Math.Min(count, count - delay);

        double sxy = 0;
        for (int i = first; i < end; i++)
        {
            double left = _list[i];
            double right = s._list[i + delay];
            if (normalize)
                sxy += (left - meanThis) * (right - meanOther);
            else
                sxy += left * right;
        }
        correlations.Add(delay, sxy * scale);
    }

    return correlations;
}
/// <summary>
/// Calculates a dot-product or 1 - angle/90 between two vectors,
/// which is more sensitive for small vectors than cos(angle).
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Normalized contrast angle dot-product</returns>
public double NormalizedContrastAngle(Statistics s)
{
    // Angle() yields cos(angle); the helper converts it to the normalized form
    double cosine = Angle(s);
    return AngleToNormalizedContrastAngle(cosine);
}
/// <summary>
/// Calculates the y-intercept (b term) of the linear regression function
/// (y = a*x + b), treating this set of numbers as the Y values.
/// Alias for the instance BTerm2.
/// </summary>
/// <param name="x">X values</param>
/// <returns>The y-intercept</returns>
public double Intercept(Statistics x) => BTerm2(x);
/// <summary>
/// Calculates the normalized contrast angle dot-product or 1 - angle/90
/// between two vectors, after passing each through NormalizeUnit().
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Normalized contrast angle dot-product of normalized vectors</returns>
public double NormalizedContrastAngleUnitVector(Statistics s)
{
    Statistics unitThis = NormalizeUnit();
    Statistics unitOther = s.NormalizeUnit();
    return unitThis.NormalizedContrastAngle(unitOther);
}
/// <summary>
/// Standard error for the b term (y-intercept) of a linear regression
/// function y = a*x + b: the standard deviation of Y for the regression times
/// the square root of the sum of squares of X over (n times the total variance of X).
/// <para>
/// Described at:
/// http://www.chem.utoronto.ca/coursenotes/analsci/StatsTutorial/ErrRegr.html
/// </para>
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>Standard error of the b term, or NaN if the calculation throws</returns>
public static double StdErrBTerm2(Statistics y, Statistics x)
{
    try
    {
        double stdDevY = StdDevY(y, x);
        double sumOfSquaresX = x.SumOfSquares();
        double scaledVarianceX = x._list.Length*x.VarianceTotal();
        return stdDevY*Math.Sqrt(sumOfSquaresX/scaledVarianceX);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the standard error of the b term (y-intercept) for a linear
/// regression function y = a*x + b, treating this set of numbers as the
/// Y values. Forwards to the static two-argument overload.
/// </summary>
/// <param name="x">X values</param>
/// <returns>Standard error of the b term</returns>
public double StdErrBTerm2(Statistics x) => StdErrBTerm2(this, x);
/// <summary>
/// Calculates the normalized contrast angle dot-product or 1 - angle/90
/// between two vectors, after replacing every value in both vectors with
/// its square root.
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Normalized contrast angle dot-product of square roots of values in vectors</returns>
public double NormalizedContrastAngleSqrt(Statistics s)
{
    var rootsThis = new Statistics(_list.Select(value => Math.Sqrt(value)));
    var rootsOther = new Statistics(s._list.Select(value => Math.Sqrt(value)));
    return rootsThis.NormalizedContrastAngle(rootsOther);
}
/// <summary>
/// Calculates the dot-product or cos(angle) between two vectors.
/// See:
/// http://en.wikipedia.org/wiki/Dot_product
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>
/// The cosine of the angle, limited to the range [-1, 1]; NaN when the vectors
/// differ in length; 1 when both vectors are all zero and 0 when only one is
/// </returns>
public double Angle(Statistics s)
{
    if (Length != s.Length)
        return double.NaN;

    double sumCross = 0;
    double sumLeft = 0;
    double sumRight = 0;
    for (int i = 0, len = Length; i < len; i++)
    {
        double left = _list[i];
        double right = s._list[i];

        sumCross += left*right;
        sumLeft += left*left;
        sumRight += right*right;
    }

    // Avoid dividing by zero
    if (sumLeft == 0 || sumRight == 0)
        return sumLeft == 0 && sumRight == 0 ? 1 : 0;

    // Rounding error can push the quotient slightly outside [-1, 1] in either
    // direction. Limit both ends so that callers taking the arc cosine of the
    // result never receive an out-of-domain value.
    double cosine = sumCross/Math.Sqrt(sumLeft*sumRight);
    return Math.Max(-1.0, Math.Min(1.0, cosine));
}
/// <summary>
/// Calculates the a term of the quadratic regression function
/// (y = a*x^2 + b*x + c) given the Y and X values, as the ratio of two
/// 3x3 determinants built from sums of powers of x and y (a = Da/D).
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The a term of y = a*x^2 + b*x + c</returns>
/// <exception cref="InvalidOperationException">Fewer than three X values</exception>
public static double ATerm3(Statistics y, Statistics x)
{
    if (x.Length < 3)
        throw new InvalidOperationException("Insufficient pairs of co-ordinates"); // Not L10N

    // Notation: sJK is the sum over all points of x^J * y^K.
    double s40 = 0; // sum of x^4
    double s30 = 0; // sum of x^3
    double s20 = 0; // sum of x^2
    double s21 = 0; // sum of x^2*y
    double s11 = 0; // sum of x*y
    for (int i = 0; i < x._list.Length; i++)
    {
        double xi = x._list[i];
        double yi = y._list[i];

        s40 += xi*xi*xi*xi;
        s30 += xi*xi*xi;
        s20 += xi*xi;
        s21 += xi*xi*yi;
        s11 += xi*yi;
    }
    double s10 = x.Sum();   // sum of x
    double s00 = x.Length;  // sum of x^0 * y^0, i.e. the number of entries
    double s01 = y.Sum();   // sum of y

    // 2x2 minors shared by the numerator and denominator determinants
    double minor1 = s20 * s00 - s10 * s10;
    double minor2 = s30 * s00 - s10 * s20;
    double minor3 = s30 * s10 - s20 * s20;

    // a = Da/D
    double determinantA = s21 * minor1 - s11 * minor2 + s01 * minor3;
    double determinant = s40 * minor1 - s30 * minor2 + s20 * minor3;
    return determinantA / determinant;
}
/// <summary>
/// Calculates the dot-product or cos(angle) between two vectors, after
/// replacing every value in both vectors with its square root.
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Dot-Product of square roots of values in vectors</returns>
public double AngleSqrt(Statistics s)
{
    var rootsThis = new Statistics(_list.Select(value => Math.Sqrt(value)));
    var rootsOther = new Statistics(s._list.Select(value => Math.Sqrt(value)));
    return rootsThis.Angle(rootsOther);
}