/// <summary>
/// Computes the slope (Beta coefficient) of the linear regression of the
/// Y values on the X values: the covariance of the two sets divided by the
/// square of the standard deviation of the X values.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The Beta coefficient, or double.NaN if an exception is thrown while computing it</returns>
public static double Beta(Statistics y, Statistics x)
{
    double beta;
    try
    {
        double covariance = Covariance(y, x);
        // The squared standard deviation of X serves as the variance of X.
        double varianceX = Math.Pow(x.StdDev(), 2);
        beta = covariance / varianceX;
    }
    catch (Exception)
    {
        // Best effort: any failure is reported as "not a number".
        beta = double.NaN;
    }
    return beta;
}
/// <summary>
/// Computes the covariance of two equally sized sets of numbers as
/// (sum of pairwise products - n * mean1 * mean2) / (n - 1).
/// </summary>
/// <param name="s1">First set of numbers</param>
/// <param name="s2">Second set of numbers</param>
/// <returns>
/// The covariance, or double.NaN when the sets differ in length or an
/// exception is thrown while computing it
/// </returns>
public static double Covariance(Statistics s1, Statistics s2)
{
    try
    {
        int count = s1.Length;
        if (count != s2.Length)
        {
            return double.NaN;
        }

        // Sum of the element-wise products of the two sets.
        double productSum = 0;
        for (int index = 0; index < count; index++)
        {
            productSum += s1._list[index] * s2._list[index];
        }

        double meanCorrection = count * s1.Mean() * s2.Mean();
        return (productSum - meanCorrection) / (count - 1);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Stores the two time lists and the outlier indexes, then fits a linear
/// regression of the target times on the original times using only the
/// points whose index is not in the outlier set.
/// </summary>
/// <param name="originalTimes">Original times (used as the X values)</param>
/// <param name="targetTimes">Target times (used as the Y values)</param>
/// <param name="outlierIndexes">Indexes of the points to leave out of the fit</param>
/// <exception cref="ArgumentException">The two lists differ in length</exception>
private RegressionWithOutliers(IList<double> originalTimes, IList<double> targetTimes, ISet<int> outlierIndexes)
{
    if (originalTimes.Count != targetTimes.Count)
    {
        throw new ArgumentException("Value lists must have same length");
    }

    OriginalTimes = originalTimes;
    TargetTimes = targetTimes;
    OutlierIndexes = outlierIndexes;

    // A point takes part in the fit only when its position is not an outlier index.
    Func<double, int, bool> isIncluded = (time, position) => !outlierIndexes.Contains(position);
    var includedTargets = new Statistics(TargetTimes.Where(isIncluded).ToArray());
    var includedOriginals = new Statistics(OriginalTimes.Where(isIncluded).ToArray());
    Debug.Assert(includedTargets.Length == includedOriginals.Length);
    Debug.Assert(includedTargets.Length == TotalCount - outlierIndexes.Count);

    Slope = includedTargets.Slope(includedOriginals);
    Intercept = includedTargets.Intercept(includedOriginals);
    R = includedTargets.R(includedOriginals);
    // An undefined correlation is reported as zero.
    if (double.IsNaN(R))
    {
        R = 0;
    }
}
/// <summary>
/// Computes the variance of the set of numbers around its weighted mean.
/// See:
/// http://en.wikipedia.org/wiki/Weighted_mean
/// </summary>
/// <param name="weights">The weights, one per value</param>
/// <returns>
/// The variance from the weighted mean, or double.NaN if an exception is
/// thrown while computing it
/// </returns>
public double Variance(Statistics weights)
{
    try
    {
        int count = _list.Length;

        // Sum of weight * value^2 over all values.
        double weightedSquares = 0;
        for (int index = 0; index < count; index++)
        {
            weightedSquares += weights._list[index] * Math.Pow(_list[index], 2);
        }

        // The weighted squares are scaled by the mean weight before the
        // squared weighted mean (times the count) is subtracted.
        double scaledSquares = weightedSquares / weights.Mean();
        double meanTerm = count * Math.Pow(Mean(weights), 2);
        return (scaledSquares - meanTerm) / (count - 1);
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Computes the standard error from a weighted mean: the weighted standard
/// deviation divided by the square root of the number of values.
/// </summary>
/// <param name="weights">The weights</param>
/// <returns>Standard error from weighted mean</returns>
public double StdErr(Statistics weights)
{
    double deviation = StdDev(weights);
    double rootCount = Math.Sqrt(_list.Length);
    return deviation / rootCount;
}
/// <summary>
/// Calculates the dot-product or cos(angle) between this vector and
/// another one, after passing each of them through NormalizeUnit().
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Result of Angle() applied to the two normalized vectors</returns>
public double AngleUnitVector(Statistics s)
{
    // This vector is normalized first, then the other one.
    return NormalizeUnit().Angle(s.NormalizeUnit());
}
/// <summary>
/// Creates a result that keeps a reference to the given statistics.
/// </summary>
/// <param name="statistics">The statistics to store</param>
private Result(Statistics statistics)
{
    _statistics = statistics;
}
/// <summary>
/// Fits a linear regression, with errors in both coordinates, between the
/// base values and the values obtained for the given tracer formula and
/// time range.
/// </summary>
/// <param name="targetTracerFormula">Tracer formula passed to GetValues</param>
/// <param name="startTime">Start time passed to GetValues</param>
/// <param name="endTime">End time passed to GetValues</param>
/// <returns>The regression returned by Statistics.LinearRegressionWithErrorsInBothCoordinates</returns>
private LinearRegression GetLinearRegression(TracerFormula targetTracerFormula, double startTime, double endTime)
{
    // Base values are the first argument, target values the second.
    return Statistics.LinearRegressionWithErrorsInBothCoordinates(
        new Statistics(GetBaseValues()),
        new Statistics(GetValues(targetTracerFormula, startTime, endTime)));
}
/// <summary>
/// Calculates a Costa Soares correlation coefficient between this and
/// another set of numbers, with the rank limit set to int.MaxValue.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <returns>Correlation coefficient</returns>
public double CostaSoares(Statistics s)
{
    const int unlimitedRank = int.MaxValue;
    return CostaSoares(s, unlimitedRank);
}
/// <summary>
/// Re-ranks this set so that entries whose value is zero get ranks derived
/// from the other set: a zero that is also zero in the other set takes the
/// other set's rank, and the remaining zeros are placed after all existing
/// ranks, ordered by their rank in the other set.
/// </summary>
/// <param name="ranks">Current ranks of this set</param>
/// <param name="sOther">The other set of numbers</param>
/// <param name="ranksOther">Ranks of the other set</param>
/// <returns>
/// The original <paramref name="ranks"/> array when this set contains no
/// zero; otherwise a newly computed rank array
/// </returns>
private int[] FixZeroRanks(int[] ranks, Statistics sOther, int[] ranksOther)
{
    // Nothing to adjust when this set contains no zero values.
    if (!_list.Contains(0))
    {
        return ranks;
    }

    // Start from a floating point copy of the current ranks.
    var adjusted = new List<double>();
    foreach (int rank in ranks)
    {
        adjusted.Add(rank);
    }

    // Pairs of (rank in the other set, index) for zeros that are
    // not zero in the other set.
    var unmatchedZeros = new List<KeyValuePair<int, int>>();
    for (int index = 0; index < _list.Length; index++)
    {
        if (_list[index] != 0)
        {
            continue;
        }

        if (sOther._list[index] == 0)
        {
            // Zero in both sets: simply take over the other set's rank.
            adjusted[index] = ranksOther[index];
        }
        else
        {
            // Remember the index so its new rank can be assigned below.
            unmatchedZeros.Add(new KeyValuePair<int, int>(ranksOther[index], index));
        }
    }

    // Order the remembered entries by their rank in the other set.
    unmatchedZeros.Sort((left, right) => left.Key.CompareTo(right.Key));

    // Hand out descending rank numbers starting at Length + count, so the
    // entry with the smallest rank number in the other set gets the
    // largest rank number here.
    int nextRank = Length + unmatchedZeros.Count;
    foreach (var entry in unmatchedZeros)
    {
        adjusted[entry.Value] = nextRank;
        nextRank--;
    }

    // Negate the rank numbers so they can be treated as values and ranked again.
    for (int index = 0; index < adjusted.Count; index++)
    {
        adjusted[index] = -adjusted[index];
    }

    return new Statistics(adjusted.ToArray()).Rank();
}
/// <summary>
/// Calculates the correlation coefficient between this set of numbers
/// and another one by delegating to the two-argument overload.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <returns>Correlation coefficient</returns>
public double R(Statistics s)
{
    double correlation = R(this, s);
    return correlation;
}
/// <summary>
/// Computes the y-intercept (Alpha coefficient) of the linear regression
/// of the Y values on the X values as mean(y) - Beta(y, x) * mean(x).
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>The Alpha coefficient</returns>
public static double Alpha(Statistics y, Statistics x)
{
    double meanY = y.Mean();
    double slope = Beta(y, x);
    double meanX = x.Mean();
    return meanY - slope * meanX;
}
/// <summary>
/// Computes a weighted median: the smallest distinct value at which the
/// accumulated weight, taken in ascending value order, reaches at least
/// half of the total weight.
/// </summary>
/// <param name="weights">The weights, one per value</param>
/// <returns>
/// The weighted median, or double.NaN when no value reaches half of the
/// total weight or an exception is thrown while computing it
/// </returns>
public double Median(Statistics weights)
{
    try
    {
        // Accumulate the total weight carried by each distinct value.
        var weightByValue = new Dictionary<double, double>();
        for (int index = 0; index < _list.Length; index++)
        {
            double key = _list[index];
            double accumulated;
            if (!weightByValue.TryGetValue(key, out accumulated))
            {
                accumulated = 0;
            }
            weightByValue[key] = accumulated + weights._list[index];
        }

        var sortedValues = weightByValue.Keys.ToArray();
        Array.Sort(sortedValues);

        double threshold = weights.Sum() / 2;
        double running = 0;
        foreach (double candidate in sortedValues)
        {
            running += weightByValue[candidate];
            if (running >= threshold)
            {
                return candidate;
            }
        }

        return double.NaN;
    }
    catch
    {
        return double.NaN;
    }
}
/// <summary>
/// Computes the weighted mean of the set of numbers: the sum of each value
/// times its weight, divided by the sum of the weights.
/// See:
/// http://en.wikipedia.org/wiki/Weighted_mean
/// </summary>
/// <param name="weights">The weights, one per value</param>
/// <returns>The weighted mean, or double.NaN if an exception is thrown while computing it</returns>
public double Mean(Statistics weights)
{
    try
    {
        double weightedSum = 0;
        int position = 0;
        foreach (double value in _list)
        {
            weightedSum += value * weights._list[position];
            position++;
        }
        return weightedSum / weights.Sum();
    }
    catch (Exception)
    {
        return double.NaN;
    }
}
/// <summary>
/// Calculates the y-intercept (Alpha coefficient) of the linear
/// regression function, with the current set of numbers as Y values
/// and another set as X values. Delegates to Alpha(x).
/// </summary>
/// <param name="x">X values</param>
/// <returns>The y-intercept</returns>
public double Intercept(Statistics x)
{
    double alpha = Alpha(x);
    return alpha;
}
/// <summary>
/// Calculates the covariance between this set of numbers and another
/// one by delegating to the two-argument overload.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <returns>Covariance</returns>
public double Covariance(Statistics s)
{
    double covariance = Covariance(this, s);
    return covariance;
}
/// <summary>
/// Calculates a Costa Soares correlation coefficient between this and
/// another set of numbers. Both sets are ranked, zero values are re-ranked
/// with FixZeroRanks, and the weighted squared rank differences are
/// normalized by 6 / (n^4 + n^3 - n^2 - n) and subtracted from 1.
/// </summary>
/// <param name="s">Second set of numbers</param>
/// <param name="limitRank">
/// A pair contributes only when at least one of its two ranks is at most
/// this limit; pairs where both ranks are greater are excluded
/// </param>
/// <returns>Correlation coefficient</returns>
public double CostaSoares(Statistics s, int limitRank)
{
    Debug.Assert(Length == s.Length);

    int n = Length;
    int[] a = Rank();
    int[] b = s.Rank();
    // The second call sees the already adjusted ranks of this set.
    a = FixZeroRanks(a, s, b);
    b = s.FixZeroRanks(b, this, a);

    double total = 0;
    for (int i = 0; i < n; i++)
    {
        if (a[i] <= limitRank || b[i] <= limitRank)
        {
            // Squared rank difference, weighted by (n - a + 1) + (n - b + 1).
            total += Math.Pow(a[i] - b[i], 2) * ((n - a[i] + 1) + (n - b[i] + 1));
        }
    }

    // Compute the powers of n in double precision. Multiplying n * n as
    // int silently overflows once n exceeds 46340 and would corrupt the
    // normalization term.
    double n1 = n;
    double n2 = n1 * n1;
    double n3 = n1 * n2;
    double n4 = n1 * n3;
    total *= 6.0 / (n4 + n3 - n2 - n1);

    return 1 - total;
}
/// <summary>
/// Computes the square root of the sum of squared regression residuals of
/// the Y values on the X values, divided by (number of residuals - 2).
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>Standard deviation of the residuals</returns>
private static double StdDevY(Statistics y, Statistics x)
{
    var values = Residuals(y, x)._list;

    double squaredSum = 0;
    for (int index = 0; index < values.Length; index++)
    {
        squaredSum += Math.Pow(values[index], 2);
    }

    int divisor = values.Length - 2;
    return Math.Sqrt(squaredSum / divisor);
}
/// <summary>
/// Computes the square root of the sum of squared residuals returned by
/// ResidualsWithoutIntercept, divided by (number of residuals - 2).
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>Standard deviation of the residuals</returns>
private static double StdDevYWithoutIntercept(Statistics y, Statistics x)
{
    Statistics residuals = ResidualsWithoutIntercept(y, x);
    var squaredSum = residuals.SumOfSquares();
    // NOTE(review): the divisor is n - 2, the same as in the version with
    // an intercept; confirm this is intended for a fit without intercept.
    int divisor = residuals._list.Length - 2;
    return Math.Sqrt(squaredSum / divisor);
}
/// <summary>
/// Fills the value, standard deviation and standard error cells of a grid
/// row from the given statistics. The value cell gets the mean when there
/// is at least one number; the other two cells are filled only when there
/// is more than one number. Cells that cannot be computed are set to null.
/// </summary>
/// <param name="dataColumns">The columns whose indexes locate the cells to fill</param>
/// <param name="row">The grid row to update</param>
/// <param name="stats">The numbers to summarize</param>
private void SetColumnValues(DataColumns dataColumns, DataGridViewRow row, Statistics stats)
{
    var meanCell = row.Cells[dataColumns.ValueColumn.Index];
    if (stats.Length > 0)
    {
        meanCell.Value = stats.Mean();
    }
    else
    {
        meanCell.Value = null;
    }

    var stdDevCell = row.Cells[dataColumns.StdDevColumn.Index];
    if (stats.Length > 1)
    {
        stdDevCell.Value = stats.StdDev();
    }
    else
    {
        stdDevCell.Value = null;
    }

    var stdErrCell = row.Cells[dataColumns.StdErrColumn.Index];
    if (stats.Length > 1)
    {
        stdErrCell.Value = stats.StdErr();
    }
    else
    {
        stdErrCell.Value = null;
    }
}
/// <summary>
/// Calculates the residuals of the linear regression function, with the
/// current set of numbers as Y values and another set as X values.
/// Delegates to the two-argument overload.
/// </summary>
/// <param name="x">X values</param>
/// <returns>A set of residuals</returns>
public Statistics Residuals(Statistics x)
{
    Statistics residuals = Residuals(this, x);
    return residuals;
}
/// <summary>
/// Calculates the Beta coefficient (slope) of the linear regression
/// function, with the current set of numbers as Y values and another
/// set as X values. Delegates to the two-argument overload.
/// </summary>
/// <param name="x">X values</param>
/// <returns>The Beta coefficient</returns>
public double Beta(Statistics x)
{
    double beta = Beta(this, x);
    return beta;
}
/// <summary>
/// Builds one Statistics object per measured quantity from the given rows
/// and bundles them into a ResultData. Rows without a Turnover or
/// PrecursorEnrichment value are left out of the respective statistics.
/// </summary>
/// <param name="rowDatas">The rows to summarize</param>
/// <returns>The statistics for each quantity</returns>
private ResultData GetResultData(IList<RowData> rowDatas)
{
    var areaStats = new Statistics(
        (from row in rowDatas
         select row.TracerPercentByArea).ToArray());
    var slopeStats = new Statistics(
        (from row in rowDatas
         select row.TracerPercentBySlope).ToArray());
    var turnoverStats = new Statistics(
        (from row in rowDatas
         where row.Turnover.HasValue
         select row.Turnover.Value).ToArray());
    var enrichmentStats = new Statistics(
        (from row in rowDatas
         where row.PrecursorEnrichment.HasValue
         select row.PrecursorEnrichment.Value).ToArray());
    var areaUnderCurveStats = new Statistics(
        (from row in rowDatas
         select row.AreaUnderCurve).ToArray());

    return new ResultData
    {
        TracerPercentByArea = areaStats,
        TracerPercentBySlope = slopeStats,
        Turnover = turnoverStats,
        PrecursorEnrichment = enrichmentStats,
        AreaUnderCurve = areaUnderCurveStats,
    };
}
/// <summary>
/// Calculates the slope (Beta coefficient) of the linear regression
/// function, with the current set of numbers as Y values and another
/// set as X values. Delegates to Beta(x).
/// </summary>
/// <param name="x">X values</param>
/// <returns>The slope</returns>
public double Slope(Statistics x)
{
    double slope = Beta(x);
    return slope;
}
/// <summary>
/// Regresses the midpoint of each peak (average of its start and end
/// time) on the difference between the two values reported by
/// RelativeElutionTime for that peak.
/// </summary>
/// <param name="rtShift">Receives the slope of the regression</param>
/// <param name="residuals">Receives the result of Statistics.StdDevB for the same data</param>
public void RetentionTimeShift(out double rtShift, out double residuals)
{
    var elutionOffsets = new List<double>();
    var peakMidpoints = new List<double>();
    foreach (var peak in _peaks)
    {
        int before, after;
        RelativeElutionTime(TracerFormula.Empty, peak.Key, out before, out after);
        elutionOffsets.Add(after - before);

        var bounds = peak.Value;
        peakMidpoints.Add((bounds.StartTime + bounds.EndTime) / 2);
    }

    // Elution offsets are the X values, peak midpoints the Y values.
    var offsetStats = new Statistics(elutionOffsets.ToArray());
    var midpointStats = new Statistics(peakMidpoints.ToArray());
    rtShift = midpointStats.Slope(offsetStats);
    residuals = Statistics.StdDevB(midpointStats, offsetStats);
}
/// <summary>
/// Computes the slope of a regression line without an intercept term,
/// with the current set of numbers as Y values: the sum of the pairwise
/// products of Y and X divided by the sum of squares of X.
/// </summary>
/// <param name="x">X values</param>
/// <returns>The slope</returns>
public double SlopeWithoutIntercept(Statistics x)
{
    int count = Length;
    double crossProducts = 0;
    for (int index = 0; index < count; index++)
    {
        crossProducts += _list[index] * x._list[index];
    }
    return crossProducts / x.SumOfSquares();
}
/// <summary>
/// Rebuilds the statistics grid with one row per distinct time point of
/// the workspace's data files, in ascending order. Each row shows the time
/// and whether it is included; rows whose time has values in
/// <paramref name="xValues"/> also show the mean, median, standard
/// deviation and count of those values. The other rows leave these cells
/// unset.
/// </summary>
/// <param name="xValues">Time of each data point</param>
/// <param name="yValues">Value of each data point, parallel to <paramref name="xValues"/></param>
private void UpdateStatsGrid(IList<double> xValues, IList<double> yValues)
{
    // Group the values by the time they belong to.
    var timeToValuesDict = new Dictionary<double, IList<double>>();
    // Use the Count property rather than LINQ's Count() extension, which
    // was being invoked on every pass through the loop condition.
    for (var i = 0; i < xValues.Count; i++)
    {
        var time = xValues[i];
        var value = yValues[i];
        IList<double> values;
        if (!timeToValuesDict.TryGetValue(time, out values))
        {
            values = new List<double>();
            timeToValuesDict.Add(time, values);
        }
        values.Add(value);
    }

    // Distinct time points of all data files that have one, sorted ascending.
    var allTimePoints = new HashSet<double>(Workspace.MsDataFiles
        .Where(d => d.TimePoint.HasValue)
        // ReSharper disable PossibleInvalidOperationException
        .Select(d => d.TimePoint.Value))
        // ReSharper restore PossibleInvalidOperationException
        .ToArray();
    Array.Sort(allTimePoints);

    gridViewStats.Rows.Clear();
    if (allTimePoints.Length == 0)
    {
        // Rows are only added when there is at least one time point.
        return;
    }

    gridViewStats.Rows.Add(allTimePoints.Length);
    for (int i = 0; i < allTimePoints.Length; i++)
    {
        var row = gridViewStats.Rows[i];
        var time = allTimePoints[i];
        row.Cells[colStatsTime.Index].Value = time;
        row.Cells[colStatsInclude.Index].Value = !IsTimePointExcluded(time);

        IList<double> values;
        if (timeToValuesDict.TryGetValue(time, out values))
        {
            var stats = new Statistics(values.ToArray());
            row.Cells[colStatsMean.Index].Value = stats.Mean();
            row.Cells[colStatsMedian.Index].Value = stats.Median();
            row.Cells[colStatsStdDev.Index].Value = stats.StdDev();
            row.Cells[colStatsPointCount.Index].Value = stats.Length;
        }
    }
}
/// <summary>
/// Calculates the standard deviation of the set of numbers from a
/// weighted mean, as the square root of the weighted variance.
/// </summary>
/// <param name="weights">The weights</param>
/// <returns>Standard deviation from weighted mean</returns>
public double StdDev(Statistics weights)
{
    double variance = Variance(weights);
    return Math.Sqrt(variance);
}
/// <summary>
/// Creates a new Result from the given statistics.
/// </summary>
/// <param name="statistics">The statistics passed to the Result constructor</param>
/// <returns>The newly created Result</returns>
public static Result FromStatistics(Statistics statistics)
{
    var result = new Result(statistics);
    return result;
}
/// <summary>
/// Calculates the dot-product or cos(angle) between two vectors, after
/// replacing every value in both vectors with its square root.
/// </summary>
/// <param name="s">The other vector</param>
/// <returns>Result of Angle() applied to the two square-rooted vectors</returns>
public double AngleSqrt(Statistics s)
{
    // This vector is converted first, then the other one.
    var rootsOfThis = new Statistics(Array.ConvertAll(_list, value => Math.Sqrt(value)));
    var rootsOfOther = new Statistics(Array.ConvertAll(s._list, value => Math.Sqrt(value)));
    return rootsOfThis.Angle(rootsOfOther);
}