/// <summary>
/// Computes the Beta coefficient (slope) of the linear regression function
/// for the given Y and X values: the covariance of the two series divided
/// by the square of the standard deviation of the X values.
/// </summary>
/// <param name="y">Y values</param>
/// <param name="x">X values</param>
/// <returns>
/// The Beta coefficient, or <see cref="double.NaN"/> if any exception is
/// thrown while it is being computed.
/// </returns>
public static double Beta(Statistics y, Statistics x)
{
    double beta;
    try
    {
        var covariance = Covariance(y, x);
        var squaredStdDevOfX = Math.Pow(x.StdDev(), 2);
        beta = covariance / squaredStdDevOfX;
    }
    catch (Exception)
    {
        // Best effort: any failure is reported to the caller as NaN.
        beta = double.NaN;
    }
    return beta;
}
/// <summary>
/// Rebuilds the statistics grid with one row per distinct time point found
/// among the workspace's data files. Each row shows the time point and
/// whether it is included; when at least one of the supplied points falls on
/// that time, the row also shows the mean, median, standard deviation and
/// number of the corresponding Y values.
/// </summary>
/// <param name="xValues">Time of each point, paired by index with <paramref name="yValues"/>.</param>
/// <param name="yValues">Value of each point.</param>
private void UpdateStatsGrid(IList<double> xValues, IList<double> yValues)
{
    // Bucket the Y values by the X (time) value they were observed at.
    var valuesByTime = new Dictionary<double, IList<double>>();
    for (var index = 0; index < xValues.Count; index++)
    {
        var key = xValues[index];
        var sample = yValues[index];
        IList<double> bucket;
        if (valuesByTime.TryGetValue(key, out bucket))
        {
            bucket.Add(sample);
        }
        else
        {
            valuesByTime.Add(key, new List<double> {sample});
        }
    }

    // Collect the distinct time points of all data files, in ascending order.
    var distinctTimePoints = new HashSet<double>();
    foreach (var dataFile in Workspace.MsDataFiles)
    {
        var timePoint = dataFile.TimePoint;
        if (timePoint.HasValue)
        {
            distinctTimePoints.Add(timePoint.Value);
        }
    }
    var sortedTimePoints = distinctTimePoints.ToArray();
    Array.Sort(sortedTimePoints);

    gridViewStats.Rows.Clear();
    if (sortedTimePoints.Length == 0)
    {
        return;
    }

    // Add all rows in one call, then fill them in.
    gridViewStats.Rows.Add(sortedTimePoints.Length);
    for (int rowIndex = 0; rowIndex < sortedTimePoints.Length; rowIndex++)
    {
        var gridRow = gridViewStats.Rows[rowIndex];
        var rowTime = sortedTimePoints[rowIndex];
        gridRow.Cells[colStatsTime.Index].Value = rowTime;
        gridRow.Cells[colStatsInclude.Index].Value = !IsTimePointExcluded(rowTime);

        IList<double> samples;
        if (!valuesByTime.TryGetValue(rowTime, out samples))
        {
            // No points at this time: the statistics cells stay empty.
            continue;
        }
        var summary = new Statistics(samples.ToArray());
        gridRow.Cells[colStatsMean.Index].Value = summary.Mean();
        gridRow.Cells[colStatsMedian.Index].Value = summary.Median();
        gridRow.Cells[colStatsStdDev.Index].Value = summary.StdDev();
        gridRow.Cells[colStatsPointCount.Index].Value = summary.Length;
    }
}
/// <summary>
/// Writes the mean, standard deviation and standard error of
/// <paramref name="stats"/> into the matching cells of <paramref name="row"/>.
/// The mean cell is set to null when there are no values; the standard
/// deviation and standard error cells are set to null when there are fewer
/// than two values.
/// </summary>
/// <param name="dataColumns">Identifies the value, standard deviation and standard error columns.</param>
/// <param name="row">The grid row to update.</param>
/// <param name="stats">The statistics to display.</param>
private void SetColumnValues(DataColumns dataColumns, DataGridViewRow row, Statistics stats)
{
    var cells = row.Cells;

    double? mean = null;
    if (stats.Length > 0)
    {
        mean = stats.Mean();
    }
    cells[dataColumns.ValueColumn.Index].Value = mean;

    double? stdDev = null;
    if (stats.Length > 1)
    {
        stdDev = stats.StdDev();
    }
    cells[dataColumns.StdDevColumn.Index].Value = stdDev;

    double? stdErr = null;
    if (stats.Length > 1)
    {
        stdErr = stats.StdErr();
    }
    cells[dataColumns.StdErrColumn.Index].Value = stdErr;
}
/// <summary>
/// Computes the correlation coefficient between two sets of numbers: their
/// covariance divided by the product of their standard deviations.
/// </summary>
/// <param name="s1">First set of numbers</param>
/// <param name="s2">Second set of numbers</param>
/// <returns>
/// The correlation coefficient, or <see cref="double.NaN"/> if any exception
/// is thrown while it is being computed.
/// </returns>
public static double R(Statistics s1, Statistics s2)
{
    double correlation;
    try
    {
        var covariance = Covariance(s1, s2);
        var stdDevProduct = s1.StdDev() * s2.StdDev();
        correlation = covariance / stdDevProduct;
    }
    catch (Exception)
    {
        // Best effort: any failure is reported to the caller as NaN.
        correlation = double.NaN;
    }
    return correlation;
}
/// <summary>
/// Fits a rate constant to the turnover values of the given rows.
/// <para>
/// Step 1 (only when <c>EvviesFilter</c> is not <c>None</c>): rows that are not
/// already rejected and have a finite turnover are grouped by time point. For
/// each time point a [mean - cutoff, mean + cutoff] window is computed, and
/// rows whose turnover falls outside the window are rejected with
/// <c>RejectReason.EvviesFilter</c>. Rows without a time point bypass the filter.
/// </para>
/// <para>
/// Step 2: when <c>HalfLifeSettings.SimpleLinearRegression</c> is set, a linear
/// regression of ln(1 - turnover) against the time point is performed;
/// otherwise an exponential curve is fitted to (time, 1 - turnover) with
/// <c>NelderMeadSimplex.Regress</c>.
/// </para>
/// </summary>
/// <param name="rowDatas">
/// The rows to process. This method has side effects on them: it may set
/// <c>RejectReason</c>, <c>EvviesFilterMin</c> and <c>EvviesFilterMax</c>.
/// </param>
/// <returns>
/// A <c>ResultData</c> holding the fitted values together with all rows and
/// the rows that passed filtering.
/// </returns>
private ResultData CalculateHalfLife(ICollection<ProcessedRowData> rowDatas)
{
    IEnumerable<ProcessedRowData> filteredRowDatas;
    if (EvviesFilter != EvviesFilterEnum.None)
    {
        // Rows that have a time point and are therefore subject to the filter.
        var applicableRowDatas = new List<ProcessedRowData>();
        // Turnover values grouped by time point.
        var values = new Dictionary<double, List<double>>();
        var filteredRowDataList = new List<ProcessedRowData>();
        foreach (var rowData in rowDatas)
        {
            Debug.Assert(RejectReason.EvviesFilter != rowData.RejectReason);
            if (null != rowData.RejectReason)
            {
                continue;
            }
            var value = rowData.Turnover;
            if (!value.HasValue || double.IsNaN(value.Value) || double.IsInfinity(value.Value))
            {
                continue;
            }
            var timePoint = GetTimePoint(rowData.RawRowData);
            if (!timePoint.HasValue)
            {
                // No time point: the row passes through unfiltered.
                filteredRowDataList.Add(rowData);
                continue;
            }
            List<double> list;
            if (!values.TryGetValue(timePoint.Value, out list))
            {
                list = new List<double>();
                values.Add(timePoint.Value, list);
            }
            list.Add(value.Value);
            applicableRowDatas.Add(rowData);
        }
        if (EvviesFilter == EvviesFilterEnum.Oct2011)
        {
            // Pre-trim each group to median +/- 3 SD before the cutoffs are computed.
            // Iterate over a snapshot because entries of "values" are replaced inside the loop.
            foreach (var entry in values.ToArray())
            {
                var statistics = new Statistics(entry.Value.ToArray());
                var min = statistics.Median() - 3*statistics.StdDev();
                var max = statistics.Median() + 3*statistics.StdDev();
                if (statistics.Median() + 2 * statistics.StdDev() >= .99)
                {
                    // Throw away any values of 100% or 99% if they are more than 2 SD above the median.
                    // NOTE(review): this branch is taken when median + 2 SD is at or above .99, and the
                    // resulting cap still keeps values equal to .99 -- confirm this matches the rule above.
                    max = Math.Min(.99, max);
                }
                var newValues = entry.Value.Where(v => v >= min && v <= max).ToList();
                if (newValues.Count != entry.Value.Count)
                {
                    values[entry.Key] = newValues;
                }
            }
        }
        // Per time point: Key = lower bound, Value = upper bound of the accepted window.
        var cutoffs = new Dictionary<double, KeyValuePair<double, double>>();
        foreach (var entry in values)
        {
            var statistics = new Statistics(entry.Value.ToArray());
            var mean = statistics.Mean();
            var stdDev = statistics.StdDev();
            double cutoff;
            if (EvviesFilter == EvviesFilterEnum.TwoStdDev)
            {
                cutoff = 2*stdDev;
            }
            else
            {
                // Use the wider 2 SD window only when the relative spread is small.
                if (stdDev / mean < .3)
                {
                    cutoff = 2 * stdDev;
                }
                else
                {
                    cutoff = stdDev;
                }
            }
            cutoffs.Add(entry.Key, new KeyValuePair<double, double>(mean - cutoff, mean + cutoff));
        }
        foreach (var rowData in applicableRowDatas)
        {
            var cutoff = cutoffs[GetTimePoint(rowData.RawRowData).Value];
            var value = rowData.Turnover;
            rowData.EvviesFilterMin = cutoff.Key;
            rowData.EvviesFilterMax = cutoff.Value;
            // Only apply Evvie's Filter to rows that have a time point.
            if (GetTimePoint(rowData.RawRowData).HasValue)
            {
                if (value.Value < cutoff.Key || value.Value > cutoff.Value)
                {
                    Debug.Assert(null == rowData.RejectReason);
                    rowData.RejectReason = RejectReason.EvviesFilter;
                    continue;
                }
            }
            filteredRowDataList.Add(rowData);
        }
        filteredRowDatas = filteredRowDataList;
    }
    else
    {
        filteredRowDatas = rowDatas.Where(rowData=>null == rowData.RejectReason).ToArray();
    }
    if (HalfLifeSettings.SimpleLinearRegression)
    {
        var timePoints = new List<double>();
        var logValues = new List<double>();
        foreach (var rowData in filteredRowDatas)
        {
            if (null != rowData.RejectReason)
            {
                continue;
            }
            // A row with no turnover yields a null log value and is rejected below,
            // instead of throwing InvalidOperationException from Nullable<T>.Value.
            var turnover = rowData.Turnover;
            double? logValue = turnover.HasValue ? Math.Log(1 - turnover.Value) : (double?) null;
            if (!logValue.HasValue || double.IsNaN(logValue.Value) || double.IsInfinity(logValue.Value))
            {
                rowData.RejectReason = RejectReason.ValueOutOfRange;
                continue;
            }
            double? timePoint = GetTimePoint(rowData.RawRowData);
            if (!timePoint.HasValue || ExcludedTimePoints.Contains(timePoint.Value))
            {
                rowData.RejectReason = RejectReason.NoTimePoint;
                continue;
            }
            logValues.Add(logValue.Value);
            timePoints.Add(timePoint.Value);
        }
        var statsTimePoints = new Statistics(timePoints.ToArray());
        var statsLogValues = new Statistics(logValues.ToArray());
        double rateConstant, stDevRateConstant, rateConstantError, yIntercept;
        double? rSquared = null;
        if (FixedInitialPercent)
        {
            // Regression line forced through the origin; no R squared is reported in this mode.
            rateConstant = statsLogValues.SlopeWithoutIntercept(statsTimePoints);
            stDevRateConstant = Statistics.StdDevSlopeWithoutIntercept(statsLogValues, statsTimePoints);
            // presumably the argument is the degrees of freedom (one fitted parameter) -- TODO confirm
            rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 1);
            yIntercept = 0;
        }
        else
        {
            rateConstant = statsLogValues.Slope(statsTimePoints);
            stDevRateConstant = Statistics.StdDevB(statsLogValues, statsTimePoints);
            // presumably the argument is the degrees of freedom (two fitted parameters) -- TODO confirm
            rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 2);
            yIntercept = Statistics.Intercept(statsLogValues, statsTimePoints);
            rSquared = Math.Pow(Statistics.R(statsLogValues, statsTimePoints), 2);
        }
        return new ResultData
                   {
                       RateConstant = rateConstant,
                       RateConstantStdDev = stDevRateConstant,
                       RateConstantError = rateConstantError,
                       PointCount = timePoints.Count,
                       YIntercept = yIntercept,
                       RSquared = rSquared,
                       RowDatas = rowDatas.ToArray(),
                       FilteredRowDatas = filteredRowDatas.ToArray(),
                   };
    }
    else
    {
        // Non-linear fit of (time, 1 - turnover); rows missing either value are skipped.
        var dataPoints = new List<KeyValuePair<double, double>>();
        foreach (var rowData in filteredRowDatas)
        {
            double? time = rowData.RawRowData.MsDataFile.TimePoint;
            double? y;
            y = 1-rowData.Turnover;
            if (!y.HasValue || !time.HasValue)
            {
                continue;
            }
            dataPoints.Add(new KeyValuePair<double, double>(time.Value, y.Value));
        }
        // Time points of all data files in the workspace; used to scale the initial guess.
        var timePoints =
            Workspace.MsDataFiles.Select(msDataFile => msDataFile.TimePoint)
                .Where(timePoint => timePoint.HasValue).ToList();
        var resultData = new ResultData
                             {
                                 PointCount = dataPoints.Count,
                                 FilteredRowDatas = filteredRowDatas.ToArray(),
                                 RowDatas = rowDatas.ToArray(),
                             };
        if (resultData.RowDatas.Count == 0 || timePoints.Count == 0)
        {
            // Nothing to fit.
            resultData.RateConstant = double.NaN;
            resultData.YIntercept = double.NaN;
            return resultData;
        }
        NelderMeadSimplex.SimplexConstant[] initialParameters;
        double convergenceTolerance = 0;
        int maxEvaluations = 1000;
        if (FixedInitialPercent)
        {
            // Single-parameter model y = exp(-k * x); time zero is included in the range.
            timePoints.Add(0);
            double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
            initialParameters = new[] {new NelderMeadSimplex.SimplexConstant(1/timePointDifference, 1.0/10/timePointDifference)};
            var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                                      constants => SumOfResidualsSquared(
                                          x => Math.Exp(-constants[0]*x), dataPoints));
            resultData.RateConstant = -regressionResult.Constants[0];
        }
        else
        {
            // Two-parameter model y = exp(-k * x + b).
            double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
            initialParameters = new[]
                                    {
                                        new NelderMeadSimplex.SimplexConstant(1/timePointDifference, 1.0/10/timePointDifference),
                                        new NelderMeadSimplex.SimplexConstant(0, .1),
                                    };
            var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                                      constants=>SumOfResidualsSquared(x=>Math.Exp(-constants[0] * x + constants[1]), dataPoints));
            resultData.RateConstant = -regressionResult.Constants[0];
            resultData.YIntercept = regressionResult.Constants[1];
        }
        return resultData;
    }
}