/// <summary>
/// Creates a regression between two equally long lists of times, computing the
/// regression statistics only from the points whose index is not listed in
/// <paramref name="outlierIndexes"/>.
/// </summary>
/// <param name="originalTimes">Times stored in <c>OriginalTimes</c>.</param>
/// <param name="targetTimes">Times stored in <c>TargetTimes</c>; must have the same count as <paramref name="originalTimes"/>.</param>
/// <param name="outlierIndexes">Positions (shared by both lists) that are left out of the statistics.</param>
/// <exception cref="ArgumentException">The two lists do not have the same number of elements.</exception>
private RegressionWithOutliers(IList<double> originalTimes, IList<double> targetTimes, ISet<int> outlierIndexes)
{
    if (originalTimes.Count != targetTimes.Count)
    {
        throw new ArgumentException("Value lists must have same length");
    }

    OriginalTimes = originalTimes;
    TargetTimes = targetTimes;
    OutlierIndexes = outlierIndexes;

    // Keep only the points that were not flagged as outliers; the index passed to
    // the predicate is the position in the unfiltered list.
    var retainedTargets = TargetTimes.Where((time, position) => !outlierIndexes.Contains(position)).ToArray();
    var targetStats = new Statistics(retainedTargets);
    var retainedOriginals = OriginalTimes.Where((time, position) => !outlierIndexes.Contains(position)).ToArray();
    var originalStats = new Statistics(retainedOriginals);

    Debug.Assert(targetStats.Length == originalStats.Length);
    Debug.Assert(targetStats.Length == TotalCount - outlierIndexes.Count);

    Slope = targetStats.Slope(originalStats);
    Intercept = targetStats.Intercept(originalStats);

    // A NaN correlation is reported as 0.
    var correlation = targetStats.R(originalStats);
    R = double.IsNaN(correlation) ? 0 : correlation;
}
/// <summary>
/// Regresses the midpoint time of every peak in <c>_peaks</c> against the
/// difference (eluteAfter - eluteBefore) reported by <c>RelativeElutionTime</c>
/// for that peak's key relative to <c>TracerFormula.Empty</c>.
/// </summary>
/// <param name="rtShift">Receives the result of <c>Slope</c> for midpoint times (Y) versus elution differences (X).</param>
/// <param name="residuals">Receives the result of <c>Statistics.StdDevB</c> for the same Y and X series.</param>
public void RetentionTimeShift(out double rtShift, out double residuals)
{
    var elutionDifferences = new List<double>();
    var peakMidpoints = new List<double>();

    foreach (var peak in _peaks)
    {
        int before;
        int after;
        RelativeElutionTime(TracerFormula.Empty, peak.Key, out before, out after);

        // X: integer difference of the two elution counts, widened to double.
        elutionDifferences.Add(after - before);

        // Y: centre of the peak's time range.
        var midpoint = (peak.Value.StartTime + peak.Value.EndTime) / 2;
        peakMidpoints.Add(midpoint);
    }

    var xStats = new Statistics(elutionDifferences.ToArray());
    var yStats = new Statistics(peakMidpoints.ToArray());

    rtShift = yStats.Slope(xStats);
    residuals = Statistics.StdDevB(yStats, xStats);
}
/// <summary>
/// Fits a rate constant to the turnover values of the supplied rows.
/// </summary>
/// <remarks>
/// The method works in two stages:
/// <list type="number">
/// <item><description>
/// Filtering. When <c>EvviesFilter</c> is not <c>None</c>, turnover values are grouped by time point
/// and rows whose turnover falls outside a per-time-point [mean - cutoff, mean + cutoff] window are
/// marked with <c>RejectReason.EvviesFilter</c>. Rows without a time point bypass the filter. Rows
/// that already have a reject reason, or whose turnover is missing, NaN or infinite, are dropped
/// from the filtered set. When the filter is <c>None</c>, every row without a reject reason is kept.
/// </description></item>
/// <item><description>
/// Fitting. With <c>HalfLifeSettings.SimpleLinearRegression</c>, ln(1 - turnover) is regressed
/// against the time point (optionally forcing a zero intercept when <c>FixedInitialPercent</c> is set).
/// Otherwise the squared residuals of an exponential model are minimised with
/// <c>NelderMeadSimplex.Regress</c>.
/// </description></item>
/// </list>
/// Side effects: the method may set <c>RejectReason</c>, <c>EvviesFilterMin</c> and
/// <c>EvviesFilterMax</c> on the passed-in rows.
/// </remarks>
/// <param name="rowDatas">Rows to analyse; returned unchanged (as an array) in <c>ResultData.RowDatas</c>.</param>
/// <returns>The fitted parameters together with the original and filtered rows.</returns>
private ResultData CalculateHalfLife(ICollection<ProcessedRowData> rowDatas)
{
    IEnumerable<ProcessedRowData> filteredRowDatas;
    if (EvviesFilter != EvviesFilterEnum.None)
    {
        // Rows that have both a usable turnover and a time point; only these are subject to the filter.
        var applicableRowDatas = new List<ProcessedRowData>();
        // Usable turnover values grouped by time point.
        var values = new Dictionary<double, List<double>>();
        var filteredRowDataList = new List<ProcessedRowData>();
        foreach (var rowData in rowDatas)
        {
            Debug.Assert(RejectReason.EvviesFilter != rowData.RejectReason);
            if (null != rowData.RejectReason)
            {
                continue;
            }
            var value = rowData.Turnover;
            if (!value.HasValue || double.IsNaN(value.Value) || double.IsInfinity(value.Value))
            {
                continue;
            }
            var timePoint = GetTimePoint(rowData.RawRowData);
            if (!timePoint.HasValue)
            {
                // No time point: the row cannot be grouped, so it passes through unfiltered.
                filteredRowDataList.Add(rowData);
                continue;
            }
            List<double> list;
            if (!values.TryGetValue(timePoint.Value, out list))
            {
                list = new List<double>();
                values.Add(timePoint.Value, list);
            }
            list.Add(value.Value);
            applicableRowDatas.Add(rowData);
        }
        if (EvviesFilter == EvviesFilterEnum.Oct2011)
        {
            // Pre-trim each group to median +/- 3 SD before the mean-based cutoffs are computed.
            // Iterate over a snapshot because entries of "values" are replaced inside the loop.
            foreach (var entry in values.ToArray())
            {
                var statistics = new Statistics(entry.Value.ToArray());
                var median = statistics.Median();
                var stdDev = statistics.StdDev();
                var min = median - 3 * stdDev;
                var max = median + 3 * stdDev;
                if (median + 2 * stdDev >= .99)
                {
                    // Throw away any values of 100% or 99% if they are more than 2 SD above the median.
                    max = Math.Min(.99, max);
                }
                var newValues = entry.Value.Where(v => v >= min && v <= max).ToList();
                if (newValues.Count != entry.Value.Count)
                {
                    values[entry.Key] = newValues;
                }
            }
        }
        // Per time point: Key = lower bound, Value = upper bound of the accepted turnover range.
        var cutoffs = new Dictionary<double, KeyValuePair<double, double>>();
        foreach (var entry in values)
        {
            var statistics = new Statistics(entry.Value.ToArray());
            var mean = statistics.Mean();
            var stdDev = statistics.StdDev();
            double cutoff;
            if (EvviesFilter == EvviesFilterEnum.TwoStdDev)
            {
                cutoff = 2 * stdDev;
            }
            else
            {
                // Use the wider 2 SD window only when the relative spread is below 30%.
                if (stdDev / mean < .3)
                {
                    cutoff = 2 * stdDev;
                }
                else
                {
                    cutoff = stdDev;
                }
            }
            cutoffs.Add(entry.Key, new KeyValuePair<double, double>(mean - cutoff, mean + cutoff));
        }
        foreach (var rowData in applicableRowDatas)
        {
            // Every row in applicableRowDatas was added only after its time point was found above.
            var cutoff = cutoffs[GetTimePoint(rowData.RawRowData).Value];
            var value = rowData.Turnover;
            rowData.EvviesFilterMin = cutoff.Key;
            rowData.EvviesFilterMax = cutoff.Value;
            // Only apply Evvie's Filter to rows that have a time point.
            if (GetTimePoint(rowData.RawRowData).HasValue)
            {
                if (value.Value < cutoff.Key || value.Value > cutoff.Value)
                {
                    Debug.Assert(null == rowData.RejectReason);
                    rowData.RejectReason = RejectReason.EvviesFilter;
                    continue;
                }
            }
            filteredRowDataList.Add(rowData);
        }
        filteredRowDatas = filteredRowDataList;
    }
    else
    {
        filteredRowDatas = rowDatas.Where(rowData => null == rowData.RejectReason).ToArray();
    }

    if (HalfLifeSettings.SimpleLinearRegression)
    {
        var timePoints = new List<double>();
        var logValues = new List<double>();
        foreach (var rowData in filteredRowDatas)
        {
            if (null != rowData.RejectReason)
            {
                continue;
            }
            // A row without a turnover value yields a null log value and is rejected below,
            // instead of throwing when the empty nullable is dereferenced.
            var turnover = rowData.Turnover;
            double? logValue = turnover.HasValue ? Math.Log(1 - turnover.Value) : (double?) null;
            if (!logValue.HasValue || double.IsNaN(logValue.Value) || double.IsInfinity(logValue.Value))
            {
                rowData.RejectReason = RejectReason.ValueOutOfRange;
                continue;
            }
            double? timePoint = GetTimePoint(rowData.RawRowData);
            if (!timePoint.HasValue || ExcludedTimePoints.Contains(timePoint.Value))
            {
                rowData.RejectReason = RejectReason.NoTimePoint;
                continue;
            }
            logValues.Add(logValue.Value);
            timePoints.Add(timePoint.Value);
        }
        var statsTimePoints = new Statistics(timePoints.ToArray());
        var statsLogValues = new Statistics(logValues.ToArray());
        double rateConstant, stDevRateConstant, rateConstantError, yIntercept;
        double? rSquared = null;
        if (FixedInitialPercent)
        {
            // Regression through the origin: one fitted parameter, so Count - 1 is passed to GetErrorFactor.
            rateConstant = statsLogValues.SlopeWithoutIntercept(statsTimePoints);
            stDevRateConstant = Statistics.StdDevSlopeWithoutIntercept(statsLogValues, statsTimePoints);
            rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 1);
            yIntercept = 0;
        }
        else
        {
            // Slope and intercept are both fitted, so Count - 2 is passed to GetErrorFactor.
            rateConstant = statsLogValues.Slope(statsTimePoints);
            stDevRateConstant = Statistics.StdDevB(statsLogValues, statsTimePoints);
            rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 2);
            yIntercept = Statistics.Intercept(statsLogValues, statsTimePoints);
            rSquared = Math.Pow(Statistics.R(statsLogValues, statsTimePoints), 2);
        }
        return new ResultData
        {
            RateConstant = rateConstant,
            RateConstantStdDev = stDevRateConstant,
            RateConstantError = rateConstantError,
            PointCount = timePoints.Count,
            YIntercept = yIntercept,
            RSquared = rSquared,
            RowDatas = rowDatas.ToArray(),
            FilteredRowDatas = filteredRowDatas.ToArray(),
        };
    }
    else
    {
        // (time, 1 - turnover) pairs for the non-linear fit; rows missing either value are skipped.
        var dataPoints = new List<KeyValuePair<double, double>>();
        foreach (var rowData in filteredRowDatas)
        {
            double? time = rowData.RawRowData.MsDataFile.TimePoint;
            double? y;
            y = 1 - rowData.Turnover;
            if (!y.HasValue || !time.HasValue)
            {
                continue;
            }
            dataPoints.Add(new KeyValuePair<double, double>(time.Value, y.Value));
        }
        // Time points of all data files in the workspace; used only to scale the initial simplex.
        var timePoints = Workspace.MsDataFiles.Select(msDataFile => msDataFile.TimePoint)
            .Where(timePoint => timePoint.HasValue).ToList();
        var resultData = new ResultData
        {
            PointCount = dataPoints.Count,
            FilteredRowDatas = filteredRowDatas.ToArray(),
            RowDatas = rowDatas.ToArray(),
        };
        if (resultData.RowDatas.Count == 0 || timePoints.Count == 0)
        {
            resultData.RateConstant = double.NaN;
            resultData.YIntercept = double.NaN;
            return resultData;
        }
        NelderMeadSimplex.SimplexConstant[] initialParameters;
        double convergenceTolerance = 0;
        int maxEvaluations = 1000;
        if (FixedInitialPercent)
        {
            // Include time zero in the range used to scale the starting guess.
            timePoints.Add(0);
            double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
            initialParameters = new[]
            {
                new NelderMeadSimplex.SimplexConstant(1 / timePointDifference, 1.0 / 10 / timePointDifference)
            };
            var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                constants => SumOfResidualsSquared(x => Math.Exp(-constants[0] * x), dataPoints));
            // The model uses exp(-k * x); the reported rate constant is -k.
            resultData.RateConstant = -regressionResult.Constants[0];
        }
        else
        {
            double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
            initialParameters = new[]
            {
                new NelderMeadSimplex.SimplexConstant(1 / timePointDifference, 1.0 / 10 / timePointDifference),
                new NelderMeadSimplex.SimplexConstant(0, .1),
            };
            var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                constants => SumOfResidualsSquared(x => Math.Exp(-constants[0] * x + constants[1]), dataPoints));
            // The model uses exp(-k * x + b); the reported rate constant is -k and the intercept is b.
            resultData.RateConstant = -regressionResult.Constants[0];
            resultData.YIntercept = regressionResult.Constants[1];
        }
        return resultData;
    }
}