Пример #1
0
 /// <summary>
 /// Creates a regression between two equally long lists of times, leaving out the
 /// points whose positions are listed in <paramref name="outlierIndexes"/>.
 /// </summary>
 /// <param name="originalTimes">Times used as the second argument of the Slope/Intercept/R calls.</param>
 /// <param name="targetTimes">Times the Slope/Intercept/R calls are made on.</param>
 /// <param name="outlierIndexes">Positions (shared by both lists) to exclude from the statistics.</param>
 /// <exception cref="ArgumentException">The two lists differ in length.</exception>
 private RegressionWithOutliers(IList<double> originalTimes, IList<double> targetTimes, ISet<int> outlierIndexes)
 {
     bool sameLength = originalTimes.Count == targetTimes.Count;
     if (!sameLength)
     {
         throw new ArgumentException("Value lists must have same length");
     }

     OriginalTimes = originalTimes;
     TargetTimes = targetTimes;
     OutlierIndexes = outlierIndexes;

     // Keep only the positions that are not flagged as outliers; the same positional
     // filter is applied to both lists so the surviving points stay paired.
     var keptTargets = TargetTimes
         .Where((time, position) => !outlierIndexes.Contains(position))
         .ToArray();
     var statsTarget = new Statistics(keptTargets);
     var keptOriginals = OriginalTimes
         .Where((time, position) => !outlierIndexes.Contains(position))
         .ToArray();
     var statsOriginal = new Statistics(keptOriginals);

     Debug.Assert(statsTarget.Length == statsOriginal.Length);
     Debug.Assert(statsTarget.Length == TotalCount - outlierIndexes.Count);

     Slope = statsTarget.Slope(statsOriginal);
     Intercept = statsTarget.Intercept(statsOriginal);

     // A NaN correlation is stored as 0 rather than propagated.
     var correlation = statsTarget.R(statsOriginal);
     R = double.IsNaN(correlation) ? 0 : correlation;
 }
Пример #2
0
 /// <summary>
 /// Regresses each peak's midpoint time against its relative elution position and
 /// reports the resulting slope together with the value returned by Statistics.StdDevB.
 /// </summary>
 /// <param name="rtShift">Receives the slope of peak midpoint versus (eluteAfter - eluteBefore).</param>
 /// <param name="residuals">Receives the result of Statistics.StdDevB for the same two series.</param>
 public void RetentionTimeShift(out double rtShift, out double residuals)
 {
     var elutionOffsets = new List<double>();
     var peakMidpoints = new List<double>();

     foreach (var peakEntry in _peaks)
     {
         // Position of this peak's key relative to TracerFormula.Empty.
         int before, after;
         RelativeElutionTime(TracerFormula.Empty, peakEntry.Key, out before, out after);
         elutionOffsets.Add(after - before);

         var peak = peakEntry.Value;
         peakMidpoints.Add((peak.StartTime + peak.EndTime) / 2);
     }

     var offsetStats = new Statistics(elutionOffsets.ToArray());
     var midpointStats = new Statistics(peakMidpoints.ToArray());

     rtShift = midpointStats.Slope(offsetStats);
     residuals = Statistics.StdDevB(midpointStats, offsetStats);
 }
Пример #3
0
        /// <summary>
        /// Fits a rate constant to the turnover values of the given rows.
        /// When <c>EvviesFilter</c> is not <c>None</c>, turnover values are first grouped by time point
        /// and rows falling outside a per-time-point [mean - cutoff, mean + cutoff] window are rejected.
        /// The remaining rows are then fitted either with the <c>Statistics</c> linear-regression helpers
        /// on log(1 - turnover) (when <c>HalfLifeSettings.SimpleLinearRegression</c> is set) or with
        /// <c>NelderMeadSimplex.Regress</c> on an exponential model.
        /// </summary>
        /// <param name="rowDatas">
        /// Rows to evaluate. This method mutates them: it may set <c>RejectReason</c>,
        /// <c>EvviesFilterMin</c> and <c>EvviesFilterMax</c>.
        /// </param>
        /// <returns>
        /// A <c>ResultData</c> containing the fitted values, all input rows, and the rows that
        /// passed the filtering stage.
        /// </returns>
        private ResultData CalculateHalfLife(ICollection<ProcessedRowData> rowDatas)
        {
            IEnumerable<ProcessedRowData> filteredRowDatas;
            if (EvviesFilter != EvviesFilterEnum.None)
            {
                // Rows that have both a usable turnover and a time point; these are the rows the filter judges.
                var applicableRowDatas = new List<ProcessedRowData>();
                // Turnover values grouped by time point.
                var values = new Dictionary<double, List<double>>();
                var filteredRowDataList = new List<ProcessedRowData>();
                foreach (var rowData in rowDatas)
                {
                    Debug.Assert(RejectReason.EvviesFilter != rowData.RejectReason);
                    if (null != rowData.RejectReason)
                    {
                        continue;
                    }
                    var value = rowData.Turnover;
                    if (!value.HasValue || double.IsNaN(value.Value) || double.IsInfinity(value.Value))
                    {
                        continue;
                    }
                    var timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue)
                    {
                        // Rows without a time point bypass the filter and are passed through as-is.
                        filteredRowDataList.Add(rowData);
                        continue;
                    }
                    List<double> list;
                    if (!values.TryGetValue(timePoint.Value, out list))
                    {
                        list = new List<double>();
                        values.Add(timePoint.Value, list);
                    }
                    list.Add(value.Value);
                    applicableRowDatas.Add(rowData);
                }
                if (EvviesFilter == EvviesFilterEnum.Oct2011)
                {
                    // Pre-trim each time point's values to median +/- 3 SD before the cutoffs are computed.
                    // Iterate over a snapshot because entries of "values" are replaced inside the loop.
                    foreach (var entry in values.ToArray())
                    {
                        var statistics = new Statistics(entry.Value.ToArray());
                        var min = statistics.Median() - 3*statistics.StdDev();
                        var max = statistics.Median() + 3*statistics.StdDev();
                        if (statistics.Median() + 2 * statistics.StdDev() >= .99)
                        {
                            // Throw away any values of 100% or 99% if they are more than 2 SD above the median.
                            max = Math.Min(.99, max);
                        }
                        var newValues = entry.Value.Where(v => v >= min && v <= max).ToList();
                        if (newValues.Count != entry.Value.Count)
                        {
                            values[entry.Key] = newValues;
                        }
                    }
                }

                // Per time point: Key = lower bound, Value = upper bound of the accepted range.
                var cutoffs = new Dictionary<double, KeyValuePair<double, double>>();
                foreach (var entry in values)
                {
                    var statistics = new Statistics(entry.Value.ToArray());
                    var mean = statistics.Mean();
                    var stdDev = statistics.StdDev();
                    double cutoff;
                    if (EvviesFilter == EvviesFilterEnum.TwoStdDev)
                    {
                        cutoff = 2*stdDev;
                    }
                    else
                    {
                        // Use the wider 2 SD window only when the relative spread is below 30%.
                        if (stdDev / mean < .3)
                        {
                            cutoff = 2 * stdDev;
                        }
                        else
                        {
                            cutoff = stdDev;
                        }
                    }
                    cutoffs.Add(entry.Key, new KeyValuePair<double, double>(mean - cutoff, mean + cutoff));
                }
                foreach (var rowData in applicableRowDatas)
                {
                    // Every row in applicableRowDatas was added only after its time point and turnover
                    // were confirmed present, so both .Value reads below are safe.
                    var timePoint = GetTimePoint(rowData.RawRowData);
                    var cutoff = cutoffs[timePoint.Value];
                    var value = rowData.Turnover;
                    rowData.EvviesFilterMin = cutoff.Key;
                    rowData.EvviesFilterMax = cutoff.Value;
                    if (value.Value < cutoff.Key || value.Value > cutoff.Value)
                    {
                        Debug.Assert(null == rowData.RejectReason);
                        rowData.RejectReason = RejectReason.EvviesFilter;
                        continue;
                    }
                    filteredRowDataList.Add(rowData);
                }
                filteredRowDatas = filteredRowDataList;
            }
            else
            {
                filteredRowDatas = rowDatas.Where(rowData=>null == rowData.RejectReason).ToArray();
            }
            if (HalfLifeSettings.SimpleLinearRegression)
            {
                var timePoints = new List<double>();
                var logValues = new List<double>();
                foreach (var rowData in filteredRowDatas)
                {
                    if (null != rowData.RejectReason)
                    {
                        continue;
                    }
                    // When no filter ran, Turnover has not been checked yet and may be null; reading
                    // .Value on a null would throw, so a missing turnover is rejected like any other
                    // unusable value instead.
                    var turnover = rowData.Turnover;
                    double? logValue = turnover.HasValue ? Math.Log(1 - turnover.Value) : (double?) null;
                    if (!logValue.HasValue || double.IsNaN(logValue.Value) || double.IsInfinity(logValue.Value))
                    {
                        rowData.RejectReason = RejectReason.ValueOutOfRange;
                        continue;
                    }
                    double? timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue || ExcludedTimePoints.Contains(timePoint.Value))
                    {
                        rowData.RejectReason = RejectReason.NoTimePoint;
                        continue;
                    }
                    logValues.Add(logValue.Value);
                    timePoints.Add(timePoint.Value);
                }
                var statsTimePoints = new Statistics(timePoints.ToArray());
                var statsLogValues = new Statistics(logValues.ToArray());
                double rateConstant, stDevRateConstant, rateConstantError, yIntercept;
                double? rSquared = null;
                if (FixedInitialPercent)
                {
                    // Line forced through the origin: one fitted parameter, so Count - 1 is passed to GetErrorFactor.
                    rateConstant = statsLogValues.SlopeWithoutIntercept(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevSlopeWithoutIntercept(statsLogValues, statsTimePoints);
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 1);
                    yIntercept = 0;
                }
                else
                {
                    // Slope and intercept are both fitted, so Count - 2 is passed to GetErrorFactor.
                    rateConstant = statsLogValues.Slope(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevB(statsLogValues, statsTimePoints);
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 2);
                    yIntercept = Statistics.Intercept(statsLogValues, statsTimePoints);
                    rSquared = Math.Pow(Statistics.R(statsLogValues, statsTimePoints), 2);
                }
                return new ResultData
                {
                    RateConstant = rateConstant,
                    RateConstantStdDev = stDevRateConstant,
                    RateConstantError = rateConstantError,
                    PointCount = timePoints.Count,
                    YIntercept = yIntercept,
                    RSquared = rSquared,
                    RowDatas = rowDatas.ToArray(),
                    FilteredRowDatas = filteredRowDatas.ToArray(),
                };
            }
            else
            {
                // (time, 1 - turnover) pairs fed to the simplex fit; rows missing either value are skipped.
                var dataPoints = new List<KeyValuePair<double, double>>();
                foreach (var rowData in filteredRowDatas)
                {
                    double? time = rowData.RawRowData.MsDataFile.TimePoint;
                    double? y;
                    y = 1-rowData.Turnover;
                    if (!y.HasValue || !time.HasValue)
                    {
                        continue;
                    }
                    dataPoints.Add(new KeyValuePair<double, double>(time.Value, y.Value));
                }
                // All time points known to the workspace; used only to scale the initial guess.
                var timePoints =
                    Workspace.MsDataFiles.Select(msDataFile => msDataFile.TimePoint)
                    .Where(timePoint => timePoint.HasValue).ToList();
                var resultData = new ResultData
                                     {
                                         PointCount = dataPoints.Count,
                                         FilteredRowDatas = filteredRowDatas.ToArray(),
                                         RowDatas = rowDatas.ToArray(),
                                     };
                if (resultData.RowDatas.Count == 0 || timePoints.Count == 0)
                {
                    resultData.RateConstant = double.NaN;
                    resultData.YIntercept = double.NaN;
                    return resultData;
                }
                NelderMeadSimplex.SimplexConstant[] initialParameters;
                double convergenceTolerance = 0;
                int maxEvaluations = 1000;
                if (FixedInitialPercent)
                {
                    // Model: y = exp(-k * x). Time zero is included when measuring the time span.
                    timePoints.Add(0);
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    initialParameters = new[] {new NelderMeadSimplex.SimplexConstant(1/timePointDifference, 1.0/10/timePointDifference)};
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance,
                                                                     maxEvaluations,
                                                                     constants =>
                                                                     SumOfResidualsSquared(
                                                                         x => Math.Exp(-constants[0]*x), dataPoints));
                    resultData.RateConstant = -regressionResult.Constants[0];
                }
                else
                {
                    // Model: y = exp(-k * x + b).
                    // NOTE(review): if every workspace time point is identical, timePointDifference is 0 and
                    // the initial guess becomes infinite - confirm callers never reach this with a single time point.
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    initialParameters = new[]
                                            {
                                                new NelderMeadSimplex.SimplexConstant(1/timePointDifference,
                                                                                      1.0/10/timePointDifference),
                                                new NelderMeadSimplex.SimplexConstant(0, .1),
                                            };
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                        constants=>SumOfResidualsSquared(x=>Math.Exp(-constants[0] * x + constants[1]), dataPoints));
                    resultData.RateConstant = -regressionResult.Constants[0];
                    resultData.YIntercept = regressionResult.Constants[1];
                }
                return resultData;
            }
        }