/// <summary>
/// Attempts to fit a <see cref="RegressionLine"/> through the given time/score pairs.
/// </summary>
/// <param name="listPepCorr">Pairs supplying the <c>Time</c> and <c>Score</c> values to regress.</param>
/// <param name="minCount">Minimum point count, forwarded unchanged to <c>IrtRegression.TryGet</c>.</param>
/// <returns>The fitted line, or <c>null</c> when <c>IrtRegression.TryGet</c> reports failure.</returns>
private static RegressionLine CalcConversion(IList<TimeScorePair> listPepCorr, int minCount)
{
    var times = listPepCorr.Select(pair => pair.Time).ToList();
    var scores = listPepCorr.Select(pair => pair.Score).ToList();

    // Scores are passed as the first series and times as the second.
    if (!IrtRegression.TryGet<RegressionLine>(scores, times, minCount, out var fitted))
    {
        return null;
    }

    return (RegressionLine)fitted;
}
/// <summary>
/// Attempts to fit a <see cref="RegressionLine"/> through the retention times of the
/// current CiRT peptides and the values stored for their targets in <c>_cirtAll</c>.
/// </summary>
/// <param name="count">
/// Minimum point count forwarded to <c>IrtRegression.TryGet</c>; also the minimum number of
/// peptides that must remain after removed points are dropped.
/// </param>
/// <param name="regression">Receives the regression produced by <c>IrtRegression.TryGet</c> (cast to <see cref="RegressionLine"/>).</param>
/// <param name="peptides">
/// Receives a copy of <c>_cirtPeptides</c>. On a successful fit, every peptide whose
/// (retention time, iRT) pair was reported as removed is taken out of the copy.
/// </param>
/// <returns>
/// <c>false</c> when the fit fails; otherwise whether at least <paramref name="count"/>
/// peptides remain.
/// </returns>
private bool TryGetCirtRegression(int count, out RegressionLine regression, out List<ScoredPeptide> peptides)
{
    var survivors = new List<ScoredPeptide>(_cirtPeptides);
    var measuredRts = _cirtPeptides.Select(scored => scored.Peptide.RetentionTime).ToList();
    var irtValues = _cirtPeptides.Select(scored => _cirtAll[scored.Peptide.Target]).ToList();
    var outliers = new List<Tuple<double, double>>();

    var fitSucceeded = IrtRegression.TryGet<RegressionLine>(measuredRts, irtValues, count, out var fitted, outliers);
    regression = (RegressionLine)fitted;
    peptides = survivors;
    if (!fitSucceeded)
    {
        return false;
    }

    // Walk from the end so RemoveAt does not shift the indices still to be visited;
    // survivors, measuredRts and irtValues are index-aligned at this point.
    var idx = survivors.Count;
    while (idx-- > 0)
    {
        var point = Tuple.Create(measuredRts[idx], irtValues[idx]);
        if (outliers.Contains(point))
        {
            survivors.RemoveAt(idx);
        }
    }

    return survivors.Count >= count;
}
/// <summary>
/// Collects the retention time of each standard peptide from <paramref name="retentionTimes"/>
/// and fits a regression of the requested type through the usable (retention time, iRT) points.
/// </summary>
/// <param name="regressionType">One of <c>LINEAR</c>, <c>LOGARITHMIC</c> or <c>LOWESS</c> (compared by reference).</param>
/// <param name="retentionTimes">Provider queried for each standard's retention time.</param>
/// <param name="standardPeptides">Standard peptides; one <c>Peptide</c> entry is created per element.</param>
/// <param name="heavyStandardPeptides">
/// Indexed in parallel with <paramref name="standardPeptides"/>. A non-null entry is tried first;
/// the corresponding entry of <paramref name="standardPeptides"/> is used when the heavy form has
/// no retention time.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="regressionType"/> is not one of the supported types (only checked when there is
/// at least one usable peptide).
/// </exception>
public RetentionTimeProviderData(IrtRegressionType regressionType, IRetentionTimeProvider retentionTimes, IReadOnlyList<DbIrtPeptide> standardPeptides, IReadOnlyList<DbIrtPeptide> heavyStandardPeptides)
{
    RetentionTimeProvider = retentionTimes;

    Peptides = new List<Peptide>(standardPeptides.Count);
    for (var i = 0; i < standardPeptides.Count; i++)
    {
        // Prefer the heavy form when one is given; fall back to the other entry if the
        // provider has no retention time for the heavy form.
        var heavy = heavyStandardPeptides[i] != null;
        var standard = heavy ? heavyStandardPeptides[i] : standardPeptides[i];
        var rt = retentionTimes.GetRetentionTime(standard.ModifiedTarget);
        if (!rt.HasValue && heavy)
        {
            standard = standardPeptides[i];
            rt = retentionTimes.GetRetentionTime(standard.ModifiedTarget);
        }
        Peptides.Add(new Peptide(standard.ModifiedTarget, rt, standard.Irt));
    }
    Peptides.Sort((x, y) => x.Irt.CompareTo(y.Irt));

    // NOTE(review): FilteredPeptides is defined outside this block; presumably it yields the
    // peptides that have a retention time (the code below reads RetentionTime.Value) - confirm.
    if (!FilteredPeptides.Any())
    {
        Regression = null;
        RegressionRefined = null;
        RegressionSuccess = false;
        // Nothing to fit. Without this return the code below would build a regression from
        // empty data and overwrite the failure state assigned above.
        return;
    }

    var filteredRt = FilteredPeptides.Select(pep => pep.RetentionTime.Value).ToList();
    var filteredIrt = FilteredPeptides.Select(pep => pep.Irt).ToList();

    if (ReferenceEquals(regressionType, IrtRegressionType.LINEAR))
    {
        Regression = new RegressionLine(filteredRt.ToArray(), filteredIrt.ToArray());
    }
    else if (ReferenceEquals(regressionType, IrtRegressionType.LOGARITHMIC))
    {
        Regression = new LogRegression(filteredRt, filteredIrt);
    }
    else if (ReferenceEquals(regressionType, IrtRegressionType.LOWESS))
    {
        Regression = new LoessRegression(filteredRt.ToArray(), filteredIrt.ToArray());
    }
    else
    {
        throw new ArgumentException();
    }

    var removed = new List<Tuple<double, double>>();
    IIrtRegression regressionRefined;
    if (IrtRegression.Accept(Regression, MinPoints))
    {
        // The initial fit is already acceptable: it becomes the refined regression and
        // Regression is cleared.
        regressionRefined = Regression;
        Regression = null;
        RegressionSuccess = true;
    }
    else
    {
        // Otherwise attempt a refinement; points dropped by it are reported in 'removed'.
        RegressionSuccess = IrtRegression.TryGet(Regression, filteredRt, filteredIrt, MinPoints, out regressionRefined, removed);
    }
    RegressionRefined = regressionRefined;

    // Replace every peptide whose (retention time, iRT) pair was removed by the refinement
    // with a copy constructed with the 'true' flag.
    foreach (var remove in removed)
    {
        for (var i = 0; i < Peptides.Count; i++)
        {
            var peptide = Peptides[i];
            if (peptide.RetentionTime.Equals(remove.Item1) && peptide.Irt.Equals(remove.Item2))
            {
                Peptides[i] = new Peptide(peptide, true);
            }
        }
    }
}
/// <summary>
/// Selects CiRT peptides spread across the observed retention time range (at most one per
/// bin, plus at most one borrowed from a neighbour for each empty bin) and then processes
/// the retention times using the chosen peptides as standards.
/// </summary>
/// <param name="monitor">Checked for cancellation before each provider is examined.</param>
/// <param name="providers">Retention time providers to examine.</param>
/// <param name="cirtPeptides">Candidate peptides; each must be present in <c>IrtStandard.CIRT.Peptides</c> for its iRT lookup.</param>
/// <param name="numCirt">Number of retention time bins to split the observed range into (expected to be positive).</param>
/// <param name="regressionType">Regression type forwarded to <c>ProcessRetentionTimes</c>.</param>
/// <param name="chosenCirtPeptides">Receives the chosen peptides; an empty array if processing is canceled.</param>
/// <returns>
/// <c>null</c> if <paramref name="monitor"/> reports cancellation; otherwise the result of
/// <c>ProcessRetentionTimes</c> for the chosen peptides.
/// </returns>
public static ProcessedIrtAverages ProcessRetentionTimesCirt(IProgressMonitor monitor, IRetentionTimeProvider[] providers, DbIrtPeptide[] cirtPeptides, int numCirt, IrtRegressionType regressionType, out DbIrtPeptide[] chosenCirtPeptides)
{
    chosenCirtPeptides = new DbIrtPeptide[0];

    var irts = new TargetMap<DbIrtPeptide>(IrtStandard.CIRT.Peptides.Select(pep =>
        new KeyValuePair<Target, DbIrtPeptide>(pep.ModifiedTarget, pep)));
    var targetRts = new Dictionary<Target, List<double>>();
    // count how many successful regressions each peptide participated in
    var targetCounts = new Dictionary<Target, int>();

    foreach (var provider in providers)
    {
        if (monitor.IsCanceled)
        {
            return null;
        }

        // (target, retention time, iRT) for every candidate this provider has a time for
        var times = (
            from pep in cirtPeptides
            let rt = provider.GetRetentionTime(pep.ModifiedTarget)
            where rt.HasValue
            select Tuple.Create(pep.ModifiedTarget, rt.Value, irts[pep.ModifiedTarget].Irt)).ToList();

        // Every observed time is recorded, whether or not the regression below succeeds.
        foreach (var (target, rt, _) in times)
        {
            if (targetRts.TryGetValue(target, out var list))
            {
                list.Add(rt);
            }
            else
            {
                targetRts[target] = new List<double> { rt };
            }
        }

        var removed = new List<Tuple<double, double>>();
        if (!IrtRegression.TryGet<RegressionLine>(times.Select(t => t.Item2).ToList(), times.Select(t => t.Item3).ToList(), MIN_PEPTIDES_COUNT, out _, removed))
        {
            continue;
        }

        // Only the points that survived the regression count as participating.
        foreach (var (removeRt, removeIrt) in removed)
        {
            times.Remove(times.First(time => time.Item2.Equals(removeRt) && time.Item3.Equals(removeIrt)));
        }
        foreach (var (target, _, _) in times)
        {
            targetCounts[target] = targetCounts.TryGetValue(target, out var existing) ? existing + 1 : 1;
        }
    }

    // for each target, only keep median retention time
    var dupTargets = targetRts.Where(kvp => kvp.Value.Count > 1).Select(kvp => kvp.Key).ToArray();
    foreach (var target in dupTargets)
    {
        targetRts[target] = new List<double> { new Statistics(targetRts[target]).Median() };
    }

    // separate targets into equal retention time bins
    var candidateBins = new List<Tuple<Target, double>>[numCirt];
    for (var i = 0; i < candidateBins.Length; i++)
    {
        candidateBins[i] = new List<Tuple<Target, double>>();
    }

    var minRt = double.MaxValue;
    var maxRt = -1d;
    foreach (var rt in targetRts.Values.Select(list => list.First()))
    {
        if (rt < minRt)
        {
            minRt = rt;
        }
        if (rt > maxRt)
        {
            maxRt = rt;
        }
    }

    var binSize = (maxRt - minRt) / numCirt;
    var lastBinIdx = candidateBins.Length - 1;
    foreach (var target in targetRts)
    {
        foreach (var rt in target.Value)
        {
            // When every retention time is identical (e.g. a single target) binSize is 0 and
            // the division yields NaN, whose conversion to int is unspecified; use the first
            // bin in that case instead of computing an index from NaN.
            var targetBinIdx = binSize > 0
                ? Math.Min((int)((rt - minRt) / binSize), lastBinIdx)
                : 0;
            candidateBins[targetBinIdx].Add(Tuple.Create(target.Key, rt));
        }
    }

    // Picks, from a bin, the candidate with the highest regression count; ties (and bins
    // where no candidate has a count) are resolved by proximity to the center of the bin.
    Tuple<Target, double> GetBest(List<Tuple<Target, double>> bin, int binIdx, out int bestCount, out double bestRtDelta)
    {
        if (bin.Count == 0)
        {
            bestCount = 0;
            bestRtDelta = 0;
            return null;
        }

        bestCount = 0;
        var filtered = new List<Tuple<Target, double>>();
        foreach (var t in bin)
        {
            if (!targetCounts.TryGetValue(t.Item1, out var count))
            {
                continue;
            }

            if (count > bestCount)
            {
                bestCount = count;
                filtered.Clear();
                filtered.Add(t);
            }
            else if (count == bestCount)
            {
                filtered.Add(t);
            }
        }
        if (filtered.Count == 0)
        {
            // No candidate took part in a successful regression: consider the whole bin.
            filtered = bin;
        }

        var targetRt = ((minRt + binSize * binIdx) + (minRt + binSize * (binIdx + 1))) / 2;
        var closest = filtered.Aggregate((x, y) => Math.Abs(x.Item2 - targetRt) < Math.Abs(y.Item2 - targetRt) ? x : y);
        bestRtDelta = Math.Abs(closest.Item2 - targetRt);
        return closest;
    }

    var chosenList = new List<DbIrtPeptide>();
    var emptyBins = new HashSet<int>();
    for (var i = 0; i < candidateBins.Length; i++)
    {
        var bin = candidateBins[i];
        if (bin.Count > 0)
        {
            // choose the best from this bin
            var best = GetBest(bin, i, out _, out _);
            chosenList.Add(irts[best.Item1]);
            bin.Remove(best);
        }
        else
        {
            emptyBins.Add(i);
        }
    }

    // For each bin that had no candidates, try to take a leftover candidate from a neighbour.
    foreach (var emptyIdx in emptyBins)
    {
        var bins = new List<int>();
        var left = emptyIdx - 1;
        var right = emptyIdx + 1;
        // Stop once both cursors have left the array; otherwise a lone empty bin would keep
        // this loop running until 'left' wraps around and an invalid index is added.
        while (bins.Count == 0 && (left >= 0 || right < candidateBins.Length))
        {
            if (left >= 0)
            {
                bins.Add(left);
            }
            if (right < candidateBins.Length)
            {
                bins.Add(right);
            }
            left--;
            right++;
        }

        Tuple<Target, double> best = null;
        var bestBinIdx = -1;
        var bestCount = 0;
        // NOTE(review): starting from (0, 0) means a neighbour whose candidates all have a
        // count of 0 is never selected - confirm that this is intended.
        var bestRtDelta = 0d;
        foreach (var i in bins)
        {
            var current = GetBest(candidateBins[i], i, out var count, out var rtDelta);
            if (count > bestCount || (count == bestCount && rtDelta < bestRtDelta))
            {
                best = current;
                bestBinIdx = i;
                bestCount = count;
                bestRtDelta = rtDelta;
            }
        }
        if (best != null)
        {
            chosenList.Add(irts[best.Item1]);
            candidateBins[bestBinIdx].Remove(best);
        }
    }

    // Process retention times using the chosen peptides
    chosenCirtPeptides = chosenList.ToArray();
    return ProcessRetentionTimes(monitor, providers, chosenCirtPeptides, new DbIrtPeptide[0], regressionType);
}