/// <summary>
/// Chooses CiRT peptides to act as standards by spreading the candidates over
/// <paramref name="numCirt"/> equal-width retention time bins and picking one peptide per bin,
/// then calls <c>ProcessRetentionTimes</c> with the chosen peptides as the standard list.
/// </summary>
/// <param name="monitor">Checked for cancellation once per provider and passed on to <c>ProcessRetentionTimes</c>.</param>
/// <param name="providers">Retention time sources that are queried for each candidate peptide.</param>
/// <param name="cirtPeptides">Candidate peptides; their iRT values are looked up in <c>IrtStandard.CIRT.Peptides</c>.</param>
/// <param name="numCirt">Number of retention time bins, i.e. the maximum number of peptides chosen.</param>
/// <param name="regressionType">Passed through unchanged to <c>ProcessRetentionTimes</c>.</param>
/// <param name="chosenCirtPeptides">Receives the chosen peptides; an empty array if the method returns early on cancellation.</param>
/// <returns>
/// <c>null</c> if <paramref name="monitor"/> reports cancellation while the providers are being scanned;
/// otherwise whatever <c>ProcessRetentionTimes</c> returns for the chosen peptides.
/// </returns>
public static ProcessedIrtAverages ProcessRetentionTimesCirt(IProgressMonitor monitor,
    IRetentionTimeProvider[] providers, DbIrtPeptide[] cirtPeptides, int numCirt,
    IrtRegressionType regressionType, out DbIrtPeptide[] chosenCirtPeptides)
{
    chosenCirtPeptides = new DbIrtPeptide[0];

    var irts = new TargetMap<DbIrtPeptide>(IrtStandard.CIRT.Peptides.Select(pep =>
        new KeyValuePair<Target, DbIrtPeptide>(pep.ModifiedTarget, pep)));
    var targetRts = new Dictionary<Target, List<double>>();
    var targetCounts = new Dictionary<Target, int>(); // count how many successful regressions each peptide participated in
    foreach (var provider in providers)
    {
        if (monitor.IsCanceled)
        {
            return null;
        }

        // (target, measured retention time, known iRT) for every candidate this provider has a time for
        var times = (
            from pep in cirtPeptides
            let rt = provider.GetRetentionTime(pep.ModifiedTarget)
            where rt.HasValue
            select Tuple.Create(pep.ModifiedTarget, rt.Value, irts[pep.ModifiedTarget].Irt)).ToList();

        // Every measured time is recorded, whether or not this provider's regression succeeds
        foreach (var (target, rt, _) in times)
        {
            if (targetRts.TryGetValue(target, out var list))
            {
                list.Add(rt);
            }
            else
            {
                targetRts[target] = new List<double> { rt };
            }
        }

        var removed = new List<Tuple<double, double>>();
        if (!IrtRegression.TryGet<RegressionLine>(times.Select(t => t.Item2).ToList(),
            times.Select(t => t.Item3).ToList(), MIN_PEPTIDES_COUNT, out _, removed))
        {
            continue;
        }

        // Points reported back in "removed" do not count as participating in the regression
        foreach (var (removeRt, removeIrt) in removed)
        {
            times.Remove(times.First(time => time.Item2.Equals(removeRt) && time.Item3.Equals(removeIrt)));
        }
        foreach (var (target, _, _) in times)
        {
            targetCounts[target] = targetCounts.TryGetValue(target, out var existing) ? existing + 1 : 1;
        }
    }

    // for each target, only keep median retention time
    var dupTargets = targetRts.Where(kvp => kvp.Value.Count > 1).Select(kvp => kvp.Key).ToArray();
    foreach (var target in dupTargets)
    {
        targetRts[target] = new List<double> { new Statistics(targetRts[target]).Median() };
    }

    // separate targets into equal retention time bins
    var candidateBins = new List<Tuple<Target, double>>[numCirt];
    for (var i = 0; i < candidateBins.Length; i++)
    {
        candidateBins[i] = new List<Tuple<Target, double>>();
    }

    var minRt = double.MaxValue;
    var maxRt = -1d;
    foreach (var rt in targetRts.Values.Select(list => list.First()))
    {
        if (rt < minRt)
        {
            minRt = rt;
        }
        if (rt > maxRt)
        {
            maxRt = rt;
        }
    }

    var binSize = (maxRt - minRt) / numCirt;
    var lastBinIdx = candidateBins.Length - 1;
    foreach (var target in targetRts)
    {
        foreach (var rt in target.Value)
        {
            // If all retention times are equal the bin width is 0 and the quotient is NaN;
            // converting NaN to int is unspecified, so such targets are placed in the first bin.
            var idx = binSize > 0 ? Math.Min((int) ((rt - minRt) / binSize), lastBinIdx) : 0;
            candidateBins[idx].Add(Tuple.Create(target.Key, rt));
        }
    }

    // Returns the entry of "bin" with the highest count in targetCounts, breaking ties by distance
    // from the center of bin "binIdx". If no entry has a count, every entry is considered and
    // bestCount is 0. Returns null (with both out values 0) for an empty bin.
    Tuple<Target, double> GetBest(List<Tuple<Target, double>> bin, int binIdx, out int bestCount, out double bestRtDelta)
    {
        if (bin.Count == 0)
        {
            bestCount = 0;
            bestRtDelta = 0;
            return null;
        }

        bestCount = 0;
        var filtered = new List<Tuple<Target, double>>();
        foreach (var t in bin)
        {
            if (!targetCounts.TryGetValue(t.Item1, out var count))
            {
                continue;
            }

            if (count > bestCount)
            {
                bestCount = count;
                filtered.Clear();
                filtered.Add(t);
            }
            else if (count == bestCount)
            {
                filtered.Add(t);
            }
        }
        if (filtered.Count == 0)
        {
            filtered = bin;
        }

        // Midpoint between the lower and upper edge of the bin
        var targetRt = ((minRt + binSize * binIdx) + (minRt + binSize * (binIdx + 1))) / 2;
        var closest = filtered.Aggregate((x, y) =>
            Math.Abs(x.Item2 - targetRt) < Math.Abs(y.Item2 - targetRt) ? x : y);
        bestRtDelta = Math.Abs(closest.Item2 - targetRt);
        return closest;
    }

    var chosenList = new List<DbIrtPeptide>();
    // Indices are added in ascending order and are unique, so a list keeps the order explicit
    var emptyBins = new List<int>();
    for (var i = 0; i < candidateBins.Length; i++)
    {
        var bin = candidateBins[i];
        if (bin.Count > 0)
        {
            // choose the best from this bin
            var best = GetBest(bin, i, out _, out _);
            chosenList.Add(irts[best.Item1]);
            bin.Remove(best);
        }
        else
        {
            emptyBins.Add(i);
        }
    }

    foreach (var emptyIdx in emptyBins)
    {
        // Look at the nearest existing neighbor bin(s) on either side of the empty bin
        var bins = new List<int>();
        var left = emptyIdx - 1;
        var right = emptyIdx + 1;
        // Stop once both sides have run off the array; otherwise a single empty bin
        // (numCirt == 1 with no candidates) would loop forever.
        while (bins.Count == 0 && (left >= 0 || right < candidateBins.Length))
        {
            if (left >= 0)
            {
                bins.Add(left);
            }
            if (right < candidateBins.Length)
            {
                bins.Add(right);
            }
            left--;
            right++;
        }

        Tuple<Target, double> best = null;
        var bestBinIdx = -1;
        var bestCount = 0;
        var bestRtDelta = 0d;
        foreach (var i in bins)
        {
            var current = GetBest(candidateBins[i], i, out var count, out var rtDelta);
            if (count > bestCount || (count == bestCount && rtDelta < bestRtDelta))
            {
                best = current;
                bestBinIdx = i;
                bestCount = count;
                bestRtDelta = rtDelta;
            }
        }
        if (best != null)
        {
            chosenList.Add(irts[best.Item1]);
            candidateBins[bestBinIdx].Remove(best);
        }
    }

    // Process retention times using the chosen peptides
    chosenCirtPeptides = chosenList.ToArray();
    return ProcessRetentionTimes(monitor, providers, chosenCirtPeptides, new DbIrtPeptide[0], regressionType);
}
/// <summary>
/// Looks up a retention time for each standard peptide in <paramref name="retentionTimes"/>,
/// fits a regression of the requested type through the (retention time, iRT) pairs of
/// <c>FilteredPeptides</c>, and records whether an acceptable regression was obtained.
/// </summary>
/// <param name="regressionType">
/// Must be reference-equal to <c>IrtRegressionType.LINEAR</c>, <c>LOGARITHMIC</c> or <c>LOWESS</c>.
/// </param>
/// <param name="retentionTimes">Source of measured retention times; stored in <c>RetentionTimeProvider</c>.</param>
/// <param name="standardPeptides">Standard peptides; one <c>Peptide</c> entry is created per element.</param>
/// <param name="heavyStandardPeptides">
/// Indexed in parallel with <paramref name="standardPeptides"/>. A non-null element is tried first;
/// if it has no retention time, the element of <paramref name="standardPeptides"/> is used instead.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="regressionType"/> is not one of the three supported types (only checked when
/// there is at least one filtered peptide).
/// </exception>
public RetentionTimeProviderData(IrtRegressionType regressionType, IRetentionTimeProvider retentionTimes,
    IReadOnlyList<DbIrtPeptide> standardPeptides, IReadOnlyList<DbIrtPeptide> heavyStandardPeptides)
{
    RetentionTimeProvider = retentionTimes;

    Peptides = new List<Peptide>(standardPeptides.Count);
    for (var i = 0; i < standardPeptides.Count; i++)
    {
        var heavy = heavyStandardPeptides[i] != null;
        var standard = heavy ? heavyStandardPeptides[i] : standardPeptides[i];
        var rt = retentionTimes.GetRetentionTime(standard.ModifiedTarget);
        if (!rt.HasValue && heavy)
        {
            // No time for the heavy form: fall back to the entry from standardPeptides
            standard = standardPeptides[i];
            rt = retentionTimes.GetRetentionTime(standard.ModifiedTarget);
        }
        Peptides.Add(new Peptide(standard.ModifiedTarget, rt, standard.Irt));
    }
    Peptides.Sort((x, y) => x.Irt.CompareTo(y.Irt));

    if (!FilteredPeptides.Any())
    {
        // Nothing to fit. Return here so the "no regression" state below is what callers see,
        // instead of falling through and fitting a regression on empty lists.
        Regression = null;
        RegressionRefined = null;
        RegressionSuccess = false;
        return;
    }

    // NOTE(review): FilteredPeptides is defined elsewhere; RetentionTime.Value is read without a
    // check, so it presumably yields only peptides that have a retention time - confirm.
    var filteredRt = FilteredPeptides.Select(pep => pep.RetentionTime.Value).ToList();
    var filteredIrt = FilteredPeptides.Select(pep => pep.Irt).ToList();
    var removed = new List<Tuple<double, double>>();
    if (ReferenceEquals(regressionType, IrtRegressionType.LINEAR))
    {
        Regression = new RegressionLine(filteredRt.ToArray(), filteredIrt.ToArray());
    }
    else if (ReferenceEquals(regressionType, IrtRegressionType.LOGARITHMIC))
    {
        Regression = new LogRegression(filteredRt, filteredIrt);
    }
    else if (ReferenceEquals(regressionType, IrtRegressionType.LOWESS))
    {
        Regression = new LoessRegression(filteredRt.ToArray(), filteredIrt.ToArray());
    }
    else
    {
        throw new ArgumentException();
    }

    IIrtRegression regressionRefined;
    if (IrtRegression.Accept(Regression, MinPoints))
    {
        // The initial fit is accepted as-is: it becomes the refined regression and Regression is cleared
        regressionRefined = Regression;
        Regression = null;
        RegressionSuccess = true;
    }
    else
    {
        RegressionSuccess = IrtRegression.TryGet(Regression, filteredRt, filteredIrt, MinPoints,
            out regressionRefined, removed);
    }
    RegressionRefined = regressionRefined;

    // Replace every peptide whose (retention time, iRT) pair was reported in "removed"
    // with a copy constructed with the flag set to true.
    foreach (var remove in removed)
    {
        for (var i = 0; i < Peptides.Count; i++)
        {
            var peptide = Peptides[i];
            if (peptide.RetentionTime.Equals(remove.Item1) && peptide.Irt.Equals(remove.Item2))
            {
                Peptides[i] = new Peptide(peptide, true);
            }
        }
    }
}
/// <summary>
/// Builds a <c>RetentionTimeProviderData</c> for every provider and, for each provider whose
/// regression succeeded (or, for linear regression, whose <c>CalcRegressionWith</c> call succeeded),
/// adds its retention times to a shared dictionary of per-target averages.
/// </summary>
/// <param name="monitor">Receives progress updates and is checked for cancellation before each provider.</param>
/// <param name="providers">Retention time sources to process, in order.</param>
/// <param name="standardPeptideList">
/// Standard peptides. If they match a known standard that has a document, the list is replaced
/// by a copy before use; the caller's array elements are not modified here.
/// </param>
/// <param name="items">Passed to <c>CalcRegressionWith</c> when the initial regression did not succeed.</param>
/// <param name="regressionType">Regression type used for every provider.</param>
/// <returns>
/// <c>null</c> if <paramref name="monitor"/> reports cancellation; otherwise a
/// <c>ProcessedIrtAverages</c> built from the averages dictionary and the per-provider data.
/// </returns>
public static ProcessedIrtAverages ProcessRetentionTimes(IProgressMonitor monitor,
    IRetentionTimeProvider[] providers, DbIrtPeptide[] standardPeptideList, DbIrtPeptide[] items,
    IrtRegressionType regressionType)
{
    // Parallel to standardPeptideList; an element stays null unless a heavy variant is found below
    var heavyStandards = new DbIrtPeptide[standardPeptideList.Length];
    var matchedStandard = IrtStandard.WhichStandard(standardPeptideList.Select(pep => pep.ModifiedTarget));
    if (matchedStandard != null && matchedStandard.HasDocument)
    {
        // Check embedded standard document for known standard to determine if the standard peptides should be heavy
        // Import iRT standard document into an empty document (rather than just getting the document), because importing also imports the modifications
        var standardDoc = matchedStandard.ImportTo(new SrmDocument(SrmSettingsList.GetDefault()));
        standardPeptideList = standardPeptideList.Select(pep => new DbIrtPeptide(pep)).ToArray();
        foreach (var dummyPep in standardDoc.Molecules.Where(pep => pep.HasExplicitMods))
        {
            var standardPepIdx = standardPeptideList.IndexOf(pep => dummyPep.ModifiedTarget.Equals(pep.ModifiedTarget));
            if (standardPepIdx < 0)
            {
                continue;
            }

            var heavyTarget = standardDoc.Settings.GetModifiedSequence(dummyPep.ModifiedTarget,
                IsotopeLabelType.heavy, dummyPep.ExplicitMods);
            // Only record a heavy variant when it actually differs from the standard's own target
            if (!standardPeptideList[standardPepIdx].ModifiedTarget.Equals(heavyTarget))
            {
                heavyStandards[standardPepIdx] = new DbIrtPeptide(standardPeptideList[standardPepIdx])
                {
                    ModifiedTarget = heavyTarget
                };
            }
        }
    }

    IProgressStatus status = new ProgressStatus(Resources.LibraryGridViewDriver_ProcessRetentionTimes_Adding_retention_times);
    var dictPeptideAverages = new Dictionary<Target, IrtPeptideAverages>();
    var providerData = new List<RetentionTimeProviderData>();
    var runCount = 0;
    foreach (var retentionTimeProvider in providers)
    {
        if (monitor.IsCanceled)
        {
            return null;
        }

        monitor.UpdateProgress(status = status.ChangeMessage(string.Format(
            Resources.LibraryGridViewDriver_ProcessRetentionTimes_Converting_retention_times_from__0__,
            retentionTimeProvider.Name)));

        runCount++;

        var data = new RetentionTimeProviderData(regressionType, retentionTimeProvider, standardPeptideList, heavyStandards);
        // CalcRegressionWith is only attempted for linear regression, and only if the initial regression failed
        if (data.RegressionSuccess ||
            (ReferenceEquals(regressionType, IrtRegressionType.LINEAR) &&
             data.CalcRegressionWith(retentionTimeProvider, standardPeptideList, items)))
        {
            AddRetentionTimesToDict(retentionTimeProvider, data.RegressionRefined, dictPeptideAverages, standardPeptideList);
        }
        // The provider's data is kept whether or not a regression was obtained
        providerData.Add(data);

        monitor.UpdateProgress(status = status.ChangePercentComplete(runCount * 100 / providers.Length));
    }

    monitor.UpdateProgress(status.Complete());
    return new ProcessedIrtAverages(dictPeptideAverages, providerData);
}