/// <summary>
/// Greedily picks a set of isotopic envelopes that do not share any experimental m/z peak.
/// </summary>
/// <remarks>
/// Each round takes the envelope returned by <c>GetNextBestEnvelope</c>, claims the experimental m/z of each of
/// its peaks, and re-deconvolutes every candidate that contains a claimed m/z so that it can only be built from
/// unclaimed peaks. Candidates that can no longer be built are dropped. The loop stops when
/// <c>GetNextBestEnvelope</c> returns null.
/// Harmonic removal runs at most once, the first time a chosen envelope has a monoisotopic mass above 10000.
/// </remarks>
/// <param name="envelopeCandidates">Candidate envelopes. This list is modified in place (entries are replaced and removed).</param>
/// <param name="spectrum">Spectrum used to re-deconvolute overlapping candidates.</param>
/// <returns>The chosen envelopes, in the order they were picked.</returns>
public List<DeconvolutedEnvelope> RunEnvelopeParsimony(List<DeconvolutedEnvelope> envelopeCandidates, MzSpectrum spectrum)
{
    var parsimoniousEnvelopes = new List<DeconvolutedEnvelope>();
    var peaksBuffer = new List<DeconvolutedPeak>();
    var mzsClaimed = new HashSet<double>();

    // greedy algorithm. pick a set of mutually-exclusive isotopic envelopes
    // until no more envelopes can be found in the spectrum
    DeconvolutedEnvelope nextEnvelope = GetNextBestEnvelope(envelopeCandidates, null);
    bool harmonicsRemoved = false;

    while (nextEnvelope != null)
    {
        DeconvolutedEnvelope chosenEnvelope = nextEnvelope;
        parsimoniousEnvelopes.Add(chosenEnvelope);

        foreach (var peak in chosenEnvelope.Peaks)
        {
            mzsClaimed.Add(peak.ExperimentalMz);
        }

        // Re-deconvolute, in place, every candidate that touches a claimed m/z.
        // Each re-deconvolution only reads mzsClaimed and the (reset) peaksBuffer, so the visiting order does not
        // affect the outcome; walking by index avoids sorting the overlaps and an IndexOf scan per overlap.
        for (int candidateIndex = 0; candidateIndex < envelopeCandidates.Count; candidateIndex++)
        {
            DeconvolutedEnvelope overlappingEnvelope = envelopeCandidates[candidateIndex];

            if (!overlappingEnvelope.Peaks.Any(v => mzsClaimed.Contains(v.ExperimentalMz)))
            {
                continue;
            }

            int mzIndex = spectrum.GetClosestPeakIndex(overlappingEnvelope.Peaks.First().ExperimentalMz);
            DeconvolutedEnvelope redeconEnv = GetIsotopicEnvelope(spectrum, mzIndex, overlappingEnvelope.Charge, peaksBuffer, mzsClaimed);

            if (redeconEnv != null)
            {
                // carry the noise annotations over from the envelope being replaced
                redeconEnv.NoiseFwhm = overlappingEnvelope.NoiseFwhm;
                redeconEnv.SignalToNoise = overlappingEnvelope.SignalToNoise;
                redeconEnv.Baseline = overlappingEnvelope.Baseline;
            }

            // a null entry marks a candidate that could not be rebuilt; it is removed below
            envelopeCandidates[candidateIndex] = redeconEnv;
        }

        // remove all invalid isotopic envelopes
        envelopeCandidates.RemoveAll(candidate => candidate == null);

        // remove harmonics (only once per call)
        if (chosenEnvelope.MonoisotopicMass > 10000 && !harmonicsRemoved)
        {
            RemoveHarmonics(envelopeCandidates, chosenEnvelope, spectrum);
            harmonicsRemoved = true;
        }

        // get the next-best envelope
        nextEnvelope = GetNextBestEnvelope(envelopeCandidates, chosenEnvelope);
    }

    return parsimoniousEnvelopes;
}
/// <summary>
/// Builds the list of candidate isotopic envelopes for a spectrum by trying, for each eligible peak,
/// every charge state returned by <c>GetPotentialChargeStates</c>.
/// </summary>
/// <param name="spectrum">Spectrum whose <c>XArray</c> peaks are used as envelope seeds.</param>
/// <param name="mzRange">
/// Peaks with m/z below <c>Minimum</c> are skipped; the scan stops at the first examined peak above <c>Maximum</c>
/// (this relies on <c>XArray</c> being in ascending m/z order — NOTE(review): confirm).
/// </param>
/// <param name="optionalIndicies">If not null, only peaks whose index is in this list are used as seeds.</param>
/// <returns>All non-null envelopes produced by <c>GetIsotopicEnvelope</c>.</returns>
public List<DeconvolutedEnvelope> GetEnvelopeCandidates(MzSpectrum spectrum, MzRange mzRange, List<int> optionalIndicies = null)
{
    var envelopeCandidates = new List<DeconvolutedEnvelope>();
    var peaksBuffer = new List<DeconvolutedPeak>();
    var mzsClaimed = new HashSet<double>(); // this is empty, no m/z peaks have been claimed yet
    var potentialChargeStates = new HashSet<int>();

    // Copy the index filter into a set once so the per-peak membership test is not a linear list search.
    HashSet<int> allowedIndices = optionalIndicies == null ? null : new HashSet<int>(optionalIndicies);

    // get list of envelope candidates for this scan
    for (int p = 0; p < spectrum.XArray.Length; p++)
    {
        double mz = spectrum.XArray[p];

        if (allowedIndices != null && !allowedIndices.Contains(p))
        {
            continue;
        }

        // check to see if this peak is in the m/z deconvolution range
        if (mz < mzRange.Minimum)
        {
            continue;
        }

        if (mz > mzRange.Maximum)
        {
            break;
        }

        // get rough list of charge states to check for based on m/z peaks around this peak
        potentialChargeStates = GetPotentialChargeStates(potentialChargeStates, spectrum, p);

        // examine different charge state possibilities and get corresponding envelope candidates
        foreach (int z in potentialChargeStates)
        {
            DeconvolutedEnvelope candidateEnvelope = GetIsotopicEnvelope(spectrum, p, z, peaksBuffer, mzsClaimed);

            if (candidateEnvelope != null)
            {
                envelopeCandidates.Add(candidateEnvelope);
            }
        }
    }

    return envelopeCandidates;
}
/// <summary>
/// Removes from <paramref name="envelopeCandidates"/> every envelope for which a candidate at an integer
/// multiple of its charge is found on the same peaks.
/// </summary>
/// <remarks>
/// For each envelope and each charge multiplier i (starting at 2, while i &lt; MaxCharge / envelope.Charge, integer
/// division), the method computes the m/z one <c>Constants.C13MinusC12</c> above the envelope's first peak at the
/// multiplied charge, looks up the closest spectrum peak, and — if that peak is within <c>PpmTolerance</c> and is
/// not the first peak itself — checks whether any candidate with the multiplied charge contains either m/z.
/// If so, the envelope is queued for removal.
/// </remarks>
/// <param name="envelopeCandidates">Candidate list; modified in place.</param>
/// <param name="previousBestEnvelope">Not used by this method; kept so existing callers keep compiling.</param>
/// <param name="spectrum">Spectrum searched for the harmonic peak.</param>
public void RemoveHarmonics(List<DeconvolutedEnvelope> envelopeCandidates, DeconvolutedEnvelope previousBestEnvelope, MzSpectrum spectrum)
{
    var harmonicEnvelopes = new List<DeconvolutedEnvelope>();

    foreach (DeconvolutedEnvelope envelope in envelopeCandidates)
    {
        int multiplierLimit = MaxCharge / envelope.Charge;

        if (multiplierLimit <= 2)
        {
            // no charge multiple to test for this envelope
            continue;
        }

        double mainMz = envelope.Peaks.First().ExperimentalMz;

        for (int i = 2; i < multiplierLimit; i++)
        {
            int harmonicZ = envelope.Charge * i;
            double theoreticalHarmonicMz = (mainMz.ToMass(harmonicZ) + Constants.C13MinusC12).ToMz(harmonicZ);

            int ind = spectrum.GetClosestPeakIndex(theoreticalHarmonicMz);
            double experimentalHarmonicMz = spectrum.XArray[ind];

            bool harmonicPeakFound =
                PpmTolerance.Within(experimentalHarmonicMz.ToMass(harmonicZ), theoreticalHarmonicMz.ToMass(harmonicZ))
                && experimentalHarmonicMz != mainMz;

            if (!harmonicPeakFound)
            {
                continue;
            }

            bool hasHarmonicCandidate = envelopeCandidates.Any(candidate =>
                candidate.Charge == harmonicZ
                && candidate.Peaks.Any(v => v.ExperimentalMz == experimentalHarmonicMz || v.ExperimentalMz == mainMz));

            if (hasHarmonicCandidate)
            {
                // one hit is enough; stopping here also keeps the envelope from being queued more than once
                harmonicEnvelopes.Add(envelope);
                break;
            }

            // TODO: look for -1 dalton peak
        }
    }

    // removal is deferred because the candidate list cannot be modified while it is being enumerated
    foreach (DeconvolutedEnvelope harmonicEnvelope in harmonicEnvelopes)
    {
        envelopeCandidates.Remove(harmonicEnvelope);
    }
}
/// <summary>
/// Returns the highest-scoring candidate, preferring candidates whose monoisotopic mass is within
/// <c>PpmTolerance</c> of the previous envelope's mass shifted by -3 to +3 multiples of <c>Constants.C13MinusC12</c>.
/// </summary>
/// <param name="envelopeCandidates">Candidates to choose from.</param>
/// <param name="previousBestEnvelope">The previously chosen envelope, or null to rank all candidates.</param>
/// <returns>The best candidate, or null when there are no candidates.</returns>
private DeconvolutedEnvelope GetNextBestEnvelope(List<DeconvolutedEnvelope> envelopeCandidates, DeconvolutedEnvelope previousBestEnvelope)
{
    // By default every candidate competes; this is narrowed below when mass-related candidates exist.
    IEnumerable<DeconvolutedEnvelope> rankingPool = envelopeCandidates;

    if (previousBestEnvelope != null)
    {
        var massRelated = new List<DeconvolutedEnvelope>();

        for (int isotopeOffset = -3; isotopeOffset <= 3; isotopeOffset++)
        {
            double shiftedMass = isotopeOffset * Constants.C13MinusC12 + previousBestEnvelope.MonoisotopicMass;

            foreach (DeconvolutedEnvelope candidate in envelopeCandidates)
            {
                if (PpmTolerance.Within(candidate.MonoisotopicMass, shiftedMass))
                {
                    massRelated.Add(candidate);
                }
            }
        }

        if (massRelated.Count > 0)
        {
            rankingPool = massRelated;
        }
    }

    return rankingPool.OrderByDescending(candidate => candidate.Score).FirstOrDefault();
}
/// <summary>
/// Tries to build an isotopic envelope at charge <paramref name="z"/> seeded on the spectrum peak at index
/// <paramref name="p"/>, by matching spectrum peaks against an averagine envelope of similar mass.
/// </summary>
/// <param name="spectrum">Spectrum providing the m/z (<c>XArray</c>) and intensity (<c>YArray</c>) values.</param>
/// <param name="p">Index of the seed peak in the spectrum arrays.</param>
/// <param name="z">Charge state to assume.</param>
/// <param name="deconvolutedPeaks">
/// Scratch buffer. It is cleared and refilled by this method; the returned envelope holds its own copy of the peaks.
/// </param>
/// <param name="alreadyClaimedMzs">Experimental m/z values that may not be used (neither as seed nor as isotope).</param>
/// <returns>
/// The envelope, or null when: the seed m/z is already claimed; no averagine envelope is available for the mass;
/// fewer than two peaks were matched; the observed intensity fraction is below <c>MinFractionIntensityRequired</c>;
/// or neither the full peak set nor any contiguous subset containing the seed peak reaches
/// <c>PearsonCorrelationRequired</c>.
/// </returns>
public DeconvolutedEnvelope GetIsotopicEnvelope(MzSpectrum spectrum, int p, int z, List<DeconvolutedPeak> deconvolutedPeaks, HashSet<double> alreadyClaimedMzs)
{
    double mz = spectrum.XArray[p];
    double intensity = spectrum.YArray[p];

    if (alreadyClaimedMzs.Contains(mz))
    {
        return null;
    }

    deconvolutedPeaks.Clear();
    double mass = mz.ToMass(z);

    // get the index of an averagine envelope close in mass
    int massIndex = GetMassIndex(mass) + 1;

    if (massIndex >= mostIntenseMasses.Length)
    {
        return null;
    }

    double[] averagineEnvelopeMasses = allMasses[massIndex];
    double[] averagineEnvelopeIntensities = allIntensities[massIndex];
    double monoMass = mass - diffToMonoisotopic[massIndex];

    // The seed peak is treated as the averagine isotope whose relative intensity is exactly 1.
    // If no such entry exists IndexOf returns -1 and the loop below does not run.
    int indOfMostIntense = Array.IndexOf(averagineEnvelopeIntensities, 1);

    // 1 is to the right, -1 is to the left in the envelope
    int isotopeDirection = 1;

    // Walk right from the most intense isotope until an isotope fails to match, then restart from the most
    // intense isotope and walk left until an isotope fails to match.
    // NOTE(review): if the rightward walk matches every isotope up to the end of the averagine array, the loop
    // condition ends the search and the left side is never examined — confirm whether that is intended.
    for (int i = indOfMostIntense; i < averagineEnvelopeMasses.Length && i >= 0; i += isotopeDirection)
    {
        double isotopeMassShift = averagineEnvelopeMasses[i] - averagineEnvelopeMasses[indOfMostIntense];
        double isotopeTheoreticalMass = mass + isotopeMassShift;
        double theoreticalIsotopeMz = isotopeTheoreticalMass.ToMz(z);
        double theoreticalIsotopeIntensity = averagineEnvelopeIntensities[i] * intensity;

        var peakIndex = spectrum.GetClosestPeakIndex(theoreticalIsotopeMz);

        //TODO: look for other peaks in the scan that could be this isotope that meet the m/z tolerance
        var isotopeExperMz = spectrum.XArray[peakIndex];
        var isotopeExperIntensity = spectrum.YArray[peakIndex];

        double intensityRatio = isotopeExperIntensity / theoreticalIsotopeIntensity;

        bool withinMassTol = PpmTolerance.Within(isotopeExperMz.ToMass(z), isotopeTheoreticalMass);
        bool withinIntensityTol = intensityRatio < IntensityRatioLimit && intensityRatio > 1 / IntensityRatioLimit;

        // the peak must not be claimed by another envelope, nor already used by this envelope
        bool unclaimedMz = !alreadyClaimedMzs.Contains(isotopeExperMz)
            && !deconvolutedPeaks.Select(peak => peak.ExperimentalMz).Contains(isotopeExperMz);

        if (withinMassTol && withinIntensityTol && unclaimedMz)
        {
            deconvolutedPeaks.Add(new DeconvolutedPeak(isotopeExperMz, theoreticalIsotopeMz, z, isotopeExperIntensity,
                theoreticalIsotopeIntensity, i, averagineEnvelopeIntensities[i]));
        }
        else if (isotopeDirection == 1)
        {
            // first miss on the right: turn around; the loop increment then moves to indOfMostIntense - 1
            isotopeDirection = -1;
            i = indOfMostIntense;
        }
        else
        {
            // miss on the left: the envelope is complete
            break;
        }
    }

    if (deconvolutedPeaks.Count < 2)
    {
        return null;
    }

    // calculate % intensity missing
    double sumIntensity = deconvolutedPeaks.Sum(peak => Math.Min(peak.ExperimentalIntensity, peak.TheoreticalIntensity));

    // the first peak in the buffer is the seed peak (it is the first one the loop above can add)
    double maxTheorIntensity = deconvolutedPeaks.First().TheoreticalIntensity;
    double expectedTotalIntensity = 0;

    for (int i = 0; i < averagineEnvelopeIntensities.Length; i++)
    {
        expectedTotalIntensity += averagineEnvelopeIntensities[i] * maxTheorIntensity;
    }

    double fracIntensityObserved = sumIntensity / expectedTotalIntensity;

    if (fracIntensityObserved < MinFractionIntensityRequired)
    {
        return null;
    }

    // calculate correlation to averagine
    double corr = Correlation.Pearson(
        deconvolutedPeaks.Select(peak => peak.ExperimentalIntensity),
        deconvolutedPeaks.Select(peak => peak.TheoreticalIntensity));

    // this is just to save memory, but quality filtering can happen outside of this method after the envelope
    // has been returned, if desired
    if (corr >= PearsonCorrelationRequired)
    {
        // create + return the isotopic envelope object (with its own copy of the buffered peaks)
        return new DeconvolutedEnvelope(deconvolutedPeaks.ToList(), monoMass, z, corr, fracIntensityObserved);
    }

    // see if a subset of the peaks is a valid envelope
    //TODO: calculate subsets of peaks that have been gathered. return the best one that meets the filtering criteria,
    // or all of the ones that meet the filtering criteria?
    var subsetEnvelopeCandidates = new List<DeconvolutedEnvelope>();
    var sortedPeaks = deconvolutedPeaks.OrderBy(peak => peak.TheoreticalMz).ToList();
    var subsetPeaks = new List<DeconvolutedPeak>();

    // Try every contiguous run [start, end] of at least two peaks (end >= start + 1 guarantees the size).
    for (int start = 0; start < sortedPeaks.Count; start++)
    {
        for (int end = sortedPeaks.Count - 1; end >= start + 1; end--)
        {
            subsetPeaks.Clear();

            for (int k = start; k <= end; k++)
            {
                subsetPeaks.Add(sortedPeaks[k]);
            }

            //TODO: make this more efficient by changing start + end
            // only subsets that still contain the seed peak are considered
            if (!subsetPeaks.Any(peak => peak.ExperimentalMz == mz))
            {
                continue;
            }

            double subsetCorr = Correlation.Pearson(
                subsetPeaks.Select(peak => peak.ExperimentalIntensity),
                subsetPeaks.Select(peak => peak.TheoreticalIntensity));

            double subsetSumIntensity = subsetPeaks.Sum(peak => Math.Min(peak.ExperimentalIntensity, peak.TheoreticalIntensity));
            double subsetFracObserved = subsetSumIntensity / expectedTotalIntensity;

            if (subsetCorr >= PearsonCorrelationRequired && subsetFracObserved >= MinFractionIntensityRequired)
            {
                // peaks are stored closest-to-seed first, so the seed peak is the envelope's first peak
                var subsetEnvelope = new DeconvolutedEnvelope(
                    subsetPeaks.OrderBy(peak => Math.Abs(peak.ExperimentalMz - mz)).ToList(),
                    monoMass, z, subsetCorr, subsetFracObserved);

                subsetEnvelopeCandidates.Add(subsetEnvelope);
            }
        }
    }

    // use the best subset isotopic envelope (null when no subset qualified)
    return subsetEnvelopeCandidates.OrderByDescending(envelope => envelope.Score).FirstOrDefault();
}