/// <summary>
/// Records an isotopic envelope observed at the given scan index and elution time.
/// The scan-index and elution-time bounds are widened to include the observation, the envelope
/// is placed into the first existing mass group whose mass is within 0.5 of the envelope's
/// monoisotopic mass (a new group is created when none matches), and the summary values
/// (Mass, TotalNormalizedIntensity, MostIntenseEnvelope) are refreshed.
/// </summary>
/// <param name="isotopicEnvelope">The envelope to record.</param>
/// <param name="scanIndex">Index of the scan in which the envelope was observed.</param>
/// <param name="elutionTime">Elution time at which the envelope was observed.</param>
public void AddEnvelope(IsotopicEnvelope isotopicEnvelope, int scanIndex, double elutionTime)
{
    // Widen the observed scan-index and elution-time windows.
    MinScanIndex = Math.Min(scanIndex, MinScanIndex);
    MaxScanIndex = Math.Max(scanIndex, MaxScanIndex);
    MinElutionTime = Math.Min(elutionTime, MinElutionTime);
    MaxElutionTime = Math.Max(elutionTime, MaxElutionTime);

    // First group (in list order) whose mass lies within 0.5 of this envelope's monoisotopic mass.
    var targetGroup = groups.FirstOrDefault(
        candidate => Math.Abs(candidate.Mass - isotopicEnvelope.monoisotopicMass) < 0.5);

    bool isNewGroup = targetGroup == null;
    if (isNewGroup)
    {
        targetGroup = new DeconvolutionFeature();
    }

    // A new group receives the envelope before it is appended to the list.
    targetGroup.AddEnvelope(scanIndex, elutionTime, isotopicEnvelope);
    if (isNewGroup)
    {
        groups.Add(targetGroup);
    }

    // The reported mass is that of the group holding the most peaks;
    // the sort is stable, so the earliest group wins a tie.
    Mass = groups.OrderByDescending(g => g.NumPeaks).First().Mass;

    TotalNormalizedIntensity += isotopicEnvelope.totalIntensity / isotopicEnvelope.charge;

    // Track the single most intense envelope seen so far (strictly greater replaces).
    if (MostIntenseEnvelope == null || MostIntenseEnvelope.totalIntensity < isotopicEnvelope.totalIntensity)
    {
        MostIntenseEnvelope = isotopicEnvelope;
        MostIntenseEnvelopeElutionTime = elutionTime;
    }
}
// Mass tolerance must account for different isotope spacing! public IEnumerable <IsotopicEnvelope> Deconvolute(MzRange theRange, int minAssumedChargeState, int maxAssumedChargeState, double deconvolutionTolerancePpm, double intensityRatioLimit) { if (Size == 0) { yield break; } var isolatedMassesAndCharges = new List <IsotopicEnvelope>(); foreach (var candidateForMostIntensePeak in ExtractIndices(theRange.Minimum, theRange.Maximum)) { IsotopicEnvelope bestIsotopeEnvelopeForThisPeak = null; var candidateForMostIntensePeakMz = XArray[candidateForMostIntensePeak]; //Console.WriteLine("candidateForMostIntensePeakMz: " + candidateForMostIntensePeakMz); var candidateForMostIntensePeakIntensity = YArray[candidateForMostIntensePeak]; for (int chargeState = minAssumedChargeState; chargeState <= maxAssumedChargeState; chargeState++) { //Console.WriteLine(" chargeState: " + chargeState); var testMostIntenseMass = candidateForMostIntensePeakMz.ToMass(chargeState); var massIndex = Array.BinarySearch(mostIntenseMasses, testMostIntenseMass); if (massIndex < 0) { massIndex = ~massIndex; } if (massIndex == mostIntenseMasses.Length) { //Console.WriteLine("Breaking because mass is too high: " + testMostIntenseMass); break; } //Console.WriteLine(" massIndex: " + massIndex); var listOfPeaks = new List <(double, double)> { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) }; var listOfRatios = new List <double> { allIntensities[massIndex][0] / candidateForMostIntensePeakIntensity }; // Assuming the test peak is most intense... // Try to find the rest of the isotopes! 
double differenceBetweenTheorAndActual = testMostIntenseMass - mostIntenseMasses[massIndex]; double totalIntensity = candidateForMostIntensePeakIntensity; for (int indexToLookAt = 1; indexToLookAt < allIntensities[massIndex].Length; indexToLookAt++) { //Console.WriteLine(" indexToLookAt: " + indexToLookAt); double theorMassThatTryingToFind = allMasses[massIndex][indexToLookAt] + differenceBetweenTheorAndActual; //Console.WriteLine(" theorMassThatTryingToFind: " + theorMassThatTryingToFind); //Console.WriteLine(" theorMassThatTryingToFind.ToMz(chargeState): " + theorMassThatTryingToFind.ToMz(chargeState)); var closestPeakToTheorMass = GetClosestPeakIndex(theorMassThatTryingToFind.ToMz(chargeState)); var closestPeakmz = XArray[closestPeakToTheorMass.Value]; //Console.WriteLine(" closestPeakmz: " + closestPeakmz); var closestPeakIntensity = YArray[closestPeakToTheorMass.Value]; if (Math.Abs(closestPeakmz.ToMass(chargeState) - theorMassThatTryingToFind) / theorMassThatTryingToFind * 1e6 <= deconvolutionTolerancePpm && Peak2satisfiesRatio(allIntensities[massIndex][0], allIntensities[massIndex][indexToLookAt], candidateForMostIntensePeakIntensity, closestPeakIntensity, intensityRatioLimit) && !listOfPeaks.Contains((closestPeakmz, closestPeakIntensity))) { //Found a match to an isotope peak for this charge state! //Console.WriteLine(" * Found a match to an isotope peak for this charge state!"); //Console.WriteLine(" * chargeState: " + chargeState); //Console.WriteLine(" * closestPeakmz: " + closestPeakmz); listOfPeaks.Add((closestPeakmz, closestPeakIntensity)); totalIntensity += closestPeakIntensity; listOfRatios.Add(allIntensities[massIndex][indexToLookAt] / closestPeakIntensity); }
// Design note -- Shortreed's idea for top-down ms1 deconvolution (Jan. 6, 2020), NOT implemented:
//   Potential utility for non-isotopically resolved envelopes.
//   Deconvolute the whole spectrum by treating every peak as charge 1, 2, 3, etc. and recording the masses,
//   then do a histogram to find which masses have multiple charge states: those with high overlap are the real species.
//   Could possibly avoid doing the whole spectrum by just targeting areas of interest.
//   A method TopDownDeconvolution(int minAssumedChargeState, int maxAssumedChargeState) that cycles
//   through all charge states was sketched for this but never written.

/// <summary>
/// Searches the peaks inside <paramref name="theRange"/> for isotopic envelopes.
/// Every sufficiently intense peak is assumed, in turn, to be the most intense peak of an envelope;
/// candidate charge states are derived from the spacing to the following peaks, each candidate is
/// matched against the averagine table (<c>mostIntenseMasses</c>) and the best-scoring envelope per
/// peak is kept. Envelopes are finally returned in order of descending score, skipping any envelope
/// that shares a peak m/z with one already returned.
/// Mass tolerance must account for different isotope spacing!
/// </summary>
/// <param name="theRange">m/z range whose peaks are considered as envelope seeds.</param>
/// <param name="minAssumedChargeState">Lowest charge state that may be assigned.</param>
/// <param name="maxAssumedChargeState">Highest charge state that may be assigned.</param>
/// <param name="deconvolutionTolerancePpm">Tolerance (ppm) forwarded to the isotope-matching helpers.</param>
/// <param name="intensityRatioLimit">Intensity-ratio limit forwarded to the isotope-matching helpers.</param>
/// <returns>A lazily produced sequence of non-overlapping isotopic envelopes, best score first.</returns>
public IEnumerable<IsotopicEnvelope> Deconvolute(MzRange theRange, int minAssumedChargeState, int maxAssumedChargeState, double deconvolutionTolerancePpm, double intensityRatioLimit)
{
    //if no peaks, stop
    if (Size == 0)
    {
        yield break;
    }

    var isolatedMassesAndCharges = new List<IsotopicEnvelope>();

    (int start, int end) indexes = ExtractIndices(theRange.Minimum, theRange.Maximum);

    //find the most intense peak in the range
    double maxIntensity = 0;
    for (int index = indexes.start; index < indexes.end; index++)
    {
        if (YArray[index] > maxIntensity)
        {
            maxIntensity = YArray[index];
        }
    }

    //go through each peak in the selected range and assume it is the most intense peak of its isotopic envelope (if it's not, it will hopefully get a low score)
    //cycle through possible charge states and select the one that has the best score (fit) with the averagine model
    for (int candidateForMostIntensePeakIndex = indexes.start; candidateForMostIntensePeakIndex < indexes.end; candidateForMostIntensePeakIndex++)
    {
        double candidateForMostIntensePeakIntensity = YArray[candidateForMostIntensePeakIndex];

        //ignore peptides that are over 100 times less intense than the most intense peak in the range (heuristic from Top-Down yeast)
        if (candidateForMostIntensePeakIntensity * 100 >= maxIntensity)
        {
            IsotopicEnvelope bestIsotopeEnvelopeForThisPeak = null;
            double candidateForMostIntensePeakMz = XArray[candidateForMostIntensePeakIndex];

            //Find what charge states this peak might be based on the spacing of nearby peaks (assumes isotopic resolution)
            HashSet<int> allPossibleChargeStates = new HashSet<int>();
            for (int i = candidateForMostIntensePeakIndex + 1; i < XArray.Length; i++) //look at peaks of higher m/z
            {
                double deltaMass = XArray[i] - candidateForMostIntensePeakMz;
                if (deltaMass < 1.1) //if we're past a Th spacing, we're no longer looking at the closest isotope
                {
                    //get the lower bound charge state
                    int charge = (int)Math.Floor(1 / deltaMass); //e.g. deltaMass = 0.4 Th, charge is now 2 (but might be 3)
                    if (charge >= minAssumedChargeState && charge <= maxAssumedChargeState)
                    {
                        allPossibleChargeStates.Add(charge);
                    }

                    //get the upper bound charge state
                    charge++;
                    if (charge >= minAssumedChargeState && charge <= maxAssumedChargeState)
                    {
                        allPossibleChargeStates.Add(charge);
                    }
                }
                else
                {
                    break;
                }
            }

            //investigate the putative charge states
            foreach (int chargeState in allPossibleChargeStates)
            {
                //get the mass of this peak assuming it's the charge we're looking at
                double testMostIntenseMass = candidateForMostIntensePeakMz.ToMass(chargeState);

                //get the index of the theoretical isotopic envelope for an averagine model that's close in mass
                int massIndex = Array.BinarySearch(mostIntenseMasses, testMostIntenseMass);
                if (massIndex < 0)
                {
                    massIndex = ~massIndex;
                }
                if (massIndex == mostIntenseMasses.Length)
                {
                    //This charge state gives a mass beyond the end of the averagine table, so it cannot be modelled.
                    //Skip only this charge state: the candidates come from a HashSet, whose enumeration order is
                    //not guaranteed, so a too-heavy candidate tells us nothing about the ones still to be visited.
                    continue;
                }
                if (massIndex != 0 && mostIntenseMasses[massIndex] - testMostIntenseMass > testMostIntenseMass - mostIntenseMasses[massIndex - 1])
                {
                    //the entry below is closer in mass than the entry above
                    massIndex--;
                }

                //create a list for each isotopic peak from this envelope. This is used to fine tune the monoisotopic mass and is populated in "FindIsotopicEnvelope"
                List<double> monoisotopicMassPredictions = new List<double>();

                //Look for other isotopes using the assumed charge state
                IsotopicEnvelope putativeIsotopicEnvelope = FindIsotopicEnvelope(massIndex, candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity,
                    testMostIntenseMass, chargeState, deconvolutionTolerancePpm, intensityRatioLimit, monoisotopicMassPredictions);

                if (putativeIsotopicEnvelope.Peaks.Count >= 2) //if there are at least two isotopes
                {
                    //look for other charge states, using them for scoring and monoisotopic mass estimates
                    //need to use this method before comparing scores because it changes the score of the test envelope
                    int numOtherChargeStatesObserved = ObserveAdjacentChargeStates(putativeIsotopicEnvelope, candidateForMostIntensePeakMz, massIndex,
                        deconvolutionTolerancePpm, intensityRatioLimit, minAssumedChargeState, maxAssumedChargeState, monoisotopicMassPredictions);

                    //is this the best charge state for this peak?
                    //the score must beat the other charge states tried so far, and
                    //if we suspect there to be multiple charge states, they must have been observed
                    //(the higher the charge, the more states expected: z=5 needs 2 charge states, z=10 needs 3 charge states, etc.)
                    if ((bestIsotopeEnvelopeForThisPeak == null || putativeIsotopicEnvelope.Score > bestIsotopeEnvelopeForThisPeak.Score)
                        && (putativeIsotopicEnvelope.Charge / 5 <= numOtherChargeStatesObserved))
                    {
                        putativeIsotopicEnvelope.SetMedianMonoisotopicMass(monoisotopicMassPredictions); //take the median mass from all of the isotopes (this is fine tuning!)
                        bestIsotopeEnvelopeForThisPeak = putativeIsotopicEnvelope;
                    }
                }
            }

            if (bestIsotopeEnvelopeForThisPeak != null) //add this envelope (it might be wrong, but hopefully it has a low score and gets outscored later by the right thing)
            {
                isolatedMassesAndCharges.Add(bestIsotopeEnvelopeForThisPeak);
            }
        }
    }

    //report envelopes best-score first; an envelope that reuses a peak already claimed by a better-scoring envelope is dropped
    HashSet<double> seen = new HashSet<double>();
    foreach (var ok in isolatedMassesAndCharges.OrderByDescending(b => b.Score))
    {
        var peakMzs = ok.Peaks.Select(b => b.mz).ToList();
        if (seen.Overlaps(peakMzs))
        {
            continue;
        }
        seen.UnionWith(peakMzs);
        yield return ok;
    }
}