/// <summary>
/// Looks up the indexed peak of one scan whose mass lies within <paramref name="tolerance"/> of
/// <paramref name="theorMass"/> and is closest to it.
/// </summary>
/// <param name="theorMass">Theoretical mass to search for; it is converted to m/z using <paramref name="chargeState"/>.</param>
/// <param name="zeroBasedScanIndex">Scan index a peak must carry (<c>ZeroBasedMs1ScanIndex</c>) to be considered.</param>
/// <param name="tolerance">Tolerance that defines the searched mass window and the acceptance test.</param>
/// <param name="chargeState">Charge used for the mass/m/z conversions.</param>
/// <returns>The matching peak with the smallest absolute mass error, or <c>null</c> when no peak matches.</returns>
public IndexedMassSpectralPeak GetIndexedPeak(double theorMass, int zeroBasedScanIndex, Tolerance tolerance, int chargeState)
{
    // Bins are addressed by m/z * BinsPerDalton. Clamp the window to the valid index range so that
    // a window reaching below zero or past the last bin cannot index outside the array.
    int floorMz = (int)Math.Floor(tolerance.GetMinimumValue(theorMass).ToMz(chargeState) * BinsPerDalton);
    int ceilingMz = (int)Math.Ceiling(tolerance.GetMaximumValue(theorMass).ToMz(chargeState) * BinsPerDalton);
    int firstBin = Math.Max(floorMz, 0);
    int lastBin = Math.Min(ceilingMz, _indexedPeaks.Length - 1);

    IndexedMassSpectralPeak bestPeak = null;
    double bestError = double.NaN; // only meaningful while bestPeak != null

    for (int binIndex = firstBin; binIndex <= lastBin; binIndex++)
    {
        List<IndexedMassSpectralPeak> bin = _indexedPeaks[binIndex];
        if (bin == null)
        {
            continue;
        }

        // Start at the position reported by the binary search and stop as soon as the peaks
        // belong to a later scan than the requested one.
        for (int i = BinarySearchForIndexedPeak(bin, zeroBasedScanIndex); i < bin.Count; i++)
        {
            IndexedMassSpectralPeak peak = bin[i];

            if (peak.ZeroBasedMs1ScanIndex > zeroBasedScanIndex)
            {
                break;
            }

            if (peak.ZeroBasedMs1ScanIndex != zeroBasedScanIndex)
            {
                continue;
            }

            double expMass = peak.Mz.ToMass(chargeState);
            if (!tolerance.Within(expMass, theorMass))
            {
                continue;
            }

            // Keep the candidate with the smallest absolute mass error; ties keep the earlier peak.
            double error = Math.Abs(expMass - theorMass);
            if (bestPeak == null || error < bestError)
            {
                bestPeak = peak;
                bestError = error;
            }
        }
    }

    return bestPeak;
}
/// <summary>
/// Checks the bounds reported for the value 1 by a tolerance constructed with
/// (<c>ToleranceUnit.Absolute</c>, 9, 10): the maximum must be 2 and the minimum 0.
/// </summary>
public void ToleranceMinMaxTest()
{
    Tolerance tolerance = new Tolerance(ToleranceUnit.Absolute, 9, 10);

    // Upper bound first, then lower bound, both evaluated around the value 1.
    Assert.AreEqual(2, tolerance.GetMaximumValue(1));
    Assert.AreEqual(0, tolerance.GetMinimumValue(1));
}
/// <summary>
/// Checks a tolerance parsed from the string "10 ppm": its value must be 10 and its window
/// around 1 must extend by 1e-5 / 2 on each side.
/// </summary>
public void MassToleranceImplicitValue()
{
    // Expected distance from the centre value 1 to either bound.
    const double expectedHalfWidth = 1e-5 / 2;

    Tolerance tolerance = new Tolerance("10 ppm");

    Assert.AreEqual(10, tolerance.Value);
    Assert.AreEqual(1 + expectedHalfWidth, tolerance.GetMaximumValue(1));
    Assert.AreEqual(1 - expectedHalfWidth, tolerance.GetMinimumValue(1));
}
/// <summary>
/// Collects MS2 data points for one peptide by matching the isotopic peaks of its fragments
/// against the peaks of a single MS2 scan.
/// </summary>
/// <param name="ms2DataScan">The MS2 scan whose spectrum is searched.</param>
/// <param name="peptide">Peptide that is fragmented (using <c>fragmentTypesForCalibration</c>) to obtain the theoretical fragments.</param>
/// <param name="peptideCharge">Highest charge state tried for every fragment (charges 1..peptideCharge).</param>
/// <param name="identification">Identification attached to every data point added to the result.</param>
/// <returns>
/// A tuple of: the collected data points, the number of mass/charge combinations considered,
/// the number of those skipped because more than one peak fell inside the tolerance window,
/// and the number of fragments for which at least one data point was produced.
/// </returns>
private (List<LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
{
    List<LabeledMs2DataPoint> result = new List<LabeledMs2DataPoint>();
    int numMs2MassChargeCombinationsConsidered = 0;
    int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    int numFragmentsIdentified = 0;

    if (ms2DataScan.MassSpectrum.Size == 0)
    {
        return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
    }

    // m/z values of experimental peaks that already produced a training point in this scan;
    // each experimental peak is used at most once.
    var usedPeakMzs = new HashSet<double>();

    var scanWindowRange = ms2DataScan.ScanWindowRange;
    IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType<IHasChemicalFormula>().ToArray();

    foreach (var fragment in fragmentList)
    {
        bool fragmentIdentified = false;
        bool computedIsotopologues = false;
        double[] masses = new double[0];
        double[] intensities = new double[0];

        // First look for monoisotopic masses only; the isotopic distribution is computed
        // lazily, once a monoisotopic peak has actually been seen at some charge.
        for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
        {
            var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
            if (monoisotopicMZ > scanWindowRange.Maximum)
            {
                continue;
            }
            if (monoisotopicMZ < scanWindowRange.Minimum)
            {
                break;
            }

            var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);
            if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ))
            {
                var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);
                masses = dist.Masses.ToArray();
                intensities = dist.Intensities.ToArray();

                // Order both arrays by descending intensity (intensities are the sort keys).
                Array.Sort(intensities, masses, Comparer<double>.Create((x, y) => y.CompareTo(x)));
                computedIsotopologues = true;
                break;
            }
        }

        if (!computedIsotopologues)
        {
            continue;
        }

        bool startingToAdd = false;
        for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
        {
            if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
            {
                continue;
            }
            if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
            {
                break;
            }

            var trainingPointsToAverage = new List<LabeledMs2DataPoint>();
            foreach (double a in masses)
            {
                double theMZ = a.ToMz(chargeToLookAt);
                var npwr = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));

                // No peak for this isotopologue: stop looking at the remaining ones.
                if (npwr == 0)
                {
                    break;
                }

                numMs2MassChargeCombinationsConsidered++;

                // Ambiguous match (several peaks in the window): skip this isotopologue.
                if (npwr > 1)
                {
                    numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                    continue;
                }

                var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                var closestPeakMZ = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                // HashSet.Add returns false when the peak was already used.
                if (usedPeakMzs.Add(closestPeakMZ))
                {
                    trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                }
            }

            // If started adding and suddenly stopped, go to next one, no need to look at higher charges
            if (trainingPointsToAverage.Count == 0 && startingToAdd)
            {
                break;
            }

            // Only accept this charge state when enough isotopic peaks were matched.
            if (trainingPointsToAverage.Count >= Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, intensities.Length))
            {
                startingToAdd = true;
                if (!fragmentIdentified)
                {
                    fragmentIdentified = true;
                    numFragmentsIdentified += 1;
                }

                // One averaged data point per accepted fragment/charge combination.
                result.Add(new LabeledMs2DataPoint(
                    trainingPointsToAverage.Select(b => b.mz).Average(),
                    ms2DataScan.RetentionTime,
                    Math.Log(ms2DataScan.TotalIonCurrent),
                    ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN,
                    trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                    trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                    identification));
            }
        }
    }

    return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
}
/// <summary>
/// Walks through MS1 scans starting next to an MS2 scan and collects data points by matching the
/// theoretical isotopic masses of a peptide, at several charge states, against each MS1 spectrum.
/// The walk stops at the file boundaries, at an empty MS1 spectrum, or at the first MS1 scan that
/// yields no data point.
/// </summary>
/// <param name="theoreticalMasses">Theoretical isotopic masses; the first entry decides whether a charge state falls inside the scan window.</param>
/// <param name="theoreticalIntensities">Theoretical intensities; only the first value and the array length are read here.</param>
/// <param name="ms2spectrumIndex">One-based index of the MS2 scan the search starts from.</param>
/// <param name="direction">Step added to the scan index after every scan. When it equals 1 the walk starts at <paramref name="ms2spectrumIndex"/>, otherwise at <paramref name="ms2spectrumIndex"/> - 1.</param>
/// <param name="peptideCharge">Initial value for the highest known charge of the peptide.</param>
/// <param name="identification">Identification attached to every data point added to the result.</param>
/// <returns>
/// A tuple of: the collected data points, the number of mass/charge combinations considered, and
/// the number of those skipped because more than one peak fell inside the tolerance window.
/// </returns>
private (List<LabeledMs1DataPoint>, int, int) SearchMS1Spectra(double[] theoreticalMasses, double[] theoreticalIntensities, int ms2spectrumIndex, int direction, int peptideCharge, PeptideSpectralMatch identification)
{
    List<LabeledMs1DataPoint> result = new List<LabeledMs1DataPoint>();
    int numMs1MassChargeCombinationsConsidered = 0;
    int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;

    int theIndex = direction == 1 ? ms2spectrumIndex : ms2spectrumIndex - 1;
    bool addedAscan = true;
    int highestKnownChargeForThisPeptide = peptideCharge;

    while (theIndex >= 1 && theIndex <= myMsDataFile.NumSpectra && addedAscan)
    {
        var fullMS1scan = myMsDataFile.GetOneBasedScan(theIndex);

        // Scans of higher MS order are stepped over without affecting the stop condition.
        if (fullMS1scan.MsnOrder > 1)
        {
            theIndex += direction;
            continue;
        }

        addedAscan = false;
        var scanWindowRange = fullMS1scan.ScanWindowRange;
        var fullMS1spectrum = fullMS1scan.MassSpectrum;
        if (fullMS1spectrum.Size == 0)
        {
            break;
        }

        bool startingToAddCharges = false;
        int chargeToLookAt = 1;
        do
        {
            // Charge too low: the first theoretical m/z is above the scan window, try the next charge.
            if (theoreticalMasses[0].ToMz(chargeToLookAt) > scanWindowRange.Maximum)
            {
                chargeToLookAt++;
                continue;
            }

            // Charge too high: the m/z fell below the scan window, higher charges cannot help.
            if (theoreticalMasses[0].ToMz(chargeToLookAt) < scanWindowRange.Minimum)
            {
                break;
            }

            var trainingPointsToAverage = new List<LabeledMs1DataPoint>();
            foreach (double a in theoreticalMasses)
            {
                double theMZ = a.ToMz(chargeToLookAt);
                var npwr = fullMS1spectrum.NumPeaksWithinRange(mzToleranceForMs1Search.GetMinimumValue(theMZ), mzToleranceForMs1Search.GetMaximumValue(theMZ));

                // No peak for this isotopologue: stop looking at the remaining ones.
                if (npwr == 0)
                {
                    break;
                }

                numMs1MassChargeCombinationsConsidered++;

                // Ambiguous match (several peaks in the window): skip this isotopologue.
                if (npwr > 1)
                {
                    numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                    continue;
                }

                var closestPeakIndex = fullMS1spectrum.GetClosestPeakIndex(theMZ);
                var closestPeakMZ = fullMS1spectrum.XArray[closestPeakIndex.Value];

                // A unique match at this charge extends the range of charges that will be tried.
                highestKnownChargeForThisPeptide = Math.Max(highestKnownChargeForThisPeptide, chargeToLookAt);
                trainingPointsToAverage.Add(new LabeledMs1DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(fullMS1spectrum.YArray[closestPeakIndex.Value]), theMZ, null));
            }

            // If started adding and suddenly stopped, go to next one, no need to look at higher charges
            if (trainingPointsToAverage.Count == 0 && startingToAddCharges)
            {
                break;
            }

            // A lone matched peak is not trusted when the first theoretical intensity is below 0.65.
            bool singleWeakPeak = trainingPointsToAverage.Count == 1 && theoreticalIntensities[0] < 0.65;

            // Nothing usable at or beyond the peptide's own charge: stop trying higher charges.
            if ((trainingPointsToAverage.Count == 0 || singleWeakPeak) && peptideCharge <= chargeToLookAt)
            {
                break;
            }

            bool tooFewPeaks = trainingPointsToAverage.Count < Math.Min(minMS1isotopicPeaksNeededForConfirmedIdentification, theoreticalIntensities.Length);
            if (!singleWeakPeak && !tooFewPeaks)
            {
                addedAscan = true;
                startingToAddCharges = true;

                // One averaged data point per accepted scan/charge combination.
                result.Add(new LabeledMs1DataPoint(
                    trainingPointsToAverage.Select(b => b.mz).Average(),
                    fullMS1scan.RetentionTime,
                    Math.Log(fullMS1scan.TotalIonCurrent),
                    fullMS1scan.InjectionTime.HasValue ? Math.Log(fullMS1scan.InjectionTime.Value) : double.NaN,
                    trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                    trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                    identification));
            }

            chargeToLookAt++;
        } while (chargeToLookAt <= highestKnownChargeForThisPeptide + 1);

        theIndex += direction;
    }

    return (result, numMs1MassChargeCombinationsConsidered, numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks);
}
/// <summary>
/// Matches the peaks of a scan against a list of theoretical product masses in a single forward
/// pass and records every match. Optionally repeats the pass on complementary masses derived from
/// the precursor mass for each requested dissociation type.
/// </summary>
/// <param name="thisScan">Scan whose spectrum (<c>MassSpectrum.XArray</c>) provides the experimental m/z values.</param>
/// <param name="productMassTolerance">Tolerance used to decide whether an experimental value matches a theoretical one.</param>
/// <param name="sortedTheoreticalProductMassesForThisPeptide">
/// Theoretical product masses. Leading NaN entries are skipped. The single forward pass presumes
/// ascending order, as the parameter name suggests.
/// </param>
/// <param name="matchedIonMassesList">Receives the theoretical mass of every match.</param>
/// <param name="productMassErrorDa">Receives experimental minus theoretical mass for every match.</param>
/// <param name="productMassErrorPpm">Receives the same error scaled by 1,000,000 / theoretical mass.</param>
/// <param name="precursorMass">Precursor mass used to build complementary masses.</param>
/// <param name="dissociationTypes">Dissociation types for which complementary masses are generated; only read when <paramref name="addCompIons"/> is true.</param>
/// <param name="addCompIons">Whether the complementary pass is performed.</param>
/// <exception cref="NotImplementedException">
/// A dissociation type has no entry in <c>complementaryIonConversionDictionary</c>.
/// </exception>
public static void MatchIons(IMsDataScan<IMzSpectrum<IMzPeak>> thisScan, Tolerance productMassTolerance, double[] sortedTheoreticalProductMassesForThisPeptide, List<double> matchedIonMassesList, List<double> productMassErrorDa, List<double> productMassErrorPpm, double precursorMass, List<DissociationType> dissociationTypes, bool addCompIons)
{
    var TotalProductsHere = sortedTheoreticalProductMassesForThisPeptide.Length;
    if (TotalProductsHere == 0)
    {
        return;
    }

    // Index of the first theoretical mass that is not NaN (or the last index if all are NaN).
    // Both passes below start from it.
    int firstTheoreticalIndex = 0;
    while (firstTheoreticalIndex < TotalProductsHere - 1 && double.IsNaN(sortedTheoreticalProductMassesForThisPeptide[firstTheoreticalIndex]))
    {
        firstTheoreticalIndex++;
    }

    int currentTheoreticalIndex = firstTheoreticalIndex;
    double currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];
    if (double.IsNaN(currentTheoreticalMass))
    {
        return;
    }

    double currentTheoreticalMz = currentTheoreticalMass + Constants.protonMass;
    int testTheoreticalIndex;
    double testTheoreticalMass;
    double testTheoreticalMz;

    // speed optimizations
    double[] experimental_mzs = thisScan.MassSpectrum.XArray;
    int numExperimentalPeaks = experimental_mzs.Length;

    // Loop over all experimental indices
    for (int experimentalIndex = 0; experimentalIndex < numExperimentalPeaks; experimentalIndex++)
    {
        double currentExperimentalMz = experimental_mzs[experimentalIndex];

        // If found match
        if (productMassTolerance.Within(currentExperimentalMz, currentTheoreticalMz))
        {
            matchedIonMassesList.Add(currentTheoreticalMass);
            double currentExperimentalMass = currentExperimentalMz - Constants.protonMass;
            productMassErrorDa.Add(currentExperimentalMass - currentTheoreticalMass);
            productMassErrorPpm.Add((currentExperimentalMass - currentTheoreticalMass) * 1000000 / currentTheoreticalMass);

            currentTheoreticalIndex++;
            if (currentTheoreticalIndex == TotalProductsHere)
            {
                break;
            }
            currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];
            currentTheoreticalMz = currentTheoreticalMass + Constants.protonMass;
        }
        // Else if the experimental value is already past the current theoretical one
        else if (currentExperimentalMz > currentTheoreticalMz)
        {
            // Move on to next index and never come back!
            currentTheoreticalIndex++;
            if (currentTheoreticalIndex == TotalProductsHere)
            {
                break;
            }
            currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];
            currentTheoreticalMz = currentTheoreticalMass + Constants.protonMass;

            // Start with the current ones
            testTheoreticalIndex = currentTheoreticalIndex;
            testTheoreticalMass = currentTheoreticalMass;
            testTheoreticalMz = currentTheoreticalMz;

            // Mark the skipped theoreticals as not found. The last one is not for sure, might be flipped!
            while (currentExperimentalMz > testTheoreticalMz)
            {
                // Store old info for possible reuse
                currentTheoreticalMz = testTheoreticalMz;
                currentTheoreticalMass = testTheoreticalMass;
                currentTheoreticalIndex = testTheoreticalIndex;

                // Update test stuff!
                testTheoreticalIndex++;
                if (testTheoreticalIndex == TotalProductsHere)
                {
                    break;
                }
                testTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[testTheoreticalIndex];
                testTheoreticalMz = testTheoreticalMass + Constants.protonMass;
            }

            // Re-examine the same experimental peak against the updated theoretical value.
            experimentalIndex--;
        }
    }

    if (!addCompIons)
    {
        return;
    }

    double[] complementaryMasses = new double[numExperimentalPeaks];
    foreach (DissociationType dissociationType in dissociationTypes)
    {
        if (!complementaryIonConversionDictionary.TryGetValue(dissociationType, out double protonMassShift))
        {
            throw new NotImplementedException();
        }

        // Restart the theoretical cursor for every dissociation type.
        currentTheoreticalIndex = firstTheoreticalIndex;
        currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];

        // mass shift needed to reobtain the original product ion for calculating tolerance
        double massShiftForComplementaryConversion = precursorMass + protonMassShift;

        // Fill in reverse so that complementary masses ascend when the experimental m/z values ascend.
        for (int i = numExperimentalPeaks - 1; i >= 0; i--)
        {
            complementaryMasses[numExperimentalPeaks - i - 1] = massShiftForComplementaryConversion - experimental_mzs[i];
        }

        // Loop over all experimental indices
        for (int experimentalIndex = 0; experimentalIndex < numExperimentalPeaks; experimentalIndex++)
        {
            double currentExperimentalMass = complementaryMasses[experimentalIndex];
            double originalExperimentalMass = massShiftForComplementaryConversion - currentExperimentalMass;

            // The tolerance window is evaluated on the original value and then shifted onto the complementary one.
            double minBoundary = currentExperimentalMass - originalExperimentalMass + productMassTolerance.GetMinimumValue(originalExperimentalMass);
            double maxBoundary = currentExperimentalMass - originalExperimentalMass + productMassTolerance.GetMaximumValue(originalExperimentalMass);

            // If found match
            if (minBoundary < currentTheoreticalMass && maxBoundary > currentTheoreticalMass)
            {
                matchedIonMassesList.Add(currentTheoreticalMass);
                productMassErrorDa.Add(currentExperimentalMass - currentTheoreticalMass);
                productMassErrorPpm.Add((currentExperimentalMass - currentTheoreticalMass) * 1000000 / currentTheoreticalMass);

                currentTheoreticalIndex++;
                if (currentTheoreticalIndex == TotalProductsHere)
                {
                    break;
                }
                currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];
            }
            // Else if for sure passed a theoretical
            else if (currentExperimentalMass > currentTheoreticalMass)
            {
                // Move on to next index and never come back!
                currentTheoreticalIndex++;
                if (currentTheoreticalIndex == TotalProductsHere)
                {
                    break;
                }
                currentTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[currentTheoreticalIndex];

                // Start with the current ones
                testTheoreticalIndex = currentTheoreticalIndex;
                testTheoreticalMass = currentTheoreticalMass;

                // Mark the skipped theoreticals as not found. The last one is not for sure, might be flipped!
                while (currentExperimentalMass > testTheoreticalMass)
                {
                    // Store old info for possible reuse
                    currentTheoreticalMass = testTheoreticalMass;
                    currentTheoreticalIndex = testTheoreticalIndex;

                    // Update test stuff!
                    testTheoreticalIndex++;
                    if (testTheoreticalIndex == TotalProductsHere)
                    {
                        break;
                    }
                    testTheoreticalMass = sortedTheoreticalProductMassesForThisPeptide[testTheoreticalIndex];
                }

                // Re-examine the same complementary mass against the updated theoretical value.
                experimentalIndex--;
            }
        }
    }
}
/// <summary>
/// The main searching algorithm of Morpheus
/// </summary>
/// <param name="eMasses">The experimental masses</param>
/// <param name="eIntenisties">The experimental intensities</param>
/// <param name="tMasses">The theoretical masses</param>
/// <param name="productTolerance">The product mass tolerance</param>
/// <param name="tic">The total ion current of the experimental peaks</param>
/// <returns>
/// The number of theoretical masses that have an experimental peak inside their tolerance range,
/// plus the summed intensity of the peaks counted for those ranges divided by <paramref name="tic"/>.
/// Returns 0 when either mass array is empty.
/// </returns>
private double Search(double[] eMasses, double[] eIntenisties, double[] tMasses, Tolerance productTolerance, double tic)
{
    int eLength = eMasses.Length;
    int tLength = tMasses.Length;

    // Nothing can match without peaks on both sides; this also protects the [length - 1] reads below.
    if (eLength == 0 || tLength == 0)
    {
        return 0.0;
    }

    double score = 0.0;
    double intensities = 0.0;
    int e = 0;

    // NOTE(review): e only moves forward, which presumes both arrays are sorted ascending - confirm with callers.
    // When the lower bound of the last theoretical mass reaches the last experimental mass, the
    // experimental cursor may run off the end, so the bounds-checked loop is required. Otherwise
    // (for ascending input) a peak at or above every lower bound exists and the check is skipped.
    bool forceCheck = productTolerance.GetMinimumValue(tMasses[tLength - 1]) >= eMasses[eLength - 1];

    if (forceCheck)
    {
        foreach (double t in tMasses)
        {
            IRange<double> range = productTolerance.GetRange(t);
            double minMZ = range.Minimum;
            double maxMZ = range.Maximum;

            while (e < eLength && eMasses[e] < minMZ)
            {
                e++;
            }

            if (e >= eLength)
            {
                break;
            }

            if (eMasses[e] > maxMZ)
            {
                continue;
            }

            score++;

            int index = e; // switch variables to keep e the same for the next loop around
            do
            {
                intensities += eIntenisties[index];
                index++;
            } while (index < eLength && eMasses[index] < maxMZ);
        }
    }
    else
    {
        foreach (double t in tMasses)
        {
            IRange<double> range = productTolerance.GetRange(t);
            double minMZ = range.Minimum;
            double maxMZ = range.Maximum;

            while (eMasses[e] < minMZ)
            {
                e++;
            }

            if (eMasses[e] > maxMZ)
            {
                continue;
            }

            score++;

            int index = e; // switch variables to keep e the same for the next loop around
            do
            {
                intensities += eIntenisties[index];
                index++;
            } while (index < eLength && eMasses[index] < maxMZ);
        }
    }

    return score + intensities / tic;
}
/// <summary>
/// The main searching algorithm of Morpheus
/// </summary>
/// <param name="eMasses">The experimental masses</param>
/// <param name="eIntenisties">The experimental intensities</param>
/// <param name="tMasses">The theoretical masses</param>
/// <param name="productTolerance">The product mass tolerance</param>
/// <param name="tic">The total ion current of the experimental peaks</param>
/// <returns>
/// The number of theoretical masses that have an experimental peak inside their tolerance range,
/// plus the summed intensity of the peaks counted for those ranges divided by <paramref name="tic"/>.
/// Returns 0 when either mass array is empty.
/// </returns>
private double Search(double[] eMasses, double[] eIntenisties, double[] tMasses, Tolerance productTolerance, double tic)
{
    int eLength = eMasses.Length;
    int tLength = tMasses.Length;

    // Nothing can match without peaks on both sides; this also protects the [length - 1] reads below.
    if (eLength == 0 || tLength == 0)
    {
        return 0.0;
    }

    double score = 0.0;
    double intensities = 0.0;
    int e = 0;

    // NOTE(review): e only moves forward, which presumes both arrays are sorted ascending - confirm with callers.
    // When the lower bound of the last theoretical mass reaches the last experimental mass, the
    // experimental cursor may run off the end, so the bounds-checked loop is required. Otherwise
    // (for ascending input) a peak at or above every lower bound exists and the check is skipped.
    bool forceCheck = productTolerance.GetMinimumValue(tMasses[tLength - 1]) >= eMasses[eLength - 1];

    if (forceCheck)
    {
        foreach (double t in tMasses)
        {
            IRange<double> range = productTolerance.GetRange(t);
            double minMZ = range.Minimum;
            double maxMZ = range.Maximum;

            while (e < eLength && eMasses[e] < minMZ)
            {
                e++;
            }

            if (e >= eLength)
            {
                break;
            }

            if (eMasses[e] > maxMZ)
            {
                continue;
            }

            score++;

            int index = e; // switch variables to keep e the same for the next loop around
            do
            {
                intensities += eIntenisties[index];
                index++;
            } while (index < eLength && eMasses[index] < maxMZ);
        }
    }
    else
    {
        foreach (double t in tMasses)
        {
            IRange<double> range = productTolerance.GetRange(t);
            double minMZ = range.Minimum;
            double maxMZ = range.Maximum;

            while (eMasses[e] < minMZ)
            {
                e++;
            }

            if (eMasses[e] > maxMZ)
            {
                continue;
            }

            score++;

            int index = e; // switch variables to keep e the same for the next loop around
            do
            {
                intensities += eIntenisties[index];
                index++;
            } while (index < eLength && eMasses[index] < maxMZ);
        }
    }

    return score + intensities / tic;
}