/// <summary>
/// Lazily enumerates the peptides obtained by replacing every
/// <c>ModificationWithMultiplePossibilitiesCollection</c> found on this peptide with one of the
/// concrete modifications it contains, covering every combination of choices.
/// </summary>
/// <remarks>
/// The expansion is recursive: the first multi-possibility modification is resolved on a copy of this
/// peptide, and any remaining ones are resolved by calling this method on that copy.
/// When this peptide carries no such modification, the sequence is empty.
/// </remarks>
/// <returns>One new <c>Peptide</c> per combination; this instance itself is never yielded.</returns>
public IEnumerable<Peptide> GenerateAllModificationCombinations()
{
    var pending = GetUniqueModifications<ModificationWithMultiplePossibilitiesCollection>().ToArray();

    // Recursion floor: nothing left to expand, so produce nothing.
    if (pending.Length < 1)
    {
        yield break;
    }

    ModificationWithMultiplePossibilitiesCollection current = pending[0];
    bool currentIsTheOnlyOne = pending.Length == 1;

    foreach (OldSchoolModification choice in current)
    {
        // Work on a copy built from this peptide so the original is left untouched.
        Peptide variant = new Peptide(this);

        // Swap the multi-possibility placeholder for this specific choice.
        variant.ReplaceModification(current, choice);

        if (currentIsTheOnlyOne)
        {
            // Fully resolved: hand the copy straight to the caller.
            yield return variant;
            continue;
        }

        // More placeholders remain on the copy; let the recursion resolve them.
        foreach (var resolved in variant.GenerateAllModificationCombinations())
        {
            yield return resolved;
        }
    }
}
/// <summary>
/// Extracts labeled MS1 and MS2 data points for the identifications in <c>goodIdentifications</c>.
/// </summary>
/// <remarks>
/// Identifications are processed in parallel over index ranges. For each one:
/// <list type="bullet">
/// <item>it is skipped when it has no full sequence, when its representative peptide has no
/// chemical-formula sequence, or when any of its modifications has a neutral-loss list other than a
/// single zero;</item>
/// <item>its MS2 scan is searched, and the identification is skipped when fewer than
/// <c>numFragmentsNeededForEveryIdentification</c> fragments were identified;</item>
/// <item>the MS2 points are always recorded, but the MS1 search (backward and forward from the MS2
/// scan) is run only the first time a given full sequence is seen.</item>
/// </list>
/// An empty identification list yields an empty result instead of throwing.
/// </remarks>
/// <returns>
/// A <c>DataPointAquisitionResults</c> carrying both point lists and the four considered/ignored counters.
/// </returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    Status("Extracting data points:");

    // Accumulators shared by all workers. MS1 state is guarded by ms1Gate, MS2 state by ms2Gate.
    int numMs1MassChargeCombinationsConsidered = 0;
    int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    int numMs2MassChargeCombinationsConsidered = 0;
    int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    var ms1Points = new List<LabeledMs1DataPoint>();
    var ms2Points = new List<LabeledMs2DataPoint>();

    // Full sequences whose MS1 search has already been claimed (guarded by ms2Gate).
    var sequencesSearchedInMs1 = new HashSet<string>();
    object ms1Gate = new object();
    object ms2Gate = new object();

    int numIdentifications = goodIdentifications.Count;

    // Partitioner.Create(0, 0) throws ArgumentOutOfRangeException (toExclusive must be greater than
    // fromInclusive), so the parallel loop is only started when there is something to process.
    if (numIdentifications > 0)
    {
        Parallel.ForEach(Partitioner.Create(0, numIdentifications), range =>
        {
            for (int matchIndex = range.Item1; matchIndex < range.Item2; matchIndex++)
            {
                PeptideSpectralMatch identification = goodIdentifications[matchIndex];

                // Each identification has an MS2 spectrum attached to it.
                int ms2scanNumber = identification.ScanNumber;
                int peptideCharge = identification.ScanPrecursorCharge;
                if (identification.FullSequence == null)
                {
                    continue;
                }

                var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

                // Build the peptide from the sequence that already includes the modification formulas.
                var sequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                if (sequenceWithChemicalFormulas == null
                    || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
                {
                    continue;
                }

                Proteomics.Peptide coolPeptide = new Proteomics.Peptide(sequenceWithChemicalFormulas);

                var (ms2DataPoints, ms2Considered, ms2Ignored, fragmentsIdentified) = SearchMS2Spectrum(
                    myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>,
                    coolPeptide,
                    peptideCharge,
                    identification);

                // If MS2 has low evidence for the peptide, skip and go to the next one.
                if (fragmentsIdentified < numFragmentsNeededForEveryIdentification)
                {
                    continue;
                }

                bool firstTimeSequenceIsSeen;
                lock (ms2Gate)
                {
                    ms2Points.AddRange(ms2DataPoints);
                    numMs2MassChargeCombinationsConsidered += ms2Considered;
                    numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2Ignored;

                    // HashSet.Add returns false when the sequence was already recorded.
                    firstTimeSequenceIsSeen = sequencesSearchedInMs1.Add(identification.FullSequence);
                }

                // Do not search the same sequence multiple times in MS1 scans.
                if (!firstTimeSequenceIsSeen)
                {
                    continue;
                }

                // Calculate the isotopic distribution of the full peptide, most intense isotopologue first.
                var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);
                double[] theoreticalMasses = dist.Masses.ToArray();
                double[] theoreticalIntensities = dist.Intensities.ToArray();
                Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer<double>.Create((x, y) => y.CompareTo(x)));

                var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);
                var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                lock (ms1Gate)
                {
                    ms1Points.AddRange(ms1tupleBack.Item1);
                    numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                    numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;

                    ms1Points.AddRange(ms1tupleForward.Item1);
                    numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                    numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                }
            }
        });
    }

    return new DataPointAquisitionResults(
        this,
        ms1Points,
        ms2Points,
        numMs1MassChargeCombinationsConsidered,
        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
        numMs2MassChargeCombinationsConsidered,
        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks);
}
/// <summary>
/// Searches one MS2 scan for the isotopic envelopes of the fragments of <paramref name="peptide"/>
/// and returns one averaged data point per fragment/charge combination that has enough matching peaks.
/// </summary>
/// <param name="ms2DataScan">The MS2 scan to search; its spectrum, scan window, retention time,
/// total ion current and injection time are read.</param>
/// <param name="peptide">Peptide that is fragmented with <c>fragmentTypesForCalibration</c>.</param>
/// <param name="peptideCharge">Highest charge state tried for each fragment (charges 1..this value).</param>
/// <param name="identification">Identification attached to every returned data point.</param>
/// <returns>
/// A tuple of: the averaged data points; the number of mass/charge combinations considered; the number
/// of those ignored because more than one peak fell within tolerance; and the number of fragments for
/// which at least one data point was produced.
/// </returns>
private (List<LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
{
    var result = new List<LabeledMs2DataPoint>();
    int numMs2MassChargeCombinationsConsidered = 0;
    int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    int numFragmentsIdentified = 0;

    if (ms2DataScan.MassSpectrum.Size == 0)
    {
        return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
    }

    // Observed peak m/z values already used for a data point in this scan; each peak is used at most once.
    var addedPeaks = new HashSet<double>();

    var scanWindowRange = ms2DataScan.ScanWindowRange;

    IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType<IHasChemicalFormula>().ToArray();

    foreach (var fragment in fragmentList)
    {
        // Isotopologue masses ordered by descending intensity; stays null until a monoisotopic hit is found.
        double[] masses = null;
        int numIsotopologues = 0;

        // First look for monoisotopic masses only, so the (expensive) distribution is computed
        // just for fragments that have at least one monoisotopic peak within tolerance.
        for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
        {
            var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
            if (monoisotopicMZ > scanWindowRange.Maximum)
            {
                continue;
            }
            if (monoisotopicMZ < scanWindowRange.Minimum)
            {
                break;
            }

            var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);
            if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ))
            {
                var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);
                masses = dist.Masses.ToArray();
                double[] intensities = dist.Intensities.ToArray();
                Array.Sort(intensities, masses, Comparer<double>.Create((x, y) => y.CompareTo(x)));
                numIsotopologues = intensities.Length;
                break;
            }
        }

        if (masses == null)
        {
            // No monoisotopic evidence for this fragment at any charge.
            continue;
        }

        bool fragmentIdentified = false;
        bool startingToAdd = false;
        for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
        {
            if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
            {
                continue;
            }
            if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
            {
                break;
            }

            var trainingPointsToAverage = new List<LabeledMs2DataPoint>();
            foreach (double a in masses)
            {
                double theMZ = a.ToMz(chargeToLookAt);
                var npwr = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));

                // Masses are visited from most to least intense; stop at the first one with no peak.
                if (npwr == 0)
                {
                    break;
                }

                numMs2MassChargeCombinationsConsidered++;

                // More than one candidate peak within tolerance: ambiguous, so do not use it.
                if (npwr > 1)
                {
                    numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                    continue;
                }

                var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                var closestPeakMZ = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                // HashSet.Add returns false when this observed peak was already consumed.
                if (addedPeaks.Add(closestPeakMZ))
                {
                    trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                }
            }

            if (trainingPointsToAverage.Count == 0)
            {
                // If we started adding and suddenly stopped, there is no need to look at higher charges.
                if (startingToAdd)
                {
                    break;
                }

                // Nothing to average. Without this guard, a threshold <= 0 would reach Average() on an
                // empty sequence, which throws InvalidOperationException.
                continue;
            }

            // Not enough isotopic peaks to trust this fragment/charge combination.
            if (trainingPointsToAverage.Count < Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, numIsotopologues))
            {
                continue;
            }

            startingToAdd = true;
            if (!fragmentIdentified)
            {
                // Count each fragment once, no matter how many charge states confirm it.
                fragmentIdentified = true;
                numFragmentsIdentified += 1;
            }

            result.Add(new LabeledMs2DataPoint(
                trainingPointsToAverage.Select(b => b.mz).Average(),
                ms2DataScan.RetentionTime,
                Math.Log(ms2DataScan.TotalIonCurrent),
                ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN,
                trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                identification));
        }
    }

    return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
}
/// <summary>
/// Checks that a single MS2 scan whose isolation window contains two precursors produces two
/// peptide-spectral matches, one for "NNNK" and one for "NDNK".
/// </summary>
/// <remarks>
/// A two-scan file is built in memory: an MS1 scan holding the merged isotopic distributions of both
/// peptides, and an MS2 scan isolated around the second-lowest MS1 m/z with a 2.5 isolation width.
/// Precursor deconvolution is enabled when the MS2 scans are extracted, and a classic search is run
/// against the protein "MNNNKNDNK" digested with a custom protease that cleaves at "K".
/// </remarks>
public static void TestCoIsolation()
{
    // --- Search configuration ---
    var customProtease = new Protease("Custom Protease", new List<string> { "K" }, new List<string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null);
    CommonParameters CommonParameters = new CommonParameters
    {
        DigestionParams = new DigestionParams
        {
            Protease = customProtease,
            MinPeptideLength = null,
        },
        ConserveMemory = false,
        ScoreCutoff = 1,
        DeconvolutionIntensityRatio = 50
    };

    var variableMods = new List<ModificationWithMass>();
    var fixedMods = new List<ModificationWithMass>();
    var proteins = new List<Protein> { new Protein("MNNNKNDNK", null) };
    var massDiffAcceptor = new SinglePpmAroundZeroSearchMode(5);

    // --- The two co-isolated peptides and their isotopic distributions ---
    Proteomics.Peptide firstPeptide = new Proteomics.Peptide("NNNK");
    Proteomics.Peptide secondPeptide = new Proteomics.Peptide("NDNK");
    var firstDistribution = IsotopicDistribution.GetDistribution(firstPeptide.GetChemicalFormula(), 0.1, 0.01);
    var secondDistribution = IsotopicDistribution.GetDistribution(secondPeptide.GetChemicalFormula(), 0.1, 0.01);

    IMzmlScan[] scans = new IMzmlScan[2];

    // --- MS1 scan: both distributions merged, sorted by mass, converted to m/z at charge 1 ---
    double[] ms1Intensities = { 0.8, 0.8, 0.2, 0.02, 0.2, 0.02 };
    var mergedMasses = firstDistribution.Masses.Concat(secondDistribution.Masses).OrderBy(b => b);
    double[] ms1Mzs = mergedMasses.Select(b => b.ToMz(1)).ToArray();
    double selectedIonMz = ms1Mzs[1];
    MzmlMzSpectrum ms1Spectrum = new MzmlMzSpectrum(ms1Mzs, ms1Intensities, false);
    scans[0] = new MzmlScan(1, ms1Spectrum, 1, false, Polarity.Positive, 1.0, new MzRange(300, 2000), "first spectrum", MZAnalyzerType.Unknown, ms1Spectrum.SumOfAllY, null, "scan=1");

    // --- MS2 scan: five equally intense fragment peaks ---
    double[] ms2Intensities = { 1, 1, 1, 1, 1 };
    double[] ms2Mzs = new[] { 146.106, 228.086, 229.07, 260.148, 342.129 }.Select(mass => mass.ToMz(1)).ToArray();
    MzmlMzSpectrum ms2Spectrum = new MzmlMzSpectrum(ms2Mzs, ms2Intensities, false);
    double isolationMz = selectedIonMz;
    scans[1] = new MzmlScanWithPrecursor(2, ms2Spectrum, 2, false, Polarity.Positive, 2.0, new MzRange(100, 1500), "second spectrum", MZAnalyzerType.Unknown, ms2Spectrum.SumOfAllY, selectedIonMz, null, null, isolationMz, 2.5, DissociationType.HCD, 1, null, null, "scan=2");

    var dataFile = new FakeMsDataFile(scans);

    // --- Extract MS2 scans with precursor deconvolution switched on ---
    bool doPrecursorDeconvolution = true;
    bool useProvidedPrecursorInfo = true;
    double deconvolutionIntensityRatio = 50;
    int deconvolutionMaxAssumedChargeState = 10;
    Tolerance deconvolutionMassTolerance = new PpmTolerance(5);

    var sortedMs2Scans = MetaMorpheusTask
        .GetMs2Scans(dataFile, null, doPrecursorDeconvolution, useProvidedPrecursorInfo, deconvolutionIntensityRatio, deconvolutionMaxAssumedChargeState, deconvolutionMassTolerance)
        .OrderBy(b => b.PrecursorMass)
        .ToArray();

    // --- Run the classic search ---
    PeptideSpectralMatch[] psms = new PeptideSpectralMatch[sortedMs2Scans.Length];
    List<ProductType> productTypes = new List<ProductType> { ProductType.B, ProductType.Y };
    var searchEngine = new ClassicSearchEngine(psms, sortedMs2Scans, variableMods, fixedMods, proteins, productTypes, massDiffAcceptor, false, CommonParameters, CommonParameters.ProductMassTolerance, new List<string>());
    searchEngine.Run();

    // Two matches for this single scan, corresponding to the two co-isolated masses.
    Assert.AreEqual(2, psms.Length);
    Assert.IsTrue(psms[0].Score > 1);
    Assert.AreEqual(2, psms[0].ScanNumber);

    // --- Resolve compact peptides back to protein-linked peptides ---
    var matchingEngine = new SequencesToActualProteinPeptidesEngine(
        new List<PeptideSpectralMatch> { psms[0], psms[1] },
        proteins,
        fixedMods,
        variableMods,
        productTypes,
        new List<IDigestionParams> { CommonParameters.DigestionParams },
        CommonParameters.ReportAllAmbiguity,
        new List<string>());
    var matchingResults = (SequencesToActualProteinPeptidesEngineResults)matchingEngine.Run();

    foreach (var psm in psms.Where(candidate => candidate != null))
    {
        psm.MatchToProteinLinkedPeptides(matchingResults.CompactPeptideToProteinPeptideMatching);
    }

    Assert.AreEqual("NNNK", psms[0].BaseSequence);
    Assert.AreEqual("NDNK", psms[1].BaseSequence);
}