Пример #1
0
        public IEnumerable <Peptide> GenerateAllModificationCombinations()
        {
            // Get all the modifications that are isotopologues
            var isotopologues = GetUniqueModifications <ModificationWithMultiplePossibilitiesCollection>().ToArray();

            // Base condition, no more isotopologues to make, so just return
            if (isotopologues.Length < 1)
            {
                yield break;
            }

            // Grab the the first isotopologue
            ModificationWithMultiplePossibilitiesCollection isotopologue = isotopologues[0];

            // Loop over each modification in the isotopologue
            foreach (OldSchoolModification mod in isotopologue)
            {
                // Create a clone of the peptide, cloning modifications as well.
                Peptide peptide = new Peptide(this);

                // Replace the base isotopologue mod with the specific version
                peptide.ReplaceModification(isotopologue, mod);

                // There were more than one isotopologue, so we must go deeper
                if (isotopologues.Length > 1)
                {
                    // Call the same rotuine on the newly generate peptide that has one less isotopologue
                    foreach (var subpeptide in peptide.GenerateAllModificationCombinations())
                    {
                        yield return(subpeptide);
                    }
                }
                else
                {
                    // Return this peptide
                    yield return(peptide);
                }
            }
        }
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            Status("Extracting data points:");
            // The final training point list

            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            List <LabeledMs1DataPoint> Ms1List = new List <LabeledMs1DataPoint>();
            List <LabeledMs2DataPoint> Ms2List = new List <LabeledMs2DataPoint>();

            int numIdentifications = goodIdentifications.Count;

            // Loop over identifications

            HashSet <string> sequences = new HashSet <string>();

            object lockObj  = new object();
            object lockObj2 = new object();

            Parallel.ForEach(Partitioner.Create(0, numIdentifications), fff =>
            {
                for (int matchIndex = fff.Item1; matchIndex < fff.Item2; matchIndex++)
                {
                    PeptideSpectralMatch identification = goodIdentifications[matchIndex];

                    // Each identification has an MS2 spectrum attached to it.
                    int ms2scanNumber = identification.ScanNumber;
                    int peptideCharge = identification.ScanPrecursorCharge;
                    if (identification.FullSequence == null)
                    {
                        continue;
                    }

                    var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

                    // Get the peptide, don't forget to add the modifications!!!!
                    var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                    if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
                    {
                        continue;
                    }
                    Proteomics.Peptide coolPeptide = new Proteomics.Peptide(SequenceWithChemicalFormulas);

                    var ms2tuple = SearchMS2Spectrum(myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >, coolPeptide, peptideCharge, identification);

                    // If MS2 has low evidence for peptide, skip and go to next one
                    if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
                    {
                        continue;
                    }

                    lock (lockObj2)
                    {
                        Ms2List.AddRange(ms2tuple.Item1);
                        numMs2MassChargeCombinationsConsidered += ms2tuple.Item2;
                        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2tuple.Item3;
                        if (sequences.Contains(identification.FullSequence))
                        {
                            continue; // Do not search same sequence multiple times in MS1 scans
                        }
                        sequences.Add(identification.FullSequence);
                    }

                    // Calculate isotopic distribution of the full peptide
                    var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);

                    double[] theoreticalMasses      = dist.Masses.ToArray();
                    double[] theoreticalIntensities = dist.Intensities.ToArray();

                    Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                    var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

                    var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                    lock (lockObj)
                    {
                        Ms1List.AddRange(ms1tupleBack.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                        Ms1List.AddRange(ms1tupleForward.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                    }
                }
            });

            return(new DataPointAquisitionResults(this,
                                                  Ms1List,
                                                  Ms2List,
                                                  numMs1MassChargeCombinationsConsidered,
                                                  numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
                                                  numMs2MassChargeCombinationsConsidered,
                                                  numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks
                                                  ));
        }
        private (List <LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
        {
            List <LabeledMs2DataPoint> result          = new List <LabeledMs2DataPoint>();
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numFragmentsIdentified = 0;

            if (ms2DataScan.MassSpectrum.Size == 0)
            {
                return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
            }

            // Key: mz value, Value: error
            var addedPeaks = new Dictionary <double, double>();

            var countForThisMS2  = 0;
            var countForThisMS2a = 0;

            var scanWindowRange = ms2DataScan.ScanWindowRange;

            IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType <IHasChemicalFormula>().ToArray();

            foreach (var fragment in fragmentList)
            {
                bool     fragmentIdentified    = false;
                bool     computedIsotopologues = false;
                double[] masses      = new double[0];
                double[] intensities = new double[0];
                // First look for monoisotopic masses, do not compute distribution spectrum!

                for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                {
                    var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
                    if (monoisotopicMZ > scanWindowRange.Maximum)
                    {
                        continue;
                    }
                    if (monoisotopicMZ < scanWindowRange.Minimum)
                    {
                        break;
                    }
                    var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);

                    if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ) && !computedIsotopologues)
                    {
                        var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);

                        masses      = dist.Masses.ToArray();
                        intensities = dist.Intensities.ToArray();

                        Array.Sort(intensities, masses, Comparer <double> .Create((x, y) => y.CompareTo(x)));
                        computedIsotopologues = true;
                        break;
                    }
                }

                if (computedIsotopologues)
                {
                    bool startingToAdd = false;
                    for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                    {
                        if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
                        {
                            continue;
                        }
                        if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
                        {
                            break;
                        }
                        var trainingPointsToAverage = new List <LabeledMs2DataPoint>();
                        foreach (double a in masses)
                        {
                            double theMZ = a.ToMz(chargeToLookAt);
                            var    npwr  = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));
                            if (npwr == 0)
                            {
                                break;
                            }
                            numMs2MassChargeCombinationsConsidered++;
                            if (npwr > 1)
                            {
                                numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                                continue;
                            }

                            var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                            var closestPeakMZ    = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                            if (!addedPeaks.ContainsKey(closestPeakMZ))
                            {
                                addedPeaks.Add(closestPeakMZ, Math.Abs(closestPeakMZ - theMZ));
                                trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                            }
                        }
                        // If started adding and suddnely stopped, go to next one, no need to look at higher charges
                        if (trainingPointsToAverage.Count == 0 && startingToAdd)
                        {
                            break;
                        }
                        if (trainingPointsToAverage.Count < Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, intensities.Count()))
                        {
                        }
                        else
                        {
                            startingToAdd = true;
                            if (!fragmentIdentified)
                            {
                                fragmentIdentified      = true;
                                numFragmentsIdentified += 1;
                            }

                            countForThisMS2 += trainingPointsToAverage.Count;
                            countForThisMS2a++;
                            result.Add(new LabeledMs2DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(),
                                                               ms2DataScan.RetentionTime,
                                                               Math.Log(ms2DataScan.TotalIonCurrent),
                                                               ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN,
                                                               trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                                                               trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                                                               identification));
                        }
                    }
                }
            }

            return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
        }
Пример #4
0
        public static void TestCoIsolation()
        {
            CommonParameters CommonParameters = new CommonParameters
            {
                DigestionParams = new DigestionParams
                {
                    Protease = new Protease("Custom Protease", new List <string> {
                        "K"
                    }, new List <string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null),
                    MinPeptideLength = null,
                },
                ConserveMemory = false,
                ScoreCutoff    = 1,
                DeconvolutionIntensityRatio = 50
            };

            var variableModifications = new List <ModificationWithMass>();
            var fixedModifications    = new List <ModificationWithMass>();
            var proteinList           = new List <Protein> {
                new Protein("MNNNKNDNK", null)
            };

            var searchModes = new SinglePpmAroundZeroSearchMode(5);

            Proteomics.Peptide pep1 = new Proteomics.Peptide("NNNK");
            Proteomics.Peptide pep2 = new Proteomics.Peptide("NDNK");

            var dist1 = IsotopicDistribution.GetDistribution(pep1.GetChemicalFormula(), 0.1, 0.01);

            var dist2 = IsotopicDistribution.GetDistribution(pep2.GetChemicalFormula(), 0.1, 0.01);

            IMzmlScan[] Scans          = new IMzmlScan[2];
            double[]    ms1intensities = new double[] { 0.8, 0.8, 0.2, 0.02, 0.2, 0.02 };
            double[]    ms1mzs         = dist1.Masses.Concat(dist2.Masses).OrderBy(b => b).Select(b => b.ToMz(1)).ToArray();

            double selectedIonMz = ms1mzs[1];

            MzmlMzSpectrum MS1 = new MzmlMzSpectrum(ms1mzs, ms1intensities, false);

            Scans[0] = new MzmlScan(1, MS1, 1, false, Polarity.Positive, 1.0, new MzRange(300, 2000), "first spectrum", MZAnalyzerType.Unknown, MS1.SumOfAllY, null, "scan=1");

            double[]       ms2intensities = new double[] { 1, 1, 1, 1, 1 };
            double[]       ms2mzs         = new double[] { 146.106.ToMz(1), 228.086.ToMz(1), 229.07.ToMz(1), 260.148.ToMz(1), 342.129.ToMz(1) };
            MzmlMzSpectrum MS2            = new MzmlMzSpectrum(ms2mzs, ms2intensities, false);
            double         isolationMZ    = selectedIonMz;

            Scans[1] = new MzmlScanWithPrecursor(2, MS2, 2, false, Polarity.Positive, 2.0, new MzRange(100, 1500), "second spectrum", MZAnalyzerType.Unknown, MS2.SumOfAllY, selectedIonMz, null, null, isolationMZ, 2.5, DissociationType.HCD, 1, null, null, "scan=2");

            var myMsDataFile = new FakeMsDataFile(Scans);

            bool      DoPrecursorDeconvolution           = true;
            bool      UseProvidedPrecursorInfo           = true;
            double    DeconvolutionIntensityRatio        = 50;
            int       DeconvolutionMaxAssumedChargeState = 10;
            Tolerance DeconvolutionMassTolerance         = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchModes, false, CommonParameters, CommonParameters.ProductMassTolerance, new List <string>()).Run();

            // Two matches for this single scan! Corresponding to two co-isolated masses
            Assert.AreEqual(2, allPsmsArray.Length);

            Assert.IsTrue(allPsmsArray[0].Score > 1);
            Assert.AreEqual(2, allPsmsArray[0].ScanNumber);

            var ojdfkj = (SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine(new List <PeptideSpectralMatch> {
                allPsmsArray[0], allPsmsArray[1]
            }, proteinList, fixedModifications, variableModifications, lp, new List <IDigestionParams> {
                CommonParameters.DigestionParams
            }, CommonParameters.ReportAllAmbiguity, new List <string>()).Run();

            foreach (var huh in allPsmsArray)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(ojdfkj.CompactPeptideToProteinPeptideMatching);
                }
            }

            Assert.AreEqual("NNNK", allPsmsArray[0].BaseSequence);
            Assert.AreEqual("NDNK", allPsmsArray[1].BaseSequence);
        }