Example #1
0
        /// <summary>
        /// Writes a small demonstration pepXML file named "example.pepXML" into
        /// <c>Examples.BASE_DIRECTORY</c>. The file contains one search summary
        /// followed by one spectrum that carries a single peptide spectral match.
        /// </summary>
        public static void WritePepXml()
        {
            string outputPath = Path.Combine(Examples.BASE_DIRECTORY, "example.pepXML");
            Console.WriteLine("Writting to " + outputPath);

            using (var pepXml = new PepXmlWriter(outputPath))
            {
                // Sample protease, then the search summary with its database and protease.
                pepXml.WriteSampleProtease(Protease.Trypsin);
                pepXml.StartSearchSummary("OMSSA", true, true);
                pepXml.WriteProteinDatabase("Resources/yeast_uniprot_120226.fasta");
                pepXml.WriteSearchProtease(Protease.Trypsin, 3);

                // Modifications, written in the same order as before: Acetyl, CAM, Phospho.
                pepXml.WriteModification(
                    ModificationDictionary.GetModification("Acetyl"),
                    ModificationSites.K | ModificationSites.NPep);
                pepXml.WriteModification(
                    ModificationDictionary.GetModification("CAM"),
                    ModificationSites.C);
                pepXml.WriteModification(
                    ModificationDictionary.GetModification("Phospho"),
                    ModificationSites.S | ModificationSites.T | ModificationSites.Y,
                    false);

                // Switch the writer to the spectra stage and emit one spectrum.
                pepXml.SetCurrentStage(PepXmlWriter.Stage.Spectra, true);
                pepXml.StartSpectrum(15, 1.234, 523.4324, 3);

                var match = new PeptideSpectralMatch(PeptideSpectralMatchScoreType.OmssaEvalue)
                {
                    Score = 1.5e-5,
                    Peptide = new Peptide("DEREK", new Protein("", "Test Protein")),
                    Charge = 3
                };
                pepXml.WritePSM(match);

                pepXml.EndSpectrum();
            }
        }
        /// <summary>
        /// Scores a single peptide against a single spectrum.
        /// </summary>
        /// <param name="massSpectrum">Source of the experimental masses, intensities and total ion current.</param>
        /// <param name="peptide">Peptide to fragment and score; also stored on the returned match.</param>
        /// <param name="fragmentTypes">Fragment types passed to <c>peptide.Fragment</c>.</param>
        /// <param name="productMassTolerance">Tolerance forwarded to the array-based <c>Search</c> overload.</param>
        /// <returns>A match created with <c>DefaultPsmScoreType</c> whose <c>Score</c> is the overload's result.</returns>
        public override PeptideSpectralMatch Search(IMassSpectrum massSpectrum, Peptide peptide, FragmentTypes fragmentTypes, Tolerance productMassTolerance)
        {
            // Experimental side of the comparison.
            double[] observedMasses = massSpectrum.MassSpectrum.GetMasses();
            double[] observedIntensities = massSpectrum.MassSpectrum.GetIntensities();
            double totalIonCurrent = massSpectrum.MassSpectrum.GetTotalIonCurrent();

            PeptideSpectralMatch match = new PeptideSpectralMatch(DefaultPsmScoreType);
            match.Peptide = peptide;

            // Theoretical side: fragment values from Mass.MzFromMass(mass, 1), ascending.
            double[] theoreticalMzs =
                (from fragment in peptide.Fragment(fragmentTypes)
                 select Mass.MzFromMass(fragment.MonoisotopicMass, 1) into mz
                 orderby mz
                 select mz).ToArray();

            double matchScore = Search(observedMasses, observedIntensities, theoreticalMzs, productMassTolerance, totalIonCurrent);
            match.Score = matchScore;
            return match;
        }
Example #3
0
        /// <summary>
        /// Lazily converts each OMSSA record produced by the underlying reader into a
        /// <see cref="PeptideSpectralMatch"/>.
        /// </summary>
        /// <remarks>
        /// For every record the peptide is built from the upper-cased sequence, fixed and
        /// dynamic modifications are applied, and the parent protein is attached when the
        /// record's defline is found in <c>_proteins</c>. When the part of the file name before
        /// the first '.' is a key of <c>_dataFiles</c>, that data file is opened if necessary
        /// and the spectrum with the record's spectrum number is attached to the match.
        /// </remarks>
        /// <returns>One match per record, yielded in reader order.</returns>
        public override IEnumerable<PeptideSpectralMatch> ReadNextPsm()
        {
            Protein prot;
            MSDataFile dataFile;
            foreach (OmssaPeptideSpectralMatch omssaPSM in _reader.GetRecords<OmssaPeptideSpectralMatch>())
            {
                // Sequences are machine-readable data, so upper-case them culture-invariantly
                // (culture-sensitive ToUpper maps 'i' to 'İ' under Turkish cultures).
                Peptide peptide = new Peptide(omssaPSM.Sequence.ToUpperInvariant());
                SetFixedMods(peptide);
                SetDynamicMods(peptide, omssaPSM.Modifications);
                peptide.StartResidue = omssaPSM.StartResidue;
                peptide.EndResidue = omssaPSM.StopResidue;
                if (_proteins.TryGetValue(omssaPSM.Defline, out prot))
                {
                    peptide.Parent = prot;
                }

                PeptideSpectralMatch psm = new PeptideSpectralMatch();

                // Extra columns are read from the reader's current record, so this must
                // happen before the enumeration advances.
                foreach (string name in _extraColumns)
                {
                    psm.AddExtraData(name, _reader.GetField<string>(name));
                }

                psm.Peptide = peptide;
                psm.Score = omssaPSM.EValue;
                psm.Charge = omssaPSM.Charge;
                psm.ScoreType = PeptideSpectralMatchScoreType.EValue;
                // Ordinal comparison: the decoy tag is an identifier prefix, not linguistic text.
                psm.IsDecoy = omssaPSM.Defline.StartsWith("DECOY", System.StringComparison.Ordinal);
                psm.SpectrumNumber = omssaPSM.SpectrumNumber;
                psm.FileName = omssaPSM.FileName;

                // Data files are keyed by the text before the first '.' of the file name.
                string[] filenameparts = psm.FileName.Split('.');
                if (_dataFiles.TryGetValue(filenameparts[0], out dataFile))
                {
                    if (!dataFile.IsOpen)
                    {
                        dataFile.Open();
                    }

                    psm.Spectrum = dataFile[psm.SpectrumNumber] as MsnDataScan;
                }

                yield return psm;
            }
        }
Example #4
0
        /// <summary>
        /// Runs the classic search engine on a three-peak spectrum (m/z 1, 2 and the m/z of the
        /// fifth-smallest fragment mass of the first "MMMM" digestion product) and checks that
        /// the resulting score lies strictly between 1 and 2.
        /// </summary>
        public static void TestLastPeaks()
        {
            // Protein built with an empty modification dictionary.
            IDictionary<int, List<Modification>> proteinMods = new Dictionary<int, List<Modification>>();
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif);
            Protein protein = new Protein("MMMM", null, null, null, proteinMods);

            DigestionParams peptideDigestion = new DigestionParams(minPeptideLength: 1);
            PeptideWithSetModifications firstPeptide = protein
                .Digest(peptideDigestion, new List<Modification>(), new List<Modification>())
                .First();

            // Neutral masses of the HCD fragments from both termini, sorted ascending.
            List<Product> products = new List<Product>();
            firstPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products);
            var sortedMasses = products.Select(product => product.NeutralMass).ToArray();
            Array.Sort(sortedMasses);

            double[] peakIntensities = { 1, 1, 1 };
            double[] peakMzs = { 1, 2, sortedMasses[4].ToMz(1) };
            MzSpectrum spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
            MsDataScan dataScan = new MsDataScan(
                spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
                MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
                DissociationType.Unknown, 1, null);

            PeptideSpectralMatch[] psms = new PeptideSpectralMatch[1];
            Ms2ScanWithSpecificMass[] ms2Scans =
            {
                new Ms2ScanWithSpecificMass(dataScan, 0, 1, null, new CommonParameters())
            };
            CommonParameters searchParameters = new CommonParameters(
                scoreCutoff: 1,
                productMassTolerance: new PpmTolerance(5),
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    maxModificationIsoforms: int.MaxValue,
                    initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));
            bool writeSpectralLibrary = false;

            ClassicSearchEngine engine = new ClassicSearchEngine(
                psms,
                ms2Scans,
                new List<Modification>(),
                new List<Modification>(),
                null,
                null,
                null,
                new List<Protein> { protein },
                new OpenSearchMode(),
                searchParameters,
                null,
                null,
                new List<string>(),
                writeSpectralLibrary);
            engine.Run();

            Assert.Less(psms[0].Score, 2);
            Assert.Greater(psms[0].Score, 1);
        }
Example #5
0
        /// <summary>
        /// Checks that a PSM's tab-separated string has the same number of tab characters as
        /// <c>PeptideSpectralMatch.GetTabSeparatedHeader()</c> at four points: right after
        /// construction, after <c>MatchToProteinLinkedPeptides</c>, after running a
        /// <c>LocalizationEngine</c>, and after <c>SetFdrValues</c>.
        /// </summary>
        public static void TestPsmHeader()
        {
            DigestionParams defaultDigestion = new DigestionParams();
            PeptideWithSetModifications peptide = new Protein("MQQQQQQQ", "accession1")
                .Digest(defaultDigestion, new List<ModificationWithMass>(), new List<ModificationWithMass>())
                .First();
            IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> dataFile = new TestDataFile(peptide, "quadratic");
            IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> secondScan =
                dataFile.GetOneBasedScan(2) as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;
            Ms2ScanWithSpecificMass ms2Scan = new Ms2ScanWithSpecificMass(secondScan, 4, 1, null);
            PeptideSpectralMatch psm = new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 1, 2, 3, ms2Scan);

            // Both calls are made once up front; their results are not used further.
            var initialRow = psm.ToString();
            var initialHeader = PeptideSpectralMatch.GetTabSeparatedHeader();

            int rowTabs;
            int headerTabs;

            // 1) Freshly constructed PSM.
            rowTabs = psm.ToString().Count(c => c == '\t');
            headerTabs = PeptideSpectralMatch.GetTabSeparatedHeader().Count(c => c == '\t');
            Assert.AreEqual(rowTabs, headerTabs);

            // 2) After linking the compact peptide to its peptide-with-set-modifications.
            var linkedPeptides = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();
            linkedPeptides.Add(
                peptide.CompactPeptide(TerminusType.None),
                new HashSet<PeptideWithSetModifications> { peptide });
            psm.MatchToProteinLinkedPeptides(linkedPeptides);

            rowTabs = psm.ToString().Count(c => c == '\t');
            headerTabs = PeptideSpectralMatch.GetTabSeparatedHeader().Count(c => c == '\t');
            Assert.AreEqual(rowTabs, headerTabs);

            // 3) After running the localization engine with B product ions.
            Tolerance fragmentTolerance = new PpmTolerance(10);
            List<ProductType> productTypes = new List<ProductType> { ProductType.B };
            LocalizationEngine localization = new LocalizationEngine(
                new List<PeptideSpectralMatch> { psm },
                productTypes,
                dataFile,
                fragmentTolerance,
                new List<string>(),
                false);
            localization.Run();

            rowTabs = psm.ToString().Count(c => c == '\t');
            headerTabs = PeptideSpectralMatch.GetTabSeparatedHeader().Count(c => c == '\t');
            Assert.AreEqual(rowTabs, headerTabs);

            // 4) After FDR values have been set.
            psm.SetFdrValues(6, 6, 6, 6, 6, 6, 0, 0, 0, false);

            rowTabs = psm.ToString().Count(c => c == '\t');
            headerTabs = PeptideSpectralMatch.GetTabSeparatedHeader().Count(c => c == '\t');
            Assert.AreEqual(rowTabs, headerTabs);
        }
Example #6
0
        /// <summary>
        /// Runs the modern (index-based) search engine on a spectrum whose last two peaks are
        /// only 1e-9 m/z apart, both placed at the m/z of the fifth-smallest B/Y product mass of
        /// the first "MMMM" digestion product, and checks that the score lies strictly between 1 and 2.
        /// </summary>
        public static void TestVeryCloseExperimentalsModern()
        {
            IDictionary<int, List<Modification>> proteinMods = new Dictionary<int, List<Modification>>();
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif);
            Protein protein = new Protein("MMMM", null, null, null, proteinMods);

            DigestionParams peptideDigestion = new DigestionParams(minPeptideLength: 1);
            var firstPeptide = protein
                .Digest(peptideDigestion, new List<ModificationWithMass>(), new List<ModificationWithMass>())
                .First();

            // B and Y product masses, sorted in place.
            var sortedMasses = firstPeptide
                .CompactPeptide(TerminusType.None)
                .ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { ProductType.B, ProductType.Y });
            Array.Sort(sortedMasses);

            double[] peakIntensities = { 1, 1, 1, 1 };
            double[] peakMzs = { 1, 2, sortedMasses[4].ToMz(1), sortedMasses[4].ToMz(1) + 1e-9 };
            MzSpectrum spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
            MsDataScan dataScan = new MsDataScan(
                spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
                MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
                DissociationType.Unknown, 1, null);

            PeptideSpectralMatch[] psms = new PeptideSpectralMatch[1];
            Ms2ScanWithSpecificMass[] ms2Scans = { new Ms2ScanWithSpecificMass(dataScan, 600, 1, null) };
            CommonParameters searchParameters = new CommonParameters(
                productMassTolerance: new PpmTolerance(5),
                scoreCutoff: 1,
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    maxModificationIsoforms: int.MaxValue,
                    initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

            // Build the peptide and fragment indexes that the modern engine searches.
            IndexingEngine indexer = new IndexingEngine(
                new List<Protein> { protein },
                new List<ModificationWithMass>(),
                new List<ModificationWithMass>(),
                new List<ProductType> { ProductType.B, ProductType.Y },
                1,
                DecoyType.Reverse,
                new List<DigestionParams> { searchParameters.DigestionParams },
                searchParameters,
                30000,
                new List<string>());
            var index = (IndexingResults)indexer.Run();

            ModernSearchEngine engine = new ModernSearchEngine(
                psms,
                ms2Scans,
                index.PeptideIndex,
                index.FragmentIndex,
                new List<ProductType> { ProductType.B, ProductType.Y },
                0,
                searchParameters,
                new OpenSearchMode(),
                0,
                new List<string>());
            engine.Run();

            Assert.Less(psms[0].Score, 2);
            Assert.Greater(psms[0].Score, 1);
        }
Example #7
0
        /// <summary>
        /// Runs the classic search engine on a four-peak spectrum built from the 1st, 3rd and
        /// 5th sorted B/Y product masses of the first "MMMM" digestion product plus one peak at
        /// m/z 10000, and checks that exactly three fragment ions are matched.
        /// </summary>
        public static void TestIdenticalPeaks()
        {
            IDictionary<int, List<Modification>> mods = new Dictionary<int, List<Modification>>();

            // Annotate position 1 of the protein with an 18.010565 Da modification.
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif);
            mods.Add(1, new List<Modification>
            {
                new ModificationWithMass("Hehe", null, motif, TerminusLocalization.NProt, 18.010565, null, null, null, null)
            });
            var prot = new Protein("MMMM", null, null, null, mods);
            DigestionParams digestionParams = new DigestionParams(minPeptideLength: 1);
            var ye = prot.Digest(digestionParams, new List<ModificationWithMass>(), new List<ModificationWithMass>()).First();

            var massArray = ye.CompactPeptide(TerminusType.None).ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType>
            {
                ProductType.B, ProductType.Y
            });

            Array.Sort(massArray);
            double[] intensities = new double[] { 1, 1, 1, 1 };
            double[] mz = new double[] { massArray[0].ToMz(1), massArray[2].ToMz(1), massArray[4].ToMz(1), 10000 };
            MzSpectrum massSpectrum = new MzSpectrum(mz, intensities, false);
            MsDataScan scan = new MsDataScan(massSpectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "", MZAnalyzerType.Unknown, massSpectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null, DissociationType.Unknown, 1, null);

            PeptideSpectralMatch[] globalPsms = new PeptideSpectralMatch[1];
            Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans = { new Ms2ScanWithSpecificMass(scan, 0, 0, null) };
            CommonParameters CommonParameters = new CommonParameters(
                productMassTolerance: new PpmTolerance(5),
                scoreCutoff: 1,
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));
            ClassicSearchEngine cse = new ClassicSearchEngine(globalPsms, arrayOfSortedMS2Scans, new List<ModificationWithMass>(), new List<ModificationWithMass>(), new List<Protein>
            {
                prot
            }, new List<ProductType>
            {
                ProductType.B, ProductType.Y
            }, new OpenSearchMode(), CommonParameters, new List<string>());

            cse.Run();

            // Expected value goes first so that a failure reports "expected 3 but was N".
            Assert.AreEqual(3, globalPsms[0].MatchedFragmentIons.Count);
        }
Example #8
0
        /// <summary>
        /// Runs the classic search engine on a four-peak spectrum built from the 1st, 3rd and
        /// 5th sorted HCD fragment masses of the first "MMMM" digestion product plus one peak at
        /// m/z 10000, and checks that exactly three fragment ions are matched.
        /// </summary>
        public static void TestIdenticalPeaks()
        {
            IDictionary<int, List<Modification>> proteinMods = new Dictionary<int, List<Modification>>();

            // Annotate position 1 of the protein with an 18.010565 Da modification.
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif);
            Modification hehe = new Modification(_originalId: "Hehe", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 18.010565);
            proteinMods.Add(1, new List<Modification> { hehe });

            Protein protein = new Protein("MMMM", null, null, null, proteinMods);
            DigestionParams peptideDigestion = new DigestionParams(minPeptideLength: 1);
            var firstPeptide = protein
                .Digest(peptideDigestion, new List<Modification>(), new List<Modification>())
                .First();

            // Neutral masses of the HCD fragments from both termini, sorted ascending.
            List<Product> products = new List<Product>();
            firstPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products);
            var sortedMasses = products.Select(product => product.NeutralMass).ToArray();
            Array.Sort(sortedMasses);

            double[] peakIntensities = { 1, 1, 1, 1 };
            double[] peakMzs =
            {
                sortedMasses[0].ToMz(1),
                sortedMasses[2].ToMz(1),
                sortedMasses[4].ToMz(1),
                10000
            };
            MzSpectrum spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
            MsDataScan dataScan = new MsDataScan(
                spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
                MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
                DissociationType.Unknown, 1, null);

            PeptideSpectralMatch[] psms = new PeptideSpectralMatch[1];
            Ms2ScanWithSpecificMass[] ms2Scans =
            {
                new Ms2ScanWithSpecificMass(dataScan, 0, 1, null, new CommonParameters())
            };
            CommonParameters searchParameters = new CommonParameters(
                productMassTolerance: new PpmTolerance(5),
                scoreCutoff: 1,
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

            ClassicSearchEngine engine = new ClassicSearchEngine(
                psms,
                ms2Scans,
                new List<Modification>(),
                new List<Modification>(),
                null,
                null,
                null,
                new List<Protein> { protein },
                new OpenSearchMode(),
                searchParameters,
                null,
                new List<string>());
            engine.Run();

            Assert.AreEqual(3, psms[0].MatchedFragmentIons.Count);
        }
Example #9
0
        /// <summary>
        /// Runs the classic search engine with the "top-down" protease against the sliced yeast
        /// mzML test file and checks that the first non-null PSM has 47 matched fragment ions.
        /// Also checks that the analyte type is reported as "Proteoform" for these parameters.
        /// </summary>
        public static void TestClassicSearchEngineTopDown()
        {
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            // test output file name (should be proteoform and not peptide)
            Assert.That(GlobalVariables.AnalyteType == "Proteoform");

            var variableModifications = new List<Modification>();
            var fixedModifications = new List<Modification>();
            var proteinList = new List<Protein>
            {
                new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
            };

            // Separate path segments instead of a hard-coded '\' so the test also runs
            // on platforms whose directory separator is '/'.
            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, "TopDownTestData", "slicedTDYeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            bool writeSpectralLibrary = false;

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List<string>(), writeSpectralLibrary).Run();

            var psm = allPsmsArray.FirstOrDefault(p => p != null);

            // Fail with a clear assertion (not a NullReferenceException) when nothing was identified.
            Assert.That(psm, Is.Not.Null);

            // Constraint form reports the actual count on failure.
            Assert.That(psm.MatchedFragmentIons.Count, Is.EqualTo(47));
        }
        /// <summary>
        /// Scores every candidate peptide against one spectrum and collects the matches in a
        /// container created with capacity <c>MaxMatchesPerSpectrum</c>.
        /// </summary>
        /// <param name="spectrum">Source of the experimental masses, intensities and total ion current.</param>
        /// <param name="peptides">Candidate peptides; each one produces one match added to the result.</param>
        /// <param name="fragmentTypes">Fragment types passed to each peptide's <c>Fragment</c> call.</param>
        /// <param name="productMassTolerance">Tolerance forwarded to the array-based <c>Search</c> overload.</param>
        /// <returns>The container holding the matches that were added.</returns>
        public override SortedMaxSizedContainer<PeptideSpectralMatch> Search(IMassSpectrum spectrum, IEnumerable<Peptide> peptides, FragmentTypes fragmentTypes, Tolerance productMassTolerance)
        {
            var matches = new SortedMaxSizedContainer<PeptideSpectralMatch>(MaxMatchesPerSpectrum);

            // The experimental arrays are read once and shared by all candidates.
            double[] observedMasses = spectrum.MassSpectrum.GetMasses();
            double[] observedIntensities = spectrum.MassSpectrum.GetIntensities();
            double totalIonCurrent = spectrum.MassSpectrum.GetTotalIonCurrent();

            foreach (Peptide candidate in peptides)
            {
                PeptideSpectralMatch candidateMatch = new PeptideSpectralMatch(DefaultPsmScoreType);
                candidateMatch.Peptide = candidate;

                // Theoretical fragment values from Mass.MzFromMass(mass, 1), ascending.
                var fragmentMzs = candidate.Fragment(fragmentTypes).Select(fragment => Mass.MzFromMass(fragment.MonoisotopicMass, 1));
                double[] theoreticalMzs = fragmentMzs.OrderBy(mz => mz).ToArray();

                double candidateScore = Search(observedMasses, observedIntensities, theoreticalMzs, productMassTolerance, totalIonCurrent);
                candidateMatch.Score = candidateScore;
                matches.Add(candidateMatch);
            }

            return matches;
        }
        /// <summary>
        /// Extracts labeled MS1 and MS2 data points for every entry of <c>goodIdentifications</c>,
        /// processing index ranges of the list in parallel.
        /// </summary>
        /// <returns>
        /// A <c>DataPointAquisitionResults</c> holding the collected MS1 and MS2 data points and the
        /// four "considered" / "ignored because of too many peaks" counters.
        /// </returns>
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            Status("Extracting data points:");

            // Counters and result lists shared by all worker threads; only touched under the locks below.
            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            List <LabeledMs1DataPoint> Ms1List = new List <LabeledMs1DataPoint>();
            List <LabeledMs2DataPoint> Ms2List = new List <LabeledMs2DataPoint>();

            int numIdentifications = goodIdentifications.Count;

            // Full sequences whose MS1 search has already been claimed by some thread.
            HashSet <string> sequences = new HashSet <string>();

            // lockObj guards the MS1 list and counters; lockObj2 guards the MS2 list, the MS2 counters and 'sequences'.
            object lockObj  = new object();
            object lockObj2 = new object();

            // Loop over identifications, one index range per partition.
            Parallel.ForEach(Partitioner.Create(0, numIdentifications), fff =>
            {
                for (int matchIndex = fff.Item1; matchIndex < fff.Item2; matchIndex++)
                {
                    PeptideSpectralMatch identification = goodIdentifications[matchIndex];

                    // Each identification has an MS2 spectrum attached to it.
                    int ms2scanNumber = identification.ScanNumber;
                    int peptideCharge = identification.ScanPrecursorCharge;
                    // Identifications without a full sequence are skipped.
                    if (identification.FullSequence == null)
                    {
                        continue;
                    }

                    var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

                    // Get the peptide, don't forget to add the modifications!!!!
                    // Skipped when there is no formula-annotated sequence, or when any modification's
                    // neutral losses are anything other than exactly one entry equal to 0.
                    var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                    if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
                    {
                        continue;
                    }
                    Proteomics.Peptide coolPeptide = new Proteomics.Peptide(SequenceWithChemicalFormulas);

                    var ms2tuple = SearchMS2Spectrum(myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >, coolPeptide, peptideCharge, identification);

                    // If MS2 has low evidence for peptide, skip and go to next one
                    // (Item4 is the number of fragments identified by SearchMS2Spectrum).
                    if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
                    {
                        continue;
                    }

                    lock (lockObj2)
                    {
                        Ms2List.AddRange(ms2tuple.Item1);
                        numMs2MassChargeCombinationsConsidered += ms2tuple.Item2;
                        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2tuple.Item3;
                        // The MS2 points above are always kept; only the MS1 search below is de-duplicated by sequence.
                        if (sequences.Contains(identification.FullSequence))
                        {
                            continue; // Do not search same sequence multiple times in MS1 scans
                        }
                        sequences.Add(identification.FullSequence);
                    }

                    // Calculate isotopic distribution of the full peptide
                    var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);

                    double[] theoreticalMasses      = dist.Masses.ToArray();
                    double[] theoreticalIntensities = dist.Intensities.ToArray();

                    // Sort by descending intensity (reversed comparer), reordering the masses in tandem.
                    Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                    // Search MS1 scans in both directions (-1 and +1) starting from the MS2 scan number.
                    var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

                    var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                    lock (lockObj)
                    {
                        Ms1List.AddRange(ms1tupleBack.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                        Ms1List.AddRange(ms1tupleForward.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                    }
                }
            });

            return(new DataPointAquisitionResults(this,
                                                  Ms1List,
                                                  Ms2List,
                                                  numMs1MassChargeCombinationsConsidered,
                                                  numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
                                                  numMs2MassChargeCombinationsConsidered,
                                                  numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks
                                                  ));
        }
        /// <summary>
        /// Collects labeled MS2 data points for one identification by looking for the isotopic peaks
        /// of each theoretical fragment of <paramref name="peptide"/> in <paramref name="ms2DataScan"/>.
        /// </summary>
        /// <param name="ms2DataScan">The MS2 scan whose spectrum is searched.</param>
        /// <param name="peptide">Peptide that is fragmented using <c>fragmentTypesForCalibration</c>.</param>
        /// <param name="peptideCharge">Highest charge state tried for each fragment (charges 1..peptideCharge).</param>
        /// <param name="identification">Identification stored on every data point added to the result.</param>
        /// <returns>
        /// A tuple of: the data points found; the number of mass/charge combinations considered; the
        /// number of those ignored because more than one peak fell inside the tolerance window; and
        /// the number of fragments for which at least one data point was produced.
        /// </returns>
        private (List<LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
        {
            List<LabeledMs2DataPoint> result = new List<LabeledMs2DataPoint>();
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numFragmentsIdentified = 0;

            // Nothing to search in an empty spectrum.
            if (ms2DataScan.MassSpectrum.Size == 0)
            {
                return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
            }

            // Key: mz value, Value: error. Ensures each experimental peak is used at most once per scan.
            var addedPeaks = new Dictionary<double, double>();

            var scanWindowRange = ms2DataScan.ScanWindowRange;

            IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType<IHasChemicalFormula>().ToArray();

            foreach (var fragment in fragmentList)
            {
                bool fragmentIdentified = false;
                bool computedIsotopologues = false;
                double[] masses = new double[0];
                double[] intensities = new double[0];

                // First look for monoisotopic masses, do not compute distribution spectrum!
                // The isotopic distribution is only computed once a monoisotopic peak is found
                // within tolerance at some charge state.
                for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                {
                    var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
                    if (monoisotopicMZ > scanWindowRange.Maximum)
                    {
                        continue;
                    }
                    if (monoisotopicMZ < scanWindowRange.Minimum)
                    {
                        break;
                    }
                    var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);

                    if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ))
                    {
                        var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);

                        masses = dist.Masses.ToArray();
                        intensities = dist.Intensities.ToArray();

                        // Sort by descending intensity (reversed comparer), reordering the masses in tandem.
                        Array.Sort(intensities, masses, Comparer<double>.Create((x, y) => y.CompareTo(x)));
                        computedIsotopologues = true;
                        break;
                    }
                }

                if (!computedIsotopologues)
                {
                    continue;
                }

                bool startingToAdd = false;
                for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                {
                    if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
                    {
                        continue;
                    }
                    if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
                    {
                        break;
                    }

                    var trainingPointsToAverage = new List<LabeledMs2DataPoint>();
                    foreach (double a in masses)
                    {
                        double theMZ = a.ToMz(chargeToLookAt);
                        var npwr = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));

                        // Stop at the first isotopologue with no peak in its tolerance window.
                        if (npwr == 0)
                        {
                            break;
                        }
                        numMs2MassChargeCombinationsConsidered++;

                        // More than one candidate peak in the window: counted, but not used.
                        if (npwr > 1)
                        {
                            numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                            continue;
                        }

                        var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                        var closestPeakMZ = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                        if (!addedPeaks.ContainsKey(closestPeakMZ))
                        {
                            addedPeaks.Add(closestPeakMZ, Math.Abs(closestPeakMZ - theMZ));
                            trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                        }
                    }

                    // If started adding and suddenly stopped, go to next one, no need to look at higher charges
                    if (trainingPointsToAverage.Count == 0 && startingToAdd)
                    {
                        break;
                    }

                    // Too few isotopic peaks at this charge state: try the next charge.
                    if (trainingPointsToAverage.Count < Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, intensities.Length))
                    {
                        continue;
                    }

                    startingToAdd = true;
                    if (!fragmentIdentified)
                    {
                        // Each fragment is counted once, however many charge states confirm it.
                        fragmentIdentified = true;
                        numFragmentsIdentified += 1;
                    }

                    // One averaged data point per fragment/charge combination.
                    result.Add(new LabeledMs2DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(),
                                                       ms2DataScan.RetentionTime,
                                                       Math.Log(ms2DataScan.TotalIonCurrent),
                                                       ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN,
                                                       trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                                                       trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                                                       identification));
                }
            }

            return (result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
        }
        /// <summary>
        /// Walks MS1 scans in one direction, starting next to the given MS2 scan, and collects one averaged
        /// data point per (scan, charge state) whose peaks can be located unambiguously.
        /// Scans with MsnOrder above 1 are skipped; the walk ends at the first MS1 scan that contributes
        /// no data point, at an empty MS1 spectrum, or when the scan index leaves the file.
        /// </summary>
        /// <param name="theoreticalMasses">Masses to look for at each charge; element 0 is the one compared against the scan window.</param>
        /// <param name="theoreticalIntensities">Only element 0 (compared against 0.65) and the array length are read here.</param>
        /// <param name="ms2spectrumIndex">One-based scan index of the MS2 scan the search is anchored to.</param>
        /// <param name="direction">Increment applied to the scan index; 1 starts at <paramref name="ms2spectrumIndex"/>, any other value starts one scan earlier.</param>
        /// <param name="peptideCharge">Charge of the identification; charges up to one above the highest charge seen so far are examined.</param>
        /// <param name="identification">Identification stored on every returned data point.</param>
        /// <returns>
        /// The collected data points, the number of mass/charge combinations that had at least one peak in tolerance,
        /// and the number of those skipped because more than one peak was in tolerance.
        /// </returns>
        private (List <LabeledMs1DataPoint>, int, int) SearchMS1Spectra(double[] theoreticalMasses, double[] theoreticalIntensities, int ms2spectrumIndex, int direction, int peptideCharge, PeptideSpectralMatch identification)
        {
            var result = new List<LabeledMs1DataPoint>();
            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;

            int theIndex = direction == 1 ? ms2spectrumIndex : ms2spectrumIndex - 1;
            bool addedAscan = true;
            int highestKnownChargeForThisPeptide = peptideCharge;

            while (theIndex >= 1 && theIndex <= myMsDataFile.NumSpectra && addedAscan)
            {
                // Fetch the scan once and reuse it for both the order check and the search.
                var fullMS1scan = myMsDataFile.GetOneBasedScan(theIndex);
                if (fullMS1scan.MsnOrder > 1)
                {
                    // Not an MS1 scan: step over it without ending the walk.
                    theIndex += direction;
                    continue;
                }

                addedAscan = false;
                var scanWindowRange = fullMS1scan.ScanWindowRange;
                var fullMS1spectrum = fullMS1scan.MassSpectrum;
                if (fullMS1spectrum.Size == 0)
                {
                    break;
                }

                bool startingToAddCharges = false;
                int chargeToLookAt = 1;
                do
                {
                    var firstMassMz = theoreticalMasses[0].ToMz(chargeToLookAt);
                    if (firstMassMz > scanWindowRange.Maximum)
                    {
                        // Above the scan window at this charge; try the next charge.
                        chargeToLookAt++;
                        continue;
                    }
                    if (firstMassMz < scanWindowRange.Minimum)
                    {
                        break;
                    }

                    var trainingPointsToAverage = new List<LabeledMs1DataPoint>();
                    foreach (double a in theoreticalMasses)
                    {
                        double theMZ = a.ToMz(chargeToLookAt);

                        var npwr = fullMS1spectrum.NumPeaksWithinRange(mzToleranceForMs1Search.GetMinimumValue(theMZ), mzToleranceForMs1Search.GetMaximumValue(theMZ));
                        if (npwr == 0)
                        {
                            // First missing peak ends the search over the remaining masses for this charge.
                            break;
                        }
                        numMs1MassChargeCombinationsConsidered++;
                        if (npwr > 1)
                        {
                            // Ambiguous: more than one candidate peak in tolerance.
                            numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                            continue;
                        }

                        var closestPeakIndex = fullMS1spectrum.GetClosestPeakIndex(theMZ);
                        var closestPeakMZ    = fullMS1spectrum.XArray[closestPeakIndex.Value];

                        highestKnownChargeForThisPeptide = Math.Max(highestKnownChargeForThisPeptide, chargeToLookAt);
                        trainingPointsToAverage.Add(new LabeledMs1DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(fullMS1spectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                    }

                    // If started adding and suddenly stopped, go to next one, no need to look at higher charges
                    if (trainingPointsToAverage.Count == 0 && startingToAddCharges)
                    {
                        break;
                    }

                    // A lone match is not trusted when the first theoretical intensity is below 0.65.
                    bool onlyOneWeakPeak = trainingPointsToAverage.Count == 1 && theoreticalIntensities[0] < 0.65;

                    if ((trainingPointsToAverage.Count == 0 || onlyOneWeakPeak) && peptideCharge <= chargeToLookAt)
                    {
                        break;
                    }

                    bool enoughPeaks = !onlyOneWeakPeak
                                       && trainingPointsToAverage.Count >= Math.Min(minMS1isotopicPeaksNeededForConfirmedIdentification, theoreticalIntensities.Length);
                    if (enoughPeaks)
                    {
                        addedAscan           = true;
                        startingToAddCharges = true;
                        result.Add(new LabeledMs1DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(),
                                                           fullMS1scan.RetentionTime,
                                                           Math.Log(fullMS1scan.TotalIonCurrent),
                                                           fullMS1scan.InjectionTime.HasValue ? Math.Log(fullMS1scan.InjectionTime.Value) : double.NaN,
                                                           trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                                                           trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                                                           identification));
                    }
                    chargeToLookAt++;
                } while (chargeToLookAt <= highestKnownChargeForThisPeptide + 1);

                theIndex += direction;
            }

            return (result, numMs1MassChargeCombinationsConsidered, numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks);
        }
Example #14
0
        /// <summary>
        /// Digests a single protein with a custom protease, checks the three resulting peptide sequences,
        /// wraps each peptide in a PSM and runs <see cref="FdrAnalysisEngine"/> over them.
        /// Apart from the sequence assertions, the test only verifies that nothing throws.
        /// </summary>
        public static void TestAnalysisEngineTests()
        {
            List<DigestionMotif> motifs = new List<DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            };
            Protease protease = new Protease("Custom Protease5", CleavageSpecificity.Full, null, null, motifs);

            // Registers the protease globally so it can be referenced by name below.
            ProteaseDictionary.Dictionary.Add(protease.Name, protease);
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(
                    protease: protease.Name,
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    maxModificationIsoforms: 1042),
                scoreCutoff: 1,
                productMassTolerance: new PpmTolerance(10));
            var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)> {
                ("", CommonParameters)
            };

            List<Modification> variableModifications = new List<Modification>();
            List<Modification> fixedModifications    = new List<Modification>();

            var protein = new Protein("MNNNKQQQ", "accession");

            // Digest once and pick the peptides by position instead of re-digesting for each one.
            List<PeptideWithSetModifications> digested = protein.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

            PeptideWithSetModifications compactPeptide1 = digested.Last();
            Assert.AreEqual("QQQ", compactPeptide1.BaseSequence);

            PeptideWithSetModifications compactPeptide2 = digested.First();
            Assert.AreEqual("MNNNK", compactPeptide2.BaseSequence);

            PeptideWithSetModifications compactPeptide3 = digested[1];
            Assert.AreEqual("NNNK", compactPeptide3.BaseSequence);

            Ms2ScanWithSpecificMass scanA = new Ms2ScanWithSpecificMass(new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 2, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null), 1, 1, null, new CommonParameters());
            Ms2ScanWithSpecificMass scanB = new Ms2ScanWithSpecificMass(new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 3, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=2", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null), 2 + 132.040, 1, null, new CommonParameters());
            Ms2ScanWithSpecificMass scanC = new Ms2ScanWithSpecificMass(new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 4, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=3", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null), 3, 1, null, new CommonParameters());

            PeptideSpectralMatch matchA = new PeptideSpectralMatch(compactPeptide1, 0, 0, 0, scanA, CommonParameters, new List<MatchedFragmentIon>());
            PeptideSpectralMatch matchB = new PeptideSpectralMatch(compactPeptide2, 0, 0, 0, scanB, CommonParameters, new List<MatchedFragmentIon>());
            PeptideSpectralMatch matchC = new PeptideSpectralMatch(compactPeptide3, 0, 0, 0, scanC, CommonParameters, new List<MatchedFragmentIon>());

            var newPsms = new List<PeptideSpectralMatch> {
                matchA, matchB, matchC
            };

            MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> {
                compactPeptide1, compactPeptide2, compactPeptide3
            });

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            // NOTE(review): the extracted scans are never used afterwards; the call is retained so the
            // test keeps exercising scan extraction on the synthetic data file. Confirm whether it is needed.
            _ = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            FdrAnalysisEngine engine = new FdrAnalysisEngine(newPsms, searchMode.NumNotches, CommonParameters, fsp, new List<string> {
                "ff"
            });

            engine.Run();
        }
Example #15
0
        /// <summary>
        /// Compares FDR results obtained with and without delta-score ranking, for PSMs produced by both the
        /// classic and the modern search engine, on two synthetic data sets:
        /// one where delta score is expected to rescue three PSMs that raw score loses,
        /// and one where both rankings are expected to give the same three PSMs.
        /// </summary>
        public static void TestDeltaValues()
        {
            CommonParameters CommonParameters = new CommonParameters(scoreCutoff: 1, useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

            SearchParameters SearchParameters = new SearchParameters
            {
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
            };
            List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();
            List<Modification> fixedModifications    = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.ModificationType, b.IdWithMotif))).ToList();

            // Both helpers read the CURRENT value of CommonParameters, which is reassigned several times below.
            PeptideWithSetModifications FirstPeptide(Protein protein)
            {
                return protein.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0];
            }

            FdrAnalysisResults RunFdr(PeptideSpectralMatch[] psms)
            {
                return (FdrAnalysisResults)(new FdrAnalysisEngine(psms.ToList(), 1, CommonParameters, new List<string>()).Run());
            }

            // Generate data for files
            Protein TargetProtein1    = new Protein("TIDEANTHE", "accession1");
            Protein TargetProtein2    = new Protein("TIDELVE", "accession2");
            Protein TargetProtein3    = new Protein("TIDENIE", "accession3");
            Protein TargetProteinLost = new Protein("PEPTIDEANTHE", "accession4");
            Protein DecoyProteinFound = new Protein("PETPLEDQGTHE", "accessiond", isDecoy: true);

            MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
            {
                FirstPeptide(TargetProtein1),
                FirstPeptide(TargetProtein2),
                FirstPeptide(TargetProtein3),
                FirstPeptide(DecoyProteinFound)
            });

            var proteinList = new List<Protein> {
                TargetProtein1, TargetProtein2, TargetProtein3, TargetProteinLost, DecoyProteinFound
            };

            var searchModes = new SinglePpmAroundZeroSearchMode(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //check better when using delta
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, CommonParameters, new List<string>()).Run();

            var indexEngine  = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.None, CommonParameters, 30000, false, new List<FileInfo>(), new List<string>());
            var indexResults = (IndexingResults)indexEngine.Run();
            MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            PeptideSpectralMatch[] allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, CommonParameters, massDiffAcceptor, 0, new List<string>()).Run();

            FdrAnalysisResults fdrResultsClassicDelta = RunFdr(allPsmsArray);
            FdrAnalysisResults fdrResultsModernDelta  = RunFdr(allPsmsArrayModern);

            Assert.AreEqual(3, fdrResultsClassicDelta.PsmsWithin1PercentFdr);
            Assert.AreEqual(3, fdrResultsModernDelta.PsmsWithin1PercentFdr);

            CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

            //check worse when using score
            // The modern result must come from the modern PSM array; previously the classic array was
            // analysed twice, so the modern engine was never actually checked here.
            FdrAnalysisResults fdrResultsClassic = RunFdr(allPsmsArray);
            FdrAnalysisResults fdrResultsModern  = RunFdr(allPsmsArrayModern);

            Assert.AreEqual(0, fdrResultsClassic.PsmsWithin1PercentFdr);
            Assert.AreEqual(0, fdrResultsModern.PsmsWithin1PercentFdr);

            //check that when delta is bad, we used the score
            // Generate data for files
            Protein DecoyProtein1     = new Protein("TLEDAGGTHE", "accession1d", isDecoy: true);
            Protein DecoyProtein2     = new Protein("TLEDLVE", "accession2d", isDecoy: true);
            Protein DecoyProtein3     = new Protein("TLEDNIE", "accession3d", isDecoy: true);
            Protein DecoyProteinShiny = new Protein("GGGGGG", "accessionShinyd", isDecoy: true);

            myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
            {
                FirstPeptide(TargetProtein1),
                FirstPeptide(TargetProtein2),
                FirstPeptide(TargetProtein3),
                FirstPeptide(DecoyProteinShiny),
            });

            proteinList = new List<Protein>
            {
                TargetProtein1, DecoyProtein1,
                TargetProtein2, DecoyProtein2,
                TargetProtein3, DecoyProtein3,
                DecoyProteinShiny,
            };

            listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //check no change when using delta
            allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, CommonParameters, new List<string>()).Run();

            CommonParameters = new CommonParameters(useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

            indexEngine        = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.None, CommonParameters, 30000, false, new List<FileInfo>(), new List<string>());
            indexResults       = (IndexingResults)indexEngine.Run();
            massDiffAcceptor   = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
            allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, CommonParameters, massDiffAcceptor, 0, new List<string>()).Run();

            fdrResultsClassicDelta = RunFdr(allPsmsArray);
            fdrResultsModernDelta  = RunFdr(allPsmsArrayModern);
            Assert.AreEqual(3, fdrResultsClassicDelta.PsmsWithin1PercentFdr);
            Assert.AreEqual(3, fdrResultsModernDelta.PsmsWithin1PercentFdr);

            CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

            //check no change when using score
            fdrResultsClassic = RunFdr(allPsmsArray);
            fdrResultsModern  = RunFdr(allPsmsArrayModern);
            Assert.AreEqual(3, fdrResultsClassic.PsmsWithin1PercentFdr);
            Assert.AreEqual(3, fdrResultsModern.PsmsWithin1PercentFdr);
        }
Example #16
0
        /// <summary>
        /// Runs a classic search on a small mzML/FASTA pair, checks that the feature record built by
        /// <c>PEP_Analysis.CreateOnePsmDataFromPsm</c> for the top-scoring PSM mirrors that PSM's own properties,
        /// then computes PEP values for all PSMs and bounds how many end up with PEP of 0.5 or more.
        /// </summary>
        public static void TestComputePEPValue()
        {
            var variableModifications = new List<Modification>();
            var fixedModifications    = new List<Modification>();

            // Build paths segment by segment so no directory separator is hard-coded.
            string testDataDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData");
            var    origDataFile      = Path.Combine(testDataDirectory, "TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            var    fastaFile         = Path.Combine(testDataDirectory, "hela_snip_for_unitTest.fasta");

            MyFileManager    myFileManager    = new MyFileManager(true);
            CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams());
            var              myMsDataFile     = myFileManager.LoadFile(origDataFile, CommonParameters);
            var              searchModes      = new SinglePpmAroundZeroSearchMode(5);
            List<Protein>    proteinList      = ProteinDbLoader.LoadProteinFasta(fastaFile, true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                 ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, CommonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, CommonParameters, new List<string>()).Run();

            // The returned results object is not inspected; the engine is run on its own list of the PSMs.
            // NOTE(review): presumably this run is what populates FdrInfo, which is read at the end of the test - confirm.
            new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, CommonParameters, new List<string>()).Run();

            var nonNullPsms     = allPsmsArray.Where(p => p != null).ToList();
            var accessionCounts = PEP_Analysis.GetAccessionCounts(nonNullPsms);

            var maxScore    = nonNullPsms.Select(n => n.Score).Max();
            var maxScorePsm = nonNullPsms.First(n => n.Score == maxScore);

            // Key: the '|'-joined full sequences of a PSM's best matching peptides; value: number of PSMs sharing that key.
            Dictionary<string, int> sequenceToPsmCount = nonNullPsms
                                                         .Select(psm => String.Join("|", psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence)))
                                                         .GroupBy(sequence => sequence)
                                                         .ToDictionary(grp => grp.Key, grp => grp.Count());

            var maxPsmData = PEP_Analysis.CreateOnePsmDataFromPsm(maxScorePsm, accessionCounts, sequenceToPsmCount);

            Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count, Is.EqualTo(maxPsmData.Ambiguity));
            Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
            // The fractional part of the score is compared against the Intensity feature.
            Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));

            var bestPeptide = maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First();

            Assert.That(bestPeptide.MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
            Assert.That(bestPeptide.AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
            Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
            Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
            Assert.That(maxScorePsm.ScanPrecursorCharge, Is.EqualTo(maxPsmData.ScanPrecursorCharge));

            PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);

            int trueCount = allPsmsArray.Count(p => p != null && p.FdrInfo.PEP >= 0.5);

            // GreaterOrEqual(a, b) asserts a >= b, so this requires at most 32 PSMs with PEP >= 0.5.
            Assert.GreaterOrEqual(32, trueCount);
        }
Example #17
0
        /// <summary>
        /// Builds three modified peptides covering the protein "MMKMMK", pushes their PSMs through protein
        /// parsimony and protein scoring, and checks the resulting sequence-coverage strings and
        /// per-residue modification occupancy text of the first protein group.
        /// </summary>
        public static void TryFailSequenceCoverage()
        {
            var protein = new Protein("MMKMMK", "prot1");

            // Modifications targeting M, differing only in id and location restriction.
            ModificationMotif.TryGetMotif("M", out ModificationMotif methionineMotif);
            var modNTerminalM        = new Modification(_originalId: "mod1", _modificationType: "mt", _target: methionineMotif, _locationRestriction: "N-terminal.", _monoisotopicMass: 10);
            var modPeptideNTerminalM = new Modification(_originalId: "mod2", _modificationType: "mt", _target: methionineMotif, _locationRestriction: "Peptide N-terminal.", _monoisotopicMass: 10);
            var modAnywhereM         = new Modification(_originalId: "mod3", _modificationType: "mt", _target: methionineMotif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

            // Modifications targeting K.
            ModificationMotif.TryGetMotif("K", out ModificationMotif lysineMotif);
            var modPeptideCTerminalK = new Modification(_originalId: "mod4", _modificationType: "mt", _target: lysineMotif, _locationRestriction: "Peptide C-terminal.", _monoisotopicMass: 10);
            var modCTerminalK        = new Modification(_originalId: "mod5", _modificationType: "mt", _target: lysineMotif, _locationRestriction: "C-terminal.", _monoisotopicMass: 10);

            var modsOnFirstHalf = new Dictionary<int, Modification>
            {
                [1] = modNTerminalM,
                [3] = modAnywhereM,
                [5] = modPeptideCTerminalK,
            };
            var modsOnSecondHalf = new Dictionary<int, Modification>
            {
                [1] = modPeptideNTerminalM,
                [5] = modCTerminalK,
            };
            var modsOnWholeProtein = new Dictionary<int, Modification>
            {
                [1] = modNTerminalM,
                [5] = modAnywhereM,
                [8] = modCTerminalK,
            };

            var digestionParams = new DigestionParams();
            var firstHalf       = new PeptideWithSetModifications(protein, digestionParams, 1, 3, CleavageSpecificity.Unknown, "", 0, modsOnFirstHalf, 0);
            var secondHalf      = new PeptideWithSetModifications(protein, digestionParams, 4, 6, CleavageSpecificity.Unknown, "", 0, modsOnSecondHalf, 0);
            var wholeProtein    = new PeptideWithSetModifications(protein, digestionParams, 1, 6, CleavageSpecificity.Unknown, "", 0, modsOnWholeProtein, 0);

            var peptideSet = new HashSet<PeptideWithSetModifications>();
            peptideSet.Add(firstHalf);
            peptideSet.Add(secondHalf);
            peptideSet.Add(wholeProtein);

            IScan scan = new ThisTestScan();

            // Every PSM is built the same way and given all-zero FDR values.
            PeptideSpectralMatch BuildPsm(PeptideWithSetModifications peptide)
            {
                var psm = new PeptideSpectralMatch(peptide, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>());
                psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
                return psm;
            }

            var allPsms = new List<PeptideSpectralMatch>();
            allPsms.Add(BuildPsm(firstHalf));
            allPsms.Add(BuildPsm(secondHalf));
            allPsms.Add(BuildPsm(wholeProtein));

            foreach (PeptideSpectralMatch psm in allPsms)
            {
                psm.ResolveAllAmbiguities();
            }

            var parsimonyEngine  = new ProteinParsimonyEngine(allPsms, true, new CommonParameters(), new List<string>());
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();

            var scoringEngine = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, allPsms, true, true, true, new CommonParameters(), new List<string>());
            scoringEngine.Run();

            var topGroup = parsimonyResults.ProteinGroups.First();
            topGroup.CalculateSequenceCoverage();

            string coverage = topGroup.SequenceCoverageDisplayList.First();
            Assert.AreEqual("MMKMMK", coverage);

            string coverageWithMods = topGroup.SequenceCoverageDisplayListWithMods.First();
            Assert.AreEqual("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]", coverageWithMods);

            string modInfo = topGroup.ModsInfo.First();

            // Residues 3 and 5 must carry no entry at all; the others report their occupancy.
            Assert.IsTrue(modInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(modInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(modInfo.Contains(@"#aa3"));
            Assert.IsTrue(modInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(modInfo.Contains(@"#aa5"));
            Assert.IsTrue(modInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]"));
        }
        /// <summary>
        /// Builds a fragment index for a single protein with explicitly specified common parameters and runs
        /// <see cref="ModernSearchEngine"/> against the scans of a default <c>TestDataFile</c>.
        /// There are no assertions; the test passes when indexing and searching complete without throwing.
        /// </summary>
        public static void AddCompIonsCommonParams()
        {
            CommonParameters cp = new CommonParameters(null, DissociationType.HCD, DissociationType.Unknown, DissociationType.Unknown, null, true, true, 3, 12, true, true, 1,
                                                       5, 200, 0.01, null, null, false, false, true, false, null, null, null, -1, null, null, null, 1, true, 4, 1);

            var myMsDataFile          = new TestDataFile();
            var variableModifications = new List<Modification>();
            var fixedModifications    = new List<Modification>();

            var proteinList = new List<Protein> {
                new Protein("MNNNKQQQ", null)
            };

            SearchParameters SearchParameters = new SearchParameters
            {
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                SearchTarget         = true,
            };
            List<DigestionMotif> motifs = new List<DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            };
            Protease protease = new Protease("Test", CleavageSpecificity.Full, null, null, motifs);

            // NOTE(review): the protease is registered globally but not referenced by name in this method;
            // the registration is kept unchanged in case other code depends on it.
            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)> {
                ("", cp)
            };

            var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List<SilacLabel>(), null, null,
                                                 1, DecoyType.Reverse, cp, fsp, SearchParameters.MaxFragmentSize, false, new List<FileInfo>(), TargetContaminantAmbiguity.RemoveContaminant, new List<string>());

            var indexResults = (IndexingResults)indexEngine.Run();

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(cp.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            // The search fills allPsmsArray in place; its return value is not inspected.
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArray, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, cp, fsp, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();
        }
        /// <summary>
        /// Runs the classic search in four configurations (with and without a spectral library,
        /// each with the spectral-library-writing flag off and on) and checks that all four report
        /// the same target/decoy identifications.
        /// </summary>
        public static void TestReverseDecoyGenerationDuringSearch()
        {
            CommonParameters CommonParameters = new CommonParameters();

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            var variableModifications = new List<Modification>();
            var fixedModifications    = new List<Modification>();

            var proteinList = new List<Protein>
            {
                new Protein("KKAEDGINK", ""), new Protein("AVNSISLK", ""), new Protein("EKAEAEAEK", ""), new Protein("DITANLR", ""), new Protein("QNAIGTAK", ""),
                new Protein("FHKSQLNK", ""), new Protein("KQVAQWNK", ""), new Protein("NTRIEELK", ""), new Protein("RQPAQPR", ""),
            };
            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            var path        = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\myPrositLib.msp");
            var testLibrary = new SpectralLibrary(new List<string> { path });

            // The first seven PSMs that every configuration is expected to report.
            var expectedPsms = new (bool IsDecoy, string FullSequence)[]
            {
                (false, "DITANLR"),
                (true, "LSISNVAK"),
                (true, "LSISNVAK"),
                (false, "RQPAQPR"),
                (false, "KKAEDGINK"),
                (false, "EKAEAEAEK"),
                (false, "EKAEAEAEK"),
            };

            // Runs one classic search over the sorted scans and returns the non-null PSMs in array order.
            List<PeptideSpectralMatch> RunSearch(SpectralLibrary library, bool writeSpectralLibrary)
            {
                PeptideSpectralMatch[] allPsms = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
                new ClassicSearchEngine(allPsms, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                        proteinList, searchMode, CommonParameters, null, library, new List<string>(), writeSpectralLibrary).Run();
                return allPsms.Where(p => p != null).ToList();
            }

            // Checks the leading PSMs of one search against the expected target/decoy identifications.
            void AssertExpectedPsms(List<PeptideSpectralMatch> psms, string condition)
            {
                for (int i = 0; i < expectedPsms.Length; i++)
                {
                    Assert.That(psms[i].IsDecoy == expectedPsms[i].IsDecoy && psms[i].FullSequence == expectedPsms[i].FullSequence,
                                $"Unexpected PSM at index {i} ({condition})");
                }
            }

            //test when doing spectral library search without generating library
            var psm1 = RunSearch(testLibrary, false);
            AssertExpectedPsms(psm1, "spectral library search, not generating library");

            //the remaining searches are given the decoy explicitly: non spectral library search won't generate
            //decoys by "decoy on the fly", so the protein list used by it has to contain the decoys
            proteinList.Add(new Protein("LSISNVAK", "", isDecoy: true));

            //test when doing spectral library search with generating library
            var psm2 = RunSearch(testLibrary, true);
            AssertExpectedPsms(psm2, "spectral library search, generating library");

            //test when doing non spectral library search without generating library
            var psm3 = RunSearch(null, false);
            AssertExpectedPsms(psm3, "non spectral library search, not generating library");

            //test when doing non spectral library search with generating library
            var psm4 = RunSearch(null, true);
            AssertExpectedPsms(psm4, "non spectral library search, generating library");

            //compare psm's target/decoy results in 4 conditions. they should be same as new decoy methods shouldn't change the t/d results
            //(the fourth search is part of the chain; previously psm3 was compared with itself)
            for (int i = 0; i < psm1.Count; i++)
            {
                Assert.That(psm1[i].FullSequence == psm2[i].FullSequence && psm2[i].FullSequence == psm3[i].FullSequence && psm3[i].FullSequence == psm4[i].FullSequence);
                Assert.That(psm1[i].IsDecoy == psm2[i].IsDecoy && psm2[i].IsDecoy == psm3[i].IsDecoy && psm3[i].IsDecoy == psm4[i].IsDecoy);
            }

            //compare MetaMorpheus scores; for some psms, they should have a little higher score when "generating library" as they switch to all charges ions matching function
            //NOTE(review): only the two spectral library searches (psm1 vs psm2) are compared here
            for (int j = 0; j < psm1.Count; j++)
            {
                if (psm1[j].FullSequence == psm2[j].FullSequence && psm1[j].MatchedFragmentIons.Count != psm2[j].MatchedFragmentIons.Count)
                {
                    Assert.That(psm1[j].Score < psm2[j].Score);
                }
            }
        }
        /// <summary>
        /// Top-down check of the "match ions of all charges" search: the same proteoform is searched
        /// with the last <c>ClassicSearchEngine</c> argument set to true and to false, and the matched
        /// ion counts, scores and the additionally matched ions are compared.
        /// </summary>
        public static void TestMatchIonsOfAllChargesTopDown()
        {
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            // test output file name (should be proteoform and not peptide)
            Assert.That(GlobalVariables.AnalyteType == "Proteoform");

            var variableModifications = new List<Modification>();
            var fixedModifications    = new List<Modification>();
            var proteinList           = new List<Protein>
            {
                new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
            };

            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData\slicedTDYeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //search by new method of looking for all charges
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List<string>(), true).Run();

            var psm = allPsmsArray.FirstOrDefault(p => p != null);

            // fail with an assertion instead of a NullReferenceException when nothing was identified
            Assert.That(psm != null);
            Assert.That(psm.MatchedFragmentIons.Count == 62);

            //search by old method of looking for only one charge
            PeptideSpectralMatch[] allPsmsArray_oneCharge = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray_oneCharge, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List<string>(), false).Run();

            var psm_oneCharge = allPsmsArray_oneCharge.FirstOrDefault(p => p != null);

            Assert.That(psm_oneCharge != null);
            Assert.That(psm_oneCharge.MatchedFragmentIons.Count == 47);

            //compare 2 scores , they should have same integer part but new search has a little higher score than old search
            Assert.That(psm.Score > psm_oneCharge.Score);
            // expected value first, so that a failure message labels the two values correctly
            Assert.AreEqual(47, Math.Truncate(psm.Score));
            Assert.AreEqual(47, Math.Truncate(psm_oneCharge.Score));

            //compare 2 results and evaluate the different matched ions
            var peptideTheorProducts = new List<Product>();
            var differences          = psm.MatchedFragmentIons.Except(psm_oneCharge.MatchedFragmentIons);

            psm.BestMatchingPeptides.First().Peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
            foreach (var ion in differences)
            {
                foreach (var product in peptideTheorProducts)
                {
                    if (product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()))
                    {
                        //to see if the different matched ions are qualified
                        Assert.That(CommonParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
                    }
                }
            }
        }
Example #21
0
        /// <summary>
        /// Writes one <c>search_hit</c> element, including its nested <c>search_score</c> element,
        /// for the given peptide spectral match.
        /// </summary>
        /// <param name="psm">The peptide spectral match to write; it must have a peptide assigned.</param>
        /// <param name="hitRank">Value written to the <c>hit_rank</c> attribute (defaults to 1).</param>
        /// <exception cref="ArgumentNullException"><paramref name="psm"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="psm"/> has no peptide.</exception>
        public void WritePSM(PeptideSpectralMatch psm, int hitRank = 1)
        {
            // Validate before opening the element so that bad input cannot leave a half-written search_hit behind.
            if (psm == null)
            {
                throw new ArgumentNullException("psm");
            }
            if (psm.Peptide == null)
            {
                throw new ArgumentException("The peptide spectral match has no peptide assigned.", "psm");
            }

            // The output is a machine-readable XML file: numbers must not depend on the current thread culture
            // (e.g. a decimal comma), so every numeric attribute is formatted with the invariant culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            _writer.WriteStartElement("search_hit");
            _writer.WriteAttributeString("hit_rank", hitRank.ToString(invariant));
            _writer.WriteAttributeString("peptide", psm.Peptide.Sequence);
            // "-" is written when the peptide has no neighbouring residue on that side
            _writer.WriteAttributeString("peptide_prev_aa", (psm.Peptide.PreviousAminoAcid != null) ? psm.Peptide.PreviousAminoAcid.Letter.ToString() : "-");
            _writer.WriteAttributeString("peptide_next_aa", (psm.Peptide.NextAminoAcid != null) ? psm.Peptide.NextAminoAcid.Letter.ToString() : "-");

            double pepMonoMass = psm.Peptide.MonoisotopicMass;
            double massDifference = spectrumNeutralMass - pepMonoMass;
            _writer.WriteAttributeString("calc_neutral_pep_mass", pepMonoMass.ToString(invariant));
            _writer.WriteAttributeString("massdiff", massDifference.ToString(invariant));

            Protein protein = psm.Peptide.Parent as Protein;

            if (protein != null)
            {
                // NOTE(review): both attributes are written from Description - confirm whether
                // "protein" should carry an accession/identifier instead.
                _writer.WriteAttributeString("protein", protein.Description);
                _writer.WriteAttributeString("protein_descr", protein.Description);
            }

            _writer.WriteAttributeString("num_tot_proteins", "1");
            _writer.WriteAttributeString("is_rejected", "0");

            _writer.WriteStartElement("search_score");
            _writer.WriteAttributeString("name", Enum.GetName(typeof(PeptideSpectralMatchScoreType), psm.ScoreType));
            _writer.WriteAttributeString("value", psm.Score.ToString(invariant));
            _writer.WriteEndElement(); // search_score

            _writer.WriteEndElement(); // search_hit
        }
        /// <summary>
        /// Bottom-up check of the "match ions of all charges" search: the same peptides are searched
        /// with the last <c>ClassicSearchEngine</c> argument set to true and to false, and the matched
        /// ion counts, scores and the additionally matched ions are compared. A second part searches a
        /// protein containing an unknown residue ("QXQ") with an open search mode.
        /// </summary>
        public static void TestMatchIonsOfAllChargesBottomUp()
        {
            CommonParameters CommonParameters = new CommonParameters();

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            var variableModifications = new List<Modification>();
            var fixedModifications    = new List<Modification>();

            var proteinList = new List<Protein>
            {
                new Protein("AAAHSSLK", ""), new Protein("RQPAQPR", ""), new Protein("EKAEAEAEK", "")
            };
            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //search by new method of looking for all charges
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List<string>(), true).Run();
            var psm = allPsmsArray.Where(p => p != null).ToList();

            Assert.That(psm[1].MatchedFragmentIons.Count == 14);
            //there are ions with same product type and same fragment number but different charges
            Assert.That(psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.ProductType == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.ProductType &&
                        psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.FragmentNumber == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.FragmentNumber &&
                        psm[1].MatchedFragmentIons[8].Charge != psm[1].MatchedFragmentIons[9].Charge);
            Assert.That(psm[2].MatchedFragmentIons.Count == 14);
            Assert.That(psm[4].MatchedFragmentIons.Count == 16);

            //search by old method of looking for only one charge
            PeptideSpectralMatch[] allPsmsArray_oneCharge = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray_oneCharge, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List<string>(), false).Run();
            var psm_oneCharge = allPsmsArray_oneCharge.Where(p => p != null).ToList();

            //compare 2 scores , they should have same integer part but new search has a little higher score than old search
            Assert.That(psm[1].Score > psm_oneCharge[1].Score);
            // expected value first, so that a failure message labels the two values correctly
            Assert.AreEqual(12, Math.Truncate(psm[1].Score));
            Assert.AreEqual(12, Math.Truncate(psm_oneCharge[1].Score));

            //compare 2 results and evaluate the different matched ions
            var peptideTheorProducts = new List<Product>();

            Assert.That(psm_oneCharge[1].MatchedFragmentIons.Count == 12);
            var differences = psm[1].MatchedFragmentIons.Except(psm_oneCharge[1].MatchedFragmentIons);

            psm[1].BestMatchingPeptides.First().Peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
            foreach (var ion in differences)
            {
                foreach (var product in peptideTheorProducts)
                {
                    if (product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()))
                    {
                        //to see if the different matched ions are qualified
                        Assert.That(CommonParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
                    }
                }
            }

            //test specific condition: unknown fragment mass; this only happens rarely for sequences with unknown amino acids
            var variableModifications1 = new List<Modification>();
            var fixedModifications1    = new List<Modification>();
            var proteinList1           = new List<Protein> {
                new Protein("QXQ", null)
            };
            var searchModes = new OpenSearchMode();

            // NOTE(review): these scans are read from myMsDataFile (the yeast file loaded above); a separate
            // TestDataFile used to be created here but was never searched. The expected PSM count of 222
            // below depends on the yeast file, so the data source is left unchanged.
            var listOfSortedms2Scans1 = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            List<DigestionMotif> motifs = new List<DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            };
            Protease protease = new Protease("Custom Protease3", CleavageSpecificity.Full, null, null, motifs);

            // The protease is referenced by name in the DigestionParams below.
            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            CommonParameters CommonParameters1 = new CommonParameters(
                digestionParams: new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1),
                scoreCutoff: 1,
                addCompIons: false);
            var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)>();

            // NOTE(review): the file-specific entry carries CommonParameters, not CommonParameters1 - confirm this is intended.
            fsp.Add(("", CommonParameters));

            // sized from the scan array that is actually searched below
            PeptideSpectralMatch[] allPsmsArray1 = new PeptideSpectralMatch[listOfSortedms2Scans1.Length];

            bool writeSpectralLibrary = true;

            new ClassicSearchEngine(allPsmsArray1, listOfSortedms2Scans1, variableModifications1, fixedModifications1, null, null, null,
                                    proteinList1, searchModes, CommonParameters1, fsp, null, new List<string>(), writeSpectralLibrary).Run();

            var psm1 = allPsmsArray1.Where(p => p != null).ToList();

            Assert.AreEqual(222, psm1.Count);
        }
Example #23
0
        /// <summary>
        /// Assigns FDR values (cumulative target/decoy counts, q-values, per-notch q-values and,
        /// when <c>CalculateEValue</c> is set, e-values) to the PSMs in <c>AllPsms</c>. PSMs are
        /// processed separately for each protease. Afterwards <c>CountPsm</c> is called when
        /// <c>AnalysisType</c> is "PSM".
        /// </summary>
        /// <param name="myAnalysisResults">
        /// Results object; its <c>DeltaScoreImprovement</c> flag is set when <c>UseDeltaScore</c> is enabled.
        /// </param>
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // Secondary sort key shared by every ordering below: absolute precursor mass error,
            // with PSMs that have no peptide mass sorted last.
            Func<PeptideSpectralMatch, double> precursorMassError = b =>
                b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue;

            // Key used to keep only the best PSM per (file, scan, peptide mass) when comparing Score and DeltaScore.
            Func<PeptideSpectralMatch, Tuple<string, int, double?>> scanAndPeptideMass = b =>
                new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass);

            // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
            var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

            foreach (var proteasePsms in psmsGroupedByProtease)
            {
                var psms = proteasePsms.ToList();

                // generate the null distribution for e-value calculations
                double globalMeanScore = 0;
                int    globalMeanCount = 0;

                if (CalculateEValue && psms.Any())
                {
                    List<double> combinedScores = new List<double>();

                    foreach (PeptideSpectralMatch psm in psms)
                    {
                        psm.AllScores.Sort();

                        // Add this PSM's scores without its top score. After the ascending sort the top score
                        // is the last element appended. A PSM without scores must not remove anything,
                        // otherwise a score belonging to a previous PSM would be dropped.
                        if (psm.AllScores.Count > 0)
                        {
                            combinedScores.AddRange(psm.AllScores);
                            combinedScores.RemoveAt(combinedScores.Count - 1);
                        }
                    }

                    if (combinedScores.Any())
                    {
                        globalMeanScore = combinedScores.Average();
                        globalMeanCount = (int)((double)combinedScores.Count / psms.Count);
                    }
                    else
                    {
                        // should be a very rare case... if there are PSMs but each PSM only has one hit
                        globalMeanScore = 0;
                        globalMeanCount = 0;
                    }
                }

                //Calculate delta scores for the psms (regardless of if we are using them)
                foreach (PeptideSpectralMatch psm in psms)
                {
                    if (psm != null)
                    {
                        psm.CalculateDeltaScore(ScoreCutoff);
                    }
                }

                // Deferred orderings; each one is re-evaluated (re-sorted) whenever it is enumerated below.
                var byScore = psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError);

                //determine if Score or DeltaScore performs better
                if (UseDeltaScore)
                {
                    const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                    var byDeltaScore = psms.OrderByDescending(b => b.DeltaScore).ThenBy(precursorMassError);

                    int scorePsmCount      = GetNumPSMsAtqValueCutoff(byScore.GroupBy(scanAndPeptideMass).Select(g => g.First()).ToList(), qValueCutoff);
                    int deltaScorePsmCount = GetNumPSMsAtqValueCutoff(byDeltaScore.GroupBy(scanAndPeptideMass).Select(g => g.First()).ToList(), qValueCutoff);

                    //sort by best method
                    myAnalysisResults.DeltaScoreImprovement = deltaScorePsmCount > scorePsmCount;
                    psms = (myAnalysisResults.DeltaScoreImprovement ? byDeltaScore : byScore).ToList();
                }
                else //sort by score
                {
                    psms = byScore.ToList();
                }

                double cumulativeTarget = 0;
                double cumulativeDecoy  = 0;

                //set up arrays for local FDRs
                double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
                double[] cumulativeDecoyPerNotch  = new double[MassDiffAcceptorNumNotches + 1];

                //Assign FDR values to PSMs
                for (int i = 0; i < psms.Count; i++)
                {
                    // Stop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }

                    PeptideSpectralMatch psm = psms[i];
                    // PSMs without a notch are counted in the extra, last slot of the per-notch arrays
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.IsDecoy)
                    {
                        // the PSM can be ambiguous between a target and a decoy sequence
                        // in that case, count it as the fraction of decoy hits
                        // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                        double decoyHits = 0;
                        double totalHits = 0;
                        var    hits      = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);
                        foreach (var hit in hits)
                        {
                            if (hit.First().Peptide.Protein.IsDecoy)
                            {
                                decoyHits++;
                            }
                            totalHits++;
                        }

                        cumulativeDecoy += decoyHits / totalHits;
                        cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
                    }
                    else
                    {
                        cumulativeTarget++;
                        cumulativeTargetPerNotch[notch]++;
                    }

                    double qValue      = Math.Min(1, cumulativeDecoy / cumulativeTarget);
                    double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

                    double maximumLikelihood = 0;
                    double eValue            = 0;
                    double eScore            = 0;
                    if (CalculateEValue)
                    {
                        eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                        eScore = -Math.Log(eValue, 10);
                    }

                    psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
                }

                // set q-value thresholds such that a lower scoring PSM can't have
                // a higher confidence than a higher scoring PSM
                //Populate min qValues
                double   qValueThreshold      = 1.0;
                double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
                for (int i = 0; i < qValueNotchThreshold.Length; i++)
                {
                    qValueNotchThreshold[i] = 1.0;
                }

                // walk from the lowest-ranked PSM upwards, carrying the smallest q-value seen so far
                for (int i = psms.Count - 1; i >= 0; i--)
                {
                    PeptideSpectralMatch psm = psms[i];

                    // When the assignment loop above was canceled, the remaining PSMs never reached
                    // SetFdrValues and may still have no FdrInfo; skip them instead of dereferencing null.
                    if (psm.FdrInfo == null)
                    {
                        continue;
                    }

                    // threshold q-values
                    if (psm.FdrInfo.QValue > qValueThreshold)
                    {
                        psm.FdrInfo.QValue = qValueThreshold;
                    }
                    else if (psm.FdrInfo.QValue < qValueThreshold)
                    {
                        qValueThreshold = psm.FdrInfo.QValue;
                    }

                    // threshold notch q-values
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
                    {
                        psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
                    }
                    else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
                    {
                        qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
                    }
                }
            }

            if (AnalysisType == "PSM")
            {
                CountPsm();
            }
        }
Example #24
0
        public static void TryFailSequenceCoverage()
        {
            var prot1 = new Protein("MMKMMK", "prot1");

            ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
            ModificationWithMass mod1 = new ModificationWithMass("mod1", "mt", motifM, TerminusLocalization.NProt, 10);
            ModificationWithMass mod2 = new ModificationWithMass("mod2", "mt", motifM, TerminusLocalization.NPep, 10);
            ModificationWithMass mod3 = new ModificationWithMass("mod3", "mt", motifM, TerminusLocalization.Any, 10);

            ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
            ModificationWithMass mod4 = new ModificationWithMass("mod4", "mt", motifK, TerminusLocalization.PepC, 10);
            ModificationWithMass mod5 = new ModificationWithMass("mod5", "mt", motifK, TerminusLocalization.ProtC, 10);

            Dictionary <int, ModificationWithMass> modsFor1 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 3, mod3 },
                { 5, mod4 },
            };
            Dictionary <int, ModificationWithMass> modsFor2 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod2 },
                { 5, mod5 },
            };
            Dictionary <int, ModificationWithMass> modsFor3 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 5, mod3 },
                { 8, mod5 }
            };

            DigestionParams digestionParams = new DigestionParams();
            var             pwsm1           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 3, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor1, numFixedMods: 0);
            var             pwsm2           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 4, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor2, numFixedMods: 0);
            var             pwsm3           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor3, numFixedMods: 0);

            HashSet <PeptideWithSetModifications> peptides = new HashSet <PeptideWithSetModifications>
            {
                pwsm1,
                pwsm2,
                pwsm3,
            };

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { pwsm1.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm1
                  } },
                { pwsm2.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm2
                  } },
                { pwsm3.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm3
                  } },
            };

            IScan scan = new ThisTestScan();
            var   psm1 = new PeptideSpectralMatch(pwsm1.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm1.MatchToProteinLinkedPeptides(matching);
            var psm2 = new PeptideSpectralMatch(pwsm2.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm2.MatchToProteinLinkedPeptides(matching);
            var psm3 = new PeptideSpectralMatch(pwsm3.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm3.MatchToProteinLinkedPeptides(matching);

            List <PeptideSpectralMatch> newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3,
            };

            ProteinParsimonyEngine  ppe  = new ProteinParsimonyEngine(matching, true, new CommonParameters(), new List <string>());
            ProteinParsimonyResults fjkd = (ProteinParsimonyResults)ppe.Run();

            ProteinScoringAndFdrEngine psafe = new ProteinScoringAndFdrEngine(fjkd.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List <string>());

            psafe.Run();

            fjkd.ProteinGroups.First().CalculateSequenceCoverage();

            var firstSequenceCoverageDisplayList = fjkd.ProteinGroups.First().SequenceCoverageDisplayList.First();

            Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);
            var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();

            Assert.AreEqual("[mod1]-MM[mod3]KM[mod3]MK-[mod5]", firstSequenceCoverageDisplayListWithMods);

            var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First();

            Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5,info:occupancy=1.00(2/2)]"));
        }
        /// <summary>
        /// Searches every MS2 scan in <c>listOfSortedms2Scans</c> against the fragment and precursor
        /// indexes and stores the resulting match for scan <c>i</c> in <c>globalPsms[i]</c>.
        /// </summary>
        /// <remarks>
        /// The scan range is partitioned across worker threads. Each worker owns its own scoring table and
        /// candidate-id set; the only state shared between workers is the progress counter, which is
        /// therefore updated under a lock.
        /// </remarks>
        /// <returns>A <see cref="MetaMorpheusEngineResults"/> constructed from this engine.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>lp</c> contains a product type other than B, Y, C or Zdot.
        /// </exception>
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            double progress = 0;
            int oldPercentProgress = 0;

            // progress and oldPercentProgress are touched by every worker thread; serialize access to them
            object progressLock = new object();

            ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
            TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

            Parallel.ForEach(Partitioner.Create(0, listOfSortedms2Scans.Length), new ParallelOptions { MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile }, range =>
            {
                // per-worker scratch space, reused for every scan in this range
                byte[] scoringTable = new byte[peptideIndex.Count];
                HashSet<int> idsOfPeptidesPossiblyObserved = new HashSet<int>();

                for (int i = range.Item1; i < range.Item2; i++)
                {
                    // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
                    Array.Clear(scoringTable, 0, scoringTable.Length);
                    idsOfPeptidesPossiblyObserved.Clear();
                    var scan = listOfSortedms2Scans[i];

                    // get bins to add points to
                    List<int> allBinsToSearch = GetBinsToSearch(scan);

                    // initial score: one point per searched bin that lists the peptide id
                    for (int j = 0; j < allBinsToSearch.Count; j++)
                    {
                        fragmentIndex[allBinsToSearch[j]].ForEach(id => scoringTable[id]++);
                    }

                    // populate ids of possibly observed with those containing allowed precursor masses
                    List<int> binsToSearch = new List<int>();
                    int obsPrecursorFloorMz = (int)Math.Floor(CommonParameters.PrecursorMassTolerance.GetMinimumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
                    int obsPrecursorCeilingMz = (int)Math.Ceiling(CommonParameters.PrecursorMassTolerance.GetMaximumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
                    for (int fragmentBin = obsPrecursorFloorMz; fragmentBin <= obsPrecursorCeilingMz; fragmentBin++)
                    {
                        binsToSearch.Add(fragmentBin);
                    }

                    foreach (ProductType pt in lp)
                    {
                        int binShift;
                        switch (pt)
                        {
                            case ProductType.B:
                                binShift = bBinShift;
                                break;

                            case ProductType.Y:
                                binShift = 0;
                                break;

                            case ProductType.C:
                                binShift = cBinShift;
                                break;

                            case ProductType.Zdot:
                                binShift = zdotBinShift;
                                break;

                            default:
                                throw new NotImplementedException();
                        }

                        for (int j = 0; j < binsToSearch.Count; j++)
                        {
                            // the shift is subtracted, so the index may fall below zero if the shift exceeds the bin;
                            // such a bin cannot exist in the index and is skipped instead of throwing
                            int bin = binsToSearch[j] - binShift;
                            if (bin >= 0 && bin < fragmentIndex.Length && fragmentIndex[bin] != null)
                            {
                                fragmentIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                            }
                        }
                    }

                    // unshifted lookup in the precursor index
                    for (int j = 0; j < binsToSearch.Count; j++)
                    {
                        int bin = binsToSearch[j];
                        if (bin < fragmentIndexPrecursor.Length && fragmentIndexPrecursor[bin] != null)
                        {
                            fragmentIndexPrecursor[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                        }
                    }

                    // done with initial scoring; refine scores and create PSMs
                    if (idsOfPeptidesPossiblyObserved.Any())
                    {
                        // start one above the best initial score because the loop decrements before use;
                        // candidates are then visited tier by tier, down to and including the score cutoff
                        int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                        while (maxInitialScore > CommonParameters.ScoreCutoff)
                        {
                            maxInitialScore--;
                            foreach (var id in idsOfPeptidesPossiblyObserved.Where(id => scoringTable[id] == maxInitialScore))
                            {
                                var candidatePeptide = peptideIndex[id];
                                double[] fragmentMasses = candidatePeptide.ProductMassesMightHaveDuplicatesAndNaNs(lp).Distinct().Where(p => !Double.IsNaN(p)).OrderBy(p => p).ToArray();

                                double peptideScore = CalculatePeptideScore(scan.TheScan, CommonParameters.ProductMassTolerance, fragmentMasses, scan.PrecursorMass, dissociationTypes, addCompIons, maximumMassThatFragmentIonScoreIsDoubled);

                                // a negative notch means the precursor mass was not accepted for this candidate
                                Tuple<int, double> notchAndPrecursor = Accepts(scan.PrecursorMass, candidatePeptide, terminusType, massDiffAcceptor);
                                if (notchAndPrecursor.Item1 >= 0)
                                {
                                    CompactPeptideWithModifiedMass cp = new CompactPeptideWithModifiedMass(candidatePeptide, notchAndPrecursor.Item2);

                                    if (globalPsms[i] == null)
                                    {
                                        globalPsms[i] = new PeptideSpectralMatch(cp, notchAndPrecursor.Item1, peptideScore, i, scan);
                                    }
                                    else
                                    {
                                        globalPsms[i].AddOrReplace(cp, peptideScore, notchAndPrecursor.Item1, CommonParameters.ReportAllAmbiguity);
                                    }
                                }
                            }

                            // stop descending through score tiers once this scan has a PSM
                            if (globalPsms[i] != null)
                            {
                                break;
                            }
                        }
                    }

                    // report search progress; the counters are updated under the lock, but the
                    // notification itself is raised outside it so handlers never run while it is held
                    int percentToReport = -1;
                    lock (progressLock)
                    {
                        progress++;
                        int percentProgress = (int)((progress / listOfSortedms2Scans.Length) * 100);

                        if (percentProgress > oldPercentProgress)
                        {
                            oldPercentProgress = percentProgress;
                            percentToReport = percentProgress;
                        }
                    }

                    if (percentToReport >= 0)
                    {
                        ReportProgress(new ProgressEventArgs(percentToReport, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
                    }
                }
            });

            return new MetaMorpheusEngineResults(this);
        }
        /// <summary>
        /// Runs the modern search twice over the same test data file and the same index -- once with the
        /// default parameters and once with <c>addCompIons: true</c> -- and checks that the run with
        /// complementary ions scores the first scan higher, but no more than twice as high.
        /// </summary>
        public static void TestCompIons_ModernSearch()
        {
            var myMsDataFile = new TestDataFile();
            var variableModifications = new List<Modification>();
            var fixedModifications = new List<Modification>();

            var proteinList = new List<Protein> { new Protein("MNNNKQQQ", null) };

            SearchParameters SearchParameters = new SearchParameters
            {
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                SearchTarget = true,
            };

            List<DigestionMotif> motifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
            Protease protease = new Protease("singleN4", CleavageSpecificity.Full, null, null, motifs);

            // the protease has to be registered by name because DigestionParams looks it up by name
            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, minPeptideLength: 1), scoreCutoff: 1);
            var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)> { ("", CommonParameters) };

            // identical to the parameters above except that complementary ions are enabled
            CommonParameters withCompIons = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, minPeptideLength: 1), scoreCutoff: 1, addCompIons: true);

            // the file-specific entry must carry the same parameters as the engine it is handed to
            var fspComp = new List<(string fileName, CommonParameters fileSpecificParameters)> { ("", withCompIons) };

            var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, null, null,
                                                 1, DecoyType.Reverse, CommonParameters, fsp, SearchParameters.MaxFragmentSize, false, new List<FileInfo>(), TargetContaminantAmbiguity.RemoveContaminant, new List<string>());

            var indexResults = (IndexingResults)indexEngine.Run();

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            // without complementary ions
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArray, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, CommonParameters, fsp, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();

            // with complementary ions
            PeptideSpectralMatch[] allPsmsArray2 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArray2, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, withCompIons, fspComp, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();

            // both runs cover the same scans
            Assert.AreEqual(allPsmsArray.Length, allPsmsArray2.Length);

            // the first scan was identified in both runs
            Assert.That(allPsmsArray[0] != null);
            Assert.That(allPsmsArray2[0] != null);

            Assert.IsTrue(allPsmsArray2[0].Score > 1);

            Assert.AreEqual(allPsmsArray[0].ScanNumber, allPsmsArray2[0].ScanNumber);

            // complementary ions add more than 3 to the score, but never more than double it
            Assert.IsTrue(allPsmsArray2[0].Score <= allPsmsArray[0].Score * 2 && allPsmsArray2[0].Score > allPsmsArray[0].Score + 3);
        }
Example #27
0
        /// <summary>
        /// Matches internal fragment ions for every identified PSM and uses them to prune ambiguous
        /// peptide assignments: peptides that fall two or more matched internal ions short of the best
        /// peptide for that PSM are removed, and the surviving peptides get their matched internal ions
        /// appended to the PSM's existing matched-ion lists.
        /// </summary>
        /// <param name="fileSpecificPsms">PSMs to process; null entries and entries without matching peptides are skipped.</param>
        /// <param name="arrayOfMs2ScansSortedByMass">Scans indexed in parallel with <paramref name="fileSpecificPsms"/>; element <c>i</c> is used for PSM <c>i</c>.</param>
        /// <param name="combinedParams">Supplies the dissociation type and is forwarded to the fragment-ion matcher.</param>
        /// <param name="minInternalFragmentLength">Minimum length handed to <c>FragmentInternally</c>.</param>
        public static void MatchInternalFragmentIons(PeptideSpectralMatch[] fileSpecificPsms, Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass, CommonParameters combinedParams, int minInternalFragmentLength)
        {
            //for each PSM with an ID
            for (int index = 0; index < fileSpecificPsms.Length; index++)
            {
                PeptideSpectralMatch psm = fileSpecificPsms[index];
                if (psm == null || !psm.BestMatchingPeptides.Any())
                {
                    continue;
                }

                //Get the scan
                Ms2ScanWithSpecificMass scanForThisPsm = arrayOfMs2ScansSortedByMass[index];

                //with Autodetect the type recorded on the scan itself is used
                //NOTE(review): .Value presumably throws if the scan carries no dissociation type -- confirm callers guarantee one
                DissociationType dissociationType = combinedParams.DissociationType == DissociationType.Autodetect ?
                                                    scanForThisPsm.TheScan.DissociationType.Value : combinedParams.DissociationType;

                //Get the theoretical peptides; copied into lists up front because peptides may be removed from the PSM below
                List<PeptideWithSetModifications> ambiguousPeptides = new List<PeptideWithSetModifications>();
                List<int> notches = new List<int>();
                foreach (var (notch, peptide) in psm.BestMatchingPeptides)
                {
                    ambiguousPeptides.Add(peptide);
                    notches.Add(notch);
                }

                //get matched ions for each peptide
                List<List<MatchedFragmentIon>> matchedIonsForAllAmbiguousPeptides = new List<List<MatchedFragmentIon>>();
                List<Product> internalFragments = new List<Product>();
                foreach (PeptideWithSetModifications peptide in ambiguousPeptides)
                {
                    internalFragments.Clear();

                    //fragment with the resolved type rather than the raw setting, which may still be Autodetect
                    peptide.FragmentInternally(dissociationType, minInternalFragmentLength, internalFragments);

                    //TODO: currently, internal and terminal ions can match to the same observed peaks (much like how b- and y-ions can match to the same peaks). Investigate if we should change that...
                    matchedIonsForAllAmbiguousPeptides.Add(MetaMorpheusEngine.MatchFragmentIons(scanForThisPsm, internalFragments, combinedParams));
                }

                //Find the max number of matched ions
                int maxNumMatchedIons = matchedIonsForAllAmbiguousPeptides.Max(x => x.Count);

                //remove peptides if they have fewer than max-1 matched ions, thus requiring at least two internal ions to disambiguate an ID
                //if not removed, then add the matched internal ions
                HashSet<PeptideWithSetModifications> peptidesAlreadyExtended = new HashSet<PeptideWithSetModifications>();
                for (int peptideIndex = 0; peptideIndex < ambiguousPeptides.Count; peptideIndex++)
                {
                    PeptideWithSetModifications currentPwsm = ambiguousPeptides[peptideIndex];
                    List<MatchedFragmentIon> matchedInternalIons = matchedIonsForAllAmbiguousPeptides[peptideIndex];

                    if (matchedInternalIons.Count + 1 < maxNumMatchedIons)
                    {
                        //if we should remove the theoretical, remove it
                        psm.RemoveThisAmbiguousPeptide(notches[peptideIndex], currentPwsm);
                    }
                    else if (peptidesAlreadyExtended.Add(currentPwsm))
                    {
                        //Add returns false for a peptide seen before (e.g. the same peptide under another notch),
                        //so its internal ions are appended to the total ions exactly once
                        psm.PeptidesToMatchingFragments[currentPwsm].AddRange(matchedInternalIons);
                    }
                }
            }
        }
Example #28
0
        /// <summary>
        /// Digests one serine-containing protein with a variable serine modification, checks the full
        /// sequences of the digestion products, builds three PSMs from them, runs protein parsimony and
        /// protein scoring/FDR, and verifies the modification-occupancy string reported for the first
        /// protein group.
        /// </summary>
        public static void TestPTMOutput()
        {
            var fixedMods = new List<Modification>();
            var variableMods = new List<Modification>();

            // a single variable modification that can sit on any serine
            ModificationMotif.TryGetMotif("S", out ModificationMotif serineMotif);
            variableMods.Add(new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: serineMotif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));

            var proteinList = new List<Protein>
            {
                new Protein("MNNNSKQQQ", "accession")
            };

            // register a protease built from a single "K" motif so it can be referenced by name
            var cleavageMotifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
            var customProtease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, cleavageMotifs);
            ProteaseDictionary.Dictionary.Add(customProtease.Name, customProtease);

            // not consulted by any assertion below
            var modToIndex = new Dictionary<Modification, ushort>();
            modToIndex.Add(variableMods.Last(), 1);

            DigestionParams digestionParams = new DigestionParams(protease: customProtease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

            Protein onlyProtein = proteinList.First();

            // the last digestion product is the unmodified C-terminal peptide
            PeptideWithSetModifications lastPeptide = onlyProtein.Digest(digestionParams, fixedMods, variableMods).Last();
            Assert.AreEqual("QQQ", lastPeptide.FullSequence);//this might be base

            // the first four products, in digestion order
            List<PeptideWithSetModifications> digestionProducts = onlyProtein.Digest(digestionParams, fixedMods, variableMods).ToList();
            string[] expectedFullSequences =
            {
                "MNNNSK",
                "MNNNS[HaHa:resMod on S]K",
                "NNNSK",
                "NNNS[HaHa:resMod on S]K",
            };
            for (int k = 0; k < expectedFullSequences.Length; k++)
            {
                Assert.AreEqual(expectedFullSequences[k], digestionProducts[k].FullSequence);//this might be base
            }

            // gather every peptide of every protein, this time with a fresh empty fixed-modification list
            var peptideList = new HashSet<PeptideWithSetModifications>();
            foreach (Protein protein in proteinList)
            {
                peptideList.UnionWith(protein.Digest(digestionParams, new List<Modification>(), variableMods));
            }

            MsDataScan dataScan = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(dataScan, 2, 0, "File", new CommonParameters());

            // not consulted by any assertion below
            Tolerance unusedFragmentTolerance = new AbsoluteTolerance(0.01);

            // three PSMs: one for the peptide at position 0 and two for the peptide at position 1
            var psms = new List<PeptideSpectralMatch>();
            foreach (int peptidePosition in new[] { 0, 1, 1 })
            {
                var match = new PeptideSpectralMatch(peptideList.ElementAt(peptidePosition), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
                match.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
                psms.Add(match);
            }

            foreach (PeptideSpectralMatch match in psms)
            {
                match.ResolveAllAmbiguities();
            }

            var parsimonyEngine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), new List<string> { "ff" });
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();
            var proteinGroups = parsimonyResults.ProteinGroups;

            var scoringEngine = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List<string>());
            scoringEngine.Run();

            Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
        }
Example #29
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            if (SearchParameters.DoQuantification)
            {
                // disable quantification if a .mgf is being used
                if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
                {
                    SearchParameters.DoQuantification = false;
                }
                //if we're doing SILAC, assign and add the silac labels to the residue dictionary
                else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                {
                    char heavyLabel = 'a'; //char to assign
                    //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
                    if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                    {
                        //original silacLabels object is null, so we need to initialize it
                        SearchParameters.SilacLabels = new List <SilacLabel>();
                        if (SearchParameters.StartTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                        }
                        if (SearchParameters.EndTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                        }
                    }
                    else
                    {
                        //change the silac residues to lower case amino acids (currently null)
                        List <SilacLabel> updatedLabels = new List <SilacLabel>();
                        for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            updatedLabels.Add(updatedLabel.updatedLabel);
                        }
                        SearchParameters.SilacLabels = updatedLabels;
                    }
                }
            }
            //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
            if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
            {
                SearchParameters.SilacLabels = null;
            }

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            SanitizeProteinDatabase(proteinList, SearchParameters.TCAmbiguity);

            // load spectral libraries
            var spectralLibrary = LoadSpectralLibraries(taskId, dbFilenameList);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
                                            + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
            int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast <FdrCategory>().Last() + 1); //+1 because it starts at zero

            List <PeptideSpectralMatch>[] allCategorySpecificPsms = new List <PeptideSpectralMatch> [numFdrCategories];
            for (int i = 0; i < numFdrCategories; i++)
            {
                allCategorySpecificPsms[i] = new List <PeptideSpectralMatch>();
            }

            FlashLfqResults flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <PeptideWithSetModifications> peptideIndex = null;
                        List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                                                                                ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                             SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, FileSpecificParameters,
                                                             SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex  = null;
                        List <int>[] precursorIndex = null;

                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                                               combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        if (GlobalVariables.StopLoops)
                        {
                            break;
                        }
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
                    for (int i = 0; i < numFdrCategories; i++)                                                                      //only add if we're using for FDR, else ignore it as null.
                    {
                        fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                    }

                    //create params for N, C, or both if semi
                    List <CommonParameters> paramsToUse = new List <CommonParameters> {
                        combinedParams
                    };
                    if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
                    {
                        paramsToUse.Clear();
                        List <FragmentationTerminus> terminiToUse = new List <FragmentationTerminus> {
                            FragmentationTerminus.N, FragmentationTerminus.C
                        };
                        foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                        {
                            paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                        }
                    }

                    //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
                    List <int>[] coisolationIndex = new List <int>[] { new List <int>() };
                    if (arrayOfMs2ScansSortedByMass.Length != 0)
                    {
                        int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                        coisolationIndex = new List <int> [maxScanNumber + 1];
                        for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                        {
                            int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                            if (coisolationIndex[scanNumber] == null)
                            {
                                coisolationIndex[scanNumber] = new List <int> {
                                    i
                                };
                            }
                            else
                            {
                                coisolationIndex[scanNumber].Add(i);
                            }
                        }
                        coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
                    }

                    //foreach terminus we're going to look at
                    foreach (CommonParameters paramToUse in paramsToUse)
                    {
                        //foreach database partition
                        for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                        {
                            List <PeptideWithSetModifications> peptideIndex = null;

                            List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                                                                                    ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                            List <int>[] fragmentIndex  = null;
                            List <int>[] precursorIndex = null;

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                 SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, FileSpecificParameters,
                                                                 SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                            {
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                                                              precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                                                              SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                            if (GlobalVariables.StopLoops)
                            {
                                break;
                            }
                        }
                    }
                    lock (psmLock)
                    {
                        for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                        {
                            if (allCategorySpecificPsms[i] != null)
                            {
                                allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                            }
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                         SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId, SearchParameters.WriteSpectralLibrary);
                    newClassicSearchEngine.Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }

                //look for internal fragments
                if (SearchParameters.MinAllowedInternalFragmentLength != 0)
                {
                    MatchInternalFragmentIons(fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams, SearchParameters.MinAllowedInternalFragmentLength);
                }

                // calculate/set spectral angles if there is a spectral library being used
                if (spectralLibrary != null)
                {
                    Status("Calculating spectral library similarity...", thisId);
                }
                SpectralLibrarySearchFunction.CalculateSpectralAngles(spectralLibrary, fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams);

                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms);
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            if (spectralLibrary != null)
            {
                spectralLibrary.CloseConnections();
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            //resolve category specific fdrs (for speedy semi and nonspecific
            if (SearchParameters.SearchType == SearchType.NonSpecific)
            {
                allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
            }

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
            {
                SearchTaskResults             = MyTaskResults,
                SearchTaskId                  = taskId,
                SearchParameters              = SearchParameters,
                ProteinList                   = proteinList,
                AllPsms                       = allPsms,
                VariableModifications         = variableModifications,
                FixedModifications            = fixedModifications,
                ListOfDigestionParams         = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
                CurrentRawFileList            = currentRawFileList,
                MyFileManager                 = myFileManager,
                NumNotches                    = numNotches,
                OutputFolder                  = OutputFolder,
                IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
                FlashLfqResults               = flashLfqResults,
                FileSettingsList              = fileSettingsList,
                NumMs2SpectraPerFile          = numMs2SpectraPerFile,
                DatabaseFilenameList          = dbFilenameList
            };
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
            {
                Parameters             = parameters,
                FileSpecificParameters = this.FileSpecificParameters,
                CommonParameters       = CommonParameters
            };

            return(postProcessing.Run());
        }
Example #30
0
        /// <summary>
        /// Digests a small test protein, builds one PSM for each of the first three peptides, offers the
        /// fourth peptide to the third PSM as an additional candidate, runs <see cref="FdrAnalysisEngine"/>
        /// over the three PSMs and checks the cumulative target/decoy counts (overall and per notch).
        /// </summary>
        public static void FdrTestMethod()
        {
            // acceptor built from two mass offsets (0 and 1.0029); the test asserts below that it reports two notches
            MassDiffAcceptor acceptor = new DotMassDiffAcceptor(null, new List <double> {
                0, 1.0029
            }, new PpmTolerance(5));
            var emptyNestedIds = new List <string>();

            var testProtein = new Protein("MNKNNKNNNKNNNNK", null);
            var digestion   = new DigestionParams();
            var peptides    = testProtein.Digest(digestion, new List <Modification>(), new List <Modification>()).ToList();

            PeptideWithSetModifications firstPeptide  = peptides[0];
            PeptideWithSetModifications secondPeptide = peptides[1];
            PeptideWithSetModifications thirdPeptide  = peptides[2];
            PeptideWithSetModifications fourthPeptide = peptides[3];

            // only the first three peptides go into the synthetic data file
            var dataFile = new TestDataFile(new List <PeptideWithSetModifications> {
                firstPeptide, secondPeptide, thirdPeptide
            });

            // scans 2, 4 and 6 are the ones paired with the three peptides
            var firstScan = new Ms2ScanWithSpecificMass(dataFile.GetOneBasedScan(2), firstPeptide.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
            var firstPsm  = new PeptideSpectralMatch(firstPeptide, 0, 3, 0, firstScan, digestion, new List <MatchedFragmentIon>());

            var secondScan = new Ms2ScanWithSpecificMass(dataFile.GetOneBasedScan(4), secondPeptide.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
            var secondPsm  = new PeptideSpectralMatch(secondPeptide, 1, 2, 1, secondScan, digestion, new List <MatchedFragmentIon>());

            var thirdScan = new Ms2ScanWithSpecificMass(dataFile.GetOneBasedScan(6), thirdPeptide.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
            var thirdPsm  = new PeptideSpectralMatch(thirdPeptide, 0, 1, 2, thirdScan, digestion, new List <MatchedFragmentIon>());

            // give the third PSM a second candidate peptide
            thirdPsm.AddOrReplace(fourthPeptide, 1, 1, true, new List <MatchedFragmentIon>(), 0);

            var psmsUnderTest = new List <PeptideSpectralMatch> {
                firstPsm, secondPsm, thirdPsm
            };

            psmsUnderTest.ForEach(match => match.ResolveAllAmbiguities());

            new FdrAnalysisEngine(psmsUnderTest, acceptor.NumNotches, new CommonParameters(), emptyNestedIds).Run();

            Assert.AreEqual(2, acceptor.NumNotches);

            // per-notch counters: every PSM is expected to show no decoys and exactly one target
            for (int index = 0; index < 3; index++)
            {
                Assert.AreEqual(0, psmsUnderTest[index].FdrInfo.CumulativeDecoyNotch);
                Assert.AreEqual(1, psmsUnderTest[index].FdrInfo.CumulativeTargetNotch);
            }

            // overall counters: no decoys, and the target count grows by one per PSM
            for (int index = 0; index < 3; index++)
            {
                Assert.AreEqual(0, psmsUnderTest[index].FdrInfo.CumulativeDecoy);
                Assert.AreEqual(index + 1, psmsUnderTest[index].FdrInfo.CumulativeTarget);
            }
        }
Example #31
0
        /// <summary>
        /// Runs the index-based ("modern") search. The sorted MS2 scans are split across worker threads in a
        /// strided fashion (worker <c>i</c> handles scans <c>i, i + maxThreads, i + 2 * maxThreads, ...</c>);
        /// each scan is first scored against the fragment index, then every candidate peptide is re-scored
        /// from its matched fragment ions and stored in (or merged into) <c>PeptideSpectralMatches</c> at the
        /// scan's index. Progress is reported as an integer percentage of scans processed.
        /// </summary>
        /// <returns>A <see cref="MetaMorpheusEngineResults"/> constructed from this engine.</returns>
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            double progress           = 0;
            int    oldPercentProgress = 0;

            // progress and oldPercentProgress are updated by every worker thread below; this lock keeps
            // the increment and the "has the percentage advanced?" check atomic
            object progressLock = new object();

            ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing modern search... " + CurrentPartition + "/" + commonParameters.TotalPartitions, nestedIds));

            byte byteScoreCutoff = (byte)commonParameters.ScoreCutoff;

            if (commonParameters.CalculateEValue)
            {
                // low-scoring candidates are kept while e-values are being collected;
                // PSMs below the real cutoff are removed again after the parallel loop
                byteScoreCutoff = 1;
            }

            int maxThreadsPerFile = commonParameters.MaxThreadsToUsePerFile;

            int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
            Parallel.ForEach(threads, (i) =>
            {
                // per-thread scratch buffers, reused for every scan this worker handles
                byte[] scoringTable = new byte[PeptideIndex.Count];
                List <int> idsOfPeptidesPossiblyObserved = new List <int>();

                for (; i < ListOfSortedMs2Scans.Length; i += maxThreadsPerFile)
                {
                    // Stop loop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        return;
                    }

                    // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
                    Array.Clear(scoringTable, 0, scoringTable.Length);
                    idsOfPeptidesPossiblyObserved.Clear();
                    Ms2ScanWithSpecificMass scan = ListOfSortedMs2Scans[i];

                    // get fragment bins for this scan
                    List <int> allBinsToSearch = GetBinsToSearch(scan);

                    // get allowed theoretical masses from the known experimental mass
                    // note that this is the OPPOSITE of the classic search (which calculates experimental masses from theoretical values)
                    // this is just PRELIMINARY precursor-mass filtering
                    // additional checks are made later to ensure that the theoretical precursor mass is acceptable
                    // (materialized once so the Min and Max below do not enumerate the sequence twice)
                    List <AllowedIntervalWithNotch> notches = MassDiffAcceptor.GetAllowedPrecursorMassIntervalsFromObservedMass(scan.PrecursorMass).ToList();

                    double lowestMassPeptideToLookFor  = notches.Min(p => p.AllowedInterval.Minimum);
                    double highestMassPeptideToLookFor = notches.Max(p => p.AllowedInterval.Maximum);

                    // first-pass scoring
                    IndexedScoring(allBinsToSearch, scoringTable, byteScoreCutoff, idsOfPeptidesPossiblyObserved, scan.PrecursorMass, lowestMassPeptideToLookFor, highestMassPeptideToLookFor, PeptideIndex, MassDiffAcceptor, MaxMassThatFragmentIonScoreIsDoubled, commonParameters.DissociationType);

                    // done with indexed scoring; refine scores and create PSMs
                    foreach (int id in idsOfPeptidesPossiblyObserved)
                    {
                        PeptideWithSetModifications peptide = PeptideIndex[id];

                        List <Product> peptideTheorProducts = peptide.Fragment(commonParameters.DissociationType, FragmentationTerminus.Both).ToList();

                        List <MatchedFragmentIon> matchedIons = MatchFragmentIons(scan, peptideTheorProducts, commonParameters);

                        double thisScore = CalculatePeptideScore(scan.TheScan, matchedIons);
                        int notch        = MassDiffAcceptor.Accepts(scan.PrecursorMass, peptide.MonoisotopicMass);

                        bool meetsScoreCutoff = thisScore >= commonParameters.ScoreCutoff;
                        bool scoreImprovement = PeptideSpectralMatches[i] == null || (thisScore - PeptideSpectralMatches[i].RunnerUpScore) > -PeptideSpectralMatch.ToleranceForScoreDifferentiation;

                        // when e-values are requested every candidate is recorded, regardless of score
                        if (meetsScoreCutoff && scoreImprovement || commonParameters.CalculateEValue)
                        {
                            if (PeptideSpectralMatches[i] == null)
                            {
                                PeptideSpectralMatches[i] = new PeptideSpectralMatch(peptide, notch, thisScore, i, scan, commonParameters.DigestionParams, matchedIons);
                            }
                            else
                            {
                                PeptideSpectralMatches[i].AddOrReplace(peptide, thisScore, notch, commonParameters.ReportAllAmbiguity, matchedIons, 0);
                            }

                            if (commonParameters.CalculateEValue)
                            {
                                PeptideSpectralMatches[i].AllScores.Add(thisScore);
                            }
                        }
                    }

                    // report search progress; the shared counters are only touched under the lock,
                    // and the event is raised outside it so handlers never run while the lock is held
                    int percentToReport = -1;
                    lock (progressLock)
                    {
                        progress++;
                        var percentProgress = (int)((progress / ListOfSortedMs2Scans.Length) * 100);

                        if (percentProgress > oldPercentProgress)
                        {
                            oldPercentProgress = percentProgress;
                            percentToReport    = percentProgress;
                        }
                    }

                    if (percentToReport >= 0)
                    {
                        ReportProgress(new ProgressEventArgs(percentToReport, "Performing modern search... " + CurrentPartition + "/" + commonParameters.TotalPartitions, nestedIds));
                    }
                }
            });

            // remove peptides below the score cutoff that were stored to calculate expectation values
            if (commonParameters.CalculateEValue)
            {
                for (int i = 0; i < PeptideSpectralMatches.Length; i++)
                {
                    if (PeptideSpectralMatches[i] != null && PeptideSpectralMatches[i].Score < commonParameters.ScoreCutoff)
                    {
                        PeptideSpectralMatches[i] = null;
                    }
                }
            }

            foreach (PeptideSpectralMatch psm in PeptideSpectralMatches.Where(p => p != null))
            {
                psm.ResolveAllAmbiguities();
            }

            return(new MetaMorpheusEngineResults(this));
        }
Example #32
0
        /// <summary>
        /// Test fixture builder. Creates three two-residue proteins ("MA", "MG", "MA"), digests them with a
        /// fixed "CH2 on Glycine" modification, wraps the resulting peptides in compact peptides and PSMs,
        /// and runs <see cref="SequencesToActualProteinPeptidesEngine"/> to map compact peptides back to
        /// protein peptides.
        /// </summary>
        /// <param name="localizeable">
        /// When true, the "CH2 on Alanine" modification is attached to protein3 as a one-based modification
        /// at position 2 and no variable modifications are used; when false, it is supplied as a variable
        /// modification instead and protein3 carries no annotated modifications.
        /// </param>
        /// <returns>
        /// A tuple of: the three PSMs, the compact-peptide-to-peptide matching produced by the engine, the
        /// mass difference acceptor, the "no one-hit wonders" flag (always false), and the two distinct
        /// compact peptides.
        /// </returns>
        private static Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> GetInfo(bool localizeable)
        {
            CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModificationIsoforms: 2, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain, maxModsForPeptides: 1), scoreCutoff: 1);


            // Alanine = Glycine + CH2
            Protein protein1 = new Protein("MA", "protein1");
            Protein protein2 = new Protein("MG", "protein2");
            Protein protein3;
            double  monoisotopicMass = Chemistry.ChemicalFormula.ParseFormula("CH2").MonoisotopicMass;

            ModificationMotif.TryGetMotif("G", out ModificationMotif motif1);
            ModificationMotif.TryGetMotif("A", out ModificationMotif motif2);
            TerminusLocalization        modificationSites          = TerminusLocalization.Any;
            List <ModificationWithMass> allKnownFixedModifications = new List <ModificationWithMass>
            {
                new ModificationWithMass("CH2 on Glycine", null, motif1, modificationSites, monoisotopicMass)
            };
            List <ModificationWithMass> variableModifications;

            ModificationWithMass alanineMod = new ModificationWithMass("CH2 on Alanine", null, motif2, modificationSites, monoisotopicMass);

            if (localizeable)
            {
                // the alanine modification is annotated on protein3 itself (position 2)
                variableModifications = new List <ModificationWithMass>();
                IDictionary <int, List <Modification> > oneBasedModifications = new Dictionary <int, List <Modification> >
                {
                    { 2, new List <Modification> {
                          alanineMod
                      } }
                };
                protein3 = new Protein("MA", "protein3", oneBasedModifications: oneBasedModifications);
            }
            else
            {
                // the alanine modification is only available as a variable modification
                variableModifications = new List <ModificationWithMass> {
                    alanineMod
                };
                protein3 = new Protein("MA", "protein3");
            }

            var pepWithSetModifications1 = protein1.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();

            var pepWithSetModifications2 = protein2.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();

            // NOTE(review): Last() presumably selects the alanine-modified form of protein3's peptide - confirm against Digest's ordering
            var pepWithSetModifications3 = protein3.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).Last();

            CompactPeptide compactPeptide1         = new CompactPeptide(pepWithSetModifications1, TerminusType.None);
            CompactPeptide compactPeptideDuplicate = new CompactPeptide(pepWithSetModifications2, TerminusType.None);

            // the peptides from protein1 and protein2 must collapse to the same compact peptide
            Assert.AreEqual(compactPeptide1, compactPeptideDuplicate);
            CompactPeptide compactPeptide2 = new CompactPeptide(pepWithSetModifications3, TerminusType.None);

            string                  fullFilePath    = null;
            int                     precursorCharge = 0;
            TestDataFile            testDataFile    = new TestDataFile();
            MsDataScan              mzLibScan       = testDataFile.GetOneBasedScan(2);
            Ms2ScanWithSpecificMass scan            = new Ms2ScanWithSpecificMass(mzLibScan, 0, precursorCharge, fullFilePath);
            int                     scanIndex       = 0;
            double                  score           = 0;
            int                     notch           = 0;
            PeptideSpectralMatch    psm1            = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, commonParameters.DigestionParams);

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            PeptideSpectralMatch psm2 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, commonParameters.DigestionParams);

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            PeptideSpectralMatch psm3 = new PeptideSpectralMatch(compactPeptide2, notch, score, scanIndex, scan, commonParameters.DigestionParams);

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3
            };

            MassDiffAcceptor massDiffAcceptors            = new SinglePpmAroundZeroSearchMode(5);
            SequencesToActualProteinPeptidesEngine stappe = new SequencesToActualProteinPeptidesEngine(newPsms, new List <Protein> {
                protein1, protein2, protein3
            },
                                                                                                       allKnownFixedModifications, variableModifications, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, new List <DigestionParams> {
                commonParameters.DigestionParams
            }, commonParameters.ReportAllAmbiguity, commonParameters, new List <string>());

            var stappeResults = (SequencesToActualProteinPeptidesEngineResults)stappe.Run();
            var compactPeptideToProteinPeptideMatching = stappeResults.CompactPeptideToProteinPeptideMatching;

            // two distinct compact peptides are expected: the shared one and protein3's
            Assert.AreEqual(2, compactPeptideToProteinPeptideMatching.Count);

            psm1.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);

            bool noOneHitWonders = false;

            return(new Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase>
                   (
                       newPsms, compactPeptideToProteinPeptideMatching, massDiffAcceptors, noOneHitWonders, compactPeptide1, compactPeptide2
                   ));
        }
Example #33
0
        public static List <PeptideSpectralMatch> ResolveFdrCategorySpecificPsms(List <PeptideSpectralMatch>[] AllPsms, int numNotches, string taskId, CommonParameters commonParameters)
        {
            //update all psms with peptide info
            AllPsms.ToList()
            .Where(psmArray => psmArray != null).ToList()
            .ForEach(psmArray => psmArray.Where(psm => psm != null).ToList()
                     .ForEach(psm => psm.ResolveAllAmbiguities()));

            foreach (List <PeptideSpectralMatch> psmsArray in AllPsms)
            {
                if (psmsArray != null)
                {
                    List <PeptideSpectralMatch> cleanedPsmsArray = psmsArray.Where(b => b != null).OrderByDescending(b => b.Score)
                                                                   .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                                                                   .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

                    new FdrAnalysisEngine(cleanedPsmsArray, numNotches, commonParameters, new List <string> {
                        taskId
                    }).Run();

                    for (int i = 0; i < psmsArray.Count; i++)
                    {
                        if (psmsArray[i] != null)
                        {
                            if (psmsArray[i].FdrInfo == null) //if it was grouped in the cleanedPsmsArray
                            {
                                psmsArray[i] = null;
                            }
                        }
                    }
                }
            }

            int[]      ranking           = new int[AllPsms.Length]; //high int is good ranking
            List <int> indexesOfInterest = new List <int>();

            for (int i = 0; i < ranking.Length; i++)
            {
                if (AllPsms[i] != null)
                {
                    ranking[i] = AllPsms[i].Where(x => x != null).Count(x => x.FdrInfo.QValue <= 0.01); //set ranking as number of psms above 1% FDR
                    indexesOfInterest.Add(i);
                }
            }

            //get the index of the category with the highest ranking
            int majorCategoryIndex = indexesOfInterest[0];

            for (int i = 1; i < indexesOfInterest.Count; i++)
            {
                int currentCategoryIndex = indexesOfInterest[i];
                if (ranking[currentCategoryIndex] > ranking[majorCategoryIndex])
                {
                    majorCategoryIndex = currentCategoryIndex;
                }
            }

            //update other category q-values
            //There's a chance of weird categories getting a random decoy before a random target, but we don't want to give that target a q value of zero.
            //We can't just take the q of the first decoy, because if the target wasn't random (score = 40), but there are no other targets before the decoy (score = 5), then we're incorrectly dinging the target
            //The current solution is such that if a minor category has a lower q value than it's corresponding score in the major category, then its q-value is changed to what it would be in the major category
            List <PeptideSpectralMatch> majorCategoryPsms = AllPsms[majorCategoryIndex].Where(x => x != null).OrderByDescending(x => x.Score).ToList(); //get sorted major category

            for (int i = 0; i < indexesOfInterest.Count; i++)
            {
                int minorCategoryIndex = indexesOfInterest[i];
                if (minorCategoryIndex != majorCategoryIndex)
                {
                    List <PeptideSpectralMatch> minorCategoryPsms = AllPsms[minorCategoryIndex].Where(x => x != null).OrderByDescending(x => x.Score).ToList(); //get sorted minor category
                    int minorPsmIndex = 0;
                    int majorPsmIndex = 0;
                    while (minorPsmIndex < minorCategoryPsms.Count && majorPsmIndex < majorCategoryPsms.Count) //while in the lists
                    {
                        PeptideSpectralMatch majorPsm = majorCategoryPsms[majorPsmIndex];
                        PeptideSpectralMatch minorPsm = minorCategoryPsms[minorPsmIndex];
                        //major needs to be a lower score than the minor
                        if (majorPsm.Score > minorPsm.Score)
                        {
                            majorPsmIndex++;
                        }
                        else
                        {
                            if (majorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
                            {
                                minorPsm.FdrInfo.QValue = majorPsm.FdrInfo.QValue;
                            }
                            minorPsmIndex++;
                        }
                    }
                    //wrap up if we hit the end of the major category
                    while (minorPsmIndex < minorCategoryPsms.Count)
                    {
                        PeptideSpectralMatch majorPsm = majorCategoryPsms[majorPsmIndex - 1]; //-1 because it's out of index right now
                        PeptideSpectralMatch minorPsm = minorCategoryPsms[minorPsmIndex];
                        if (majorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
                        {
                            minorPsm.FdrInfo.QValue = majorPsm.FdrInfo.QValue;
                        }
                        minorPsmIndex++;
                    }
                }
            }

            int numTotalSpectraWithPrecursors        = AllPsms[indexesOfInterest[0]].Count;
            List <PeptideSpectralMatch> bestPsmsList = new List <PeptideSpectralMatch>();

            for (int i = 0; i < numTotalSpectraWithPrecursors; i++)
            {
                PeptideSpectralMatch bestPsm = null;
                double lowestQ   = double.MaxValue;
                int    bestIndex = -1;
                foreach (int index in indexesOfInterest) //foreach category
                {
                    PeptideSpectralMatch currentPsm = AllPsms[index][i];
                    if (currentPsm != null)
                    {
                        double currentQValue = currentPsm.FdrInfo.QValue;
                        if (currentQValue < lowestQ || //if the new one is better
                            (currentQValue == lowestQ && currentPsm.Score > bestPsm.Score))
                        {
                            if (bestIndex != -1)
                            {
                                //remove the old one so we don't use it for fdr later
                                AllPsms[bestIndex][i] = null;
                            }
                            bestPsm   = currentPsm;
                            lowestQ   = currentQValue;
                            bestIndex = index;
                        }
                        else //remove the old one so we don't use it for fdr later
                        {
                            AllPsms[index][i] = null;
                        }
                    }
                }
                if (bestPsm != null)
                {
                    bestPsmsList.Add(bestPsm);
                }
            }

            //It's probable that psms from some categories were removed by psms from other categories.
            //however, the fdr is still affected by their presence, since it was calculated before their removal.
            foreach (List <PeptideSpectralMatch> psmsArray in AllPsms)
            {
                if (psmsArray != null)
                {
                    List <PeptideSpectralMatch> cleanedPsmsArray = psmsArray.Where(b => b != null).OrderByDescending(b => b.Score)
                                                                   .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                                                                   .ToList();

                    new FdrAnalysisEngine(cleanedPsmsArray, numNotches, commonParameters, new List <string> {
                        taskId
                    }).Run();
                }
            }

            return(bestPsmsList.OrderBy(b => b.FdrInfo.QValue).ThenByDescending(b => b.Score).ToList());
        }
Example #34
0
        /// <summary>
        /// Runs the G-PTM-D task: performs a classic search of every spectra file using a
        /// mass-difference acceptor built from the fixed, variable and G-PTM-D modifications,
        /// runs FDR analysis on the de-duplicated PSMs, writes them to "GPTMD_Candidates.psmtsv",
        /// runs the GPTMD engine and writes the resulting modifications into new XML databases
        /// (one for the non-contaminant databases and one for the contaminant databases).
        /// </summary>
        /// <param name="OutputFolder">Folder that receives the PSM file and the new XML databases.</param>
        /// <param name="dbFilenameList">Databases to load; their contaminant flag selects which output database they contribute to.</param>
        /// <param name="currentRawFileList">Spectra files to search.</param>
        /// <param name="taskId">Identifier placed in the nested ids of every status/progress report.</param>
        /// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>The task results, whose NewDatabases list holds every database written by this run.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // TODO: print error messages loading GPTMD mods
            List <Modification> gptmdModifications       = GlobalVariables.AllModsKnown.OfType <Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
            IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList();

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            // write prose settings
            ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

            // temporary search type for writing prose
            // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the G-PTM-D task
            Status("Running G-PTM-D...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewDatabases = new List <DbForTask>()
            };
            // NOTE(review): ListOfDigestionParams is not read again in this method. It is kept because building it
            // enumerates SetAllFileSpecificCommonParams for every file setting - confirm it can be removed safely.
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            MyFileManager myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                // Stop if canceled
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                // the precursor tolerance may be overridden per file, so the acceptor is rebuilt for each file
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode     = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);

                // one slot per scan; slots still null after the search are dropped below
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParams, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }).Run();
                allPsms.AddRange(allPsmsArray.Where(p => p != null));
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }));
            }
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            // sort best-first (score, then smallest precursor mass error) and keep only the first PSM
            // for each (file, scan number, peptide monoisotopic mass) combination
            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
                      .Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List <string> {
                taskId
            }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>());
            FinishedWritingFile(writtenFile, new List <string> {
                taskId
            });

            // get file-specific precursor mass tolerances for the GPTMD engine
            var filePathToPrecursorMassTolerance = new Dictionary <string, Tolerance>();

            for (int i = 0; i < currentRawFileList.Count; i++)
            {
                string    filePath      = currentRawFileList[i];
                Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
                if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
                {
                    fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
                }
                filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
            }

            // run GPTMD engine
            var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List <string> {
                taskId
            }).Run();

            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return(MyTaskResults);
            }

            // write GPTMD databases
            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                List <string> databaseNames = new List <string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var  dbName       = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
                    var  theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
                    bool compressed   = theExtension.EndsWith("gz");
                    // a compressed database carries two extensions (e.g. ".xml.gz"), so strip a second one
                    databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                FinishedWritingFile(outputXMLdbFullName, new List <string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                MyTaskResults.AddTaskSummaryText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddTaskSummaryText("Mods types and counts:");
                MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
                //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
                List <string> databaseNames = new List <string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(contaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf('.');
                    // a file name without any dot yields -1, which would make Substring throw; use the whole name instead
                    databaseNames.Add(indexOfFirstDot >= 0 ? dbName.Substring(0, indexOfFirstDot) : dbName);
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                FinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                MyTaskResults.AddTaskSummaryText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddTaskSummaryText("Mods types and counts:");
                MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return(MyTaskResults);
        }
Example #35
0
        /// <summary>
        /// Runs the nonspecific (or semi-specific) indexed search over <c>ListOfSortedMs2Scans</c>.
        /// For every scan, each peptide id is given an initial score equal to the number of searched
        /// fragment bins that list it; peptides reachable from an allowed precursor-mass interval are
        /// then re-scored from best to worst initial score, and those scoring above
        /// <c>CommonParameters.ScoreCutoff</c> are stored in the category-specific PSM array at the scan's index.
        /// </summary>
        /// <returns>A results object created from this engine.</returns>
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            bool semiSpecificSearch = CommonParameters.DigestionParams.SearchModeType == CleavageSpecificity.Semi;

            // progress state is shared by every worker below, so it is only touched while holding progressLock
            double progress           = 0;
            int    oldPercentProgress = 0;
            object progressLock       = new object();

            ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing nonspecific search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds));

            int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile;

            int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
            Parallel.ForEach(threads, (i) =>
            {
                // per-worker scratch buffers, reused for every scan this worker handles
                byte[] scoringTable = new byte[PeptideIndex.Count];
                HashSet <int> idsOfPeptidesPossiblyObserved = new HashSet <int>();

                // worker i handles scans i, i + maxThreadsPerFile, i + 2 * maxThreadsPerFile, ...
                for (; i < ListOfSortedMs2Scans.Length; i += maxThreadsPerFile)
                {
                    // Stop loop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        return;
                    }

                    // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
                    Array.Clear(scoringTable, 0, scoringTable.Length);
                    idsOfPeptidesPossiblyObserved.Clear();
                    Ms2ScanWithSpecificMass scan = ListOfSortedMs2Scans[i];

                    //get bins to add points to
                    List <int> allBinsToSearch = GetBinsToSearch(scan, FragmentIndex, CommonParameters.DissociationType);

                    //the entire indexed scoring is done here
                    for (int j = 0; j < allBinsToSearch.Count; j++)
                    {
                        FragmentIndex[allBinsToSearch[j]].ForEach(id => scoringTable[id]++);
                    }

                    //populate ids of possibly observed with those containing allowed precursor masses
                    List <AllowedIntervalWithNotch> validIntervals = MassDiffAcceptor.GetAllowedPrecursorMassIntervalsFromObservedMass(scan.PrecursorMass).ToList(); //get all valid notches
                    foreach (AllowedIntervalWithNotch interval in validIntervals)
                    {
                        int obsPrecursorFloorMz   = (int)Math.Floor(interval.AllowedInterval.Minimum * FragmentBinsPerDalton);
                        int obsPrecursorCeilingMz = (int)Math.Ceiling(interval.AllowedInterval.Maximum * FragmentBinsPerDalton);

                        foreach (ProductType pt in ProductTypesToSearch)
                        {
                            int dissociationBinShift = (int)Math.Round((WaterMonoisotopicMass - DissociationTypeCollection.GetMassShiftFromProductType(pt)) * FragmentBinsPerDalton);
                            int lowestBin            = obsPrecursorFloorMz - dissociationBinShift;
                            int highestBin           = obsPrecursorCeilingMz - dissociationBinShift;
                            for (int bin = lowestBin; bin <= highestBin; bin++)
                            {
                                // skip bins outside the index on either side instead of throwing on a negative bin
                                if (bin >= 0 && bin < FragmentIndex.Length && FragmentIndex[bin] != null)
                                {
                                    FragmentIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                                }
                            }
                        }

                        for (int bin = obsPrecursorFloorMz; bin <= obsPrecursorCeilingMz; bin++) //no bin shift, since they're precursor masses
                        {
                            if (bin >= 0 && bin < PrecursorIndex.Length && PrecursorIndex[bin] != null)
                            {
                                PrecursorIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                            }
                        }
                    }

                    // done with initial scoring; refine scores and create PSMs
                    if (idsOfPeptidesPossiblyObserved.Any())
                    {
                        // start one above the best initial score; the loop decrements before use
                        int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                        while (maxInitialScore > CommonParameters.ScoreCutoff) //go through all until we hit the end
                        {
                            maxInitialScore--;
                            foreach (int id in idsOfPeptidesPossiblyObserved.Where(id => scoringTable[id] == maxInitialScore))
                            {
                                PeptideWithSetModifications peptide = PeptideIndex[id];
                                List <Product> peptideTheorProducts = peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus).ToList();

                                Tuple <int, PeptideWithSetModifications> notchAndUpdatedPeptide = Accepts(peptideTheorProducts, scan.PrecursorMass, peptide, CommonParameters.DigestionParams.FragmentationTerminus, MassDiffAcceptor, semiSpecificSearch);
                                int notch = notchAndUpdatedPeptide.Item1;
                                if (notch >= 0)
                                {
                                    // re-fragment the peptide returned by Accepts from both termini before matching
                                    peptide = notchAndUpdatedPeptide.Item2;
                                    peptideTheorProducts = peptide.Fragment(CommonParameters.DissociationType, FragmentationTerminus.Both).ToList();
                                    List <MatchedFragmentIon> matchedIons = MatchFragmentIons(scan, peptideTheorProducts, ModifiedParametersNoComp);

                                    double thisScore = CalculatePeptideScore(scan.TheScan, matchedIons);
                                    if (thisScore > CommonParameters.ScoreCutoff)
                                    {
                                        // slot i is written only by the worker that owns scan i
                                        PeptideSpectralMatch[] localPeptideSpectralMatches = GlobalCategorySpecificPsms[(int)FdrClassifier.GetCleavageSpecificityCategory(peptide.CleavageSpecificityForFdrCategory)];
                                        if (localPeptideSpectralMatches[i] == null)
                                        {
                                            localPeptideSpectralMatches[i] = new PeptideSpectralMatch(peptide, notch, thisScore, i, scan, CommonParameters.DigestionParams, matchedIons);
                                        }
                                        else
                                        {
                                            localPeptideSpectralMatches[i].AddOrReplace(peptide, thisScore, notch, CommonParameters.ReportAllAmbiguity, matchedIons, 0);
                                        }
                                    }
                                }
                            }
                        }
                    }

                    // report search progress
                    // the counters are updated under the lock so concurrent workers cannot lose increments;
                    // the report itself is raised outside the lock so handlers do not block other workers
                    int percentToReport = -1;
                    lock (progressLock)
                    {
                        progress++;
                        int percentProgress = (int)((progress / ListOfSortedMs2Scans.Length) * 100);

                        if (percentProgress > oldPercentProgress)
                        {
                            oldPercentProgress = percentProgress;
                            percentToReport    = percentProgress;
                        }
                    }

                    if (percentToReport >= 0)
                    {
                        ReportProgress(new ProgressEventArgs(percentToReport, "Performing nonspecific search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds));
                    }
                }
            });
            return(new MetaMorpheusEngineResults(this));
        }