예제 #1
0
        /// <summary>
        /// Builds a small fixture: three two-residue proteins ("MA", "MG", "MA"), three PSMs over two
        /// compact peptides, and the compact-peptide-to-peptide matching produced by running
        /// <c>SequencesToActualProteinPeptidesEngine</c> on them.
        /// </summary>
        /// <param name="localizeable">
        /// When true, the "CH2 on Alanine" modification is attached to protein3 at one-based position 2
        /// through <c>oneBasedModifications</c> and the variable-modification list is empty.
        /// When false, protein3 carries no database modification and the same modification is
        /// supplied as the only variable modification.
        /// </param>
        /// <returns>
        /// A tuple of: the three PSMs, the compact-peptide matching dictionary, a
        /// <c>SinglePpmAroundZeroSearchMode(5)</c> acceptor, the <c>noOneHitWonders</c> flag (always false),
        /// the compact peptide built from protein1, and the compact peptide built from protein3.
        /// </returns>
        private static Tuple<List<PeptideSpectralMatch>, Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> GetInfo(bool localizeable)
        {
            CommonParameters commonParameters = new CommonParameters(
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    maxModificationIsoforms: 2,
                    initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain,
                    maxModsForPeptides: 1),
                scoreCutoff: 1);

            // Alanine = Glycine + CH2
            Protein protein1 = new Protein("MA", "protein1");
            Protein protein2 = new Protein("MG", "protein2");
            double ch2MonoisotopicMass = Chemistry.ChemicalFormula.ParseFormula("CH2").MonoisotopicMass;

            ModificationMotif.TryGetMotif("G", out ModificationMotif glycineMotif);
            ModificationMotif.TryGetMotif("A", out ModificationMotif alanineMotif);
            TerminusLocalization modificationSites = TerminusLocalization.Any;

            // The fixed modification puts CH2 on glycine; the assertion further down checks that the
            // compact peptides from "MA" and from "MG" then compare equal.
            List<ModificationWithMass> allKnownFixedModifications = new List<ModificationWithMass>
            {
                new ModificationWithMass("CH2 on Glycine", null, glycineMotif, modificationSites, ch2MonoisotopicMass)
            };

            ModificationWithMass alanineMod = new ModificationWithMass("CH2 on Alanine", null, alanineMotif, modificationSites, ch2MonoisotopicMass);

            List<ModificationWithMass> variableModifications;
            Protein protein3;
            if (localizeable)
            {
                // Modification comes from the protein entry itself (one-based position 2).
                variableModifications = new List<ModificationWithMass>();
                IDictionary<int, List<Modification>> oneBasedModifications = new Dictionary<int, List<Modification>>
                {
                    { 2, new List<Modification> { alanineMod } }
                };
                protein3 = new Protein("MA", "protein3", oneBasedModifications: oneBasedModifications);
            }
            else
            {
                // Modification is only available as a variable modification.
                variableModifications = new List<ModificationWithMass> { alanineMod };
                protein3 = new Protein("MA", "protein3");
            }

            var pepWithSetModifications1 = protein1.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();
            var pepWithSetModifications2 = protein2.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();

            // Last() rather than First(): the final digestion product of protein3 is the one used below.
            var pepWithSetModifications3 = protein3.Digest(commonParameters.DigestionParams, allKnownFixedModifications, variableModifications).Last();

            CompactPeptide compactPeptide1 = new CompactPeptide(pepWithSetModifications1, TerminusType.None);
            CompactPeptide compactPeptideDuplicate = new CompactPeptide(pepWithSetModifications2, TerminusType.None);

            Assert.AreEqual(compactPeptide1, compactPeptideDuplicate);
            CompactPeptide compactPeptide2 = new CompactPeptide(pepWithSetModifications3, TerminusType.None);

            string fullFilePath = null;
            int precursorCharge = 0;
            TestDataFile testDataFile = new TestDataFile();
            MsDataScan mzLibScan = testDataFile.GetOneBasedScan(2);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(mzLibScan, 0, precursorCharge, fullFilePath);
            int scanIndex = 0;
            double score = 0;
            int notch = 0;

            // psm1 and psm2 share compactPeptide1; psm3 uses compactPeptide2.
            PeptideSpectralMatch psm1 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, commonParameters.DigestionParams);
            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            PeptideSpectralMatch psm2 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, commonParameters.DigestionParams);
            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            PeptideSpectralMatch psm3 = new PeptideSpectralMatch(compactPeptide2, notch, score, scanIndex, scan, commonParameters.DigestionParams);
            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var newPsms = new List<PeptideSpectralMatch> { psm1, psm2, psm3 };

            MassDiffAcceptor massDiffAcceptors = new SinglePpmAroundZeroSearchMode(5);
            SequencesToActualProteinPeptidesEngine stappe = new SequencesToActualProteinPeptidesEngine(
                newPsms,
                new List<Protein> { protein1, protein2, protein3 },
                allKnownFixedModifications,
                variableModifications,
                new List<ProductType> { ProductType.B, ProductType.Y },
                new List<DigestionParams> { commonParameters.DigestionParams },
                commonParameters.ReportAllAmbiguity,
                commonParameters,
                new List<string>());

            var engineResults = (SequencesToActualProteinPeptidesEngineResults)stappe.Run();
            var compactPeptideToProteinPeptideMatching = engineResults.CompactPeptideToProteinPeptideMatching;

            Assert.AreEqual(2, compactPeptideToProteinPeptideMatching.Count);

            // Only psm1 is linked to its peptides here; psm2 and psm3 are returned unlinked.
            psm1.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);

            bool noOneHitWonders = false;

            return new Tuple<List<PeptideSpectralMatch>, Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase>(
                newPsms, compactPeptideToProteinPeptideMatching, massDiffAcceptors, noOneHitWonders, compactPeptide1, compactPeptide2);
        }
예제 #2
0
        /// <summary>
        /// Smoke test: digests the protein "MNNNKQQQ" with a custom protease, wraps the three asserted
        /// digestion products (MNNNK, NNNK, QQQ) in PSMs, then runs
        /// <c>SequencesToActualProteinPeptidesEngine</c> and <c>FdrAnalysisEngine</c> over them.
        /// Only the digestion products are asserted; the engine runs just have to complete.
        /// </summary>
        public static void TestAnalysisEngineTests()
        {
            CommonParameters commonParameters = new CommonParameters
            {
                DigestionParams = new DigestionParams
                {
                    Protease = new Protease("Custom Protease", new List<string> { "K" }, new List<string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null),
                    MinPeptideLength = null,
                    MaxMissedCleavages = 0,
                    MaxModificationIsoforms = 1042,
                },
                ConserveMemory = false,
                ScoreCutoff = 1,
                ProductMassTolerance = new PpmTolerance(10),
            };

            // No modifications of any kind are used in this test.
            List<ModificationWithMass> variableModifications = new List<ModificationWithMass>();
            List<ModificationWithMass> fixedModifications = new List<ModificationWithMass>();

            var proteinList = new List<Protein> { new Protein("MNNNKQQQ", "accession") };

            // Digest once and pick the products by position instead of re-digesting for every peptide.
            var digestedPeptides = proteinList.First().Digest(commonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

            PeptideWithSetModifications lastPeptide = digestedPeptides[digestedPeptides.Count - 1];
            CompactPeptide compactPeptide1 = new CompactPeptide(lastPeptide, TerminusType.None);
            Assert.AreEqual("QQQ", lastPeptide.BaseSequence);

            PeptideWithSetModifications firstPeptide = digestedPeptides[0];
            CompactPeptide compactPeptide2 = new CompactPeptide(firstPeptide, TerminusType.None);
            Assert.AreEqual("MNNNK", firstPeptide.BaseSequence);

            PeptideWithSetModifications secondPeptide = digestedPeptides[1];
            CompactPeptide compactPeptide3 = new CompactPeptide(secondPeptide, TerminusType.None);
            Assert.AreEqual("NNNK", secondPeptide.BaseSequence);

            // One single-peak scan per PSM; the three scans differ in scan number, native id and precursor mass.
            Ms2ScanWithSpecificMass scanA = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(2, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=1"), 1, 1, null);
            Ms2ScanWithSpecificMass scanB = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(3, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=2"), 2 + 132.040, 1, null);
            Ms2ScanWithSpecificMass scanC = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(4, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=3"), 3, 1, null);

            var newPsms = new List<PeptideSpectralMatch>
            {
                new PeptideSpectralMatch(compactPeptide1, 0, 0, 0, scanA),
                new PeptideSpectralMatch(compactPeptide2, 0, 0, 0, scanB),
                new PeptideSpectralMatch(compactPeptide3, 0, 0, 0, scanC),
            };

            IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
            {
                lastPeptide, firstPeptide, secondPeptide
            });

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            bool doPrecursorDeconvolution = true;
            bool useProvidedPrecursorInfo = true;
            double deconvolutionIntensityRatio = 4;
            int deconvolutionMaxAssumedChargeState = 10;
            Tolerance deconvolutionMassTolerance = new PpmTolerance(5);

            // The sorted scans are not consumed below; the call is kept so scan extraction from the
            // test data file is still exercised by this test.
            var arrayOfMs2ScansSortedByMass = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, doPrecursorDeconvolution, useProvidedPrecursorInfo, deconvolutionIntensityRatio, deconvolutionMaxAssumedChargeState, deconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(
                newPsms,
                proteinList,
                fixedModifications,
                variableModifications,
                new List<ProductType> { ProductType.B, ProductType.Y },
                new List<IDigestionParams> { commonParameters.DigestionParams },
                commonParameters.ReportAllAmbiguity,
                new List<string>());

            var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run();
            var compactPeptideToProteinPeptideMatching = res.CompactPeptideToProteinPeptideMatching;

            // Every entry of newPsms was constructed above, so no null guard is needed.
            foreach (var psm in newPsms)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }

            FdrAnalysisEngine engine = new FdrAnalysisEngine(newPsms, searchMode.NumNotches, false, new List<string> { "ff" });

            engine.Run();
        }
        /// <summary>
        /// Digests two proteins ("ABCEFG" and "EFGABC") with two custom proteases ("testA" and "testB"),
        /// keeps the "ABC" and "EFG" peptides, and checks that
        /// <c>SequencesToActualProteinPeptidesEngine</c> followed by <c>ProteinParsimonyEngine</c>
        /// yields two compact-peptide entries and two protein groups, one per protease.
        /// </summary>
        public static void MultiProteaseIndistiguishableTest()
        {
            string[] sequences =
            {
                "ABCEFG",
                "EFGABC",
            };

            var sequencesInducingCleavage = new List<Tuple<string, TerminusType>>
            {
                new Tuple<string, TerminusType>("C", TerminusType.C)
            };
            var sequencesInducingCleavage2 = new List<Tuple<string, TerminusType>>
            {
                new Tuple<string, TerminusType>("G", TerminusType.C)
            };

            // NOTE(review): both proteases are registered in the shared ProteaseDictionary with Add and
            // are never removed; presumably a second registration of the same name fails - confirm.
            var protease = new Protease("testA", sequencesInducingCleavage, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);
            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            var protease2 = new Protease("testB", sequencesInducingCleavage2, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);
            ProteaseDictionary.Dictionary.Add(protease2.Name, protease2);

            var peptideList = new HashSet<PeptideWithSetModifications>();

            var proteins = new List<Protein>();
            List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>();

            // Accessions are the one-based sequence index: "1", "2".
            for (int i = 0; i < sequences.Length; i++)
            {
                proteins.Add(new Protein(sequences[i], (i + 1).ToString(), null, geneNames, new Dictionary<int, List<Modification>>()));
            }

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, minPeptideLength: 1);
            DigestionParams digestionParams2 = new DigestionParams(protease: protease2.Name, minPeptideLength: 1);

            // For each protein, digest with the first protease and then the second, keeping only the
            // "ABC" and "EFG" products. Insertion order matters: entries are addressed by index below.
            foreach (var protein in proteins)
            {
                foreach (var parameters in new[] { digestionParams, digestionParams2 })
                {
                    foreach (var peptide in protein.Digest(parameters, new List<ModificationWithMass>(), new List<ModificationWithMass>()))
                    {
                        string baseSequence = peptide.BaseSequence;
                        if (baseSequence == "ABC" || baseSequence == "EFG")
                        {
                            peptideList.Add(peptide);
                        }
                    }
                }
            }

            // creates the initial dictionary of "peptide" and "virtual peptide" matches
            var dictionary = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

            // Snapshot the set once (in enumeration order) instead of calling ElementAt(i) per index.
            PeptideWithSetModifications[] peptidesWithSetMods = peptideList.ToArray();
            CompactPeptide[] compactPeptides = new CompactPeptide[peptidesWithSetMods.Length];

            for (int i = 0; i < peptidesWithSetMods.Length; i++)
            {
                compactPeptides[i] = new CompactPeptide(peptidesWithSetMods[i], TerminusType.None);
            }

            // Hard-coded pairing by position: entries 0 and 3 go together, entries 1 and 2 go together.
            dictionary.Add(compactPeptides[0], new HashSet<PeptideWithSetModifications>
            {
                peptidesWithSetMods[0], peptidesWithSetMods[3]
            });
            dictionary.Add(compactPeptides[1], new HashSet<PeptideWithSetModifications>
            {
                peptidesWithSetMods[1], peptidesWithSetMods[2]
            });

            // builds psm list to match to peptides
            List<PeptideSpectralMatch> psms = new List<PeptideSpectralMatch>();

            MsDataScan dfb = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfb, 2, 0, "File");

            // One PSM per "ABC"/"EFG" peptide, tagged with whichever of the two digestion parameter
            // objects the peptide reports; peptides matching neither are skipped.
            foreach (var kvp in dictionary)
            {
                foreach (var peptide in kvp.Value)
                {
                    string baseSequence = peptide.BaseSequence;
                    if (baseSequence != "ABC" && baseSequence != "EFG")
                    {
                        continue;
                    }

                    if (peptide.DigestionParams == digestionParams)
                    {
                        psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams));
                    }
                    else if (peptide.DigestionParams == digestionParams2)
                    {
                        psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams2));
                    }
                }
            }

            List<ProductType> ionTypes = new List<ProductType> { ProductType.BnoB1ions, ProductType.Y };

            HashSet<DigestionParams> digestionParamsList = new HashSet<DigestionParams>
            {
                digestionParams,
                digestionParams2
            };

            // The same oxidation modification is passed as both the fixed and the variable list.
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
            ModificationWithMass mod = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957);
            List<ModificationWithMass> modVarList = new List<ModificationWithMass> { mod };
            List<ModificationWithMass> modFixedList = new List<ModificationWithMass> { mod };

            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine =
                new SequencesToActualProteinPeptidesEngine(psms, proteins, modFixedList, modVarList, ionTypes, digestionParamsList, true, new CommonParameters(), null);
            var results = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run();
            var compactPeptidesToProteinPeptidesMatching = results.CompactPeptideToProteinPeptideMatching;

            Assert.AreEqual(2, compactPeptidesToProteinPeptidesMatching.Count);

            Assert.AreEqual(2, compactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.Count);
            Assert.AreEqual("ABC", compactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.ElementAt(0).BaseSequence);
            Assert.AreEqual("ABC", compactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.ElementAt(1).BaseSequence);

            Assert.AreEqual(2, compactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.Count);
            Assert.AreEqual("EFG", compactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.ElementAt(0).BaseSequence);
            Assert.AreEqual("EFG", compactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.ElementAt(1).BaseSequence);

            ProteinParsimonyEngine ppe = new ProteinParsimonyEngine(compactPeptidesToProteinPeptidesMatching, false, new CommonParameters(), null);
            var proteinAnalysisResults = (ProteinParsimonyResults)ppe.Run();

            List<ProteinGroup> proteinGroups = proteinAnalysisResults.ProteinGroups;

            Assert.AreEqual(2, proteinGroups.Count);

            // Group 0 is expected to hold the "testA" peptides and group 1 the "testB" peptides;
            // apart from the protease name the expectations are identical.
            string[] expectedProteaseNames = { "testA", "testB" };
            for (int groupIndex = 0; groupIndex < expectedProteaseNames.Length; groupIndex++)
            {
                string expectedProtease = expectedProteaseNames[groupIndex];

                Assert.AreEqual(2, proteinGroups.ElementAt(groupIndex).AllPeptides.Count);
                Assert.AreEqual(2, proteinGroups.ElementAt(groupIndex).UniquePeptides.Count);
                Assert.AreEqual("ABC", proteinGroups.ElementAt(groupIndex).AllPeptides.ElementAt(0).BaseSequence);
                Assert.AreEqual(expectedProtease, proteinGroups.ElementAt(groupIndex).AllPeptides.ElementAt(0).DigestionParams.Protease.Name);
                Assert.AreEqual("EFG", proteinGroups.ElementAt(groupIndex).AllPeptides.ElementAt(1).BaseSequence);
                Assert.AreEqual(expectedProtease, proteinGroups.ElementAt(groupIndex).AllPeptides.ElementAt(1).DigestionParams.Protease.Name);
                Assert.AreEqual("ABC", proteinGroups.ElementAt(groupIndex).UniquePeptides.ElementAt(0).BaseSequence);
                Assert.AreEqual("EFG", proteinGroups.ElementAt(groupIndex).UniquePeptides.ElementAt(1).BaseSequence);
            }
        }
        /// <summary>
        /// Builds five PSMs over four differently modified peptides of the protein "MNLDLDNDL", runs the
        /// sequence-matching, FDR and modification-analysis engines, and asserts the modification
        /// counts reported by <c>ModificationAnalysisEngine</c>.
        /// </summary>
        public static void TestModificationAnalysis()
        {
            IScan scan = new ThisTestScan();

            ModificationMotif.TryGetMotif("N", out ModificationMotif asparagineMotif);
            ModificationWithMass mod1 = new ModificationWithMass("mod1", null, asparagineMotif, TerminusLocalization.Any, 10);

            ModificationMotif.TryGetMotif("L", out ModificationMotif leucineMotif);
            ModificationWithMass mod2 = new ModificationWithMass("mod2", null, leucineMotif, TerminusLocalization.Any, 10);

            // Database modifications on the protein: mod1 at positions 2 and 7, mod2 at position 5.
            IDictionary<int, List<Modification>> proteinMods = new Dictionary<int, List<Modification>>
            {
                [2] = new List<Modification> { mod1 },
                [5] = new List<Modification> { mod2 },
                [7] = new List<Modification> { mod1 },
            };
            Protein protein1 = new Protein("MNLDLDNDL", "prot1", oneBasedModifications: proteinMods);

            // Peptide 1: residues 2-9 with mod1 under key 2.
            var modsForPeptide1 = new Dictionary<int, ModificationWithMass> { [2] = mod1 };
            PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(0, protein1, 2, 9, modsForPeptide1);
            CompactPeptideBase pep1 = new CompactPeptide(pwsm1, TerminusType.None);

            // Peptide 2: residues 2-9 with mod1 under keys 2 and 7.
            var modsForPeptide2 = new Dictionary<int, ModificationWithMass> { [2] = mod1, [7] = mod1 };
            PeptideWithSetModifications pwsm2 = new PeptideWithSetModifications(0, protein1, 2, 9, modsForPeptide2);
            CompactPeptideBase pep2 = new CompactPeptide(pwsm2, TerminusType.None);

            // Peptide 3: residues 2-9 with mod1 under key 7.
            var modsForPeptide3 = new Dictionary<int, ModificationWithMass> { [7] = mod1 };
            PeptideWithSetModifications pwsm3 = new PeptideWithSetModifications(0, protein1, 2, 9, modsForPeptide3);
            CompactPeptideBase pep3 = new CompactPeptide(pwsm3, TerminusType.None);

            // Peptide 4: residues 1-9 (one residue longer) with mod1 under key 8.
            var modsForPeptide4 = new Dictionary<int, ModificationWithMass> { [8] = mod1 };
            PeptideWithSetModifications pwsm4 = new PeptideWithSetModifications(0, protein1, 1, 9, modsForPeptide4);
            CompactPeptideBase pep4 = new CompactPeptide(pwsm4, TerminusType.None);

            CommonParameters commonParameters = new CommonParameters(
                digestionParams: new DigestionParams(
                    maxMissedCleavages: 0,
                    minPeptideLength: 1,
                    maxModificationIsoforms: int.MaxValue),
                scoreCutoff: 1);

            // pep1 appears twice on purpose, giving five PSMs over four peptides.
            List<PeptideSpectralMatch> newPsms = new[] { pep1, pep1, pep2, pep3, pep4 }
                .Select(pep => new PeptideSpectralMatch(pep, 0, 10, 0, scan, commonParameters.DigestionParams))
                .ToList();

            MassDiffAcceptor massDiffAcceptor = new SinglePpmAroundZeroSearchMode(5);
            List<Protein> proteinList = new List<Protein> { protein1 };

            var noFixedMods = new List<ModificationWithMass>();
            var noVariableMods = new List<ModificationWithMass>();
            var ionTypes = new List<ProductType> { ProductType.B, ProductType.Y };
            var digestionParamsList = new List<DigestionParams> { commonParameters.DigestionParams };

            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(
                newPsms,
                proteinList,
                noFixedMods,
                noVariableMods,
                ionTypes,
                digestionParamsList,
                commonParameters.ReportAllAmbiguity,
                commonParameters,
                new List<string>());
            var matchingResults = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run();

            foreach (PeptideSpectralMatch psm in newPsms)
            {
                psm.MatchToProteinLinkedPeptides(matchingResults.CompactPeptideToProteinPeptideMatching);
            }

            FdrAnalysisEngine fdrAnalysisEngine = new FdrAnalysisEngine(newPsms, massDiffAcceptor.NumNotches, commonParameters, new List<string>());
            fdrAnalysisEngine.Run();

            // The modification analysis runs with default CommonParameters, not the ones built above.
            ModificationAnalysisEngine modificationAnalysisEngine = new ModificationAnalysisEngine(newPsms, new CommonParameters(), new List<string>());
            var modificationResults = (ModificationAnalysisResults)modificationAnalysisEngine.Run();

            // Two distinct mods on the protein: mod1 counted twice, mod2 once.
            Assert.AreEqual(2, modificationResults.AllModsOnProteins.Count());
            Assert.AreEqual(2, modificationResults.AllModsOnProteins[mod1.id]);
            Assert.AreEqual(1, modificationResults.AllModsOnProteins[mod2.id]);

            // Only mod1 is reported as seen and localized, with a count of 2.
            Assert.AreEqual(1, modificationResults.ModsSeenAndLocalized.Count());
            Assert.AreEqual(2, modificationResults.ModsSeenAndLocalized[mod1.id]);

            // Nothing ambiguous or unlocalized is expected.
            Assert.AreEqual(0, modificationResults.AmbiguousButLocalizedModsSeen.Count());

            Assert.AreEqual(0, modificationResults.UnlocalizedMods.Count());

            Assert.AreEqual(0, modificationResults.UnlocalizedFormulas.Count());
        }
예제 #5
0
        public static void TestDeltaValues()
        {
            CommonParameters CommonParameters = new CommonParameters(scoreCutoff: 1, useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

            SearchParameters SearchParameters = new SearchParameters
            {
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
            };
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();

            // Generate data for files
            Protein TargetProtein1    = new Protein("TIDEANTHE", "accession1");
            Protein TargetProtein2    = new Protein("TIDELVE", "accession2");
            Protein TargetProtein3    = new Protein("TIDENIE", "accession3");
            Protein TargetProteinLost = new Protein("PEPTIDEANTHE", "accession4");
            Protein DecoyProteinFound = new Protein("PETPLEDQGTHE", "accessiond", isDecoy: true);

            MsDataFile myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications>
            {
                TargetProtein1.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                TargetProtein2.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                TargetProtein3.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                DecoyProteinFound.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0]
            });

            var proteinList = new List <Protein> {
                TargetProtein1, TargetProtein2, TargetProtein3, TargetProteinLost, DecoyProteinFound
            };

            var searchModes = new SinglePpmAroundZeroSearchMode(5);

            bool      DoPrecursorDeconvolution           = true;
            bool      UseProvidedPrecursorInfo           = true;
            double    DeconvolutionIntensityRatio        = 4;
            int       DeconvolutionMaxAssumedChargeState = 10;
            Tolerance DeconvolutionMassTolerance         = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            //check better when using delta
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, searchModes, CommonParameters, new List <string>()).Run();

            var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List <ProductType>
            {
                ProductType.B, ProductType.Y
            }, 1, DecoyType.None, new List <DigestionParams> {
                CommonParameters.DigestionParams
            }, CommonParameters, 30000, new List <string>());
            var indexResults = (IndexingResults)indexEngine.Run();
            MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            PeptideSpectralMatch[] allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, 0, CommonParameters, massDiffAcceptor, 0, new List <string>()).Run();

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching =
                new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();

            if (proteinList.Any())
            {
                SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(allPsmsArray.ToList(), proteinList, fixedModifications, variableModifications, new List <ProductType>
                {
                    ProductType.B, ProductType.Y
                }, new List <DigestionParams> {
                    CommonParameters.DigestionParams
                }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>());
                var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run();
                compactPeptideToProteinPeptideMatching = res.CompactPeptideToProteinPeptideMatching;
            }

            foreach (var psm in allPsmsArray)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }
            foreach (var psm in allPsmsArrayModern)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }
            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run());
            FdrAnalysisResults fdrResultsModernDelta  = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run());

            Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3);
            Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3);

            CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

            //check worse when using score
            FdrAnalysisResults fdrResultsClassic = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run());
            FdrAnalysisResults fdrResultsModern  = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run());

            Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 0);
            Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 0);

            //check that when delta is bad, we used the score
            // Generate data for files
            Protein DecoyProtein1     = new Protein("TLEDAGGTHE", "accession1d", isDecoy: true);
            Protein DecoyProtein2     = new Protein("TLEDLVE", "accession2d", isDecoy: true);
            Protein DecoyProtein3     = new Protein("TLEDNIE", "accession3d", isDecoy: true);
            Protein DecoyProteinShiny = new Protein("GGGGGG", "accessionShinyd", isDecoy: true);

            myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications>
            {
                TargetProtein1.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                TargetProtein2.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                TargetProtein3.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
                DecoyProteinShiny.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0],
            });

            proteinList = new List <Protein>
            {
                TargetProtein1, DecoyProtein1,
                TargetProtein2, DecoyProtein2,
                TargetProtein3, DecoyProtein3,
                DecoyProteinShiny,
            };

            listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            //check no change when using delta
            allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, searchModes, CommonParameters, new List <string>()).Run();

            CommonParameters = new CommonParameters(useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

            indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List <ProductType>
            {
                ProductType.B, ProductType.Y
            }, 1, DecoyType.None, new List <DigestionParams> {
                CommonParameters.DigestionParams
            }, CommonParameters, 30000, new List <string>());
            indexResults       = (IndexingResults)indexEngine.Run();
            massDiffAcceptor   = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
            allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, 0, CommonParameters, massDiffAcceptor, 0, new List <string>()).Run();

            var compactPeptideToProteinPeptideMatching2 = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();

            if (proteinList.Any())
            {
                SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine2 = new SequencesToActualProteinPeptidesEngine(allPsmsArray.ToList(), proteinList, fixedModifications, variableModifications, new List <ProductType> {
                    ProductType.B, ProductType.Y
                }, new List <DigestionParams> {
                    CommonParameters.DigestionParams
                }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>());
                var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine2.Run();
                compactPeptideToProteinPeptideMatching2 = res.CompactPeptideToProteinPeptideMatching;
            }

            foreach (var psm in allPsmsArray)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching2);
            }
            foreach (var psm in allPsmsArrayModern)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching2);
            }
            fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run());
            fdrResultsModernDelta  = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run());
            Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3);
            Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3);

            CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

            //check no change when using score
            fdrResultsClassic = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run());
            fdrResultsModern  = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run());
            Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 3);
            Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 3);
        }
예제 #6
0
        /// <summary>
        /// Runs the G-PTM-D task: searches every spectra file with the classic search engine using a
        /// mass-difference acceptor built from the fixed, variable and G-PTM-D modifications, runs FDR
        /// analysis on the pooled PSMs, writes the candidate PSMs to "GPTMD_Candidates.psmtsv", runs the
        /// G-PTM-D engine and finally writes the resulting modifications into new XML databases
        /// (one for the non-contaminant databases, one for the contaminant databases).
        /// </summary>
        /// <param name="OutputFolder">Folder that receives the candidate PSM file and the new XML databases.</param>
        /// <param name="dbFilenameList">Protein databases to load; each entry is flagged as contaminant or not.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to search.</param>
        /// <param name="taskId">Identifier used when reporting status and progress.</param>
        /// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>The task results, including the list of newly written databases.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // load modifications
            Status("Loading modifications...", taskId);
            List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();
            List<ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();
            IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

            // what types of fragment ions to search for
            List<ProductType> ionTypes = new List<ProductType>();

            if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }

            // load proteins (reverse decoys are requested)
            List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

            // write prose settings
            ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

            // temporary search type for writing prose
            // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the G-PTM-D task
            Status("Running G-PTM-D...", new List<string> { taskId });
            MyTaskResults = new MyTaskResults(this)
            {
                NewDatabases = new List<DbForTask>()
            };

            // distinct digestion parameters across all files, after applying file-specific overrides
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            MyFileManager myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                // Stop if canceled
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });

                Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
                Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);

                // one PSM slot per scan; slots the search does not fill stay null
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, combinedParams, new List<string> { taskId, "Individual Spectra Files", origDataFile }).Run();

                allPsms.AddRange(allPsmsArray.Where(p => p != null));

                FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", origDataFile }));
            }
            ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

            // Group and order psms
            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, CommonParameters, new List<string> { taskId });

            var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run();
            Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

            foreach (var psm in allPsms)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
            }

            // Best score first, ties broken by the smallest precursor mass error; then keep only the
            // first PSM for each (file, scan number, peptide mass) combination.
            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
                      .Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List<string> { taskId }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>(), double.PositiveInfinity);
            FinishedWritingFile(writtenFile, new List<string> { taskId });

            // get file-specific precursor mass tolerances for the GPTMD engine
            var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();

            for (int i = 0; i < currentRawFileList.Count; i++)
            {
                string filePath = currentRawFileList[i];
                Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
                if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
                {
                    fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
                }
                filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
            }

            // run GPTMD engine
            var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List<string> { taskId }).Run();

            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return MyTaskResults;
            }

            // write GPTMD databases
            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                List<string> databaseNames = new List<string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
                    var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
                    bool compressed = theExtension.EndsWith("gz", StringComparison.Ordinal);

                    // for a compressed database (e.g. "db.xml.gz") strip the inner extension as well
                    databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                FinishedWritingFile(outputXMLdbFullName, new List<string> { taskId });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                MyTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddNiceText("Mods types and counts:");
                MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // Path.GetFileNameWithoutExtension is deliberately NOT used here: running GPTMD on a
                // .xml.gz database would then produce a ".xml.xml" output file. Everything from the
                // first dot onwards is cut off instead.
                List<string> databaseNames = new List<string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName = Path.GetFileName(contaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf('.');

                    // A file name without any dot has nothing to strip; Substring(0, -1) would throw.
                    databaseNames.Add(indexOfFirstDot < 0 ? dbName : dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                FinishedWritingFile(outputXMLdbFullNameContaminants, new List<string> { taskId });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                MyTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddNiceText("Mods types and counts:");
                MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return MyTaskResults;
        }
예제 #7
0
        // Two PSMs carry the same modification at different sites of the same peptide; the test
        // asserts that the modification analysis reports it as seen but unlocalized.
        public static void TestModificationAnalysisWithNonLocalizedPtms()
        {
            IScan testScan = new ThisTestScan();

            // one modification on asparagine, annotated at two positions of the protein
            ModificationMotif.TryGetMotif("N", out ModificationMotif asparagineMotif);
            ModificationWithMass mod = new ModificationWithMass("mod1", "mt", asparagineMotif, TerminusLocalization.Any, 10, neutralLosses: new List<double> { 10 });

            IDictionary<int, List<Modification>> proteinMods = new Dictionary<int, List<Modification>>
            {
                [2] = new List<Modification> { mod },
                [7] = new List<Modification> { mod },
            };
            Protein protein = new Protein("MNLDLDNDL", "prot1", oneBasedModifications: proteinMods);

            // same peptide span (residues 2-9), modification placed at key 2
            var modsAtFirstSite = new Dictionary<int, ModificationWithMass> { [2] = mod };
            PeptideWithSetModifications peptideFirstSite = new PeptideWithSetModifications(0, protein, 2, 9, modsAtFirstSite);
            CompactPeptideBase compactFirstSite = new CompactPeptide(peptideFirstSite, TerminusType.None);

            // same peptide span, modification placed at key 7
            var modsAtSecondSite = new Dictionary<int, ModificationWithMass> { [7] = mod };
            PeptideWithSetModifications peptideSecondSite = new PeptideWithSetModifications(0, protein, 2, 9, modsAtSecondSite);
            CompactPeptideBase compactSecondSite = new CompactPeptide(peptideSecondSite, TerminusType.None);

            var psms = new List<PeptideSpectralMatch>
            {
                new PeptideSpectralMatch(compactFirstSite, 0, 10, 0, testScan),
                new PeptideSpectralMatch(compactSecondSite, 0, 10, 0, testScan),
            };

            MassDiffAcceptor massDiffAcceptor = new SinglePpmAroundZeroSearchMode(5);
            var proteins = new List<Protein> { protein };

            var digestionParams = new DigestionParams
            {
                MinPeptideLength = null,
                MaxMissedCleavages = 0,
                MaxModificationIsoforms = int.MaxValue
            };
            var commonParameters = new CommonParameters
            {
                DigestionParams = digestionParams,
                ConserveMemory = false,
                ScoreCutoff = 1,
            };

            var peptideMatchingEngine = new SequencesToActualProteinPeptidesEngine(
                psms,
                proteins,
                new List<ModificationWithMass>(),
                new List<ModificationWithMass>(),
                new List<ProductType> { ProductType.B, ProductType.Y },
                new List<IDigestionParams> { commonParameters.DigestionParams },
                commonParameters.ReportAllAmbiguity,
                new List<string>());
            var matchingResults = (SequencesToActualProteinPeptidesEngineResults)peptideMatchingEngine.Run();

            psms.ForEach(psm => psm.MatchToProteinLinkedPeptides(matchingResults.CompactPeptideToProteinPeptideMatching));

            Assert.AreEqual(2, matchingResults.CompactPeptideToProteinPeptideMatching[compactFirstSite].Count);

            new FdrAnalysisEngine(psms, massDiffAcceptor.NumNotches, false, new List<string>()).Run();

            var analysis = (ModificationAnalysisResults)new ModificationAnalysisEngine(psms, new List<string>()).Run();

            Assert.AreEqual(1, analysis.AllModsOnProteins.Count());
            Assert.AreEqual(2, analysis.AllModsOnProteins[mod.id]);

            Assert.AreEqual(0, analysis.ModsSeenAndLocalized.Count());

            Assert.AreEqual(0, analysis.AmbiguousButLocalizedModsSeen.Count);

            // the modification was observed, but its site could not be pinned down
            Assert.AreEqual(1, analysis.UnlocalizedMods[mod.id]);

            Assert.AreEqual(0, analysis.UnlocalizedFormulas.Count());
        }
예제 #8
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this)
            {
                newDatabases = new List <DbForTask>()
            };
            Status("Loading modifications...", new List <string> {
                taskId
            });

            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

            List <ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();

            IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList();

            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.B);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }

            Status("Loading proteins...", new List <string> {
                taskId
            });
            Dictionary <string, Modification> um = null;
            //Decoys are currently not being searched with DecoyType.None
            var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out um)).ToList();

            var numRawFiles = currentRawFileList.Count;

            proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: "); proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            if (CommonParameters.DigestionParams.MaxPeptideLength == null)
            {
                proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
            }
            else
            {
                proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            }
            proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");
            //puppet searchmode for writing files. Actual searchmode is filespecific
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");
            proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
            proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            Status("Running G-PTM-D...", new List <string> {
                taskId
            });

            HashSet <IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

            MyFileManager myFileManager = new MyFileManager(true);

            object          lock1           = new object();
            object          lock2           = new object();
            ParallelOptions parallelOptions = new ParallelOptions();

            if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
            {
                parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
            }
            Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode      = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);
                Status("Getting ms2 scans...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }).Run();
                lock (lock2)
                {
                    allPsms.AddRange(allPsmsArray);
                }
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }));
            });
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            // Group and order psms

            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List <string> {
                taskId
            });

            var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run();
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
                }
            }

            allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List <string> {
                taskId
            }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>());
            SucessfullyFinishedWritingFile(writtenFile, new List <string> {
                taskId
            });

            var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List <string> {
                taskId
            }).Run();

            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                // Do NOT use the commented-out code below (Path.GetFileNameWithoutExtension): it removes only the final extension, so running GPTMD on a .xml.gz database would result in a .xml.xml file type being written.
                //string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => !b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");

                List <string> databaseNames = new List <string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(nonContaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf(".");
                    databaseNames.Add(dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                SucessfullyFinishedWritingFile(outputXMLdbFullName, new List <string> {
                    taskId
                });

                myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                myTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                myTaskResults.AddNiceText("Mods types and counts:");
                myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // Do NOT use the commented-out code below (Path.GetFileNameWithoutExtension): it removes only the final extension, so running GPTMD on a .xml.gz contaminant database would result in a .xml.xml file type being written.
                //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
                List <string> databaseNames = new List <string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(contaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf(".");
                    databaseNames.Add(dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                SucessfullyFinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> {
                    taskId
                });

                myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                myTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                myTaskResults.AddNiceText("Mods types and counts:");
                myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return(myTaskResults);
        }