private static Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> GetInfo(bool localizeable) { CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModificationIsoforms: 2, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain, maxModsForPeptides: 1), scoreCutoff: 1); // Alanine = Glycine + CH2 Protein protein1 = new Protein("MA", "protein1"); Protein protein2 = new Protein("MG", "protein2"); Protein protein3; double monoisotopicMass = Chemistry.ChemicalFormula.ParseFormula("CH2").MonoisotopicMass; ModificationMotif.TryGetMotif("G", out ModificationMotif motif1); ModificationMotif.TryGetMotif("A", out ModificationMotif motif2); TerminusLocalization modificationSites = TerminusLocalization.Any; List <ModificationWithMass> allKnownFixedModifications = new List <ModificationWithMass> { new ModificationWithMass("CH2 on Glycine", null, motif1, modificationSites, monoisotopicMass) }; List <ModificationWithMass> variableModifications; ModificationWithMass alanineMod = new ModificationWithMass("CH2 on Alanine", null, motif2, modificationSites, monoisotopicMass); if (localizeable) { variableModifications = new List <ModificationWithMass>(); IDictionary <int, List <Modification> > oneBasedModifications = new Dictionary <int, List <Modification> > { { 2, new List <Modification> { alanineMod } } }; protein3 = new Protein("MA", "protein3", oneBasedModifications: oneBasedModifications); } else { variableModifications = new List <ModificationWithMass>(); variableModifications = new List <ModificationWithMass> { alanineMod }; protein3 = new Protein("MA", "protein3"); } var pepWithSetModifications1 = protein1.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First(); var pepWithSetModifications2 = protein2.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First(); var pepWithSetModifications3 = protein3.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).Last(); CompactPeptide compactPeptide1 = new CompactPeptide(pepWithSetModifications1, TerminusType.None); CompactPeptide compactPeptideDuplicate = new CompactPeptide(pepWithSetModifications2, TerminusType.None); Assert.AreEqual(compactPeptide1, compactPeptideDuplicate); CompactPeptide compactPeptide2 = new CompactPeptide(pepWithSetModifications3, TerminusType.None); string fullFilePath = null; int precursorCharge = 0; TestDataFile testDataFile = new TestDataFile(); MsDataScan mzLibScan = testDataFile.GetOneBasedScan(2); Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(mzLibScan, 0, precursorCharge, fullFilePath); int scanIndex = 0; double score = 0; int notch = 0; PeptideSpectralMatch psm1 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, CommonParameters.DigestionParams); psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false); PeptideSpectralMatch psm2 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, CommonParameters.DigestionParams); psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false); PeptideSpectralMatch psm3 = new PeptideSpectralMatch(compactPeptide2, notch, score, scanIndex, scan, CommonParameters.DigestionParams); psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false); var newPsms = new List <PeptideSpectralMatch> { psm1, psm2, psm3 }; MassDiffAcceptor massDiffAcceptors = new SinglePpmAroundZeroSearchMode(5); SequencesToActualProteinPeptidesEngine stappe = new SequencesToActualProteinPeptidesEngine(newPsms, new List <Protein> { protein1, protein2, protein3 }, allKnownFixedModifications, variableModifications, new List <ProductType> { ProductType.B, ProductType.Y }, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>()); var haha = (SequencesToActualProteinPeptidesEngineResults)stappe.Run(); var compactPeptideToProteinPeptideMatching = haha.CompactPeptideToProteinPeptideMatching; Assert.AreEqual(2, compactPeptideToProteinPeptideMatching.Count); psm1.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching); bool noOneHitWonders = false; return(new Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> ( newPsms, compactPeptideToProteinPeptideMatching, massDiffAcceptors, noOneHitWonders, compactPeptide1, compactPeptide2 )); }
public static void TestAnalysisEngineTests() { CommonParameters CommonParameters = new CommonParameters { DigestionParams = new DigestionParams { Protease = new Protease("Custom Protease", new List <string> { "K" }, new List <string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null), MinPeptideLength = null, MaxMissedCleavages = 0, MaxModificationIsoforms = 1042, }, ConserveMemory = false, ScoreCutoff = 1, ProductMassTolerance = new PpmTolerance(10), }; List <ModificationWithMass> localizeableModifications = new List <ModificationWithMass>(); List <ModificationWithMass> variableModifications = new List <ModificationWithMass>(); List <ModificationWithMass> fixedModifications = new List <ModificationWithMass>(); Dictionary <ModificationWithMass, ushort> modsDictionary = new Dictionary <ModificationWithMass, ushort>(); foreach (var mod in fixedModifications) { modsDictionary.Add(mod, 0); } int i = 1; foreach (var mod in variableModifications) { modsDictionary.Add(mod, (ushort)i); i++; } foreach (var mod in localizeableModifications) { modsDictionary.Add(mod, (ushort)i); i++; } var proteinList = new List <Protein> { new Protein("MNNNKQQQ", "accession") }; var modPep = proteinList.First().Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).Last(); HashSet <PeptideWithSetModifications> value1 = new HashSet <PeptideWithSetModifications> { modPep }; CompactPeptide compactPeptide1 = new CompactPeptide(value1.First(), TerminusType.None); Assert.AreEqual("QQQ", value1.First().BaseSequence); var modPep2 = proteinList.First().Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).First(); HashSet <PeptideWithSetModifications> value2 = new HashSet <PeptideWithSetModifications> { modPep2 }; CompactPeptide compactPeptide2 = new CompactPeptide(value2.First(), TerminusType.None); Assert.AreEqual("MNNNK", value2.First().BaseSequence); var modPep3 = proteinList.First().Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[1]; HashSet <PeptideWithSetModifications> value3 = new HashSet <PeptideWithSetModifications> { modPep3 }; CompactPeptide compactPeptide3 = new CompactPeptide(value3.First(), TerminusType.None); Assert.AreEqual("NNNK", value3.First().BaseSequence); //newPsms[0] = new List<PsmParent>[] { new List<PsmParent>{ new PsmModern(compactPeptide1, null, 1, 1, 2, 2, 1,1, 1, 1, 3,0) }, // new List<PsmParent>{ new PsmModern(compactPeptide2, null, 2,2+132.040,3,3,2,2,2,2,2,0) }, // new List<PsmParent>{ new PsmModern(compactPeptide3, null, 3, 3, 4, 3, 3, 3, 3, 3, 3, 0)} }; Ms2ScanWithSpecificMass scanA = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(2, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=1"), 1, 1, null); Ms2ScanWithSpecificMass scanB = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(3, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=2"), 2 + 132.040, 1, null); Ms2ScanWithSpecificMass scanC = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(4, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 1, null, null, "scan=3"), 3, 1, null); PeptideSpectralMatch matchA = new PeptideSpectralMatch(compactPeptide1, 0, 0, 0, scanA); PeptideSpectralMatch matchB = new PeptideSpectralMatch(compactPeptide2, 0, 0, 0, scanB); PeptideSpectralMatch matchC = new PeptideSpectralMatch(compactPeptide3, 0, 0, 0, scanC); var newPsms = new List <PeptideSpectralMatch> { matchA, matchB, matchC }; IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications> { value1.First(), value2.First(), value3.First() }); var searchMode = new SinglePpmAroundZeroSearchMode(5); Action <List <PeptideSpectralMatch>, string, List <string> > action2 = (List <PeptideSpectralMatch> l, string s, List <string> sdf) => {; }; bool DoPrecursorDeconvolution = true; bool UseProvidedPrecursorInfo = true; double DeconvolutionIntensityRatio = 4; int DeconvolutionMaxAssumedChargeState = 10; Tolerance DeconvolutionMassTolerance = new PpmTolerance(5); var arrayOfMs2ScansSortedByMass = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); Action <BinTreeStructure, string> action1 = (BinTreeStructure l, string s) => { Assert.AreEqual(1, l.FinalBins.Count); }; SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(newPsms, proteinList, fixedModifications, variableModifications, new List <ProductType> { ProductType.B, ProductType.Y }, new List <IDigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, new List <string>()); var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); var compactPeptideToProteinPeptideMatching = res.CompactPeptideToProteinPeptideMatching; foreach (var huh in newPsms) { if (huh != null) { huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching); } } FdrAnalysisEngine engine = new FdrAnalysisEngine(newPsms, searchMode.NumNotches, false, new List <string> { "ff" }); engine.Run(); }
public static void MultiProteaseIndistiguishableTest() { string[] sequences = { "ABCEFG", "EFGABC", }; List <Tuple <string, TerminusType> > sequencesInducingCleavage = new List <Tuple <string, TerminusType> > { new Tuple <string, TerminusType>("C", TerminusType.C) }; List <Tuple <string, TerminusType> > sequencesInducingCleavage2 = new List <Tuple <string, TerminusType> > { new Tuple <string, TerminusType>("G", TerminusType.C) }; var protease = new Protease("testA", sequencesInducingCleavage, new List <Tuple <string, TerminusType> >(), CleavageSpecificity.Full, null, null, null); ProteaseDictionary.Dictionary.Add(protease.Name, protease); var protease2 = new Protease("testB", sequencesInducingCleavage2, new List <Tuple <string, TerminusType> >(), CleavageSpecificity.Full, null, null, null); ProteaseDictionary.Dictionary.Add(protease2.Name, protease2); var peptideList = new HashSet <PeptideWithSetModifications>(); var p = new List <Protein>(); List <Tuple <string, string> > gn = new List <Tuple <string, string> >(); for (int i = 0; i < sequences.Length; i++) { p.Add(new Protein(sequences[i], (i + 1).ToString(), null, gn, new Dictionary <int, List <Modification> >())); } DigestionParams digestionParams = new DigestionParams(protease: protease.Name, minPeptideLength: 1); DigestionParams digestionParams2 = new DigestionParams(protease: protease2.Name, minPeptideLength: 1); foreach (var protein in p) { foreach (var peptide in protein.Digest(digestionParams, new List <ModificationWithMass>(), new List <ModificationWithMass>())) { switch (peptide.BaseSequence) { case "ABC": peptideList.Add(peptide); break; case "EFG": peptideList.Add(peptide); break; } } foreach (var peptide in protein.Digest(digestionParams2, new List <ModificationWithMass>(), new List <ModificationWithMass>())) { switch (peptide.BaseSequence) { case "ABC": peptideList.Add(peptide); break; case "EFG": peptideList.Add(peptide); break; } } } // creates the initial dictionary of "peptide" and "virtual peptide" matches var dictionary = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >(); CompactPeptide[] peptides = new CompactPeptide[peptideList.Count]; PeptideWithSetModifications[] PWSM = new PeptideWithSetModifications[peptideList.Count]; // creates peptide list for (int i = 0; i < peptideList.Count; i++) { peptides[i] = new CompactPeptide(peptideList.ElementAt(i), TerminusType.None); PWSM[i] = peptideList.ElementAt(i); } dictionary.Add(peptides[0], new HashSet <PeptideWithSetModifications> { PWSM[0], PWSM[3] }); dictionary.Add(peptides[1], new HashSet <PeptideWithSetModifications> { PWSM[1], PWSM[2] }); // builds psm list to match to peptides List <PeptideSpectralMatch> psms = new List <PeptideSpectralMatch>(); MsDataScan dfb = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null); Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfb, 2, 0, "File"); foreach (var kvp in dictionary) { foreach (var peptide in kvp.Value) { switch (peptide.BaseSequence) { case "ABC": if (peptide.DigestionParams == digestionParams) { psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams)); break; } if (peptide.DigestionParams == digestionParams2) { psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams2)); break; } else { break; } case "EFG": if (peptide.DigestionParams == digestionParams) { psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams)); break; } if (peptide.DigestionParams == digestionParams2) { psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams2)); break; } else { break; } } } } List <ProductType> IonTypes = new List <ProductType>(); ProductType BnoB1ions = ProductType.BnoB1ions; ProductType Yions = ProductType.Y; IonTypes.Add(BnoB1ions); IonTypes.Add(Yions); HashSet <DigestionParams> digestionParamsList = new HashSet <DigestionParams>(); digestionParamsList.Add(digestionParams); digestionParamsList.Add(digestionParams2); ModificationMotif.TryGetMotif("M", out ModificationMotif motif1); ModificationWithMass mod = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957); List <ModificationWithMass> modVarList = new List <ModificationWithMass> { mod }; ModificationMotif.TryGetMotif("M", out ModificationMotif motif2); List <ModificationWithMass> modFixedList = new List <ModificationWithMass> { mod }; SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(psms, p, modFixedList, modVarList, IonTypes, digestionParamsList, true, new CommonParameters(), null); var results = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); var CompactPeptidesToProteinPeptidesMatching = results.CompactPeptideToProteinPeptideMatching; Assert.AreEqual(2, CompactPeptidesToProteinPeptidesMatching.Count); Assert.AreEqual(2, CompactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.Count); Assert.AreEqual("ABC", CompactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.ElementAt(0).BaseSequence); Assert.AreEqual("ABC", CompactPeptidesToProteinPeptidesMatching.ElementAt(0).Value.ElementAt(1).BaseSequence); Assert.AreEqual(2, CompactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.Count); Assert.AreEqual("EFG", CompactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.ElementAt(0).BaseSequence); Assert.AreEqual("EFG", CompactPeptidesToProteinPeptidesMatching.ElementAt(1).Value.ElementAt(1).BaseSequence); ProteinParsimonyEngine ppe = new ProteinParsimonyEngine(CompactPeptidesToProteinPeptidesMatching, false, new CommonParameters(), null); var proteinAnalysisResults = (ProteinParsimonyResults)ppe.Run(); List <ProteinGroup> proteinGroups = proteinAnalysisResults.ProteinGroups; Assert.AreEqual(2, proteinGroups.Count); Assert.AreEqual(2, proteinGroups.ElementAt(0).AllPeptides.Count); Assert.AreEqual(2, proteinGroups.ElementAt(0).UniquePeptides.Count); Assert.AreEqual("ABC", proteinGroups.ElementAt(0).AllPeptides.ElementAt(0).BaseSequence); Assert.AreEqual("testA", proteinGroups.ElementAt(0).AllPeptides.ElementAt(0).DigestionParams.Protease.Name); Assert.AreEqual("EFG", proteinGroups.ElementAt(0).AllPeptides.ElementAt(1).BaseSequence); Assert.AreEqual("testA", proteinGroups.ElementAt(0).AllPeptides.ElementAt(1).DigestionParams.Protease.Name); Assert.AreEqual("ABC", proteinGroups.ElementAt(0).UniquePeptides.ElementAt(0).BaseSequence); Assert.AreEqual("EFG", proteinGroups.ElementAt(0).UniquePeptides.ElementAt(1).BaseSequence); Assert.AreEqual(2, proteinGroups.ElementAt(1).AllPeptides.Count); Assert.AreEqual(2, proteinGroups.ElementAt(1).UniquePeptides.Count); Assert.AreEqual("ABC", proteinGroups.ElementAt(1).AllPeptides.ElementAt(0).BaseSequence); Assert.AreEqual("testB", proteinGroups.ElementAt(1).AllPeptides.ElementAt(0).DigestionParams.Protease.Name); Assert.AreEqual("EFG", proteinGroups.ElementAt(1).AllPeptides.ElementAt(1).BaseSequence); Assert.AreEqual("testB", proteinGroups.ElementAt(1).AllPeptides.ElementAt(1).DigestionParams.Protease.Name); Assert.AreEqual("ABC", proteinGroups.ElementAt(1).UniquePeptides.ElementAt(0).BaseSequence); Assert.AreEqual("EFG", proteinGroups.ElementAt(1).UniquePeptides.ElementAt(1).BaseSequence); }
public static void TestModificationAnalysis() { IScan scan = new ThisTestScan(); ModificationMotif.TryGetMotif("N", out ModificationMotif motif1); ModificationWithMass mod1 = new ModificationWithMass("mod1", null, motif1, TerminusLocalization.Any, 10); ModificationMotif.TryGetMotif("L", out ModificationMotif motif2); ModificationWithMass mod2 = new ModificationWithMass("mod2", null, motif2, TerminusLocalization.Any, 10); IDictionary <int, List <Modification> > oneBasedModifications = new Dictionary <int, List <Modification> > { { 2, new List <Modification> { mod1 } }, { 5, new List <Modification> { mod2 } }, { 7, new List <Modification> { mod1 } }, }; Protein protein1 = new Protein("MNLDLDNDL", "prot1", oneBasedModifications: oneBasedModifications); Dictionary <int, ModificationWithMass> allModsOneIsNterminus1 = new Dictionary <int, ModificationWithMass> { { 2, mod1 }, }; PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(0, protein1, 2, 9, allModsOneIsNterminus1); CompactPeptideBase pep1 = new CompactPeptide(pwsm1, TerminusType.None); Dictionary <int, ModificationWithMass> allModsOneIsNterminus2 = new Dictionary <int, ModificationWithMass> { { 2, mod1 }, { 7, mod1 }, }; PeptideWithSetModifications pwsm2 = new PeptideWithSetModifications(0, protein1, 2, 9, allModsOneIsNterminus2); CompactPeptideBase pep2 = new CompactPeptide(pwsm2, TerminusType.None); Dictionary <int, ModificationWithMass> allModsOneIsNterminus3 = new Dictionary <int, ModificationWithMass> { { 7, mod1 }, }; PeptideWithSetModifications pwsm3 = new PeptideWithSetModifications(0, protein1, 2, 9, allModsOneIsNterminus3); CompactPeptideBase pep3 = new CompactPeptide(pwsm3, TerminusType.None); Dictionary <int, ModificationWithMass> allModsOneIsNterminus4 = new Dictionary <int, ModificationWithMass> { { 8, mod1 }, }; PeptideWithSetModifications pwsm4 = new PeptideWithSetModifications(0, protein1, 1, 9, allModsOneIsNterminus4); CompactPeptideBase pep4 = new CompactPeptide(pwsm4, TerminusType.None); CommonParameters CommonParameters = new CommonParameters( digestionParams: new DigestionParams( maxMissedCleavages: 0, minPeptideLength: 1, maxModificationIsoforms: int.MaxValue), scoreCutoff: 1); var newPsms = new List <PeptideSpectralMatch> { new PeptideSpectralMatch(pep1, 0, 10, 0, scan, CommonParameters.DigestionParams), new PeptideSpectralMatch(pep1, 0, 10, 0, scan, CommonParameters.DigestionParams), new PeptideSpectralMatch(pep2, 0, 10, 0, scan, CommonParameters.DigestionParams), new PeptideSpectralMatch(pep3, 0, 10, 0, scan, CommonParameters.DigestionParams), new PeptideSpectralMatch(pep4, 0, 10, 0, scan, CommonParameters.DigestionParams), }; MassDiffAcceptor searchMode = new SinglePpmAroundZeroSearchMode(5); List <Protein> proteinList = new List <Protein> { protein1 }; SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine (newPsms, proteinList, new List <ModificationWithMass>(), new List <ModificationWithMass>(), new List <ProductType> { ProductType.B, ProductType.Y }, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>()); var nice = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); foreach (var psm in newPsms) { psm.MatchToProteinLinkedPeptides(nice.CompactPeptideToProteinPeptideMatching); } FdrAnalysisEngine fdrAnalysisEngine = new FdrAnalysisEngine(newPsms, searchMode.NumNotches, CommonParameters, new List <string>()); fdrAnalysisEngine.Run(); ModificationAnalysisEngine modificationAnalysisEngine = new ModificationAnalysisEngine(newPsms, new CommonParameters(), new List <string>()); var res = (ModificationAnalysisResults)modificationAnalysisEngine.Run(); Assert.AreEqual(2, res.AllModsOnProteins.Count()); Assert.AreEqual(2, res.AllModsOnProteins[mod1.id]); Assert.AreEqual(1, res.AllModsOnProteins[mod2.id]); Assert.AreEqual(1, res.ModsSeenAndLocalized.Count()); Assert.AreEqual(2, res.ModsSeenAndLocalized[mod1.id]); Assert.AreEqual(0, res.AmbiguousButLocalizedModsSeen.Count()); Assert.AreEqual(0, res.UnlocalizedMods.Count()); Assert.AreEqual(0, res.UnlocalizedFormulas.Count()); }
public static void TestDeltaValues() { CommonParameters CommonParameters = new CommonParameters(scoreCutoff: 1, useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5)); SearchParameters SearchParameters = new SearchParameters { MassDiffAcceptorType = MassDiffAcceptorType.Exact, }; List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); // Generate data for files Protein TargetProtein1 = new Protein("TIDEANTHE", "accession1"); Protein TargetProtein2 = new Protein("TIDELVE", "accession2"); Protein TargetProtein3 = new Protein("TIDENIE", "accession3"); Protein TargetProteinLost = new Protein("PEPTIDEANTHE", "accession4"); Protein DecoyProteinFound = new Protein("PETPLEDQGTHE", "accessiond", isDecoy: true); MsDataFile myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications> { TargetProtein1.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], TargetProtein2.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], TargetProtein3.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], DecoyProteinFound.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0] }); var proteinList = new List <Protein> { TargetProtein1, TargetProtein2, TargetProtein3, TargetProteinLost, DecoyProteinFound }; var searchModes = new SinglePpmAroundZeroSearchMode(5); bool DoPrecursorDeconvolution = true; bool UseProvidedPrecursorInfo = true; double DeconvolutionIntensityRatio = 4; int DeconvolutionMaxAssumedChargeState = 10; Tolerance DeconvolutionMassTolerance = new PpmTolerance(5); var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); //check better when using delta PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, new List <ProductType> { ProductType.B, ProductType.Y }, searchModes, CommonParameters, new List <string>()).Run(); var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List <ProductType> { ProductType.B, ProductType.Y }, 1, DecoyType.None, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters, 30000, new List <string>()); var indexResults = (IndexingResults)indexEngine.Run(); MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac); PeptideSpectralMatch[] allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, new List <ProductType> { ProductType.B, ProductType.Y }, 0, CommonParameters, massDiffAcceptor, 0, new List <string>()).Run(); Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >(); if (proteinList.Any()) { SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(allPsmsArray.ToList(), proteinList, fixedModifications, variableModifications, new List <ProductType> { ProductType.B, ProductType.Y }, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>()); var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); compactPeptideToProteinPeptideMatching = res.CompactPeptideToProteinPeptideMatching; } foreach (var psm in allPsmsArray) { psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching); } foreach (var psm in allPsmsArrayModern) { psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching); } FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run()); FdrAnalysisResults fdrResultsModernDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run()); Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3); Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3); CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5)); //check worse when using score FdrAnalysisResults fdrResultsClassic = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run()); FdrAnalysisResults fdrResultsModern = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run()); Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 0); Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 0); //check that when delta is bad, we used the score // Generate data for files Protein DecoyProtein1 = new Protein("TLEDAGGTHE", "accession1d", isDecoy: true); Protein DecoyProtein2 = new Protein("TLEDLVE", "accession2d", isDecoy: true); Protein DecoyProtein3 = new Protein("TLEDNIE", "accession3d", isDecoy: true); Protein DecoyProteinShiny = new Protein("GGGGGG", "accessionShinyd", isDecoy: true); myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications> { TargetProtein1.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], TargetProtein2.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], TargetProtein3.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], DecoyProteinShiny.Digest(CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0], }); proteinList = new List <Protein> { TargetProtein1, DecoyProtein1, TargetProtein2, DecoyProtein2, TargetProtein3, DecoyProtein3, DecoyProteinShiny, }; listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); //check no change when using delta allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, new List <ProductType> { ProductType.B, ProductType.Y }, searchModes, CommonParameters, new List <string>()).Run(); CommonParameters = new CommonParameters(useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5)); indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List <ProductType> { ProductType.B, ProductType.Y }, 1, DecoyType.None, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters, 30000, new List <string>()); indexResults = (IndexingResults)indexEngine.Run(); massDiffAcceptor = SearchTask.GetMassDiffAcceptor(CommonParameters.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac); allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, new List <ProductType> { ProductType.B, ProductType.Y }, 0, CommonParameters, massDiffAcceptor, 0, new List <string>()).Run(); var compactPeptideToProteinPeptideMatching2 = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >(); if (proteinList.Any()) { SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine2 = new SequencesToActualProteinPeptidesEngine(allPsmsArray.ToList(), proteinList, fixedModifications, variableModifications, new List <ProductType> { ProductType.B, ProductType.Y }, new List <DigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>()); var res = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine2.Run(); compactPeptideToProteinPeptideMatching2 = res.CompactPeptideToProteinPeptideMatching; } foreach (var psm in allPsmsArray) { psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching2); } foreach (var psm in allPsmsArrayModern) { psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching2); } fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run()); fdrResultsModernDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run()); Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3); Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3); CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5)); //check no change when using score fdrResultsClassic = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.ToList(), 1, CommonParameters, new List <string>()).Run()); fdrResultsModern = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArrayModern.ToList(), 1, CommonParameters, new List <string>()).Run()); Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 3); Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 3); }
protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList) { // load modifications Status("Loading modifications...", taskId); List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); List <string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList(); List <ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList(); IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList(); // what types of fragment ions to search for List <ProductType> ionTypes = new List <ProductType>(); if (CommonParameters.BIons) { ionTypes.Add(ProductType.BnoB1ions); } if (CommonParameters.YIons) { ionTypes.Add(ProductType.Y); } if (CommonParameters.ZdotIons) { ionTypes.Add(ProductType.Zdot); } if (CommonParameters.CIons) { ionTypes.Add(ProductType.C); } // load proteins List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters); List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>(); var numRawFiles = currentRawFileList.Count; // write prose settings ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: "); ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; "); ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; "); ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; "); ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ? "maximum peptide length = unspecified; " : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; "); ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; "); ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; "); ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; "); ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; "); ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; "); // temporary search type for writing prose // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance); ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; "); ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". "); ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. "); // start the G-PTM-D task Status("Running G-PTM-D...", new List <string> { taskId }); MyTaskResults = new MyTaskResults(this) { NewDatabases = new List <DbForTask>() }; var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)); HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)); MyFileManager myFileManager = new MyFileManager(true); object lock1 = new object(); object lock2 = new object(); for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++) { // Stop if canceled if (GlobalVariables.StopLoops) { break; } var origDataFile = currentRawFileList[spectraFileIndex]; // mark the file as in-progress StartingDataFile(origDataFile, new List <string> { taskId, "Individual Spectra Files", origDataFile }); CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]); MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance); NewCollection(Path.GetFileName(origDataFile), new List <string> { taskId, "Individual Spectra Files", origDataFile }); Status("Loading spectra file...", new List <string> { taskId, "Individual Spectra Files", origDataFile }); MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams); Status("Getting ms2 scans...", new List <string> { taskId, "Individual Spectra Files", origDataFile }); Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); myFileManager.DoneWithFile(origDataFile); PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length]; new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, combinedParams, new List <string> { taskId, "Individual Spectra Files", origDataFile }).Run(); allPsms.AddRange(allPsmsArray.Where(p => p != null)); FinishedDataFile(origDataFile, new List <string> { taskId, "Individual Spectra Files", origDataFile }); ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> { taskId, "Individual Spectra Files", origDataFile })); } ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> { taskId, "Individual Spectra Files" })); // Group and order psms SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string> { taskId }); var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching; foreach (var huh in allPsms) { huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest); } allPsms = allPsms.OrderByDescending(b => b.Score) .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue) .GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)) .Select(b => b.First()).ToList(); new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List <string> { taskId }).Run(); var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv"); WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>(), double.PositiveInfinity); FinishedWritingFile(writtenFile, new List <string> { taskId }); // get file-specific precursor mass tolerances for the GPTMD engine var filePathToPrecursorMassTolerance = new Dictionary <string, Tolerance>(); for (int i = 0; i < currentRawFileList.Count; i++) { string filePath = currentRawFileList[i]; Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance; if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null) { fileTolerance = fileSettingsList[i].PrecursorMassTolerance; } filePathToPrecursorMassTolerance.Add(filePath, fileTolerance); } // run GPTMD engine var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List <string> { taskId }).Run(); // Stop if canceled if (GlobalVariables.StopLoops) { return(MyTaskResults); } // write GPTMD databases if (dbFilenameList.Any(b => !b.IsContaminant)) { List <string> databaseNames = new List <string>(); foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant)) { var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath); var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant(); bool compressed = theExtension.EndsWith("gz"); databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName); } string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml"); var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName); FinishedWritingFile(outputXMLdbFullName, new List <string> { taskId }); MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false)); MyTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum()); MyTaskResults.AddNiceText("Mods types and counts:"); MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value))); } if (dbFilenameList.Any(b => b.IsContaminant)) { // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml"); List <string> databaseNames = new List <string>(); foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant)) { var dbName = Path.GetFileName(contaminantDb.FilePath); int indexOfFirstDot = dbName.IndexOf("."); databaseNames.Add(dbName.Substring(0, indexOfFirstDot)); } string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml"); var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants); FinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> { taskId }); MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true)); MyTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum()); MyTaskResults.AddNiceText("Mods types and counts:"); MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value))); } return(MyTaskResults); }
public static void TestModificationAnalysisWithNonLocalizedPtms() { IScan scan = new ThisTestScan(); ModificationMotif.TryGetMotif("N", out ModificationMotif motif1); ModificationWithMass mod1 = new ModificationWithMass("mod1", "mt", motif1, TerminusLocalization.Any, 10, neutralLosses: new List <double> { 10 }); IDictionary <int, List <Modification> > oneBasedModifications = new Dictionary <int, List <Modification> > { { 2, new List <Modification> { mod1 } }, { 7, new List <Modification> { mod1 } }, }; Protein protein1 = new Protein("MNLDLDNDL", "prot1", oneBasedModifications: oneBasedModifications); Dictionary <int, ModificationWithMass> allModsOneIsNterminus1 = new Dictionary <int, ModificationWithMass> { { 2, mod1 }, }; PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(0, protein1, 2, 9, allModsOneIsNterminus1); CompactPeptideBase pep1 = new CompactPeptide(pwsm1, TerminusType.None); Dictionary <int, ModificationWithMass> allModsOneIsNterminus3 = new Dictionary <int, ModificationWithMass> { { 7, mod1 }, }; PeptideWithSetModifications pwsm3 = new PeptideWithSetModifications(0, protein1, 2, 9, allModsOneIsNterminus3); CompactPeptideBase pep3 = new CompactPeptide(pwsm3, TerminusType.None); var newPsms = new List <PeptideSpectralMatch> { new PeptideSpectralMatch(pep1, 0, 10, 0, scan), new PeptideSpectralMatch(pep3, 0, 10, 0, scan), }; MassDiffAcceptor searchMode = new SinglePpmAroundZeroSearchMode(5); List <Protein> proteinList = new List <Protein> { protein1 }; CommonParameters CommonParameters = new CommonParameters { DigestionParams = new DigestionParams { MinPeptideLength = null, MaxMissedCleavages = 0, MaxModificationIsoforms = int.MaxValue }, ConserveMemory = false, ScoreCutoff = 1, }; SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngine = new SequencesToActualProteinPeptidesEngine(newPsms, proteinList, new List <ModificationWithMass>(), new List <ModificationWithMass>(), new List <ProductType> { ProductType.B, ProductType.Y }, new List <IDigestionParams> { CommonParameters.DigestionParams }, CommonParameters.ReportAllAmbiguity, new List <string>()); var nice = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngine.Run(); foreach (var psm in newPsms) { psm.MatchToProteinLinkedPeptides(nice.CompactPeptideToProteinPeptideMatching); } Assert.AreEqual(2, nice.CompactPeptideToProteinPeptideMatching[pep1].Count); FdrAnalysisEngine fdrAnalysisEngine = new FdrAnalysisEngine(newPsms, searchMode.NumNotches, false, new List <string>()); fdrAnalysisEngine.Run(); ModificationAnalysisEngine modificationAnalysisEngine = new ModificationAnalysisEngine(newPsms, new List <string>()); var res = (ModificationAnalysisResults)modificationAnalysisEngine.Run(); Assert.AreEqual(1, res.AllModsOnProteins.Count()); Assert.AreEqual(2, res.AllModsOnProteins[mod1.id]); Assert.AreEqual(0, res.ModsSeenAndLocalized.Count()); Assert.AreEqual(0, res.AmbiguousButLocalizedModsSeen.Count); Assert.AreEqual(1, res.UnlocalizedMods[mod1.id]); // Saw it, but not sure where! Assert.AreEqual(0, res.UnlocalizedFormulas.Count()); }
protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList) { myTaskResults = new MyTaskResults(this) { newDatabases = new List <DbForTask>() }; Status("Loading modifications...", new List <string> { taskId }); List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); List <string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList(); List <ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList(); IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList(); List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>(); List <ProductType> ionTypes = new List <ProductType>(); if (CommonParameters.BIons) { ionTypes.Add(ProductType.B); } if (CommonParameters.YIons) { ionTypes.Add(ProductType.Y); } if (CommonParameters.CIons) { ionTypes.Add(ProductType.C); } if (CommonParameters.ZdotIons) { ionTypes.Add(ProductType.Zdot); } Status("Loading proteins...", new List <string> { taskId }); Dictionary <string, Modification> um = null; //Decoys are currently not being searched with DecoyType.None var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out um)).ToList(); var numRawFiles = currentRawFileList.Count; proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: "); proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; "); proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; "); proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; "); if (CommonParameters.DigestionParams.MaxPeptideLength == null) { proseCreatedWhileRunning.Append("maximum peptide length = unspecified; "); } else { proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; "); } proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; "); proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; "); proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; "); proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; "); proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; "); //puppet searchmode for writing files. Actual searchmode is filespecific MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance); proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; "); proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. "); proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. "); Status("Running G-PTM-D...", new List <string> { taskId }); HashSet <IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b))); MyFileManager myFileManager = new MyFileManager(true); object lock1 = new object(); object lock2 = new object(); ParallelOptions parallelOptions = new ParallelOptions(); if (CommonParameters.MaxParallelFilesToAnalyze.HasValue) { parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value; } Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex => { var origDataFile = currentRawFileList[spectraFileIndex]; ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]); MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance); NewCollection(Path.GetFileName(origDataFile), new List <string> { taskId, "Individual Spectra Files", origDataFile }); StartingDataFile(origDataFile, new List <string> { taskId, "Individual Spectra Files", origDataFile }); Status("Loading spectra file...", new List <string> { taskId, "Individual Spectra Files", origDataFile }); IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks); Status("Getting ms2 scans...", new List <string> { taskId, "Individual Spectra Files", origDataFile }); Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); myFileManager.DoneWithFile(origDataFile); PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length]; new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, new List <string> { taskId, "Individual Spectra Files", origDataFile }).Run(); lock (lock2) { allPsms.AddRange(allPsmsArray); } FinishedDataFile(origDataFile, new List <string> { taskId, "Individual Spectra Files", origDataFile }); ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> { taskId, "Individual Spectra Files", origDataFile })); }); ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> { taskId, "Individual Spectra Files" })); // Group and order psms SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List <string> { taskId }); var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run(); Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching; foreach (var huh in allPsms) { if (huh != null) { huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest); } } allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList(); new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List <string> { taskId }).Run(); var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv"); WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>()); SucessfullyFinishedWritingFile(writtenFile, new List <string> { taskId }); var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List <string> { taskId }).Run(); if (dbFilenameList.Any(b => !b.IsContaminant)) { // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written //string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => !b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml"); List <string> databaseNames = new List <string>(); foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant)) { var dbName = Path.GetFileName(nonContaminantDb.FilePath); int indexOfFirstDot = dbName.IndexOf("."); databaseNames.Add(dbName.Substring(0, indexOfFirstDot)); } string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml"); var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName); SucessfullyFinishedWritingFile(outputXMLdbFullName, new List <string> { taskId }); myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullName, false)); myTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum()); myTaskResults.AddNiceText("Mods types and counts:"); myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value))); } if (dbFilenameList.Any(b => b.IsContaminant)) { // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml"); List <string> databaseNames = new List <string>(); foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant)) { var dbName = Path.GetFileName(contaminantDb.FilePath); int indexOfFirstDot = dbName.IndexOf("."); databaseNames.Add(dbName.Substring(0, indexOfFirstDot)); } string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml"); var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants); SucessfullyFinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> { taskId }); myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true)); myTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum()); myTaskResults.AddNiceText("Mods types and counts:"); myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value))); } return(myTaskResults); }