// Runs GptmdEngine twice.
//   1. With an empty identification list: no candidate modifications are expected.
//   2. After adding one PSM whose scan precursor mass is the base-sequence mass of the first
//      digested variant peptide plus the mass of the "21" modification: exactly one entry is
//      expected in res.Mods, holding numModifiedResidues candidates under the "accession" key.
//
// proteinSequence, accession and sequenceVariantDescription define the parent protein and its
// N->A sequence variation at position 1.
// NOTE(review): the final assertion indexes res.Mods with the literal "accession", not the
// accession parameter - presumably every test case passes "accession"; confirm.
public static void TestGptmdEngine(string proteinSequence, string accession, string sequenceVariantDescription, int numModifiedResidues)
{
    // Mass shared by the candidate modification and the artificial precursor mass shift.
    const double modificationMass = 21.981943;

    ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
    var gptmdModifications = new List<Modification>
    {
        new Modification(_originalId: "21", _modificationType: "mt", _target: motifN, _locationRestriction: "Anywhere.", _monoisotopicMass: modificationMass)
    };
    IEnumerable<Tuple<double, double>> combos = new List<Tuple<double, double>>();
    Tolerance precursorMassTolerance = new PpmTolerance(10);
    var allResultingIdentifications = new List<PeptideSpectralMatch>();
    var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)>
    {
        ("", new CommonParameters())
    };

    // First run: no identifications, so no modifications may be reported.
    var engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, new Dictionary<string, Tolerance> { { "filepath", precursorMassTolerance } }, new CommonParameters(), fsp, new List<string>());
    var res = (GptmdResults)engine.Run();
    Assert.AreEqual(0, res.Mods.Count);

    // Build the variant protein and take the first peptide produced by digesting it.
    var parentProtein = new Protein(proteinSequence, accession, sequenceVariations: new List<SequenceVariation> { new SequenceVariation(1, "N", "A", sequenceVariantDescription) });
    var variantProteins = parentProtein.GetVariantProteins();
    CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));
    List<Modification> variableModifications = new List<Modification>();
    var modPep = variantProteins.SelectMany(p => p.Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications)).First();

    // Scan whose precursor m/z corresponds to the unmodified peptide mass plus modificationMass.
    MsDataScan dataScan = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
    double precursorMz = (new Proteomics.AminoAcidPolymer.Peptide(modPep.BaseSequence).MonoisotopicMass + modificationMass).ToMz(1);
    Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dataScan, precursorMz, 1, "filepath", new CommonParameters());

    PeptideSpectralMatch newPsm = new PeptideSpectralMatch(modPep, 0, 0, 0, scan, commonParameters, new List<MatchedFragmentIon>());
    newPsm.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0);
    allResultingIdentifications.Add(newPsm);

    // Second run: the single PSM should yield modification candidates.
    // NOTE(review): null is passed here where the first run passed fsp - confirm this is intended.
    engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, new Dictionary<string, Tolerance> { { "filepath", precursorMassTolerance } }, new CommonParameters(), null, new List<string>());
    res = (GptmdResults)engine.Run();
    Assert.AreEqual(1, res.Mods.Count);
    Assert.AreEqual(numModifiedResidues, res.Mods["accession"].Count);
}
// Checks GptmdEngine when the precursor mass shift equals the sum of two modification masses
// that are supplied together as a combo: 21.981943 (id "21", motif N) and 15.994915 (id "16",
// motif P).
//
// proteinSequence, accession, variantAA and sequenceVariantDescription define the parent protein
// and its N->variantAA sequence variation at position 1. The remaining parameters are the
// expected counts: entries in res.Mods (numModHashes), candidates under "accession"
// (numModifiedResidues), those with id "21" (numModifiedResiduesN) or id "16"
// (numModifiedResiduesP), and candidates under "accession_N1P" (numModifiedResiduesNP, 0 when
// that key is absent).
// NOTE(review): the lookups use the literals "accession" / "accession_N1P", not the accession
// parameter - presumably every test case passes "accession"; confirm.
public static void TestCombos(string proteinSequence, string accession, string variantAA, string sequenceVariantDescription, int numModHashes, int numModifiedResidues, int numModifiedResiduesN, int numModifiedResiduesP, int numModifiedResiduesNP)
{
    const double massOnN = 21.981943;
    const double massOnP = 15.994915;

    ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
    ModificationMotif.TryGetMotif("P", out ModificationMotif motifP);
    var gptmdModifications = new List<Modification>
    {
        new Modification(_originalId: "21", _modificationType: "mt", _target: motifN, _locationRestriction: "Anywhere.", _monoisotopicMass: massOnN),
        new Modification(_originalId: "16", _modificationType: "mt", _target: motifP, _locationRestriction: "Anywhere.", _monoisotopicMass: massOnP)
    };
    IEnumerable<Tuple<double, double>> combos = new List<Tuple<double, double>> { new Tuple<double, double>(massOnN, massOnP) };
    Tolerance precursorMassTolerance = new PpmTolerance(10);

    // Build the variant protein and take the first peptide produced by digesting it.
    var parentProtein = new Protein(proteinSequence, accession, sequenceVariations: new List<SequenceVariation> { new SequenceVariation(1, "N", variantAA, sequenceVariantDescription) });
    var variantProteins = parentProtein.GetVariantProteins();
    DigestionParams digestionParams = new DigestionParams(minPeptideLength: 5);
    List<Modification> variableModifications = new List<Modification>();
    var modPep = variantProteins.SelectMany(p => p.Digest(digestionParams, new List<Modification>(), variableModifications)).First();

    // Scan whose precursor m/z corresponds to the unmodified peptide mass plus both modification masses.
    MsDataScan dfd = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
    double precursorMz = (new Proteomics.AminoAcidPolymer.Peptide(modPep.BaseSequence).MonoisotopicMass + massOnN + massOnP).ToMz(1);
    Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfd, precursorMz, 1, "filepath", new CommonParameters());

    PeptideSpectralMatch match = new PeptideSpectralMatch(modPep, 0, 0, 0, scan, digestionParams, new List<MatchedFragmentIon>());
    match.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0);
    var allIdentifications = new List<PeptideSpectralMatch> { match };

    var engine = new GptmdEngine(allIdentifications, gptmdModifications, combos, new Dictionary<string, Tolerance> { { "filepath", precursorMassTolerance } }, new CommonParameters(), new List<string>());
    var res = (GptmdResults)engine.Run();

    Assert.AreEqual(numModHashes, res.Mods.Count);
    Assert.AreEqual(numModifiedResidues, res.Mods["accession"].Count);
    Assert.AreEqual(numModifiedResiduesN, res.Mods["accession"].Count(b => b.Item2.OriginalId.Equals("21")));
    Assert.AreEqual(numModifiedResiduesP, res.Mods["accession"].Count(b => b.Item2.OriginalId.Equals("16")));

    // The variant-specific entry may be missing; a missing entry counts as zero candidates.
    res.Mods.TryGetValue("accession_N1P", out var hash);
    Assert.AreEqual(numModifiedResiduesNP, hash?.Count ?? 0);
}
// Writes a small protein database (protein1, protein2 and protein1's applied variants), writes an
// mzML file for one observed peptide, runs a SearchTask followed by a PostSearchAnalysisTask with
// WritePrunedDatabase enabled, and then checks the protein and modification counts read back from
// the "temp" database and from the two pruned XML databases.
//
// NOTE(review): the database is written to "PrunedDbTestVariant/fakeDb.xml" but both tasks read
// "PrunedDbTest/fakeDb.xml". The paths are reproduced exactly as found - confirm whether this
// relies on a file produced elsewhere.
public static void TestProteinPrunedWithModSelectionAndVariants()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string prunedDbFolder = Path.Combine(testDirectory, @"PrunedDbTest");
    string prunedDbIndividualFolder = Path.Combine(testDirectory, @"PrunedDbTest/individual");
    string fakeDbPath = Path.Combine(testDirectory, @"PrunedDbTest/fakeDb.xml");

    var modToWrite = GlobalVariables.AllModsKnown.First(p => p.ModificationType == "UniProt" && p.Target.ToString() == "T");
    var modToNotWrite = GlobalVariables.AllModsKnown.First(p => p.ModificationType == "Common Artifact" && p.Target.ToString() == "X");

    // Proteins: protein1 carries a V->T sequence variation at position 4 (with its own mod), protein2 has none.
    Dictionary<int, List<Modification>> variantMods = new Dictionary<int, List<Modification>>();
    variantMods.Add(1, new List<Modification>() { modToNotWrite });
    List<SequenceVariation> variants = new List<SequenceVariation>
    {
        new SequenceVariation(4, 4, "V", "T", @"20\t41168825\t.\tT\tC\t14290.77\t.\tANN=C|missense_variant|MODERATE|PLCG1|ENSG00000124181|transcript|ENST00000244007.7|protein_coding|22/33|c.2438T>C|p.Ile813Thr|2635/5285|2438/3876|813/1291||\tGT:AD:DP:GQ:PL\t1/1:1,392:393:99:14319,1142,0", variantMods)
    };
    var protein1 = new Protein("PEPVIDEKPEPT", "1", oneBasedModifications: new Dictionary<int, List<Modification>> { { 1, new List<Modification> { modToNotWrite } }, { 12, new List<Modification> { modToWrite } } }, sequenceVariations: variants);
    var protein2 = new Protein("PEPIDPEPT", "2", oneBasedModifications: new Dictionary<int, List<Modification>> { { 1, new List<Modification> { modToNotWrite } }, { 9, new List<Modification> { modToWrite } } });
    var protein1Variants = protein1.GetVariantProteins(1, 0);

    string path = @"temp";
    var proteinList = new List<Protein> { protein1, protein2 };
    proteinList.AddRange(protein1Variants);

    // Database without extra mods, reloaded at the end as the reference.
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), proteinList, path);

    Directory.CreateDirectory(Path.Combine(testDirectory, @"PrunedDbTestVariant"));
    Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
    var modsForTestEntry = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(1, modToWrite),
        new Tuple<int, Modification>(2, modToNotWrite),
    };
    // modList is still empty at this point; the "test" entry is only added further down.
    ProteinDbWriter.WriteXmlDatabase(modList, proteinList, Path.Combine(testDirectory, @"PrunedDbTestVariant/fakeDb.xml"));

    // The single observed peptide, taken from the first applied variant of protein1.
    var peptideObserved = protein1Variants.First()
        .Digest(new DigestionParams(minPeptideLength: 1), new List<Modification>(), new List<Modification>())
        .First(p => p.BaseSequence == "PEPT");

    PostSearchAnalysisParameters testPostTaskParameters = new PostSearchAnalysisParameters();
    CommonParameters commonParam = new CommonParameters(useDeltaScore: false);

    // NOTE(review): this allocates 10000 x 10000 doubles (roughly 800 MB) - confirm the size is needed.
    double[,] noiseData = new double[10000, 10000];
    noiseData[0, 0] = 1.0;
    List<Proteomics.Fragmentation.MatchedFragmentIon> matchedFragmentIons = new List<Proteomics.Fragmentation.MatchedFragmentIon>() { };
    MzSpectrum spectrum = new MzSpectrum(noiseData);
    MsDataScan scan = new MsDataScan(spectrum, 1, 1, true, Polarity.Unknown, 2, new MzLibUtil.MzRange(10, 1000), "", MZAnalyzerType.Orbitrap, 10000, null, noiseData, "");

    // Parameters for the post-search analysis task.
    testPostTaskParameters.ProteinList = proteinList;
    testPostTaskParameters.AllPsms = new List<PeptideSpectralMatch>
    {
        new PeptideSpectralMatch(peptideObserved, 0, 20, 1, new Ms2ScanWithSpecificMass(scan, 100, 1, @"", commonParam), commonParam, matchedFragmentIons)
    };
    testPostTaskParameters.SearchParameters = new SearchParameters();
    testPostTaskParameters.SearchParameters.WritePrunedDatabase = true;
    testPostTaskParameters.SearchParameters.DoQuantification = false;
    testPostTaskParameters.SearchParameters.WriteMzId = false;
    testPostTaskParameters.DatabaseFilenameList = new List<DbForTask>() { new DbForTask(fakeDbPath, false) };
    testPostTaskParameters.OutputFolder = prunedDbFolder;
    Directory.CreateDirectory(prunedDbIndividualFolder);
    testPostTaskParameters.IndividualResultsOutputFolder = prunedDbIndividualFolder;

    int[] stuffForSpectraFile = new int[2];
    stuffForSpectraFile[0] = 10;
    stuffForSpectraFile[1] = 10;
    Dictionary<string, int[]> numSpectraPerFile = new Dictionary<string, int[]>();
    numSpectraPerFile.Add("", stuffForSpectraFile);
    testPostTaskParameters.NumMs2SpectraPerFile = numSpectraPerFile;

    // Spectra file containing the observed peptide.
    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> { peptideObserved });
    string mzmlName = @"newMzml.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);
    modList.Add("test", modsForTestEntry);
    testPostTaskParameters.CurrentRawFileList = new List<string>() { mzmlName };

    // Run the search, then the post-search analysis that writes the pruned databases.
    SearchTask task5 = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            WritePrunedDatabase = true,
            SearchTarget = true,
            MassDiffAcceptorType = MassDiffAcceptorType.Exact,
        },
        CommonParameters = new CommonParameters()
    };
    var test = task5.RunTask(prunedDbFolder, new List<DbForTask>() { new DbForTask(fakeDbPath, false) }, new List<string>() { mzmlName }, "name");
    testPostTaskParameters.SearchTaskResults = test;

    PostSearchAnalysisTask testPostTask = new PostSearchAnalysisTask();
    testPostTask.Parameters = testPostTaskParameters;
    testPostTask.CommonParameters = commonParam;
    testPostTask.FileSpecificParameters = new List<(string FileName, CommonParameters Parameters)> { ("newMzMl.mzml", commonParam) };
    testPostTask.Run();

    var proteinsLoaded = ProteinDbLoader.LoadProteinXML(path, true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List<string>(), out var unknownMods);

    // assert that mods on proteins are the same before/after task is run
    Assert.AreEqual(protein1Variants.First().Accession, proteinsLoaded.First().Accession);
    Assert.AreEqual(protein1Variants.First().OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.First().OneBasedPossibleLocalizedModifications.Count());
    Assert.AreEqual(protein2.OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.ElementAt(1).OneBasedPossibleLocalizedModifications.Count());

    // assert that protein pruned DB has correct proteins mods
    var proteinPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(testDirectory, @"PrunedDbTest/fakeDbproteinPruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List<string>(), out var unknownMods1);
    Assert.That(proteinPruned.Count().Equals(1));
    Assert.That(proteinPruned.FirstOrDefault().OneBasedPossibleLocalizedModifications.Count().Equals(1));

    // assert that mod-pruned DB has correct proteins and mods
    var modPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(testDirectory, @"PrunedDbTest/fakeDbpruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List<string>(), out var unknownMods2);
    Assert.That(modPruned.Count().Equals(2));
    Assert.That(modPruned.ElementAt(0).OneBasedPossibleLocalizedModifications.Count().Equals(1));
    Assert.That(modPruned.ElementAt(1).OneBasedPossibleLocalizedModifications.Count().Equals(1));
}
// Runs a classic search plus FDR analysis on the bundled HeLa subset, then checks that
// PEP_Analysis.CreateOnePsmDataEntry produces the expected feature values for the best-scoring
// PSM, that ComputePEPValuesForAllPSMsGeneric returns the expected metrics text, and that a PSM
// built from a sequence-variant peptide is flagged with IsVariantPeptide == 1.
public static void TestComputePEPValue()
{
    // Maps each "|"-joined list of best-matching full sequences to the number of PSMs sharing it.
    Dictionary<string, int> CountPsmsPerSequence(IEnumerable<PeptideSpectralMatch> matches)
    {
        return matches
            .Select(match => String.Join("|", match.BestMatchingPeptides.Select(best => best.Peptide.FullSequence).ToList()))
            .GroupBy(sequence => sequence)
            .ToDictionary(sameSequence => sameSequence.Key, sameSequence => sameSequence.Count());
    }

    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();
    var origDataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
    MyFileManager myFileManager = new MyFileManager(true);
    CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams());
    var myMsDataFile = myFileManager.LoadFile(origDataFile, commonParameters);
    var searchModes = new SinglePpmAroundZeroSearchMode(5);
    List<Protein> proteinList = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", commonParameters).OrderBy(b => b.PrecursorMass).ToArray();

    // Search and FDR analysis; both engines are run for their effect on the PSMs.
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, commonParameters, new List<string>()).Run();
    new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, commonParameters, new List<string>()).Run();

    var nonNullPsms = allPsmsArray.Where(p => p != null).ToList();
    var maxScore = nonNullPsms.Select(n => n.Score).Max();
    PeptideSpectralMatch maxScorePsm = nonNullPsms.First(n => n.Score == maxScore);

    Dictionary<string, int> sequenceToPsmCount = CountPsmsPerSequence(nonNullPsms);

    Dictionary<string, Dictionary<int, Tuple<double, double>>> fileSpecificRetTimeHI_behavior = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();
    Dictionary<string, Dictionary<int, Tuple<double, double>>> fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();

    // average hydrophobicity, standard deviation hydrophobicity
    Tuple<double, double> at = new Tuple<double, double>(33.0, 1.0);
    Dictionary<int, Tuple<double, double>> HI_Time_avg_dev = new Dictionary<int, Tuple<double, double>> { { 154, at } };
    fileSpecificRetTimeHI_behavior.Add(@"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", HI_Time_avg_dev);

    string[] trainingVariables = new[] { "HydrophobicityZScore", "Intensity", "ScanPrecursorCharge", "DeltaScore", "Notch", "PsmCount", "ModsCount", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "IsVariantPeptide" };
    int chargeStateMode = 4;

    // Feature values for the best-scoring PSM.
    var (notch, pwsm) = maxScorePsm.BestMatchingPeptides.First();
    var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, pwsm, trainingVariables, notch, !pwsm.Protein.IsDecoy);
    Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count - 1, Is.EqualTo(maxPsmData.Ambiguity));
    Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
    Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));
    Assert.That(maxPsmData.HydrophobicityZScore, Is.EqualTo(5.170955).Within(0.05));
    Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
    Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
    Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
    Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
    Assert.That(-Math.Abs(chargeStateMode - maxScorePsm.ScanPrecursorCharge), Is.EqualTo(maxPsmData.PrecursorChargeDiffToMode));
    Assert.AreEqual((float)0, maxPsmData.IsVariantPeptide);

    // Compute PEP values, then count the PSMs whose PEP is at least 0.5.
    PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);
    int trueCount = allPsmsArray.Count(p => p != null && p.FdrInfo.PEP >= 0.5);

    // Triplicate the PSM list before computing the metrics text.
    List<PeptideSpectralMatch> moreNonNullPSMs = new List<PeptideSpectralMatch>();
    for (int i = 0; i < 3; i++)
    {
        moreNonNullPSMs.AddRange(nonNullPsms);
    }

    string expectedMetrics = "************************************************************\r\n* Metrics for Determination of PEP Using Binary Classification \r\n"
        + "*-----------------------------------------------------------\r\n* Accuracy: 1\r\n* Area Under Curve: 1\r\n* Area under Precision recall Curve: 1\r\n* F1Score: 1\r\n"
        + "* LogLoss: 2.60551851621861E-10\r\n* LogLossReduction: 0.999999999599165\r\n* PositivePrecision: 1\r\n* PositiveRecall: 1\r\n* NegativePrecision: 1\r\n"
        + "* NegativeRecall: 1\r\n* Count of Ambiguous Peptides Removed: 0\r\n************************************************************\r\n";
    string metrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(moreNonNullPSMs);
    Assert.AreEqual(expectedMetrics, metrics);
    Assert.GreaterOrEqual(32, trueCount);

    // Check that a variant peptide is identified as such in the PEP calculation input.
    // Most of the following lines are only needed to create a PSM for that peptide.
    var anMzSpectrum = new MzSpectrum(new double[] { 1, 1 }, new double[] { 2, 2 }, true);
    Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(new MsDataScan(anMzSpectrum, 1, 1, true, Polarity.Negative, 2, null, "", MZAnalyzerType.Orbitrap, 2, null, null, null), 1, 1, "path", new CommonParameters());
    Protein variantProtein = new Protein("MPEPPPTIDE", "protein3", sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 6, "PPP", "P", @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G||||||||||||||||\tGT:AD:DP\t1/1:30,30:30", null) });
    PeptideWithSetModifications varPep = variantProtein.GetVariantProteins().SelectMany(p => p.Digest(commonParameters.DigestionParams, null, null)).FirstOrDefault();
    PeptideSpectralMatch variantPSM = new PeptideSpectralMatch(varPep, 0, maxScorePsm.Score, maxScorePsm.ScanIndex, scan, new DigestionParams(), null);

    // Recount sequences with the variant PSM included.
    nonNullPsms.Add(variantPSM);
    sequenceToPsmCount = CountPsmsPerSequence(nonNullPsms);

    var (vnotch, vpwsm) = variantPSM.BestMatchingPeptides.First();
    // NOTE(review): the last argument is derived from maxScorePsm, not variantPSM - confirm intended.
    PsmData variantPsmData = PEP_Analysis.CreateOnePsmDataEntry(variantPSM, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, vpwsm, trainingVariables, vnotch, !maxScorePsm.IsDecoy);
    Assert.AreEqual((float)1, variantPsmData.IsVariantPeptide);
}