public static void TestComputePEPValueTopDown() { //just making sure that topdown data goes through the pep calculator without crashing. CommonParameters CommonParameters = new CommonParameters( digestionParams: new DigestionParams(protease: "top-down"), scoreCutoff: 1, assumeOrphanPeaksAreZ1Fragments: false); var variableModifications = new List <Modification>(); var fixedModifications = new List <Modification>(); List <Protein> proteinList = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\HeLaFakeTopDown.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1); var origDataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML"); MyFileManager myFileManager = new MyFileManager(true); var myMsDataFile = myFileManager.LoadFile(origDataFile, CommonParameters); var searchMode = new SinglePpmAroundZeroSearchMode(5); Tolerance DeconvolutionMassTolerance = new PpmTolerance(5); var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray(); PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, CommonParameters, new List <string>()).Run(); var nonNullPsms = allPsmsArray.Where(p => p != null).ToList(); List <PeptideSpectralMatch> moreNonNullPSMs = new List <PeptideSpectralMatch>(); int reps = 3; for (int i = 0; i < reps; i++) { foreach (PeptideSpectralMatch psm in nonNullPsms) { moreNonNullPSMs.Add(psm); } } FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(moreNonNullPSMs.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run()); var maxScore = nonNullPsms.Select(n => n.Score).Max(); PeptideSpectralMatch maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First(); Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>(); List <string> sequences = new List <string>(); foreach (PeptideSpectralMatch psm in nonNullPsms) { var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList(); sequences.Add(String.Join("|", ss)); } var s = sequences.GroupBy(i => i); foreach (var grp in s) { sequenceToPsmCount.Add(grp.Key, grp.Count()); } Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTimeHI_behavior = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); string[] trainingVariables = PsmData.trainingInfos["topDown"]; int chargeStateMode = 4; var(notch, pwsm) = maxScorePsm.BestMatchingPeptides.First(); var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, pwsm, trainingVariables, notch, !pwsm.Protein.IsDecoy); Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count - 1, Is.EqualTo(maxPsmData.Ambiguity)); Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05)); Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05)); Assert.AreEqual(maxPsmData.HydrophobicityZScore, float.NaN); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount)); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount)); Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch)); Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount * reps)); Assert.That(-Math.Abs(chargeStateMode - maxScorePsm.ScanPrecursorCharge), Is.EqualTo(maxPsmData.PrecursorChargeDiffToMode)); Assert.AreEqual((float)0, maxPsmData.IsVariantPeptide); }
public static void TestComputePEPValue() { var variableModifications = new List <Modification>(); var fixedModifications = new List <Modification>(); var origDataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML"); MyFileManager myFileManager = new MyFileManager(true); CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams()); var myMsDataFile = myFileManager.LoadFile(origDataFile, CommonParameters); var searchModes = new SinglePpmAroundZeroSearchMode(5); List <Protein> proteinList = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1); var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", CommonParameters).OrderBy(b => b.PrecursorMass).ToArray(); PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run(); FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run()); var nonNullPsms = allPsmsArray.Where(p => p != null).ToList(); var nonNullPsmsOriginalCopy = allPsmsArray.Where(p => p != null).ToList(); var maxScore = nonNullPsms.Select(n => n.Score).Max(); var maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First(); Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>(); List <string> sequences = new List <string>(); foreach (PeptideSpectralMatch psm in nonNullPsms) { var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList(); sequences.Add(String.Join("|", ss)); } var s = sequences.GroupBy(i => i); foreach (var grp in s) { sequenceToPsmCount.Add(grp.Key, grp.Count()); } Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTimeHI_behavior = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); //average hydrophobicity, standard deviation hydrophobicity Tuple <double, double> at = new Tuple <double, double>(33.0, 1.0); Dictionary <int, Tuple <double, double> > HI_Time_avg_dev = new Dictionary <int, Tuple <double, double> > { { 154, at } }; fileSpecificRetTimeHI_behavior.Add(@"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", HI_Time_avg_dev); var pwsm = maxScorePsm.BestMatchingPeptides.First(); var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, pwsm.Peptide, "standard", pwsm.Notch, !maxScorePsm.IsDecoy); Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count, Is.EqualTo(maxPsmData.Ambiguity)); Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05)); Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05)); Assert.That(maxPsmData.HydrophobicityZScore, Is.EqualTo(5.170955).Within(0.05)); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount)); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount)); Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch)); Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount)); Assert.That(maxScorePsm.ScanPrecursorCharge, Is.EqualTo(maxPsmData.ScanPrecursorCharge)); PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms); int trueCount = 0; foreach (var item in allPsmsArray.Where(p => p != null)) { var b = item.FdrInfo.PEP; if (b >= 0.5) { trueCount++; } } Assert.GreaterOrEqual(32, trueCount); }
public static void TestComputePEPValue() { var variableModifications = new List <Modification>(); var fixedModifications = new List <Modification>(); var origDataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML"); MyFileManager myFileManager = new MyFileManager(true); CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams()); var myMsDataFile = myFileManager.LoadFile(origDataFile, CommonParameters); var searchModes = new SinglePpmAroundZeroSearchMode(5); List <Protein> proteinList = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1); var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", CommonParameters).OrderBy(b => b.PrecursorMass).ToArray(); PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length]; new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run(); FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run()); var nonNullPsms = allPsmsArray.Where(p => p != null).ToList(); var nonNullPsmsOriginalCopy = allPsmsArray.Where(p => p != null).ToList(); var maxScore = nonNullPsms.Select(n => n.Score).Max(); PeptideSpectralMatch maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First(); Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>(); List <string> sequences = new List <string>(); foreach (PeptideSpectralMatch psm in nonNullPsms) { var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList(); sequences.Add(String.Join("|", ss)); } var s = sequences.GroupBy(i => i); foreach (var grp in s) { sequenceToPsmCount.Add(grp.Key, grp.Count()); } Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTimeHI_behavior = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary <string, Dictionary <int, Tuple <double, double> > >(); //average hydrophobicity, standard deviation hydrophobicity Tuple <double, double> at = new Tuple <double, double>(33.0, 1.0); Dictionary <int, Tuple <double, double> > HI_Time_avg_dev = new Dictionary <int, Tuple <double, double> > { { 154, at } }; fileSpecificRetTimeHI_behavior.Add(@"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", HI_Time_avg_dev); string[] trainingVariables = new[] { "HydrophobicityZScore", "Intensity", "ScanPrecursorCharge", "DeltaScore", "Notch", "PsmCount", "ModsCount", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "IsVariantPeptide" }; int chargeStateMode = 4; var(notch, pwsm) = maxScorePsm.BestMatchingPeptides.First(); var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, pwsm, trainingVariables, notch, !pwsm.Protein.IsDecoy); Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count - 1, Is.EqualTo(maxPsmData.Ambiguity)); Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05)); Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05)); Assert.That(maxPsmData.HydrophobicityZScore, Is.EqualTo(5.170955).Within(0.05)); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount)); Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount)); Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch)); Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount)); Assert.That(-Math.Abs(chargeStateMode - maxScorePsm.ScanPrecursorCharge), Is.EqualTo(maxPsmData.PrecursorChargeDiffToMode)); Assert.AreEqual((float)0, maxPsmData.IsVariantPeptide); PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms); int trueCount = 0; foreach (var item in allPsmsArray.Where(p => p != null)) { var b = item.FdrInfo.PEP; if (b >= 0.5) { trueCount++; } } List <PeptideSpectralMatch> moreNonNullPSMs = new List <PeptideSpectralMatch>(); for (int i = 0; i < 3; i++) { foreach (PeptideSpectralMatch psm in nonNullPsms) { moreNonNullPSMs.Add(psm); } } string expectedMetrics = "************************************************************\r\n* Metrics for Determination of PEP Using Binary Classification \r\n" + "*-----------------------------------------------------------\r\n* Accuracy: 1\r\n* Area Under Curve: 1\r\n* Area under Precision recall Curve: 1\r\n* F1Score: 1\r\n" + "* LogLoss: 2.60551851621861E-10\r\n* LogLossReduction: 0.999999999599165\r\n* PositivePrecision: 1\r\n* PositiveRecall: 1\r\n* NegativePrecision: 1\r\n" + "* NegativeRecall: 1\r\n* Count of Ambiguous Peptides Removed: 0\r\n************************************************************\r\n"; string metrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(moreNonNullPSMs); Assert.AreEqual(expectedMetrics, metrics); Assert.GreaterOrEqual(32, trueCount); //Test Variant Peptide as Input is identified as such as part of PEP calculation input much of the next several lines simply necessry to create a psm. var anMzSpectrum = new MzSpectrum(new double[] { 1, 1 }, new double[] { 2, 2 }, true); Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(new MsDataScan(anMzSpectrum, 1, 1, true, Polarity.Negative, 2, null, "", MZAnalyzerType.Orbitrap, 2, null, null, null), 1, 1, "path", new CommonParameters()); Protein variantProtein = new Protein("MPEPPPTIDE", "protein3", sequenceVariations: new List <SequenceVariation> { new SequenceVariation(4, 6, "PPP", "P", @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G||||||||||||||||\tGT:AD:DP\t1/1:30,30:30", null) }); PeptideWithSetModifications varPep = variantProtein.GetVariantProteins().SelectMany(p => p.Digest(CommonParameters.DigestionParams, null, null)).FirstOrDefault(); PeptideSpectralMatch variantPSM = new PeptideSpectralMatch(varPep, 0, maxScorePsm.Score, maxScorePsm.ScanIndex, scan, new DigestionParams(), null); sequenceToPsmCount = new Dictionary <string, int>(); sequences = new List <string>(); nonNullPsms.Add(variantPSM); foreach (PeptideSpectralMatch psm in nonNullPsms) { var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList(); sequences.Add(String.Join("|", ss)); } s = sequences.GroupBy(i => i); foreach (var grp in s) { sequenceToPsmCount.Add(grp.Key, grp.Count()); } var(vnotch, vpwsm) = variantPSM.BestMatchingPeptides.First(); PsmData variantPsmData = PEP_Analysis.CreateOnePsmDataEntry(variantPSM, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, vpwsm, trainingVariables, vnotch, !maxScorePsm.IsDecoy); Assert.AreEqual((float)1, variantPsmData.IsVariantPeptide); }