/// <summary>
/// Runs the false discovery rate analysis and records how many target PSMs in
/// <c>AllPsms</c> have a q-value at or below 0.01.
/// </summary>
/// <returns>The <see cref="FdrAnalysisResults"/> created for this run.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    FdrAnalysisResults myAnalysisResults = new FdrAnalysisResults(this, AnalysisType);

    Status("Running FDR analysis...");
    DoFalseDiscoveryRateAnalysis(myAnalysisResults);

    // DoFalseDiscoveryRateAnalysis stops early when GlobalVariables.StopLoops is set, so a PSM
    // may not have been given FDR values; skip such PSMs rather than dereference a null FdrInfo.
    myAnalysisResults.PsmsWithin1PercentFdr = AllPsms.Count(b => b.FdrInfo != null
        && b.FdrInfo.QValue <= 0.01
        && !b.IsDecoy);

    return myAnalysisResults;
}
/// <summary>
/// Runs the false discovery rate analysis on <c>newPsms</c> and records how many target PSMs
/// have a q-value at or below 0.01.
/// </summary>
/// <returns>The <see cref="FdrAnalysisResults"/> created for this run.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    FdrAnalysisResults myAnalysisResults = new FdrAnalysisResults(this);

    Status("Running FDR analysis...");
    DoFalseDiscoveryRateAnalysis(newPsms, massDiffAcceptorNumNotches, calculateEValue);

    // Report only target PSMs within the 1% cutoff: decoys are excluded and a q-value of
    // exactly 0.01 is included (q-values are ratios, so exactly 1/100 does occur).
    myAnalysisResults.PsmsWithin1PercentFdr = newPsms.Count(b => b.FdrInfo.QValue <= 0.01 && !b.IsDecoy);

    return myAnalysisResults;
}
/// <summary>
/// Assigns FDR values to every PSM in <c>AllPsms</c>, handling each protease separately:
/// cumulative target/decoy counts, a q-value and a per-notch q-value, plus an e-value and
/// e-score when <c>CalculateEValue</c> is set. Calls <c>CountPsm</c> at the end when
/// <c>AnalysisType</c> is "PSM".
/// </summary>
/// <param name="myAnalysisResults">
/// Results object whose <c>DeltaScoreImprovement</c> flag is written when <c>UseDeltaScore</c> is set.
/// It is written once per protease group, so the value from the last group is the one that remains.
/// </param>
private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
{
    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return;
    }

    // tie-breaker used by every ordering below: smaller precursor mass error first;
    // PSMs without a peptide mass sort last
    Func<PeptideSpectralMatch, double> precursorMassError = b => b.PeptideMonisotopicMass.HasValue
        ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value)
        : double.MaxValue;

    // key used to keep only the best-ranked PSM per (file, scan, peptide mass)
    Func<PeptideSpectralMatch, Tuple<string, int, double?>> scanAndMassKey =
        b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass);

    // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
    var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

    foreach (var proteasePsms in psmsGroupedByProtease)
    {
        var psms = proteasePsms.ToList();

        // generate the null distribution for e-value calculations
        double globalMeanScore = 0;
        int globalMeanCount = 0;

        if (CalculateEValue && psms.Any())
        {
            List<double> combinedScores = new List<double>();

            foreach (PeptideSpectralMatch psm in psms)
            {
                psm.AllScores.Sort();

                // A PSM without any scores contributes nothing. Skipping it here prevents the
                // RemoveAt below from deleting a score that belongs to a previous PSM.
                if (!psm.AllScores.Any())
                {
                    continue;
                }

                combinedScores.AddRange(psm.AllScores);

                // remove top scoring peptide (the last element after the ascending sort)
                combinedScores.RemoveAt(combinedScores.Count - 1);
            }

            // if every PSM only had one hit (should be very rare), both values stay at 0
            if (combinedScores.Any())
            {
                globalMeanScore = combinedScores.Average();
                globalMeanCount = (int)((double)combinedScores.Count / psms.Count);
            }
        }

        // Calculate delta scores for the psms (regardless of if we are using them)
        foreach (PeptideSpectralMatch psm in psms)
        {
            if (psm != null)
            {
                psm.CalculateDeltaScore(ScoreCutoff);
            }
        }

        // OrderBy is a stable sort, so this list is exactly what re-sorting psms by the same keys would give
        List<PeptideSpectralMatch> sortedByScore = psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError).ToList();

        // determine if Score or DeltaScore performs better
        if (UseDeltaScore)
        {
            const double qValueCutoff = 0.01; // optimize to get the most PSMs at a 1% FDR

            List<PeptideSpectralMatch> bestPerScanByScore = sortedByScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
            int scorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByScore, qValueCutoff);

            List<PeptideSpectralMatch> sortedByDeltaScore = psms.OrderByDescending(b => b.DeltaScore).ThenBy(precursorMassError).ToList();
            List<PeptideSpectralMatch> bestPerScanByDeltaScore = sortedByDeltaScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
            int deltaScorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByDeltaScore, qValueCutoff);

            // sort by best method (the ungrouped lists are used so no PSM is dropped)
            bool deltaScoreIsBetter = deltaScorePsmCount > scorePsmCount;
            myAnalysisResults.DeltaScoreImprovement = deltaScoreIsBetter;
            psms = deltaScoreIsBetter ? sortedByDeltaScore : sortedByScore;
        }
        else // sort by score
        {
            psms = sortedByScore;
        }

        double cumulativeTarget = 0;
        double cumulativeDecoy = 0;

        // set up arrays for local FDRs; PSMs with no notch use the extra last slot
        double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
        double[] cumulativeDecoyPerNotch = new double[MassDiffAcceptorNumNotches + 1];

        // Assign FDR values to PSMs
        for (int i = 0; i < psms.Count; i++)
        {
            // Stop if canceled. Leave the method entirely: the remaining PSMs have not been given
            // FDR values by this pass, so the thresholding loop below must not run over them.
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            PeptideSpectralMatch psm = psms[i];
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;

            if (psm.IsDecoy)
            {
                // the PSM can be ambiguous between a target and a decoy sequence
                // in that case, count it as the fraction of decoy hits
                // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                double decoyHits = 0;
                double totalHits = 0;
                var hits = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);

                foreach (var hit in hits)
                {
                    if (hit.First().Peptide.Protein.IsDecoy)
                    {
                        decoyHits++;
                    }
                    totalHits++;
                }

                cumulativeDecoy += decoyHits / totalHits;
                cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
            }
            else
            {
                cumulativeTarget++;
                cumulativeTargetPerNotch[notch]++;
            }

            // a zero target count gives +Infinity, which Math.Min caps at 1
            double qValue = Math.Min(1, cumulativeDecoy / cumulativeTarget);
            double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

            double maximumLikelihood = 0;
            double eValue = 0;
            double eScore = 0;
            if (CalculateEValue)
            {
                eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                eScore = -Math.Log(eValue, 10);
            }

            psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
        }

        // set q-value thresholds such that a lower scoring PSM can't have
        // a higher confidence than a higher scoring PSM
        // Populate min qValues
        double qValueThreshold = 1.0;
        double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
        for (int i = 0; i < qValueNotchThreshold.Length; i++)
        {
            qValueNotchThreshold[i] = 1.0;
        }

        // walk from the worst-ranked PSM to the best, carrying the smallest q-value seen so far
        for (int i = psms.Count - 1; i >= 0; i--)
        {
            PeptideSpectralMatch psm = psms[i];

            // threshold q-values
            if (psm.FdrInfo.QValue > qValueThreshold)
            {
                psm.FdrInfo.QValue = qValueThreshold;
            }
            else if (psm.FdrInfo.QValue < qValueThreshold)
            {
                qValueThreshold = psm.FdrInfo.QValue;
            }

            // threshold notch q-values
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
            if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
            {
                psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
            }
            else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
            {
                qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
            }
        }
    }

    if (AnalysisType == "PSM")
    {
        CountPsm();
    }
}
/// <summary>
/// Sorts <c>Psms</c> by score (or by delta score when that yields more PSMs at a 1% q-value
/// cutoff and <c>UseDeltaScore</c> is set) and assigns FDR values to each PSM: cumulative
/// target/decoy counts, a q-value and a per-notch q-value, plus an e-value and e-score when
/// <c>CalculateEValue</c> is set.
/// </summary>
/// <param name="myAnalysisResults">
/// Results object whose <c>DeltaScoreImprovement</c> flag is written when <c>UseDeltaScore</c> is set.
/// </param>
private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
{
    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return;
    }

    // tie-breaker used by every ordering below: smaller precursor mass error first;
    // PSMs without a peptide mass sort last
    Func<PeptideSpectralMatch, double> precursorMassError = b => b.PeptideMonisotopicMass.HasValue
        ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value)
        : double.MaxValue;

    // key used to keep only the best-ranked PSM per (file, scan, peptide mass)
    Func<PeptideSpectralMatch, Tuple<string, int, double?>> scanAndMassKey =
        b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass);

    // generate the null distribution for e-value calculations
    double globalMeanScore = 0;
    int globalMeanCount = 0;

    if (CalculateEValue && Psms.Any())
    {
        List<double> combinedScores = new List<double>();

        foreach (PeptideSpectralMatch psm in Psms)
        {
            psm.AllScores.Sort();

            // A PSM without any scores contributes nothing. Skipping it here prevents the
            // RemoveAt below from deleting a score that belongs to a previous PSM.
            if (!psm.AllScores.Any())
            {
                continue;
            }

            combinedScores.AddRange(psm.AllScores);

            // remove top scoring peptide (the last element after the ascending sort)
            combinedScores.RemoveAt(combinedScores.Count - 1);
        }

        // if every PSM only had one hit (should be very rare), both values stay at 0
        if (combinedScores.Any())
        {
            globalMeanScore = combinedScores.Average();
            globalMeanCount = (int)((double)combinedScores.Count / Psms.Count);
        }
    }

    // Calculate delta scores for the psms (regardless of if we are using them)
    foreach (PeptideSpectralMatch psm in Psms)
    {
        if (psm != null)
        {
            psm.CalculateDeltaScore(ScoreCutoff);
        }
    }

    // OrderBy is a stable sort, so this list is exactly what re-sorting Psms by the same keys would give
    var sortedByScore = Psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError).ToList();

    // determine if Score or DeltaScore performs better
    if (UseDeltaScore)
    {
        const double qValueCutoff = 0.01; // optimize to get the most PSMs at a 1% FDR

        List<PeptideSpectralMatch> bestPerScanByScore = sortedByScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
        int scorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByScore, qValueCutoff);

        var sortedByDeltaScore = Psms.OrderByDescending(b => b.DeltaScore).ThenBy(precursorMassError).ToList();
        List<PeptideSpectralMatch> bestPerScanByDeltaScore = sortedByDeltaScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
        int deltaScorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByDeltaScore, qValueCutoff);

        // sort by best method BUT DON'T GROUP BECAUSE WE NEED THAT FOR LOCALIZATION
        bool deltaScoreIsBetter = deltaScorePsmCount > scorePsmCount;
        Psms = deltaScoreIsBetter ? sortedByDeltaScore : sortedByScore;
        myAnalysisResults.DeltaScoreImprovement = deltaScoreIsBetter;
    }
    else // sort by score
    {
        Psms = sortedByScore;
    }

    int cumulativeTarget = 0;
    int cumulativeDecoy = 0;

    // set up arrays for local FDRs; PSMs with no notch use the extra last slot
    int[] cumulativeTargetPerNotch = new int[MassDiffAcceptorNumNotches + 1];
    int[] cumulativeDecoyPerNotch = new int[MassDiffAcceptorNumNotches + 1];

    // Assign FDR values to PSMs
    for (int i = 0; i < Psms.Count; i++)
    {
        // Stop if canceled. Leave the method entirely: the remaining PSMs have not been given
        // FDR values by this pass, so the thresholding loop below must not run over them.
        if (GlobalVariables.StopLoops)
        {
            return;
        }

        var psm = Psms[i];
        int notch = psm.Notch ?? MassDiffAcceptorNumNotches;

        if (psm.IsDecoy)
        {
            cumulativeDecoy++;
            cumulativeDecoyPerNotch[notch]++;
        }
        else
        {
            cumulativeTarget++;
            cumulativeTargetPerNotch[notch]++;
        }

        // One of the two counters was just incremented, so the ratio is never 0/0;
        // a zero target count gives +Infinity, which is capped at 1.
        double qValue = Math.Min(1, (double)cumulativeDecoy / cumulativeTarget);
        double qValueNotch = Math.Min(1, (double)cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

        double maximumLikelihood = 0;
        double eValue = 0;
        double eScore = 0;
        if (CalculateEValue)
        {
            eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
            eScore = -Math.Log(eValue, 10);
        }

        psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
    }

    // Populate min qValues
    double min_q_value = double.PositiveInfinity;
    double[] min_q_value_notch = new double[MassDiffAcceptorNumNotches + 1];
    for (int i = 0; i < MassDiffAcceptorNumNotches + 1; i++)
    {
        min_q_value_notch[i] = double.PositiveInfinity;
    }

    // The idea here is to set previous qValues as thresholds,
    // such that a lower scoring PSM can't have a higher confidence than a higher scoring PSM
    for (int i = Psms.Count - 1; i >= 0; i--)
    {
        PeptideSpectralMatch psm = Psms[i];

        if (psm.FdrInfo.QValue > min_q_value)
        {
            psm.FdrInfo.QValue = min_q_value;
        }
        else if (psm.FdrInfo.QValue < min_q_value)
        {
            min_q_value = psm.FdrInfo.QValue;
        }

        int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
        if (psm.FdrInfo.QValueNotch > min_q_value_notch[notch])
        {
            psm.FdrInfo.QValueNotch = min_q_value_notch[notch];
        }
        else if (psm.FdrInfo.QValueNotch < min_q_value_notch[notch])
        {
            min_q_value_notch[notch] = psm.FdrInfo.QValueNotch;
        }
    }
}
/// <summary>
/// Assigns FDR values to every PSM in <c>AllPsms</c>, handling each protease separately:
/// cumulative target/decoy counts, a q-value and a per-notch q-value. Any PEP and PEP q-value
/// already stored on a PSM are carried over. Afterwards, depending on <c>AnalysisType</c>,
/// runs <c>CountPsm</c>, the PEP computation and the PEP-based q-value computation.
/// </summary>
/// <param name="myAnalysisResults">
/// Results object; <c>DeltaScoreImprovement</c> is written when <c>UseDeltaScore</c> is set
/// (once per protease group, so the last group's value remains), and
/// <c>BinarySearchTreeMetrics</c> is written for a non-empty "PSM" analysis.
/// </param>
private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
{
    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return;
    }

    // tie-breaker used by every ordering below: smaller precursor mass error first;
    // PSMs without a peptide mass sort last
    Func<PeptideSpectralMatch, double> precursorMassError = b => b.PeptideMonisotopicMass.HasValue
        ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value)
        : double.MaxValue;

    // key used to keep only the best-ranked PSM per (file, scan, peptide mass)
    Func<PeptideSpectralMatch, Tuple<string, int, double?>> scanAndMassKey =
        b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass);

    // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
    var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

    foreach (var proteasePsms in psmsGroupedByProtease)
    {
        var psms = proteasePsms.ToList();

        // OrderBy is a stable sort, so this list is exactly what re-sorting psms by the same keys would give
        List<PeptideSpectralMatch> sortedByScore = psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError).ToList();

        // determine if Score or DeltaScore performs better
        if (UseDeltaScore)
        {
            const double qValueCutoff = 0.01; // optimize to get the most PSMs at a 1% FDR

            List<PeptideSpectralMatch> bestPerScanByScore = sortedByScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
            int scorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByScore, qValueCutoff);

            List<PeptideSpectralMatch> sortedByDeltaScore = psms.OrderByDescending(b => b.DeltaScore).ThenBy(precursorMassError).ToList();
            List<PeptideSpectralMatch> bestPerScanByDeltaScore = sortedByDeltaScore.GroupBy(scanAndMassKey).Select(g => g.First()).ToList();
            int deltaScorePsmCount = GetNumPSMsAtqValueCutoff(bestPerScanByDeltaScore, qValueCutoff);

            // sort by best method (the ungrouped lists are used so no PSM is dropped)
            bool deltaScoreIsBetter = deltaScorePsmCount > scorePsmCount;
            myAnalysisResults.DeltaScoreImprovement = deltaScoreIsBetter;
            psms = deltaScoreIsBetter ? sortedByDeltaScore : sortedByScore;
        }
        else // sort by score
        {
            psms = sortedByScore;
        }

        double cumulativeTarget = 0;
        double cumulativeDecoy = 0;

        // set up arrays for local FDRs; PSMs with no notch use the extra last slot
        double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
        double[] cumulativeDecoyPerNotch = new double[MassDiffAcceptorNumNotches + 1];

        // Assign FDR values to PSMs
        for (int i = 0; i < psms.Count; i++)
        {
            // Stop if canceled. Leave the method entirely: the remaining PSMs may still have a
            // null FdrInfo, so the thresholding loop and the PEP steps below must not run.
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            PeptideSpectralMatch psm = psms[i];
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;

            if (psm.IsDecoy)
            {
                // the PSM can be ambiguous between a target and a decoy sequence
                // in that case, count it as the fraction of decoy hits
                // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                double decoyHits = 0;
                double totalHits = 0;
                var hits = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);

                foreach (var hit in hits)
                {
                    if (hit.First().Peptide.Protein.IsDecoy)
                    {
                        decoyHits++;
                    }
                    totalHits++;
                }

                cumulativeDecoy += decoyHits / totalHits;
                cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
            }
            else
            {
                cumulativeTarget++;
                cumulativeTargetPerNotch[notch]++;
            }

            // a zero target count gives +Infinity, which Math.Min caps at 1
            double qValue = Math.Min(1, cumulativeDecoy / cumulativeTarget);
            double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

            // keep any PEP values already stored on the PSM; NaN when it has no FdrInfo yet
            double pep = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP;
            double pepQValue = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP_QValue;

            psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, pep, pepQValue);
        }

        // set q-value thresholds such that a lower scoring PSM can't have
        // a higher confidence than a higher scoring PSM
        // Populate min qValues
        double qValueThreshold = 1.0;
        double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
        for (int i = 0; i < qValueNotchThreshold.Length; i++)
        {
            qValueNotchThreshold[i] = 1.0;
        }

        // walk from the worst-ranked PSM to the best, carrying the smallest q-value seen so far
        for (int i = psms.Count - 1; i >= 0; i--)
        {
            PeptideSpectralMatch psm = psms[i];

            // threshold q-values
            if (psm.FdrInfo.QValue > qValueThreshold)
            {
                psm.FdrInfo.QValue = qValueThreshold;
            }
            else if (psm.FdrInfo.QValue < qValueThreshold)
            {
                qValueThreshold = psm.FdrInfo.QValue;
            }

            // threshold notch q-values
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
            if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
            {
                psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
            }
            else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
            {
                qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
            }
        }
    }

    if (AnalysisType == "PSM")
    {
        CountPsm();

        if (AllPsms.Count > 0)
        {
            myAnalysisResults.BinarySearchTreeMetrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(AllPsms);
            Compute_PEPValue_Based_QValue(AllPsms);
        }
    }
    else if (AnalysisType == "Peptide")
    {
        Compute_PEPValue_Based_QValue(AllPsms);
    }
}