Beispiel #1
0
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            FdrAnalysisResults myAnalysisResults = new FdrAnalysisResults(this, AnalysisType);

            Status("Running FDR analysis...");
            DoFalseDiscoveryRateAnalysis(myAnalysisResults);

            myAnalysisResults.PsmsWithin1PercentFdr = AllPsms.Count(b => b.FdrInfo.QValue <= 0.01 && !b.IsDecoy);

            return(myAnalysisResults);
        }
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            FdrAnalysisResults myAnalysisResults = new FdrAnalysisResults(this);

            Status("Running FDR analysis...");
            DoFalseDiscoveryRateAnalysis(newPsms, massDiffAcceptorNumNotches, calculateEValue);

            myAnalysisResults.PsmsWithin1PercentFdr = newPsms.Count(b => b.FdrInfo.QValue < 0.01);

            return(myAnalysisResults);
        }
Beispiel #3
0
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
            var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

            foreach (var proteasePsms in psmsGroupedByProtease)
            {
                var psms = proteasePsms.ToList();

                // generate the null distribution for e-value calculations
                double globalMeanScore = 0;
                int    globalMeanCount = 0;

                if (CalculateEValue && psms.Any())
                {
                    List <double> combinedScores = new List <double>();

                    foreach (PeptideSpectralMatch psm in psms)
                    {
                        psm.AllScores.Sort();
                        combinedScores.AddRange(psm.AllScores);

                        //remove top scoring peptide
                        if (combinedScores.Any())
                        {
                            combinedScores.RemoveAt(combinedScores.Count - 1);
                        }
                    }

                    if (combinedScores.Any())
                    {
                        globalMeanScore = combinedScores.Average();
                        globalMeanCount = (int)((double)combinedScores.Count / psms.Count);
                    }
                    else
                    {
                        // should be a very rare case... if there are PSMs but each PSM only has one hit
                        globalMeanScore = 0;
                        globalMeanCount = 0;
                    }
                }

                //Calculate delta scores for the psms (regardless of if we are using them)
                foreach (PeptideSpectralMatch psm in psms)
                {
                    if (psm != null)
                    {
                        psm.CalculateDeltaScore(ScoreCutoff);
                    }
                }

                //determine if Score or DeltaScore performs better
                if (UseDeltaScore)
                {
                    const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                    List <PeptideSpectralMatch> scoreSorted = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);
                    scoreSorted = psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

                    //sort by best method
                    myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
                    psms = myAnalysisResults.DeltaScoreImprovement ?
                           psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList() :
                           psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }
                else //sort by score
                {
                    psms = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }

                double cumulativeTarget = 0;
                double cumulativeDecoy  = 0;

                //set up arrays for local FDRs
                double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
                double[] cumulativeDecoyPerNotch  = new double[MassDiffAcceptorNumNotches + 1];

                //Assign FDR values to PSMs
                for (int i = 0; i < psms.Count; i++)
                {
                    // Stop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }

                    PeptideSpectralMatch psm = psms[i];
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.IsDecoy)
                    {
                        // the PSM can be ambiguous between a target and a decoy sequence
                        // in that case, count it as the fraction of decoy hits
                        // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                        double decoyHits = 0;
                        double totalHits = 0;
                        var    hits      = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);
                        foreach (var hit in hits)
                        {
                            if (hit.First().Peptide.Protein.IsDecoy)
                            {
                                decoyHits++;
                            }
                            totalHits++;
                        }

                        cumulativeDecoy += decoyHits / totalHits;
                        cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
                    }
                    else
                    {
                        cumulativeTarget++;
                        cumulativeTargetPerNotch[notch]++;
                    }

                    double qValue      = Math.Min(1, cumulativeDecoy / cumulativeTarget);
                    double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

                    double maximumLikelihood = 0;
                    double eValue            = 0;
                    double eScore            = 0;
                    if (CalculateEValue)
                    {
                        eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                        eScore = -Math.Log(eValue, 10);
                    }

                    psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
                }

                // set q-value thresholds such that a lower scoring PSM can't have
                // a higher confidence than a higher scoring PSM
                //Populate min qValues
                double   qValueThreshold      = 1.0;
                double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
                for (int i = 0; i < qValueNotchThreshold.Length; i++)
                {
                    qValueNotchThreshold[i] = 1.0;
                }

                for (int i = psms.Count - 1; i >= 0; i--)
                {
                    PeptideSpectralMatch psm = psms[i];

                    // threshold q-values
                    if (psm.FdrInfo.QValue > qValueThreshold)
                    {
                        psm.FdrInfo.QValue = qValueThreshold;
                    }
                    else if (psm.FdrInfo.QValue < qValueThreshold)
                    {
                        qValueThreshold = psm.FdrInfo.QValue;
                    }

                    // threshold notch q-values
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
                    {
                        psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
                    }
                    else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
                    {
                        qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
                    }
                }
            }

            if (AnalysisType == "PSM")
            {
                CountPsm();
            }
        }
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // generate the null distribution for e-value calculations
            double globalMeanScore = 0;
            int    globalMeanCount = 0;

            if (CalculateEValue && Psms.Any())
            {
                List <double> combinedScores = new List <double>();

                foreach (PeptideSpectralMatch psm in Psms)
                {
                    psm.AllScores.Sort();
                    combinedScores.AddRange(psm.AllScores);

                    //remove top scoring peptide
                    if (combinedScores.Any())
                    {
                        combinedScores.RemoveAt(combinedScores.Count - 1);
                    }
                }

                if (combinedScores.Any())
                {
                    globalMeanScore = combinedScores.Average();
                    globalMeanCount = (int)((double)combinedScores.Count / Psms.Count);
                }
                else
                {
                    // should be a very rare case... if there are PSMs but each PSM only has one hit
                    globalMeanScore = 0;
                    globalMeanCount = 0;
                }
            }

            int cumulativeTarget = 0;
            int cumulativeDecoy  = 0;

            //Calculate delta scores for the psms (regardless of if we are using them)
            foreach (PeptideSpectralMatch psm in Psms)
            {
                if (psm != null)
                {
                    psm.CalculateDeltaScore(ScoreCutoff);
                }
            }

            //determine if Score or DeltaScore performs better
            if (UseDeltaScore)
            {
                const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                List <PeptideSpectralMatch> scoreSorted = Psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);
                scoreSorted = Psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

                //sort by best method BUT DON'T GROUP BECAUSE WE NEED THAT FOR LOCALIZATION
                Psms = (DeltaScorePSMs > ScorePSMs) ?
                       Psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList() :
                       Psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
            }
            else //sort by score
            {
                Psms = Psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
            }

            //set up arrays for local FDRs
            int[] cumulativeTargetPerNotch = new int[MassDiffAcceptorNumNotches + 1];
            int[] cumulativeDecoyPerNotch  = new int[MassDiffAcceptorNumNotches + 1];

            //Assign FDR values to PSMs
            for (int i = 0; i < Psms.Count; i++)
            {
                // Stop if canceled
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var psm   = Psms[i];
                int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                if (psm.IsDecoy)
                {
                    cumulativeDecoy++;
                    cumulativeDecoyPerNotch[notch]++;
                }
                else
                {
                    cumulativeTarget++;
                    cumulativeTargetPerNotch[notch]++;
                }

                double qValue      = (double)cumulativeDecoy / cumulativeTarget;
                double qValueNotch = (double)cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch];

                double maximumLikelihood = 0;
                double eValue            = 0;
                double eScore            = 0;
                if (CalculateEValue)
                {
                    eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                    eScore = -Math.Log(eValue, 10);
                }

                if (qValue > 1)
                {
                    qValue = 1;
                }
                if (qValueNotch > 1)
                {
                    qValueNotch = 1;
                }

                psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
            }

            //Populate min qValues
            double min_q_value = double.PositiveInfinity;

            double[] min_q_value_notch = new double[MassDiffAcceptorNumNotches + 1];
            for (int i = 0; i < MassDiffAcceptorNumNotches + 1; i++)
            {
                min_q_value_notch[i] = double.PositiveInfinity;
            }

            //The idea here is to set previous qValues as thresholds,
            //such that a lower scoring PSM can't have a higher confidence than a higher scoring PSM
            for (int i = Psms.Count - 1; i >= 0; i--)
            {
                PeptideSpectralMatch psm = Psms[i];
                if (psm.FdrInfo.QValue > min_q_value)
                {
                    psm.FdrInfo.QValue = min_q_value;
                }
                else if (psm.FdrInfo.QValue < min_q_value)
                {
                    min_q_value = psm.FdrInfo.QValue;
                }
                int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                if (psm.FdrInfo.QValueNotch > min_q_value_notch[notch])
                {
                    psm.FdrInfo.QValueNotch = min_q_value_notch[notch];
                }
                else if (psm.FdrInfo.QValueNotch < min_q_value_notch[notch])
                {
                    min_q_value_notch[notch] = psm.FdrInfo.QValueNotch;
                }
            }
        }
Beispiel #5
0
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
            var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

            foreach (var proteasePsms in psmsGroupedByProtease)
            {
                var psms = proteasePsms.ToList();

                //determine if Score or DeltaScore performs better
                if (UseDeltaScore)
                {
                    const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                    List <PeptideSpectralMatch> scoreSorted = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);
                    scoreSorted = psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

                    //sort by best method
                    myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
                    psms = myAnalysisResults.DeltaScoreImprovement ?
                           psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList() :
                           psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }
                else //sort by score
                {
                    psms = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }

                double cumulativeTarget = 0;
                double cumulativeDecoy  = 0;

                //set up arrays for local FDRs
                double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
                double[] cumulativeDecoyPerNotch  = new double[MassDiffAcceptorNumNotches + 1];

                //Assign FDR values to PSMs
                for (int i = 0; i < psms.Count; i++)
                {
                    // Stop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }

                    PeptideSpectralMatch psm = psms[i];
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.IsDecoy)
                    {
                        // the PSM can be ambiguous between a target and a decoy sequence
                        // in that case, count it as the fraction of decoy hits
                        // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                        double decoyHits = 0;
                        double totalHits = 0;
                        var    hits      = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);
                        foreach (var hit in hits)
                        {
                            if (hit.First().Peptide.Protein.IsDecoy)
                            {
                                decoyHits++;
                            }
                            totalHits++;
                        }

                        cumulativeDecoy += decoyHits / totalHits;
                        cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
                    }
                    else
                    {
                        cumulativeTarget++;
                        cumulativeTargetPerNotch[notch]++;
                    }

                    double qValue      = Math.Min(1, cumulativeDecoy / cumulativeTarget);
                    double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

                    double pep       = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP;
                    double pepQValue = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP_QValue;

                    psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, pep, pepQValue);
                }

                // set q-value thresholds such that a lower scoring PSM can't have
                // a higher confidence than a higher scoring PSM
                //Populate min qValues
                double   qValueThreshold      = 1.0;
                double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
                for (int i = 0; i < qValueNotchThreshold.Length; i++)
                {
                    qValueNotchThreshold[i] = 1.0;
                }

                for (int i = psms.Count - 1; i >= 0; i--)
                {
                    PeptideSpectralMatch psm = psms[i];

                    // threshold q-values
                    if (psm.FdrInfo.QValue > qValueThreshold)
                    {
                        psm.FdrInfo.QValue = qValueThreshold;
                    }
                    else if (psm.FdrInfo.QValue < qValueThreshold)
                    {
                        qValueThreshold = psm.FdrInfo.QValue;
                    }

                    // threshold notch q-values
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
                    {
                        psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
                    }
                    else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
                    {
                        qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
                    }
                }
            }

            if (AnalysisType == "PSM")
            {
                CountPsm();
                if (AllPsms.Count > 0)
                {
                    myAnalysisResults.BinarySearchTreeMetrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(AllPsms);
                    Compute_PEPValue_Based_QValue(AllPsms);
                }
            }

            if (AnalysisType == "Peptide")
            {
                Compute_PEPValue_Based_QValue(AllPsms);
            }
        }