/// <summary>
/// Scores how far the SSRCalc3-predicted hydrophobicity of <paramref name="Peptide"/> lies from the
/// value recorded for the PSM's file and rounded retention time, in units of the recorded spread.
/// </summary>
/// <param name="psm">Match supplying the file path and scan retention time used as lookup keys.</param>
/// <param name="Peptide">Peptide whose hydrophobicity is predicted with SSRCalc3.</param>
/// <param name="d">
/// File path -> (rounded retention time -> tuple). Item1 is compared against the prediction and
/// Item2 is the divisor; presumably (average, deviation) - confirm against the code that fills it.
/// </param>
/// <returns>
/// |Item1 - predicted| / Item2, or the sentinel 100 when no entry exists for the file/time
/// or when the quotient is NaN or infinite (e.g. Item2 is zero).
/// </returns>
        private static float GetSSRCalcHydrophobicityZScore(PeptideSpectralMatch psm, PeptideWithSetModifications Peptide, Dictionary <string, Dictionary <int, Tuple <double, double> > > d)
        {
            double hydrophobicityZscore = double.NaN;

            // Single lookup per dictionary level instead of ContainsKey followed by repeated indexer reads.
            if (d.TryGetValue(psm.FullFilePath, out Dictionary<int, Tuple<double, double>> statsByRoundedTime))
            {
                // Math.Round uses its default midpoint rule here (round-half-to-even).
                int time = (int)Math.Round(psm.ScanRetentionTime, 0);

                if (statsByRoundedTime.TryGetValue(time, out Tuple<double, double> stats))
                {
                    // Using SSRCalc3 but probably any number of different calculators could be used instead.
                    // One could also use the CE mobility. The calculator is only built once we know
                    // there is something to compare against.
                    SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
                    double predictedHydrophobicity = calc.ScoreSequence(Peptide);

                    hydrophobicityZscore = Math.Abs(stats.Item1 - predictedHydrophobicity) / stats.Item2;
                }
            }

            // Missing statistics and degenerate divisions both collapse to the same sentinel value.
            if (double.IsNaN(hydrophobicityZscore) || double.IsInfinity(hydrophobicityZscore))
            {
                hydrophobicityZscore = 100;
            }

            return (float)hydrophobicityZscore;
        }
Пример #2
0
        /// <summary>
        /// Builds the feature record for one PSM/peptide pairing, stores it on
        /// <c>psm.PsmData_forPEPandPercolator</c> and returns it.
        /// Only features named in <paramref name="trainingVariables"/> are computed; the others keep
        /// their defaults (0, except PsmCount = 1 and HydrophobicityZScore = NaN).
        /// IsVariantPeptide is always computed.
        /// </summary>
        /// <param name="psm">Match the features are derived from; its PsmData_forPEPandPercolator is overwritten.</param>
        /// <param name="sequenceToPsmCount">PSM count keyed by the "|"-joined full sequences of the best matching peptides.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Hydrophobicity table used when the peptide's base and full sequences are equal.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Hydrophobicity table used otherwise.</param>
        /// <param name="chargeStateMode">Reference charge; the feature is the negated absolute difference from the PSM's precursor charge.</param>
        /// <param name="selectedPeptide">Peptide used for modification count, missed cleavages, ion series and hydrophobicity.</param>
        /// <param name="trainingVariables">Names of the features to compute.</param>
        /// <param name="notchToUse">Notch value reported when "Notch" is requested.</param>
        /// <param name="label">Ignored: the value passed in is always replaced by <c>!psm.IsDecoy</c>.</param>
        /// <returns>The record that was assigned to <c>psm.PsmData_forPEPandPercolator</c>.</returns>
        public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary <string, int> sequenceToPsmCount, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_modified, int chargeStateMode, PeptideWithSetModifications selectedPeptide, string[] trainingVariables, int notchToUse, bool label)
        {
            // Whole-number part of the score.
            float totalMatchingFragmentCount = trainingVariables.Contains("TotalMatchingFragmentCount")
                ? (float)Math.Floor(psm.Score)
                : 0f;

            // Number of additional candidate peptides, capped at 10.
            float ambiguity = trainingVariables.Contains("Ambiguity")
                ? Math.Min((float)(psm.PeptidesToMatchingFragments.Keys.Count - 1), 10)
                : 0f;

            // Score minus its integer-truncated value.
            float intensity = trainingVariables.Contains("Intensity")
                ? (float)(psm.Score - (int)psm.Score)
                : 0f;

            float chargeDifference = trainingVariables.Contains("PrecursorChargeDiffToMode")
                ? -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge)
                : 0f;

            float deltaScore = trainingVariables.Contains("DeltaScore")
                ? (float)psm.DeltaScore
                : 0f;

            float psmCount = 1;
            if (trainingVariables.Contains("PsmCount"))
            {
                string sequenceKey = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence).ToList());
                float observedCount = sequenceToPsmCount[sequenceKey];

                // Counts are snapped to coarse buckets for stability: low counts are well populated,
                // but the few peptides with very high counts may be targets or decoys and would
                // otherwise make the values swing between extremes.
                int[] buckets = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 75, 100, 200, 300, 400, 500 };

                int nearestBucket = buckets[0];
                float smallestGap = Math.Abs(observedCount - nearestBucket);
                for (int i = 1; i < buckets.Length; i++)
                {
                    float gap = Math.Abs(observedCount - buckets[i]);
                    // Strict comparison: on a tie the earlier (smaller) bucket wins.
                    if (gap < smallestGap)
                    {
                        smallestGap = gap;
                        nearestBucket = buckets[i];
                    }
                }

                psmCount = nearestBucket;
            }

            int notch = trainingVariables.Contains("Notch") ? notchToUse : 0;

            // Number of modifications on the peptide, capped at 10.
            float modCount = trainingVariables.Contains("ModsCount")
                ? Math.Min((float)selectedPeptide.AllModsOneIsNterminus.Keys.Count(), 10)
                : 0f;

            float missedCleavages = trainingVariables.Contains("MissedCleavagesCount")
                ? selectedPeptide.MissedCleavages
                : 0f;

            float longestSeq = trainingVariables.Contains("LongestFragmentIonSeries")
                ? psm.GetLongestIonSeriesBidirectional(selectedPeptide)
                : 0f;

            // The sequence comparison happens whether or not the feature is requested.
            float hydrophobicityZscore = float.NaN;
            bool peptideIsUnmodified = selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence);
            if (trainingVariables.Contains("HydrophobicityZScore"))
            {
                var hydrophobicityTable = peptideIsUnmodified
                    ? timeDependantHydrophobicityAverageAndDeviation_unmodified
                    : timeDependantHydrophobicityAverageAndDeviation_modified;

                hydrophobicityZscore = GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, hydrophobicityTable);
            }

            bool isVariantPeptide = PeptideIsVariant(selectedPeptide);

            // The incoming argument is discarded; the label mirrors the decoy flag.
            label = !psm.IsDecoy;

            psm.PsmData_forPEPandPercolator = new PsmData
            {
                TotalMatchingFragmentCount = totalMatchingFragmentCount,
                Intensity = intensity,
                PrecursorChargeDiffToMode = chargeDifference,
                DeltaScore = deltaScore,
                Notch = notch,
                PsmCount = psmCount,
                ModsCount = modCount,
                MissedCleavagesCount = missedCleavages,
                Ambiguity = ambiguity,
                LongestFragmentIonSeries = longestSeq,
                HydrophobicityZScore = hydrophobicityZscore,
                IsVariantPeptide = Convert.ToSingle(isVariantPeptide),
                Label = label
            };

            return psm.PsmData_forPEPandPercolator;
        }
        /// <summary>
        /// Builds a feature record for one PSM without storing it on the PSM.
        /// </summary>
        /// <param name="psm">Match the features are derived from.</param>
        /// <param name="sequenceToPsmCount">PSM count keyed by the "|"-joined full sequences of the best matching peptides.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Hydrophobicity table used when the peptide's base and full sequences are equal.</param>
        /// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Hydrophobicity table used otherwise.</param>
        /// <param name="selectedPeptide">Peptide to describe; when null the first best matching peptide of the PSM is used.</param>
        /// <param name="searchType">The hydrophobicity z-score is only computed for "standard"; otherwise it stays NaN.</param>
        /// <param name="notchToUse">Notch to report; falls back to the PSM's notch, then to 0.</param>
        /// <param name="trueOrFalse">Explicit label; when null the label is <c>!psm.IsDecoy</c>.</param>
        /// <returns>A new <c>PsmData</c> populated with the computed features.</returns>
        public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary <string, int> sequenceToPsmCount, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary <string, Dictionary <int, Tuple <double, double> > > timeDependantHydrophobicityAverageAndDeviation_modified, PeptideWithSetModifications selectedPeptide, string searchType, int?notchToUse, bool?trueOrFalse = null)
        {
            float candidateCount = (float)psm.PeptidesToMatchingFragments.Keys.Count;
            // Score minus its integer-truncated value.
            float scoreRemainder = (float)(psm.Score - (int)psm.Score);
            float precursorCharge = psm.ScanPrecursorCharge;
            float scoreDelta = (float)psm.DeltaScore;

            string sequenceKey = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence).ToList());
            float matchesForSequence = sequenceToPsmCount[sequenceKey];

            // Explicit notch wins, then the PSM's own notch, then zero.
            int notch = notchToUse ?? psm.Notch ?? 0;

            if (selectedPeptide == null)
            {
                selectedPeptide = psm.BestMatchingPeptides.First().Peptide;
            }

            float modificationCount = selectedPeptide.AllModsOneIsNterminus.Keys.Count();
            float missedCleavageCount = selectedPeptide.MissedCleavages;
            float longestIonSeries = psm.GetLongestIonSeriesBidirectional(selectedPeptide);

            // The sequence comparison happens whatever the search type is.
            float hydrophobicityZscore = float.NaN;
            bool peptideIsUnmodified = selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence);
            if (searchType == "standard")
            {
                var hydrophobicityTable = peptideIsUnmodified
                    ? timeDependantHydrophobicityAverageAndDeviation_unmodified
                    : timeDependantHydrophobicityAverageAndDeviation_modified;

                hydrophobicityZscore = GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, hydrophobicityTable);
            }

            // A supplied label takes precedence over the decoy flag.
            bool label = trueOrFalse ?? !psm.IsDecoy;

            PsmData entry = new PsmData()
            {
                Intensity = scoreRemainder,
                ScanPrecursorCharge = precursorCharge,
                DeltaScore = scoreDelta,
                Notch = notch,
                PsmCount = matchesForSequence,
                ModsCount = modificationCount,
                MissedCleavagesCount = missedCleavageCount,
                Ambiguity = candidateCount,
                LongestFragmentIonSeries = longestIonSeries,
                HydrophobicityZScore = hydrophobicityZscore,
                Label = label
            };

            return entry;
        }
Пример #4
0
 private static void AddMatchedIonsData(Dictionary <string, string> s, PeptideSpectralMatch peptide)
 {
     //sb for writing, format for double.ToString, header for input dictionary key
     (StringBuilder sb, string format, string header)[] matchedIonInfo = new(StringBuilder, string, string)[]
Пример #5
0
        /// <summary>
        /// Builds a feature record for one PSM, describing it by the first of its best matching peptides.
        /// </summary>
        /// <param name="psm">Match the features are derived from.</param>
        /// <param name="accessionCounts">Appearance count per protein accession; a missing accession counts as 1.</param>
        /// <param name="sequenceToPsmCount">PSM count keyed by the "|"-joined full sequences of the best matching peptides.</param>
        /// <param name="trueOrFalse">Explicit label; when null the label is <c>!psm.IsDecoy</c>.</param>
        /// <returns>A new <c>PsmData</c> populated with the computed features.</returns>
        public static PsmData CreateOnePsmDataFromPsm(PeptideSpectralMatch psm, Dictionary <string, int> accessionCounts, Dictionary <string, int> sequenceToPsmCount, bool?trueOrFalse = null)
        {
            // TODO: some properties like DeltaScore will need to be recalculated if we keep top N peptides per PSM
            // and rerank them because the top-scoring peptide can change

            float ambiguity  = (float)psm.PeptidesToMatchingFragments.Count;
            // Score minus its integer-truncated value.
            float intensity  = (float)(psm.Score - (int)psm.Score);
            float charge     = psm.ScanPrecursorCharge;
            float deltaScore = (float)psm.DeltaScore;
            float psmCount   = sequenceToPsmCount[String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence).ToList())];

            var   firstPeptide = psm.BestMatchingPeptides.Select(p => p.Peptide).First();
            float modCount     = firstPeptide.AllModsOneIsNterminus.Keys.Count();

            // Falls back to 0 when the PSM carries no notch.
            float notch = psm.Notch ?? 0;

            //todo: for non-specific cleavage, ignore missed cleavages
            float  missedCleavages = firstPeptide.MissedCleavages;
            float  longestSeq      = psm.GetLongestIonSeriesBidirectional(firstPeptide);
            string accession       = firstPeptide.Protein.Accession;

            // One dictionary lookup instead of ContainsKey followed by the indexer. The lookup is
            // skipped entirely when no accession counts were supplied.
            float appearances = accessionCounts.Count != 0 && accessionCounts.TryGetValue(accession, out int accessionCount)
                ? accessionCount
                : 1;

            // A supplied label takes precedence over the decoy flag.
            bool label = trueOrFalse ?? !psm.IsDecoy;

            return new PsmData()
            {
                Intensity = intensity,
                ScanPrecursorCharge = charge,
                DeltaScore = deltaScore,
                Notch = notch,
                PsmCount = psmCount,
                ModsCount = modCount,
                MissedCleavagesCount = missedCleavages,
                Ambiguity = ambiguity,
                LongestFragmentIonSeries = longestSeq,
                AccessionAppearances = appearances,
                Label = label
            };
        }