/// <summary>
/// Measures how far the hydrophobicity predicted for <paramref name="Peptide"/> lies from the
/// reference value stored for the PSM's file and rounded retention time, divided by that
/// entry's deviation.
/// </summary>
/// <param name="psm">Match whose <c>FullFilePath</c> and <c>ScanRetentionTime</c> select the reference entry.</param>
/// <param name="Peptide">Peptide handed to the SSRCalc3 calculator for scoring.</param>
/// <param name="d">
/// Reference table: file path -> (retention time rounded to an integer -> tuple).
/// <c>Item1</c> is compared against the prediction and <c>Item2</c> is the divisor
/// (callers in this file pass tables named "average and deviation").
/// </param>
/// <returns>
/// <c>|Item1 - predicted| / Item2</c> as a float, or 100 when the file or time bin has no
/// entry or when the quotient is NaN or infinite.
/// </returns>
private static float GetSSRCalcHydrophobicityZScore(PeptideSpectralMatch psm, PeptideWithSetModifications Peptide, Dictionary<string, Dictionary<int, Tuple<double, double>>> d)
{
    // Value reported whenever no usable z-score can be computed.
    const double fallbackZScore = 100;

    double hydrophobicityZscore = double.NaN;

    // TryGetValue resolves each level with a single lookup instead of ContainsKey + repeated indexers.
    if (d.TryGetValue(psm.FullFilePath, out Dictionary<int, Tuple<double, double>> binsForFile))
    {
        int time = (int)Math.Round(psm.ScanRetentionTime, 0);
        if (binsForFile.TryGetValue(time, out Tuple<double, double> reference))
        {
            // Using SSRCalc3 but probably any number of different calculators could be used instead.
            // One could also use the CE mobility.
            // The calculator is only built once a reference entry is known to exist, so PSMs
            // without one never pay for its construction.
            SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            double predictedHydrophobicity = calc.ScoreSequence(Peptide);
            hydrophobicityZscore = Math.Abs(reference.Item1 - predictedHydrophobicity) / reference.Item2;
        }
    }

    // A zero divisor produces NaN or infinity; those cases and a missing entry all map to the fallback.
    if (double.IsNaN(hydrophobicityZscore) || double.IsInfinity(hydrophobicityZscore))
    {
        hydrophobicityZscore = fallbackZScore;
    }

    return (float)hydrophobicityZscore;
}
/// <summary>
/// Builds the <see cref="PsmData"/> feature record for one PSM / peptide pairing, stores it in
/// <c>psm.PsmData_forPEPandPercolator</c>, and returns it. A feature is only computed when its
/// name appears in <paramref name="trainingVariables"/>; otherwise it keeps its default
/// (0, except PsmCount = 1 and HydrophobicityZScore = NaN).
/// </summary>
/// <param name="psm">Match supplying the spectrum-level features; also receives the finished record.</param>
/// <param name="sequenceToPsmCount">PSM counts keyed by the '|'-joined full sequences of the PSM's best-matching peptides. Read through the indexer, so the key must be present when "PsmCount" is requested.</param>
/// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Hydrophobicity reference table used when the peptide's base and full sequences are equal.</param>
/// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Hydrophobicity reference table used otherwise.</param>
/// <param name="chargeStateMode">Charge the precursor charge is compared against for "PrecursorChargeDiffToMode".</param>
/// <param name="selectedPeptide">Peptide supplying the peptide-level features.</param>
/// <param name="trainingVariables">Names of the features to compute.</param>
/// <param name="notchToUse">Value reported as Notch when "Notch" is requested.</param>
/// <param name="label">Not read: the stored label is always derived from <c>psm.IsDecoy</c>.</param>
/// <returns>The record that was stored on <paramref name="psm"/>.</returns>
public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary<string, int> sequenceToPsmCount, Dictionary<string, Dictionary<int, Tuple<double, double>>> timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary<string, Dictionary<int, Tuple<double, double>>> timeDependantHydrophobicityAverageAndDeviation_modified, int chargeStateMode, PeptideWithSetModifications selectedPeptide, string[] trainingVariables, int notchToUse, bool label)
{
    bool Uses(string feature) => trainingVariables.Contains(feature);

    // The integer part of the score is reported as the fragment count, the fractional part as the intensity.
    float totalMatchingFragmentCount = Uses("TotalMatchingFragmentCount") ? (float)Math.Floor(psm.Score) : 0;
    float ambiguity = Uses("Ambiguity") ? Math.Min((float)(psm.PeptidesToMatchingFragments.Keys.Count - 1), 10) : 0;
    float intensity = Uses("Intensity") ? (float)(psm.Score - (int)psm.Score) : 0;
    float chargeDifference = Uses("PrecursorChargeDiffToMode") ? -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge) : 0;
    float deltaScore = Uses("DeltaScore") ? (float)psm.DeltaScore : 0;

    float psmCount = 1;
    if (Uses("PsmCount"))
    {
        string sequenceKey = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence));
        float rawCount = sequenceToPsmCount[sequenceKey];

        // Counts are snapped to a fixed set of bins for stability: low counts are well populated and
        // give good statistics, but the few peptides with very high counts can be targets or decoys
        // and would otherwise make the values swing between extremes.
        int[] bins = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50, 75, 100, 200, 300, 400, 500 };
        int closest = bins[0];
        float smallestGap = Math.Abs(rawCount - closest);
        foreach (int bin in bins)
        {
            float gap = Math.Abs(rawCount - bin);
            // Strictly-less keeps the earlier (smaller) bin when two bins are equally close.
            if (gap < smallestGap)
            {
                smallestGap = gap;
                closest = bin;
            }
        }

        psmCount = closest;
    }

    int notch = Uses("Notch") ? notchToUse : 0;
    float modCount = Uses("ModsCount") ? Math.Min((float)selectedPeptide.AllModsOneIsNterminus.Keys.Count(), 10) : 0;
    float missedCleavages = Uses("MissedCleavagesCount") ? selectedPeptide.MissedCleavages : 0;
    float longestSeq = Uses("LongestFragmentIonSeries") ? psm.GetLongestIonSeriesBidirectional(selectedPeptide) : 0;

    // Equal base and full sequences select the unmodified reference table; anything else selects the modified one.
    bool isUnmodified = selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence);
    float hydrophobicityZscore = float.NaN;
    if (Uses("HydrophobicityZScore"))
    {
        var referenceTable = isUnmodified
            ? timeDependantHydrophobicityAverageAndDeviation_unmodified
            : timeDependantHydrophobicityAverageAndDeviation_modified;
        hydrophobicityZscore = GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, referenceTable);
    }

    bool isVariantPeptide = PeptideIsVariant(selectedPeptide);

    // NOTE(review): the incoming label argument is never read; the stored label is always
    // "not a decoy". Confirm whether callers expect their value to be honoured.
    bool isTarget = !psm.IsDecoy;

    PsmData entry = new PsmData
    {
        TotalMatchingFragmentCount = totalMatchingFragmentCount,
        Intensity = intensity,
        PrecursorChargeDiffToMode = chargeDifference,
        DeltaScore = deltaScore,
        Notch = notch,
        PsmCount = psmCount,
        ModsCount = modCount,
        MissedCleavagesCount = missedCleavages,
        Ambiguity = ambiguity,
        LongestFragmentIonSeries = longestSeq,
        HydrophobicityZScore = hydrophobicityZscore,
        IsVariantPeptide = Convert.ToSingle(isVariantPeptide),
        Label = isTarget
    };

    psm.PsmData_forPEPandPercolator = entry;
    return entry;
}
/// <summary>
/// Builds and returns a new <see cref="PsmData"/> feature record for a PSM, taking
/// peptide-level features from <paramref name="selectedPeptide"/>.
/// </summary>
/// <param name="psm">Match supplying the spectrum-level features.</param>
/// <param name="sequenceToPsmCount">PSM counts keyed by the '|'-joined full sequences of the PSM's best-matching peptides. Read through the indexer, so the key must be present.</param>
/// <param name="timeDependantHydrophobicityAverageAndDeviation_unmodified">Hydrophobicity reference table used when the peptide's base and full sequences are equal.</param>
/// <param name="timeDependantHydrophobicityAverageAndDeviation_modified">Hydrophobicity reference table used otherwise.</param>
/// <param name="selectedPeptide">Peptide to describe; when null, the first of the PSM's best-matching peptides is used.</param>
/// <param name="searchType">The hydrophobicity z-score is only computed when this equals "standard"; otherwise it stays NaN.</param>
/// <param name="notchToUse">Notch to report; when null, <c>psm.Notch</c> is used, and 0 if that is also null.</param>
/// <param name="trueOrFalse">Explicit label; when null, the label is true for a non-decoy PSM and false for a decoy.</param>
/// <returns>The newly created feature record.</returns>
public static PsmData CreateOnePsmDataEntry(PeptideSpectralMatch psm, Dictionary<string, int> sequenceToPsmCount, Dictionary<string, Dictionary<int, Tuple<double, double>>> timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary<string, Dictionary<int, Tuple<double, double>>> timeDependantHydrophobicityAverageAndDeviation_modified, PeptideWithSetModifications selectedPeptide, string searchType, int? notchToUse, bool? trueOrFalse = null)
{
    float ambiguity = psm.PeptidesToMatchingFragments.Keys.Count;
    // The fractional part of the score is reported as the intensity feature.
    float intensity = (float)(psm.Score - (int)psm.Score);
    float charge = psm.ScanPrecursorCharge;
    float deltaScore = (float)psm.DeltaScore;

    string sequenceKey = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence));
    float psmCount = sequenceToPsmCount[sequenceKey];

    // Caller-supplied notch wins, then the PSM's own notch, then 0.
    int notch = notchToUse ?? psm.Notch.GetValueOrDefault();

    if (selectedPeptide == null)
    {
        selectedPeptide = psm.BestMatchingPeptides.First().Peptide;
    }

    float modCount = selectedPeptide.AllModsOneIsNterminus.Keys.Count();
    float missedCleavages = selectedPeptide.MissedCleavages;
    float longestSeq = psm.GetLongestIonSeriesBidirectional(selectedPeptide);

    // Equal base and full sequences select the unmodified reference table; anything else selects the modified one.
    bool isUnmodified = selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence);
    float hydrophobicityZscore = float.NaN;
    if (searchType == "standard")
    {
        var referenceTable = isUnmodified
            ? timeDependantHydrophobicityAverageAndDeviation_unmodified
            : timeDependantHydrophobicityAverageAndDeviation_modified;
        hydrophobicityZscore = GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, referenceTable);
    }

    bool label = trueOrFalse ?? !psm.IsDecoy;

    return new PsmData
    {
        Intensity = intensity,
        ScanPrecursorCharge = charge,
        DeltaScore = deltaScore,
        Notch = notch,
        PsmCount = psmCount,
        ModsCount = modCount,
        MissedCleavagesCount = missedCleavages,
        Ambiguity = ambiguity,
        LongestFragmentIonSeries = longestSeq,
        HydrophobicityZScore = hydrophobicityZscore,
        Label = label
    };
}
private static void AddMatchedIonsData(Dictionary <string, string> s, PeptideSpectralMatch peptide) { //sb for writing, format for double.ToString, header for input dictionary key (StringBuilder sb, string format, string header)[] matchedIonInfo = new(StringBuilder, string, string)[]
/// <summary>
/// Builds and returns a new <see cref="PsmData"/> feature record for a PSM, taking
/// peptide-level features from the first of its best-matching peptides.
/// </summary>
/// <param name="psm">Match to describe. It must have at least one best-matching peptide.</param>
/// <param name="accessionCounts">Appearance counts keyed by protein accession; an accession without an entry is reported as 1.</param>
/// <param name="sequenceToPsmCount">PSM counts keyed by the '|'-joined full sequences of the PSM's best-matching peptides. Read through the indexer, so the key must be present.</param>
/// <param name="trueOrFalse">Explicit label; when null, the label is true for a non-decoy PSM and false for a decoy.</param>
/// <returns>The newly created feature record.</returns>
public static PsmData CreateOnePsmDataFromPsm(PeptideSpectralMatch psm, Dictionary<string, int> accessionCounts, Dictionary<string, int> sequenceToPsmCount, bool? trueOrFalse = null)
{
    // TODO: some properties like DeltaScore will need to be recalculated if we keep top N peptides per PSM
    // and rerank them because the top-scoring peptide can change
    float ambiguity = (float)psm.PeptidesToMatchingFragments.Count;
    // The fractional part of the score is reported as the intensity feature.
    float intensity = (float)(psm.Score - (int)psm.Score);
    float charge = psm.ScanPrecursorCharge;
    float deltaScore = (float)psm.DeltaScore;

    string sequenceKey = String.Join("|", psm.BestMatchingPeptides.Select(p => p.Peptide.FullSequence));
    float psmCount = sequenceToPsmCount[sequenceKey];

    var firstPeptide = psm.BestMatchingPeptides.Select(p => p.Peptide).First();
    float modCount = firstPeptide.AllModsOneIsNterminus.Keys.Count();
    float notch = psm.Notch ?? 0;

    //todo: for non-specific cleavage, ignore missed cleavages
    float missedCleavages = firstPeptide.MissedCleavages;
    float longestSeq = psm.GetLongestIonSeriesBidirectional(firstPeptide);

    string accession = firstPeptide.Protein.Accession;

    // One lookup via TryGetValue instead of ContainsKey + indexer. The emptiness check is kept so
    // that an empty table is never probed with the key at all (TryGetValue rejects a null key).
    float appearances = 1;
    if (accessionCounts.Count != 0 && accessionCounts.TryGetValue(accession, out int accessionCount))
    {
        appearances = accessionCount;
    }

    bool label = trueOrFalse ?? !psm.IsDecoy;

    return new PsmData
    {
        Intensity = intensity,
        ScanPrecursorCharge = charge,
        DeltaScore = deltaScore,
        Notch = notch,
        PsmCount = psmCount,
        ModsCount = modCount,
        MissedCleavagesCount = missedCleavages,
        Ambiguity = ambiguity,
        LongestFragmentIonSeries = longestSeq,
        AccessionAppearances = appearances,
        Label = label
    };
}