/// <summary>
/// Computes how many "Z" units the SSRCalc3-predicted hydrophobicity of <paramref name="Peptide"/> lies from
/// the value stored for the PSM's spectra file and retention-time bin.
/// </summary>
/// <param name="psm">The spectral match that supplies the file path and scan retention time.</param>
/// <param name="Peptide">The peptide whose hydrophobicity is predicted.</param>
/// <param name="d">
/// Lookup keyed by spectra file path, then by retention-time bin. Item1 of each tuple is compared against
/// the prediction and Item2 is used as the divisor (presumably average and standard deviation).
/// </param>
/// <returns>
/// The absolute Z-score, capped at 10. The cap is also returned when the file path is null, when the file or
/// bin is not present in <paramref name="d"/>, or when the computed value is NaN or infinite.
/// </returns>
private static float GetSSRCalcHydrophobicityZScore(PeptideSpectralMatch psm, PeptideWithSetModifications Peptide, Dictionary<string, Dictionary<int, Tuple<double, double>>> d)
{
    // each "Z" is one standard deviation. so, maxHydrophobicityZscore 10 is quite large
    const double maxHydrophobicityZscore = 10;
    double hydrophobicityZscore = double.NaN;

    // A null path cannot be used as a dictionary key (the lookup would throw), so it is treated as "no data".
    string filePath = psm.FullFilePath;
    Dictionary<int, Tuple<double, double>> binsForFile;
    if (filePath != null && d.TryGetValue(filePath, out binsForFile))
    {
        // Retention times are binned to the nearest even number.
        int time = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0));

        Tuple<double, double> averageAndDeviation;
        if (binsForFile.TryGetValue(time, out averageAndDeviation))
        {
            //Using SSRCalc3 but probably any number of different calculators could be used instead. One could also use the CE mobility.
            // The calculator is only built once we know a matching bin exists.
            SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
            double predictedHydrophobicity = calc.ScoreSequence(Peptide);
            hydrophobicityZscore = Math.Abs(averageAndDeviation.Item1 - predictedHydrophobicity) / averageAndDeviation.Item2;
        }
    }

    // Missing data (NaN), a zero divisor (infinity) and extreme outliers all collapse to the cap.
    if (double.IsNaN(hydrophobicityZscore) || double.IsInfinity(hydrophobicityZscore) || hydrophobicityZscore > maxHydrophobicityZscore)
    {
        hydrophobicityZscore = maxHydrophobicityZscore;
    }

    return (float)hydrophobicityZscore;
}
/// <summary>
/// Scores every peptide in the 100A table with SSRCalc3 and compares the result to the tabulated value.
/// </summary>
// Problems with the results from the article
// [TestMethod]
public void ScoreSequence_100A_Test()
{
    var calculator = new SSRCalc3(RetentionTimeRegression.SSRCALC_100_A, SSRCalc3.Column.A100);

    for (int row = 0; row < _peptides100A.GetLength(0); row++)
    {
        var sequence = (string)_peptides100A[row, 0];
        var tabulated = (double)_peptides100A[row, 1];

        // A missing score is treated as zero.
        double score = calculator.ScoreSequence(sequence) ?? 0;

        // The tabulated values carry two decimals (supporting information of the
        // SSRCalc 3 publication). Rounding goes through float on purpose: as a double,
        // 12.805 rounds to 12.80 rather than 12.81, leaving a spurious difference of
        // 0.005000000000002558 against 12.81.
        double roundedScore = Math.Round((float)score, 2);

        // Only fall into the assertion when the raw score is off by more than the
        // rounding tolerance; this makes failures easier to break on while debugging.
        bool outsideTolerance = Math.Abs(tabulated - score) > 0.005;
        if (outsideTolerance)
        {
            Assert.AreEqual(tabulated, roundedScore, "Peptide {0}", sequence);
        }
    }
}
/// <summary>
/// Checks that SSRCalc3 (300A column) scores three unmodified peptides to within 0.01 of the
/// hydrophobicity values hard-coded below.
/// </summary>
public static void SimpleReversePhaseRetentionTimeTest()
{
    Dictionary<string, double> peptidesAndHydrophobicities = new Dictionary<string, double>
    {
        { "QSHFANAEPEQK", 11.27 },
        { "SDLFENLQNYR", 30.44 },
        { "SLPSEFEPINLR", 33.12 }
    };

    SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);

    foreach (KeyValuePair<string, double> entry in peptidesAndHydrophobicities)
    {
        // An empty modification dictionary yields an unmodified peptide.
        var peptide = new PeptideWithSetModifications(entry.Key, new Dictionary<string, Modification>());
        double expected = entry.Value;

        double actual = calc.ScoreSequence(peptide);

        // NUnit takes the actual value first and the expected value inside the constraint;
        // with the arguments in this order a failure message reports the right numbers.
        Assert.That(actual, Is.EqualTo(expected).Within(0.01), entry.Key);
    }
}
/// <summary>
/// For every spectra file, builds a lookup from retention-time bin to the (average, standard deviation)
/// of the SSRCalc3-predicted hydrophobicities of the peptides identified in that bin.
/// </summary>
/// <param name="psms">
/// Spectral matches to draw from. Only non-decoy matches with q-value &lt;= 0.01 contribute. Matches with a
/// null file path are pooled into every file's statistics.
/// </param>
/// <param name="computeHydrophobicitiesforModifiedPeptides">
/// When true only peptides carrying modifications are used; when false only unmodified peptides are used.
/// </param>
/// <returns>
/// Dictionary keyed by file path, then by retention-time bin. Item1 is the bin average and Item2 the bin
/// standard deviation, replaced by a global standard deviation when it is NaN, below 0.5, or more than
/// three times the global value.
/// </returns>
private static Dictionary<string, Dictionary<int, Tuple<double, double>>> ComputeHydrophobicityValues(List<PeptideSpectralMatch> psms, bool computeHydrophobicitiesforModifiedPeptides)
{
    SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);

    //TODO change the tuple so the values have names
    Dictionary<string, Dictionary<int, Tuple<double, double>>> rtHydrophobicityAvgDev = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();

    // Null paths are left out: they cannot be dictionary keys (Add would throw), and PSMs without a
    // path are already pooled into every real file by the filter below.
    List<string> filenames = psms.Select(f => f.FullFilePath).Where(name => name != null).Distinct().ToList();

    foreach (string filename in filenames)
    {
        Dictionary<int, List<double>> hydrophobicities = new Dictionary<int, List<double>>();

        foreach (PeptideSpectralMatch psm in psms.Where(f => (f.FullFilePath == filename || f.FullFilePath == null) && f.FdrInfo.QValue <= 0.01 && !f.IsDecoy))
        {
            // Each full sequence is counted once per PSM.
            HashSet<string> fullSequences = new HashSet<string>();

            foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides)
            {
                if (!fullSequences.Add(pwsm.FullSequence))
                {
                    continue;
                }

                // Modified peptides are kept only when requested, unmodified ones only when not.
                bool isModified = pwsm.AllModsOneIsNterminus.Any();
                if (isModified != computeHydrophobicitiesforModifiedPeptides)
                {
                    continue;
                }

                double predictedHydrophobicity = calc.ScoreSequence(pwsm);

                //here i'm grouping this in 2 minute increments because there are cases where you get too few data points to get a good standard deviation an average. This is for stability.
                int possibleKey = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0));

                if (!hydrophobicities.TryGetValue(possibleKey, out List<double> binValues))
                {
                    binValues = new List<double>();
                    hydrophobicities.Add(possibleKey, binValues);
                }
                binValues.Add(predictedHydrophobicity);
            }
        }

        Dictionary<int, Tuple<double, double>> averagesCommaStandardDeviations = new Dictionary<int, Tuple<double, double>>();
        List<double> allSquaredHydrophobicityDifferences = new List<double>();

        foreach (KeyValuePair<int, List<double>> bin in hydrophobicities)
        {
            //TODO consider using inner-quartile range instead of standard deviation
            double averageHydrophobicity = bin.Value.Average();
            averagesCommaStandardDeviations.Add(bin.Key, new Tuple<double, double>(averageHydrophobicity, bin.Value.StandardDeviation()));

            // Collect squared deviations from each bin's own average; zero and NaN differences are skipped.
            foreach (double hydrophobicity in bin.Value)
            {
                double difference = Math.Abs(hydrophobicity - averageHydrophobicity);
                if (!double.IsNaN(difference) && difference > 0)
                {
                    allSquaredHydrophobicityDifferences.Add(Math.Pow(difference, 2));
                }
            }
        }

        //some standard deviations are too small or too large because of random reasons, so we replace those small numbers of oddballs with reasonable numbers.
        double globalStDev = 1;
        if (allSquaredHydrophobicityDifferences.Count > 1)
        {
            globalStDev = Math.Sqrt(allSquaredHydrophobicityDifferences.Sum() / (allSquaredHydrophobicityDifferences.Count - 1));
        }

        //add stability. not allowing stdevs that are too small or too large at one position relative to the global stdev
        // The keys are snapshotted so entries can be overwritten while looping.
        foreach (int retentionTimeBin in averagesCommaStandardDeviations.Keys.ToList())
        {
            Tuple<double, double> current = averagesCommaStandardDeviations[retentionTimeBin];
            if (double.IsNaN(current.Item2) || current.Item2 < 0.5 || (current.Item2 / globalStDev) > 3)
            {
                averagesCommaStandardDeviations[retentionTimeBin] = new Tuple<double, double>(current.Item1, globalStDev);
            }
        }

        rtHydrophobicityAvgDev.Add(filename, averagesCommaStandardDeviations);
    }

    return rtHydrophobicityAvgDev;
}
//determine if a peptide is unique or shared. Also generates in silico peptide objects
/// <summary>
/// Groups all digested peptides by sequence, marks a peptide as unique when every peptide in its group
/// belongs to the same protein, and builds one <c>InSilicoPep</c> per peptide.
/// </summary>
/// <param name="databaseName">Database name recorded on every generated peptide.</param>
/// <param name="databasePeptides">Digested peptides keyed by their parent protein.</param>
/// <param name="userParams">
/// When <c>TreatModifiedPeptidesAsDifferent</c> is true, peptides are grouped by full sequence;
/// otherwise they are grouped by base sequence.
/// </param>
/// <returns>The generated in silico peptides, keyed by each peptide's protein.</returns>
Dictionary<Protein, List<InSilicoPep>> DeterminePeptideStatus(string databaseName, Dictionary<Protein, List<PeptideWithSetModifications>> databasePeptides, Parameters userParams)
{
    SSRCalc3 RTPrediction = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
    Dictionary<Protein, List<InSilicoPep>> inSilicoPeptides = new Dictionary<Protein, List<InSilicoPep>>();

    // The grouping key is the only thing that differs between the two modes.
    bool groupByFullSequence = userParams.TreatModifiedPeptidesAsDifferent == true;
    IEnumerable<PeptideWithSetModifications> allPeptides = databasePeptides.SelectMany(p => p.Value);
    IEnumerable<IGrouping<string, PeptideWithSetModifications>> peptidesBySequence = groupByFullSequence
        ? allPeptides.GroupBy(p => p.FullSequence)
        : allPeptides.GroupBy(p => p.BaseSequence);

    foreach (IGrouping<string, PeptideWithSetModifications> sequenceGroup in peptidesBySequence)
    {
        List<PeptideWithSetModifications> peptidesWithSequence = sequenceGroup.ToList();

        // Unique means the sequence occurs in exactly one protein.
        bool isUnique = peptidesWithSequence.Select(p => p.Protein).Distinct().Count() == 1;

        foreach (PeptideWithSetModifications peptide in peptidesWithSequence)
        {
            InSilicoPep inSilicoPep = new InSilicoPep(
                peptide.BaseSequence,
                peptide.FullSequence,
                peptide.PreviousAminoAcid,
                peptide.NextAminoAcid,
                isUnique,
                RTPrediction.ScoreSequence(peptide),
                GetCifuentesMobility(peptide),
                peptide.Length,
                peptide.MonoisotopicMass,
                databaseName,
                peptide.Protein.Accession,
                peptide.OneBasedStartResidueInProtein,
                peptide.OneBasedEndResidueInProtein,
                peptide.DigestionParams.Protease.Name);

            List<InSilicoPep> peptidesForProtein;
            if (!inSilicoPeptides.TryGetValue(peptide.Protein, out peptidesForProtein))
            {
                peptidesForProtein = new List<InSilicoPep>();
                inSilicoPeptides.Add(peptide.Protein, peptidesForProtein);
            }
            peptidesForProtein.Add(inSilicoPep);
        }
    }

    return inSilicoPeptides;
}
/// <summary>
/// Scores every peptide in the 300A table with SSRCalc3 and compares the result to the tabulated value.
/// </summary>
public void ScoreSequence_300A_Test()
{
    var calculator = new SSRCalc3(RetentionTimeRegression.SSRCALC_300_A, SSRCalc3.Column.A300);

    for (int row = 0; row < _peptides300A.GetLength(0); row++)
    {
        var sequence = (string)_peptides300A[row, 0];
        var tabulated = (double)_peptides300A[row, 1];

        // A missing score is treated as zero.
        double score = calculator.ScoreSequence(sequence) ?? 0;

        // The tabulated values carry two decimals (supporting information of the
        // SSRCalc 3 publication). Rounding goes through float on purpose: as a double,
        // 12.805 rounds to 12.80 rather than 12.81, leaving a spurious difference of
        // 0.005000000000002558 against 12.81.
        double roundedScore = Math.Round((float)score, 2);

        // Only fall into the assertion when the raw score is off by more than the
        // rounding tolerance; this makes failures easier to break on while debugging.
        bool outsideTolerance = Math.Abs(tabulated - score) > 0.005;
        if (outsideTolerance)
        {
            Assert.AreEqual(tabulated, roundedScore, "Peptide {0}", sequence);
        }
    }
}
/// <summary>
/// Fills <c>privateModel</c> with a scatter plot of the PSMs in <c>allPsms</c>.
/// </summary>
/// <param name="plotType">
/// 1 = precursor ppm error vs. retention time (only PSMs with an unambiguous mass difference that rounds to 0 Da),
/// 2 = retention time vs. fragment ppm error (one point per matched ion),
/// 3 = SSRCalc3-predicted vs. observed retention time.
/// Any other value adds only the two (untitled) axes.
/// </param>
private void linePlot(int plotType)
{
    string yAxisTitle = "";
    string xAxisTitle = "";

    ScatterSeries series = new ScatterSeries
    {
        MarkerFill = OxyColors.Blue,
        MarkerSize = 0.5,
        TrackerFormatString = "{1}: {2:0.###}\n{3}: {4:0.###}\nFull sequence: {Tag}"
    };
    ScatterSeries variantSeries = new ScatterSeries
    {
        MarkerFill = OxyColors.DarkRed,
        MarkerSize = 1.5,
        MarkerType = MarkerType.Circle,
        TrackerFormatString = "{1}: {2:0.###}\n{3}: {4:0.###}\nFull sequence: {Tag}"
    };

    // Tuple layout is (y, x, tag): Item2 becomes the first ScatterPoint argument and Item1 the second.
    List<Tuple<double, double, string>> xy = new List<Tuple<double, double, string>>();
    List<Tuple<double, double, string>> variantxy = new List<Tuple<double, double, string>>();

    switch (plotType)
    {
        case 1: // Precursor PPM Error vs. RT
            yAxisTitle = "Precursor error (ppm)";
            xAxisTitle = "Retention time";

            // The mass-difference filter is applied here only, so an unparsable MassDiffDa
            // cannot break the plot types that never look at it.
            foreach (var psm in allPsms.Where(p => !p.MassDiffDa.Contains("|") && Math.Round(double.Parse(p.MassDiffDa), 0) == 0))
            {
                bool isNonVariant = psm.IdentifiedSequenceVariations == null || psm.IdentifiedSequenceVariations.Equals("");
                List<Tuple<double, double, string>> target = isNonVariant ? xy : variantxy;
                target.Add(new Tuple<double, double, string>(double.Parse(psm.MassDiffPpm), (double)psm.RetentionTime, psm.FullSequence));
            }
            break;

        case 2: // Fragment PPM Error vs. RT
            yAxisTitle = "Retention time";
            xAxisTitle = "Fragment error (ppm)";

            // Variant and non-variant PSMs are not separated for this plot type.
            foreach (var psm in allPsms)
            {
                foreach (var ion in psm.MatchedIons)
                {
                    xy.Add(new Tuple<double, double, string>((double)psm.RetentionTime, ion.MassErrorPpm, psm.FullSequence));
                }
            }
            break;

        case 3: // Predicted RT vs. Observed RT
            yAxisTitle = "Predicted retention time";
            xAxisTitle = "Observed retention time";
            SSRCalc3 sSRCalc3 = new SSRCalc3("A100", SSRCalc3.Column.A100);

            foreach (var psm in allPsms)
            {
                bool isNonVariant = psm.IdentifiedSequenceVariations == null || psm.IdentifiedSequenceVariations.Equals("");
                List<Tuple<double, double, string>> target = isNonVariant ? xy : variantxy;

                // For ambiguous PSMs only the first '|'-separated base sequence is scored.
                target.Add(new Tuple<double, double, string>(sSRCalc3.ScoreSequence(new PeptideWithSetModifications(psm.BaseSeq.Split('|')[0], null)), (double)psm.RetentionTime, psm.FullSequence));
            }
            break;
    }

    if (xy.Count != 0)
    {
        // plot each peptide
        foreach (var val in xy.OrderBy(x => x.Item1))
        {
            series.Points.Add(new ScatterPoint(val.Item2, val.Item1, tag: val.Item3));
        }
        privateModel.Series.Add(series);

        // add series displayed in legend, the real series will show up with a tiny dot for the symbol
        privateModel.Series.Add(new ScatterSeries { Title = "non-variant PSMs", MarkerFill = OxyColors.Blue });
    }

    if (variantxy.Count != 0)
    {
        // plot each variant peptide
        foreach (var val in variantxy.OrderBy(x => x.Item1))
        {
            variantSeries.Points.Add(new ScatterPoint(val.Item2, val.Item1, tag: val.Item3));
        }
        privateModel.Series.Add(variantSeries);

        // add series displayed in legend, the real series will show up with a tiny dot for the symbol
        privateModel.Series.Add(new ScatterSeries { Title = "variant PSMs", MarkerFill = OxyColors.DarkRed });
    }

    privateModel.Axes.Add(new LinearAxis { Title = xAxisTitle, Position = AxisPosition.Bottom });
    privateModel.Axes.Add(new LinearAxis { Title = yAxisTitle, Position = AxisPosition.Left });
}
/// <summary>
/// For every spectra file, builds a lookup from retention time (rounded to the nearest whole number) to the
/// (average, standard deviation) of the SSRCalc3-predicted hydrophobicities of the peptides identified there.
/// </summary>
/// <param name="psms">
/// Spectral matches to draw from. Only matches with q-value &lt;= 0.01 contribute. Matches with a null file
/// path are pooled into every file's statistics.
/// </param>
/// <param name="computeHydrophobicitiesforModifiedPeptides">
/// When true only peptides carrying modifications are used; when false only unmodified peptides are used.
/// </param>
/// <returns>
/// Dictionary keyed by file path, then by rounded retention time; Item1 is the average and Item2 the
/// standard deviation of the predicted hydrophobicities in that bin.
/// </returns>
private static Dictionary<string, Dictionary<int, Tuple<double, double>>> ComputeHydrophobicityValues(List<PeptideSpectralMatch> psms, bool computeHydrophobicitiesforModifiedPeptides)
{
    SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
    Dictionary<string, Dictionary<int, Tuple<double, double>>> rtHydrophobicityAvgDev = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();

    // Null paths are left out: they cannot be dictionary keys (Add would throw), and PSMs without a
    // path are already pooled into every real file by the filter below.
    List<string> filenames = psms.Select(f => f.FullFilePath).Where(name => name != null).Distinct().ToList();

    foreach (string filename in filenames)
    {
        Dictionary<int, List<double>> hydrophobicities = new Dictionary<int, List<double>>();

        foreach (PeptideSpectralMatch psm in psms.Where(f => (f.FullFilePath == filename || f.FullFilePath == null) && f.FdrInfo.QValue <= 0.01))
        {
            foreach ((int notch, PeptideWithSetModifications pwsm) in psm.BestMatchingPeptides)
            {
                // Keep modified peptides when the flag is set and unmodified peptides when it is not.
                // (The previous conditions were inverted relative to the flag's name.)
                bool isModified = pwsm.AllModsOneIsNterminus.Any();
                if (isModified != computeHydrophobicitiesforModifiedPeptides)
                {
                    continue;
                }

                double predictedHydrophobicity = calc.ScoreSequence(pwsm);
                int possibleKey = (int)Math.Round(psm.ScanRetentionTime, 0);

                if (!hydrophobicities.TryGetValue(possibleKey, out List<double> binValues))
                {
                    binValues = new List<double>();
                    hydrophobicities.Add(possibleKey, binValues);
                }
                binValues.Add(predictedHydrophobicity);
            }
        }

        Dictionary<int, Tuple<double, double>> averagesCommaStandardDeviations = new Dictionary<int, Tuple<double, double>>();
        foreach (KeyValuePair<int, List<double>> bin in hydrophobicities)
        {
            //TODO consider using inner-quartile range instead of standard deviation
            averagesCommaStandardDeviations.Add(bin.Key, new Tuple<double, double>(bin.Value.Average(), bin.Value.StandardDeviation()));
        }

        rtHydrophobicityAvgDev.Add(filename, averagesCommaStandardDeviations);
    }

    return rtHydrophobicityAvgDev;
}
/// <summary>
/// Labels each digested peptide of one database as unique or shared and wraps it in an
/// <c>InSilicoPeptide</c> carrying a predicted hydrophobicity and electrophoretic mobility.
/// (Pooled analysis across databases happens later.)
/// </summary>
/// <param name="databasePeptides">Digested peptides keyed by their parent protein.</param>
/// <param name="userParams">
/// When <c>TreatModifiedPeptidesAsDifferent</c> is true, peptides are matched by full sequence;
/// otherwise by base sequence.
/// </param>
/// <returns>The generated peptides grouped by their protein; shared peptides precede unique ones within each list.</returns>
Dictionary<Protein, List<InSilicoPeptide>> DeterminePeptideStatus(Dictionary<Protein, List<PeptideWithSetModifications>> databasePeptides, Parameters userParams)
{
    SSRCalc3 retentionPredictor = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300);
    bool keyOnFullSequence = userParams.TreatModifiedPeptidesAsDifferent;

    // sequence -> (every peptide with that sequence, every protein that produced it)
    var occurrencesBySequence = new Dictionary<string, (List<PeptideWithSetModifications>, HashSet<Protein>)>();
    foreach (var proteinEntry in databasePeptides)
    {
        foreach (var candidate in proteinEntry.Value)
        {
            string sequenceKey = keyOnFullSequence ? candidate.FullSequence : candidate.BaseSequence;

            if (occurrencesBySequence.TryGetValue(sequenceKey, out var occurrence))
            {
                occurrence.Item1.Add(candidate);
                occurrence.Item2.Add(proteinEntry.Key);
            }
            else
            {
                occurrencesBySequence.Add(
                    sequenceKey,
                    (new List<PeptideWithSetModifications>() { candidate }, new HashSet<Protein>() { proteinEntry.Key }));
            }
        }
    }

    // A sequence seen in two or more proteins is shared; seen in exactly one, it is unique.
    var sharedPeptides = occurrencesBySequence.Values
        .Where(occurrence => occurrence.Item2.Count >= 2)
        .SelectMany(occurrence => occurrence.Item1)
        .ToList();
    var uniquePeptides = occurrencesBySequence.Values
        .Where(occurrence => occurrence.Item2.Count == 1)
        .SelectMany(occurrence => occurrence.Item1)
        .ToList();

    // Shared peptides are emitted first, then unique ones.
    List<InSilicoPeptide> generated = new List<InSilicoPeptide>();
    foreach (var (sourcePeptides, isUnique) in new[] { (sharedPeptides, false), (uniquePeptides, true) })
    {
        foreach (var source in sourcePeptides)
        {
            var inSilico = new InSilicoPeptide(
                source.Protein,
                source.DigestionParams,
                source.OneBasedStartResidueInProtein,
                source.OneBasedEndResidueInProtein,
                CleavageSpecificity.Full,
                source.PeptideDescription,
                source.MissedCleavages,
                source.AllModsOneIsNterminus,
                source.NumFixedMods,
                source.BaseSequence,
                isUnique);

            // Hydrophobicity is scored on the new object, mobility on the source peptide.
            var hydrophobicity = retentionPredictor.ScoreSequence(inSilico);
            var mobility = GetCifuentesMobility(source);
            inSilico.SetHydrophobicity(hydrophobicity);
            inSilico.SetElectrophoreticMobility(mobility);

            generated.Add(inSilico);
        }
    }

    return generated
        .GroupBy(p => p.Protein)
        .ToDictionary(proteinGroup => proteinGroup.Key, proteinGroup => proteinGroup.ToList());
}