/// <summary>
/// Renders a peptide as annotated text: every residue is written out, followed by a
/// bracketed annotation when the residue has a localization probability or a modification.
/// </summary>
/// <param name="peptide">Peptide whose sequence and modification map are rendered.</param>
/// <param name="localizationProbabilities">
/// Site probabilities, indexed here by SequencePosition. Residue index i is looked up as position i + 1.
/// </param>
/// <param name="config">Supplies scoredAA.MassDelta, the mass printed for scored sites.</param>
/// <returns>
/// The sequence with "[mass]" annotations for terminal and residue modifications and
/// "[mass(percent%)]" annotations for scored sites whose probability is greater than zero.
/// </returns>
private string PeptideToString(proteome.Peptide peptide, IList<phosphoRS.PTMSiteProbability> localizationProbabilities, PhosphoRSConfig config)
{
    var siteProbabilities = localizationProbabilities.ToDictionary(o => o.SequencePosition, o => o.Probability);
    string massFormat = String.Format("[{{0:f{0}}}]", 0);

    var peptideMods = peptide.modifications();
    var nTerminus = proteome.ModificationMap.NTerminus();
    var cTerminus = proteome.ModificationMap.CTerminus();
    string residues = peptide.sequence;

    var text = new StringBuilder();

    // N-terminal modification goes in front of the first residue.
    if (peptideMods.ContainsKey(nTerminus))
        text.AppendFormat(massFormat, peptideMods[nTerminus].monoisotopicDeltaMass());

    for (int index = 0; index < residues.Length; ++index)
    {
        text.Append(residues[index]);

        int position = index + 1;
        if (siteProbabilities.ContainsKey(position))
        {
            // A site that has a probability entry never falls through to the plain
            // modification annotation; with a probability of zero it gets no annotation at all.
            var probability = siteProbabilities[position];
            if (probability > 0)
                text.AppendFormat("[{0:f0}({1:f0}%)]", config.scoredAA.MassDelta, probability * 100);
            continue;
        }

        // Check ContainsKey before indexing the modification map.
        if (peptideMods.ContainsKey(index))
        {
            double modMass = peptideMods[index].monoisotopicDeltaMass();
            text.AppendFormat(massFormat, modMass);
        }
    }

    // C-terminal modification goes after the last residue.
    if (peptideMods.ContainsKey(cTerminus))
        text.AppendFormat(massFormat, peptideMods[cTerminus].monoisotopicDeltaMass());

    return text.ToString();
}
/// <summary>
/// Queues the groups, sources, spectra, analyses, peptides, PSMs and modifications described by
/// <paramref name="testPsmSummary"/> on a BulkInserter for the session's connection and executes it once at the end.
/// </summary>
/// <param name="session">Session used to read the existing entities and MAX(Id) values, and whose connection receives the inserts.</param>
/// <param name="testPsmSummary">
/// One row per spectrum/analysis pair. Each row's PeptideTuples is a space-separated list of tuples,
/// and each tuple has three fields delimited by '@' or '/': sequence, charge and score divider.
/// </param>
/// <remarks>
/// PSM, Modification, PeptideModification, SpectrumSourceGroup, SpectrumSource and SpectrumSourceGroupLink ids
/// continue from the MAX(Id) read up-front. Analysis and Peptide ids are the current maximum in the in-memory
/// map plus one. Spectrum ids are source.Id * 10000 + row.Spectrum.
/// </remarks>
public static void CreateTestData (NHibernate.ISession session, IList<SpectrumTuple> testPsmSummary)
{
    // Index what is already in the database.
    // NOTE(review): the lookups below index these maps with keys that may be absent and then test
    // whether the returned entity has a Name/Sequence; presumably the Map indexer inserts a
    // default-constructed value for a missing key - confirm against Map's implementation.
    var groupsByName = new Map<string, SpectrumSourceGroup>();
    foreach (var existingGroup in session.Query<SpectrumSourceGroup>())
        groupsByName[existingGroup.Name] = existingGroup;

    var sourcesById = new Map<long, SpectrumSource>();
    foreach (var existingSource in session.Query<SpectrumSource>())
        sourcesById[existingSource.Id.Value] = existingSource;

    var analysesById = new Map<long, Analysis>();
    foreach (var existingAnalysis in session.Query<Analysis>())
        analysesById[existingAnalysis.Id.Value] = existingAnalysis;

    var peptidesBySequence = new Map<string, Peptide>();
    foreach (var existingPeptide in session.Query<Peptide>())
        peptidesBySequence[existingPeptide.Sequence] = existingPeptide;

    var bulkInserter = new BulkInserter(session.Connection);

    // Id counters; each is pre-incremented right before it is handed out.
    long psmIdCounter = session.CreateQuery("SELECT MAX(Id) FROM PeptideSpectrumMatch").UniqueResult<long?>().GetValueOrDefault();
    long modIdCounter = session.CreateQuery("SELECT MAX(Id) FROM Modification").UniqueResult<long?>().GetValueOrDefault();
    long peptideModIdCounter = session.CreateQuery("SELECT MAX(Id) FROM PeptideModification").UniqueResult<long?>().GetValueOrDefault();
    long groupIdCounter = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroup").UniqueResult<long?>().GetValueOrDefault();
    long sourceIdCounter = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSource").UniqueResult<long?>().GetValueOrDefault();
    long groupLinkIdCounter = session.CreateQuery("SELECT MAX(Id) FROM SpectrumSourceGroupLink").UniqueResult<long?>().GetValueOrDefault();

    foreach (SpectrumTuple row in testPsmSummary)
    {
        string groupName = row.Group;

        // --- source group ---
        SpectrumSourceGroup group = groupsByName[groupName];
        if (String.IsNullOrEmpty(group.Name))
        {
            group.Id = ++groupIdCounter;
            group.Name = groupName;
            bulkInserter.Add(group);
        }

        // --- source, plus its links to the immediate group and every ancestor group ---
        SpectrumSource source = sourcesById[row.Source];
        if (String.IsNullOrEmpty(source.Name))
        {
            source.Id = ++sourceIdCounter;
            source.Name = "Source " + row.Source;
            source.Group = group;
            source.Spectra = new List<Spectrum>();
            bulkInserter.Add(source);

            // link to the source's immediate group
            bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++groupLinkIdCounter, Group = group, Source = source });

            if (groupName != "/")
            {
                // Walk up the path one segment at a time until the root group "/" has been linked.
                string ancestorName = groupName.Substring(0, groupName.LastIndexOf("/"));
                bool rootLinked = false;
                while (!rootLinked)
                {
                    if (String.IsNullOrEmpty(ancestorName))
                        ancestorName = "/";

                    // create the ancestor group when the session does not know it yet
                    SpectrumSourceGroup ancestor = session.UniqueResult<SpectrumSourceGroup>(o => o.Name == ancestorName);
                    if (ancestor == null)
                    {
                        ancestor = new SpectrumSourceGroup() { Id = ++groupIdCounter, Name = ancestorName };
                        bulkInserter.Add(ancestor);
                    }

                    bulkInserter.Add(new SpectrumSourceGroupLink() { Id = ++groupLinkIdCounter, Group = ancestor, Source = source });

                    rootLinked = ancestorName == "/";
                    if (!rootLinked)
                        ancestorName = ancestorName.Substring(0, ancestorName.LastIndexOf("/"));
                }
            }
        }

        // --- spectrum (Index is row.Spectrum - 1) ---
        Spectrum spectrum = source.Spectra.SingleOrDefault(o => o.Source.Id == source.Id && o.Index == row.Spectrum - 1);
        if (spectrum == null)
        {
            spectrum = new Spectrum()
            {
                Id = source.Id * 10000 + row.Spectrum,
                Index = row.Spectrum - 1,
                NativeID = "scan=" + row.Spectrum,
                Source = source,
                PrecursorMZ = 42
            };
            source.Spectra.Add(spectrum);
            bulkInserter.Add(spectrum);
        }

        // --- analysis ---
        Analysis analysis = analysesById[row.Analysis];
        if (String.IsNullOrEmpty(analysis.Name))
        {
            string engineName = "Engine " + row.Analysis;

            analysis.Id = analysesById.Max(o => o.Value.Id).GetValueOrDefault() + 1;
            analysis.Name = engineName + " 1.0";
            analysis.Software = new AnalysisSoftware() {Name = engineName, Version = "1.0"};
            analysis.StartTime = DateTime.Today.AddHours(row.Analysis);
            analysis.Type = AnalysisType.DatabaseSearch;
            analysis.Parameters = new SortedSet<AnalysisParameter>()
            {
                new AnalysisParameter()
                {
                    Id = analysis.Id * 10000,
                    Analysis = analysis,
                    Name = "Parameter 1",
                    Value = "Value 1"
                }
            };
            bulkInserter.Add(analysis);
        }

        // --- bucket the row's peptide tuples by score divider; the sorted divider order determines rank ---
        var tuplesByDivider = new SortedList<int, List<PeptideTuple>>();
        foreach (string tuple in row.PeptideTuples.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
        {
            string[] fields = tuple.Split('@', '/');
            var parsedTuple = new PeptideTuple()
            {
                Sequence = fields[0],
                Charge = Convert.ToInt32(fields[1]),
                ScoreDivider = Convert.ToInt32(fields[2])
            };

            List<PeptideTuple> sameDivider;
            if (!tuplesByDivider.TryGetValue(parsedTuple.ScoreDivider, out sameDivider))
            {
                sameDivider = new List<PeptideTuple>();
                tuplesByDivider[parsedTuple.ScoreDivider] = sameDivider;
            }
            sameDivider.Add(parsedTuple);
        }

        // --- one PSM per tuple ---
        int rank = 1;
        int lastDivider = 1;
        foreach (var peptideTuple in tuplesByDivider.Values.SelectMany(tuples => tuples))
        {
            using (PwizPeptide pwizPeptide = new PwizPeptide(peptideTuple.Sequence, ModParsing.ModificationParsing_Auto, ModDelimiter.ModificationDelimiter_Brackets))
            {
                string bareSequence = pwizPeptide.sequence;

                Peptide peptide = peptidesBySequence[bareSequence];
                if (String.IsNullOrEmpty(peptide.Sequence))
                {
                    peptide = new TestPeptide(bareSequence);
                    peptide.Id = peptidesBySequence.Max(o => o.Value.Id).GetValueOrDefault() + 1;
                    peptide.MonoisotopicMass = pwizPeptide.monoisotopicMass(false);
                    peptide.MolecularWeight = pwizPeptide.molecularWeight(false);
                    peptidesBySequence[bareSequence] = peptide;
                    bulkInserter.Add(peptide);
                    createTestPeptideInstances(session, bulkInserter, peptide);
                }

                double neutralPrecursorMass = (spectrum.PrecursorMZ*peptideTuple.Charge) - (peptideTuple.Charge*Proton.Mass);

                // Tuples that share the previous tuple's divider share its rank; a new divider
                // moves to the next rank. Only rank 1 carries the row's QValue.
                if (peptideTuple.ScoreDivider != lastDivider)
                    ++rank;

                var psm = new PeptideSpectrumMatch()
                {
                    Id = ++psmIdCounter,
                    Peptide = peptide,
                    Spectrum = spectrum,
                    Analysis = analysis,
                    ObservedNeutralMass = neutralPrecursorMass,
                    MonoisotopicMassError = neutralPrecursorMass - pwizPeptide.monoisotopicMass(),
                    MolecularWeightError = neutralPrecursorMass - pwizPeptide.molecularWeight(),
                    Charge = peptideTuple.Charge,
                    Rank = rank,
                    QValue = (rank == 1 ? row.QValue : PeptideSpectrumMatch.DefaultQValue),
                };

                if (row.Score != null)
                {
                    double dividedScore = (double) row.Score/peptideTuple.ScoreDivider;
                    psm.Scores = new Dictionary<string, double>()
                    {
                        {"score1", dividedScore},
                        {"score2", 1/dividedScore}
                    };
                }

                bulkInserter.Add(psm);
                lastDivider = peptideTuple.ScoreDivider;

                // --- Modifications (created on first use of a formula) and PeptideModifications ---
                foreach (KeyValuePair<int, ModList> modsAtPosition in pwizPeptide.modifications())
                {
                    foreach (PwizMod pwizMod in modsAtPosition.Value)
                    {
                        Modification mod = session.UniqueResult<Modification>(o => o.Formula == pwizMod.formula());
                        if (mod == null)
                        {
                            mod = new Modification()
                            {
                                Id = ++modIdCounter,
                                Formula = pwizMod.formula(),
                                MonoMassDelta = pwizMod.monoisotopicDeltaMass(),
                                AvgMassDelta = pwizMod.averageDeltaMass(),
                                Name = pwizMod.formula()
                            };
                            bulkInserter.Add(mod);
                        }

                        // Terminal positions are stored as int.MinValue (N-terminus) and int.MaxValue (C-terminus).
                        bulkInserter.Add(new PeptideModification()
                        {
                            Id = ++peptideModIdCounter,
                            PeptideSpectrumMatch = psm,
                            Modification = mod,
                            Offset = modsAtPosition.Key == ModMap.NTerminus() ? int.MinValue
                                   : modsAtPosition.Key == ModMap.CTerminus() ? int.MaxValue
                                   : modsAtPosition.Key
                        });
                    }
                }
            }
        }
    }

    bulkInserter.Execute();
    bulkInserter.Reset("");
}
/// <summary>
/// Builds an attestation row from one positional result row of the PSM query.
/// </summary>
/// <param name="queryRow">
/// Columns by index: [0] PSM id, [1] spectrum id, [2] source name, [3] spectrum native ID,
/// [4] precursor m/z, [5] charge, [6] comma-separated "id:massDelta:offset" triplets (may be null or empty),
/// [7] unmodified peptide sequence, [8] decoy state.
/// </param>
/// <remarks>
/// Modifications whose mass rounds to 80 and that sit on an S, T or Y residue are recorded in
/// OriginalPhosphoSites and left out of UnphosphorylatedSequence. They are reunited with the sequence
/// right before the PSM is submitted to phosphoRS, because phosphoRS requires all phospho sites to be
/// marked with a single numerical representation across all PSMs. Every other modification is written
/// into UnphosphorylatedSequence as a "[mass]" annotation with four decimals.
/// </remarks>
/// <exception cref="FormatException">A triplet field is not a valid number.</exception>
public PhosphoPeptideAttestationRow(object[] queryRow)
{
    // The triplet text is machine-generated and the annotated sequence is parsed again later, so both
    // directions use the invariant culture; otherwise a comma-decimal culture would misread "79.9663"
    // and emit "[79,9663]".
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PSMId = (long)queryRow[0];
    SpectrumId = (long)queryRow[1];
    SourceName = (string)queryRow[2];
    SpectrumNativeID = (string)queryRow[3];
    PrecursorMZ = Convert.ToDouble(queryRow[4]);
    Charge = Convert.ToInt32(queryRow[5]);

    OriginalPhosphoSites = new SortedDictionary<int, long>();
    var otherModMasses = new Dictionary<int, List<double>>();

    string peptideSequence = (string)queryRow[7];
    Peptide = new proteome.Peptide(peptideSequence);
    var pwizMods = Peptide.modifications();

    string modificationColumn = (string)queryRow[6];
    if (!String.IsNullOrEmpty(modificationColumn))
    {
        foreach (var triplet in modificationColumn.Split(','))
        {
            var tokens = triplet.Split(':');
            long pmId = Convert.ToInt64(tokens[0], invariant);
            double deltaMass = Convert.ToDouble(tokens[1], invariant);
            int roundedDeltaMass = (int)Math.Round(deltaMass);
            int offset = Convert.ToInt32(tokens[2], invariant);

            // Every modification, phospho or not, is mirrored onto the pwiz peptide.
            pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

            // Terminal modifications use int.MinValue / int.MaxValue as their offset (see the
            // formatting loop below), so the residue may only be inspected for in-range offsets.
            bool onResidue = offset >= 0 && offset < peptideSequence.Length;
            bool isPhosphoSite = roundedDeltaMass == 80
                                 && onResidue
                                 && "STY".IndexOf(peptideSequence[offset]) >= 0;

            if (isPhosphoSite)
            {
                OriginalPhosphoSites[offset] = pmId;
            }
            else
            {
                List<double> massesAtOffset;
                if (!otherModMasses.TryGetValue(offset, out massesAtOffset))
                {
                    massesAtOffset = new List<double>();
                    otherModMasses[offset] = massesAtOffset;
                }
                massesAtOffset.Add(deltaMass);
            }
        }
    }

    // Insert annotations from the highest offset down so that earlier insert positions stay valid.
    string format = String.Format("[{{0:f{0}}}]", 4);
    StringBuilder sb = new StringBuilder(peptideSequence);
    foreach (var mod in otherModMasses.OrderByDescending(m => m.Key))
    {
        foreach (var massDelta in mod.Value)
        {
            string annotation = String.Format(invariant, format, massDelta);
            if (mod.Key == int.MinValue)
                sb.Insert(0, annotation);
            else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
                sb.Append(annotation);
            else
                sb.Insert(mod.Key + 1, annotation);
        }
    }
    UnphosphorylatedSequence = sb.ToString();

    DecoyState = Convert.ToInt16(queryRow[8]);

    // Every S, T or Y residue is a candidate phosphorylation site.
    PossiblePhosphoSites = new List<int>();
    for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
    {
        if ("STY".IndexOf(peptideSequence[residueIndex]) >= 0)
            PossiblePhosphoSites.Add(residueIndex);
    }
}
/// <summary>
/// Converts one attestation row into the peptide-spectrum match object that is handed to phosphoRS.
/// </summary>
/// <param name="config">
/// Supplies the phospho modification (pwizMod, scoredAA) and phosphorylationSymbol, the ID under which
/// phospho sites are marked.
/// </param>
/// <param name="variant">Row providing the annotated sequence, original phospho sites, ids, charge, precursor m/z, spectrum type and peaks.</param>
/// <returns>A phosphoRS PSM built from the row's peptide, its modifications and its spectrum data.</returns>
/// <remarks>
/// The modification position string has the shape "N.RRRR.C": one entry for the N-terminus, one per
/// residue and one for the C-terminus, where "0" means unmodified. All phospho sites share
/// config.phosphorylationSymbol; every other modification is registered as "unknown" under
/// phosphorylationSymbol + 1.
/// </remarks>
private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
{
    // Parse the sequence that carries only the non-phospho modifications, then put the phospho sites back.
    proteome.Peptide phosphoPeptide = new proteome.Peptide(variant.UnphosphorylatedSequence,
                                                           proteome.ModificationParsing.ModificationParsing_Auto,
                                                           proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
    proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
    foreach (var location in variant.OriginalPhosphoSites.Keys)
        variantPeptideMods[location].Add(config.pwizMod);

    // ID shared by every non-phospho ("unknown") modification. The symbol registered on the
    // modification objects is derived from the same value that is written into the position string,
    // so the N-terminal, residue and C-terminal branches always agree with it.
    int modificationID = config.phosphorylationSymbol + 1;
    string otherModText = modificationID.ToString();
    char otherModSymbol = otherModText[0];

    var ptmRepresentation = new StringBuilder();
    var modifications = new List<phosphoRS.AminoAcidModification>();

    // N-terminus
    var nTerminus = proteome.ModificationMap.NTerminus();
    if (variantPeptideMods.ContainsKey(nTerminus))
    {
        modifications.Add(new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                              variantPeptideMods[nTerminus].monoisotopicDeltaMass(), 0.0, null));
        ptmRepresentation.Append(otherModText + ".");
    }
    else
    {
        ptmRepresentation.Append("0.");
    }

    // Residues
    string residues = phosphoPeptide.sequence;
    for (int aaIndex = 0; aaIndex < residues.Length; ++aaIndex)
    {
        if (!variantPeptideMods.ContainsKey(aaIndex))
        {
            ptmRepresentation.Append("0");
            continue;
        }

        if (variant.OriginalPhosphoSites.ContainsKey(aaIndex))
        {
            // Phosphorylation: reuse the scored modification and its symbol.
            modifications.Add(config.scoredAA);
            ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
        }
        else
        {
            // Anything else becomes an "unknown" modification targeting this residue.
            modifications.Add(new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                                  variantPeptideMods[aaIndex].monoisotopicDeltaMass(), 0.0,
                                                                  phosphoRS.AminoAcidSequence.ParseAASequence("" + residues[aaIndex])));
            ptmRepresentation.Append(otherModText);
        }
    }

    // C-terminus
    var cTerminus = proteome.ModificationMap.CTerminus();
    if (variantPeptideMods.ContainsKey(cTerminus))
    {
        modifications.Add(new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                              variantPeptideMods[cTerminus].monoisotopicDeltaMass(), 0.0, null));
        ptmRepresentation.Append("." + otherModText);
    }
    else
    {
        ptmRepresentation.Append(".0");
    }

    // NOTE(review): SpectrumId and PSMId are narrowed to int here; values above int.MaxValue would wrap.
    var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, residues, modifications, ptmRepresentation.ToString());

    return new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge, variant.PrecursorMZ, variant.Peaks, AAS);
}
/// <summary>
/// Builds an attestation row from one positional result row of the PSM query.
/// </summary>
/// <param name="queryRow">
/// Columns by index: [0] PSM id, [1] spectrum id, [2] source name, [3] spectrum native ID,
/// [4] precursor m/z, [5] charge, [6] "id:massDelta:offset" triplets joined by the first character of
/// the GroupConcatSeparator setting (may be null or empty), [7] unmodified peptide sequence, [8] decoy state.
/// </param>
/// <remarks>
/// Modifications whose mass rounds to 80 and that sit on an S, T or Y residue are recorded in
/// OriginalPhosphoSites and left out of UnphosphorylatedSequence. They are reunited with the sequence
/// right before the PSM is submitted to phosphoRS, because phosphoRS requires all phospho sites to be
/// marked with a single numerical representation across all PSMs. Every other modification is written
/// into UnphosphorylatedSequence as a "[mass]" annotation with four decimals.
/// </remarks>
/// <exception cref="FormatException">A triplet field is not a valid number.</exception>
public PhosphoPeptideAttestationRow(object[] queryRow)
{
    // The triplet text is machine-generated and the annotated sequence is parsed again later, so both
    // directions use the invariant culture; otherwise a comma-decimal culture would misread "79.9663"
    // and emit "[79,9663]".
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PSMId = (long)queryRow[0];
    SpectrumId = (long)queryRow[1];
    SourceName = (string)queryRow[2];
    SpectrumNativeID = (string)queryRow[3];
    PrecursorMZ = Convert.ToDouble(queryRow[4]);
    Charge = Convert.ToInt32(queryRow[5]);

    OriginalPhosphoSites = new SortedDictionary<int, long>();
    var otherModMasses = new Dictionary<int, List<double>>();

    string peptideSequence = (string)queryRow[7];
    Peptide = new proteome.Peptide(peptideSequence);
    var pwizMods = Peptide.modifications();

    string modificationColumn = (string)queryRow[6];
    if (!String.IsNullOrEmpty(modificationColumn))
    {
        char tripletSeparator = Properties.Settings.Default.GroupConcatSeparator[0];
        foreach (var triplet in modificationColumn.Split(tripletSeparator))
        {
            var tokens = triplet.Split(':');
            long pmId = Convert.ToInt64(tokens[0], invariant);
            double deltaMass = Convert.ToDouble(tokens[1], invariant);
            int roundedDeltaMass = (int)Math.Round(deltaMass);
            int offset = Convert.ToInt32(tokens[2], invariant);

            // Every modification, phospho or not, is mirrored onto the pwiz peptide.
            pwizMods[offset].Add(new proteome.Modification(deltaMass, deltaMass));

            // Terminal modifications use int.MinValue / int.MaxValue as their offset (see the
            // formatting loop below), so the residue may only be inspected for in-range offsets.
            bool onResidue = offset >= 0 && offset < peptideSequence.Length;
            bool isPhosphoSite = roundedDeltaMass == 80
                                 && onResidue
                                 && "STY".IndexOf(peptideSequence[offset]) >= 0;

            if (isPhosphoSite)
            {
                OriginalPhosphoSites[offset] = pmId;
            }
            else
            {
                List<double> massesAtOffset;
                if (!otherModMasses.TryGetValue(offset, out massesAtOffset))
                {
                    massesAtOffset = new List<double>();
                    otherModMasses[offset] = massesAtOffset;
                }
                massesAtOffset.Add(deltaMass);
            }
        }
    }

    // Insert annotations from the highest offset down so that earlier insert positions stay valid.
    string format = String.Format("[{{0:f{0}}}]", 4);
    StringBuilder sb = new StringBuilder(peptideSequence);
    foreach (var mod in otherModMasses.OrderByDescending(m => m.Key))
    {
        foreach (var massDelta in mod.Value)
        {
            string annotation = String.Format(invariant, format, massDelta);
            if (mod.Key == int.MinValue)
            {
                sb.Insert(0, annotation);
            }
            else if (mod.Key == int.MaxValue || mod.Key >= sb.Length)
            {
                sb.Append(annotation);
            }
            else
            {
                sb.Insert(mod.Key + 1, annotation);
            }
        }
    }
    UnphosphorylatedSequence = sb.ToString();

    DecoyState = Convert.ToInt16(queryRow[8]);

    // Every S, T or Y residue is a candidate phosphorylation site.
    PossiblePhosphoSites = new List<int>();
    for (int residueIndex = 0; residueIndex < peptideSequence.Length; ++residueIndex)
    {
        if ("STY".IndexOf(peptideSequence[residueIndex]) >= 0)
        {
            PossiblePhosphoSites.Add(residueIndex);
        }
    }
}
/// <summary>
/// Converts one attestation row into the peptide-spectrum match object that is handed to phosphoRS.
/// </summary>
/// <param name="config">
/// Supplies the phospho modification (pwizMod, scoredAA) and phosphorylationSymbol, the ID under which
/// phospho sites are marked.
/// </param>
/// <param name="variant">Row providing the annotated sequence, original phospho sites, ids, charge, precursor m/z, spectrum type and peaks.</param>
/// <returns>A phosphoRS PSM built from the row's peptide, its modifications and its spectrum data.</returns>
/// <remarks>
/// The modification position string has the shape "N.RRRR.C": one entry for the N-terminus, one per
/// residue and one for the C-terminus, where "0" means unmodified. All phospho sites share
/// config.phosphorylationSymbol; every other modification is registered as "unknown" under
/// phosphorylationSymbol + 1.
/// </remarks>
private phosphoRS.PeptideSpectrumMatch getPhosphoRS_PSM(PhosphoRSConfig config, PhosphoPeptideAttestationRow variant)
{
    // Parse the sequence that carries only the non-phospho modifications, then put the phospho sites back.
    proteome.Peptide phosphoPeptide = new proteome.Peptide(variant.UnphosphorylatedSequence,
                                                           proteome.ModificationParsing.ModificationParsing_Auto,
                                                           proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
    proteome.ModificationMap variantPeptideMods = phosphoPeptide.modifications();
    foreach (var location in variant.OriginalPhosphoSites.Keys)
    {
        variantPeptideMods[location].Add(config.pwizMod);
    }

    // ID shared by every non-phospho ("unknown") modification. The symbol registered on the
    // modification objects is derived from the same value that is written into the position string,
    // so the N-terminal, residue and C-terminal branches always agree with it.
    int modificationID = config.phosphorylationSymbol + 1;
    string otherModText = modificationID.ToString();
    char otherModSymbol = otherModText[0];

    var ptmRepresentation = new StringBuilder();
    var modifications = new List<phosphoRS.AminoAcidModification>();

    // N-terminus
    var nTerminus = proteome.ModificationMap.NTerminus();
    if (variantPeptideMods.ContainsKey(nTerminus))
    {
        var nTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                               variantPeptideMods[nTerminus].monoisotopicDeltaMass(), 0.0, null);
        modifications.Add(nTerminalMod);
        ptmRepresentation.Append(otherModText + ".");
    }
    else
    {
        ptmRepresentation.Append("0.");
    }

    // Residues
    string residues = phosphoPeptide.sequence;
    for (int aaIndex = 0; aaIndex < residues.Length; ++aaIndex)
    {
        if (!variantPeptideMods.ContainsKey(aaIndex))
        {
            ptmRepresentation.Append("0");
            continue;
        }

        if (variant.OriginalPhosphoSites.ContainsKey(aaIndex))
        {
            // Phosphorylation: reuse the scored modification and its symbol.
            modifications.Add(config.scoredAA);
            ptmRepresentation.Append(config.phosphorylationSymbol.ToString()[0]);
        }
        else
        {
            // Anything else becomes an "unknown" modification targeting this residue.
            var residueMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                                 variantPeptideMods[aaIndex].monoisotopicDeltaMass(), 0.0,
                                                                 phosphoRS.AminoAcidSequence.ParseAASequence("" + residues[aaIndex]));
            modifications.Add(residueMod);
            ptmRepresentation.Append(otherModText);
        }
    }

    // C-terminus
    var cTerminus = proteome.ModificationMap.CTerminus();
    if (variantPeptideMods.ContainsKey(cTerminus))
    {
        var cTerminalMod = new phosphoRS.AminoAcidModification(otherModSymbol, "unknown", "unk", "none",
                                                               variantPeptideMods[cTerminus].monoisotopicDeltaMass(), 0.0, null);
        modifications.Add(cTerminalMod);
        ptmRepresentation.Append("." + otherModText);
    }
    else
    {
        ptmRepresentation.Append(".0");
    }

    // NOTE(review): SpectrumId and PSMId are narrowed to int here; values above int.MaxValue would wrap.
    var AAS = phosphoRS.AminoAcidSequence.Create((int)variant.SpectrumId, residues, modifications, ptmRepresentation.ToString());

    return(new phosphoRS.PeptideSpectrumMatch((int)variant.PSMId, variant.SpectrumType, variant.Charge, variant.PrecursorMZ, variant.Peaks, AAS));
}