/// <summary>
/// Builds the annotated sequence string and stores it in FullSequence.
/// Every modification found in AllModsOneIsNterminus is written as
/// "[ModificationType:IdWithMotif]": key 1 is emitted before the first residue,
/// key (r + 2) directly after the residue at zero-based index r, and
/// key (Length + 2) after the last residue.
/// </summary>
private void DetermineFullSequence()
{
    var annotated = new StringBuilder();

    // Appends the bracketed annotation for the modification stored under the given key, if there is one.
    void AppendModAt(int modKey)
    {
        if (AllModsOneIsNterminus.TryGetValue(modKey, out Modification found))
        {
            annotated.Append($"[{found.ModificationType}:{found.IdWithMotif}]");
        }
    }

    // modification on peptide N-terminus
    AppendModAt(1);

    for (int index = 0; index < Length; index++)
    {
        annotated.Append(this[index]);

        // modification on this residue
        AppendModAt(index + 2);
    }

    // modification on peptide C-terminus
    AppendModAt(Length + 2);

    FullSequence = annotated.ToString();
}
/// <summary>
/// Makes the string representing a detected sequence variation, including any modifications on a variant amino acid.
/// Takes in the variant as well as a flag saying whether the peptide intersects the variant (this allows for
/// identified variants that cause the cleavage site for the peptide).
/// </summary>
/// <param name="applied">The sequence variation to describe.</param>
/// <param name="intersects">
/// True when the peptide intersects the variant; false when the variant was identified only because it
/// created the cleavage site leading to this peptide.
/// </param>
/// <returns>
/// The original sequence, followed by the variant's one-based begin position, followed by the variant
/// sequence. When <paramref name="intersects"/> is true the variant sequence is taken from the FullSequence
/// of a peptide built over the variant, so it carries modification annotations.
/// </returns>
public string SequenceVariantString(SequenceVariation applied, bool intersects)
{
    // if the variant caused a cleavage site leading to the peptide sequence (variant does not intersect but is identified)
    if (!intersects)
    {
        return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{applied.VariantSequence}";
    }

    bool variantStartsAtPositionOne = applied.OneBasedBeginPosition == 1;
    bool peptideStartsAtPositionOne = OneBasedStartResidueInProtein == 1;

    // Offset used to re-key the retained modifications: 1 when both the peptide and the variant start at
    // position 1, 2 when only the peptide does, 3 otherwise.
    int modResidueScale;
    if (peptideStartsAtPositionOne)
    {
        modResidueScale = variantStartsAtPositionOne ? 1 : 2;
    }
    else
    {
        modResidueScale = 3;
    }

    // Keep the key-1 modification when the variant begins at position 1, plus every modification whose
    // position (key - 2 + OneBasedStartResidueInProtein) lies inside the variant's begin/end range.
    var modsOnVariantOneIsNTerm = AllModsOneIsNterminus
        .Where(kv =>
            (kv.Key == 1 && applied.OneBasedBeginPosition == 1)
            || (applied.OneBasedBeginPosition <= kv.Key - 2 + OneBasedStartResidueInProtein
                && kv.Key - 2 + OneBasedStartResidueInProtein <= applied.OneBasedEndPosition))
        .ToDictionary(kv => kv.Key - applied.OneBasedBeginPosition + modResidueScale, kv => kv.Value);

    // Unless the variant begins at position 1, the helper peptide starts one residue before the variant;
    // that extra leading character is dropped again from FullSequence below.
    int helperStart = variantStartsAtPositionOne
        ? applied.OneBasedBeginPosition
        : applied.OneBasedBeginPosition - 1;

    PeptideWithSetModifications variantWithAnyMods = new PeptideWithSetModifications(
        Protein,
        DigestionParams,
        helperStart,
        applied.OneBasedEndPosition,
        CleavageSpecificityForFdrCategory,
        PeptideDescription,
        MissedCleavages,
        modsOnVariantOneIsNTerm,
        NumFixedMods);

    string annotatedVariant = variantWithAnyMods.FullSequence.Substring(variantStartsAtPositionOne ? 0 : 1);

    return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{annotatedVariant}";
}
/// <summary> /// Generates theoretical fragments for given dissociation type for this peptide /// </summary> public IEnumerable <Product> Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus) { // molecular ion //yield return new Product(ProductType.M, new NeutralTerminusFragment(FragmentationTerminus.None, this.MonoisotopicMass, Length, Length), 0); var productCollection = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus].Intersect(DissociationTypeCollection.ProductsFromDissociationType[dissociationType]); List <(ProductType, int)> skippers = new List <(ProductType, int)>(); foreach (var product in productCollection.Where(f => f != ProductType.zPlusOne)) { skippers.Add((product, BaseSequence.Length)); } switch (dissociationType) { case DissociationType.CID: skippers.Add((ProductType.b, 1)); break; case DissociationType.ETD: case DissociationType.ECD: case DissociationType.EThcD: skippers.AddRange(GetProlineZIonIndicies()); break; } foreach (var productType in productCollection) { // we're separating the N and C terminal masses and computing a separate compact peptide for each one // this speeds calculations up without producing unnecessary terminus fragment info FragmentationTerminus temporaryFragmentationTerminus = TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus[productType]; NeutralTerminusFragment[] terminalMasses = CompactPeptide(temporaryFragmentationTerminus).TerminalMasses; for (int f = 0; f < terminalMasses.Length; f++) { // fragments with neutral loss if (AllModsOneIsNterminus.TryGetValue(terminalMasses[f].AminoAcidPosition + 1, out Modification mod) && mod.NeutralLosses != null && mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses)) { foreach (double neutralLoss in neutralLosses) { if (neutralLoss == 0) { continue; } for (int n = f; n < terminalMasses.Length; n++) { if (!skippers.Contains((productType, terminalMasses[n].FragmentNumber)))
/// <summary>
/// Returns the sequence annotated only with the modifications whose ModificationType is a key of
/// <paramref name="modstoWritePruned"/>. Annotations are written as "[ModificationType:IdWithMotif]"
/// using the same key layout as AllModsOneIsNterminus: key 1 before the first residue, key (r + 2) after
/// the residue at zero-based index r, and key (Length + 2) after the last residue.
/// </summary>
/// <param name="modstoWritePruned">
/// Modification types that should be written; only its keys are consulted. When null, BaseSequence is
/// returned unchanged.
/// </param>
/// <returns>The pruned, annotated sequence, or BaseSequence when no filter is supplied.</returns>
public virtual string EssentialSequence(IReadOnlyDictionary<string, int> modstoWritePruned)
{
    if (modstoWritePruned == null)
    {
        return BaseSequence;
    }

    var annotated = new StringBuilder();

    // Appends the annotation for the modification under the given key, but only when its type is allowed.
    void AppendIfAllowed(int modKey)
    {
        if (AllModsOneIsNterminus.TryGetValue(modKey, out Modification candidate)
            && modstoWritePruned.ContainsKey(candidate.ModificationType))
        {
            annotated.Append($"[{candidate.ModificationType}:{candidate.IdWithMotif}]");
        }
    }

    // variable modification on peptide N-terminus
    AppendIfAllowed(1);

    for (int index = 0; index < Length; index++)
    {
        annotated.Append(this[index]);

        // variable modification on this residue
        AppendIfAllowed(index + 2);
    }

    // variable modification on peptide C-terminus
    AppendIfAllowed(Length + 2);

    return annotated.ToString();
}
/// <summary>
/// Returns FullSequence immediately followed by the string form of every entry in
/// AllModsOneIsNterminus, with the entries separated from each other by tab characters.
/// </summary>
public override string ToString()
{
    string sequence = FullSequence;
    var modEntries = AllModsOneIsNterminus.Select(entry => entry.ToString());
    string joinedMods = string.Join("\t", modEntries);

    return sequence + joinedMods;
}
/// <summary>
/// Generates theoretical fragments for given dissociation type for this peptide.
/// The "products" parameter is cleared and then filled with these fragments: terminal fragment ions
/// (each with and without the neutral losses accumulated so far), molecular-ion neutral-loss products,
/// and diagnostic ions.
/// </summary>
/// <param name="dissociationType">
/// Dissociation type used to look up product types, terminal mass shifts, neutral losses and diagnostic ions.
/// </param>
/// <param name="fragmentationTerminus">
/// N, C or Both; selects which terminal fragment series are generated. Molecular-ion neutral-loss
/// products and diagnostic ions are added regardless of this value.
/// </param>
/// <param name="products">Caller-supplied list that receives the results; existing contents are discarded.</param>
public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus, List<Product> products)
{
    // This code is specifically written to be memory- and CPU -efficient because it is
    // called millions of times for a typical search (i.e., at least once per peptide).
    // If you modify this code, BE VERY CAREFUL about allocating new memory, especially
    // for new collections. This code also deliberately avoids using "yield return", again
    // for performance reasons. Be sure to benchmark any changes with a parallelized
    // fragmentation of every peptide in a database (i.e., test for speed decreases and
    // memory issues).

    products.Clear();

    var massCaps = DissociationTypeCollection.GetNAndCTerminalMassShiftsForDissociationType(dissociationType);

    double cTermMass = 0;
    double nTermMass = 0;

    List<ProductType> nTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N);
    List<ProductType> cTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C);

    bool calculateNTermFragments = fragmentationTerminus == FragmentationTerminus.N
        || fragmentationTerminus == FragmentationTerminus.Both;

    bool calculateCTermFragments = fragmentationTerminus == FragmentationTerminus.C
        || fragmentationTerminus == FragmentationTerminus.Both;

    //From http://www.matrixscience.com/help/fragmentation_help.html
    //Low Energy CID -- In low energy CID(i.e.collision induced dissociation in a triple quadrupole or an ion trap) a peptide carrying a positive charge fragments mainly along its backbone,
    //generating predominantly b and y ions. In addition, for fragments containing RKNQ, peaks are seen for ions that have lost ammonia (-17 Da) denoted a*, b* and y*. For fragments containing
    //STED, loss of water(-18 Da) is denoted a°, b° and y°. Satellite ions from side chain cleavage are not observed.
    bool haveSeenNTermDegreeIon = false;
    bool haveSeenNTermStarIon = false;
    bool haveSeenCTermDegreeIon = false;
    bool haveSeenCTermStarIon = false;

    // these two collections keep track of the neutral losses observed so far on the n-term or c-term.
    // they are apparently necessary, but allocating memory for collections in this function results in
    // inefficient memory usage and thus frequent garbage collection.
    // TODO: If you can think of a way to remove these collections and still maintain correct
    // fragmentation, please do so.
    HashSet<double> nTermNeutralLosses = null;
    HashSet<double> cTermNeutralLosses = null;

    // n-terminus mod
    if (calculateNTermFragments)
    {
        if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            nTermMass += mod.MonoisotopicMass.Value;
        }
    }

    // c-terminus mod
    if (calculateCTermFragments)
    {
        if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length + 2, out Modification mod))
        {
            cTermMass += mod.MonoisotopicMass.Value;
        }
    }

    for (int r = 0; r < BaseSequence.Length - 1; r++)
    {
        // n-term fragments
        if (calculateNTermFragments)
        {
            char nTermResidue = BaseSequence[r];

            // get n-term residue mass
            if (Residue.TryGetResidue(nTermResidue, out Residue residue))
            {
                nTermMass += residue.MonoisotopicMass;
            }
            else
            {
                nTermMass = double.NaN;
            }

            // add side-chain mod
            if (AllModsOneIsNterminus.TryGetValue(r + 2, out Modification mod))
            {
                nTermMass += mod.MonoisotopicMass.Value;
            }

            // handle star and degree ions for low-res CID
            if (dissociationType == DissociationType.LowCID)
            {
                if (nTermResidue == 'R' || nTermResidue == 'K' || nTermResidue == 'N' || nTermResidue == 'Q')
                {
                    haveSeenNTermStarIon = true;
                }

                if (nTermResidue == 'S' || nTermResidue == 'T' || nTermResidue == 'E' || nTermResidue == 'D')
                {
                    haveSeenNTermDegreeIon = true;
                }
            }

            // Register this residue's neutral losses once per residue, BEFORE the CID skip below, so that
            // a neutral-loss mod on the first residue still propagates to the longer fragments that contain it.
            if (mod != null
                && mod.NeutralLosses != null
                && mod.NeutralLosses.TryGetValue(dissociationType, out List<double> neutralLosses))
            {
                foreach (double neutralLoss in neutralLosses)
                {
                    if (neutralLoss == 0)
                    {
                        continue;
                    }

                    if (nTermNeutralLosses == null)
                    {
                        nTermNeutralLosses = new HashSet<double>();
                    }

                    nTermNeutralLosses.Add(neutralLoss);
                }
            }

            // skip first N-terminal fragment (b1, aDegree1, ...) for CID
            bool skipFirstNTermFragment = r == 0
                && (dissociationType == DissociationType.CID || dissociationType == DissociationType.LowCID);

            if (!skipFirstNTermFragment)
            {
                // generate products
                for (int i = 0; i < nTermProductTypes.Count; i++)
                {
                    if (dissociationType == DissociationType.LowCID)
                    {
                        if (!haveSeenNTermStarIon && (nTermProductTypes[i] == ProductType.aStar || nTermProductTypes[i] == ProductType.bStar))
                        {
                            continue;
                        }

                        if (!haveSeenNTermDegreeIon && (nTermProductTypes[i] == ProductType.aDegree || nTermProductTypes[i] == ProductType.bDegree))
                        {
                            continue;
                        }
                    }

                    products.Add(new Product(
                        nTermProductTypes[i],
                        FragmentationTerminus.N,
                        nTermMass + massCaps.Item1[i],
                        r + 1,
                        r + 1,
                        0));

                    if (nTermNeutralLosses != null)
                    {
                        foreach (double neutralLoss in nTermNeutralLosses)
                        {
                            products.Add(new Product(
                                nTermProductTypes[i],
                                FragmentationTerminus.N,
                                nTermMass + massCaps.Item1[i] - neutralLoss,
                                r + 1,
                                r + 1,
                                neutralLoss));
                        }
                    }
                }
            }
        }

        // c-term fragments
        if (calculateCTermFragments)
        {
            char cTermResidue = BaseSequence[BaseSequence.Length - r - 1];

            // get c-term residue mass
            if (Residue.TryGetResidue(cTermResidue, out Residue residue))
            {
                cTermMass += residue.MonoisotopicMass;
            }
            else
            {
                cTermMass = double.NaN;
            }

            // add side-chain mod
            if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length - r + 1, out Modification mod))
            {
                cTermMass += mod.MonoisotopicMass.Value;
            }

            // handle star and degree ions for low-res CID
            if (dissociationType == DissociationType.LowCID)
            {
                if (cTermResidue == 'R' || cTermResidue == 'K' || cTermResidue == 'N' || cTermResidue == 'Q')
                {
                    haveSeenCTermStarIon = true;
                }

                if (cTermResidue == 'S' || cTermResidue == 'T' || cTermResidue == 'E' || cTermResidue == 'D')
                {
                    haveSeenCTermDegreeIon = true;
                }
            }

            // Register this residue's neutral losses once per residue, independent of which product
            // types end up being skipped below.
            if (mod != null
                && mod.NeutralLosses != null
                && mod.NeutralLosses.TryGetValue(dissociationType, out List<double> neutralLosses))
            {
                foreach (double neutralLoss in neutralLosses)
                {
                    if (neutralLoss == 0)
                    {
                        continue;
                    }

                    if (cTermNeutralLosses == null)
                    {
                        cTermNeutralLosses = new HashSet<double>();
                    }

                    cTermNeutralLosses.Add(neutralLoss);
                }
            }

            // generate products
            for (int i = 0; i < cTermProductTypes.Count; i++)
            {
                // skip zDot ions for proline residues for ETD/ECD/EThcD
                if (cTermResidue == 'P'
                    && (dissociationType == DissociationType.ECD || dissociationType == DissociationType.ETD || dissociationType == DissociationType.EThcD)
                    && cTermProductTypes[i] == ProductType.zDot)
                {
                    continue;
                }

                if (dissociationType == DissociationType.LowCID)
                {
                    if (!haveSeenCTermStarIon && cTermProductTypes[i] == ProductType.yStar)
                    {
                        continue;
                    }

                    if (!haveSeenCTermDegreeIon && cTermProductTypes[i] == ProductType.yDegree)
                    {
                        continue;
                    }
                }

                products.Add(new Product(
                    cTermProductTypes[i],
                    FragmentationTerminus.C,
                    cTermMass + massCaps.Item2[i],
                    r + 1,
                    BaseSequence.Length - r,
                    0));

                if (cTermNeutralLosses != null)
                {
                    foreach (double neutralLoss in cTermNeutralLosses)
                    {
                        products.Add(new Product(
                            cTermProductTypes[i],
                            FragmentationTerminus.C,
                            cTermMass + massCaps.Item2[i] - neutralLoss,
                            r + 1,
                            BaseSequence.Length - r,
                            neutralLoss));
                    }
                }
            }
        }
    }

    // zDot generates one more ion...
    // Only when C-terminal fragments were requested: otherwise cTermMass was never accumulated and the
    // resulting ion would carry a meaningless mass.
    if (calculateCTermFragments && cTermProductTypes.Contains(ProductType.zDot) && BaseSequence[0] != 'P')
    {
        // get c-term residue mass
        if (Residue.TryGetResidue(BaseSequence[0], out Residue residue))
        {
            cTermMass += residue.MonoisotopicMass;
        }
        else
        {
            cTermMass = double.NaN;
        }

        // add side-chain mod
        // NOTE(review): key 1 is the peptide N-terminus entry; by the (index + 2) convention used in the
        // loop above, the side-chain mod of the first residue would be key 2. Behavior kept as-is -- confirm.
        if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            cTermMass += mod.MonoisotopicMass.Value;
        }

        // generate zDot product
        products.Add(new Product(
            ProductType.zDot,
            FragmentationTerminus.C,
            cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot),
            BaseSequence.Length,
            1,
            0));

        if (mod != null
            && mod.NeutralLosses != null
            && mod.NeutralLosses.TryGetValue(dissociationType, out List<double> neutralLosses))
        {
            foreach (double neutralLoss in neutralLosses)
            {
                if (neutralLoss == 0)
                {
                    continue;
                }

                products.Add(new Product(
                    ProductType.zDot,
                    FragmentationTerminus.C,
                    cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot) - neutralLoss,
                    BaseSequence.Length,
                    1,
                    neutralLoss));
            }
        }
    }

    // molecular ion minus neutral losses
    foreach (var mod in AllModsOneIsNterminus)
    {
        if (mod.Value.NeutralLosses == null)
        {
            continue;
        }

        if (mod.Value.NeutralLosses.TryGetValue(dissociationType, out List<double> losses))
        {
            foreach (double neutralLoss in losses)
            {
                if (neutralLoss != 0)
                {
                    products.Add(new Product(ProductType.M, FragmentationTerminus.Both, MonoisotopicMass - neutralLoss, 0, 0, neutralLoss));
                }
            }
        }
    }

    // generate diagnostic ions
    // TODO: this code is memory-efficient but sort of CPU inefficient; it can be further optimized.
    // however, diagnostic ions are fairly rare so it's probably OK for now
    foreach (double diagnosticIon in AllModsOneIsNterminus
        .Where(p => p.Value.DiagnosticIons != null && p.Value.DiagnosticIons.ContainsKey(dissociationType))
        .SelectMany(p => p.Value.DiagnosticIons[dissociationType])
        .Distinct())
    {
        int diagnosticIonLabel = (int)Math.Round(diagnosticIon.ToMz(1), 0);

        // the diagnostic ion is assumed to be annotated in the mod info as the *neutral mass* of the diagnostic ion, not the ionized species
        products.Add(new Product(ProductType.D, FragmentationTerminus.Both, diagnosticIon, diagnosticIonLabel, 0, 0));
    }
}