Exemplo n.º 1
0
        /// <summary>
        /// Builds the annotated sequence string and stores it in FullSequence: every residue of this
        /// peptide in order, with each modification found in AllModsOneIsNterminus written as
        /// "[ModificationType:IdWithMotif]" at its position.
        /// </summary>
        private void DetermineFullSequence()
        {
            var annotated = new StringBuilder();

            // Writes the bracketed annotation for the modification stored under the given key, if there is one.
            void AppendModificationAt(int oneIsNterminusKey)
            {
                if (AllModsOneIsNterminus.TryGetValue(oneIsNterminusKey, out Modification found))
                {
                    annotated.Append($"[{found.ModificationType}:{found.IdWithMotif}]");
                }
            }

            // key 1: modification on the peptide N-terminus, written before the first residue
            AppendModificationAt(1);

            for (int index = 0; index < Length; index++)
            {
                annotated.Append(this[index]);

                // key index + 2: modification on this residue, written right after it
                AppendModificationAt(index + 2);
            }

            // key Length + 2: modification on the peptide C-terminus, written last
            AppendModificationAt(Length + 2);

            FullSequence = annotated.ToString();
        }
Exemplo n.º 2
0
 /// <summary>
 /// Makes the string representing a detected sequence variation, including any modifications on a variant amino acid.
 /// </summary>
 /// <param name="applied">The sequence variation to describe.</param>
 /// <param name="intersects">
 /// Whether this peptide intersects the variant. Pass false for a variant that was identified without
 /// intersecting the peptide, e.g. a variant that causes the cleavage site leading to the peptide.
 /// </param>
 /// <returns>
 /// The variant's original sequence, then its one-based begin position, then the variant part. When
 /// <paramref name="intersects"/> is false the variant part is the plain variant sequence; otherwise it is
 /// taken from the full sequence of a peptide built over the variant's positions with the modifications found there.
 /// </returns>
 public string SequenceVariantString(SequenceVariation applied, bool intersects)
 {
     // the variant caused a cleavage site leading to the peptide sequence (variant does not intersect but is identified)
     if (!intersects)
     {
         return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{applied.VariantSequence}";
     }

     bool variantStartsAtNTerm = applied.OneBasedBeginPosition == 1;
     bool peptideStartsAtNTerm = OneBasedStartResidueInProtein == 1;

     // offset used below when re-keying the selected mods for the helper peptide:
     // 1 when both the variant and this peptide start at position 1, 2 when only this peptide does, 3 otherwise
     int modResidueScale = !peptideStartsAtNTerm ? 3 : (variantStartsAtNTerm ? 1 : 2);

     // keep the mod under key 1 when the variant starts at position 1, plus every mod whose
     // position in the protein falls inside the variant's begin..end range
     var modsOnVariantOneIsNTerm = AllModsOneIsNterminus
                                   .Where(kv => (kv.Key == 1 && applied.OneBasedBeginPosition == 1) ||
                                                (applied.OneBasedBeginPosition <= kv.Key - 2 + OneBasedStartResidueInProtein &&
                                                 kv.Key - 2 + OneBasedStartResidueInProtein <= applied.OneBasedEndPosition))
                                   .ToDictionary(kv => kv.Key - applied.OneBasedBeginPosition + modResidueScale, kv => kv.Value);

     // the helper peptide starts one residue before the variant, unless the variant starts at position 1
     var variantPeptideStart = applied.OneBasedBeginPosition == 1 ? applied.OneBasedBeginPosition : applied.OneBasedBeginPosition - 1;
     PeptideWithSetModifications variantWithAnyMods = new PeptideWithSetModifications(Protein, DigestionParams, variantPeptideStart, applied.OneBasedEndPosition, CleavageSpecificityForFdrCategory, PeptideDescription, MissedCleavages, modsOnVariantOneIsNTerm, NumFixedMods);

     // Substring drops the first character of the helper peptide's full sequence when the helper was
     // started one residue before the variant
     return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{variantWithAnyMods.FullSequence.Substring(applied.OneBasedBeginPosition == 1 ? 0 : 1)}";
 }
        /// <summary>
        /// Generates theoretical fragments for given dissociation type for this peptide
        /// </summary>
        public IEnumerable <Product> Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)
        {
            // molecular ion
            //yield return new Product(ProductType.M, new NeutralTerminusFragment(FragmentationTerminus.None, this.MonoisotopicMass, Length, Length), 0);

            var productCollection = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus].Intersect(DissociationTypeCollection.ProductsFromDissociationType[dissociationType]);

            List <(ProductType, int)> skippers = new List <(ProductType, int)>();

            foreach (var product in productCollection.Where(f => f != ProductType.zPlusOne))
            {
                skippers.Add((product, BaseSequence.Length));
            }

            switch (dissociationType)
            {
            case DissociationType.CID:
                skippers.Add((ProductType.b, 1));
                break;

            case DissociationType.ETD:
            case DissociationType.ECD:
            case DissociationType.EThcD:
                skippers.AddRange(GetProlineZIonIndicies());
                break;
            }

            foreach (var productType in productCollection)
            {
                // we're separating the N and C terminal masses and computing a separate compact peptide for each one
                // this speeds calculations up without producing unnecessary terminus fragment info
                FragmentationTerminus     temporaryFragmentationTerminus = TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus[productType];
                NeutralTerminusFragment[] terminalMasses = CompactPeptide(temporaryFragmentationTerminus).TerminalMasses;

                for (int f = 0; f < terminalMasses.Length; f++)
                {
                    // fragments with neutral loss
                    if (AllModsOneIsNterminus.TryGetValue(terminalMasses[f].AminoAcidPosition + 1, out Modification mod) && mod.NeutralLosses != null &&
                        mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                    {
                        foreach (double neutralLoss in neutralLosses)
                        {
                            if (neutralLoss == 0)
                            {
                                continue;
                            }

                            for (int n = f; n < terminalMasses.Length; n++)
                            {
                                if (!skippers.Contains((productType, terminalMasses[n].FragmentNumber)))
Exemplo n.º 4
0
        /// <summary>
        /// Builds the sequence annotated only with the modifications whose ModificationType is a key of
        /// <paramref name="modstoWritePruned"/>.
        /// </summary>
        /// <param name="modstoWritePruned">
        /// Modification types to keep in the output; only its keys are consulted here.
        /// </param>
        /// <returns>
        /// BaseSequence when <paramref name="modstoWritePruned"/> is null; otherwise the residues of this peptide
        /// with each retained modification written as "[ModificationType:IdWithMotif]" at its position.
        /// </returns>
        public virtual string EssentialSequence(IReadOnlyDictionary <string, int> modstoWritePruned)
        {
            string unannotated = BaseSequence;

            // nothing to filter against: fall back to the bare sequence
            if (modstoWritePruned == null)
            {
                return unannotated;
            }

            var annotated = new StringBuilder();

            // key 1: variable modification on the peptide N-terminus
            if (AllModsOneIsNterminus.TryGetValue(1, out Modification nTerminalMod) &&
                modstoWritePruned.ContainsKey(nTerminalMod.ModificationType))
            {
                annotated.Append('[').Append(nTerminalMod.ModificationType).Append(":").Append(nTerminalMod.IdWithMotif).Append(']');
            }

            for (int index = 0; index < Length; index++)
            {
                annotated.Append(this[index]);

                // key index + 2: variable modification on this residue
                if (AllModsOneIsNterminus.TryGetValue(index + 2, out Modification residueMod) &&
                    modstoWritePruned.ContainsKey(residueMod.ModificationType))
                {
                    annotated.Append('[').Append(residueMod.ModificationType).Append(":").Append(residueMod.IdWithMotif).Append(']');
                }
            }

            // key Length + 2: variable modification on the peptide C-terminus
            if (AllModsOneIsNterminus.TryGetValue(Length + 2, out Modification cTerminalMod) &&
                modstoWritePruned.ContainsKey(cTerminalMod.ModificationType))
            {
                annotated.Append('[').Append(cTerminalMod.ModificationType).Append(":").Append(cTerminalMod.IdWithMotif).Append(']');
            }

            return annotated.ToString();
        }
Exemplo n.º 5
0
 /// <summary>
 /// Returns FullSequence immediately followed by the string form of every entry of
 /// AllModsOneIsNterminus, with the entries joined by tab characters.
 /// </summary>
 public override string ToString()
 {
     string sequencePart = FullSequence;
     string modsPart     = string.Join("\t", AllModsOneIsNterminus.Select(entry => entry.ToString()));

     return sequencePart + modsPart;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Generates theoretical fragments for given dissociation type for this peptide.
        /// The "products" parameter is filled with these fragments.
        /// </summary>
        /// <param name="dissociationType">
        /// Dissociation type; selects the product types and mass shifts, and is the key used to look up
        /// each modification's neutral losses and diagnostic ions.
        /// </param>
        /// <param name="fragmentationTerminus">
        /// N or Both enables the N-terminal fragment series; C or Both enables the C-terminal series.
        /// Molecular-ion neutral losses and diagnostic ions are added regardless of this value.
        /// </param>
        /// <param name="products">List that receives the generated products. It is cleared first.</param>
        public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus, List <Product> products)
        {
            // This code is specifically written to be memory- and CPU -efficient because it is
            // called millions of times for a typical search (i.e., at least once per peptide).
            // If you modify this code, BE VERY CAREFUL about allocating new memory, especially
            // for new collections. This code also deliberately avoids using "yield return", again
            // for performance reasons. Be sure to benchmark any changes with a parallelized
            // fragmentation of every peptide in a database (i.e., test for speed decreases and
            // memory issues).

            products.Clear();

            // Item1 is indexed in parallel with nTermProductTypes, Item2 with cTermProductTypes (see their use below)
            var massCaps = DissociationTypeCollection.GetNAndCTerminalMassShiftsForDissociationType(dissociationType);

            // running sums of residue and modification masses, accumulated from each terminus inward
            double cTermMass = 0;
            double nTermMass = 0;

            List <ProductType> nTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N);
            List <ProductType> cTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C);

            bool calculateNTermFragments = fragmentationTerminus == FragmentationTerminus.N ||
                                           fragmentationTerminus == FragmentationTerminus.Both;

            bool calculateCTermFragments = fragmentationTerminus == FragmentationTerminus.C ||
                                           fragmentationTerminus == FragmentationTerminus.Both;

            //From http://www.matrixscience.com/help/fragmentation_help.html
            //Low Energy CID -- In low energy CID(i.e.collision induced dissociation in a triple quadrupole or an ion trap) a peptide carrying a positive charge fragments mainly along its backbone,
            //generating predominantly b and y ions. In addition, for fragments containing RKNQ, peaks are seen for ions that have lost ammonia (-17 Da) denoted a*, b* and y*. For fragments containing
            //STED, loss of water(-18 Da) is denoted a°, b° and y°. Satellite ions from side chain cleavage are not observed.
            bool haveSeenNTermDegreeIon = false;
            bool haveSeenNTermStarIon   = false;
            bool haveSeenCTermDegreeIon = false;
            bool haveSeenCTermStarIon   = false;

            // these two collections keep track of the neutral losses observed so far on the n-term or c-term.
            // they are apparently necessary, but allocating memory for collections in this function results in
            // inefficient memory usage and thus frequent garbage collection.
            // TODO: If you can think of a way to remove these collections and still maintain correct
            // fragmentation, please do so.
            HashSet <double> nTermNeutralLosses = null;
            HashSet <double> cTermNeutralLosses = null;

            // n-terminus mod
            if (calculateNTermFragments)
            {
                if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
                {
                    nTermMass += mod.MonoisotopicMass.Value;
                }
            }

            // c-terminus mod
            if (calculateCTermFragments)
            {
                if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length + 2, out Modification mod))
                {
                    cTermMass += mod.MonoisotopicMass.Value;
                }
            }

            // each pass extends the N-terminal series by residue r and the C-terminal series by residue
            // (Length - r - 1); the last residue of each series is never reached by this loop
            for (int r = 0; r < BaseSequence.Length - 1; r++)
            {
                // n-term fragments
                if (calculateNTermFragments)
                {
                    char nTermResidue = BaseSequence[r];

                    // get n-term residue mass
                    if (Residue.TryGetResidue(nTermResidue, out Residue residue))
                    {
                        nTermMass += residue.MonoisotopicMass;
                    }
                    else
                    {
                        // unknown residue: NaN propagates into this and every later N-terminal fragment mass
                        nTermMass = double.NaN;
                    }

                    // add side-chain mod
                    if (AllModsOneIsNterminus.TryGetValue(r + 2, out Modification mod))
                    {
                        nTermMass += mod.MonoisotopicMass.Value;
                    }

                    // handle star and degree ions for low-res CID
                    if (dissociationType == DissociationType.LowCID)
                    {
                        if (nTermResidue == 'R' || nTermResidue == 'K' || nTermResidue == 'N' || nTermResidue == 'Q')
                        {
                            haveSeenNTermStarIon = true;
                        }

                        if (nTermResidue == 'S' || nTermResidue == 'T' || nTermResidue == 'E' || nTermResidue == 'D')
                        {
                            haveSeenNTermDegreeIon = true;
                        }
                    }

                    // skip first N-terminal fragment (b1, aDegree1, ...) for CID
                    // NOTE(review): this jump also skips the neutral-loss registration below for a mod on the
                    // first residue, so its losses are not carried to later N-terminal fragments — confirm intended.
                    if (r == 0 && (dissociationType == DissociationType.CID || dissociationType == DissociationType.LowCID))
                    {
                        goto CTerminusFragments;
                    }

                    // generate products
                    for (int i = 0; i < nTermProductTypes.Count; i++)
                    {
                        if (dissociationType == DissociationType.LowCID)
                        {
                            if (!haveSeenNTermStarIon && (nTermProductTypes[i] == ProductType.aStar || nTermProductTypes[i] == ProductType.bStar))
                            {
                                continue;
                            }

                            if (!haveSeenNTermDegreeIon && (nTermProductTypes[i] == ProductType.aDegree || nTermProductTypes[i] == ProductType.bDegree))
                            {
                                continue;
                            }
                        }

                        products.Add(new Product(
                                         nTermProductTypes[i],
                                         FragmentationTerminus.N,
                                         nTermMass + massCaps.Item1[i],
                                         r + 1,
                                         r + 1,
                                         0));

                        // remember the non-zero neutral losses of this residue's mod; the set is only
                        // allocated once a loss is actually seen
                        if (mod != null && mod.NeutralLosses != null &&
                            mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                        {
                            foreach (double neutralLoss in neutralLosses)
                            {
                                if (neutralLoss == 0)
                                {
                                    continue;
                                }

                                if (nTermNeutralLosses == null)
                                {
                                    nTermNeutralLosses = new HashSet <double>();
                                }

                                nTermNeutralLosses.Add(neutralLoss);
                            }
                        }

                        // one extra product per neutral loss seen so far on the N-terminal side
                        if (nTermNeutralLosses != null)
                        {
                            foreach (double neutralLoss in nTermNeutralLosses)
                            {
                                products.Add(new Product(
                                                 nTermProductTypes[i],
                                                 FragmentationTerminus.N,
                                                 nTermMass + massCaps.Item1[i] - neutralLoss,
                                                 r + 1,
                                                 r + 1,
                                                 neutralLoss));
                            }
                        }
                    }
                }

                // c-term fragments
CTerminusFragments:
                if (calculateCTermFragments)
                {
                    char cTermResidue = BaseSequence[BaseSequence.Length - r - 1];

                    // get c-term residue mass
                    if (Residue.TryGetResidue(cTermResidue, out Residue residue))
                    {
                        cTermMass += residue.MonoisotopicMass;
                    }
                    else
                    {
                        // unknown residue: NaN propagates into this and every later C-terminal fragment mass
                        cTermMass = double.NaN;
                    }

                    // add side-chain mod
                    if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length - r + 1, out Modification mod))
                    {
                        cTermMass += mod.MonoisotopicMass.Value;
                    }

                    // handle star and degree ions for low-res CID
                    if (dissociationType == DissociationType.LowCID)
                    {
                        if (cTermResidue == 'R' || cTermResidue == 'K' || cTermResidue == 'N' || cTermResidue == 'Q')
                        {
                            haveSeenCTermStarIon = true;
                        }

                        if (cTermResidue == 'S' || cTermResidue == 'T' || cTermResidue == 'E' || cTermResidue == 'D')
                        {
                            haveSeenCTermDegreeIon = true;
                        }
                    }

                    // generate products
                    for (int i = 0; i < cTermProductTypes.Count; i++)
                    {
                        // skip zDot ions for proline residues for ETD/ECD/EThcD
                        if (cTermResidue == 'P' &&
                            (dissociationType == DissociationType.ECD || dissociationType == DissociationType.ETD || dissociationType == DissociationType.EThcD) &&
                            cTermProductTypes[i] == ProductType.zDot)
                        {
                            continue;
                        }

                        if (dissociationType == DissociationType.LowCID)
                        {
                            if (!haveSeenCTermStarIon && cTermProductTypes[i] == ProductType.yStar)
                            {
                                continue;
                            }

                            if (!haveSeenCTermDegreeIon && cTermProductTypes[i] == ProductType.yDegree)
                            {
                                continue;
                            }
                        }

                        products.Add(new Product(
                                         cTermProductTypes[i],
                                         FragmentationTerminus.C,
                                         cTermMass + massCaps.Item2[i],
                                         r + 1,
                                         BaseSequence.Length - r,
                                         0));

                        // remember the non-zero neutral losses of this residue's mod; the set is only
                        // allocated once a loss is actually seen
                        if (mod != null && mod.NeutralLosses != null &&
                            mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                        {
                            foreach (double neutralLoss in neutralLosses)
                            {
                                if (neutralLoss == 0)
                                {
                                    continue;
                                }

                                if (cTermNeutralLosses == null)
                                {
                                    cTermNeutralLosses = new HashSet <double>();
                                }

                                cTermNeutralLosses.Add(neutralLoss);
                            }
                        }

                        // one extra product per neutral loss seen so far on the C-terminal side
                        if (cTermNeutralLosses != null)
                        {
                            foreach (double neutralLoss in cTermNeutralLosses)
                            {
                                products.Add(new Product(
                                                 cTermProductTypes[i],
                                                 FragmentationTerminus.C,
                                                 cTermMass + massCaps.Item2[i] - neutralLoss,
                                                 r + 1,
                                                 BaseSequence.Length - r,
                                                 neutralLoss));
                            }
                        }
                    }
                }
            }

            // zDot generates one more ion...
            // Only when C-terminal fragments were requested: otherwise cTermMass was never accumulated
            // in the loop above and the product would carry a meaningless mass.
            if (calculateCTermFragments && cTermProductTypes.Contains(ProductType.zDot) && BaseSequence[0] != 'P')
            {
                // get c-term residue mass
                if (Residue.TryGetResidue(BaseSequence[0], out Residue residue))
                {
                    cTermMass += residue.MonoisotopicMass;
                }
                else
                {
                    cTermMass = double.NaN;
                }

                // add side-chain mod
                // NOTE(review): this reads key 1, while the loops above read a residue's mod from key r + 2
                // (which would be key 2 for the first residue) — confirm which key is intended here.
                if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
                {
                    cTermMass += mod.MonoisotopicMass.Value;
                }

                // generate zDot product
                products.Add(new Product(
                                 ProductType.zDot,
                                 FragmentationTerminus.C,
                                 cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot),
                                 BaseSequence.Length,
                                 1,
                                 0));

                if (mod != null && mod.NeutralLosses != null &&
                    mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                {
                    foreach (double neutralLoss in neutralLosses)
                    {
                        if (neutralLoss == 0)
                        {
                            continue;
                        }

                        products.Add(new Product(
                                         ProductType.zDot,
                                         FragmentationTerminus.C,
                                         cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot) - neutralLoss,
                                         BaseSequence.Length,
                                         1,
                                         neutralLoss));
                    }
                }
            }

            // molecular ion minus neutral losses
            foreach (var mod in AllModsOneIsNterminus)
            {
                if (mod.Value.NeutralLosses == null)
                {
                    continue;
                }

                if (mod.Value.NeutralLosses.TryGetValue(dissociationType, out List <double> losses))
                {
                    foreach (double neutralLoss in losses)
                    {
                        // a loss of zero would just duplicate the unmodified molecular ion
                        if (neutralLoss != 0)
                        {
                            products.Add(new Product(ProductType.M, FragmentationTerminus.Both, MonoisotopicMass - neutralLoss, 0, 0, neutralLoss));
                        }
                    }
                }
            }

            // generate diagnostic ions
            // TODO: this code is memory-efficient but sort of CPU inefficient; it can be further optimized.
            // however, diagnostic ions are fairly rare so it's probably OK for now
            foreach (double diagnosticIon in AllModsOneIsNterminus.Where(p => p.Value.DiagnosticIons != null &&
                                                                         p.Value.DiagnosticIons.ContainsKey(dissociationType)).SelectMany(p => p.Value.DiagnosticIons[dissociationType]).Distinct())
            {
                // label is the diagnostic ion's m/z at charge 1, rounded to the nearest integer
                int diagnosticIonLabel = (int)Math.Round(diagnosticIon.ToMz(1), 0);

                // the diagnostic ion is assumed to be annotated in the mod info as the *neutral mass* of the diagnostic ion, not the ionized species
                products.Add(new Product(ProductType.D, FragmentationTerminus.Both, diagnosticIon, diagnosticIonLabel, 0, 0));
            }
        }