/// <summary>
        /// Classic semi-specific digestion of a protein.
        ///
        /// Candidate intervals are gathered in three passes: (1) windows that span
        /// MaximumMissedCleavages + 1 consecutive cleavage sites, (2) shorter windows anchored
        /// at one end of the cleavage-site list, and (3) the protein's annotated proteolysis
        /// products. Each interval is then expanded into its modified peptide forms.
        /// </summary>
        /// <param name="protein">Protein whose base sequence is digested</param>
        /// <returns>Deferred sequence of the modified peptides produced from every collected interval</returns>
        public IEnumerable <PeptideWithSetModifications> SemiSpecificDigestion(Protein protein)
        {
            var candidates = new List <ProteolyticPeptide>();
            List <int> cleavageSites = Protease.GetDigestionSiteIndices(protein.BaseSequence);
            int windowShift = MaximumMissedCleavages + 1;
            int windowCount = cleavageSites.Count - windowShift;

            // pass 1: every window covering exactly windowShift consecutive cleavage sites
            for (int site = 0; site < windowCount; site++)
            {
                int farSite = site + windowShift;

                if (Protease.Retain(site, InitiatorMethionineBehavior, protein[0]))
                {
                    int retainedSpan = cleavageSites[farSite] - cleavageSites[site];
                    if (Protease.OkayLength(retainedSpan, MinPeptidesLength, MaxPeptidesLength))
                    {
                        candidates.Add(new ProteolyticPeptide(protein, cleavageSites[site] + 1, cleavageSites[farSite], retainedSpan, "semi"));
                    }
                }

                if (Protease.Cleave(site, InitiatorMethionineBehavior, protein[0]))
                {
                    // the interval starts at residue 2, so its length is the end position minus one
                    int cleavedSpan = cleavageSites[farSite] - 1;
                    if (Protease.OkayLength(cleavedSpan, MinPeptidesLength, MaxPeptidesLength))
                    {
                        candidates.Add(new ProteolyticPeptide(protein, 2, cleavageSites[farSite], cleavedSpan, "semi:M cleaved"));
                    }
                }
            }

            // pass 2: windows with fewer sites, anchored at the last (N setting) or first (any other setting) cleavage site
            int finalSite  = cleavageSites.Count - 1;
            int largestGap = finalSite > MaximumMissedCleavages ? MaximumMissedCleavages : finalSite;

            for (int gap = 1; gap <= largestGap; gap++)
            {
                int    anchorStart;
                int    anchorEnd;
                string label;

                if (DigestionParams.TerminusTypeSemiProtease == FragmentationTerminus.N) //N here means the extra peptides are taken at the C-terminal end
                {
                    anchorStart = cleavageSites[finalSite - gap];
                    anchorEnd   = cleavageSites[finalSite];
                    label       = "semiN";
                }
                else
                {
                    anchorStart = cleavageSites[0];
                    anchorEnd   = cleavageSites[gap];
                    label       = "semiC";
                }

                int anchoredSpan = anchorEnd - anchorStart;
                if (Protease.OkayLength(anchoredSpan, MinPeptidesLength, MaxPeptidesLength))
                {
                    candidates.Add(new ProteolyticPeptide(protein, anchorStart + 1, anchorEnd, anchoredSpan, label));
                }
            }

            // pass 3: proteolysis products with both positions known that do not cover the whole protein
            IEnumerable <ProteolyticPeptide> productIntervals =
                from product in protein.ProteolysisProducts
                where product.OneBasedEndPosition.HasValue && product.OneBasedBeginPosition.HasValue &&
                      !(product.OneBasedBeginPosition == 1 && product.OneBasedEndPosition == protein.Length)
                select new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value,
                                              0, product.Type + " start");

            candidates.AddRange(productIntervals);

            return candidates.SelectMany(interval => interval.GetModifiedPeptides(AllKnownFixedModifications, DigestionParams, VariableModifications));
        }
Exemple #2
0
        }                                                                    //used to look for unlabeled proteins (in addition to labeled proteins) for SILAC experiments

        /// <summary>
        /// Value equality for digestion parameters: two instances are equal when their missed-cleavage
        /// limit, peptide length bounds, initiator methionine behavior, modification limits, protease,
        /// search mode, fragmentation terminus and SILAC unlabeled-protein flag all match.
        /// </summary>
        /// <param name="obj">Object to compare against; anything that is not a DigestionParams is unequal</param>
        /// <returns>true when every compared setting matches, otherwise false</returns>
        public override bool Equals(object obj)
        {
            if (!(obj is DigestionParams other))
            {
                return false;
            }

            // NOTE(review): GetHashCode is not visible here - confirm it is overridden consistently with this method.
            return MaxMissedCleavages.Equals(other.MaxMissedCleavages) &&
                   MinPeptideLength.Equals(other.MinPeptideLength) &&
                   MaxPeptideLength.Equals(other.MaxPeptideLength) &&
                   InitiatorMethionineBehavior.Equals(other.InitiatorMethionineBehavior) &&
                   MaxModificationIsoforms.Equals(other.MaxModificationIsoforms) &&
                   MaxModsForPeptide.Equals(other.MaxModsForPeptide) &&
                   // Equals must not throw: a missing protease only matches another missing protease
                   (ReferenceEquals(Protease, null) ? ReferenceEquals(other.Protease, null) : Protease.Equals(other.Protease)) &&
                   SearchModeType.Equals(other.SearchModeType) &&
                   FragmentationTerminus.Equals(other.FragmentationTerminus) &&
                   GeneratehUnlabeledProteinsForSilac.Equals(other.GeneratehUnlabeledProteinsForSilac);
        }
        /// <summary>
        /// Reads a tab-separated protease definition file and builds a lookup keyed by protease name.
        ///
        /// The first line is treated as a header and skipped. Every following non-blank line is split on
        /// tabs, and each column is further split on '|'. Columns, by index:
        /// 0 name, 1 motifs inducing cleavage, 2 motifs preventing cleavage (comma-separated per inducing motif),
        /// 3 terminus for each inducing motif, 4 cleavage specificity, 5 PSI-MS accession number,
        /// 6 PSI-MS name, 7 site regular expression.
        /// Blank or whitespace-only lines are ignored.
        /// </summary>
        /// <param name="proteasesLocation">Path of the protease definition file</param>
        /// <returns>Dictionary mapping each protease name to its Protease</returns>
        /// <exception cref="ArgumentException">A terminus or specificity value is not a valid enum name, or a protease name is duplicated</exception>
        /// <exception cref="IndexOutOfRangeException">A non-blank line has fewer columns or entries than expected</exception>
        public static Dictionary <string, Protease> LoadProteaseDictionary(string proteasesLocation)
        {
            Dictionary <string, Protease> dict = new Dictionary <string, Protease>();

            using (StreamReader proteases = new StreamReader(proteasesLocation))
            {
                proteases.ReadLine(); //skip the header line

                string line;
                while ((line = proteases.ReadLine()) != null)
                {
                    //a stray blank line (e.g. at the end of the file) carries no protease; skip it instead of failing on missing columns
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    string[][] fields = line.Split('\t').Select(x => x.Split('|')).ToArray();
                    string     name   = fields[0][0];
                    List <Tuple <string, FragmentationTerminus> > sequencesInducingCleavage  = new List <Tuple <string, FragmentationTerminus> >();
                    List <Tuple <string, FragmentationTerminus> > sequencePreventingCleavage = new List <Tuple <string, FragmentationTerminus> >();

                    //preventing motifs are only read when none of the preventing entries on this line is empty
                    bool everyPreventingEntryFilled = !fields[2].Contains("");

                    for (int i = 0; i < fields[1].Length; i++)
                    {
                        if (fields[1][i].Equals(""))
                        {
                            continue;
                        }

                        //the i-th terminus applies to the i-th inducing motif and to its preventing motifs
                        FragmentationTerminus terminus = (FragmentationTerminus)Enum.Parse(typeof(FragmentationTerminus), fields[3][i], true);
                        sequencesInducingCleavage.Add(new Tuple <string, FragmentationTerminus>(fields[1][i], terminus));

                        if (everyPreventingEntryFilled)
                        {
                            string[] preventing = fields[2][i].Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                            foreach (string preventingMotif in preventing)
                            {
                                sequencePreventingCleavage.Add(new Tuple <string, FragmentationTerminus>(preventingMotif, terminus));
                            }
                        }
                    }

                    var    cleavageSpecificity  = (CleavageSpecificity)Enum.Parse(typeof(CleavageSpecificity), fields[4][0], true);
                    string psiMsAccessionNumber = fields[5][0];
                    string psiMsName            = fields[6][0];
                    string siteRegexp           = fields[7][0];
                    var    protease             = new Protease(name, sequencesInducingCleavage, sequencePreventingCleavage, cleavageSpecificity, psiMsAccessionNumber, psiMsName, siteRegexp);
                    dict.Add(protease.Name, protease);
                }
            }
            return dict;
        }
Exemple #4
0
        /// <summary>
        /// Gets peptides for the singleC protease.
        ///
        /// When this protease equals <paramref name="specificProtease"/>, one peptide is produced for every
        /// possible end position in the protein, extended toward the N-terminus as far as the maximum length allows.
        /// Otherwise the cleavage sites of <paramref name="specificProtease"/> are walked from the C-terminus
        /// toward the N-terminus, and for each window of maximumMissedCleavages + 1 sites one peptide is
        /// produced per end position lying between the window's last two cleavage sites.
        /// </summary>
        /// <param name="protein">Protein being digested</param>
        /// <param name="initiatorMethionineBehavior">Passed to Retain to decide whether peptides may start at residue 1 or only at residue 2</param>
        /// <param name="maximumMissedCleavages">Missed cleavages spanned by each window; only used when a different specific protease is supplied</param>
        /// <param name="minPeptideLength">Peptides shorter than this are not produced</param>
        /// <param name="maxPeptideLength">Peptides are cropped at their start so they never exceed this length</param>
        /// <param name="specificProtease">Protease supplying the cleavage sites; compared against this instance to choose the branch</param>
        /// <returns>Peptides tagged with CleavageSpecificity.SingleC and the description "SingleC"</returns>
        private List <ProteolyticPeptide> SingleC_Digestion(Protein protein, InitiatorMethionineBehavior initiatorMethionineBehavior,
                                                            int maximumMissedCleavages, int minPeptideLength, int maxPeptideLength, Protease specificProtease)
        {
            List <ProteolyticPeptide> peptides = new List <ProteolyticPeptide>();
            int proteinStart = Retain(0, initiatorMethionineBehavior, protein[0]) ? 1 : 2; //where does the protein start? (one-based; 2 when the first residue is not retained)

            if (Equals(specificProtease))
            {
                int lengthDifference = proteinStart - 1; //number of residues before proteinStart that can never be part of a peptide
                for (int proteinEnd = 1; proteinEnd <= protein.Length; proteinEnd++)
                {
                    //the longest peptide ending at proteinEnd runs from proteinStart to proteinEnd
                    if (OkayMinLength(proteinEnd - lengthDifference, minPeptideLength)) //is the maximum possible length longer than the minimum?
                    {
                        //use the start index as the max of the N-terminus or the c-terminus minus the max (+1 because inclusive, otherwise peptides will be one AA too long)
                        //missed cleavages are recorded as 0 in this branch
                        peptides.Add(new ProteolyticPeptide(protein, Math.Max(proteinStart, proteinEnd - maxPeptideLength + 1), proteinEnd, 0, CleavageSpecificity.SingleC, "SingleC"));
                    }
                }
            }
            else //if there's a specific protease, then we need to adhere to the specified missed cleavage rules
            {
                //generate only peptides with the maximum number of missed cleavages, unless the protein has fewer than the max or we're near the unselected terminus (where we run to the end of the protein)
                List <int> oneBasedIndicesToCleaveAfter = specificProtease.GetDigestionSiteIndices(protein.BaseSequence); //get peptide bonds to cleave SPECIFICALLY (termini included)
                oneBasedIndicesToCleaveAfter[0] = proteinStart - 1;                                                       //update the first cleavage to represent the initiator methionine rules
                int maximumMissedCleavagesIndexShift = maximumMissedCleavages + 1; //number of cleavage-site indexes spanned by one window

                //walk windows from the C-terminus; the window starting at index 0 (i == shift) is excluded here and handled by the terminus wrap-up below
                for (int i = oneBasedIndicesToCleaveAfter.Count - 1; i > maximumMissedCleavagesIndexShift; i--)
                {
                    int endProteaseIndex   = oneBasedIndicesToCleaveAfter[i];
                    int startProteaseIndex = oneBasedIndicesToCleaveAfter[i - maximumMissedCleavagesIndexShift];
                    int peptideLength      = endProteaseIndex - startProteaseIndex;
                    if (peptideLength >= minPeptideLength) //if bigger than min
                    {
                        int startActualIndex = startProteaseIndex;
                        if (peptideLength > maxPeptideLength) //if the next cleavage is too far away, crop it to the max length
                        {
                            startActualIndex = endProteaseIndex - maxPeptideLength;
                        }
                        int nextEndIndex = oneBasedIndicesToCleaveAfter[i - 1]; //end positions at or before this site belong to the next window
                        //make SingleC peptides until we reach the next index to cleave at or until the peptides are too small
                        for (; (endProteaseIndex > nextEndIndex) && (endProteaseIndex - startActualIndex >= minPeptideLength); endProteaseIndex--)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, startActualIndex + 1, endProteaseIndex, maximumMissedCleavages, CleavageSpecificity.SingleC, "SingleC"));

                            //a cropped start slides toward the N-terminus together with the end until it reaches the window's own start
                            if (startActualIndex != startProteaseIndex)
                            {
                                startActualIndex--;
                            }
                        }
                    }
                }
                //wrap up the terminus
                //if there are more missed cleavages allowed than there are cleavages to cleave, change the effective number of missed cleavages to the max
                if (oneBasedIndicesToCleaveAfter.Count <= maximumMissedCleavagesIndexShift)
                {
                    maximumMissedCleavagesIndexShift = oneBasedIndicesToCleaveAfter.Count - 1;
                }
                int lastEndIndex = oneBasedIndicesToCleaveAfter[maximumMissedCleavagesIndexShift]; //end of the window that starts at the protein start
                int startIndex   = Math.Max(proteinStart, lastEndIndex - maxPeptideLength + 1);    //one-based start, cropped to the max length
                int minPeptideLengthOneBasedResidueShift = minPeptideLength - 1;                   //offset from a one-based start to the end of a minimum-length peptide
                for (; lastEndIndex >= startIndex + minPeptideLengthOneBasedResidueShift; lastEndIndex--)
                {
                    peptides.Add(new ProteolyticPeptide(protein, startIndex, lastEndIndex, maximumMissedCleavages, CleavageSpecificity.SingleC, "SingleC"));

                    //update the start if needed
                    if (startIndex != proteinStart)
                    {
                        startIndex--;
                    }
                }
            }
            return(peptides);
        }
Exemple #5
0
        /// <summary>
        /// Gets peptides for the singleN protease.
        ///
        /// When this protease equals <paramref name="specificProtease"/>, one peptide is produced for every
        /// possible start position in the protein, extended toward the C-terminus as far as the maximum length allows.
        /// Otherwise the cleavage sites of <paramref name="specificProtease"/> are walked from the N-terminus
        /// toward the C-terminus, and for each window of maximumMissedCleavages + 1 sites one peptide is
        /// produced per start position lying between the window's first two cleavage sites.
        /// </summary>
        /// <param name="protein">Protein being digested</param>
        /// <param name="initiatorMethionineBehavior">Passed to Retain to decide whether peptides may start at residue 1 or only at residue 2</param>
        /// <param name="maximumMissedCleavages">Missed cleavages spanned by each window; only used when a different specific protease is supplied</param>
        /// <param name="minPeptideLength">Peptides shorter than this are not produced</param>
        /// <param name="maxPeptideLength">Peptides are cropped at their end so they never exceed this length; values up to int.MaxValue are supported</param>
        /// <param name="specificProtease">Protease supplying the cleavage sites; compared against this instance to choose the branch</param>
        /// <returns>Peptides tagged with CleavageSpecificity.SingleN and the description "SingleN"</returns>
        private List <ProteolyticPeptide> SingleN_Digestion(Protein protein, InitiatorMethionineBehavior initiatorMethionineBehavior,
                                                            int maximumMissedCleavages, int minPeptideLength, int maxPeptideLength, Protease specificProtease)
        {
            List <ProteolyticPeptide> peptides = new List <ProteolyticPeptide>();
            int proteinStart = Retain(0, initiatorMethionineBehavior, protein[0]) ? 1 : 2; //where does the protein start? (one-based; 2 when the first residue is not retained)

            if (Equals(specificProtease))
            {
                bool maxTooBig = protein.Length + maxPeptideLength < 0; //when maxPeptideLength is too large, the sum wraps negative and causes issues
                                                                        //This happens when maxPeptideLength == int.MaxValue or something close to it
                for (; proteinStart <= protein.Length; proteinStart++)
                {
                    if (OkayMinLength(protein.Length - proteinStart + 1, minPeptideLength))
                    {
                        //if the max length is too big to be added to an index (ie infinity), just run to the end of the protein.
                        //otherwise take the minimum of the protein length or start + max length (-1, because the index is inclusive. Without -1, peptides will be one AA too long)
                        //missed cleavages are recorded as 0 in this branch
                        peptides.Add(new ProteolyticPeptide(protein, proteinStart, maxTooBig ? protein.Length : Math.Min(protein.Length, proteinStart + maxPeptideLength - 1), 0, CleavageSpecificity.SingleN, "SingleN"));
                    }
                }
            }
            else //if there's a specific protease, then we need to adhere to the specified missed cleavage rules
            {
                //generate only peptides with the maximum number of missed cleavages, unless the protein has fewer than the max or we're near the unselected terminus (where we run to the end of the protein)
                List <int> oneBasedIndicesToCleaveAfter = specificProtease.GetDigestionSiteIndices(protein.BaseSequence); //get peptide bonds to cleave SPECIFICALLY (termini included)
                oneBasedIndicesToCleaveAfter[0] = proteinStart - 1;                                                       //update the first cleavage to represent the initiator methionine rules
                int maximumMissedCleavagesIndexShift = maximumMissedCleavages + 1; //number of cleavage-site indexes spanned by one window

                for (int i = 0; i < oneBasedIndicesToCleaveAfter.Count - maximumMissedCleavagesIndexShift; i++)
                {
                    int startIndex       = oneBasedIndicesToCleaveAfter[i];
                    int endProteaseIndex = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift];
                    int peptideLength    = endProteaseIndex - startIndex;
                    if (peptideLength >= minPeptideLength) //if bigger than min
                    {
                        int endActualIndex = endProteaseIndex;
                        if (peptideLength > maxPeptideLength) //if the next cleavage is too far away, crop it to the max length
                        {
                            endActualIndex = startIndex + maxPeptideLength; //cannot overflow: the result is smaller than endProteaseIndex
                        }
                        int nextStartIndex = oneBasedIndicesToCleaveAfter[i + 1] + 1; //start positions from here on belong to the next window

                        //make SingleN peptides until we reach the next index to cleave at or until the peptides are too small
                        for (; (startIndex + 1 < nextStartIndex) && (endActualIndex - startIndex >= minPeptideLength); startIndex++)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, startIndex + 1, endActualIndex, maximumMissedCleavages, CleavageSpecificity.SingleN, "SingleN"));

                            //a cropped end slides toward the C-terminus together with the start until it reaches the window's own end
                            if (endActualIndex != endProteaseIndex)
                            {
                                endActualIndex++;
                            }
                        }
                    }
                }
                //wrap up the terminus
                //if the window is wider than the list of cleavage sites, shrink it so that it starts at the first site
                if (oneBasedIndicesToCleaveAfter.Count < maximumMissedCleavagesIndexShift)
                {
                    maximumMissedCleavagesIndexShift = oneBasedIndicesToCleaveAfter.Count;
                }
                int lastStartIndex  = oneBasedIndicesToCleaveAfter[oneBasedIndicesToCleaveAfter.Count - maximumMissedCleavagesIndexShift] + 1;
                int proteinEndIndex = oneBasedIndicesToCleaveAfter[oneBasedIndicesToCleaveAfter.Count - 1]; //end of protein

                //compare lengths instead of indices: "lastStartIndex + maxPeptideLength - 1" wraps to a negative number when
                //maxPeptideLength is int.MaxValue (or close to it), which used to suppress every terminal peptide
                int remainingLength = proteinEndIndex - lastStartIndex + 1;
                int lastEndIndex    = remainingLength <= maxPeptideLength
                                      ? proteinEndIndex                        //everything up to the end of the protein fits
                                      : lastStartIndex + maxPeptideLength - 1; //crop to the max length (inclusive end)
                for (; lastStartIndex + minPeptideLength - 1 <= lastEndIndex; lastStartIndex++)
                {
                    peptides.Add(new ProteolyticPeptide(protein, lastStartIndex, lastEndIndex, maximumMissedCleavages, CleavageSpecificity.SingleN, "SingleN"));

                    //update the end if needed
                    if (lastEndIndex != proteinEndIndex)
                    {
                        lastEndIndex++;
                    }
                }
            }
            return peptides;
        }
Exemple #6
0
        /// <summary>
        /// Produces the unmodified peptide intervals of a protein, dispatching on this protease's cleavage specificity.
        /// </summary>
        /// <param name="protein">Protein to digest</param>
        /// <param name="maximumMissedCleavages">Missed-cleavage limit forwarded to the selected digestion routine</param>
        /// <param name="initiatorMethionineBehavior">How a leading methionine is treated</param>
        /// <param name="minPeptideLength">Shortest acceptable peptide</param>
        /// <param name="maxPeptideLength">Longest acceptable peptide</param>
        /// <param name="specificProtease">Protease forwarded to the SingleN / SingleC routines</param>
        /// <returns>List of intervals for the matching specificity</returns>
        /// <exception cref="NotImplementedException">The cleavage specificity is none of SingleN, SingleC, None, Full or Semi</exception>
        internal List <ProteolyticPeptide> GetUnmodifiedPeptides(Protein protein, int maximumMissedCleavages, InitiatorMethionineBehavior initiatorMethionineBehavior,
                                                                 int minPeptideLength, int maxPeptideLength, Protease specificProtease)
        {
            switch (CleavageSpecificity)
            {
                // proteolytic cleavage in one spot (N)
                case CleavageSpecificity.SingleN:
                    return SingleN_Digestion(protein, initiatorMethionineBehavior, maximumMissedCleavages, minPeptideLength, maxPeptideLength, specificProtease);

                // proteolytic cleavage in one spot (C)
                case CleavageSpecificity.SingleC:
                    return SingleC_Digestion(protein, initiatorMethionineBehavior, maximumMissedCleavages, minPeptideLength, maxPeptideLength, specificProtease);

                // top-down: the intact protein, optionally without its first residue, plus annotated products
                case CleavageSpecificity.None:
                {
                    var intact = new List <ProteolyticPeptide>();

                    // full-length form, unless the leading methionine is always cleaved
                    bool methionineAlwaysCleaved = initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave && protein[0] == 'M';
                    if (!methionineAlwaysCleaved && OkayLength(protein.Length, minPeptideLength, maxPeptideLength))
                    {
                        intact.Add(new ProteolyticPeptide(protein, 1, protein.Length, 0, CleavageSpecificity.Full, "full"));
                    }

                    // form without the leading methionine, unless it is always retained
                    bool methionineMayBeCleaved = initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && protein[0] == 'M';
                    if (methionineMayBeCleaved && OkayLength(protein.Length - 1, minPeptideLength, maxPeptideLength))
                    {
                        intact.Add(new ProteolyticPeptide(protein, 2, protein.Length, 0, CleavageSpecificity.Full, "full:M cleaved"));
                    }

                    // proteolysis products with both ends known and an acceptable length
                    IEnumerable <ProteolyticPeptide> products =
                        from product in protein.ProteolysisProducts
                        where product.OneBasedEndPosition.HasValue && product.OneBasedBeginPosition.HasValue &&
                              OkayLength(product.OneBasedEndPosition.Value - product.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength)
                        select new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value, 0, CleavageSpecificity.Full, product.Type);

                    intact.AddRange(products);
                    return intact;
                }

                // Full proteolytic cleavage
                case CleavageSpecificity.Full:
                {
                    var fullySpecific = new List <ProteolyticPeptide>();
                    fullySpecific.AddRange(FullDigestion(protein, initiatorMethionineBehavior, maximumMissedCleavages, minPeptideLength, maxPeptideLength));
                    return fullySpecific;
                }

                // Cleavage rules for semi-specific search
                case CleavageSpecificity.Semi:
                {
                    var semiSpecific = new List <ProteolyticPeptide>();
                    semiSpecific.AddRange(SemiProteolyticDigestion(protein, initiatorMethionineBehavior, maximumMissedCleavages, minPeptideLength, maxPeptideLength));
                    return semiSpecific;
                }

                default:
                    throw new NotImplementedException();
            }
        }
        /// <summary>
        /// Fully specific digestion: asks the protease for the digestion intervals of the protein
        /// and expands each interval into its modified peptide forms.
        /// </summary>
        /// <param name="protein">Protein to digest</param>
        /// <returns>Deferred sequence of modified peptides, one group per digestion interval</returns>
        public IEnumerable <PeptideWithSetModifications> Digestion(Protein protein)
        {
            return Protease
                   .GetDigestionIntervals(protein, MaximumMissedCleavages, InitiatorMethionineBehavior, MinPeptidesLength, MaxPeptidesLength)
                   .SelectMany(interval => interval.GetModifiedPeptides(AllKnownFixedModifications, DigestionParams, VariableModifications));
        }
Exemple #8
0
        /// <summary>
        /// Gets peptides for speedy semispecific digestion of a protein
        /// This generates specific peptides of maximum missed cleavages
        /// These peptides need to be digested post search to their actual sequences
        /// semi-specific search enters here...
        /// </summary>
        /// <param name="protein"></param>
        /// <returns></returns>
        public IEnumerable <ProteolyticPeptide> SpeedySemiSpecificDigestion(Protein protein) //We are only getting fully specific peptides of the maximum cleaved residues here
        {
            List <ProteolyticPeptide> peptides          = new List <ProteolyticPeptide>();
            List <int> oneBasedIndicesToCleaveAfter     = Protease.GetDigestionSiteIndices(protein.BaseSequence); //get peptide bonds to cleave SPECIFICALLY (termini included)
            int        maximumMissedCleavagesIndexShift = MaximumMissedCleavages + 1;

            //it's possible not to go through this loop (maxMissedCleavages+1>number of indexes), and that's okay. It will get digested in the next loops (finish C/N termini)
            for (int i = 0; i < oneBasedIndicesToCleaveAfter.Count - maximumMissedCleavagesIndexShift; i++)
            {
                bool retain = Protease.Retain(i, InitiatorMethionineBehavior, protein[0]);
                if (retain) //it's okay to use i instead of oneBasedIndicesToCleaveAfter[i], because the index of zero is zero and it only checks if it's the N-terminus or not
                {
                    int peptideLength = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift] - oneBasedIndicesToCleaveAfter[i];
                    if (peptideLength >= MinPeptideLength)     //if bigger than min
                    {
                        if (peptideLength <= MaxPeptideLength) //if an acceptable length (bigger than min, smaller than max), add it
                        {
                            peptides.Add(new ProteolyticPeptide(protein, oneBasedIndicesToCleaveAfter[i] + 1, oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift],
                                                                MaximumMissedCleavages, CleavageSpecificity.Full, "full"));
                        }
                        else if (DigestionParams.FragmentationTerminus == FragmentationTerminus.N) //make something with the maximum length and fixed N
                        {
                            int startIndex = oneBasedIndicesToCleaveAfter[i];
                            peptides.Add(new ProteolyticPeptide(protein, startIndex + 1, startIndex + MaxPeptideLength, MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                        }
                        else //It has to be FragmentationTerminus.C //make something with the maximum length and fixed C
                        {
                            int endIndex = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift];
                            peptides.Add(new ProteolyticPeptide(protein, endIndex - MaxPeptideLength + 1, endIndex, MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                        }
                    }
                }

                if (Protease.Cleave(i, InitiatorMethionineBehavior, protein[0]) && (DigestionParams.FragmentationTerminus == FragmentationTerminus.N || !retain)) //it's okay to use i instead of oneBasedIndicesToCleaveAfter[i], because the index of zero is zero and it only checks if it's the N-terminus or not
                {
                    int peptideLength = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift] - 1;
                    if (peptideLength >= MinPeptideLength)
                    {
                        if (peptideLength <= MaxPeptideLength)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, 2, oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift], //two is hardcoded, since M=1, so the next aa is 2 (one based)
                                                                MaximumMissedCleavages, CleavageSpecificity.Full, "full:M cleaved"));
                        }
                        else if (DigestionParams.FragmentationTerminus == FragmentationTerminus.N)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, 2, 2 + MaxPeptideLength - 1, MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                        }
                        else //It has to be FragmentationTerminus.C //make something with the maximum length and fixed C
                        {
                            //kinda tricky, because we'll be creating a duplication if cleavage is variable
                            if (!Protease.Retain(i, InitiatorMethionineBehavior, protein[0])) //only if cleave, because then not made earlier during retain
                            {
                                int tempIndex = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift];
                                peptides.Add(new ProteolyticPeptide(protein, tempIndex - MaxPeptideLength + 1, tempIndex, MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                            }
                        }
                    }
                }
            }

            //wrap up the termini that weren't hit earlier
            int lastIndex          = oneBasedIndicesToCleaveAfter.Count - 1;                                  //last cleavage index (the c-terminus)
            int maxIndexDifference = MaximumMissedCleavages < lastIndex ? MaximumMissedCleavages : lastIndex; //the number of index differences allowed.
            //If the protein has fewer cleavage sites than allowed missed cleavages, just use the number of cleavage sites (lastIndex)
            bool nTerminusFragmentation = DigestionParams.FragmentationTerminus == FragmentationTerminus.N;

            for (int i = 1; i <= maxIndexDifference; i++) //i is the difference (in indexes) between indexes (cleavages), so it needs to start at 1, or the peptide would have length = 0
            {
                int startIndex = nTerminusFragmentation ?
                                 oneBasedIndicesToCleaveAfter[lastIndex - i] :
                                 oneBasedIndicesToCleaveAfter[0];
                int endIndex = nTerminusFragmentation ?
                               oneBasedIndicesToCleaveAfter[lastIndex] :
                               oneBasedIndicesToCleaveAfter[i];

                int peptideLength = endIndex - startIndex;
                if (peptideLength >= MinPeptideLength)
                {
                    if (peptideLength <= MaxPeptideLength) //if okay length, add it up to the terminus
                    {
                        peptides.Add(new ProteolyticPeptide(protein, startIndex + 1, endIndex, i - 1, CleavageSpecificity.Full, "full"));
                    }
                    else //update so that not the end of terminus
                    {
                        if (nTerminusFragmentation)
                        {
                            endIndex = startIndex + MaxPeptideLength;
                        }
                        else
                        {
                            startIndex = endIndex - MaxPeptideLength;
                        }
                        peptides.Add(new ProteolyticPeptide(protein, startIndex + 1, endIndex, i - 1, CleavageSpecificity.Semi, "semi"));
                    }
                }
            }

            // Also digest using the proteolysis product start/end indices
            foreach (ProteolysisProduct product in protein.ProteolysisProducts)
            {
                //if fixed N, we care if the start position is novel
                if (DigestionParams.FragmentationTerminus == FragmentationTerminus.N)
                {
                    //if has value and not a duplicate
                    if (product.OneBasedBeginPosition.HasValue && !oneBasedIndicesToCleaveAfter.Contains(product.OneBasedBeginPosition.Value - 1))
                    {
                        int proteaseClevageIndex = 0;

                        //get the first cleavage index after the start of the proteolysis product
                        while (oneBasedIndicesToCleaveAfter[proteaseClevageIndex] < product.OneBasedBeginPosition.Value)
                        {
                            proteaseClevageIndex++;
                        }
                        //add max missed cleavages
                        proteaseClevageIndex += MaximumMissedCleavages;

                        //set to the end if we overshot
                        if (proteaseClevageIndex >= oneBasedIndicesToCleaveAfter.Count)
                        {
                            proteaseClevageIndex = oneBasedIndicesToCleaveAfter.Count - 1;
                        }
                        int endIndex = oneBasedIndicesToCleaveAfter[proteaseClevageIndex];

                        //set to product end value if cleavages extend past
                        if (product.OneBasedEndPosition.HasValue && product.OneBasedEndPosition.Value < endIndex)
                        {
                            endIndex = product.OneBasedEndPosition.Value;
                        }

                        //limit length to the maximum allowed if necessary
                        if (endIndex - product.OneBasedBeginPosition.Value >= MaxPeptideLength)
                        {
                            endIndex = product.OneBasedBeginPosition.Value + MaxPeptideLength - 1;
                        }

                        //if it's bigger than the minimum allowed, then add it
                        if (endIndex - product.OneBasedBeginPosition.Value + 1 >= MinPeptideLength)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value, endIndex, MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
                        }
                    }
                }
                else //if fixed C, we care if the end position is novel
                {
                    //if has value and not a duplicate
                    if (product.OneBasedEndPosition.HasValue && !oneBasedIndicesToCleaveAfter.Contains(product.OneBasedEndPosition.Value))
                    {
                        int proteaseClevageIndex = 0;

                        //get the first cleavage index after the start of the proteolysis product
                        while (oneBasedIndicesToCleaveAfter[proteaseClevageIndex] < product.OneBasedEndPosition.Value)
                        {
                            proteaseClevageIndex++;
                        }
                        //subtract max missed cleavages
                        proteaseClevageIndex -= (MaximumMissedCleavages + 1); //+1 because we overshot in the while loop

                        //set to the beginning if we overshot
                        if (proteaseClevageIndex < 0)
                        {
                            proteaseClevageIndex = 0;
                        }
                        int beginIndex = oneBasedIndicesToCleaveAfter[proteaseClevageIndex] + 1;

                        //set to product end value if cleavages extend past
                        if (product.OneBasedBeginPosition.HasValue && product.OneBasedBeginPosition.Value > beginIndex)
                        {
                            beginIndex = product.OneBasedBeginPosition.Value;
                        }

                        //limit length to the maximum allowed if necessary
                        if (product.OneBasedEndPosition.Value - beginIndex >= MaxPeptideLength)
                        {
                            beginIndex = product.OneBasedEndPosition.Value - MaxPeptideLength + 1;
                        }
                        //if it's bigger than the minimum allowed, then add it
                        if (product.OneBasedEndPosition.Value - beginIndex + 1 >= MinPeptideLength)
                        {
                            peptides.Add(new ProteolyticPeptide(protein, beginIndex, product.OneBasedEndPosition.Value, MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
                        }
                    }
                }
            }

            return(peptides);
        }
Exemple #9
0
 /// <summary>
 /// Performs protease-specific digestion of a protein by delegating to
 /// <c>Protease.GetUnmodifiedPeptides</c> with this instance's digestion settings.
 /// </summary>
 /// <param name="protein">The protein to digest.</param>
 /// <returns>The unmodified proteolytic peptides returned by the protease.</returns>
 public IEnumerable <ProteolyticPeptide> Digestion(Protein protein)
 {
     // All digestion settings are forwarded unchanged; no filtering happens here.
     IEnumerable <ProteolyticPeptide> unmodifiedPeptides = Protease.GetUnmodifiedPeptides(
         protein,
         MaximumMissedCleavages,
         InitiatorMethionineBehavior,
         MinPeptideLength,
         MaxPeptideLength,
         DigestionParams.SpecificProtease);

     return unmodifiedPeptides;
 }
        /// <summary>
        /// Builds a name-to-protease lookup from a tab-separated protease definition file.
        /// </summary>
        /// <remarks>
        /// The first line of the file is skipped (presumably a header row) and blank lines are ignored.
        /// For every remaining line these zero-based, tab-separated columns are read:
        /// 0 = protease name, 1 = digestion motif string, 4 = cleavage specificity (parsed case-insensitively),
        /// 5 = PSI-MS accession number, 6 = PSI-MS name, 8 = <c>IdWithMotif</c> of the modification
        /// associated with proteolytic cleavage (may be empty).
        /// </remarks>
        /// <param name="path">Path of the protease definition file to read.</param>
        /// <param name="proteaseMods">
        /// Modifications against which column 8 is resolved. When null, column 8 is ignored and
        /// every protease is created without an associated modification.
        /// </param>
        /// <returns>A dictionary keyed by <c>Protease.Name</c>.</returns>
        /// <exception cref="MzLibException">
        /// Thrown when a line names a modification that is not present in <paramref name="proteaseMods"/>,
        /// or when two lines produce proteases with the same name.
        /// </exception>
        public static Dictionary <string, Protease> LoadProteaseDictionary(string path, List <Modification> proteaseMods = null)
        {
            Dictionary <string, Protease> dict = new Dictionary <string, Protease>();

            // Skip(1) drops the first line without copying the remaining lines into a second array.
            foreach (string line in File.ReadAllLines(path).Skip(1))
            {
                if (string.IsNullOrWhiteSpace(line)) // skip empty lines
                {
                    continue;
                }

                string[] fields = line.Split('\t');
                List <DigestionMotif> motifList = DigestionMotif.ParseDigestionMotifsFromString(fields[1]);
                string name = fields[0];
                var    cleavageSpecificity  = ((CleavageSpecificity)Enum.Parse(typeof(CleavageSpecificity), fields[4], true));
                string psiMsAccessionNumber = fields[5];
                string psiMsName            = fields[6];
                //name of the modification that is associated with proteolytic cleavage
                string proteaseModDetails = fields[8];

                Protease protease;
                //if this protease has an associated modification, look it up in the list of mods loaded from the protease mods file
                if (proteaseModDetails != "" && proteaseMods != null)
                {
                    // Single pass over the list; -1 means no modification carries this id.
                    int modIndex = proteaseMods.FindIndex(p => p.IdWithMotif == proteaseModDetails);
                    if (modIndex < 0)
                    {
                        // Fail immediately: nothing is added to the dictionary for an unresolvable modification.
                        throw new MzLibException(proteaseModDetails + " is not a valid modification");
                    }

                    protease = new Protease(name, cleavageSpecificity, psiMsAccessionNumber, psiMsName, motifList, proteaseMods[modIndex]);
                }
                else
                {
                    protease = new Protease(name, cleavageSpecificity, psiMsAccessionNumber, psiMsName, motifList);
                }

                // Key on protease.Name (not the raw column) so the duplicate check sees the name the object actually reports.
                if (dict.ContainsKey(protease.Name))
                {
                    throw new MzLibException("More than one protease named " + protease.Name + " exists");
                }

                dict.Add(protease.Name, protease);
            }

            return(dict);
        }