Example #1
0
        /// <summary>
        /// Collects the candidate intervals for a semi-specific digestion of <paramref name="protein"/>
        /// and expands each of them into its modified peptide forms.
        ///
        /// Three groups of intervals are gathered, in this order:
        /// 1. spans whose two ends lie MaximumMissedCleavages + 1 entries apart in the digestion-site list
        ///    (plus a variant starting at residue 2 when Protease.Cleave reports true for that site);
        /// 2. shorter spans anchored at the last digestion site (TerminusTypeSemiProtease == N)
        ///    or at the first digestion site (otherwise);
        /// 3. the protein's proteolysis products that have both positions set and do not cover residues 1..protein.Length.
        /// </summary>
        /// <param name="protein">Protein whose base sequence is digested.</param>
        /// <returns>
        /// The modified peptides produced by GetModifiedPeptides for every collected interval.
        /// The sequence is built with SelectMany, so the expansion runs when the result is enumerated.
        /// </returns>
        public IEnumerable <PeptideWithSetModifications> SemiSpecificDigestion(Protein protein)
        {
            List<int> cleavageSites = Protease.GetDigestionSiteIndices(protein.BaseSequence);
            List<ProteolyticPeptide> candidates = new List<ProteolyticPeptide>();

            // Distance, in site-list entries, between the two ends of a span with the maximum number of missed cleavages.
            int siteSpan      = MaximumMissedCleavages + 1;
            int fullSpanCount = cleavageSites.Count - siteSpan;

            for (int site = 0; site < fullSpanCount; site++)
            {
                int spanStart = cleavageSites[site];
                int spanEnd   = cleavageSites[site + siteSpan];

                // NOTE(review): the fourth constructor argument below is the residue span of the interval,
                // whereas the proteolysis-product intervals further down pass 0 - confirm which quantity
                // the ProteolyticPeptide constructor expects in that position.
                if (Protease.Retain(site, InitiatorMethionineBehavior, protein[0]))
                {
                    int retainedLength = spanEnd - spanStart;
                    if (Protease.OkayLength(retainedLength, MinPeptidesLength, MaxPeptidesLength))
                    {
                        candidates.Add(new ProteolyticPeptide(protein, spanStart + 1, spanEnd, retainedLength, "semi"));
                    }
                }

                if (Protease.Cleave(site, InitiatorMethionineBehavior, protein[0]))
                {
                    // The interval starts at residue 2, i.e. residue 1 is dropped.
                    int cleavedLength = spanEnd - 1;
                    if (Protease.OkayLength(cleavedLength, MinPeptidesLength, MaxPeptidesLength))
                    {
                        candidates.Add(new ProteolyticPeptide(protein, 2, spanEnd, cleavedLength, "semi:M cleaved"));
                    }
                }
            }

            // Spans with fewer missed cleavages that the loop above cannot reach, anchored at one end of the site list.
            int finalSite = cleavageSites.Count - 1;
            int maxOffset = finalSite > MaximumMissedCleavages ? MaximumMissedCleavages : finalSite;

            for (int offset = 1; offset <= maxOffset; offset++)
            {
                int    anchoredStart;
                int    anchoredEnd;
                string description;

                if (DigestionParams.TerminusTypeSemiProtease == FragmentationTerminus.N)
                {
                    // For terminus type N the extra spans end at the last digestion site.
                    anchoredStart = cleavageSites[finalSite - offset];
                    anchoredEnd   = cleavageSites[finalSite];
                    description   = "semiN";
                }
                else
                {
                    // Otherwise the extra spans start at the first digestion site.
                    anchoredStart = cleavageSites[0];
                    anchoredEnd   = cleavageSites[offset];
                    description   = "semiC";
                }

                int anchoredLength = anchoredEnd - anchoredStart;
                if (Protease.OkayLength(anchoredLength, MinPeptidesLength, MaxPeptidesLength))
                {
                    candidates.Add(new ProteolyticPeptide(protein, anchoredStart + 1, anchoredEnd, anchoredLength, description));
                }
            }

            // Proteolysis products with both positions known, skipping any product that spans the entire protein.
            IEnumerable<ProteolyticPeptide> productIntervals = protein.ProteolysisProducts
                .Where(product => product.OneBasedEndPosition.HasValue && product.OneBasedBeginPosition.HasValue)
                .Where(product => !(product.OneBasedBeginPosition == 1 && product.OneBasedEndPosition == protein.Length))
                .Select(product => new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value, product.OneBasedEndPosition.Value,
                                                          0, product.Type + " start"));
            candidates.AddRange(productIntervals);

            return candidates.SelectMany(interval => interval.GetModifiedPeptides(AllKnownFixedModifications, DigestionParams, VariableModifications));
        }
Example #2
0
        /// <summary>
        /// Produces the intervals used by the speedy semi-specific search.
        ///
        /// Each interval is either a fully specific span between two digestion sites ("full") or,
        /// when such a span exceeds MaxPeptideLength, a span of exactly MaxPeptideLength residues that keeps
        /// one end fixed ("semi"): the start is kept when FragmentationTerminus is N, the end otherwise.
        /// The intervals are gathered in three passes:
        /// 1. spans whose ends lie MaximumMissedCleavages + 1 entries apart in the digestion-site list,
        ///    with an additional variant starting at residue 2 when Protease.Cleave reports true;
        /// 2. spans with fewer missed cleavages anchored at the last site (fixed N) or the first site (fixed C);
        /// 3. spans derived from proteolysis products whose begin (fixed N) or end (fixed C) position
        ///    does not coincide with a digestion site.
        /// </summary>
        /// <param name="protein">Protein whose base sequence is digested.</param>
        /// <returns>The collected intervals, in the order in which the three passes produced them.</returns>
        public IEnumerable <ProteolyticPeptide> SpeedySemiSpecificDigestion(Protein protein) //We are only getting fully specific peptides of the maximum cleaved residues here
        {
            List<int> cleavageSites = Protease.GetDigestionSiteIndices(protein.BaseSequence);
            List<ProteolyticPeptide> candidates = new List<ProteolyticPeptide>();

            bool fixedNTerminus = DigestionParams.FragmentationTerminus == FragmentationTerminus.N;

            // Distance, in site-list entries, between the two ends of a span with the maximum number of missed cleavages.
            int siteSpan      = MaximumMissedCleavages + 1;
            int fullSpanCount = cleavageSites.Count - siteSpan;

            // Pass 1. This loop does not run at all when the site list has no more than siteSpan entries;
            // pass 2 below covers the shorter spans.
            for (int site = 0; site < fullSpanCount; site++)
            {
                int spanStart = cleavageSites[site];
                int spanEnd   = cleavageSites[site + siteSpan];

                // Retain/Cleave receive the position in the site list rather than the residue index;
                // presumably they only distinguish position 0 (the N-terminus) from the rest - confirm against Protease.
                bool initiatorRetained = Protease.Retain(site, InitiatorMethionineBehavior, protein[0]);
                if (initiatorRetained)
                {
                    int retainedLength = spanEnd - spanStart;
                    if (retainedLength >= MinPeptideLength)
                    {
                        ProteolyticPeptide retainedPeptide;
                        if (retainedLength <= MaxPeptideLength)
                        {
                            // Acceptable length: keep the whole span.
                            retainedPeptide = new ProteolyticPeptide(protein, spanStart + 1, spanEnd,
                                                                     MaximumMissedCleavages, CleavageSpecificity.Full, "full");
                        }
                        else if (fixedNTerminus)
                        {
                            // Too long: keep the start and take MaxPeptideLength residues.
                            retainedPeptide = new ProteolyticPeptide(protein, spanStart + 1, spanStart + MaxPeptideLength,
                                                                     MaximumMissedCleavages, CleavageSpecificity.Semi, "semi");
                        }
                        else
                        {
                            // Too long: keep the end and take MaxPeptideLength residues.
                            retainedPeptide = new ProteolyticPeptide(protein, spanEnd - MaxPeptideLength + 1, spanEnd,
                                                                     MaximumMissedCleavages, CleavageSpecificity.Semi, "semi");
                        }
                        candidates.Add(retainedPeptide);
                    }
                }

                bool initiatorCleaved = Protease.Cleave(site, InitiatorMethionineBehavior, protein[0]);
                if (initiatorCleaved && (fixedNTerminus || !initiatorRetained))
                {
                    // The variant starts at residue 2 (one-based), i.e. residue 1 is dropped.
                    int cleavedLength = spanEnd - 1;
                    if (cleavedLength >= MinPeptideLength)
                    {
                        if (cleavedLength <= MaxPeptideLength)
                        {
                            candidates.Add(new ProteolyticPeptide(protein, 2, spanEnd,
                                                                  MaximumMissedCleavages, CleavageSpecificity.Full, "full:M cleaved"));
                        }
                        else if (fixedNTerminus)
                        {
                            candidates.Add(new ProteolyticPeptide(protein, 2, MaxPeptideLength + 1,
                                                                  MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                        }
                        else if (!Protease.Retain(site, InitiatorMethionineBehavior, protein[0]))
                        {
                            // Fixed C: the same interval was already added in the retained branch above
                            // whenever Retain is true, so it is only added here when Retain is false.
                            candidates.Add(new ProteolyticPeptide(protein, spanEnd - MaxPeptideLength + 1, spanEnd,
                                                                  MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                        }
                    }
                }
            }

            // Pass 2. Spans with fewer missed cleavages, anchored at one end of the site list.
            // The offset is capped at the last site index when there are fewer sites than allowed missed cleavages.
            int finalSite = cleavageSites.Count - 1;
            int maxOffset = finalSite > MaximumMissedCleavages ? MaximumMissedCleavages : finalSite;

            // offset is the distance in site-list entries between the two ends; it starts at 1 because 0 would give an empty span.
            for (int offset = 1; offset <= maxOffset; offset++)
            {
                int anchoredStart;
                int anchoredEnd;
                if (fixedNTerminus)
                {
                    anchoredStart = cleavageSites[finalSite - offset];
                    anchoredEnd   = cleavageSites[finalSite];
                }
                else
                {
                    anchoredStart = cleavageSites[0];
                    anchoredEnd   = cleavageSites[offset];
                }

                int anchoredLength = anchoredEnd - anchoredStart;
                if (anchoredLength < MinPeptideLength)
                {
                    continue;
                }

                if (anchoredLength <= MaxPeptideLength)
                {
                    candidates.Add(new ProteolyticPeptide(protein, anchoredStart + 1, anchoredEnd, offset - 1, CleavageSpecificity.Full, "full"));
                    continue;
                }

                // Too long: pull the non-fixed end in so that exactly MaxPeptideLength residues remain.
                if (fixedNTerminus)
                {
                    anchoredEnd = anchoredStart + MaxPeptideLength;
                }
                else
                {
                    anchoredStart = anchoredEnd - MaxPeptideLength;
                }
                candidates.Add(new ProteolyticPeptide(protein, anchoredStart + 1, anchoredEnd, offset - 1, CleavageSpecificity.Semi, "semi"));
            }

            // Pass 3. Intervals derived from the proteolysis product begin/end positions.
            // NOTE(review): both scans below walk the site list without an upper bound check, so they rely on
            // the product position not exceeding the last digestion site - confirm against the data source.
            foreach (ProteolysisProduct product in protein.ProteolysisProducts)
            {
                if (fixedNTerminus)
                {
                    // Fixed N: only a begin position that is not already a digestion boundary is of interest.
                    if (!product.OneBasedBeginPosition.HasValue)
                    {
                        continue;
                    }
                    int productBegin = product.OneBasedBeginPosition.Value;
                    if (cleavageSites.Contains(productBegin - 1))
                    {
                        continue;
                    }

                    // Find the first digestion site at or after the product's begin position...
                    int siteIndex = 0;
                    while (cleavageSites[siteIndex] < productBegin)
                    {
                        siteIndex++;
                    }
                    // ...allow the maximum number of missed cleavages beyond it, clamped to the last site.
                    siteIndex += MaximumMissedCleavages;
                    if (siteIndex >= cleavageSites.Count)
                    {
                        siteIndex = cleavageSites.Count - 1;
                    }
                    int intervalEnd = cleavageSites[siteIndex];

                    // Do not extend past the product's own end position.
                    if (product.OneBasedEndPosition.HasValue && product.OneBasedEndPosition.Value < intervalEnd)
                    {
                        intervalEnd = product.OneBasedEndPosition.Value;
                    }

                    // Cap the interval at MaxPeptideLength residues.
                    if (intervalEnd - productBegin >= MaxPeptideLength)
                    {
                        intervalEnd = productBegin + MaxPeptideLength - 1;
                    }

                    // Keep it only when it reaches the minimum length.
                    if (intervalEnd - productBegin + 1 >= MinPeptideLength)
                    {
                        candidates.Add(new ProteolyticPeptide(protein, productBegin, intervalEnd, MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
                    }
                }
                else
                {
                    // Fixed C: only an end position that is not already a digestion site is of interest.
                    if (!product.OneBasedEndPosition.HasValue)
                    {
                        continue;
                    }
                    int productEnd = product.OneBasedEndPosition.Value;
                    if (cleavageSites.Contains(productEnd))
                    {
                        continue;
                    }

                    // Find the first digestion site at or after the product's end position...
                    int siteIndex = 0;
                    while (cleavageSites[siteIndex] < productEnd)
                    {
                        siteIndex++;
                    }
                    // ...then step back over it and over the maximum number of missed cleavages, clamped to the first site.
                    siteIndex -= siteSpan;
                    if (siteIndex < 0)
                    {
                        siteIndex = 0;
                    }
                    int intervalBegin = cleavageSites[siteIndex] + 1;

                    // Do not extend before the product's own begin position.
                    if (product.OneBasedBeginPosition.HasValue && product.OneBasedBeginPosition.Value > intervalBegin)
                    {
                        intervalBegin = product.OneBasedBeginPosition.Value;
                    }

                    // Cap the interval at MaxPeptideLength residues.
                    if (productEnd - intervalBegin >= MaxPeptideLength)
                    {
                        intervalBegin = productEnd - MaxPeptideLength + 1;
                    }

                    // Keep it only when it reaches the minimum length.
                    // NOTE(review): the description suffix is " start" in this fixed-C branch as well - confirm that is intended.
                    if (productEnd - intervalBegin + 1 >= MinPeptideLength)
                    {
                        candidates.Add(new ProteolyticPeptide(protein, intervalBegin, productEnd, MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
                    }
                }
            }

            return candidates;
        }