/// <summary>
/// Gets peptides for semispecific digestion of a protein.
///
/// Semi-specific search enters here: candidate intervals are collected from the protease's digestion
/// sites and from the protein's proteolysis products, and each interval is then expanded into its
/// modified peptides.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <returns>A deferred sequence with the modified peptides of every collected interval.</returns>
public IEnumerable<PeptideWithSetModifications> SemiSpecificDigestion(Protein protein)
{
    var candidates = new List<ProteolyticPeptide>();
    List<int> cleavageSites = Protease.GetDigestionSiteIndices(protein.BaseSequence);

    // Windows that span exactly (MaximumMissedCleavages + 1) consecutive digestion sites.
    int span = MaximumMissedCleavages + 1;
    int windowCount = cleavageSites.Count - MaximumMissedCleavages - 1;
    for (int first = 0; first < windowCount; first++)
    {
        int windowStart = cleavageSites[first];
        int windowEnd = cleavageSites[first + span];

        bool keepAsIs = Protease.Retain(first, InitiatorMethionineBehavior, protein[0])
            && Protease.OkayLength(windowEnd - windowStart, MinPeptidesLength, MaxPeptidesLength);
        if (keepAsIs)
        {
            candidates.Add(new ProteolyticPeptide(protein, windowStart + 1, windowEnd, windowEnd - windowStart, "semi"));
        }

        // Variant that starts at residue 2, i.e. with the first residue removed.
        bool dropFirstResidue = Protease.Cleave(first, InitiatorMethionineBehavior, protein[0])
            && Protease.OkayLength(windowEnd - 1, MinPeptidesLength, MaxPeptidesLength);
        if (dropFirstResidue)
        {
            candidates.Add(new ProteolyticPeptide(protein, 2, windowEnd, windowEnd - 1, "semi:M cleaved"));
        }
    }

    // Shorter windows anchored at one end of the protein, which the loop above cannot produce.
    int lastSite = cleavageSites.Count - 1;
    int maxOffset = Math.Min(MaximumMissedCleavages, lastSite);
    for (int offset = 1; offset <= maxOffset; offset++)
    {
        // Tricky: terminus type N means the extra peptides are the ones ending at the C-terminus.
        bool anchoredAtCTerminus = DigestionParams.TerminusTypeSemiProtease == FragmentationTerminus.N;

        int from;
        int to;
        string label;
        if (anchoredAtCTerminus)
        {
            from = cleavageSites[lastSite - offset];
            to = cleavageSites[lastSite];
            label = "semiN";
        }
        else // terminus type C: anchored at the first digestion site
        {
            from = cleavageSites[0];
            to = cleavageSites[offset];
            label = "semiC";
        }

        if (Protease.OkayLength(to - from, MinPeptidesLength, MaxPeptidesLength))
        {
            candidates.Add(new ProteolyticPeptide(protein, from + 1, to, to - from, label));
        }
    }

    // Also digest using the proteolysis product start/end indices.
    foreach (var product in protein.ProteolysisProducts)
    {
        if (!product.OneBasedEndPosition.HasValue || !product.OneBasedBeginPosition.HasValue)
        {
            continue;
        }

        // A product that covers the whole protein adds nothing new.
        bool coversWholeProtein = product.OneBasedBeginPosition == 1
            && product.OneBasedEndPosition == protein.Length;
        if (coversWholeProtein)
        {
            continue;
        }

        candidates.Add(new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value,
            product.OneBasedEndPosition.Value, 0, product.Type + " start"));
    }

    return candidates.SelectMany(
        interval => interval.GetModifiedPeptides(AllKnownFixedModifications, DigestionParams, VariableModifications));
}
} //used to look for unlabeled proteins (in addition to labeled proteins) for SILAC experiments

/// <summary>
/// Compares this instance with another object, setting by setting.
/// </summary>
/// <param name="obj">Object to compare against.</param>
/// <returns>
/// True when <paramref name="obj"/> is a <see cref="DigestionParams"/> and every compared setting
/// (missed cleavages, peptide length bounds, initiator methionine behavior, modification limits,
/// protease, search mode, fragmentation terminus and the SILAC unlabeled-protein flag) is equal.
/// </returns>
public override bool Equals(object obj)
{
    if (!(obj is DigestionParams other))
    {
        return false;
    }

    // Short-circuit evaluation: the first setting that differs ends the comparison.
    return MaxMissedCleavages.Equals(other.MaxMissedCleavages)
        && MinPeptideLength.Equals(other.MinPeptideLength)
        && MaxPeptideLength.Equals(other.MaxPeptideLength)
        && InitiatorMethionineBehavior.Equals(other.InitiatorMethionineBehavior)
        && MaxModificationIsoforms.Equals(other.MaxModificationIsoforms)
        && MaxModsForPeptide.Equals(other.MaxModsForPeptide)
        && Protease.Equals(other.Protease)
        && SearchModeType.Equals(other.SearchModeType)
        && FragmentationTerminus.Equals(other.FragmentationTerminus)
        && GeneratehUnlabeledProteinsForSilac.Equals(other.GeneratehUnlabeledProteinsForSilac);
}
/// <summary>
/// Loads protease definitions from a tab-separated file into a dictionary keyed by protease name.
/// </summary>
/// <remarks>
/// The first line of the file is discarded as a header. Every other non-blank line is split on tabs and
/// each column is split again on '|'. Columns are read as follows: 0 = name, 1 = sequences inducing
/// cleavage, 2 = sequences preventing cleavage (comma-separated inside each '|' entry), 3 = terminus for
/// each inducing sequence, 4 = cleavage specificity, 5 = PSI-MS accession number, 6 = PSI-MS name,
/// 7 = site regular expression.
/// Blank or whitespace-only lines are skipped instead of failing on a missing column.
/// </remarks>
/// <param name="proteasesLocation">Path of the protease definition file.</param>
/// <returns>The proteases that were read, keyed by <c>Protease.Name</c>.</returns>
/// <exception cref="ArgumentException">
/// Thrown by <see cref="Dictionary{TKey,TValue}.Add"/> when two rows produce the same protease name, and
/// by <see cref="Enum.Parse(Type, string, bool)"/> when a terminus or specificity value is not recognized.
/// </exception>
public static Dictionary<string, Protease> LoadProteaseDictionary(string proteasesLocation)
{
    Dictionary<string, Protease> dict = new Dictionary<string, Protease>();

    using (StreamReader proteases = new StreamReader(proteasesLocation))
    {
        proteases.ReadLine(); // discard the header row

        string line;
        while ((line = proteases.ReadLine()) != null)
        {
            // A blank line has no columns to index; skip it rather than crash on fields[1].
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[][] fields = line.Split('\t').Select(x => x.Split('|')).ToArray();
            string name = fields[0][0];
            var sequencesInducingCleavage = new List<Tuple<string, FragmentationTerminus>>();
            var sequencePreventingCleavage = new List<Tuple<string, FragmentationTerminus>>();

            // Preventing sequences are only read when none of the '|' entries in column 2 is empty.
            bool hasPreventingSequences = !fields[2].Contains("");

            for (int i = 0; i < fields[1].Length; i++)
            {
                if (fields[1][i].Length == 0)
                {
                    continue;
                }

                // The terminus in column 3 applies to the inducing sequence and to its preventing sequences.
                var terminus = (FragmentationTerminus)Enum.Parse(typeof(FragmentationTerminus), fields[3][i], true);
                sequencesInducingCleavage.Add(new Tuple<string, FragmentationTerminus>(fields[1][i], terminus));

                if (hasPreventingSequences)
                {
                    string[] preventing = fields[2][i].Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (string preventingSequence in preventing)
                    {
                        sequencePreventingCleavage.Add(new Tuple<string, FragmentationTerminus>(preventingSequence, terminus));
                    }
                }
            }

            var cleavageSpecificity = (CleavageSpecificity)Enum.Parse(typeof(CleavageSpecificity), fields[4][0], true);
            string psiMsAccessionNumber = fields[5][0];
            string psiMsName = fields[6][0];
            string siteRegexp = fields[7][0];

            var protease = new Protease(name, sequencesInducingCleavage, sequencePreventingCleavage,
                cleavageSpecificity, psiMsAccessionNumber, psiMsName, siteRegexp);
            dict.Add(protease.Name, protease);
        }
    }

    return dict;
}
/// <summary>
/// Gets peptides for the singleC protease: every generated peptide is described by its C-terminal end
/// position, with the start placed as far towards the N-terminus as the limits allow.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <param name="initiatorMethionineBehavior">Passed to <c>Retain</c> to decide whether the protein starts at residue 1 or 2.</param>
/// <param name="maximumMissedCleavages">Number of missed cleavages used when a specific protease constrains the digestion.</param>
/// <param name="minPeptideLength">Minimum peptide length.</param>
/// <param name="maxPeptideLength">Maximum peptide length.</param>
/// <param name="specificProtease">Protease whose digestion sites constrain the peptides; when it equals this protease, no site constraint is applied.</param>
/// <returns>The generated peptides.</returns>
private List<ProteolyticPeptide> SingleC_Digestion(Protein protein, InitiatorMethionineBehavior initiatorMethionineBehavior,
    int maximumMissedCleavages, int minPeptideLength, int maxPeptideLength, Protease specificProtease)
{
    var result = new List<ProteolyticPeptide>();

    // Where does the protein start?
    int firstResidue = Retain(0, initiatorMethionineBehavior, protein[0]) ? 1 : 2;

    if (Equals(specificProtease))
    {
        // No specific protease: one peptide per possible end position.
        int proteinLength = protein.Length;
        for (int end = 1; end <= proteinLength; end++)
        {
            // Longest peptide that could end here; it must reach the minimum length.
            int longestPossible = end - firstResidue + 1;
            if (!OkayMinLength(longestPossible, minPeptideLength))
            {
                continue;
            }

            // Start at the protein start, or as far back as the maximum length allows
            // (+1 because the range is inclusive; otherwise the peptide would be one residue too long).
            int start = Math.Max(firstResidue, end - maxPeptideLength + 1);
            result.Add(new ProteolyticPeptide(protein, start, end, 0, CleavageSpecificity.SingleC, "SingleC"));
        }

        return result;
    }

    // A specific protease is given, so the missed-cleavage rules apply. Only peptides with the maximum
    // number of missed cleavages are generated, except near the terminus handled at the end.
    List<int> cleavageSites = specificProtease.GetDigestionSiteIndices(protein.BaseSequence); // termini included
    cleavageSites[0] = firstResidue - 1; // first site reflects the initiator methionine rules
    int siteShift = maximumMissedCleavages + 1;

    for (int siteIndex = cleavageSites.Count - 1; siteIndex > siteShift; siteIndex--)
    {
        int end = cleavageSites[siteIndex];
        int farthestStart = cleavageSites[siteIndex - siteShift];
        int fullLength = end - farthestStart;
        if (fullLength < minPeptideLength)
        {
            continue;
        }

        // If the far cleavage site is too far away, crop the peptide to the maximum length.
        int start = fullLength > maxPeptideLength ? end - maxPeptideLength : farthestStart;
        int previousSite = cleavageSites[siteIndex - 1];

        // Walk the end position back until the previous cleavage site, or until the peptides get too short.
        while (end > previousSite && end - start >= minPeptideLength)
        {
            result.Add(new ProteolyticPeptide(protein, start + 1, end, maximumMissedCleavages, CleavageSpecificity.SingleC, "SingleC"));

            // A cropped start follows the end position backwards until it reaches the cleavage site.
            if (start != farthestStart)
            {
                start--;
            }

            end--;
        }
    }

    // Wrap up the terminus. If more missed cleavages are allowed than there are sites,
    // use the last available site instead.
    int terminalShift = Math.Min(siteShift, cleavageSites.Count - 1);
    int terminalEnd = cleavageSites[terminalShift];
    int terminalStart = Math.Max(firstResidue, terminalEnd - maxPeptideLength + 1);
    int minLengthShift = minPeptideLength - 1; // converts a length to an inclusive one-based offset

    while (terminalEnd >= terminalStart + minLengthShift)
    {
        result.Add(new ProteolyticPeptide(protein, terminalStart, terminalEnd, maximumMissedCleavages, CleavageSpecificity.SingleC, "SingleC"));

        // A cropped start moves back with the end until it reaches the protein start.
        if (terminalStart != firstResidue)
        {
            terminalStart--;
        }

        terminalEnd--;
    }

    return result;
}
/// <summary>
/// Gets peptides for the singleN protease: every generated peptide is described by its N-terminal start
/// position, with the end placed as far towards the C-terminus as the limits allow.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <param name="initiatorMethionineBehavior">Passed to <c>Retain</c> to decide whether the protein starts at residue 1 or 2.</param>
/// <param name="maximumMissedCleavages">Number of missed cleavages used when a specific protease constrains the digestion.</param>
/// <param name="minPeptideLength">Minimum peptide length.</param>
/// <param name="maxPeptideLength">Maximum peptide length; values up to <see cref="int.MaxValue"/> are supported.</param>
/// <param name="specificProtease">Protease whose digestion sites constrain the peptides; when it equals this protease, no site constraint is applied.</param>
/// <returns>The generated peptides.</returns>
private List<ProteolyticPeptide> SingleN_Digestion(Protein protein, InitiatorMethionineBehavior initiatorMethionineBehavior,
    int maximumMissedCleavages, int minPeptideLength, int maxPeptideLength, Protease specificProtease)
{
    List<ProteolyticPeptide> peptides = new List<ProteolyticPeptide>();

    // Where does the protein start?
    int proteinStart = Retain(0, initiatorMethionineBehavior, protein[0]) ? 1 : 2;

    if (Equals(specificProtease))
    {
        // When maxPeptideLength is int.MaxValue (or close to it), adding it to a position overflows and
        // becomes negative, so detect that case up front and simply run to the end of the protein.
        bool maxTooBig = protein.Length + maxPeptideLength < 0;

        for (; proteinStart <= protein.Length; proteinStart++)
        {
            if (OkayMinLength(protein.Length - proteinStart + 1, minPeptideLength))
            {
                // End at the protein end, or as far as the maximum length allows
                // (-1 because the range is inclusive; otherwise the peptide would be one residue too long).
                int end = maxTooBig
                    ? protein.Length
                    : Math.Min(protein.Length, proteinStart + maxPeptideLength - 1);
                peptides.Add(new ProteolyticPeptide(protein, proteinStart, end, 0, CleavageSpecificity.SingleN, "SingleN"));
            }
        }

        return peptides;
    }

    // A specific protease is given, so the missed-cleavage rules apply. Only peptides with the maximum
    // number of missed cleavages are generated, except near the terminus handled at the end.
    List<int> oneBasedIndicesToCleaveAfter = specificProtease.GetDigestionSiteIndices(protein.BaseSequence); // termini included
    oneBasedIndicesToCleaveAfter[0] = proteinStart - 1; // first site reflects the initiator methionine rules
    int maximumMissedCleavagesIndexShift = maximumMissedCleavages + 1;

    for (int i = 0; i < oneBasedIndicesToCleaveAfter.Count - maximumMissedCleavagesIndexShift; i++)
    {
        int startIndex = oneBasedIndicesToCleaveAfter[i];
        int endProteaseIndex = oneBasedIndicesToCleaveAfter[i + maximumMissedCleavagesIndexShift];
        int peptideLength = endProteaseIndex - startIndex;
        if (peptideLength < minPeptideLength)
        {
            continue;
        }

        // If the far cleavage site is too far away, crop the peptide to the maximum length.
        // No overflow here: this addition only runs when maxPeptideLength < peptideLength.
        int endActualIndex = peptideLength > maxPeptideLength
            ? startIndex + maxPeptideLength
            : endProteaseIndex;

        int nextStartIndex = oneBasedIndicesToCleaveAfter[i + 1] + 1;

        // Walk the start position forward until the next cleavage site, or until the peptides get too short.
        for (; (startIndex + 1 < nextStartIndex) && (endActualIndex - startIndex >= minPeptideLength); startIndex++)
        {
            peptides.Add(new ProteolyticPeptide(protein, startIndex + 1, endActualIndex, maximumMissedCleavages, CleavageSpecificity.SingleN, "SingleN"));

            // A cropped end follows the start position forward until it reaches the cleavage site.
            if (endActualIndex != endProteaseIndex)
            {
                endActualIndex++;
            }
        }
    }

    // Wrap up the terminus. If more missed cleavages are allowed than there are sites,
    // start from the first site instead.
    if (oneBasedIndicesToCleaveAfter.Count < maximumMissedCleavagesIndexShift)
    {
        maximumMissedCleavagesIndexShift = oneBasedIndicesToCleaveAfter.Count;
    }

    int lastStartIndex = oneBasedIndicesToCleaveAfter[oneBasedIndicesToCleaveAfter.Count - maximumMissedCleavagesIndexShift] + 1;
    int proteinEndIndex = oneBasedIndicesToCleaveAfter[oneBasedIndicesToCleaveAfter.Count - 1]; // end of protein

    // Equivalent to Math.Min(proteinEndIndex, lastStartIndex + maxPeptideLength - 1), but written as a
    // comparison of distances: with maxPeptideLength near int.MaxValue the sum overflows to a negative
    // number, Math.Min would pick it, and every peptide of this terminus would be silently dropped.
    int lastEndIndex = maxPeptideLength - 1 >= proteinEndIndex - lastStartIndex
        ? proteinEndIndex
        : lastStartIndex + maxPeptideLength - 1;

    for (; lastStartIndex + minPeptideLength - 1 <= lastEndIndex; lastStartIndex++)
    {
        peptides.Add(new ProteolyticPeptide(protein, lastStartIndex, lastEndIndex, maximumMissedCleavages, CleavageSpecificity.SingleN, "SingleN"));

        // A cropped end moves forward with the start until it reaches the protein end.
        if (lastEndIndex != proteinEndIndex)
        {
            lastEndIndex++;
        }
    }

    return peptides;
}
/// <summary>
/// Gets intervals of a protein sequence that will result from digestion by this protease,
/// dispatching on this protease's cleavage specificity.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <param name="maximumMissedCleavages">Maximum number of missed cleavages.</param>
/// <param name="initiatorMethionineBehavior">How a leading methionine is treated.</param>
/// <param name="minPeptideLength">Minimum peptide length.</param>
/// <param name="maxPeptideLength">Maximum peptide length.</param>
/// <param name="specificProtease">Protease forwarded to the SingleN / SingleC digestions.</param>
/// <returns>The unmodified peptides produced by the digestion.</returns>
/// <exception cref="NotImplementedException">The cleavage specificity is not one of the handled values.</exception>
internal List<ProteolyticPeptide> GetUnmodifiedPeptides(Protein protein, int maximumMissedCleavages,
    InitiatorMethionineBehavior initiatorMethionineBehavior, int minPeptideLength, int maxPeptideLength, Protease specificProtease)
{
    List<ProteolyticPeptide> digested = new List<ProteolyticPeptide>();

    switch (CleavageSpecificity)
    {
        // proteolytic cleavage in one spot (N)
        case CleavageSpecificity.SingleN:
            digested = SingleN_Digestion(protein, initiatorMethionineBehavior, maximumMissedCleavages,
                minPeptideLength, maxPeptideLength, specificProtease);
            break;

        // proteolytic cleavage in one spot (C)
        case CleavageSpecificity.SingleC:
            digested = SingleC_Digestion(protein, initiatorMethionineBehavior, maximumMissedCleavages,
                minPeptideLength, maxPeptideLength, specificProtease);
            break;

        // top-down
        case CleavageSpecificity.None:
        {
            // retain methionine: skipped only when cleavage is mandatory and the protein starts with M
            bool mustCleaveMethionine = initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave && protein[0] == 'M';
            if (!mustCleaveMethionine && OkayLength(protein.Length, minPeptideLength, maxPeptideLength))
            {
                digested.Add(new ProteolyticPeptide(protein, 1, protein.Length, 0, CleavageSpecificity.Full, "full"));
            }

            // cleave methionine: allowed unless retention is mandatory, and only if the protein starts with M
            bool mayCleaveMethionine = initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && protein[0] == 'M';
            if (mayCleaveMethionine && OkayLength(protein.Length - 1, minPeptideLength, maxPeptideLength))
            {
                digested.Add(new ProteolyticPeptide(protein, 2, protein.Length, 0, CleavageSpecificity.Full, "full:M cleaved"));
            }

            // Also digest using the proteolysis product start/end indices
            foreach (var product in protein.ProteolysisProducts)
            {
                if (!product.OneBasedEndPosition.HasValue || !product.OneBasedBeginPosition.HasValue)
                {
                    continue;
                }

                int productLength = product.OneBasedEndPosition.Value - product.OneBasedBeginPosition.Value + 1;
                if (OkayLength(productLength, minPeptideLength, maxPeptideLength))
                {
                    digested.Add(new ProteolyticPeptide(protein, product.OneBasedBeginPosition.Value,
                        product.OneBasedEndPosition.Value, 0, CleavageSpecificity.Full, product.Type));
                }
            }

            break;
        }

        // Full proteolytic cleavage
        case CleavageSpecificity.Full:
            digested.AddRange(FullDigestion(protein, initiatorMethionineBehavior, maximumMissedCleavages,
                minPeptideLength, maxPeptideLength));
            break;

        // Cleavage rules for semi-specific search
        case CleavageSpecificity.Semi:
            digested.AddRange(SemiProteolyticDigestion(protein, initiatorMethionineBehavior, maximumMissedCleavages,
                minPeptideLength, maxPeptideLength));
            break;

        default:
            throw new NotImplementedException();
    }

    return digested;
}
/// <summary>
/// Gets peptides for specific protease digestion of a protein: the protease supplies the digestion
/// intervals and each interval is expanded into its modified peptides.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <returns>A deferred sequence with the modified peptides of every digestion interval.</returns>
public IEnumerable<PeptideWithSetModifications> Digestion(Protein protein)
{
    var digestionIntervals = Protease.GetDigestionIntervals(
        protein,
        MaximumMissedCleavages,
        InitiatorMethionineBehavior,
        MinPeptidesLength,
        MaxPeptidesLength);

    return digestionIntervals.SelectMany(
        interval => interval.GetModifiedPeptides(AllKnownFixedModifications, DigestionParams, VariableModifications));
}
/// <summary>
/// Gets peptides for speedy semispecific digestion of a protein.
/// This generates specific peptides of maximum missed cleavages; peptides longer than the maximum length
/// are cropped from the side opposite the fixed terminus and marked as semi-specific.
/// These peptides need to be digested post search to their actual sequences.
/// Semi-specific search enters here...
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <returns>The generated peptides (fully specific, or cropped semi-specific ones).</returns>
public IEnumerable<ProteolyticPeptide> SpeedySemiSpecificDigestion(Protein protein)
{
    var result = new List<ProteolyticPeptide>();
    List<int> cleavageSites = Protease.GetDigestionSiteIndices(protein.BaseSequence); // termini included
    int siteShift = MaximumMissedCleavages + 1;
    bool fixedNTerminus = DigestionParams.FragmentationTerminus == FragmentationTerminus.N;

    // This loop may not run at all (siteShift >= number of sites); that is okay, because the protein is
    // then digested by the terminus wrap-up below.
    int windowCount = cleavageSites.Count - siteShift;
    for (int i = 0; i < windowCount; i++)
    {
        int windowStart = cleavageSites[i];
        int windowEnd = cleavageSites[i + siteShift];

        // It is okay to pass i instead of the site value: index zero holds zero, and the call only
        // checks whether this is the N-terminus.
        bool retain = Protease.Retain(i, InitiatorMethionineBehavior, protein[0]);
        if (retain)
        {
            int retainedLength = windowEnd - windowStart;
            if (retainedLength >= MinPeptideLength)
            {
                if (retainedLength <= MaxPeptideLength)
                {
                    // acceptable length: add the fully specific peptide
                    result.Add(new ProteolyticPeptide(protein, windowStart + 1, windowEnd,
                        MaximumMissedCleavages, CleavageSpecificity.Full, "full"));
                }
                else if (fixedNTerminus)
                {
                    // too long: keep the N-terminal side, cropped to the maximum length
                    result.Add(new ProteolyticPeptide(protein, windowStart + 1, windowStart + MaxPeptideLength,
                        MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                }
                else
                {
                    // too long: keep the C-terminal side, cropped to the maximum length
                    result.Add(new ProteolyticPeptide(protein, windowEnd - MaxPeptideLength + 1, windowEnd,
                        MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
                }
            }
        }

        bool cleave = Protease.Cleave(i, InitiatorMethionineBehavior, protein[0]);
        if (!cleave || !(fixedNTerminus || !retain))
        {
            continue;
        }

        // Start is hardcoded to 2: M is residue 1, so the next residue is 2 (one based).
        int cleavedLength = windowEnd - 1;
        if (cleavedLength < MinPeptideLength)
        {
            continue;
        }

        if (cleavedLength <= MaxPeptideLength)
        {
            result.Add(new ProteolyticPeptide(protein, 2, windowEnd,
                MaximumMissedCleavages, CleavageSpecificity.Full, "full:M cleaved"));
        }
        else if (fixedNTerminus)
        {
            result.Add(new ProteolyticPeptide(protein, 2, MaxPeptideLength + 1,
                MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
        }
        else if (!Protease.Retain(i, InitiatorMethionineBehavior, protein[0]))
        {
            // Fixed C: only when the retained variant was not produced above, otherwise the same
            // cropped peptide would be added twice when cleavage is variable.
            result.Add(new ProteolyticPeptide(protein, windowEnd - MaxPeptideLength + 1, windowEnd,
                MaximumMissedCleavages, CleavageSpecificity.Semi, "semi"));
        }
    }

    // Wrap up the termini that were not hit earlier.
    int lastSite = cleavageSites.Count - 1; // last cleavage index (the C-terminus)

    // Number of index differences allowed; a protein with fewer sites than allowed missed cleavages
    // just uses its number of sites.
    int maxSiteDifference = Math.Min(MaximumMissedCleavages, lastSite);

    // difference starts at 1, otherwise the peptide would have length zero
    for (int difference = 1; difference <= maxSiteDifference; difference++)
    {
        int from;
        int to;
        if (fixedNTerminus)
        {
            from = cleavageSites[lastSite - difference];
            to = cleavageSites[lastSite];
        }
        else
        {
            from = cleavageSites[0];
            to = cleavageSites[difference];
        }

        int terminalLength = to - from;
        if (terminalLength < MinPeptideLength)
        {
            continue;
        }

        if (terminalLength <= MaxPeptideLength)
        {
            // okay length: add it up to the terminus
            result.Add(new ProteolyticPeptide(protein, from + 1, to, difference - 1, CleavageSpecificity.Full, "full"));
            continue;
        }

        // too long: crop so that the peptide no longer reaches the terminus
        if (fixedNTerminus)
        {
            to = from + MaxPeptideLength;
        }
        else
        {
            from = to - MaxPeptideLength;
        }

        result.Add(new ProteolyticPeptide(protein, from + 1, to, difference - 1, CleavageSpecificity.Semi, "semi"));
    }

    // Also digest using the proteolysis product start/end indices.
    foreach (ProteolysisProduct product in protein.ProteolysisProducts)
    {
        if (fixedNTerminus)
        {
            // Fixed N: the product matters only if its start position is novel (not right after a cleavage site).
            if (!product.OneBasedBeginPosition.HasValue
                || cleavageSites.Contains(product.OneBasedBeginPosition.Value - 1))
            {
                continue;
            }

            int productBegin = product.OneBasedBeginPosition.Value;

            // first cleavage site at or after the start of the product
            int site = 0;
            while (cleavageSites[site] < productBegin)
            {
                site++;
            }

            // add the missed cleavages, staying inside the list
            site = Math.Min(site + MaximumMissedCleavages, cleavageSites.Count - 1);
            int peptideEnd = cleavageSites[site];

            // do not run past the end of the product
            if (product.OneBasedEndPosition.HasValue && product.OneBasedEndPosition.Value < peptideEnd)
            {
                peptideEnd = product.OneBasedEndPosition.Value;
            }

            // limit the length to the maximum allowed if necessary
            if (peptideEnd - productBegin >= MaxPeptideLength)
            {
                peptideEnd = productBegin + MaxPeptideLength - 1;
            }

            // add it if it reaches the minimum length
            if (peptideEnd - productBegin + 1 >= MinPeptideLength)
            {
                result.Add(new ProteolyticPeptide(protein, productBegin, peptideEnd,
                    MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
            }
        }
        else
        {
            // Fixed C: the product matters only if its end position is novel (not a cleavage site).
            if (!product.OneBasedEndPosition.HasValue
                || cleavageSites.Contains(product.OneBasedEndPosition.Value))
            {
                continue;
            }

            int productEnd = product.OneBasedEndPosition.Value;

            // first cleavage site at or after the end of the product
            int site = 0;
            while (cleavageSites[site] < productEnd)
            {
                site++;
            }

            // step back over the missed cleavages (+1 because the search above overshot), staying inside the list
            site = Math.Max(site - siteShift, 0);
            int peptideBegin = cleavageSites[site] + 1;

            // do not start before the beginning of the product
            if (product.OneBasedBeginPosition.HasValue && product.OneBasedBeginPosition.Value > peptideBegin)
            {
                peptideBegin = product.OneBasedBeginPosition.Value;
            }

            // limit the length to the maximum allowed if necessary
            if (productEnd - peptideBegin >= MaxPeptideLength)
            {
                peptideBegin = productEnd - MaxPeptideLength + 1;
            }

            // add it if it reaches the minimum length
            if (productEnd - peptideBegin + 1 >= MinPeptideLength)
            {
                result.Add(new ProteolyticPeptide(protein, peptideBegin, productEnd,
                    MaximumMissedCleavages, CleavageSpecificity.Full, product.Type + " start"));
            }
        }
    }

    return result;
}
/// <summary>
/// Gets peptides for specific protease digestion of a protein by delegating to the protease with the
/// limits and specific protease configured on this instance.
/// </summary>
/// <param name="protein">Protein to digest.</param>
/// <returns>The unmodified peptides returned by the protease.</returns>
public IEnumerable<ProteolyticPeptide> Digestion(Protein protein) =>
    Protease.GetUnmodifiedPeptides(
        protein,
        MaximumMissedCleavages,
        InitiatorMethionineBehavior,
        MinPeptideLength,
        MaxPeptideLength,
        DigestionParams.SpecificProtease);
/// <summary>
/// Loads protease definitions from a tab-separated file into a dictionary keyed by protease name.
/// </summary>
/// <remarks>
/// The first line of the file is skipped as a header, and blank lines are ignored. Columns are read as
/// follows: 0 = name, 1 = digestion motifs, 4 = cleavage specificity, 5 = PSI-MS accession number,
/// 6 = PSI-MS name, 8 = identifier of the modification associated with proteolytic cleavage (may be empty).
/// </remarks>
/// <param name="path">Path of the protease definition file.</param>
/// <param name="proteaseMods">
/// Modifications loaded from the protease mods file. When null, column 8 is ignored and every protease
/// is created without a modification.
/// </param>
/// <returns>The proteases that were read, keyed by <c>Protease.Name</c>.</returns>
/// <exception cref="MzLibException">
/// A row names a modification that is not in <paramref name="proteaseMods"/>, or two rows produce the
/// same protease name.
/// </exception>
public static Dictionary<string, Protease> LoadProteaseDictionary(string path, List<Modification> proteaseMods = null)
{
    Dictionary<string, Protease> dict = new Dictionary<string, Protease>();

    // Stream the file and skip the header row.
    foreach (string line in File.ReadLines(path).Skip(1))
    {
        if (string.IsNullOrWhiteSpace(line)) // skip empty lines
        {
            continue;
        }

        string[] fields = line.Split('\t');
        List<DigestionMotif> motifList = DigestionMotif.ParseDigestionMotifsFromString(fields[1]);
        string name = fields[0];
        var cleavageSpecificity = (CleavageSpecificity)Enum.Parse(typeof(CleavageSpecificity), fields[4], true);
        string psiMsAccessionNumber = fields[5];
        string psiMsName = fields[6];

        // name of the modification that is associated with proteolytic cleavage
        string proteaseModDetails = fields[8];

        Protease protease;
        if (proteaseModDetails != "" && proteaseMods != null)
        {
            // This protease has an associated modification: look it up in the supplied list (single pass).
            Modification proteaseModification = proteaseMods.FirstOrDefault(p => p.IdWithMotif == proteaseModDetails);
            if (proteaseModification is null)
            {
                // An unknown modification always aborts the load, so nothing is registered for this row.
                throw new MzLibException(proteaseModDetails + " is not a valid modification");
            }

            protease = new Protease(name, cleavageSpecificity, psiMsAccessionNumber, psiMsName, motifList, proteaseModification);
        }
        else
        {
            protease = new Protease(name, cleavageSpecificity, psiMsAccessionNumber, psiMsName, motifList);
        }

        if (dict.ContainsKey(protease.Name))
        {
            throw new MzLibException("More than one protease named " + protease.Name + " exists");
        }

        dict.Add(protease.Name, protease);
    }

    return dict;
}