/// <summary>
/// Creates a CrossLink from one or two peptides, the cross-link mass, and the mod type.
/// </summary>
/// <param name="proteinId">The identifier of the protein of the cross-link.</param>
/// <param name="peptideOne">The first peptide of the cross-link.</param>
/// <param name="peptideTwo">The second peptide of the cross-link. Can be null if linking the first peptide to itself.</param>
/// <param name="mass">The monoisotopic mass of the un-shifted cross-link, in daltons.</param>
/// <param name="modType">The mod type of the cross-link. See CrossLink.ModType for an explanation of mod types.</param>
public CrossLink(string proteinId, clsInSilicoDigest.PeptideInfoClass peptideOne, clsInSilicoDigest.PeptideInfoClass peptideTwo, double mass, ModType modType)
{
    ProteinId = proteinId;
    PeptideOne = peptideOne;
    PeptideTwo = peptideTwo;
    Mass = mass;
    ModType = modType;

    // One mass shift is recorded per non-null peptide, first peptide first.
    MassShiftList = new List<double>();
    foreach (var peptide in new[] { peptideOne, peptideTwo })
    {
        if (peptide == null)
        {
            continue;
        }

        MassShiftList.Add(CrossLinkUtil.CalculateMassShift(peptide.SequenceOneLetter));
    }
}
/// <summary>
/// Digests a protein sequence in silico and returns the resulting peptides.
/// The cleavage rule and the number of missed cleavages are taken from the arguments.
/// </summary>
/// <param name="proteinSequence">The protein sequence to digest.</param>
/// <param name="digestionRule">Determines which digestion rule to use (fully tryptic, partially tryptic, no rules).</param>
/// <param name="maxMissedCleavages">The maximum number of missed cleavages to consider.</param>
/// <returns>The peptide array as left by clsParseProteinFile.DigestProteinSequence.</returns>
public static IEnumerable<clsInSilicoDigest.PeptideInfoClass> DigestProtein(string proteinSequence, clsInSilicoDigest.CleavageRuleConstants digestionRule, int maxMissedCleavages)
{
    var proteinParser = new clsParseProteinFile
    {
        AssumeFastaFile = true,
        AssumeDelimitedFile = false,
        ComputeProteinMass = true,
        CreateProteinOutputFile = false,
        CreateDigestedProteinOutputFile = false,
        GenerateUniqueIDValuesForPeptides = true
    };

    // Passed by ref below; presumably the digester resizes or replaces this array.
    var peptides = new clsInSilicoDigest.PeptideInfoClass[1];

    var options = new clsInSilicoDigest.DigestionOptionsClass
    {
        CleavageRuleID = digestionRule,
        MaxMissedCleavages = maxMissedCleavages
    };

    // "xlinkProt" is the fixed fourth argument (presumably a placeholder protein name).
    proteinParser.DigestProteinSequence(proteinSequence, ref peptides, options, "xlinkProt");

    return peptides;
}
/// <summary>
/// Finds all theoretical cross-links for a single peptide, or for a pair of peptides, of a protein.
/// </summary>
/// <remarks>
/// With one peptide the result always starts with the unmodified peptide (ModType.None), followed by
/// dead-end (ModType.Zero), intra-linked (ModType.One) and mixed (ModType.ZeroOne) candidates.
/// With two peptides the result holds ModType.Two and ModType.ZeroTwo candidates, and is empty when
/// either peptide has no linkable site.
/// </remarks>
/// <param name="proteinId">The identifier of the protein used for cross linking.</param>
/// <param name="firstPeptide">The first peptide used for cross linking.</param>
/// <param name="secondPeptide">The second peptide used for cross linking. null if linking the first peptide to itself.</param>
/// <param name="proteinSequence">Protein sequence used.</param>
/// <returns>An IEnumerable of CrossLink objects.</returns>
private static IEnumerable<CrossLink> FindCrossLinks(string proteinId, clsInSilicoDigest.PeptideInfoClass firstPeptide, clsInSilicoDigest.PeptideInfoClass secondPeptide, string proteinSequence)
{
    var crossLinkList = new List<CrossLink>();

    // If 1 peptide
    if (secondPeptide == null)
    {
        var peptideMass = firstPeptide.Mass;

        // Create cross-link for unmodified peptide
        crossLinkList.Add(new CrossLink(proteinId, firstPeptide, null, peptideMass, ModType.None));

        // Check for intra-linked peptides (will not always find)
        var numCrossLinkCharacters = CountLinkableSites(firstPeptide.SequenceOneLetter, proteinSequence);

        // If no linkable sites are found, then we are done
        if (numCrossLinkCharacters == 0)
        {
            return crossLinkList;
        }

        // Type 0: one entry per possible number of dead-end modifications
        for (var i = 1; i <= numCrossLinkCharacters; i++)
        {
            var modifiedMass = peptideMass + (i * CrossLinkConstants.DEAD_END_MASS);
            crossLinkList.Add(new CrossLink(proteinId, firstPeptide, null, modifiedMass, ModType.Zero));
        }

        // Type 1: the loop is naturally empty when fewer than 2 sites are available.
        // NOTE(review): the upper bound here is (sites - 1), while the mixed loops cap the
        // number of linkers at (sites / 2). Confirm which bound is intended.
        for (var i = 1; i <= numCrossLinkCharacters - 1; i++)
        {
            var modifiedMass = peptideMass + (i * CrossLinkConstants.LINKER_MASS);
            crossLinkList.Add(new CrossLink(proteinId, firstPeptide, null, modifiedMass, ModType.One));
        }

        // Type 0 and Type 1 mix: i linkers use 2 sites each, j dead-ends use the remaining sites.
        // Produces nothing when fewer than 3 sites are available.
        for (var i = 1; i <= numCrossLinkCharacters / 2; i++)
        {
            var numSitesLeft = numCrossLinkCharacters - (i * 2);
            for (var j = 1; j <= numSitesLeft; j++)
            {
                var modifiedMass = peptideMass + (i * CrossLinkConstants.LINKER_MASS) + (j * CrossLinkConstants.DEAD_END_MASS);
                crossLinkList.Add(new CrossLink(proteinId, firstPeptide, null, modifiedMass, ModType.ZeroOne));
            }
        }

        return crossLinkList;
    }

    // If 2 peptides
    var firstPeptideNumCrossLinkCharacters = CountLinkableSites(firstPeptide.SequenceOneLetter, proteinSequence);
    var secondPeptideNumCrossLinkCharacters = CountLinkableSites(secondPeptide.SequenceOneLetter, proteinSequence);

    // If either peptide does not have a linkable site, then no cross-link is possible; exit
    if (firstPeptideNumCrossLinkCharacters == 0 || secondPeptideNumCrossLinkCharacters == 0)
    {
        return crossLinkList;
    }

    var combinedMass = firstPeptide.Mass + secondPeptide.Mass;
    var numSites = firstPeptideNumCrossLinkCharacters + secondPeptideNumCrossLinkCharacters;

    for (var i = 1; i <= numSites / 2; i++)
    {
        var numSitesLeft = numSites - (i * 2);
        for (var j = 0; j <= numSitesLeft; j++)
        {
            var modifiedMass = combinedMass + (i * CrossLinkConstants.LINKER_MASS) + (j * CrossLinkConstants.DEAD_END_MASS);

            // j == 0 is a pure Type 2 link; any dead-ends make it a Type 2 and Type 0 mix
            var modType = j == 0 ? ModType.Two : ModType.ZeroTwo;
            crossLinkList.Add(new CrossLink(proteinId, firstPeptide, secondPeptide, modifiedMass, modType));
        }
    }

    return crossLinkList;
}

/// <summary>
/// Counts the sites of a peptide that are considered available for cross-linking.
/// </summary>
/// <param name="peptideSequence">One-letter sequence of the peptide.</param>
/// <param name="proteinSequence">Sequence of the protein the peptide belongs to.</param>
/// <returns>
/// The number of characters from m_crossLinkCharacters in the peptide, ignoring a trailing 'K',
/// plus one if the peptide sits at the very beginning of the protein sequence.
/// </returns>
private static int CountLinkableSites(string peptideSequence, string proteinSequence)
{
    // A trailing K is not counted; the index check also keeps an empty sequence from throwing.
    var endsWithLysine = peptideSequence.Length > 0 && peptideSequence[peptideSequence.Length - 1] == 'K';
    var countedResidues = endsWithLysine
        ? peptideSequence.Substring(0, peptideSequence.Length - 1)
        : peptideSequence;

    var siteCount = countedResidues.Count(m_crossLinkCharacters.Contains);

    // The peptide at the very beginning of the protein gets one extra site, since the first
    // amino acid can be cross-linked. The full (unstripped) sequence is compared ordinally so that
    // a peptide reduced to "" by the K-strip, or one that merely shares its K-less prefix with the
    // protein start, is not mistaken for the N-terminal peptide.
    if (peptideSequence.Length > 0 && proteinSequence.StartsWith(peptideSequence, System.StringComparison.Ordinal))
    {
        siteCount++;
    }

    return siteCount;
}