/// <summary>
/// Builds the string that represents a detected sequence variation, including any
/// modifications located on the variant amino acid(s).
/// </summary>
/// <param name="applied">The sequence variation to describe.</param>
/// <param name="intersects">
/// True when this peptide intersects the variation. False when the variation was identified
/// without intersecting the peptide (i.e. the variation produced the cleavage site that
/// generated the peptide).
/// </param>
/// <returns>
/// The original sequence, followed by the one-based begin position of the variation, followed by
/// the variant sequence. When <paramref name="intersects"/> is true, the variant part is taken
/// from the full sequence of a sub-peptide built over the variation, so it carries any
/// modifications selected for the variant residues.
/// </returns>
public string SequenceVariantString(SequenceVariation applied, bool intersects)
{
    // The variant does not intersect the peptide (it was identified through a cleavage site it
    // created), so the plain variant sequence is reported.
    if (!intersects)
    {
        return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{applied.VariantSequence}";
    }

    bool variantStartsAtProteinStart = applied.OneBasedBeginPosition == 1;
    bool peptideStartsAtProteinStart = OneBasedStartResidueInProtein == 1;

    // Offset added to each modification key when re-keying below:
    //   1 when both the variant and the peptide begin at protein position 1,
    //   2 when only the peptide begins at protein position 1,
    //   3 otherwise.
    // NOTE(review): the rationale for these specific values is not visible in this block — confirm against callers/tests.
    int modResidueScale;
    if (variantStartsAtProteinStart && peptideStartsAtProteinStart)
    {
        modResidueScale = 1;
    }
    else if (peptideStartsAtProteinStart)
    {
        modResidueScale = 2;
    }
    else
    {
        modResidueScale = 3;
    }

    // Keep a modification when it sits at key 1 and the variant begins at position 1, or when
    // its key, mapped through (key - 2 + peptide start), falls inside the variant's
    // begin..end range. The kept modifications are then re-keyed for the sub-peptide below.
    var modsOnVariantOneIsNTerm = AllModsOneIsNterminus
        .Where(kv =>
            (kv.Key == 1 && applied.OneBasedBeginPosition == 1)
            || (applied.OneBasedBeginPosition <= kv.Key - 2 + OneBasedStartResidueInProtein
                && kv.Key - 2 + OneBasedStartResidueInProtein <= applied.OneBasedEndPosition))
        .ToDictionary(kv => kv.Key - applied.OneBasedBeginPosition + modResidueScale, kv => kv.Value);

    // The sub-peptide begins one residue before the variant unless the variant begins at
    // position 1; in the former case the extra leading character is dropped again by the
    // Substring call in the return statement.
    int subPeptideStart = variantStartsAtProteinStart
        ? applied.OneBasedBeginPosition
        : applied.OneBasedBeginPosition - 1;

    PeptideWithSetModifications variantWithAnyMods = new PeptideWithSetModifications(
        Protein,
        DigestionParams,
        subPeptideStart,
        applied.OneBasedEndPosition,
        CleavageSpecificityForFdrCategory,
        PeptideDescription,
        MissedCleavages,
        modsOnVariantOneIsNTerm,
        NumFixedMods);

    string variantSequenceWithMods = variantWithAnyMods.FullSequence.Substring(variantStartsAtProteinStart ? 0 : 1);

    return $"{applied.OriginalSequence}{applied.OneBasedBeginPosition}{variantSequenceWithMods}";
}
/// <summary>
/// Creates a new peptide spanning the same residues as this one, in which the modification
/// dictionary has an "Anywhere." modification carrying the localized mass at key
/// <c>j + 2</c>. If a modification already exists at that key, it is replaced and its
/// monoisotopic mass is added to <paramref name="massToLocalize"/>.
/// </summary>
/// <param name="j">
/// Index used to compute the modification key as <c>j + 2</c>.
/// NOTE(review): presumably a zero-based residue index (key 1 being the N-terminus) — confirm against callers.
/// </param>
/// <param name="massToLocalize">The mass to place at the computed key.</param>
/// <returns>A new <see cref="PeptideWithSetModifications"/> built with the updated modification dictionary.</returns>
public PeptideWithSetModifications Localize(int j, double massToLocalize)
{
    int modKey = j + 2;

    // Work on a copy so this peptide's own modification dictionary is left untouched.
    var localizedMods = new Dictionary<int, Modification>(AllModsOneIsNterminus);

    double existingMass = 0;
    if (localizedMods.TryGetValue(modKey, out Modification existingMod))
    {
        existingMass = (double)existingMod.MonoisotopicMass;
    }

    // The indexer adds the entry when the key is absent and replaces it when present.
    localizedMods[modKey] = new Modification(
        _locationRestriction: "Anywhere.",
        _monoisotopicMass: massToLocalize + existingMass);

    return new PeptideWithSetModifications(
        Protein,
        _digestionParams,
        OneBasedStartResidueInProtein,
        OneBasedEndResidueInProtein,
        CleavageSpecificityForFdrCategory,
        PeptideDescription,
        MissedCleavages,
        localizedMods,
        NumFixedMods);
}
/// <summary>
/// Determines whether a sequence variation intersects this peptide, and whether the variation
/// is identified by this peptide. A variation can be identified without intersecting the
/// peptide when it creates the cleavage site that generated the peptide.
/// </summary>
/// <param name="appliedVariation">The sequence variation to test against this peptide.</param>
/// <returns>
/// A tuple whose first item is true when the variation intersects the peptide, and whose second
/// item is true when the variation is identified.
/// </returns>
public (bool intersects, bool identifies) IntersectsAndIdentifiesVariation(SequenceVariation appliedVariation)
{
    int variantBegin = appliedVariation.OneBasedBeginPosition;
    int variantEnd = appliedVariation.OneBasedEndPosition;
    int peptideStart = OneBasedStartResidueInProtein;
    int peptideEnd = OneBasedEndResidueInProtein;

    // Possible locations of the variant start relative to the peptide.
    bool startsBeforePeptide = variantBegin < peptideStart;
    bool startsAtPeptideStart = variantBegin == peptideStart;
    bool startsInsidePeptide = variantBegin >= peptideStart && variantBegin < peptideEnd;
    bool startsAtPeptideEnd = variantBegin == peptideEnd;

    // Possible locations of the variant end relative to the peptide.
    bool endsAtPeptideStart = variantEnd == peptideStart;
    bool endsInsidePeptide = variantEnd > peptideStart && variantEnd <= peptideEnd;
    bool endsAtPeptideEnd = variantEnd == peptideEnd;
    bool endsAfterPeptide = variantEnd > peptideEnd;

    // Start/end combinations for which the peptide intersects the variant.
    bool intersects;
    if (startsBeforePeptide || startsAtPeptideStart)
    {
        intersects = endsAtPeptideStart || endsInsidePeptide || endsAtPeptideEnd || endsAfterPeptide;
    }
    else if (startsInsidePeptide)
    {
        intersects = endsInsidePeptide || endsAfterPeptide || endsAtPeptideEnd;
    }
    else if (startsAtPeptideEnd)
    {
        intersects = endsAfterPeptide || endsAtPeptideEnd;
    }
    else
    {
        intersects = false;
    }

    bool identifies = false;

    if (intersects)
    {
        string variantSequence = appliedVariation.VariantSequence;
        string originalSequence = appliedVariation.OriginalSequence;

        int lengthDiff = variantSequence.Length - originalSequence.Length;
        int intersectOneBasedStart = Math.Max(peptideStart, variantBegin);
        int intersectOneBasedEnd = Math.Min(peptideEnd, variantEnd + lengthDiff);
        int intersectSize = intersectOneBasedEnd - intersectOneBasedStart + 1;

        // If the original sequence within the peptide is shorter or longer than the variant
        // sequence within the peptide, there is a sequence change.
        int variantZeroBasedStartInPeptide = intersectOneBasedStart - variantBegin;
        bool origSeqIsShort = originalSequence.Length - variantZeroBasedStartInPeptide < intersectSize;
        bool origSeqIsLong = originalSequence.Length > intersectSize && peptideEnd > intersectOneBasedEnd;

        if (origSeqIsShort || origSeqIsLong)
        {
            identifies = true;
        }
        else if (intersectSize == variantSequence.Length)
        {
            // The peptide crosses the entire variant sequence (needed to identify truncations
            // and certain deletions, like KAAAAAAAAA -> K, but this also catches synonymous
            // variations such as A -> A), so compare the two sequences over the intersection.
            string originalAtIntersect = originalSequence.Substring(variantZeroBasedStartInPeptide, intersectSize);
            string variantAtIntersect = variantSequence.Substring(variantZeroBasedStartInPeptide, intersectSize);
            identifies = originalAtIntersect != variantAtIntersect;
        }
    }
    else
    {
        // The variant does not intersect the peptide; check whether it caused the cleavage
        // event that created the peptide.

        // Variants ending at or before the peptide start shift positions relative to the
        // original (variant-free) protein. Their summed length change is used to map positions
        // in the variant protein back to the original protein.
        int totalLengthDifference = 0;
        foreach (var earlierVariant in Protein.AppliedSequenceVariations.Where(v => v.OneBasedEndPosition <= peptideStart))
        {
            totalLengthDifference += earlierVariant.VariantSequence.Length - earlierVariant.OriginalSequence.Length;
        }

        // Cleavage motifs of the protease used for digestion; motifs with CutIndex == 1 are
        // treated here as C-terminal cleavage sites and those with CutIndex == 0 as N-terminal.
        List<DigestionMotif> proteaseCleavageSites = DigestionParams.Protease.DigestionMotifs;
        List<string> cTerminalResidue = proteaseCleavageSites
            .Where(dm => dm.CutIndex == 1)
            .Select(d => d.InducingCleavage)
            .ToList();

        if (variantEnd == peptideStart - 1)
        {
            // The variant ends on the residue just before the peptide, so it may have caused a
            // C-terminal cleavage.
            if (cTerminalResidue.Count > 0)
            {
                // Residue preceding the peptide in the variant protein (the last residue of the variant).
                int previousInVariant = peptideStart - 1;
                PeptideWithSetModifications previousAaVariant = new PeptideWithSetModifications(
                    Protein, DigestionParams, previousInVariant, previousInVariant,
                    CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                // Residue preceding the peptide in the original protein (without applied variants).
                int previousInOriginal = (peptideStart - 1) - totalLengthDifference;
                PeptideWithSetModifications previousAaOriginal = new PeptideWithSetModifications(
                    Protein.NonVariantProtein, DigestionParams, previousInOriginal, previousInOriginal,
                    CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                bool newSite = cTerminalResidue.Contains(previousAaVariant.BaseSequence);
                bool oldSite = cTerminalResidue.Contains(previousAaOriginal.BaseSequence);

                // Identified only when the variant residue induces cleavage and the original residue did not.
                if (newSite && !oldSite)
                {
                    identifies = true;
                }
            }
        }
        else if (variantBegin == peptideEnd + 1)
        {
            // The variant begins on the residue just after the peptide, so it may have caused an
            // N-terminal cleavage.
            List<string> nTerminalResidue = proteaseCleavageSites
                .Where(dm => dm.CutIndex == 0)
                .Select(d => d.InducingCleavage)
                .ToList();

            // A stop-gain variation ("*") can create the peptide; check this for proteases with
            // C-terminal cleavage sites.
            if (cTerminalResidue.Count > 0 && appliedVariation.VariantSequence == "*")
            {
                PeptideWithSetModifications lastAaOfPeptide = new PeptideWithSetModifications(
                    Protein, DigestionParams, peptideEnd, peptideEnd,
                    CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                // Identified when the peptide's last residue is not itself a cleavage site.
                if (!cTerminalResidue.Contains(lastAaOfPeptide.BaseSequence))
                {
                    identifies = true;
                }
            }

            if (nTerminalResidue.Count > 0)
            {
                int nextInVariant = peptideEnd + 1;
                int nextInOriginal = (peptideEnd + 1) - totalLengthDifference;

                if (Protein.Length >= nextInVariant)
                {
                    // Residue following the peptide in the variant protein (the first residue of the variant).
                    PeptideWithSetModifications nextAaVariant = new PeptideWithSetModifications(
                        Protein, DigestionParams, nextInVariant, nextInVariant,
                        CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                    // Make sure the original protein has a residue following the peptide (an issue
                    // with stop-loss variants or variants that add residues after the previous
                    // stop). There is deliberately no else: if the peptide ended at the original
                    // stop site there is no way to truly identify the variant, and a peptide
                    // extending into the stop-loss region would intersect the variant instead.
                    if (Protein.NonVariantProtein.Length >= nextInVariant)
                    {
                        // Residue following the peptide in the original protein (without applied variants).
                        PeptideWithSetModifications nextAaOriginal = new PeptideWithSetModifications(
                            Protein.NonVariantProtein, DigestionParams, nextInOriginal, nextInOriginal,
                            CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                        bool newSite = nTerminalResidue.Contains(nextAaVariant.BaseSequence);
                        bool oldSite = nTerminalResidue.Contains(nextAaOriginal.BaseSequence);

                        // Identified only when the variant residue induces cleavage and the original residue did not.
                        if (newSite && !oldSite)
                        {
                            identifies = true;
                        }
                    }
                }
                else
                {
                    // Stop-gain variations that produce the peptide: the variant protein has no
                    // residue after the peptide, so only the original protein is inspected.
                    PeptideWithSetModifications nextAaOriginal = new PeptideWithSetModifications(
                        Protein.NonVariantProtein, DigestionParams, nextInOriginal, nextInOriginal,
                        CleavageSpecificity.Full, "full", 0, AllModsOneIsNterminus, NumFixedMods);

                    // Identified when the original following residue was not a cleavage site.
                    if (!nTerminalResidue.Contains(nextAaOriginal.BaseSequence))
                    {
                        identifies = true;
                    }
                }
            }
        }
    }

    return (intersects, identifies);
}