/// <summary>
/// Re-keys a one-based modification dictionary so that it lines up with the sequence
/// obtained after applying <paramref name="variant"/>.
/// </summary>
/// <param name="variant">Variation whose begin/end positions and length change drive the re-keying.</param>
/// <param name="modificationDictionary">Map of one-based position to modifications; may be null.</param>
/// <returns>
/// A new dictionary. Entries located before the variant keep their key, entries located after it are
/// shifted by the variant's length change (provided the shifted key does not exceed the length of
/// <c>BaseSequence</c>), and every other entry is left out. An empty dictionary is returned for null input.
/// </returns>
internal Dictionary<int, List<Modification>> AdjustModificationIndices(SequenceVariation variant, IDictionary<int, List<Modification>> modificationDictionary)
{
    var adjusted = new Dictionary<int, List<Modification>>();
    if (modificationDictionary != null)
    {
        // positive for a net insertion, negative for a net deletion
        int shift = variant.VariantSequence.Length - variant.OriginalSequence.Length;

        foreach (var entry in modificationDictionary)
        {
            int position = entry.Key;

            // upstream of the variant: the index is unaffected
            if (position < variant.OneBasedBeginPosition)
            {
                adjusted.Add(position, entry.Value);
                continue;
            }

            // downstream of the variant: move by the length change, as long as it still fits
            if (position > variant.OneBasedEndPosition && position + shift <= BaseSequence.Length)
            {
                adjusted.Add(position + shift, entry.Value);
            }

            // anything else sits on the variant itself (or falls off the end) and is dropped
        }
    }

    return adjusted;
}
/// <summary>
/// Value equality for sequence variations: two variations are equal when their begin/end positions,
/// original sequence, variant sequence, description and modifications all match.
/// </summary>
/// <param name="obj">Object to compare against; anything that is not a <c>SequenceVariation</c> is unequal.</param>
/// <returns>
/// True when every compared member matches. A member that is null on exactly one side makes the
/// objects unequal instead of raising a <see cref="NullReferenceException"/>.
/// </returns>
public override bool Equals(object obj)
{
    SequenceVariation s = obj as SequenceVariation;
    if (s == null)
    {
        return false;
    }

    if (OneBasedBeginPosition != s.OneBasedBeginPosition || OneBasedEndPosition != s.OneBasedEndPosition)
    {
        return false;
    }

    // static string.Equals is null-safe on both sides and ordinal, like the instance call it replaces
    if (!string.Equals(OriginalSequence, s.OriginalSequence) || !string.Equals(VariantSequence, s.VariantSequence))
    {
        return false;
    }

    // only dispatch to Description.Equals when this side is known to be non-null
    bool sameDescription = Description == null
        ? s.Description == null
        : Description.Equals(s.Description);
    if (!sameDescription)
    {
        return false;
    }

    // modifications: both absent is equal, exactly one absent is unequal
    if (OneBasedModifications == null || s.OneBasedModifications == null)
    {
        return OneBasedModifications == null && s.OneBasedModifications == null;
    }

    // same positions in the same enumeration order, and the same flattened modifications
    return s.OneBasedModifications.Keys.SequenceEqual(OneBasedModifications.Keys)
        && s.OneBasedModifications.Values.SelectMany(m => m)
            .SequenceEqual(OneBasedModifications.Values.SelectMany(m => m));
}
/// <summary>
/// Adjusts the indices of sequence variations due to applying a single additional variant.
/// </summary>
/// <param name="variantGettingApplied">The variant currently being applied.</param>
/// <param name="variantAppliedProteinSequence">
/// Protein sequence after the variant was applied; its length caps the adjusted positions.
/// </param>
/// <param name="alreadyAppliedVariations">Variations to re-index; may be null.</param>
/// <returns>
/// A new list. Variations that match the applied variant's description, or that end before it,
/// are passed through unchanged; the others are re-created with shifted begin/end positions.
/// Variations whose shifted begin lies past the end of the sequence are omitted.
/// An empty list is returned for null input.
/// </returns>
internal static List<SequenceVariation> AdjustSequenceVariationIndices(SequenceVariation variantGettingApplied, string variantAppliedProteinSequence, IEnumerable<SequenceVariation> alreadyAppliedVariations)
{
    List<SequenceVariation> variations = new List<SequenceVariation>();
    if (alreadyAppliedVariations == null)
    {
        return variations;
    }

    // Materialize once: the collection is scanned again for every element below, so a lazily
    // evaluated source would otherwise be re-executed on each iteration.
    List<SequenceVariation> appliedList = alreadyAppliedVariations.ToList();

    foreach (SequenceVariation v in appliedList)
    {
        // net length change contributed by all variations that end before this one begins
        int addedIdx = appliedList
            .Where(other => other.OneBasedEndPosition < v.OneBasedBeginPosition)
            .Sum(other => other.VariantSequence.Length - other.OriginalSequence.Length);

        // variant was entirely before the one being applied (shouldn't happen because of order of applying variants)
        // or it's the current variation
        if (v.Description.Equals(variantGettingApplied.Description)
            || v.OneBasedEndPosition - addedIdx < variantGettingApplied.OneBasedBeginPosition)
        {
            variations.Add(v);
        }
        // adjust indices based on new included sequence, minding possible overlaps to be filtered later
        else
        {
            int intersectOneBasedStart = Math.Max(variantGettingApplied.OneBasedBeginPosition, v.OneBasedBeginPosition);
            int intersectOneBasedEnd = Math.Min(variantGettingApplied.OneBasedEndPosition, v.OneBasedEndPosition);
            int overlap = intersectOneBasedEnd < intersectOneBasedStart
                ? 0 // no overlap
                : intersectOneBasedEnd - intersectOneBasedStart + 1; // there's some overlap
            int sequenceLengthChange = variantGettingApplied.VariantSequence.Length - variantGettingApplied.OriginalSequence.Length;

            int begin = v.OneBasedBeginPosition + sequenceLengthChange - overlap;
            if (begin > variantAppliedProteinSequence.Length)
            {
                continue; // cut out by a stop gain
            }

            int end = v.OneBasedEndPosition + sequenceLengthChange - overlap;
            if (end > variantAppliedProteinSequence.Length)
            {
                end = variantAppliedProteinSequence.Length; // end shortened by a stop gain
            }

            variations.Add(new SequenceVariation(
                begin,
                end,
                v.OriginalSequence,
                v.VariantSequence,
                v.Description.Description,
                v.OneBasedModifications.ToDictionary(kv => kv.Key, kv => kv.Value)));
        }
    }

    return variations;
}
/// <summary>
/// Eliminates proteolysis products that overlap sequence variations.
/// Since frameshift indels are written across the remaining sequence,
/// this eliminates proteolysis products that conflict with large deletions and other structural variations.
/// </summary>
/// <param name="variant">The variant being applied.</param>
/// <param name="variantAppliedProteinSequence">Protein sequence after the variant was applied.</param>
/// <param name="protein">
/// Protein the variant is applied to; the length of its <c>NonVariantProtein.BaseSequence</c> is
/// used to recognise products that end at the protein terminus.
/// </param>
/// <param name="proteolysisProducts">
/// Products to adjust; may be null. Products lacking a begin or end position are skipped.
/// </param>
/// <returns>
/// A new list holding the products that survive the variant, re-indexed where needed.
/// An empty list is returned for null input.
/// </returns>
internal static List<ProteolysisProduct> AdjustProteolysisProductIndices(SequenceVariation variant, string variantAppliedProteinSequence, Protein protein, IEnumerable<ProteolysisProduct> proteolysisProducts)
{
    List<ProteolysisProduct> products = new List<ProteolysisProduct>();
    if (proteolysisProducts == null)
    {
        return products;
    }

    int sequenceLengthChange = variant.VariantSequence.Length - variant.OriginalSequence.Length;

    // Loop-invariant; '*' is a literal marker, so compare ordinally rather than by current culture.
    bool variantEndsWithStop = variant.VariantSequence.EndsWith("*", StringComparison.Ordinal);

    foreach (ProteolysisProduct p in proteolysisProducts.Where(product => product.OneBasedEndPosition.HasValue && product.OneBasedBeginPosition.HasValue))
    {
        // proteolysis product is entirely before the variant
        if (variant.OneBasedBeginPosition > p.OneBasedEndPosition)
        {
            products.Add(p);
        }
        // proteolysis product straddles the variant, but the cleavage site(s) are still intact; the ends aren't considered cleavage sites
        else if ((p.OneBasedBeginPosition < variant.OneBasedBeginPosition || p.OneBasedBeginPosition == 1 || p.OneBasedBeginPosition == 2)
            && (p.OneBasedEndPosition > variant.OneBasedEndPosition || p.OneBasedEndPosition == protein.NonVariantProtein.BaseSequence.Length))
        {
            if (variantEndsWithStop)
            {
                // the product now ends at the end of the variant-applied sequence
                products.Add(new ProteolysisProduct(p.OneBasedBeginPosition, variantAppliedProteinSequence.Length, p.Type));
            }
            else if (p.OneBasedEndPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length)
            {
                products.Add(new ProteolysisProduct(p.OneBasedBeginPosition, p.OneBasedEndPosition + sequenceLengthChange, p.Type));
            }

            // otherwise the cleavage site is not intact and the product is dropped
        }
        // proteolysis product is after the variant and there is no stop gain
        else if (p.OneBasedBeginPosition > variant.OneBasedEndPosition
            && p.OneBasedBeginPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length
            && p.OneBasedEndPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length
            && !variantEndsWithStop)
        {
            products.Add(new ProteolysisProduct(p.OneBasedBeginPosition + sequenceLengthChange, p.OneBasedEndPosition + sequenceLengthChange, p.Type));
        }

        // otherwise the sequence variant conflicts with a proteolysis cleavage site (cleavage site was lost)
    }

    return products;
}
/// <summary>
/// Builds a new protein by splicing one sequence variation into the protein's base sequence.
/// </summary>
/// <param name="variantGettingApplied">The variation to splice in.</param>
/// <param name="protein">The protein that supplies the surrounding sequence and previously applied variations.</param>
/// <param name="individual">Passed through unchanged to the <c>Protein</c> constructor.</param>
/// <returns>
/// A protein whose sequence is prefix + variant sequence + suffix, truncated at the first '*',
/// with proteolysis products, modifications and applied variations re-indexed to match.
/// </returns>
internal static Protein ApplySingleVariant(SequenceVariation variantGettingApplied, Protein protein, string individual)
{
    int beginPosition = variantGettingApplied.OneBasedBeginPosition;
    string prefix = protein.BaseSequence.Substring(0, beginPosition - 1);
    string inserted = variantGettingApplied.VariantSequence;

    // zero-based index of the first residue following the replaced (original) stretch
    int suffixStart = beginPosition + variantGettingApplied.OriginalSequence.Length - 1;

    // the same variation, with its end position expressed in post-application coordinates
    var variantAfterApplication = new SequenceVariation(
        beginPosition,
        beginPosition + inserted.Length - 1,
        variantGettingApplied.OriginalSequence,
        inserted,
        variantGettingApplied.Description.Description,
        variantGettingApplied.OneBasedModifications.ToDictionary(kv => kv.Key, kv => kv.Value));

    // A partial overlap with an already applied variation would produce odd variant sequences.
    // Full containment is fine because the contained variation simply gets overwritten
    // (e.g. two alternate alleles where the reference is wrong at that point).
    bool overlapsAppliedPartially = protein.AppliedSequenceVariations
        .Any(applied => variantGettingApplied.Intersects(applied) && !variantGettingApplied.Includes(applied));

    bool nothingAfterVariant = protein.BaseSequence.Length - suffixStart <= 0;
    var appliedVariations = new List<SequenceVariation> { variantAfterApplication };

    string suffix;
    if (nothingAfterVariant)
    {
        suffix = "";
    }
    else if (overlapsAppliedPartially)
    {
        // fall back to the non-variant protein sequence for the remainder
        // NOTE(review): the emptiness guard above measures protein.BaseSequence, while this substring is
        // taken from NonVariantProtein.BaseSequence - confirm the two lengths are always compatible here.
        suffix = protein.NonVariantProtein.BaseSequence.Substring(suffixStart);
    }
    else
    {
        // continue with this (already variant) protein sequence for the remainder
        suffix = protein.BaseSequence.Substring(suffixStart);
    }

    if (!overlapsAppliedPartially)
    {
        // carry over earlier variations, except those swallowed whole by the new one
        appliedVariations.AddRange(protein.AppliedSequenceVariations.Where(applied => !variantGettingApplied.Includes(applied)));
    }

    // a '*' marks a gained stop: keep only what precedes the first one
    string spliced = string.Concat(prefix, inserted, suffix);
    int stopIndex = spliced.IndexOf('*');
    string variantSequence = stopIndex < 0 ? spliced : spliced.Substring(0, stopIndex);

    // re-index everything that is positioned on the sequence
    List<ProteolysisProduct> adjustedProteolysisProducts = AdjustProteolysisProductIndices(variantGettingApplied, variantSequence, protein, protein.ProteolysisProducts);
    Dictionary<int, List<Modification>> adjustedModifications = AdjustModificationIndices(variantGettingApplied, variantSequence, protein);
    List<SequenceVariation> adjustedAppliedVariations = AdjustSequenceVariationIndices(variantGettingApplied, variantSequence, appliedVariations);

    return new Protein(variantSequence, protein, adjustedAppliedVariations, adjustedProteolysisProducts, adjustedModifications, individual);
}
/// <summary>
/// Tells whether the queried interval lies completely inside this interval (bounds inclusive).
/// </summary>
/// <param name="segment">The interval to test.</param>
/// <returns>
/// True when <paramref name="segment"/> begins at or after this begin position and ends at or
/// before this end position.
/// </returns>
internal bool Includes(SequenceVariation segment)
{
    // the queried interval may not start before this one does
    if (segment.OneBasedBeginPosition < OneBasedBeginPosition)
    {
        return false;
    }

    // ... and may not run past this one's end
    return segment.OneBasedEndPosition <= OneBasedEndPosition;
}
/// <summary>
/// Tells whether the queried interval shares at least one position with this interval (bounds inclusive).
/// </summary>
/// <param name="segment">The interval to test.</param>
/// <returns>
/// False when <paramref name="segment"/> ends before this interval begins or begins after it ends;
/// true otherwise.
/// </returns>
internal bool Intersects(SequenceVariation segment)
{
    bool endsBeforeThis = segment.OneBasedEndPosition < OneBasedBeginPosition;
    bool beginsAfterThis = segment.OneBasedBeginPosition > OneBasedEndPosition;

    // the intervals are disjoint exactly when one lies wholly to one side of the other
    return !(endsBeforeThis || beginsAfterThis);
}
/// <summary>
/// Adjusts modification indices for a protein to which <paramref name="variant"/> has been applied,
/// and merges in the variant's own modifications.
/// </summary>
/// <param name="variant">The variant being applied; its <c>OneBasedModifications</c> are merged into the result.</param>
/// <param name="variantAppliedProteinSequence">Protein sequence after the variant was applied; its length bounds valid positions.</param>
/// <param name="protein">Protein whose <c>OneBasedPossibleLocalizedModifications</c> are re-indexed.</param>
/// <returns>
/// A new dictionary keyed by one-based position. Protein modifications before the variant keep their
/// key, those after it are shifted by the variant's length change, and those on the variant or past
/// the end of the sequence are dropped. The input dictionaries and their lists are not modified.
/// </returns>
internal static Dictionary<int, List<Modification>> AdjustModificationIndices(SequenceVariation variant, string variantAppliedProteinSequence, Protein protein)
{
    IDictionary<int, List<Modification>> modificationDictionary = protein.OneBasedPossibleLocalizedModifications;
    IDictionary<int, List<Modification>> variantModificationDictionary = variant.OneBasedModifications;
    Dictionary<int, List<Modification>> mods = new Dictionary<int, List<Modification>>();
    int sequenceLengthChange = variant.VariantSequence.Length - variant.OriginalSequence.Length;

    // change modification indices for variant sequence
    if (modificationDictionary != null)
    {
        foreach (KeyValuePair<int, List<Modification>> kv in modificationDictionary)
        {
            if (kv.Key > variantAppliedProteinSequence.Length)
            {
                continue; // it was cut out by a stop gain
            }

            // mod is before the variant
            if (kv.Key < variant.OneBasedBeginPosition)
            {
                mods.Add(kv.Key, kv.Value);
            }
            // mod is after the variant and not affected by a stop gain
            else if (variant.OneBasedEndPosition < kv.Key && kv.Key + sequenceLengthChange <= variantAppliedProteinSequence.Length)
            {
                mods.Add(kv.Key + sequenceLengthChange, kv.Value);
            }

            // otherwise the sequence variant conflicts with the modification site (modification site substitution)
        }
    }

    // sequence variant modifications are indexed to the variant sequence
    // NOTE: this code assumes variants are added from end to beginning of protein, so that previously added variant mods are adjusted above
    if (variantModificationDictionary != null)
    {
        foreach (var kv in variantModificationDictionary)
        {
            if (mods.TryGetValue(kv.Key, out var modsAtPos))
            {
                // modsAtPos is the protein's own list instance; appending to it in place would
                // silently alter the source protein, so merge into a fresh list instead.
                List<Modification> merged = new List<Modification>(modsAtPos);
                merged.AddRange(kv.Value);
                mods[kv.Key] = merged;
            }
            else
            {
                mods.Add(kv.Key, kv.Value);
            }
        }
    }

    return mods;
}
/// <summary>
/// Determines if the modification falls on a variant amino acid.
/// </summary>
/// <param name="appliedVariant">The applied variation to test against; may be null.</param>
/// <param name="variantProteinIndex">Position of the modification in the variant protein.</param>
/// <returns>
/// False when <paramref name="appliedVariant"/> is null; otherwise the result of
/// <c>appliedVariant.Includes(variantProteinIndex)</c>.
/// </returns>
public static bool IsSequenceVariantModification(SequenceVariation appliedVariant, int variantProteinIndex)
{
    if (appliedVariant != null)
    {
        return appliedVariant.Includes(variantProteinIndex);
    }

    // without a variant there is nothing the modification could fall on
    return false;
}