예제 #1
0
        /// <summary>
        /// Re-indexes a modification dictionary so that its keys line up with the sequence produced by a variant.
        /// Entries before the variant keep their key, entries after the variant are shifted by the variant's
        /// length change, and entries inside the variant's span are dropped.
        /// </summary>
        /// <param name="variant">Variation whose begin/end positions and original/variant sequence lengths determine the shift.</param>
        /// <param name="modificationDictionary">Modifications keyed by position; may be null.</param>
        /// <returns>A new dictionary with adjusted keys; empty when <paramref name="modificationDictionary"/> is null.</returns>
        internal Dictionary <int, List <Modification> > AdjustModificationIndices(SequenceVariation variant, IDictionary <int, List <Modification> > modificationDictionary)
        {
            var adjusted = new Dictionary <int, List <Modification> >();

            if (modificationDictionary != null)
            {
                int shift = variant.VariantSequence.Length - variant.OriginalSequence.Length;

                foreach (var entry in modificationDictionary)
                {
                    int position = entry.Key;

                    // site lies before the variant: index is unaffected
                    if (position < variant.OneBasedBeginPosition)
                    {
                        adjusted.Add(position, entry.Value);
                        continue;
                    }

                    // site lies after the variant: shift it, as long as it still fits within BaseSequence
                    int shifted = position + shift;
                    if (position > variant.OneBasedEndPosition && shifted <= BaseSequence.Length)
                    {
                        adjusted.Add(shifted, entry.Value);
                    }

                    // otherwise the variant overlaps the modification site (or the shifted site falls
                    // past the end of BaseSequence) and the entry is left out
                }
            }

            return adjusted;
        }
예제 #2
0
        /// <summary>
        /// Value equality for sequence variations: positions, original and variant sequences, description
        /// and modifications must all match.
        /// </summary>
        /// <remarks>
        /// Every nullable member is compared null-safely: two nulls are equal, and a null never equals a non-null.
        /// Modification keys and values are compared with SequenceEqual, i.e. in enumeration order.
        /// NOTE(review): a matching GetHashCode override is not visible from here; confirm one exists.
        /// </remarks>
        /// <param name="obj">Object to compare against; anything that is not a SequenceVariation is unequal.</param>
        /// <returns>True when <paramref name="obj"/> is a SequenceVariation with equal members.</returns>
        public override bool Equals(object obj)
        {
            SequenceVariation s = obj as SequenceVariation;

            if (s == null)
            {
                return false;
            }

            if (OneBasedBeginPosition != s.OneBasedBeginPosition || OneBasedEndPosition != s.OneBasedEndPosition)
            {
                return false;
            }

            // Guard on *this* side's member before dereferencing it; the previous form threw a
            // NullReferenceException when this side was null and the other side was not.
            bool sameOriginal    = OriginalSequence == null ? s.OriginalSequence == null : OriginalSequence.Equals(s.OriginalSequence);
            bool sameVariant     = VariantSequence == null ? s.VariantSequence == null : VariantSequence.Equals(s.VariantSequence);
            bool sameDescription = Description == null ? s.Description == null : Description.Equals(s.Description);

            if (!sameOriginal || !sameVariant || !sameDescription)
            {
                return false;
            }

            // If either modification dictionary is missing, they are equal only when both are missing.
            if (OneBasedModifications == null || s.OneBasedModifications == null)
            {
                return OneBasedModifications == null && s.OneBasedModifications == null;
            }

            return s.OneBasedModifications.Keys.SequenceEqual(OneBasedModifications.Keys) &&
                   s.OneBasedModifications.Values.SelectMany(m => m).SequenceEqual(OneBasedModifications.Values.SelectMany(m => m));
        }
예제 #3
0
        /// <summary>
        /// Adjusts the indices of sequence variations due to applying a single additional variant.
        /// </summary>
        /// <param name="variantGettingApplied">The variant currently being applied; its position and length change drive the adjustment.</param>
        /// <param name="variantAppliedProteinSequence">Protein sequence after the variant has been applied; its length bounds the adjusted positions.</param>
        /// <param name="alreadyAppliedVariations">Variations to re-index; may be null. Enumerated exactly once.</param>
        /// <returns>
        /// A new list of variations. Variations matching the applied variant's description, or lying entirely before it,
        /// are passed through unchanged; the others are re-created with shifted positions, and those whose shifted begin
        /// lies beyond the end of <paramref name="variantAppliedProteinSequence"/> are dropped.
        /// Empty when <paramref name="alreadyAppliedVariations"/> is null.
        /// </returns>
        internal static List <SequenceVariation> AdjustSequenceVariationIndices(SequenceVariation variantGettingApplied, string variantAppliedProteinSequence, IEnumerable <SequenceVariation> alreadyAppliedVariations)
        {
            List <SequenceVariation> variations = new List <SequenceVariation>();

            if (alreadyAppliedVariations == null)
            {
                return(variations);
            }

            // Materialize once: the sequence is walked by the outer loop and again for every element below,
            // so a lazily evaluated IEnumerable would otherwise be re-evaluated on each pass.
            List <SequenceVariation> applied = alreadyAppliedVariations.ToList();

            foreach (SequenceVariation v in applied)
            {
                // total length change contributed by variations that end before this one begins
                int addedIdx = applied
                               .Where(other => other.OneBasedEndPosition < v.OneBasedBeginPosition)
                               .Sum(other => other.VariantSequence.Length - other.OriginalSequence.Length);

                // variant was entirely before the one being applied (shouldn't happen because of order of applying variants)
                // or it's the current variation
                if (v.Description.Equals(variantGettingApplied.Description) || v.OneBasedEndPosition - addedIdx < variantGettingApplied.OneBasedBeginPosition)
                {
                    variations.Add(v);
                }

                // adjust indices based on new included sequence, minding possible overlaps to be filtered later
                else
                {
                    int intersectOneBasedStart = Math.Max(variantGettingApplied.OneBasedBeginPosition, v.OneBasedBeginPosition);
                    int intersectOneBasedEnd   = Math.Min(variantGettingApplied.OneBasedEndPosition, v.OneBasedEndPosition);
                    int overlap = intersectOneBasedEnd < intersectOneBasedStart ? 0 : // no overlap
                                  intersectOneBasedEnd - intersectOneBasedStart + 1;  // there's some overlap
                    int sequenceLengthChange = variantGettingApplied.VariantSequence.Length - variantGettingApplied.OriginalSequence.Length;
                    int begin = v.OneBasedBeginPosition + sequenceLengthChange - overlap;
                    if (begin > variantAppliedProteinSequence.Length)
                    {
                        continue; // cut out by a stop gain
                    }

                    // end shortened by a stop gain when it would run past the new sequence
                    int end = Math.Min(v.OneBasedEndPosition + sequenceLengthChange - overlap, variantAppliedProteinSequence.Length);

                    variations.Add(new SequenceVariation(
                                       begin,
                                       end,
                                       v.OriginalSequence,
                                       v.VariantSequence,
                                       v.Description.Description,
                                       v.OneBasedModifications.ToDictionary(kv => kv.Key, kv => kv.Value)));
                }
            }
            return(variations);
        }
예제 #4
0
        /// <summary>
        /// Eliminates proteolysis products that overlap sequence variations.
        /// Since frameshift indels are written across the remaining sequence,
        /// this eliminates proteolysis products that conflict with large deletions and other structural variations.
        /// </summary>
        /// <param name="variant">The variant being applied; its positions, length change and trailing "*" drive the adjustment.</param>
        /// <param name="variantAppliedProteinSequence">Protein sequence after the variant has been applied; its length bounds the adjusted positions.</param>
        /// <param name="protein">Protein whose NonVariantProtein.BaseSequence length is used to recognize products ending at the sequence end.</param>
        /// <param name="proteolysisProducts">Products to adjust; may be null. Products lacking a begin or end position are skipped.</param>
        /// <returns>A new list with the retained (possibly re-indexed) products; empty when <paramref name="proteolysisProducts"/> is null.</returns>
        internal static List <ProteolysisProduct> AdjustProteolysisProductIndices(SequenceVariation variant, string variantAppliedProteinSequence, Protein protein, IEnumerable <ProteolysisProduct> proteolysisProducts)
        {
            List <ProteolysisProduct> products = new List <ProteolysisProduct>();

            if (proteolysisProducts == null)
            {
                return(products);
            }
            int sequenceLengthChange = variant.VariantSequence.Length - variant.OriginalSequence.Length;

            // Loop-invariant, so computed once. Ordinal comparison: "*" is a literal marker, not linguistic text.
            bool variantEndsWithStop = variant.VariantSequence.EndsWith("*", System.StringComparison.Ordinal);

            foreach (ProteolysisProduct p in proteolysisProducts.Where(p => p.OneBasedEndPosition.HasValue && p.OneBasedBeginPosition.HasValue))
            {
                // proteolysis product is entirely before the variant
                if (variant.OneBasedBeginPosition > p.OneBasedEndPosition)
                {
                    products.Add(p);
                }
                // proteolysis product straddles the variant, but the cleavage site(s) are still intact; the ends aren't considered cleavage sites
                else if ((p.OneBasedBeginPosition < variant.OneBasedBeginPosition || p.OneBasedBeginPosition == 1 || p.OneBasedBeginPosition == 2) &&
                         (p.OneBasedEndPosition > variant.OneBasedEndPosition || p.OneBasedEndPosition == protein.NonVariantProtein.BaseSequence.Length))
                {
                    if (variantEndsWithStop)
                    {
                        // stop gain: the product now runs to the end of the variant-applied sequence
                        products.Add(new ProteolysisProduct(p.OneBasedBeginPosition, variantAppliedProteinSequence.Length, p.Type));
                    }
                    else if (p.OneBasedEndPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length)
                    {
                        products.Add(new ProteolysisProduct(p.OneBasedBeginPosition, p.OneBasedEndPosition + sequenceLengthChange, p.Type));
                    }
                    // otherwise the cleavage site is not intact and the product is dropped
                }
                // proteolysis product is after the variant and there is no stop gain
                else if (p.OneBasedBeginPosition > variant.OneBasedEndPosition &&
                         p.OneBasedBeginPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length &&
                         p.OneBasedEndPosition + sequenceLengthChange <= variantAppliedProteinSequence.Length &&
                         !variantEndsWithStop)
                {
                    products.Add(new ProteolysisProduct(p.OneBasedBeginPosition + sequenceLengthChange, p.OneBasedEndPosition + sequenceLengthChange, p.Type));
                }
                // otherwise the sequence variant conflicts with a proteolysis cleavage site (cleavage site was lost)
            }
            return(products);
        }
예제 #5
0
        /// <summary>
        /// Applies a single variant to a protein sequence.
        /// The new sequence is the protein's sequence before the variant, the variant sequence, and the remaining
        /// sequence after the replaced region, truncated at the first '*' (stop gain).
        /// </summary>
        /// <param name="variantGettingApplied">The variant to apply.</param>
        /// <param name="protein">Protein to apply the variant to; its previously applied variations, proteolysis products and modifications are re-indexed.</param>
        /// <param name="individual">Passed through unchanged to the constructed Protein.</param>
        /// <returns>A new Protein built from the variant-applied sequence and the adjusted annotations.</returns>
        internal static Protein ApplySingleVariant(SequenceVariation variantGettingApplied, Protein protein, string individual)
        {
            string seqBefore  = protein.BaseSequence.Substring(0, variantGettingApplied.OneBasedBeginPosition - 1);
            string seqVariant = variantGettingApplied.VariantSequence;
            int    afterIdx   = variantGettingApplied.OneBasedBeginPosition + variantGettingApplied.OriginalSequence.Length - 1;

            // the same variant, with its end position re-expressed in terms of the variant sequence length
            SequenceVariation variantAfterApplication = new SequenceVariation(
                variantGettingApplied.OneBasedBeginPosition,
                variantGettingApplied.OneBasedBeginPosition + variantGettingApplied.VariantSequence.Length - 1,
                variantGettingApplied.OriginalSequence,
                variantGettingApplied.VariantSequence,
                variantGettingApplied.Description.Description,
                variantGettingApplied.OneBasedModifications.ToDictionary(kv => kv.Key, kv => kv.Value));

            // check to see if there is incomplete indel overlap, which would lead to weird variant sequences
            // complete overlap is okay, since it will be overwritten; this can happen if there are two alternate alleles,
            //    e.g. reference sequence is wrong at that point
            bool intersectsAppliedRegionIncompletely          = protein.AppliedSequenceVariations.Any(x => variantGettingApplied.Intersects(x) && !variantGettingApplied.Includes(x));
            IEnumerable <SequenceVariation> appliedVariations = new[] { variantAfterApplication };
            string seqAfter;

            if (afterIdx >= protein.BaseSequence.Length)
            {
                // nothing remains after the replaced region
                seqAfter = "";
            }
            else if (intersectsAppliedRegionIncompletely)
            {
                // use original protein sequence for the remaining sequence;
                // bound the index by the string actually being sliced, since the non-variant sequence
                // may be shorter than the current one and Substring would otherwise throw
                string referenceSequence = protein.NonVariantProtein.BaseSequence;
                seqAfter = afterIdx >= referenceSequence.Length ? "" : referenceSequence.Substring(afterIdx);
            }
            else
            {
                // use this variant protein sequence for the remaining sequence
                seqAfter = protein.BaseSequence.Substring(afterIdx);
            }

            if (!intersectsAppliedRegionIncompletely)
            {
                // keep the previously applied variations that this variant does not completely overwrite
                appliedVariations = appliedVariations
                                    .Concat(protein.AppliedSequenceVariations.Where(x => !variantGettingApplied.Includes(x)))
                                    .ToList();
            }
            string variantSequence = (seqBefore + seqVariant + seqAfter).Split('*')[0]; // there may be a stop gained

            // adjust indices
            List <ProteolysisProduct> adjustedProteolysisProducts        = AdjustProteolysisProductIndices(variantGettingApplied, variantSequence, protein, protein.ProteolysisProducts);
            Dictionary <int, List <Modification> > adjustedModifications = AdjustModificationIndices(variantGettingApplied, variantSequence, protein);
            List <SequenceVariation> adjustedAppliedVariations           = AdjustSequenceVariationIndices(variantGettingApplied, variantSequence, appliedVariations);

            return(new Protein(variantSequence, protein, adjustedAppliedVariations, adjustedProteolysisProducts, adjustedModifications, individual));
        }
예제 #6
0
 /// <summary>
 /// Tests whether the queried interval lies entirely within this interval (both bounds inclusive).
 /// </summary>
 /// <param name="segment">Interval to test against this one.</param>
 /// <returns>True when this interval begins at or before <paramref name="segment"/> begins and ends at or after it ends.</returns>
 internal bool Includes(SequenceVariation segment)
 {
     // the queried interval starts before this one does: it cannot be contained
     if (segment.OneBasedBeginPosition < OneBasedBeginPosition)
     {
         return false;
     }

     return segment.OneBasedEndPosition <= OneBasedEndPosition;
 }
예제 #7
0
 /// <summary>
 /// Tests whether this interval and the queried interval share at least one position (bounds inclusive).
 /// </summary>
 /// <param name="segment">Interval to test against this one.</param>
 /// <returns>False when <paramref name="segment"/> ends before this interval begins or begins after it ends; true otherwise.</returns>
 internal bool Intersects(SequenceVariation segment)
 {
     bool segmentEndsBeforeThisBegins = segment.OneBasedEndPosition < OneBasedBeginPosition;
     bool segmentBeginsAfterThisEnds  = segment.OneBasedBeginPosition > OneBasedEndPosition;

     return !(segmentEndsBeforeThisBegins || segmentBeginsAfterThisEnds);
 }
예제 #8
0
        /// <summary>
        /// Adjusts modification indices for a protein after a variant has been applied, then merges in the
        /// modifications carried by the variant itself.
        /// </summary>
        /// <param name="variant">The applied variant; its positions and length change drive the re-indexing, and its OneBasedModifications are merged into the result.</param>
        /// <param name="variantAppliedProteinSequence">Protein sequence after the variant has been applied; modifications indexed past its end are dropped.</param>
        /// <param name="protein">Protein whose OneBasedPossibleLocalizedModifications are re-indexed. Its lists are never modified by this method.</param>
        /// <returns>A new dictionary of modifications keyed by position in the variant-applied sequence.</returns>
        internal static Dictionary <int, List <Modification> > AdjustModificationIndices(SequenceVariation variant, string variantAppliedProteinSequence, Protein protein)
        {
            IDictionary <int, List <Modification> > modificationDictionary        = protein.OneBasedPossibleLocalizedModifications;
            IDictionary <int, List <Modification> > variantModificationDictionary = variant.OneBasedModifications;
            Dictionary <int, List <Modification> >  mods = new Dictionary <int, List <Modification> >();
            int sequenceLengthChange = variant.VariantSequence.Length - variant.OriginalSequence.Length;

            // change modification indices for variant sequence
            if (modificationDictionary != null)
            {
                foreach (KeyValuePair <int, List <Modification> > kv in modificationDictionary)
                {
                    if (kv.Key > variantAppliedProteinSequence.Length)
                    {
                        continue; // it was cut out by a stop gain
                    }

                    // mod is before the variant
                    if (kv.Key < variant.OneBasedBeginPosition)
                    {
                        mods.Add(kv.Key, kv.Value);
                    }
                    // mod is after the variant and not affected by a stop gain
                    else if (variant.OneBasedEndPosition < kv.Key && kv.Key + sequenceLengthChange <= variantAppliedProteinSequence.Length)
                    {
                        mods.Add(kv.Key + sequenceLengthChange, kv.Value);
                    }
                    // otherwise the sequence variant conflicts with the modification site (modification site substitution)
                }
            }

            // sequence variant modifications are indexed to the variant sequence
            //    NOTE: this code assumes variants are added from end to beginning of protein, so that previously added variant mods are adjusted above
            if (variantModificationDictionary != null)
            {
                foreach (var kv in variantModificationDictionary)
                {
                    if (mods.TryGetValue(kv.Key, out var modsAtPos))
                    {
                        // The list found here is the very instance held by the protein's modification dictionary,
                        // so appending to it in place would silently alter the source protein.
                        // Build a fresh merged list instead.
                        List <Modification> merged = new List <Modification>(modsAtPos);
                        merged.AddRange(kv.Value);
                        mods[kv.Key] = merged;
                    }
                    else
                    {
                        mods.Add(kv.Key, kv.Value);
                    }
                }
            }

            return(mods);
        }
예제 #9
0
 /// <summary>
 /// Determines if the modification falls on a variant amino acid.
 /// </summary>
 /// <param name="appliedVariant">The applied variant to test against; null yields false.</param>
 /// <param name="variantProteinIndex">Index passed to the variant's Includes check (presumably a one-based position in the variant protein; confirm against callers).</param>
 /// <returns>True when <paramref name="appliedVariant"/> is non-null and reports that it includes <paramref name="variantProteinIndex"/>.</returns>
 public static bool IsSequenceVariantModification(SequenceVariation appliedVariant, int variantProteinIndex)
 {
     if (appliedVariant == null)
     {
         return false;
     }

     return appliedVariant.Includes(variantProteinIndex);
 }