Esempio n. 1
0
        /// <summary>
        /// constructor: stores the annotation inputs, resolves the alternate allele on the
        /// transcript's strand, obtains the reference start/end via GetReferenceCoordinates
        /// and builds the HGVS notation object used by the rest of this class
        /// </summary>
        /// <param name="ta">transcript annotation; supplies the alternate allele and the transcript reference allele</param>
        /// <param name="transcript">transcript being annotated; supplies strand, id/version and translation</param>
        /// <param name="variant">variant feature (only stored here)</param>
        /// <param name="compressedSequence">reference sequence (only stored here)</param>
        /// <param name="isGenomicDuplicate">caller-supplied duplication flag (only stored here)</param>
        public HgvsCodingNomenclature(TranscriptAnnotation ta, Transcript transcript, VariantFeature variant,
                                      ICompressedSequence compressedSequence, bool isGenomicDuplicate)
        {
            // keep the inputs around for later use
            _ta                 = ta;
            _transcript         = transcript;
            _variant            = variant;
            _compressedSequence = compressedSequence;
            _isGenomicDuplicate = isGenomicDuplicate;
            _sb                 = new StringBuilder();

            // strand of the gene this transcript belongs to
            var useReverseComplement = transcript.Gene.OnReverseStrand;

            // the annotated alternate allele may differ from the input one for insertions/deletions
            var annotatedAllele = ta.AlternateAllele;

            // express the alternate bases on the transcript's strand
            string strandedAlternateBases = useReverseComplement
                ? SequenceUtilities.GetReverseComplement(annotatedAllele.AlternateAllele)
                : annotatedAllele.AlternateAllele;

            // calculate the reference start and end
            GetReferenceCoordinates(transcript, annotatedAllele, out _hgvsStart, out _hgvsEnd);

            // assemble the pieces of the notation; the last argument tells the notation
            // whether the transcript has a translation
            var referenceAllele     = ta.TranscriptReferenceAllele;
            var transcriptAccession = FormatUtilities.CombineIdAndVersion(transcript.Id, transcript.Version);
            var hasTranslation      = _transcript.Translation != null;

            _hgvsNotation = new HgvsNotation(referenceAllele, strandedAlternateBases, transcriptAccession,
                                             _hgvsStart, _hgvsEnd, hasTranslation);
        }
        /// <summary>
        /// returns true if this insertion has the same amino acids preceding it
        /// [TranscriptVariationAllele.pm:1494 _check_for_peptide_duplication].
        /// On a match the notation is rewritten in place as a duplication: the type, start,
        /// end and alternate abbreviation are updated.
        /// </summary>
        /// <param name="hn">HGVS notation to inspect; modified only when a duplication is found</param>
        /// <param name="transcriptPeptides">peptide sequence that is searched for the preceding amino acids</param>
        /// <returns>true when the notation was converted to a duplication, false otherwise</returns>
        private bool IsAminoAcidDuplicate(HgvsNotation hn, string transcriptPeptides)
        {
            // nothing to compare without alternate amino acids
            if (hn.AlternateAminoAcids == null)
            {
                return false;
            }

            var insertedLength = hn.AlternateAminoAcidsLen;

            // index in transcriptPeptides where the candidate preceding window begins
            var windowStart = hn.Start - insertedLength - 1;

            // the window would begin before the peptide sequence
            if (windowStart < 0)
            {
                return false;
            }

            // a window that runs past the end of the peptide sequence is treated as empty
            var windowFits       = windowStart + insertedLength <= transcriptPeptides.Length;
            var precedingWindow  = windowFits ? transcriptPeptides.Substring(windowStart, insertedLength) : "";

            if (precedingWindow != hn.AlternateAminoAcids)
            {
                return false;
            }

            // update our HGVS notation: the duplicated range ends right before the original start
            hn.Type                  = ProteinChange.Duplication;
            hn.End                   = hn.Start - 1;
            hn.Start                -= insertedLength;
            hn.AlternateAbbreviation = _aminoAcids.GetAbbreviations(hn.AlternateAminoAcids);
            return true;
        }
 /// <summary>
 /// returns a string with the HGVS representation for either the single position or the ranged position.
 /// Single position: ReferenceAbbreviation + Start + AlternateAbbreviation.
 /// Range: ReferenceAbbreviation + Start + '_' + AlternateAbbreviation + End.
 /// </summary>
 /// <param name="hn">HGVS notation supplying the abbreviations and the start/end values</param>
 /// <returns>the concatenated position string</returns>
 private static string GetHgvsRangeString(HgvsNotation hn)
 {
     // both forms begin with the reference abbreviation followed by the start
     var prefix = hn.ReferenceAbbreviation + hn.Start;

     if (hn.Start != hn.End)
     {
         // ranged form: the alternate abbreviation is paired with the end
         return prefix + '_' + hn.AlternateAbbreviation + hn.End;
     }

     return prefix + hn.AlternateAbbreviation;
 }
        /// <summary>
        /// constructor: stores the annotation inputs and builds the protein-level HGVS notation
        /// from the annotation's reference/alternate amino acids, the protein id and version of
        /// the transcript's translation, and the annotation's protein begin/end.
        /// NOTE(review): transcript.Translation is dereferenced without a null check, so this
        /// assumes the transcript has a translation - confirm against callers.
        /// </summary>
        /// <param name="variantEffect">variant effect (only stored here)</param>
        /// <param name="ta">transcript annotation; supplies the amino acids and protein begin/end</param>
        /// <param name="transcript">transcript being annotated; supplies the translation's protein id/version</param>
        /// <param name="variant">variant feature (only stored here)</param>
        /// <param name="compressedSequence">reference sequence (only stored here)</param>
        /// <param name="aminoAcids">amino acid helper (only stored here)</param>
        public HgvsProteinNomenclature(VariantEffect variantEffect, TranscriptAnnotation ta, Transcript transcript,
                                       VariantFeature variant, ICompressedSequence compressedSequence, AminoAcids aminoAcids)
        {
            // keep the inputs around for later use
            _aminoAcids         = aminoAcids;
            _compressedSequence = compressedSequence;
            _variant            = variant;
            _transcript         = transcript;
            _ta                 = ta;
            _variantEffect      = variantEffect;

            // pieces of the notation, gathered in the order they are passed below
            var referenceAminoAcids = ta.ReferenceAminoAcids;
            var alternateAminoAcids = ta.AlternateAminoAcids;
            var proteinAccession    = FormatUtilities.CombineIdAndVersion(transcript.Translation.ProteinId,
                                                                          transcript.Translation.ProteinVersion);

            _hgvsNotation = new HgvsNotation(referenceAminoAcids, alternateAminoAcids, proteinAccession,
                                             ta.ProteinBegin, ta.ProteinEnd);
        }
Esempio n. 5
0
        /// <summary>
        /// makes sure the start endpoint does not lie after the end endpoint.
        /// Missing (null) offsets on either endpoint are first replaced by 0. The endpoints are
        /// then exchanged when start (position + offset) is greater than end (position + offset),
        /// unless the end endpoint has HasStopCodonNotation set, in which case nothing is swapped.
        /// </summary>
        /// <param name="hn">HGVS notation whose Start/End are normalized in place</param>
        private static void SwapEndpoints(HgvsNotation hn)
        {
            // treat a missing offset as zero so the sums below are well defined
            if (hn.Start.Offset == null)
            {
                hn.Start.Offset = 0;
            }

            if (hn.End.Offset == null)
            {
                hn.End.Offset = 0;
            }

            // endpoints flagged with stop codon notation are left in their current order
            if (hn.End.HasStopCodonNotation)
            {
                return;
            }

            var startCoordinate = hn.Start.Position + hn.Start.Offset;
            var endCoordinate   = hn.End.Position + hn.End.Offset;

            // deliberately written as '>' (not a negated '<='): the check must stay false
            // when either side has no value
            if (startCoordinate > endCoordinate)
            {
                var formerStart = hn.Start;
                hn.Start = hn.End;
                hn.End   = formerStart;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// get the genomic change that resulted from this variation [Sequence.pm:482 hgvs_variant_notation].
        /// The result is written to hn.Type (Unknown when the positions are undefined or the
        /// alleles are identical). For duplications and insertions the start/end positions are
        /// rewritten as well; duplications also set AlleleMultiple and ReferenceBases.
        /// </summary>
        /// <param name="transcript">transcript whose start, end and strand are used to fetch the bases preceding an insertion</param>
        /// <param name="hn">HGVS notation that is classified and updated in place</param>
        /// <param name="isGenomicDuplicate">when true, an insertion is reported as a duplication without inspecting the sequence</param>
        private void GetGenomicChange(Transcript transcript, HgvsNotation hn, bool isGenomicDuplicate)
        {
            hn.Type = GenomicChange.Unknown;

            // both positions must be defined before anything can be classified
            if (hn.Start.Position == null || hn.End.Position == null)
            {
                return;
            }

            int firstPosition = (int)hn.Start.Position;
            int lastPosition  = (int)hn.End.Position;

            // length of the reference allele; negative lengths make no sense, so clamp at zero
            int span            = lastPosition - firstPosition + 1;
            int referenceLength = span < 0 ? 0 : span;

            // length of the alternate allele
            var alternateLength = hn.AlternateBases.Length;

            // sanity check: identical alleles are not a change
            if (hn.ReferenceBases == hn.AlternateBases)
            {
                return;
            }

            if (alternateLength == 0)
            {
                // nothing replaces the reference: deletion
                hn.Type = GenomicChange.Deletion;
            }
            else if (referenceLength == alternateLength)
            {
                if (referenceLength == 1)
                {
                    // single base replaced by a single base
                    hn.Type = GenomicChange.Substitution;
                }
                else
                {
                    // equal lengths: an inversion when the alternate allele is the reverse
                    // complement of the reference, otherwise a deletion/insertion
                    var reverseComplementedReference = SequenceUtilities.GetReverseComplement(hn.ReferenceBases);

                    if (hn.AlternateBases == reverseComplementedReference)
                    {
                        hn.Type = GenomicChange.Inversion;
                    }
                    else
                    {
                        hn.Type = GenomicChange.InDel;
                    }
                }
            }
            else if (referenceLength == 0)
            {
                // If this is an insertion, we should check if the preceding reference nucleotides
                // match the insertion. In that case it should be annotated as a duplication.
                int windowStart      = lastPosition - alternateLength;
                var treatAsDuplicate = isGenomicDuplicate;

                if (!treatAsDuplicate && _compressedSequence != null && windowStart >= 0)
                {
                    // fetch as many nucleotides preceding the insertion as the insertion is long
                    var upstreamBases = SequenceUtilities.GetSubSubstring(transcript.Start, transcript.End,
                                                                          transcript.Gene.OnReverseStrand, windowStart,
                                                                          windowStart + alternateLength - 1, _compressedSequence);
                    treatAsDuplicate = upstreamBases == hn.AlternateBases;
                }

                if (treatAsDuplicate)
                {
                    hn.Type = GenomicChange.Duplication;

                    // for a duplication, the hgvs positions are decremented by the alt allele length
                    hn.Start.Position = firstPosition - alternateLength;
                    hn.End.Position   = hn.Start.Position + alternateLength - 1;

                    hn.AlleleMultiple = 2;
                    hn.ReferenceBases = hn.AlternateBases;
                }
                else
                {
                    // otherwise just an insertion; start and end are exchanged
                    hn.Type           = GenomicChange.Insertion;
                    hn.Start.Position = lastPosition;
                    hn.End.Position   = firstPosition;
                }
            }
            else
            {
                // The reference and alternate alleles have different, non-zero lengths. By default
                // this is a delins, unless the alternate allele is the reference repeated a whole
                // number of times.
                var isExactRepeat = false;

                if (alternateLength % referenceLength == 0)
                {
                    hn.AlleleMultiple = alternateLength / referenceLength;
                    string repeatedReference = string.Concat(Enumerable.Repeat(hn.ReferenceBases, hn.AlleleMultiple));
                    isExactRepeat = hn.AlternateBases == repeatedReference;
                }

                if (!isExactRepeat)
                {
                    // deletion/insertion
                    hn.Type = GenomicChange.InDel;
                }
                else if (hn.AlleleMultiple == 2)
                {
                    hn.Type = GenomicChange.Duplication;
                }
                else
                {
                    hn.Type = GenomicChange.Multiple;
                }
            }
        }