コード例 #1
0
        /// <summary>
        /// Builds the HGVS protein notation for a frameshift variant.
        /// </summary>
        /// <remarks>
        /// The alternate peptide is obtained from <c>HgvsUtilities.GetAltPeptideSequence</c> and compared
        /// against the transcript's reference peptide through <c>HgvsUtilities.GetChangesAfterFrameshift</c>.
        /// When the alternate amino acid reported by that helper is the stop codon character, a substitution
        /// to "Ter" is emitted instead of a frameshift.
        /// </remarks>
        /// <returns>The notation string produced by <c>HgvspNotation</c>.</returns>
        private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd,
                                                        string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start,
                                                        int end)
        {
            var referencePeptide = transcript.Translation.PeptideSeq;
            var alternatePeptide = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, isMitochondrial);

            // keep the protein coordinates in ascending order
            if (start > end)
            {
                Swap.Int(ref start, ref end);
            }

            // Item1 replaces the start position, Item2/Item3 are used as the ref/alt amino acids
            var changes = HgvsUtilities.GetChangesAfterFrameshift(start, referencePeptide, alternatePeptide);
            start = changes.Item1;

            var refResidue      = changes.Item2;
            var altResidue      = changes.Item3;
            var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(refResidue);

            if (altResidue != AminoAcids.StopCodonChar)
            {
                var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(altResidue);
                var distanceToStop  = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(alternatePeptide, referencePeptide, start - 1, true);

                return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, distanceToStop);
            }

            // the alternate amino acid is a stop codon: report a substitution to "Ter"
            return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter");
        }
コード例 #2
0
        /// <summary>
        /// Produces the HGVS coding notation of a variant relative to a transcript.
        /// </summary>
        /// <returns>
        /// The formatted notation, or null when the variant is a reference call, the alternate allele
        /// contains a non-canonical base, a cDNA position offset could not be obtained, or either
        /// position lies beyond the transcript length.
        /// </returns>
        public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence,
                                                int regionStart, int regionEnd)
        {
            // nothing to report for reference calls or alternate alleles with odd characters
            bool isReference = variant.Type == VariantType.reference;
            if (isReference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
            {
                return null;
            }

            var onReverseStrand = transcript.Gene.OnReverseStrand;

            // express both alleles on the transcript's strand
            var refAllele = variant.RefAllele;
            var altAllele = variant.AltAllele;
            if (onReverseStrand)
            {
                refAllele = SequenceUtilities.GetReverseComplement(refAllele);
                altAllele = SequenceUtilities.GetReverseComplement(altAllele);
            }

            // classify the event according to HGVS nomenclature
            var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

            var variantStart = variant.Start;
            var variantEnd   = variant.End;

            // duplications get their coordinates, reference allele and regions from ShiftDuplication
            if (genomicChange == GenomicChange.Duplication)
            {
                (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
            }

            var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart);
            var endPositionOffset   = startPositionOffset;
            if (variantStart != variantEnd)
            {
                endPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd);
            }

            // on the reverse strand the start and end offsets trade places
            if (onReverseStrand)
            {
                (startPositionOffset, endPositionOffset) = (endPositionOffset, startPositionOffset);
            }

            // both coordinates are required
            if (startPositionOffset == null || endPositionOffset == null)
            {
                return null;
            }

            // no notation for positions past the transcript
            var transcriptLength = transcript.End - transcript.Start + 1;
            if (startPositionOffset.Position > transcriptLength || endPositionOffset.Position > transcriptLength)
            {
                return null;
            }

            var hasTranslation = transcript.Translation != null;
            var notation       = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,
                                                   startPositionOffset, endPositionOffset, hasTranslation);

            return notation.ToString();
        }
コード例 #3
0
        /// <summary>
        /// Classifies the protein-level change described by a pair of reference/alternate amino acid strings.
        /// </summary>
        /// <param name="start">Protein position of the change; position 1 with an empty reference is treated as no change.</param>
        /// <param name="refAminoAcids">Reference amino acids (may be empty).</param>
        /// <param name="altAminoAcids">Alternate amino acids (may be empty).</param>
        /// <param name="peptideSeq">Peptide sequence handed to the duplication check.</param>
        /// <param name="variantEffect">Effect predicates consulted for stop-retained, start-lost, stop-lost and frameshift.</param>
        /// <returns>
        /// The first matching category, tested in this order: None, StartLost, Extension, Frameshift,
        /// Duplication, Insertion, Deletion, Substitution; DelIns when nothing else matches.
        /// </returns>
        internal static ProteinChange GetProteinChange(int start, string refAminoAcids, string altAminoAcids,
                                                       string peptideSeq, IVariantEffect variantEffect)
        {
            if (refAminoAcids == altAminoAcids ||
                variantEffect.IsStopRetained())
            {
                return ProteinChange.None;
            }

            // insertion before the transcript
            if (refAminoAcids.Length == 0 && start == 1)
            {
                return ProteinChange.None;
            }

            if (variantEffect.IsStartLost())
            {
                return ProteinChange.StartLost;
            }

            // todo: add start gained
            // according to var nom, it is only called an extension if the stop codon itself is affected.
            // The stop codon is a fixed token, not linguistic text, so compare ordinally instead of
            // relying on the current culture.
            if (variantEffect.IsStopLost() &&
                refAminoAcids.StartsWith(AminoAcids.StopCodon, System.StringComparison.Ordinal))
            {
                return ProteinChange.Extension;
            }

            if (variantEffect.IsFrameshiftVariant())
            {
                return ProteinChange.Frameshift;
            }

            // a net gain of amino acids that HgvsUtilities recognizes as a duplicate
            if (altAminoAcids.Length > refAminoAcids.Length && HgvsUtilities.IsAminoAcidDuplicate(start, altAminoAcids, peptideSeq))
            {
                return ProteinChange.Duplication;
            }

            if (refAminoAcids.Length == 0 && altAminoAcids.Length != 0)
            {
                return ProteinChange.Insertion;
            }

            if (refAminoAcids.Length != 0 && altAminoAcids.Length == 0)
            {
                return ProteinChange.Deletion;
            }

            if (refAminoAcids.Length == 1 && altAminoAcids.Length == 1)
            {
                return ProteinChange.Substitution;
            }

            // the only remaining possibility is deletions/insertions
            return ProteinChange.DelIns;
        }
コード例 #4
0
        /// <summary>
        /// Determines the kind of genomic change a variant represents from its coordinates and alleles.
        /// </summary>
        /// <returns>
        /// Unknown when both alleles are identical; Deletion when the alternate allele is empty;
        /// Substitution, Inversion or DelIns when the reference span and alternate allele have equal length;
        /// DelIns when the lengths differ and the reference span is non-empty; otherwise Duplication or
        /// Insertion depending on <c>HgvsUtilities.IsDuplicateWithinInterval</c>.
        /// </returns>
        public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant)
        {
            // reference span derived from the coordinates; a negative span is clamped to zero
            int span      = variant.End - variant.Start + 1;
            int refLength = span < 0 ? 0 : span;
            int altLength = variant.AltAllele.Length;

            // identical alleles cannot be classified
            if (variant.RefAllele == variant.AltAllele)
            {
                return GenomicChange.Unknown;
            }

            // nothing left on the alternate side
            if (altLength == 0)
            {
                return GenomicChange.Deletion;
            }

            if (refLength != altLength)
            {
                // some reference bases are replaced by a different number of bases
                if (refLength != 0)
                {
                    return GenomicChange.DelIns;
                }

                // pure insertion: report a duplication when the helper finds the inserted
                // bases duplicated within the interval
                bool isDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand);
                return isDuplicate ? GenomicChange.Duplication : GenomicChange.Insertion;
            }

            // equal lengths from here on
            if (refLength == 1)
            {
                return GenomicChange.Substitution;
            }

            // an alternate allele equal to the reverse complement of the reference is an inversion
            string reverseComplement = SequenceUtilities.GetReverseComplement(variant.RefAllele);
            if (variant.AltAllele == reverseComplement)
            {
                return GenomicChange.Inversion;
            }

            return GenomicChange.DelIns;
        }
コード例 #5
0
        /// <summary>
        /// Formats the notation of a variant against a reference accession after rotating it with
        /// <c>VariantRotator.Right</c>.
        /// </summary>
        /// <remarks>
        /// When the rotated variant is classified as a duplication and the original variant is an insertion,
        /// the alternate bases are reported as the reference bases and the coordinates are moved so that the
        /// range ends at the smaller rotated coordinate and spans the length of the alternate bases.
        /// </remarks>
        public static string GetNotation(string refseqAccession, ISimpleVariant variant, ISequence refSequence,
                                         IInterval referenceInterval)
        {
            var rotated = VariantRotator.Right(variant, referenceInterval, refSequence, false);

            // order the coordinates regardless of how the rotated variant stores them
            var first  = Math.Min(rotated.Start, rotated.End);
            var last   = Math.Max(rotated.Start, rotated.End);
            var refSeq = rotated.RefAllele;
            var altSeq = rotated.AltAllele;

            var changeType = HgvsCodingNomenclature.GetGenomicChange(referenceInterval, false, refSequence, rotated);

            bool isDuplicatedInsertion = changeType == GenomicChange.Duplication && variant.Type == VariantType.insertion;
            if (isDuplicatedInsertion)
            {
                // describe the duplicated bases themselves: the range ends at the former start
                var anchor = first;
                refSeq = altSeq;
                last   = anchor;
                first  = anchor - altSeq.Length + 1;
            }

            var startText = first.ToString();
            var endText   = last.ToString();

            return HgvsUtilities.FormatDnaNotation(startText, endText, refseqAccession, refSeq, altSeq, changeType, NotationType);
        }
コード例 #6
0
        /// <summary>
        /// Builds the HGVS protein notation for a variant on a transcript.
        /// </summary>
        /// <remarks>
        /// The reference amino acids are first truncated at the first stop codon, then the alleles are
        /// adjusted by <c>HgvsUtilities.ShiftAndRotateAlleles</c>. The change is classified with
        /// <c>GetProteinChange</c> and the matching <c>HgvspNotation</c> formatter is invoked.
        /// </remarks>
        /// <returns>
        /// The notation string, or null when <c>IsHgvspNull</c> reports that no notation applies or the
        /// protein change category has no case in the switch.
        /// </returns>
        public static string GetHgvsProteinAnnotation(
            ITranscript transcript,
            string refAminoAcids,
            string altAminoAcids,
            string transcriptAltAllele,
            IMappedPosition position,
            VariantEffect variantEffect,
            ISimpleVariant variant,
            ISequence refSequence,
            string hgvscNotation,
            bool isMitochondrial)
        {
            if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation))
            {
                return null;
            }

            var peptideSeq = transcript.Translation.PeptideSeq;

            // Amino acid seq should never go past the stop codon: when a stop codon occurs before the end,
            // keep everything up to the first one and re-append it. The stop codon is a fixed token, so the
            // suffix test is ordinal, consistent with the ordinal Contains check.
            bool endsWithStop = refAminoAcids.EndsWith(AminoAcids.StopCodon, System.StringComparison.Ordinal);
            if (!endsWithStop && refAminoAcids.Contains(AminoAcids.StopCodon))
            {
                refAminoAcids = refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon;
            }

            int proteinStart = position.ProteinStart;

            HgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, peptideSeq);

            // end is derived from the adjusted start and is NOT recomputed when a case below moves proteinStart
            var end             = proteinStart + refAminoAcids.Length - 1;
            var refAbbreviation = AminoAcids.GetAbbreviations(refAminoAcids);
            var altAbbreviation = AminoAcids.GetAbbreviations(altAminoAcids);

            var proteinId     = transcript.Translation.ProteinId.WithVersion;
            var proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, peptideSeq, variantEffect);

            // ReSharper disable once SwitchStatementMissingSomeCases
            switch (proteinChange)
            {
            case ProteinChange.Substitution:
                return HgvspNotation.GetSubstitutionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation);

            case ProteinChange.Unknown:
                return HgvspNotation.GetUnknownNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation);

            case ProteinChange.Deletion:
                return HgvspNotation.GetDeletionNotation(proteinId, proteinStart, end, refAbbreviation, variantEffect.IsStopGained());

            case ProteinChange.Duplication:
                // move the start back by the length of the alternate amino acids
                proteinStart -= altAminoAcids.Length;
                return HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, end, altAbbreviation);

            case ProteinChange.Frameshift:
                return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele,
                                                 transcript, isMitochondrial, proteinId, proteinStart, end);

            case ProteinChange.None:
                return HgvspNotation.GetSilentNotation(hgvscNotation, proteinStart, refAbbreviation, variantEffect.IsStopRetained());

            case ProteinChange.DelIns:
                return HgvspNotation.GetDelInsNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation);

            case ProteinChange.Insertion:
                // GetProteinChange only reports Insertion for an empty reference, so end == proteinStart - 1;
                // swapping the two puts the coordinates in ascending order
                Swap.Int(ref proteinStart, ref end);
                return HgvspNotation.GetInsertionNotation(proteinId, proteinStart, end, altAbbreviation, peptideSeq);

            case ProteinChange.Extension:
                var altPeptideSequence = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, position.CdsEnd,
                                                                             transcriptAltAllele, transcript, isMitochondrial);

                // use the alternate amino acid at the affected position, or "Ter" when the alternate
                // peptide is too short to contain that position
                altAbbreviation = proteinStart <= altPeptideSequence.Length
                    ? AminoAcids.ConvertAminoAcidToAbbreviation(altPeptideSequence[proteinStart - 1])
                    : "Ter";
                var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSequence, peptideSeq, proteinStart - 1, false);

                return HgvspNotation.GetExtensionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation, countToStop);

            case ProteinChange.StartLost:
                return HgvspNotation.GetStartLostNotation(proteinId, proteinStart, end, refAbbreviation);
            }

            return null;
        }
コード例 #7
0
        /// <summary>
        /// Produces the HGVS coding notation of a variant relative to a transcript, optionally using
        /// caller-supplied transcript alleles.
        /// </summary>
        /// <param name="transcript">Transcript the notation is expressed against.</param>
        /// <param name="variant">Variant being annotated.</param>
        /// <param name="refSequence">Reference sequence passed to the genomic change classification.</param>
        /// <param name="regionStart">Index into <c>transcript.TranscriptRegions</c> for the variant start; the gap check is skipped unless it is greater than -1.</param>
        /// <param name="regionEnd">Index into <c>transcript.TranscriptRegions</c> for the variant end; the gap check is skipped unless it is greater than -1.</param>
        /// <param name="transcriptRef">Reference allele to report; when null or empty it is derived from the variant.</param>
        /// <param name="transcriptAlt">Alternate allele to report; when null or empty it is derived from the variant.</param>
        /// <returns>
        /// The formatted notation, or null when the variant is a reference call, the alternate allele
        /// contains a non-canonical base, the variant lies within a single gap region, or the cDNA
        /// position offsets could not be obtained.
        /// </returns>
        public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence,
                                                int regionStart, int regionEnd, string transcriptRef, string transcriptAlt)
        {
            // sanity check: don't try to handle odd characters and make sure this is not a reference allele
            if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
            {
                return null;
            }

            // do not report HGVSc notation when variant lands inside gap region
            if (regionStart > -1 && regionEnd > -1)
            {
                var startRegion = transcript.TranscriptRegions[regionStart];
                var endRegion   = transcript.TranscriptRegions[regionEnd];
                if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap &&
                    endRegion.Type == TranscriptRegionType.Gap)
                {
                    return null;
                }
            }

            bool onReverseStrand = transcript.Gene.OnReverseStrand;

            // prefer the supplied transcript alleles; otherwise express the variant alleles on the transcript strand
            string refAllele = string.IsNullOrEmpty(transcriptRef)
                ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele)
                : transcriptRef;
            string altAllele = string.IsNullOrEmpty(transcriptAlt)
                ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele)
                : transcriptAlt;

            // decide event type from HGVS nomenclature
            var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

            int variantStart = variant.Start;
            int variantEnd   = variant.End;

            // duplications get their coordinates, reference allele and regions from ShiftDuplication
            if (genomicChange == GenomicChange.Duplication)
            {
                (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
            }

            var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true);
            var endPositionOffset   = variantStart == variantEnd
                ? startPositionOffset
                : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false);

            // on the reverse strand the start and end offsets trade places
            if (onReverseStrand)
            {
                var tmp = startPositionOffset;
                startPositionOffset = endPositionOffset;
                endPositionOffset   = tmp;
            }

            // For insertions with a missing start offset, derive it from the end offset (position + 1).
            // This is only possible when the end offset exists; without the null guard a missing end
            // offset would throw a NullReferenceException instead of reaching the sanity check below.
            if (startPositionOffset == null && endPositionOffset != null && variant.Type == VariantType.insertion)
            {
                startPositionOffset = new PositionOffset(endPositionOffset.Position + 1, endPositionOffset.Offset, $"{endPositionOffset.Position + 1}", endPositionOffset.HasStopCodonNotation);
            }

            // sanity check: make sure we have coordinates
            if (startPositionOffset == null || endPositionOffset == null)
            {
                return null;
            }

            var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,
                                                 startPositionOffset, endPositionOffset, transcript.Translation != null);

            // generic formatting
            return hgvsNotation.ToString();
        }