/// <summary>
/// Builds the HGVS protein notation for a frameshift variant.
/// </summary>
/// <param name="refSequence">Reference sequence, forwarded to the alternate peptide computation.</param>
/// <param name="cdsBegin">CDS begin coordinate, forwarded to the alternate peptide computation.</param>
/// <param name="cdsEnd">CDS end coordinate, forwarded to the alternate peptide computation.</param>
/// <param name="transcriptAltAllele">Alternate allele in transcript orientation.</param>
/// <param name="transcript">Transcript whose translation supplies the reference peptide sequence.</param>
/// <param name="isMitochondrial">Forwarded to the alternate peptide computation.</param>
/// <param name="proteinId">Protein accession placed in the notation.</param>
/// <param name="start">Protein start position of the change.</param>
/// <param name="end">Protein end position of the change.</param>
/// <returns>
/// A substitution notation ending in "Ter" when the first changed amino acid is the stop codon,
/// otherwise a frameshift notation.
/// </returns>
private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd, string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start, int end)
{
    var referencePeptide = transcript.Translation.PeptideSeq;
    var alternatePeptide = HgvsUtilities.GetAltPeptideSequence(
        refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, isMitochondrial);

    // make sure the scan begins at the smaller of the two coordinates
    if (start > end)
    {
        Swap.Int(ref start, ref end);
    }

    // Item1: position of the change, Item2: reference amino acid, Item3: alternate amino acid
    var firstChange = HgvsUtilities.GetChangesAfterFrameshift(start, referencePeptide, alternatePeptide);
    start = firstChange.Item1;

    var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(firstChange.Item2);

    if (firstChange.Item3 == AminoAcids.StopCodonChar)
    {
        // the change immediately yields a stop codon, so it is written as a substitution
        return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter");
    }
    else
    {
        var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(firstChange.Item3);
        var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(
            alternatePeptide, referencePeptide, start - 1, true);

        return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, countToStop);
    }
}
/// <summary>
/// Produces the HGVS coding (c.) notation for a variant on a transcript.
/// </summary>
/// <param name="transcript">Transcript the variant is annotated against.</param>
/// <param name="variant">Variant to describe.</param>
/// <param name="refSequence">Reference sequence, used when classifying the genomic change.</param>
/// <param name="regionStart">Transcript region index associated with the variant start.</param>
/// <param name="regionEnd">Transcript region index associated with the variant end.</param>
/// <returns>
/// The notation string, or null when the variant is a reference allele, the alternate allele
/// contains a non-canonical base, a cDNA coordinate is unavailable, or a position exceeds the
/// transcript length computed below.
/// </returns>
public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, int regionStart, int regionEnd)
{
    // reference alleles are not annotated
    if (variant.Type == VariantType.reference)
    {
        return null;
    }

    // alternate alleles with unusual characters are not annotated
    if (SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
    {
        return null;
    }

    var onReverseStrand = transcript.Gene.OnReverseStrand;

    // express both alleles in transcript orientation
    string refAllele;
    string altAllele;
    if (onReverseStrand)
    {
        refAllele = SequenceUtilities.GetReverseComplement(variant.RefAllele);
        altAllele = SequenceUtilities.GetReverseComplement(variant.AltAllele);
    }
    else
    {
        refAllele = variant.RefAllele;
        altAllele = variant.AltAllele;
    }

    // classify the event according to HGVS nomenclature
    var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

    var variantStart = variant.Start;
    var variantEnd = variant.End;

    // duplications take their coordinates, reference allele and regions from ShiftDuplication
    if (genomicChange == GenomicChange.Duplication)
    {
        (variantStart, variantEnd, refAllele, regionStart, regionEnd) =
            transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
    }

    var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart);

    // a single-position variant shares one offset for both ends
    var endPositionOffset = variantStart == variantEnd
        ? startPositionOffset
        : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd);

    // on the reverse strand the two ends trade places
    if (onReverseStrand)
    {
        (startPositionOffset, endPositionOffset) = (endPositionOffset, startPositionOffset);
    }

    // both coordinates are required
    if (startPositionOffset == null || endPositionOffset == null)
    {
        return null;
    }

    // no notation for positions beyond the transcript length
    var transcriptLen = transcript.End - transcript.Start + 1;
    if (startPositionOffset.Position > transcriptLen)
    {
        return null;
    }

    if (endPositionOffset.Position > transcriptLen)
    {
        return null;
    }

    var hasTranslation = transcript.Translation != null;
    var notation = new HgvscNotation(
        refAllele,
        altAllele,
        transcript.Id.WithVersion,
        genomicChange,
        startPositionOffset,
        endPositionOffset,
        hasTranslation);

    return notation.ToString();
}
/// <summary>
/// Classifies a protein-level change from the reference/alternate amino acids and the
/// predicted variant effect.
/// </summary>
/// <param name="start">Protein position at which the change begins.</param>
/// <param name="refAminoAcids">Reference amino acids.</param>
/// <param name="altAminoAcids">Alternate amino acids.</param>
/// <param name="peptideSeq">Peptide sequence, used for the duplication test.</param>
/// <param name="variantEffect">Source of the stop-retained, start-lost, stop-lost and frameshift flags.</param>
/// <returns>The <see cref="ProteinChange"/> category; checks are evaluated in the order written.</returns>
internal static ProteinChange GetProteinChange(int start, string refAminoAcids, string altAminoAcids, string peptideSeq, IVariantEffect variantEffect)
{
    // identical amino acids or a retained stop codon: nothing to report
    if (refAminoAcids == altAminoAcids || variantEffect.IsStopRetained())
    {
        return ProteinChange.None;
    }

    bool refIsEmpty = refAminoAcids.Length == 0;

    // insertion before the transcript
    if (refIsEmpty && start == 1)
    {
        return ProteinChange.None;
    }

    if (variantEffect.IsStartLost())
    {
        return ProteinChange.StartLost;
    }

    // todo: add start gained

    // an extension is only called when the stop codon itself is affected
    if (variantEffect.IsStopLost() && refAminoAcids.StartsWith(AminoAcids.StopCodon))
    {
        return ProteinChange.Extension;
    }

    if (variantEffect.IsFrameshiftVariant())
    {
        return ProteinChange.Frameshift;
    }

    // the alternate length is read only from here on, after the effect-based categories
    int altLength = altAminoAcids.Length;
    int refLength = refAminoAcids.Length;

    if (altLength > refLength && HgvsUtilities.IsAminoAcidDuplicate(start, altAminoAcids, peptideSeq))
    {
        return ProteinChange.Duplication;
    }

    bool altIsEmpty = altLength == 0;

    if (refIsEmpty && !altIsEmpty)
    {
        return ProteinChange.Insertion;
    }

    if (!refIsEmpty && altIsEmpty)
    {
        return ProteinChange.Deletion;
    }

    if (refLength == 1 && altLength == 1)
    {
        return ProteinChange.Substitution;
    }

    // anything left is a combined deletion/insertion
    return ProteinChange.DelIns;
}
/// <summary>
/// Classifies a variant as a substitution, deletion, inversion, deletion/insertion,
/// duplication or insertion.
/// </summary>
/// <param name="interval">Interval forwarded to the duplication test.</param>
/// <param name="onReverseStrand">Strand flag forwarded to the duplication test.</param>
/// <param name="refSequence">Reference sequence forwarded to the duplication test.</param>
/// <param name="variant">Variant to classify.</param>
/// <returns>
/// The <see cref="GenomicChange"/> category, or <c>GenomicChange.Unknown</c> when the reference
/// and alternate alleles are identical.
/// </returns>
public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant)
{
    // reference length comes from the coordinates; a negative span is clamped to zero
    int span = variant.End - variant.Start + 1;
    int refLength = span < 0 ? 0 : span;

    int altLength = variant.AltAllele.Length;

    // the alleles must actually differ
    if (variant.RefAllele == variant.AltAllele)
    {
        return GenomicChange.Unknown;
    }

    // nothing left on the alternate side: deletion
    if (altLength == 0)
    {
        return GenomicChange.Deletion;
    }

    if (refLength != altLength)
    {
        // both sides non-empty with different lengths: deletion/insertion
        if (refLength != 0)
        {
            return GenomicChange.DelIns;
        }

        // pure insertion: it is reported as a duplication when the duplicate test succeeds
        bool isGenomicDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand);
        if (isGenomicDuplicate)
        {
            return GenomicChange.Duplication;
        }

        return GenomicChange.Insertion;
    }

    // equal lengths: a single base is a substitution
    if (refLength == 1)
    {
        return GenomicChange.Substitution;
    }

    // equal lengths > 1: inversion if the alternate allele is the reverse complement of the reference
    string reverseComplementedRef = SequenceUtilities.GetReverseComplement(variant.RefAllele);
    if (variant.AltAllele == reverseComplementedRef)
    {
        return GenomicChange.Inversion;
    }

    return GenomicChange.DelIns;
}
/// <summary>
/// Produces the DNA-level HGVS notation for a variant after right-rotating it within the
/// reference interval.
/// </summary>
/// <param name="refseqAccession">Accession placed in the notation.</param>
/// <param name="variant">Variant to describe.</param>
/// <param name="refSequence">Reference sequence used for rotation and classification.</param>
/// <param name="referenceInterval">Interval used for rotation and classification.</param>
/// <returns>The string produced by <c>HgvsUtilities.FormatDnaNotation</c>.</returns>
public static string GetNotation(string refseqAccession, ISimpleVariant variant, ISequence refSequence, IInterval referenceInterval)
{
    var rotated = VariantRotator.Right(variant, referenceInterval, refSequence, false);

    // order the two coordinates so that start <= end
    var rotatedStart = rotated.Start;
    var rotatedEnd = rotated.End;
    var start = Math.Min(rotatedStart, rotatedEnd);
    var end = Math.Max(rotatedStart, rotatedEnd);

    var referenceBases = rotated.RefAllele;
    var alternateBases = rotated.AltAllele;

    var changeType = HgvsCodingNomenclature.GetGenomicChange(referenceInterval, false, refSequence, rotated);

    // an inserted duplicate is described by the span of the copied bases, which ends at 'start'
    if (changeType == GenomicChange.Duplication && variant.Type == VariantType.insertion)
    {
        referenceBases = alternateBases;
        end = start;
        start = start - referenceBases.Length + 1;
    }

    var startText = start.ToString();
    var endText = end.ToString();

    return HgvsUtilities.FormatDnaNotation(
        startText, endText, refseqAccession, referenceBases, alternateBases, changeType, NotationType);
}
/// <summary>
/// Produces the HGVS protein (p.) notation for a variant on a translated transcript.
/// </summary>
/// <param name="transcript">Transcript whose translation supplies the peptide sequence and protein ID.</param>
/// <param name="refAminoAcids">Reference amino acids affected by the variant.</param>
/// <param name="altAminoAcids">Alternate amino acids produced by the variant.</param>
/// <param name="transcriptAltAllele">Alternate allele in transcript orientation.</param>
/// <param name="position">Mapped position supplying CDS and protein coordinates.</param>
/// <param name="variantEffect">Predicted effect flags used for classification.</param>
/// <param name="variant">The variant being annotated.</param>
/// <param name="refSequence">Reference sequence, used when the alternate peptide must be computed.</param>
/// <param name="hgvscNotation">Previously computed HGVS coding notation.</param>
/// <param name="isMitochondrial">Forwarded to the alternate peptide computation.</param>
/// <returns>
/// The notation string, or null when <c>IsHgvspNull</c> rejects the input or the protein change
/// category has no case below.
/// </returns>
public static string GetHgvsProteinAnnotation(
    ITranscript transcript,
    string refAminoAcids,
    string altAminoAcids,
    string transcriptAltAllele,
    IMappedPosition position,
    VariantEffect variantEffect,
    ISimpleVariant variant,
    ISequence refSequence,
    string hgvscNotation,
    bool isMitochondrial)
{
    if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation))
    {
        return null;
    }

    var referencePeptide = transcript.Translation.PeptideSeq;

    // the reference amino acids should never run past the stop codon: when a stop codon occurs
    // anywhere but the end, keep only the part before the first one and re-append the stop codon
    bool hasInternalStop = !refAminoAcids.EndsWith(AminoAcids.StopCodon)
                           && refAminoAcids.Contains(AminoAcids.StopCodon);
    if (hasInternalStop)
    {
        refAminoAcids = refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon;
    }

    int proteinStart = position.ProteinStart;
    HgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, referencePeptide);

    var proteinEnd = proteinStart + refAminoAcids.Length - 1;

    var refAbbreviation = AminoAcids.GetAbbreviations(refAminoAcids);
    var altAbbreviation = AminoAcids.GetAbbreviations(altAminoAcids);

    var proteinId = transcript.Translation.ProteinId.WithVersion;
    var proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, referencePeptide, variantEffect);

    // ReSharper disable once SwitchStatementMissingSomeCases
    switch (proteinChange)
    {
        case ProteinChange.None:
        {
            return HgvspNotation.GetSilentNotation(
                hgvscNotation, proteinStart, refAbbreviation, variantEffect.IsStopRetained());
        }

        case ProteinChange.Unknown:
        {
            return HgvspNotation.GetUnknownNotation(
                proteinId, proteinStart, proteinEnd, refAbbreviation, altAbbreviation);
        }

        case ProteinChange.StartLost:
        {
            return HgvspNotation.GetStartLostNotation(proteinId, proteinStart, proteinEnd, refAbbreviation);
        }

        case ProteinChange.Substitution:
        {
            return HgvspNotation.GetSubstitutionNotation(
                proteinId, proteinStart, refAbbreviation, altAbbreviation);
        }

        case ProteinChange.Deletion:
        {
            return HgvspNotation.GetDeletionNotation(
                proteinId, proteinStart, proteinEnd, refAbbreviation, variantEffect.IsStopGained());
        }

        case ProteinChange.Insertion:
        {
            // exchange the two coordinates before building the insertion notation
            Swap.Int(ref proteinStart, ref proteinEnd);
            return HgvspNotation.GetInsertionNotation(
                proteinId, proteinStart, proteinEnd, altAbbreviation, referencePeptide);
        }

        case ProteinChange.Duplication:
        {
            // move the start back by the length of the alternate amino acids
            proteinStart -= altAminoAcids.Length;
            return HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, proteinEnd, altAbbreviation);
        }

        case ProteinChange.DelIns:
        {
            return HgvspNotation.GetDelInsNotation(
                proteinId, proteinStart, proteinEnd, refAbbreviation, altAbbreviation);
        }

        case ProteinChange.Frameshift:
        {
            return GetHgvsFrameshiftNotation(
                refSequence,
                position.CdsStart,
                position.CdsEnd,
                transcriptAltAllele,
                transcript,
                isMitochondrial,
                proteinId,
                proteinStart,
                proteinEnd);
        }

        case ProteinChange.Extension:
        {
            var alternatePeptide = HgvsUtilities.GetAltPeptideSequence(
                refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele, transcript, isMitochondrial);

            // use the alternate residue at the affected position, or "Ter" when the alternate
            // peptide is too short to contain that position
            if (proteinStart <= alternatePeptide.Length)
            {
                altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(alternatePeptide[proteinStart - 1]);
            }
            else
            {
                altAbbreviation = "Ter";
            }

            var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(
                alternatePeptide, referencePeptide, proteinStart - 1, false);

            return HgvspNotation.GetExtensionNotation(
                proteinId, proteinStart, refAbbreviation, altAbbreviation, countToStop);
        }
    }

    return null;
}
/// <summary>
/// Produces the HGVS coding (c.) notation for a variant on a transcript, optionally using
/// caller-supplied transcript-oriented alleles.
/// </summary>
/// <param name="transcript">Transcript the variant is annotated against.</param>
/// <param name="variant">Variant to describe.</param>
/// <param name="refSequence">Reference sequence, used when classifying the genomic change.</param>
/// <param name="regionStart">Index into <c>transcript.TranscriptRegions</c> for the variant start; values below 0 skip the gap check.</param>
/// <param name="regionEnd">Index into <c>transcript.TranscriptRegions</c> for the variant end; values below 0 skip the gap check.</param>
/// <param name="transcriptRef">Reference allele in transcript orientation; when null or empty it is derived from the variant.</param>
/// <param name="transcriptAlt">Alternate allele in transcript orientation; when null or empty it is derived from the variant.</param>
/// <returns>
/// The notation string, or null when the variant is a reference allele, the alternate allele
/// contains a non-canonical base, the variant lies within a single gap region, or a cDNA
/// coordinate is unavailable.
/// </returns>
public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, int regionStart, int regionEnd, string transcriptRef, string transcriptAlt)
{
    // sanity check: don't try to handle odd characters and make sure this is not a reference allele
    if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
    {
        return null;
    }

    // do not report HGVSc notation when the variant lands inside a gap region
    if (regionStart > -1 && regionEnd > -1)
    {
        var startRegion = transcript.TranscriptRegions[regionStart];
        var endRegion = transcript.TranscriptRegions[regionEnd];

        if (startRegion.Id == endRegion.Id
            && startRegion.Type == TranscriptRegionType.Gap
            && endRegion.Type == TranscriptRegionType.Gap)
        {
            return null;
        }
    }

    bool onReverseStrand = transcript.Gene.OnReverseStrand;

    // prefer the caller-supplied transcript alleles; otherwise orient the variant alleles ourselves
    string refAllele = string.IsNullOrEmpty(transcriptRef)
        ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele)
        : transcriptRef;

    string altAllele = string.IsNullOrEmpty(transcriptAlt)
        ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele)
        : transcriptAlt;

    // decide event type from HGVS nomenclature
    var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

    int variantStart = variant.Start;
    int variantEnd = variant.End;

    // duplications take their coordinates, reference allele and regions from ShiftDuplication
    if (genomicChange == GenomicChange.Duplication)
    {
        (variantStart, variantEnd, refAllele, regionStart, regionEnd) =
            transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
    }

    var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true);

    // a single-position variant shares one offset for both ends
    var endPositionOffset = variantStart == variantEnd
        ? startPositionOffset
        : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false);

    // on the reverse strand the two ends trade places
    if (onReverseStrand)
    {
        var tmp = startPositionOffset;
        startPositionOffset = endPositionOffset;
        endPositionOffset = tmp;
    }

    // For insertions with no start coordinate, derive one from the end coordinate.
    // This is only possible when the end coordinate exists; previously a missing end
    // coordinate caused a NullReferenceException here instead of the null return below.
    if (startPositionOffset == null && endPositionOffset != null && variant.Type == VariantType.insertion)
    {
        startPositionOffset = new PositionOffset(
            endPositionOffset.Position + 1,
            endPositionOffset.Offset,
            $"{endPositionOffset.Position + 1}",
            endPositionOffset.HasStopCodonNotation);
    }

    // sanity check: make sure we have coordinates
    if (startPositionOffset == null || endPositionOffset == null)
    {
        return null;
    }

    var hgvsNotation = new HgvscNotation(
        refAllele,
        altAllele,
        transcript.Id.WithVersion,
        genomicChange,
        startPositionOffset,
        endPositionOffset,
        transcript.Translation != null);

    // generic formatting
    return hgvsNotation.ToString();
}