/// <summary>
/// constructor: stores the supplied annotation objects, computes the HGVS start and end
/// coordinates for the alternate allele, and builds the HGVS notation container.
/// </summary>
public HgvsCodingNomenclature(TranscriptAnnotation ta, Transcript transcript, VariantFeature variant,
    ICompressedSequence compressedSequence, bool isGenomicDuplicate)
{
    _ta                 = ta;
    _transcript         = transcript;
    _variant            = variant;
    _compressedSequence = compressedSequence;
    _isGenomicDuplicate = isGenomicDuplicate;
    _sb                 = new StringBuilder();

    // strand of the gene that owns this transcript
    var onReverseStrand = transcript.Gene.OnReverseStrand;

    // the annotated allele may differ from the input allele for insertions/deletions
    var altAllele = ta.AlternateAllele;

    // express the alternate bases on the transcript strand
    string alleleBases = onReverseStrand
        ? SequenceUtilities.GetReverseComplement(altAllele.AlternateAllele)
        : altAllele.AlternateAllele;

    // fill in the HGVS start and end positions
    GetReferenceCoordinates(transcript, altAllele, out _hgvsStart, out _hgvsEnd);

    // gather the remaining pieces of the notation
    var referenceAllele = ta.TranscriptReferenceAllele;
    var transcriptName  = FormatUtilities.CombineIdAndVersion(transcript.Id, transcript.Version);
    var hasTranslation  = _transcript.Translation != null;

    _hgvsNotation = new HgvsNotation(referenceAllele, alleleBases, transcriptName, _hgvsStart, _hgvsEnd,
        hasTranslation);
}
/// <summary>
/// returns true if this insertion has the same amino acids preceding it, in which case the
/// notation is rewritten as a duplication (type, start, end and alternate abbreviation are updated)
/// [TranscriptVariationAllele.pm:1494 _check_for_peptide_duplication]
/// </summary>
private bool IsAminoAcidDuplicate(HgvsNotation hn, string transcriptPeptides)
{
    // nothing to compare against when there are no alternate amino acids
    if (hn.AlternateAminoAcids == null)
    {
        return false;
    }

    var numAltAminoAcids = hn.AlternateAminoAcidsLen;

    // index in the peptide string where the candidate duplicate would begin
    var candidateBegin = hn.Start - numAltAminoAcids - 1;

    if (candidateBegin < 0)
    {
        return false;
    }

    // grab the residues directly upstream; if the window runs past the end of the
    // peptide sequence, an empty string is compared instead
    var upstreamAminoAcids = "";

    if (candidateBegin + numAltAminoAcids <= transcriptPeptides.Length)
    {
        upstreamAminoAcids = transcriptPeptides.Substring(candidateBegin, numAltAminoAcids);
    }

    if (upstreamAminoAcids != hn.AlternateAminoAcids)
    {
        return false;
    }

    // update our HGVS notation: the duplicated range ends just before the original start
    hn.Type                  = ProteinChange.Duplication;
    hn.End                   = hn.Start - 1;
    hn.Start                -= hn.AlternateAminoAcidsLen;
    hn.AlternateAbbreviation = _aminoAcids.GetAbbreviations(hn.AlternateAminoAcids);

    return true;
}
/// <summary>
/// returns the HGVS representation of the position: reference abbreviation, start and
/// alternate abbreviation when start equals end; otherwise reference abbreviation, start,
/// an underscore, alternate abbreviation and end.
/// </summary>
private static string GetHgvsRangeString(HgvsNotation hn)
{
    var isSinglePosition = hn.Start == hn.End;

    // both forms begin with the reference abbreviation followed by the start position
    var prefix = hn.ReferenceAbbreviation + hn.Start;

    return isSinglePosition
        ? prefix + hn.AlternateAbbreviation
        : prefix + '_' + hn.AlternateAbbreviation + hn.End;
}
/// <summary>
/// constructor: stores the supplied annotation objects and builds the HGVS notation
/// container from the annotated amino acids, the protein ID/version and the protein range.
/// </summary>
public HgvsProteinNomenclature(VariantEffect variantEffect, TranscriptAnnotation ta, Transcript transcript,
    VariantFeature variant, ICompressedSequence compressedSequence, AminoAcids aminoAcids)
{
    _variantEffect      = variantEffect;
    _ta                 = ta;
    _transcript         = transcript;
    _variant            = variant;
    _compressedSequence = compressedSequence;
    _aminoAcids         = aminoAcids;

    var referenceAminoAcids = ta.ReferenceAminoAcids;
    var alternateAminoAcids = ta.AlternateAminoAcids;

    // NOTE(review): Translation is dereferenced without a null check - presumably callers
    // only construct this class for transcripts that have a translation; confirm
    var proteinName = FormatUtilities.CombineIdAndVersion(transcript.Translation.ProteinId,
        transcript.Translation.ProteinVersion);

    _hgvsNotation = new HgvsNotation(referenceAminoAcids, alternateAminoAcids, proteinName, ta.ProteinBegin,
        ta.ProteinEnd);
}
/// <summary>
/// makes sure the start endpoint does not come after the end endpoint: missing offsets are
/// first replaced with zero, then start and end are exchanged when (position + offset) of
/// the start is greater than that of the end. Nothing is exchanged when the end endpoint
/// carries stop codon notation.
/// </summary>
private static void SwapEndpoints(HgvsNotation hn)
{
    // normalize undefined offsets so they can take part in the arithmetic below
    if (!hn.Start.Offset.HasValue)
    {
        hn.Start.Offset = 0;
    }

    if (!hn.End.Offset.HasValue)
    {
        hn.End.Offset = 0;
    }

    // endpoints with stop codon notation are left in their current order
    if (hn.End.HasStopCodonNotation)
    {
        return;
    }

    // NOTE(review): the positions look nullable; with a null position the comparison
    // evaluates to false and the endpoints stay as they are - confirm this is intended
    var startCoordinate = hn.Start.Position + hn.Start.Offset;
    var endCoordinate   = hn.End.Position + hn.End.Offset;

    if (startCoordinate > endCoordinate)
    {
        var originalStart = hn.Start;
        hn.Start = hn.End;
        hn.End   = originalStart;
    }
}
/// <summary>
/// determines the genomic change that resulted from this variation and stores it in hn.Type
/// [Sequence.pm:482 hgvs_variant_notation]. For insertions and duplications the start and
/// end positions of the notation are rewritten as well; for duplications the reference
/// bases and allele multiple are also updated.
/// </summary>
private void GetGenomicChange(Transcript transcript, HgvsNotation hn, bool isGenomicDuplicate)
{
    hn.Type = GenomicChange.Unknown;

    // both positions must be defined before anything can be classified
    if (hn.Start.Position == null || hn.End.Position == null)
    {
        return;
    }

    int begin = (int)hn.Start.Position;
    int end   = (int)hn.End.Position;

    // number of reference bases spanned by the positions; negative spans are clamped to zero
    int span        = end - begin + 1;
    int numRefBases = span < 0 ? 0 : span;

    // number of bases in the alternate allele
    var numAltBases = hn.AlternateBases.Length;

    // sanity check: identical alleles stay classified as unknown
    if (hn.ReferenceBases == hn.AlternateBases)
    {
        return;
    }

    // no alternate bases: deletion
    if (numAltBases == 0)
    {
        hn.Type = GenomicChange.Deletion;
        return;
    }

    // same length: substitution, inversion, or a same-sized delins
    if (numRefBases == numAltBases)
    {
        if (numRefBases == 1)
        {
            hn.Type = GenomicChange.Substitution;
        }
        else
        {
            var reverseComplement = SequenceUtilities.GetReverseComplement(hn.ReferenceBases);

            if (hn.AlternateBases == reverseComplement)
            {
                hn.Type = GenomicChange.Inversion;
            }
            else
            {
                hn.Type = GenomicChange.InDel;
            }
        }

        return;
    }

    // no reference bases: insertion. If the preceding reference nucleotides match the
    // inserted bases, it is reported as a duplication instead.
    if (numRefBases == 0)
    {
        int precedingBegin = end - numAltBases;
        bool isDuplicate   = isGenomicDuplicate;

        if (!isDuplicate && _compressedSequence != null && precedingBegin >= 0)
        {
            // fetch as many nucleotides ahead of the insertion as the insertion is long
            var upstreamBases = SequenceUtilities.GetSubSubstring(
                transcript.Start,
                transcript.End,
                transcript.Gene.OnReverseStrand,
                precedingBegin,
                precedingBegin + numAltBases - 1,
                _compressedSequence);

            isDuplicate = upstreamBases == hn.AlternateBases;
        }

        if (!isDuplicate)
        {
            // plain insertion: the display end becomes the start and vice versa
            hn.Type           = GenomicChange.Insertion;
            hn.Start.Position = end;
            hn.End.Position   = begin;
            return;
        }

        // duplication: the positions are shifted back by the alternate allele length
        hn.Type           = GenomicChange.Duplication;
        hn.Start.Position = begin - numAltBases;
        hn.End.Position   = hn.Start.Position + numAltBases - 1;
        hn.AlleleMultiple = 2;
        hn.ReferenceBases = hn.AlternateBases;
        return;
    }

    // The alleles have different, non-zero lengths. This is a delins unless the alternate
    // allele is the reference allele repeated a whole number of times.
    if (numAltBases % numRefBases == 0)
    {
        // NOTE(review): the multiple stays assigned even when the repeat check below fails
        hn.AlleleMultiple = numAltBases / numRefBases;

        string repeatedReference = string.Concat(Enumerable.Repeat(hn.ReferenceBases, hn.AlleleMultiple));

        if (hn.AlternateBases == repeatedReference)
        {
            if (hn.AlleleMultiple == 2)
            {
                hn.Type = GenomicChange.Duplication;
            }
            else
            {
                hn.Type = GenomicChange.Multiple;
            }

            return;
        }
    }

    // deletion/insertion
    hn.Type = GenomicChange.InDel;
}