/// <summary>
/// Builds an <c>HgvscNotation</c> for the variant on the given transcript and returns its string form.
/// </summary>
/// <param name="transcript">transcript the variant is described against</param>
/// <param name="variant">variant supplying the type, alleles and start/end positions</param>
/// <param name="refSequence">reference sequence forwarded to <c>GetGenomicChange</c></param>
/// <param name="regionStart">region value forwarded to <c>GetCdnaPositionOffset</c> for the variant start</param>
/// <param name="regionEnd">region value forwarded to <c>GetCdnaPositionOffset</c> for the variant end</param>
/// <returns>
/// the notation string, or null for reference variants, alt alleles containing non-canonical bases,
/// missing position offsets, or positions beyond the transcript length
/// </returns>
public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, int regionStart, int regionEnd)
{
    // reference variants are never annotated
    if (variant.Type == VariantType.reference)
    {
        return null;
    }

    // neither are alt alleles containing non-canonical bases
    if (SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
    {
        return null;
    }

    bool onReverseStrand = transcript.Gene.OnReverseStrand;

    // alleles are expressed on the transcript strand
    string refAllele = variant.RefAllele;
    string altAllele = variant.AltAllele;

    if (onReverseStrand)
    {
        refAllele = SequenceUtilities.GetReverseComplement(refAllele);
        altAllele = SequenceUtilities.GetReverseComplement(altAllele);
    }

    // decide the event type from HGVS nomenclature
    var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

    var start = variant.Start;
    var end   = variant.End;

    // duplications get their coordinates, ref allele and regions replaced by ShiftDuplication
    if (genomicChange == GenomicChange.Duplication)
    {
        (start, end, refAllele, regionStart, regionEnd) =
            transcript.TranscriptRegions.ShiftDuplication(start, altAllele, onReverseStrand);
    }

    var startOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, start, regionStart);

    // a single-position variant reuses the start offset instead of a second lookup
    var endOffset = startOffset;
    if (start != end)
    {
        endOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, end, regionEnd);
    }

    // on the reverse strand the two offsets trade places
    if (onReverseStrand)
    {
        (startOffset, endOffset) = (endOffset, startOffset);
    }

    // sanity check: make sure we have coordinates
    if (startOffset == null || endOffset == null)
    {
        return null;
    }

    // no notation when either position lies past the transcript
    var transcriptLength = transcript.End - transcript.Start + 1;
    bool pastTranscript  = startOffset.Position > transcriptLength || endOffset.Position > transcriptLength;

    if (pastTranscript)
    {
        return null;
    }

    // generic formatting
    return new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange, startOffset,
        endOffset, transcript.Translation != null).ToString();
}
/// <summary>
/// Returns true when any of the following holds: the HGVS c. notation is null or empty, the variant is a
/// reference variant, the transcript alt allele contains a non-canonical base, or either CDS coordinate is -1.
/// </summary>
private static bool IsHgvspNull(string transcriptAltAllele, int cdsStart, int cdsEnd, ISimpleVariant variant, string hgvscNotation)
{
    // the checks run in the same order as a single short-circuiting || chain would
    if (string.IsNullOrEmpty(hgvscNotation))
    {
        return true;
    }

    if (variant.Type == VariantType.reference)
    {
        return true;
    }

    if (SequenceUtilities.HasNonCanonicalBase(transcriptAltAllele))
    {
        return true;
    }

    // -1 marks a missing CDS coordinate
    return cdsStart == -1 || cdsEnd == -1;
}
/// <summary>
/// sets HgvsCodingSequenceName on the transcript annotation to the cDNA-level HGVS string for this allele;
/// leaves it untouched when a sanity check fails [TranscriptVariationAllele.pm:568 hgvs_transcript]
/// </summary>
public void SetAnnotation()
{
    // reference variants are not annotated
    if (_variant.IsReference)
    {
        return;
    }

    // don't try to handle odd characters
    if (SequenceUtilities.HasNonCanonicalBase(_ta.TranscriptAlternateAllele))
    {
        return;
    }

    GetGenomicChange(_transcript, _hgvsNotation, _isGenomicDuplicate);

    GetCdnaPosition(_hgvsNotation.Start);

    // a single-position event shares one endpoint object; otherwise the end is resolved separately
    if (_hgvsStart == _hgvsEnd)
    {
        _hgvsNotation.End = _hgvsNotation.Start;
    }
    else
    {
        GetCdnaPosition(_hgvsNotation.End);
    }

    // sanity check: make sure we have coordinates
    if (_hgvsNotation.Start.Position == null || _hgvsNotation.End.Position == null)
    {
        return;
    }

    // no notation when either endpoint lies past the transcript
    var transcriptLength = _transcript.End - _transcript.Start + 1;

    if (_hgvsNotation.Start.Position > transcriptLength || _hgvsNotation.End.Position > transcriptLength)
    {
        return;
    }

    // make sure that start is always less than end
    SwapEndpoints(_hgvsNotation);

    // generic formatting
    _ta.HgvsCodingSequenceName = FormatHgvsString();
}
/// <summary>
/// sets HgvsProteinSequenceName on the transcript annotation to the protein-level HGVS string for this
/// allele; leaves it untouched when a sanity check fails [TranscriptVariationAllele.pm:717 hgvs_protein]
/// </summary>
public void SetAnnotation()
{
    // sanity check: don't try to handle odd characters, make sure this is not a reference allele,
    // and make sure that we have protein coordinates
    //
    // NOTE(review): this condition used to test !_ta.HasValidCdsEnd twice. The redundant operand has been
    // removed, which does not change behavior. One of the two was presumably meant to validate the CDS
    // start instead - confirm against the members available on _ta and add that check if so.
    if (_variant.IsReference || !_ta.HasValidCdsEnd ||
        SequenceUtilities.HasNonCanonicalBase(_ta.TranscriptAlternateAllele))
    {
        return;
    }

    // a stop retained variant is reported as the coding name followed by "(p.=)"
    if (_variantEffect.IsStopRetained())
    {
        _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
        return;
    }

    // clip the alleles
    AminoAcids.RemovePrefixAndSuffix(_hgvsNotation);

    // set the protein change: the general classification first, refined only when there is a change
    _hgvsNotation.Type = GetGeneralProteinChange();

    if (_hgvsNotation.Type != ProteinChange.None)
    {
        _hgvsNotation.Type = GetSpecificProteinChange();

        // convert ref & alt peptides taking into account HGVS rules
        GetHgvsPeptides(_ta);
    }

    // no protein change - return transcript nomenclature with flag for neutral protein consequence.
    // This is re-checked after the refinement above because GetSpecificProteinChange reassigns the type.
    if (_hgvsNotation.Type == ProteinChange.None)
    {
        _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
        return;
    }

    // string formatting
    _ta.HgvsProteinSequenceName = GetHgvsProteinFormat(_ta);
}
// Verifies that SequenceUtilities.HasNonCanonicalBase returns the expected flag for the supplied bases.
public void HasNonCanonicalBase(string bases, bool expectedResult)
{
    Assert.Equal(expectedResult, SequenceUtilities.HasNonCanonicalBase(bases));
}
// Verifies that the supplied bases are reported as containing a non-canonical base.
public void NonCanonical(string bases)
{
    bool hasNonCanonicalBase = SequenceUtilities.HasNonCanonicalBase(bases);

    Assert.True(hasNonCanonicalBase);
}
/// <summary>
/// Builds an <c>HgvscNotation</c> for the variant on the given transcript and returns its string form.
/// </summary>
/// <param name="transcript">transcript the variant is described against</param>
/// <param name="variant">variant supplying the type, alleles and start/end positions</param>
/// <param name="refSequence">reference sequence forwarded to <c>GetGenomicChange</c></param>
/// <param name="regionStart">index into <c>transcript.TranscriptRegions</c> for the variant start (the gap check is skipped unless it is greater than -1)</param>
/// <param name="regionEnd">index into <c>transcript.TranscriptRegions</c> for the variant end (the gap check is skipped unless it is greater than -1)</param>
/// <param name="transcriptRef">reference allele to report; when null or empty it is derived from the variant</param>
/// <param name="transcriptAlt">alternate allele to report; when null or empty it is derived from the variant</param>
/// <returns>
/// the notation string, or null for reference variants, alt alleles containing non-canonical bases,
/// variants whose start and end fall in the same gap region, or missing position offsets
/// </returns>
public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, int regionStart, int regionEnd, string transcriptRef, string transcriptAlt)
{
    // sanity check: don't try to handle odd characters and make sure this is not a reference allele
    if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
    {
        return null;
    }

    // do not report HGVSc notation when variant lands inside gap region
    if (regionStart > -1 && regionEnd > -1)
    {
        var startRegion = transcript.TranscriptRegions[regionStart];
        var endRegion   = transcript.TranscriptRegions[regionEnd];

        if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap &&
            endRegion.Type == TranscriptRegionType.Gap)
        {
            return null;
        }
    }

    bool onReverseStrand = transcript.Gene.OnReverseStrand;

    // caller-supplied transcript alleles win; otherwise use the variant alleles on the transcript strand
    string refAllele = string.IsNullOrEmpty(transcriptRef)
        ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele)
        : transcriptRef;

    string altAllele = string.IsNullOrEmpty(transcriptAlt)
        ? (onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele)
        : transcriptAlt;

    // decide event type from HGVS nomenclature
    var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

    int variantStart = variant.Start;
    int variantEnd   = variant.End;

    // duplications get their coordinates, ref allele and regions replaced by ShiftDuplication
    if (genomicChange == GenomicChange.Duplication)
    {
        (variantStart, variantEnd, refAllele, regionStart, regionEnd) =
            transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
    }

    // NOTE(review): the trailing true/false presumably tells the helper whether it is resolving the
    // start or the end position - confirm against HgvsUtilities.GetCdnaPositionOffset
    var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true);

    // a single-position variant reuses the start offset instead of a second lookup
    var endPositionOffset = variantStart == variantEnd
        ? startPositionOffset
        : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false);

    // on the reverse strand the two offsets trade places
    if (onReverseStrand)
    {
        var tmp = startPositionOffset;
        startPositionOffset = endPositionOffset;
        endPositionOffset   = tmp;
    }

    // for insertions, a missing start is derived from the position after the end. The end offset must
    // exist for that: without the null guard this threw a NullReferenceException when both were missing,
    // instead of falling through to the sanity check below.
    if (startPositionOffset == null && endPositionOffset != null && variant.Type == VariantType.insertion)
    {
        startPositionOffset = new PositionOffset(endPositionOffset.Position + 1, endPositionOffset.Offset,
            $"{endPositionOffset.Position + 1}", endPositionOffset.HasStopCodonNotation);
    }

    // sanity check: make sure we have coordinates
    if (startPositionOffset == null || endPositionOffset == null)
    {
        return null;
    }

    var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,
        startPositionOffset, endPositionOffset, transcript.Translation != null);

    // generic formatting
    return hgvsNotation.ToString();
}