public void GetSubSubstring()
{
    // Arrange: the bases the utility will slice from.
    var bases = new SimpleSequence("GGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC");

    // Act: outer window 4-10 with the third (strand) argument set to true,
    // then the inner range 1-4 within that window.
    var actual = SequenceUtilities.GetSubSubstring(4, 10, true, 1, 4, bases);

    // Assert
    Assert.Equal("CGTG", actual);
}
/// <summary>
/// get the genomic change that resulted from this variation [Sequence.pm:482 hgvs_variant_notation].
/// The classification is written to <c>hn.Type</c>; for insertions and duplications the
/// start/end positions (and, for duplications, the reference bases and allele multiple)
/// of <paramref name="hn"/> are rewritten as well.
/// </summary>
/// <param name="transcript">
/// transcript whose Start, End and Gene.OnReverseStrand are handed to
/// SequenceUtilities.GetSubSubstring when looking up the bases preceding an insertion
/// </param>
/// <param name="hn">the HGVS notation that is inspected and updated in place</param>
/// <param name="isGenomicDuplicate">
/// when true, an insertion is reported as a duplication without consulting the sequence
/// </param>
private void GetGenomicChange(Transcript transcript, HgvsNotation hn, bool isGenomicDuplicate)
{
    hn.Type = GenomicChange.Unknown;

    // make sure our positions are defined
    if (hn.Start.Position == null || hn.End.Position == null)
    {
        return;
    }

    int displayStart = (int)hn.Start.Position;
    int displayEnd = (int)hn.End.Position;

    // length of the reference allele. Negative lengths make no sense
    int refLength = displayEnd - displayStart + 1;
    if (refLength < 0)
    {
        refLength = 0;
    }

    // length of alternative allele
    var altLength = hn.AlternateBases.Length;

    // sanity check: make sure that the alleles are different
    // (the type stays Unknown in that case)
    if (hn.ReferenceBases == hn.AlternateBases)
    {
        return;
    }

    // deletion
    if (altLength == 0)
    {
        hn.Type = GenomicChange.Deletion;
        return;
    }

    if (refLength == altLength)
    {
        // substitution
        if (refLength == 1)
        {
            hn.Type = GenomicChange.Substitution;
            return;
        }

        // inversion: the alt allele is the reverse complement of the reference
        var rcRefAllele = SequenceUtilities.GetReverseComplement(hn.ReferenceBases);
        hn.Type = hn.AlternateBases == rcRefAllele ? GenomicChange.Inversion : GenomicChange.InDel;
        return;
    }

    // If this is an insertion, we should check if the preceding reference nucleotides
    // match the insertion. In that case it should be annotated as a multiplication.
    if (refLength == 0)
    {
        int prevPosition = displayEnd - altLength;

        if (!isGenomicDuplicate && _compressedSequence != null && prevPosition >= 0)
        {
            // Get the same number of nucleotides preceding the insertion as the length of
            // the insertion
            var precedingBases = SequenceUtilities.GetSubSubstring(
                transcript.Start,
                transcript.End,
                transcript.Gene.OnReverseStrand,
                prevPosition,
                prevPosition + altLength - 1,
                _compressedSequence);

            if (precedingBases == hn.AlternateBases)
            {
                isGenomicDuplicate = true;
            }
        }

        if (isGenomicDuplicate)
        {
            hn.Type = GenomicChange.Duplication;

            // for duplication, the hgvs positions are decremented by alt allele length
            hn.Start.Position = displayStart - altLength;
            hn.End.Position = hn.Start.Position + altLength - 1;

            hn.AlleleMultiple = 2;
            hn.ReferenceBases = hn.AlternateBases;
            return;
        }

        // otherwise just an insertion: start and end are swapped so that the
        // notation runs from the lower to the higher position
        hn.Type = GenomicChange.Insertion;
        hn.Start.Position = displayEnd;
        hn.End.Position = displayStart;
        return;
    }

    // Otherwise, the reference and allele are of different lengths. By default, this is
    // a delins but we need to check if the alt allele is a multiplication of the reference.

    // Check if the length of the alt allele is a multiple of the reference allele
    if (altLength % refLength == 0)
    {
        // keep the multiple in a local so that hn.AlleleMultiple is only touched once
        // the repeat has actually been confirmed; otherwise a plain delins would be
        // left carrying a stale multiple
        int multiple = altLength / refLength;
        string multRefAllele = string.Concat(Enumerable.Repeat(hn.ReferenceBases, multiple));

        if (hn.AlternateBases == multRefAllele)
        {
            hn.AlleleMultiple = multiple;
            hn.Type = multiple == 2 ? GenomicChange.Duplication : GenomicChange.Multiple;
            return;
        }
    }

    // deletion/insertion
    hn.Type = GenomicChange.InDel;
}