Пример #1
0
        public void GetSubSubstring()
        {
            const string expectedResult = "CGTG";
            var          sequence       = new SimpleSequence("GGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC");
            var          observedResult = SequenceUtilities.GetSubSubstring(4, 10, true, 1, 4, sequence);

            Assert.Equal(expectedResult, observedResult);
        }
Пример #2
0
        /// <summary>
        /// get the genomic change that resulted from this variation [Sequence.pm:482 hgvs_variant_notation]
        /// </summary>
        private void GetGenomicChange(Transcript transcript, HgvsNotation hn, bool isGenomicDuplicate)
        {
            hn.Type = GenomicChange.Unknown;

            // make sure our positions are defined
            if (hn.Start.Position == null || hn.End.Position == null)
            {
                return;
            }

            int displayStart = (int)hn.Start.Position;
            int displayEnd   = (int)hn.End.Position;

            // length of the reference allele. Negative lengths make no sense
            int refLength = displayEnd - displayStart + 1;

            if (refLength < 0)
            {
                refLength = 0;
            }

            // length of alternative allele
            var altLength = hn.AlternateBases.Length;

            // sanity check: make sure that the alleles are different
            if (hn.ReferenceBases == hn.AlternateBases)
            {
                return;
            }

            // deletion
            if (altLength == 0)
            {
                hn.Type = GenomicChange.Deletion;
                return;
            }

            if (refLength == altLength)
            {
                // substitution
                if (refLength == 1)
                {
                    hn.Type = GenomicChange.Substitution;
                    return;
                }

                // inversion
                var rcRefAllele = SequenceUtilities.GetReverseComplement(hn.ReferenceBases);
                hn.Type = hn.AlternateBases == rcRefAllele ? GenomicChange.Inversion : GenomicChange.InDel;
                return;
            }

            // If this is an insertion, we should check if the preceeding reference nucleotides
            // match the insertion. In that case it should be annotated as a multiplication.
            if (refLength == 0)
            {
                int prevPosition = displayEnd - altLength;

                if (!isGenomicDuplicate && _compressedSequence != null && prevPosition >= 0)
                {
                    // Get the same number of nucleotides preceding the insertion as the length of
                    // the insertion
                    var precedingBases = SequenceUtilities.GetSubSubstring(transcript.Start, transcript.End,
                                                                           transcript.Gene.OnReverseStrand, prevPosition, prevPosition + altLength - 1, _compressedSequence);
                    if (precedingBases == hn.AlternateBases)
                    {
                        isGenomicDuplicate = true;
                    }
                }

                if (isGenomicDuplicate)
                {
                    hn.Type = GenomicChange.Duplication;

                    // for duplication, the hgvs positions are deceremented by alt allele length
                    var incrementLength = altLength;
                    hn.Start.Position = displayStart - incrementLength;
                    hn.End.Position   = hn.Start.Position + incrementLength - 1;

                    hn.AlleleMultiple = 2;
                    hn.ReferenceBases = hn.AlternateBases;
                    return;
                }

                // otherwise just an insertion
                hn.Type           = GenomicChange.Insertion;
                hn.Start.Position = displayEnd;
                hn.End.Position   = displayStart;
                return;
            }

            // Otherwise, the reference and allele are of different lengths. By default, this is
            // a delins but we need to check if the alt allele is a multiplication of the reference.
            // Check if the length of the alt allele is a multiple of the reference allele
            if (altLength % refLength == 0)
            {
                hn.AlleleMultiple = altLength / refLength;
                string multRefAllele = string.Concat(Enumerable.Repeat(hn.ReferenceBases, hn.AlleleMultiple));

                if (hn.AlternateBases == multRefAllele)
                {
                    hn.Type = hn.AlleleMultiple == 2 ? GenomicChange.Duplication : GenomicChange.Multiple;
                    return;
                }
            }

            // deletion/insertion
            hn.Type = GenomicChange.InDel;
        }