Пример #1
0
        /// <summary>
        /// Returns the cDNA sequence. The string is built on the first call from the
        /// exon regions and stored in _sequence; later calls return the stored value.
        /// </summary>
        public string GetCdnaSequence()
        {
            // already built on an earlier call
            if (_sequence != null)
            {
                return _sequence;
            }

            var builder = StringBuilderCache.Acquire();

            // append the bases of every exon region, in the order the regions are stored
            foreach (var region in _regions)
            {
                if (region.Type == TranscriptRegionType.Exon)
                {
                    // region coordinates are used as 1-based inclusive, Substring takes a 0-based offset
                    builder.Append(_compressedSequence.Substring(region.Start - 1, region.End - region.Start + 1));
                }
            }

            // replace the builder contents with their reverse complement when required
            if (_onReverseStrand)
            {
                string flipped = SequenceUtilities.GetReverseComplement(builder.ToString());
                builder.Clear();
                builder.Append(flipped);
            }

            // RNA edits are applied to the builder after strand handling
            ApplyRnaEdits(builder);

            _sequence = StringBuilderCache.GetStringAndRelease(builder);
            return _sequence;
        }
Пример #2
0
        /// <summary>
        /// Concatenates the sequences of all exon regions.
        /// This includes 5' UTR + cDNA + 3' UTR [Transcript.pm:862 spliced_seq].
        /// When no reference sequence is supplied, each exon is represented by a run of
        /// 'N' characters of the exon's length.
        /// </summary>
        /// <param name="refSequence">reference sequence to extract exon bases from; may be null</param>
        /// <param name="regions">transcript regions; only regions of type Exon are used</param>
        /// <param name="onReverseStrand">when true, the concatenated result is reverse complemented</param>
        /// <returns>the spliced sequence</returns>
        private static string GetSplicedSequence(ISequence refSequence, ITranscriptRegion[] regions, bool onReverseStrand)
        {
            var builder = StringBuilderCache.Acquire();

            foreach (var region in regions)
            {
                if (region.Type != TranscriptRegionType.Exon)
                {
                    continue;
                }

                var exonLength = region.End - region.Start + 1;

                if (refSequence != null)
                {
                    builder.Append(refSequence.Substring(region.Start - 1, exonLength));
                }
                else
                {
                    // no reference available: pad with unknown bases
                    builder.Append(new string('N', exonLength));
                }
            }

            string spliced = StringBuilderCache.GetStringAndRelease(builder);

            if (onReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(spliced);
            }

            return spliced;
        }
Пример #3
0
        /// <summary>
        /// Builds one SequenceResult per input string. For each parsed sequence, its
        /// PossiblePermutations are serialized in order until one yields a sequence whose
        /// ReadsFrom and FinalWrites both equal those of the parsed sequence; every
        /// permutation tried is recorded in TestedPermutations.
        /// </summary>
        /// <param name="sequences">textual sequences to verify</param>
        /// <returns>the results, in the same order as the input</returns>
        public static List <SequenceResult> GetVerificationResults(List <string> sequences)
        {
            var allResults = new List<SequenceResult>();

            foreach (string text in sequences)
            {
                Sequence parsed  = SequenceUtilities.GetSequenceFromString(text);
                var      outcome = new SequenceResult(parsed);

                for (int index = 0; index < parsed.PossiblePermutations.Count; index++)
                {
                    string   permutation = parsed.PossiblePermutations[index];
                    Sequence candidate   = parsed.Serialize(permutation);

                    bool isMatch = candidate.ReadsFrom.DictionaryEqual(parsed.ReadsFrom) &&
                                   candidate.FinalWrites.DictionaryEqual(parsed.FinalWrites);

                    if (isMatch)
                    {
                        outcome.IsValid = true;
                    }

                    outcome.TestedPermutations.Add(candidate, isMatch);

                    // stop at the first permutation that matches
                    if (isMatch)
                    {
                        break;
                    }
                }

                allResults.Add(outcome);
            }

            return allResults;
        }
Пример #4
0
        /// <summary>
        /// Constructor: stores the inputs, computes the HGVS start/end coordinates and
        /// creates the HgvsNotation object used by the rest of the class.
        /// </summary>
        public HgvsCodingNomenclature(TranscriptAnnotation ta, Transcript transcript, VariantFeature variant,
                                      ICompressedSequence compressedSequence, bool isGenomicDuplicate)
        {
            _ta                 = ta;
            _transcript         = transcript;
            _variant            = variant;
            _compressedSequence = compressedSequence;
            _isGenomicDuplicate = isGenomicDuplicate;
            _sb                 = new StringBuilder();

            // strand of the gene this transcript belongs to
            bool onReverseStrand = transcript.Gene.OnReverseStrand;

            // this may be different to the input one for insertions/deletions
            var    altAllele   = ta.AlternateAllele;
            string alleleBases = altAllele.AlternateAllele;

            // express the alternate bases on the transcript strand
            string variationFeatureSequence = onReverseStrand
                ? SequenceUtilities.GetReverseComplement(alleleBases)
                : alleleBases;

            // calculate the reference start and end
            GetReferenceCoordinates(transcript, altAllele, out _hgvsStart, out _hgvsEnd);

            // the notation object later receives the event type and positions
            var referenceBases = ta.TranscriptReferenceAllele;
            var transcriptName = FormatUtilities.CombineIdAndVersion(transcript.Id, transcript.Version);
            var hasTranslation = _transcript.Translation != null;

            _hgvsNotation = new HgvsNotation(referenceBases, variationFeatureSequence, transcriptName,
                                             _hgvsStart, _hgvsEnd, hasTranslation);
        }
Пример #5
0
        /// <summary>
        /// Picks the bases used for rotation: the alternate allele for insertions and the
        /// reference allele for every other variant type. The bases are reverse
        /// complemented when onReverseStrand is true.
        /// </summary>
        private static string GetRotatingBases(ISimpleVariant simpleVariant, bool onReverseStrand)
        {
            string bases;

            if (simpleVariant.Type == VariantType.insertion)
            {
                bases = simpleVariant.AltAllele;
            }
            else
            {
                bases = simpleVariant.RefAllele;
            }

            if (onReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(bases);
            }

            return bases;
        }
Пример #6
0
        public void GetSubSubstring()
        {
            // arrange
            var sequence = new SimpleSequence("GGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC");

            // act
            var observedResult = SequenceUtilities.GetSubSubstring(4, 10, true, 1, 4, sequence);

            // assert
            Assert.Equal("CGTG", observedResult);
        }
Пример #7
0
        /// <summary>
        /// Creates an HgvscNotation for the variant on the given transcript and returns its
        /// string form. Returns null for reference variants, for alternate alleles with
        /// non-canonical bases, when a cDNA position offset is unavailable, or when a
        /// position lies beyond the transcript length.
        /// </summary>
        public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence,
                                                int regionStart, int regionEnd)
        {
            // sanity check: don't try to handle odd characters and make sure this is not a reference allele
            bool isReference = variant.Type == VariantType.reference;
            if (isReference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
            {
                return null;
            }

            var onReverseStrand = transcript.Gene.OnReverseStrand;

            // express both alleles on the transcript strand
            var refAllele = variant.RefAllele;
            if (onReverseStrand)
            {
                refAllele = SequenceUtilities.GetReverseComplement(refAllele);
            }

            var altAllele = variant.AltAllele;
            if (onReverseStrand)
            {
                altAllele = SequenceUtilities.GetReverseComplement(altAllele);
            }

            // decide event type from HGVS nomenclature
            var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

            var variantStart = variant.Start;
            var variantEnd   = variant.End;

            // duplications get their coordinates, ref allele and region indices from ShiftDuplication
            if (genomicChange == GenomicChange.Duplication)
            {
                (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
            }

            var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart);
            var endPositionOffset   = startPositionOffset;

            // only look up the end separately when it differs from the start
            if (variantStart != variantEnd)
            {
                endPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd);
            }

            // on the reverse strand the two offsets trade places
            if (onReverseStrand)
            {
                (startPositionOffset, endPositionOffset) = (endPositionOffset, startPositionOffset);
            }

            // sanity check: make sure we have coordinates
            if (startPositionOffset == null || endPositionOffset == null)
            {
                return null;
            }

            // no notation for positions past the transcript
            var transcriptLen = transcript.End - transcript.Start + 1;
            if (startPositionOffset.Position > transcriptLen || endPositionOffset.Position > transcriptLen)
            {
                return null;
            }

            var hasTranslation = transcript.Translation != null;
            var hgvsNotation   = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,
                                                   startPositionOffset, endPositionOffset, hasTranslation);

            // generic formatting
            return hgvsNotation.ToString();
        }
Пример #8
0
 /// <summary>
 /// Returns true when any of the following holds: the HGVS c. notation is null or
 /// empty, the variant is a reference variant, the transcript alt allele contains a
 /// non-canonical base, or either CDS coordinate equals -1.
 /// </summary>
 private static bool IsHgvspNull(string transcriptAltAllele, int cdsStart, int cdsEnd, ISimpleVariant variant,
                                 string hgvscNotation)
 {
     if (string.IsNullOrEmpty(hgvscNotation))
     {
         return true;
     }

     if (variant.Type == VariantType.reference)
     {
         return true;
     }

     if (SequenceUtilities.HasNonCanonicalBase(transcriptAltAllele))
     {
         return true;
     }

     // -1 is treated as a missing CDS coordinate
     return cdsStart == -1 || cdsEnd == -1;
 }
Пример #9
0
        /// <summary>
        /// Classifies the variant as a GenomicChange by comparing the reference span
        /// (variant.End - variant.Start + 1, floored at zero) with the alternate allele length.
        /// </summary>
        /// <returns>
        /// Unknown when both alleles are equal; Deletion when the alt allele is empty;
        /// Substitution, Inversion or DelIns when the lengths match; DelIns when the lengths
        /// differ and the reference span is non-zero; otherwise Duplication or Insertion,
        /// depending on HgvsUtilities.IsDuplicateWithinInterval.
        /// </returns>
        public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant)
        {
            // length of the reference allele. Negative lengths make no sense
            int span      = variant.End - variant.Start + 1;
            int refLength = span < 0 ? 0 : span;

            // length of alternative allele
            int altLength = variant.AltAllele.Length;

            // sanity check: make sure that the alleles are different
            if (variant.RefAllele == variant.AltAllele)
            {
                return GenomicChange.Unknown;
            }

            // deletion
            if (altLength == 0)
            {
                return GenomicChange.Deletion;
            }

            if (refLength != altLength)
            {
                // deletion/insertion
                if (refLength != 0)
                {
                    return GenomicChange.DelIns;
                }

                // pure insertion: it becomes a duplication when the helper reports a duplicate within the interval
                bool isGenomicDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand);
                if (isGenomicDuplicate)
                {
                    return GenomicChange.Duplication;
                }

                return GenomicChange.Insertion;
            }

            // equal lengths from here on: substitution
            if (refLength == 1)
            {
                return GenomicChange.Substitution;
            }

            // inversion if the alt allele is the reverse complement of the ref allele
            string rcRefAllele = SequenceUtilities.GetReverseComplement(variant.RefAllele);
            if (variant.AltAllele == rcRefAllele)
            {
                return GenomicChange.Inversion;
            }

            return GenomicChange.DelIns;
        }
Пример #10
0
        /// <summary>
        /// Builds the coding sequence from the exon regions that overlap _codingRegion,
        /// trimming exons at the coding region boundaries and padding with
        /// _startExonPhase 'N' characters. The result is reverse complemented when the
        /// gene is on the reverse strand.
        /// </summary>
        public string GetCodingSequence()
        {
            var  builder      = StringBuilderCache.Acquire(Length);
            bool needsPadding = _startExonPhase > 0;

            // account for the exon phase (forward orientation): padding precedes the exons
            if (needsPadding && !_geneOnReverseStrand)
            {
                builder.Append('N', _startExonPhase);
            }

            foreach (var region in _regions)
            {
                // skip non-exons and exons that are entirely in the UTR
                bool contributes = region.Type == TranscriptRegionType.Exon &&
                                   region.End >= _codingRegion.Start &&
                                   region.Start <= _codingRegion.End;
                if (!contributes)
                {
                    continue;
                }

                int begin = region.Start;
                int end   = region.End;

                // trim the first and last exons to the coding region
                if (_codingRegion.Start >= begin && _codingRegion.Start <= end)
                {
                    begin = _codingRegion.Start;
                }
                if (_codingRegion.End >= begin && _codingRegion.End <= end)
                {
                    end = _codingRegion.End;
                }

                builder.Append(_compressedSequence.Substring(begin - 1, end - begin + 1));
            }

            // account for the exon phase (reverse orientation): padding follows the exons
            if (needsPadding && _geneOnReverseStrand)
            {
                builder.Append('N', _startExonPhase);
            }

            var codingBases = StringBuilderCache.GetStringAndRelease(builder);

            if (_geneOnReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(codingBases);
            }

            return codingBases;
        }
Пример #11
0
        public void ThirdTest()
        {
            // arrange: two runs of length two (1,1 and 4,4); the expected result is the first one
            var testSequence = new List<int> { 1, 1, 2, 5, 11, 4, 4 };
            var expected     = new List<int> { 1, 1 };

            // act
            var actual = SequenceUtilities.GetLongestSubsequenceOfEqualNumbers(testSequence);

            // assert
            CollectionAssert.AreEqual(expected, actual);
        }
Пример #12
0
        /// <summary>
        /// Copies the reference and alternate alleles into the transcript annotation
        /// (reverse complemented when the gene is on the reverse strand) and then maps the
        /// allele's start/end through CdnaMapper.MapCoordinates.
        /// </summary>
        private static void MapCdnaCoordinates(Transcript transcript, TranscriptAnnotation ta, VariantAlternateAllele altAllele)
        {
            bool onReverseStrand = transcript.Gene.OnReverseStrand;

            ta.TranscriptReferenceAllele = onReverseStrand
                ? SequenceUtilities.GetReverseComplement(altAllele.ReferenceAllele)
                : altAllele.ReferenceAllele;

            ta.TranscriptAlternateAllele = onReverseStrand
                ? SequenceUtilities.GetReverseComplement(altAllele.AlternateAllele)
                : altAllele.AlternateAllele;

            CdnaMapper.MapCoordinates(altAllele.Start, altAllele.End, ta, transcript);
        }
Пример #13
0
        public void SecondTest()
        {
            // arrange: the leading run of three 5s is longer than the trailing run of two
            var testSequence = new List<int> { 5, 5, 5, 3, 2, 1, 4, 3, 5, 5 };
            var expected     = new List<int> { 5, 5, 5 };

            // act
            var actual = SequenceUtilities.GetLongestSubsequenceOfEqualNumbers(testSequence);

            // assert
            CollectionAssert.AreEqual(expected, actual);
        }
Пример #14
0
        /// <summary>
        /// Extracts the reference bases downstream of the variant, in transcript
        /// orientation. The length is the larger of rotatingBases.Length and
        /// MaxDownstreamLength, capped by the number of bases between the variant and the
        /// end of rotateRegion in the direction of travel.
        /// </summary>
        private static string GetDownstreamSeq(IInterval simpleVariant, IInterval rotateRegion,
                                               ISequence refSequence, bool onReverseStrand, string rotatingBases)
        {
            // bases remaining between the variant and the region boundary
            int basesToEnd;
            if (onReverseStrand)
            {
                basesToEnd = simpleVariant.Start - rotateRegion.Start;
            }
            else
            {
                basesToEnd = rotateRegion.End - simpleVariant.End;
            }

            // for large rotatingBases, we need to factor in its length but still make sure
            // that we do not go past the end of transcript
            int wantedLength     = Math.Max(rotatingBases.Length, MaxDownstreamLength);
            int downStreamLength = Math.Min(basesToEnd, wantedLength);

            if (!onReverseStrand)
            {
                return refSequence.Substring(simpleVariant.End, downStreamLength);
            }

            // reverse strand: take the bases preceding the variant and flip them
            string precedingBases = refSequence.Substring(simpleVariant.Start - 1 - downStreamLength, downStreamLength);
            return SequenceUtilities.GetReverseComplement(precedingBases);
        }
Пример #15
0
        /// <summary>
        /// Returns the number of SequenceUtilities.Collatz steps needed to bring
        /// <paramref name="n"/> down to 1 or below, plus one, using _memory as a memo table.
        /// </summary>
        /// <param name="n">starting value</param>
        /// <returns>the length computed for the starting value; 1 when n is 1 or less</returns>
        private long CollatzLengt(long n)
        {
            long len      = 0;
            var  original = n;

            while (n > 1)
            {
                if (_memory.TryGetValue(n, out long value))
                {
                    // a known suffix was reached: the total is the steps taken so far plus the
                    // stored length. Store it for the starting value too, so that later lookups
                    // of the starting value hit the memo table instead of re-walking the chain.
                    long total = len + value;
                    _memory[original] = total;
                    return total;
                }
                len++;
                n = SequenceUtilities.Collatz(n);
            }

            // count the terminating value itself
            len++;
            _memory[original] = len;
            return len;
        }
Пример #16
0
        /// <summary>
        /// Returns the coding sequence. It is built on the first call from the exon regions
        /// that overlap _codingRegion, padded by _startExonPhase 'N' characters, reverse
        /// complemented if needed, passed through ApplyRnaEdits, and stored in _sequence.
        /// </summary>
        public string GetCodingSequence()
        {
            // already built on an earlier call
            if (_sequence != null)
            {
                return _sequence;
            }

            var  builder      = StringBuilderCache.Acquire(Length);
            bool needsPadding = _startExonPhase > 0;

            // account for the exon phase (forward orientation)
            if (needsPadding && !_geneOnReverseStrand)
            {
                builder.Append('N', _startExonPhase);
            }

            foreach (var region in _regions)
            {
                // only exons that are not entirely in the UTR contribute
                bool contributes = region.Type == TranscriptRegionType.Exon &&
                                   region.End >= _codingRegion.Start &&
                                   region.Start <= _codingRegion.End;
                if (contributes)
                {
                    AddCodingRegion(region, builder);
                }
            }

            if (_geneOnReverseStrand)
            {
                // account for the exon phase (reverse orientation), then flip the whole sequence
                if (needsPadding)
                {
                    builder.Append('N', _startExonPhase);
                }

                var flipped = SequenceUtilities.GetReverseComplement(builder.ToString());
                builder.Clear();
                builder.Append(flipped);
            }

            // RNA edits for transcripts on reverse strand come with reversed bases. So, no positional or base adjustment necessary
            // ref: unit test with NM_031947.3, chr5:140682196-140683630
            ApplyRnaEdits(builder);

            _sequence = StringBuilderCache.GetStringAndRelease(builder);
            return _sequence;
        }
Пример #17
0
        /// <summary>
        /// Extracts the coding sequence corresponding to the listed exons: each cDNA map
        /// overlapping [_start, _end] is trimmed to that range and appended, with
        /// _startExonPhase 'N' characters of padding. The result is reverse complemented
        /// when the gene is on the reverse strand.
        /// </summary>
        public string Sequence()
        {
            _sb.Clear();

            bool needsPadding = _startExonPhase > 0;

            // account for the exon phase (forward orientation)
            if (needsPadding && !_geneOnReverseStrand)
            {
                _sb.Append('N', _startExonPhase);
            }

            foreach (var map in _cdnaMaps)
            {
                // handle exons that are entirely in the UTR
                bool overlapsCodingRange = map.GenomicEnd >= _start && map.GenomicStart <= _end;
                if (!overlapsCodingRange)
                {
                    continue;
                }

                int begin = map.GenomicStart;
                int end   = map.GenomicEnd;

                // trim the first and last exons
                if (_start >= begin && _start <= end)
                {
                    begin = _start;
                }
                if (_end >= begin && _end <= end)
                {
                    end = _end;
                }

                _sb.Append(_sequence.Substring(begin - 1, end - begin + 1));
            }

            // account for the exon phase (reverse orientation)
            if (needsPadding && _geneOnReverseStrand)
            {
                _sb.Append('N', _startExonPhase);
            }

            string codingBases = _sb.ToString();

            if (_geneOnReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(codingBases);
            }

            return codingBases;
        }
Пример #18
0
        /// <summary>
        /// Concatenates the sequences of all entries in cdnaMaps.
        /// This includes 5' UTR + cDNA + 3' UTR [Transcript.pm:862 spliced_seq].
        /// When no sequence is supplied, each exon is represented by a run of 'N'
        /// characters of the exon's length.
        /// </summary>
        /// <param name="compressedSequence">sequence to extract exon bases from; may be null</param>
        /// <param name="cdnaMaps">exon coordinate maps, processed in array order</param>
        /// <param name="onReverseStrand">when true, the concatenated result is reverse complemented</param>
        private static string GetSplicedSequence(ICompressedSequence compressedSequence, CdnaCoordinateMap[] cdnaMaps, bool onReverseStrand)
        {
            var builder = new StringBuilder();

            foreach (var exon in cdnaMaps)
            {
                var exonLength = exon.GenomicEnd - exon.GenomicStart + 1;

                if (compressedSequence != null)
                {
                    builder.Append(compressedSequence.Substring(exon.GenomicStart - 1, exonLength));
                }
                else
                {
                    // sanity check: handle the situation where no reference has been provided
                    builder.Append(new string('N', exonLength));
                }
            }

            string spliced = builder.ToString();

            if (onReverseStrand)
            {
                return SequenceUtilities.GetReverseComplement(spliced);
            }

            return spliced;
        }
Пример #19
0
        /// <summary>
        /// Computes the cDNA-level effect of this allele in HGVS format
        /// [TranscriptVariationAllele.pm:568 hgvs_transcript] and stores it in
        /// _ta.HgvsCodingSequenceName. Nothing is stored for reference variants, alternate
        /// alleles with non-canonical bases, missing positions, or positions past the
        /// transcript length.
        /// </summary>
        public void SetAnnotation()
        {
            // sanity check: don't try to handle odd characters and make sure this is not a reference allele
            bool isReference = _variant.IsReference;
            if (isReference || SequenceUtilities.HasNonCanonicalBase(_ta.TranscriptAlternateAllele))
            {
                return;
            }

            GetGenomicChange(_transcript, _hgvsNotation, _isGenomicDuplicate);

            // resolve the start; the end is either resolved the same way or reuses the start
            GetCdnaPosition(_hgvsNotation.Start);
            if (_hgvsStart != _hgvsEnd)
            {
                GetCdnaPosition(_hgvsNotation.End);
            }
            else
            {
                _hgvsNotation.End = _hgvsNotation.Start;
            }

            // sanity check: make sure we have coordinates
            if (_hgvsNotation.Start.Position == null || _hgvsNotation.End.Position == null)
            {
                return;
            }

            // no annotation when the notation runs past the transcript
            var transcriptLen = _transcript.End - _transcript.Start + 1;
            if (_hgvsNotation.Start.Position > transcriptLen || _hgvsNotation.End.Position > transcriptLen)
            {
                return;
            }

            // make sure that start is always less than end
            SwapEndpoints(_hgvsNotation);

            // generic formatting
            _ta.HgvsCodingSequenceName = FormatHgvsString();
        }
Пример #20
0
        /// <summary>
        /// Computes the protein-level effect of this allele in HGVS format
        /// [TranscriptVariationAllele.pm:717 hgvs_protein] and stores it in
        /// _ta.HgvsProteinSequenceName. Nothing is stored for reference variants, invalid
        /// CDS coordinates, or alternate alleles with non-canonical bases.
        /// </summary>
        public void SetAnnotation()
        {
            // sanity check: don't try to handle odd characters, make sure this is not a reference allele,
            //               and make sure that we have protein coordinates (both CDS endpoints)
            // NOTE(review): assumes _ta exposes HasValidCdsStart as the counterpart of HasValidCdsEnd;
            //               previously HasValidCdsEnd was tested twice and the start was never checked - confirm.
            if (_variant.IsReference || !_ta.HasValidCdsStart || !_ta.HasValidCdsEnd ||
                SequenceUtilities.HasNonCanonicalBase(_ta.TranscriptAlternateAllele))
            {
                return;
            }

            // check if this is a stop retained variant
            if (_variantEffect.IsStopRetained())
            {
                _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
                return;
            }

            // clip the alleles
            AminoAcids.RemovePrefixAndSuffix(_hgvsNotation);

            // set the protein change
            _hgvsNotation.Type = GetGeneralProteinChange();

            if (_hgvsNotation.Type != ProteinChange.None)
            {
                // refine the general change; the refined type may itself be None (checked below)
                _hgvsNotation.Type = GetSpecificProteinChange();

                // convert ref & alt peptides taking into account HGVS rules
                GetHgvsPeptides(_ta);
            }

            // no protein change - return transcript nomenclature with flag for neutral protein consequence
            if (_hgvsNotation.Type == ProteinChange.None)
            {
                _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
                return;
            }

            // string formatting
            _ta.HgvsProteinSequenceName = GetHgvsProteinFormat(_ta);
        }
Пример #21
0
        /// <summary>
        /// Adds the non_can_splice_surr filter when a splice site next to the affected exon
        /// is non-canonical: the two bases read for the acceptor side are not "AG", or the
        /// two bases read for the donor side are not "GT". The affected exon number is the
        /// first number found in ta.Exons (text before the first '/' and '-').
        /// </summary>
        private void CheckNonCanonicalSpliceSurr(IAnnotatedTranscript ta, Transcript transcript,
                                                 HashSet <LofteeFilter.Filter> filters, ICompressedSequence sequence)
        {
            if (ta.Exons == null)
            {
                return;
            }

            // e.g. "3-4/10" -> "3-4" -> "3"
            string exonField         = ta.Exons.Split('/')[0];
            string firstExonNumber   = exonField.Split('-')[0];
            int    affectedExonIndex = Convert.ToInt32(firstExonNumber);

            // single-exon transcripts have no splice sites to inspect
            var totalExons = transcript.CdnaMaps.Length;
            if (totalExons <= 1)
            {
                return;
            }

            var    onReverseStrand = transcript.Gene.OnReverseStrand;
            string surrAcceptor    = null;
            string surrDonor       = null;

            // acceptor side: only exists when the affected exon is not the first one
            if (affectedExonIndex > 1)
            {
                int intronIndex   = onReverseStrand ? totalExons - affectedExonIndex : affectedExonIndex - 2;
                var intron        = transcript.Introns[intronIndex];
                int acceptorStart = onReverseStrand ? intron.Start : intron.End - 1;
                var acceptorSeq   = sequence.Substring(acceptorStart - 1, 2);

                surrAcceptor = onReverseStrand ? SequenceUtilities.GetReverseComplement(acceptorSeq) : acceptorSeq;
            }

            // donor side: only exists when the affected exon is not the last one
            if (affectedExonIndex < totalExons)
            {
                int intronIndex = onReverseStrand ? totalExons - affectedExonIndex - 1 : affectedExonIndex - 1;
                var intron      = transcript.Introns[intronIndex];
                int donorStart  = onReverseStrand ? intron.End - 1 : intron.Start;
                var donorSeq    = sequence.Substring(donorStart - 1, 2);

                surrDonor = onReverseStrand ? SequenceUtilities.GetReverseComplement(donorSeq) : donorSeq;
            }

            bool acceptorIsNonCanonical = surrAcceptor != null && surrAcceptor != "AG";
            bool donorIsNonCanonical    = surrDonor != null && surrDonor != "GT";

            if (acceptorIsNonCanonical || donorIsNonCanonical)
            {
                filters.Add(LofteeFilter.Filter.non_can_splice_surr);
            }
        }
Пример #22
0
        /// <summary>
        /// Adds the nagnag_site flag when the 11 reference bases around a single-position
        /// allele (6 taken from offset pos - 6 and 5 taken from offset pos), expressed on
        /// the gene's strand, contain the motif NAGNAG with N being one of A, T, C or G.
        /// Does nothing when the allele has no reference coordinates or spans more than
        /// one position.
        /// </summary>
        private void CheckNagnagSite(Transcript transcript, IAnnotatedAlternateAllele allele,
                                     HashSet <LofteeFilter.Flag> flags, ICompressedSequence sequence)
        {
            if (allele.ReferenceBegin == null || allele.ReferenceEnd == null ||
                allele.ReferenceBegin.Value != allele.ReferenceEnd.Value)
            {
                return;
            }

            int pos = allele.ReferenceBegin.Value;

            // NOTE(review): pos - 6 is negative for pos < 6; confirm how Substring treats that
            string upStreamSeq   = sequence.Substring(pos - 6, 6);
            string downStreamSeq = sequence.Substring(pos, 5);

            var combineSeq = transcript.Gene.OnReverseStrand
                ? SequenceUtilities.GetReverseComplement(upStreamSeq + downStreamSeq)
                : upStreamSeq + downStreamSeq;

            // inside a character class '|' is a literal, so the class lists only the four bases
            if (Regex.IsMatch(combineSeq, "[ATCG]AG[ATCG]AG"))
            {
                flags.Add(LofteeFilter.Flag.nagnag_site);
            }
        }
Пример #23
0
 public void NonCanonical(string bases)
 {
     // every input supplied to this test is expected to contain a non-canonical base
     var hasNonCanonicalBase = SequenceUtilities.HasNonCanonicalBase(bases);

     Assert.True(hasNonCanonicalBase);
 }
Пример #24
0
        /// <summary>
        /// Shifts an insertion or deletion as far as possible in transcript direction by
        /// rotating its bases through the downstream reference sequence. Returns the input
        /// variant unchanged when there is no reference sequence, the variant is neither an
        /// insertion nor a deletion, VariantStartOverlapsRegion reports true, or no shift
        /// is possible; otherwise returns a new SimpleVariant with the shifted coordinates
        /// and rotated allele.
        /// </summary>
        public static ISimpleVariant Right(ISimpleVariant simpleVariant, IInterval rotateRegion, ISequence refSequence, bool onReverseStrand)
        {
            if (refSequence == null)
            {
                return simpleVariant;
            }

            bool isInsertion = simpleVariant.Type == VariantType.insertion;
            if (!isInsertion && simpleVariant.Type != VariantType.deletion)
            {
                return simpleVariant;
            }

            // if variant is before the transcript start, do not perform 3 prime shift
            if (VariantStartOverlapsRegion(simpleVariant, rotateRegion, onReverseStrand))
            {
                return simpleVariant;
            }

            string rotatingBases    = GetRotatingBases(simpleVariant, onReverseStrand);
            string downStreamSeq    = GetDownstreamSeq(simpleVariant, rotateRegion, refSequence, onReverseStrand, rotatingBases);
            string combinedSequence = rotatingBases + downStreamSeq;

            // probably a VEP bug, just use it for consistency
            int numBases = rotatingBases.Length;

            // advance while the base leaving the window equals the base entering it
            int shiftStart = 0;
            int shiftEnd   = numBases;

            while (shiftEnd < combinedSequence.Length && combinedSequence[shiftStart] == combinedSequence[shiftEnd])
            {
                shiftStart++;
                shiftEnd++;
            }

            // nothing matched, so the variant stays where it is
            if (shiftStart == 0)
            {
                return simpleVariant;
            }

            // create a new alternative allele
            string rotatedSequence = combinedSequence.Substring(shiftStart, numBases);
            int    rotatedStart;
            int    rotatedEnd;

            if (onReverseStrand)
            {
                // reverse strand: coordinates decrease and the bases go back to the forward strand
                rotatedSequence = SequenceUtilities.GetReverseComplement(rotatedSequence);
                rotatedStart    = simpleVariant.Start - shiftStart;
                rotatedEnd      = simpleVariant.End - shiftStart;
            }
            else
            {
                rotatedStart = simpleVariant.Start + shiftStart;
                rotatedEnd   = simpleVariant.End + shiftStart;
            }

            // insertions carry the rotated bases in the alt allele, deletions in the ref allele
            string rotatedRefAllele = isInsertion ? simpleVariant.RefAllele : rotatedSequence;
            string rotatedAltAllele = isInsertion ? rotatedSequence : simpleVariant.AltAllele;

            return new SimpleVariant(simpleVariant.Chromosome, rotatedStart, rotatedEnd, rotatedRefAllele,
                                     rotatedAltAllele, simpleVariant.Type);
        }
Пример #25
0
        /// <summary>
        /// Determines the genomic change that resulted from this variation
        /// [Sequence.pm:482 hgvs_variant_notation] and writes it to <c>hn.Type</c>.
        /// For insertions and duplications the method also rewrites <c>hn.Start.Position</c>
        /// and <c>hn.End.Position</c>; for duplications and multiplications it sets
        /// <c>hn.AlleleMultiple</c>, and for insertion-derived duplications it replaces
        /// <c>hn.ReferenceBases</c> with the alternate bases.
        /// </summary>
        /// <param name="transcript">transcript whose start, end and strand are used when looking up the bases preceding an insertion</param>
        /// <param name="hn">notation object that is inspected and updated in place</param>
        /// <param name="isGenomicDuplicate">true when the caller already knows the insertion is a duplication</param>
        private void GetGenomicChange(Transcript transcript, HgvsNotation hn, bool isGenomicDuplicate)
        {
            // default result for every early return below
            hn.Type = GenomicChange.Unknown;

            // make sure our positions are defined
            if (hn.Start.Position == null || hn.End.Position == null)
            {
                return;
            }

            int displayStart = (int)hn.Start.Position;
            int displayEnd   = (int)hn.End.Position;

            // length of the reference allele. Negative lengths make no sense
            int refLength = displayEnd - displayStart + 1;

            if (refLength < 0)
            {
                refLength = 0;
            }

            // length of alternative allele
            var altLength = hn.AlternateBases.Length;

            // sanity check: make sure that the alleles are different (type stays Unknown otherwise)
            if (hn.ReferenceBases == hn.AlternateBases)
            {
                return;
            }

            // deletion
            if (altLength == 0)
            {
                hn.Type = GenomicChange.Deletion;
                return;
            }

            if (refLength == altLength)
            {
                // substitution
                if (refLength == 1)
                {
                    hn.Type = GenomicChange.Substitution;
                    return;
                }

                // inversion if the alternate bases are the reverse complement of the reference bases,
                // otherwise a same-length deletion/insertion
                var rcRefAllele = SequenceUtilities.GetReverseComplement(hn.ReferenceBases);
                hn.Type = hn.AlternateBases == rcRefAllele ? GenomicChange.Inversion : GenomicChange.InDel;
                return;
            }

            // If this is an insertion, we should check if the preceding reference nucleotides
            // match the insertion. In that case it should be annotated as a multiplication.
            if (refLength == 0)
            {
                int prevPosition = displayEnd - altLength;

                // only look at the sequence when the caller has not already flagged a duplicate
                if (!isGenomicDuplicate && _compressedSequence != null && prevPosition >= 0)
                {
                    // Get the same number of nucleotides preceding the insertion as the length of
                    // the insertion
                    var precedingBases = SequenceUtilities.GetSubSubstring(transcript.Start, transcript.End,
                                                                           transcript.Gene.OnReverseStrand, prevPosition, prevPosition + altLength - 1, _compressedSequence);
                    if (precedingBases == hn.AlternateBases)
                    {
                        isGenomicDuplicate = true;
                    }
                }

                if (isGenomicDuplicate)
                {
                    hn.Type = GenomicChange.Duplication;

                    // for duplication, the hgvs positions are decremented by alt allele length
                    var incrementLength = altLength;
                    hn.Start.Position = displayStart - incrementLength;
                    hn.End.Position   = hn.Start.Position + incrementLength - 1;

                    hn.AlleleMultiple = 2;
                    hn.ReferenceBases = hn.AlternateBases;
                    return;
                }

                // otherwise just an insertion; note that start and end trade values here
                hn.Type           = GenomicChange.Insertion;
                hn.Start.Position = displayEnd;
                hn.End.Position   = displayStart;
                return;
            }

            // Otherwise, the reference and allele are of different lengths. By default, this is
            // a delins but we need to check if the alt allele is a multiplication of the reference.
            // Check if the length of the alt allele is a multiple of the reference allele
            // (refLength is non-zero here because the zero case returned above)
            if (altLength % refLength == 0)
            {
                hn.AlleleMultiple = altLength / refLength;
                string multRefAllele = string.Concat(Enumerable.Repeat(hn.ReferenceBases, hn.AlleleMultiple));

                if (hn.AlternateBases == multRefAllele)
                {
                    // exactly two copies is a duplication, more copies a multiplication
                    hn.Type = hn.AlleleMultiple == 2 ? GenomicChange.Duplication : GenomicChange.Multiple;
                    return;
                }
            }

            // deletion/insertion
            hn.Type = GenomicChange.InDel;
        }
Пример #26
0
        /// <summary>
        /// Verifies that SequenceUtilities.HasNonCanonicalBase yields the expected flag
        /// for the supplied bases.
        /// </summary>
        /// <param name="bases">the bases handed to the utility</param>
        /// <param name="expectedResult">the value the utility is expected to return</param>
        public void HasNonCanonicalBase(string bases, bool expectedResult)
        {
            // evaluate the utility inline and compare against the expectation
            Assert.Equal(expectedResult, SequenceUtilities.HasNonCanonicalBase(bases));
        }
Пример #27
0
        /// <summary>
        /// Verifies that SequenceUtilities.GetReverseComplement produces the expected
        /// string for the supplied bases.
        /// </summary>
        /// <param name="bases">the bases handed to the utility</param>
        /// <param name="expectedResult">the string the utility is expected to return</param>
        public void GetReverseComplement(string bases, string expectedResult)
        {
            // evaluate the utility inline and compare against the expectation
            Assert.Equal(expectedResult, SequenceUtilities.GetReverseComplement(bases));
        }
Пример #28
0
        /// <summary>
        /// Verifies that the values produced by SequenceUtilities.FindMotif for the given
        /// needle and haystack are equivalent (as a collection) to the expected values.
        /// </summary>
        /// <param name="needle">first argument passed to FindMotif</param>
        /// <param name="haystack">second argument passed to FindMotif</param>
        /// <param name="expected">the values FindMotif is expected to produce</param>
        public void FindMotif_Tests(string needle, string haystack, IEnumerable <int> expected)
        {
            // materialize the results once so the equivalence check works on a stable list
            var observedMatches = SequenceUtilities.FindMotif(needle, haystack).ToList();
            CollectionAssert.AreEquivalent(expected, observedMatches);
        }
Пример #29
0
        /// <summary>
        /// Builds the HGVS coding-sequence (HGVSc) notation string for a variant relative to a transcript.
        /// </summary>
        /// <param name="transcript">transcript the variant is described against</param>
        /// <param name="variant">the variant (type, start, end, and genomic alleles)</param>
        /// <param name="refSequence">reference sequence forwarded to GetGenomicChange</param>
        /// <param name="regionStart">index into transcript.TranscriptRegions for the variant start; a negative value skips the gap check</param>
        /// <param name="regionEnd">index into transcript.TranscriptRegions for the variant end; a negative value skips the gap check</param>
        /// <param name="transcriptRef">reference allele to report; when null or empty, the variant's reference allele is used (reverse-complemented on the reverse strand)</param>
        /// <param name="transcriptAlt">alternate allele to report; when null or empty, the variant's alternate allele is used (reverse-complemented on the reverse strand)</param>
        /// <returns>
        /// the formatted notation, or null when the variant is a reference call, the alternate allele contains
        /// a non-canonical base, the variant lies within a single gap region, or the cDNA coordinates could
        /// not be determined
        /// </returns>
        public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence,
                                                int regionStart, int regionEnd, string transcriptRef, string transcriptAlt)
        {
            // sanity check: don't try to handle odd characters and make sure this is not a reference allele
            if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele))
            {
                return null;
            }

            // do not report HGVSc notation when variant lands inside gap region
            if (regionStart > -1 && regionEnd > -1)
            {
                var startRegion = transcript.TranscriptRegions[regionStart];
                var endRegion   = transcript.TranscriptRegions[regionEnd];
                if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap &&
                    endRegion.Type == TranscriptRegionType.Gap)
                {
                    return null;
                }
            }

            bool onReverseStrand = transcript.Gene.OnReverseStrand;

            // prefer the supplied transcript alleles; otherwise orient the genomic alleles to the transcript strand
            string refAllele = string.IsNullOrEmpty(transcriptRef)
                ? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele
                : transcriptRef;
            string altAllele = string.IsNullOrEmpty(transcriptAlt)
                ? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele
                : transcriptAlt;

            // decide event type from HGVS nomenclature
            var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);

            int variantStart = variant.Start;
            int variantEnd   = variant.End;

            // duplications are described at shifted coordinates, which also changes the reference allele and regions
            if (genomicChange == GenomicChange.Duplication)
            {
                (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);
            }

            var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true);
            var endPositionOffset   = variantStart == variantEnd
                ? startPositionOffset
                : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false);

            // on the reverse strand the genomic start corresponds to the transcript end and vice versa
            if (onReverseStrand)
            {
                (startPositionOffset, endPositionOffset) = (endPositionOffset, startPositionOffset);
            }

            // for insertions with a missing start, derive it from the end position. The end must be
            // available for this to work; without the guard a missing end caused a NullReferenceException
            // instead of falling through to the sanity check below.
            if (startPositionOffset == null && endPositionOffset != null && variant.Type == VariantType.insertion)
            {
                startPositionOffset = new PositionOffset(endPositionOffset.Position + 1, endPositionOffset.Offset, $"{endPositionOffset.Position + 1}", endPositionOffset.HasStopCodonNotation);
            }

            // sanity check: make sure we have coordinates
            if (startPositionOffset == null || endPositionOffset == null)
            {
                return null;
            }

            var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,
                                                 startPositionOffset, endPositionOffset, transcript.Translation != null);

            // generic formatting
            return hgvsNotation.ToString();
        }