Ejemplo n.º 1
0
        // Builds a CodingSequence for transcript ENST00000374673.3 from its genomic bases, coding
        // region and transcript regions, then compares the leading bases with the expected string.
        public void Create()
        {
            // ENST00000374673.3
            // The genomic bases are written as two concatenated constants so the literal contains
            // no raw line break (a newline inside a regular string literal does not compile).
            var sequence = new SimpleSequence(
                "GGGGTGTGTCTCCAGGGCCTTCCGCACTCAGCCAGGGAGAGCAAACAAACAGGCTTGGGGGACTGGGGAGGGGGGAAAGCGGAGGGGCAGGGTAGGGGCGGGGCAGGAGTGGAAGGCGGGGCAGGAGCAAGCGGCCTGGGCAGGGCAAGGGGGCCTCAGCTGGACCCTCGGATACTCACGGCAGTTGGCTTCATCAGTTCGGTCCTCACAGTCAAAGTCACCATCGCAGCGCCACAGCTTGAGGGCACAATGTCCATTCCCGCAGGGGAACTCGTTGGGCTCACAGGGTGGCGGGGGGCCTAGGAGACCGGGCAGGGGTCAGCAGCATCCTCCCGGGCCAGCTTCCTGCTCCCCGCACCCACCTGCACCCCTGCCGGTGCGCACCACAGTCTAGCTCATCGCTGCCGTCCTCGCAGTCCTCCTGTCCGTCGCAGAGGTAGTCTCTGGGGATGCAGTGCCCATTGCGGCATGCGGCCTCCTGGGGCCCACAGGGCAGGGGCCTGACGGAACCGGGAAGCAGGGGCTGAGGAGCGTGGGTGACTGGTGGCTGTCGCATGATGGTTGTCTCTGGCCGGGGCGGTAAAGATGTCGTCTCCACAAGGAGAGAGAATGTGGGGCTGATACCCAGGACTGGCTCCTCTGTGGATAGATTCCGCTTGGCATTTGGCAGAAGCAGATGGCTCCTCACCTGCTCCTTGTCCCCAACCCTCCCCAGGCCCACCCTGTACTCCCCAACACCACTCCCTGCCACCCCCTGCCTGGCTCTGTCATCACCCTTCCTATGCCCCCATCCTCTGCCTGCACCAAACCCTCATAGTCCTTGATGGGCTCCAAGACCCAGGTGTAGGACCCTGGCCCTCCCCTGGCACCCAAACCACTCGTGGCCCCGGACATCCCCTCACCACAATTGAGCTCATCAGACATGTCCCTGCAGTCGGGCCGCCGGTCACAGCGATACTCCAGGGCCACACACTCATTGTAGCTGTGGCAGGCAAACTCGGCCTCCGTGCAGGCTCTTGGGAACTGGGGCACTGCAGGTGGAAAGGAAGCAGACTGGAGTCAGAGGCGGCAGGAGGCAGGTGCGGGAAGCTGTAGGTGCTGTGTGGCTGGAGTGGGCTCCAGGGCCCTGTGTCAGGCAGCTCGGTTTCTGGCAGGCACAACGAGGGCAAGCAGCACACACTAGACACATCCACAGCACACGTGGGGCATGGGACATGCGGCAGTGGCCTCCCCCATCTCTAAAACAGACCCCACACACAGTTGACATGCCACACGCATGCAACCACCACACCACACACATGCAGGCCACAGCCTGGCCCAGTGAGGACAAAGAAGGAGGGGAGAAGGGAGTGCCCAGCTGTCTTGGGCTGTGCCCAGCCAGCCATCTTGCCCACACCCTTCTTTCCTCTCCATCCTTTAAAAAATTTTTTTCTCTCTTCTTTTTTATTTTTTTAGAGACAGGGTCTTGCTACGTTGCCCAAGCTGGTCTCGAACTCTTTGCCTCAAGCAATCCTCCCGTCTTGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCCCTGCACCCGGCCTCCTCTCCAACCTTAACTTCTCTAGGAACCTGGCTGGGCCTCGGCCTGGCTTACACTCTCACCTGGTGTCACTGCGACCGCCACAGCGGCCGGCGGGGGTGGGGGGGTCTGTGCTGGAAAGGAAGATGTGATCAGTGGCTGTTCCACCTGGGAGCCGGGAGCTGAGGGCTGCAGGGCTGGGCCACATTCCACCATCCCTAGCCAGGAGGACTTATTGAAAAGTGAGAGAGGAGGGCTGGACCCCCAGCAGTCTTTAGACCTGGGCCTGATGATGCAGAAGAGCAAGCTTGATCTCTGGGTGCAATAATTAAGGGTTTTTGTTTGTTTGTCTTGTTTTAGAGGCAGGGTTTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCCTAGCTCACTGCAGCCTCAAACTCCT" +
                "GGGCTCCGGTGATCCTC",
                22213528);

            var codingRegion = new CodingRegion(22213728, 22215214, 1, 538, 538);

            // Regions are listed by ascending genomic position; every intron fills exactly the gap
            // between its neighbouring exons.
            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 4, 22213728, 22213827, 439, 538),
                new TranscriptRegion(TranscriptRegionType.Intron, 3, 22213828, 22213912, 438, 439),
                new TranscriptRegion(TranscriptRegionType.Exon, 3, 22213913, 22214167, 184, 438),
                // Exon 3 ends at 22214167, so intron 2 starts at 22214168 (was 22214166, which overlapped the exon).
                new TranscriptRegion(TranscriptRegionType.Intron, 2, 22214168, 22214430, 183, 184),
                new TranscriptRegion(TranscriptRegionType.Exon, 2, 22214431, 22214559, 55, 183),
                new TranscriptRegion(TranscriptRegionType.Intron, 1, 22214560, 22215160, 54, 55),
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22215161, 22215214, 1, 54)
            };

            const string expectedResults = "NCAGCACAGACCCCCCCACCCCCGCCGGCCGCTGTGGCGGTCGCAGTGACACCAGTGCCCCAGTTCCCAAGAGCCTGCACGGAGGCCGAGTTTGCCTGCCACAGCTACAATGAGTGTGTGGCCCTGGAGTATCGCTGTGACCGGCGGCCCGACTGCAGGGACATGTCTGATGAGCTCAATTGTGAGGAGCCAGTCCTGGGTATCAGCCCCACATTCTCTCTCCTTGTGGAGACGACATCTTTACCGCCCCGGCCAGAGACAACCATCATGCGACAGCCACCAGTCACCCACGCTCCTCAGCCCCTGCTTCCCGGTTCCGTCAGGCCCCTGCCCTGTGGGCCCCAGGAGGCCGCATGCCGCAATGGGCACTGCATCCCCAGAGACTACCTCTGCGACGGACAGGAGGACTGCGAGGACGGCAGCGATGAGCTAGACTGTGGCCCCCCGCCACCCTGTGAGCCCAACGAGTTCCCCTGCGGGAATGGACATTGTGCCCTCAAGCTGTGGCGCTGCGATGGTGACTTTGACTGTGAGGACCG";

            // NOTE(review): the trailing arguments (true, 1) presumably mean "reverse strand" and
            // "start exon phase 1" - confirm against the CodingSequence constructor.
            var codingSequence  = new CodingSequence(sequence, codingRegion, regions, true, 1);
            var observedResults = codingSequence.Substring(0, expectedResults.Length);

            Assert.Equal(expectedResults, observedResults);
        }
Ejemplo n.º 2
0
        // Reads gnomAD items from the conflicting-items stream, keeps the ones located at position
        // 16558315, runs them through SuppDataUtilities.RemoveConflictingAlleles and expects three
        // items to be left.
        public void IdentifyConflictingItems()
        {
            const int targetPosition = 16558315;

            // Reference bases: a run of 'A' of length MaxUpstreamLength followed by the bases at the target position.
            var bases    = new string('A', VariantUtils.MaxUpstreamLength) + "TAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC";
            var sequence = new SimpleSequence(bases, targetPosition - VariantUtils.MaxUpstreamLength);

            var refNameToChrom = new Dictionary<string, IChromosome>();
            refNameToChrom.Add("22", Chrom22);

            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, refNameToChrom);
            var gnomadReader     = new GnomadReader(new StreamReader(GetConflictingItemsStream()), sequenceProvider);

            var itemsAtTarget = new List<ISupplementaryDataItem>();

            foreach (GnomadItem gnomadItem in gnomadReader.GetItems())
            {
                if (gnomadItem.Position != targetPosition)
                {
                    continue;
                }

                itemsAtTarget.Add(gnomadItem);
            }

            itemsAtTarget = SuppDataUtilities.RemoveConflictingAlleles(itemsAtTarget, false);

            // two of the items were removed as conflicting items
            Assert.Equal(3, itemsAtTarget.Count);
        }
Ejemplo n.º 3
0
        // Builds the cDNA sequence of NM_000682.6 with a single RNA edit carrying nine bases
        // (start 999, end 998) and checks the coding portion against the expected sequence.
        public void RnaEdits_in_coding_sequence_reverse_insertion()
        {
            //NM_000682.6, chrom: chr2:96778623-96781984
            // The genomic bases are written as two concatenated constants so the literal contains
            // no raw line break (a newline inside a regular string literal does not compile).
            var genomicSeq = new SimpleSequence(
                "CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTACCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAACAGGCAAAGGGGGAAAGGAGGGCCCAGAGACGATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGCTCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCG" +
                "GCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTGGTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGCTGCCCGGCTCGGCTAGACAAGAGCGTCGCCCCT",
                96778623 - 1);
            var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1344);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371),
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1, 998)
            };

            // The edit's start (999) is one past its end (998) and it carries nine bases; cDNA
            // positions 999-1007 are not covered by either exon above.
            var rnaEdits = new IRnaEdit[]
            {
                new RnaEdit(999, 998, "AGAGGAGGA")
            };
            const bool onReverseStrand = true;
            var        cdnaSequence    = new CdnaSequence(genomicSeq, codingRegion, regions, onReverseStrand, rnaEdits);

            var expectedCodingSeq = "ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACCCTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACACCATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA";

            // The coding region spans cDNA 97-1449 (1353 bases) while its length is 1344, so the
            // nine edit bases are added when slicing the expected range out of the cDNA.
            var rnaEditLength = rnaEdits[0].Bases.Length;

            Assert.Equal(expectedCodingSeq, cdnaSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart - 1, codingRegion.Length + rnaEditLength));
        }
Ejemplo n.º 4
0
        // Builds the cDNA sequence of NM_001317107.1 with three single-base RNA edits and one edit
        // with empty bases (670-671), then checks the coding portion against the expected sequence.
        public void With_rnaEdits_reverse_deletion_utr()
        {
            //NM_001317107.1 chr14:22138125-22139232
            var genomicSeq   = new SimpleSequence("ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA", 22138125 - 1);
            var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106),
                new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562, 22138563, 669, 670),
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1, 669)
            };

            var rnaEdits = new IRnaEdit[]
            {
                new RnaEdit(905, 905, "T"),
                new RnaEdit(796, 796, "C"),
                new RnaEdit(679, 679, "A"),
                new RnaEdit(670, 671, "")
            };

            // NOTE(review): the fourth argument (true) presumably selects the reverse strand - confirm
            // against the CdnaSequence constructor.
            var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, true, rnaEdits);

            var expectedCodingSeq = "ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA";

            Assert.Equal(expectedCodingSeq, cdnaSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart - 1, codingRegion.Length));
        }
Ejemplo n.º 5
0
        // Calls Codons.GetCodons with alleles "A"/"G", four -1 arguments and a three-base
        // sequence, and expects both the reference and the alternate codons to be empty strings.
        public void Assign_WhenIntervalsNull_ReturnNull()
        {
            var threeBases = new SimpleSequence("AAA");

            var result = Codons.GetCodons(
                "A",
                "G",
                -1,
                -1,
                -1,
                -1,
                threeBases);

            Assert.Equal("", result.Reference);
            Assert.Equal("", result.Alternate);
        }
Ejemplo n.º 6
0
        // Creates a GnomadReader over the chr22:17467787-17467799 genome stream, backed by a small
        // in-memory reference sequence, and returns the reader's combined items.
        private static IEnumerable <ISupplementaryDataItem> GetConflictingGnomadItems()
        {
            // Reference bases: a run of 'T' of length MaxUpstreamLength followed by "AAAGAAAGAAAG".
            var bases  = new string('T', VariantUtils.MaxUpstreamLength) + "AAAGAAAGAAAG";
            var offset = 17467787 - 1 - VariantUtils.MaxUpstreamLength;

            var provider = new SimpleSequenceProvider(
                GenomeAssembly.GRCh38,
                new SimpleSequence(bases, offset),
                ChromosomeUtilities.RefNameToChromosome);

            var streamReader = new StreamReader(GetChr22_17467787_17467799_genome());
            var reader       = new GnomadReader(streamReader, null, provider);

            return reader.GetCombinedItems();
        }
Ejemplo n.º 7
0
        // Reads all items from the dbSNP test stream against a synthetic GRCh37 reference and
        // expects three items, the first serializing to "rs866375379" (quoted).
        public void GetItems_test()
        {
            // Reference bases: MaxUpstreamLength 'A's, then "T", a run of 'G', and finally "AC".
            var bases = new string('A', VariantUtils.MaxUpstreamLength)
                        + "T"
                        + new string('G', 10329 - 10285)
                        + "AC";
            var reference = new SimpleSequence(bases, 10284 - VariantUtils.MaxUpstreamLength);
            var provider  = new SimpleSequenceProvider(GenomeAssembly.GRCh37, reference, ChromosomeUtilities.RefNameToChromosome);

            var dbSnpReader = new DbSnpReader(GetStream(), provider);
            var dbSnpItems  = dbSnpReader.GetItems().ToList();

            Assert.Equal(3, dbSnpItems.Count);
            Assert.Equal("\"rs866375379\"", dbSnpItems[0].GetJsonString());
        }
Ejemplo n.º 8
0
        // Returns a GRCh37 SimpleSequenceProvider over a short synthetic sequence, with
        // chromosomes "1" and "2" mapped to the _chrom1 and _chrom2 fields.
        private ISequenceProvider GetSequenceProvider()
        {
            // 99 'A's, "TAGTCGGTTAA", 89 'A's, then "GCCCAT".
            var bases = new string('A', 99) + "TAGTCGGTTAA" + new string('A', 89) + "GCCCAT";
            var reference = new SimpleSequence(bases);

            var chromosomesByName = new Dictionary<string, IChromosome>();
            chromosomesByName.Add("1", _chrom1);
            chromosomesByName.Add("2", _chrom2);

            return new SimpleSequenceProvider(GenomeAssembly.GRCh37, reference, chromosomesByName);
        }
Ejemplo n.º 9
0
        // Annotates a three-base deletion (TAG at 1:1263141-1263143) against the mocked
        // forward-strand transcript, serializes the annotated transcript to JSON and expects the
        // HGVS protein notation "ENST00000343938.4:p.(Ter215GlyextTer43)" to appear in the output.
        public void SerializeJson_NominalUsage()
        {
            var deletion = new Variant(
                ChromosomeUtilities.Chr1,
                1263141,
                1263143,
                "TAG",
                "",
                VariantType.deletion,
                "1:1263141:1263143",
                false,
                false,
                false,
                null,
                AnnotationBehavior.SmallVariants,
                false);
            var reference        = new SimpleSequence(HgvsProteinNomenclatureTests.Enst00000343938GenomicSequence, 1260147 - 1);
            var mockedTranscript = HgvsProteinNomenclatureTests.GetMockedTranscriptOnForwardStrand();

            var annotated = FullTranscriptAnnotator.GetAnnotatedTranscript(
                mockedTranscript,
                deletion,
                reference,
                null,
                null,
                new AminoAcids(false));

            // Serialize through the shared StringBuilder cache and release the builder afterwards.
            var builder = StringBuilderCache.Acquire();
            annotated.SerializeJson(builder);
            var json = StringBuilderCache.GetStringAndRelease(builder);

            Assert.Contains("ENST00000343938.4:p.(Ter215GlyextTer43)", json);
        }
Ejemplo n.º 10
0
        // Reads all items from the gnomAD test stream against a synthetic GRCh38 reference and
        // expects two items, the first serializing to the JSON fragment below.
        public void GetItems_test()
        {
            const string expectedJson = "\"coverage\":218,\"failedFilter\":true,\"allAf\":0,\"allAn\":8734,\"allAc\":0,\"allHc\":0,\"afrAf\":0,\"afrAn\":2168,\"afrAc\":0,\"afrHc\":0,\"amrAf\":0,\"amrAn\":324,\"amrAc\":0,\"amrHc\":0,\"easAf\":0,\"easAn\":438,\"easAc\":0,\"easHc\":0,\"finAf\":0,\"finAn\":1296,\"finAc\":0,\"finHc\":0,\"nfeAf\":0,\"nfeAn\":4054,\"nfeAc\":0,\"nfeHc\":0,\"asjAf\":0,\"asjAn\":100,\"asjAc\":0,\"asjHc\":0,\"othAf\":0,\"othAn\":354,\"othAc\":0,\"othHc\":0";

            // Reference bases: MaxUpstreamLength 'A's followed by a short stretch of real bases.
            var bases     = new string('A', VariantUtils.MaxUpstreamLength) + "TGTGTTGTTATTCTGTGTGCAT";
            var reference = new SimpleSequence(bases, 10114 - VariantUtils.MaxUpstreamLength);

            var chromosomesByName = new Dictionary<string, IChromosome>();
            chromosomesByName.Add("1", Chrom1);

            var provider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, reference, chromosomesByName);
            var reader   = new GnomadReader(new StreamReader(GetGnomadStream()), provider);

            var gnomadItems = reader.GetItems().ToList();

            Assert.Equal(2, gnomadItems.Count);
            Assert.Equal(expectedJson, gnomadItems[0].GetJsonString());
        }
Ejemplo n.º 11
0
        // Builds the cDNA sequence of NR_002754.2 (a single exon covering the whole coding region)
        // with one single-base RNA edit at cDNA position 107 and checks the full cDNA string.
        public void RnaEdits_snv_forward_no_utr()
        {
            //NR_002754.2
            var genomicSeq   = new SimpleSequence("actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt", 11968210);
            var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119)
            };

            var rnaEdits = new IRnaEdit[] { new RnaEdit(107, 107, "t") };

            // NOTE(review): the fourth argument (false) presumably selects the forward strand - confirm
            // against the CdnaSequence constructor.
            var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);

            // Expected cDNA: identical to the genomic bases except for the edited base ("a" -> "t").
            Assert.Equal("actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt", cdnaSequence.GetCdnaSequence());
        }
Ejemplo n.º 12
0
        // Reads all items from the shifting-items gnomAD stream against a synthetic chr6 reference
        // and expects three items, each reported at position 157100397.
        public void LeftShiftingItems()
        {
            const int expectedCount    = 3;
            const int expectedPosition = 157100397;

            // Reference bases: MaxUpstreamLength 'A's followed by "GCGCGC".
            var bases     = new string('A', VariantUtils.MaxUpstreamLength) + "GCGCGC";
            var reference = new SimpleSequence(bases, 157100394 - 1 - VariantUtils.MaxUpstreamLength);

            var chromosomesByName = new Dictionary<string, IChromosome>();
            chromosomesByName.Add("6", new Chromosome("chr6", "6", 6));

            var provider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, reference, chromosomesByName);
            var reader   = new GnomadReader(new StreamReader(GetShiftingItemsStream()), provider);

            var gnomadItems = reader.GetItems().ToList();

            Assert.Equal(expectedCount, gnomadItems.Count);

            for (var index = 0; index < expectedCount; index++)
            {
                Assert.Equal(expectedPosition, gnomadItems[index].Position);
            }
        }
Ejemplo n.º 13
0
        // Builds the cDNA sequence of NM_001144032.2 (a single exon, coding region at cDNA 76-570)
        // with three single-base RNA edits and checks the coding portion against the expected sequence.
        public void RnaEdits_snv_forward_with_utr()
        {
            //NM_001144032.2 chr1:148644011-148644795
            var genomicSeq = new SimpleSequence("ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA", 148644011 - 1);

            var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495);

            var regions = new ITranscriptRegion[]
            {
                new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011, 148644795, 1, 785)
            };

            // Edits at 420 and 500 fall inside the coding region (76-570); the one at 737 lies after it.
            var rnaEdits = new IRnaEdit[]
            {
                new RnaEdit(420, 420, "C"),
                new RnaEdit(500, 500, "T"),
                new RnaEdit(737, 737, "T")
            };

            // NOTE(review): the fourth argument (false) presumably selects the forward strand - confirm
            // against the CdnaSequence constructor.
            var cdnaSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);

            Assert.Equal("ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA", cdnaSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart - 1, codingRegion.Length));
        }
Ejemplo n.º 14
0
        // Writes dbSNP items at positions 100, 103, 104 and 106 through a DbsnpGaTsvWriter, then
        // reads the two generated TSV files back and checks the rsIDs and the global minor allele
        // JSON fragments.
        // Both enumerators are disposed via using and the temporary files are deleted in a finally
        // block, so a failing assertion does not leave open enumerators or stale files behind.
        private void DbsnpGaTsvWriter_write_sa_item()
        {
            var chromosome = new Chromosome("chr1", "1", 0);
            var chromDict  = new Dictionary <string, IChromosome>
            {
                { "chr1", chromosome },
                { "1", chromosome }
            };

            var randomDbsnpPath  = Path.GetTempPath();
            var sequenceProvider = new Mock <ISequenceProvider>();
            var simpleSequence   = new SimpleSequence("ATGCGGT", 99);

            sequenceProvider.SetupGet(x => x.Sequence).Returns(simpleSequence);
            sequenceProvider.Setup(x => x.RefNameToChromosome).Returns(chromDict);
            var dataVersion        = new DataSourceVersion("dbsnp", "77", 123456);
            var dbsnpWriter        = new SaTsvWriter(randomDbsnpPath, dataVersion, "GRCh37", 10, "dbsnp", "dbsnp", true, sequenceProvider.Object);
            var globalAlleleWriter = new SaTsvWriter(randomDbsnpPath, dataVersion, "GRCh37", 10, "globalAllele", "GMAF", true, sequenceProvider.Object);

            // Output files read back below; computed up front so the finally block can remove them.
            var dbsnpFile        = Path.Combine(randomDbsnpPath, "dbsnp_77.tsv.gz");
            var globalAlleleFile = Path.Combine(randomDbsnpPath, "globalAllele_77.tsv.gz");

            try
            {
                using (var dbsnpGaTsvWriter = new DbsnpGaTsvWriter(dbsnpWriter, globalAlleleWriter))
                {
                    var dbSnpItemsPos100 = new List <SupplementaryDataItem>
                    {
                        new DbSnpItem(chromosome, 100, 123456, "A", 0.2, "G", 0.4),
                        new DbSnpItem(chromosome, 100, 123458, "A", 0.2, "T", 0.4)
                    };
                    var dbSnpItemsPos103 = new List <SupplementaryDataItem>
                    {
                        new DbSnpItem(chromosome, 103, 134567, "C", 0.5, "A", 0.5)
                    };

                    var dbSnpItemsPos104 = new List <SupplementaryDataItem>
                    {
                        new DbSnpItem(chromosome, 104, 134590, "G", double.MinValue, "A", 0.75)
                    };
                    var dbSnpItemsPos106 = new List <SupplementaryDataItem>
                    {
                        new DbSnpItem(chromosome, 106, 134257, "T", 0.3, "G", 0.45),
                        new DbSnpItem(chromosome, 106, 126753, "T", 0.3, "A", 0.25)
                    };

                    dbsnpGaTsvWriter.WritePosition(dbSnpItemsPos100);
                    dbsnpGaTsvWriter.WritePosition(dbSnpItemsPos103);
                    dbsnpGaTsvWriter.WritePosition(dbSnpItemsPos104);
                    dbsnpGaTsvWriter.WritePosition(dbSnpItemsPos106);
                }

                var tsvReader = new ParallelSaTsvReader(dbsnpFile);

                // Only the first two dbSNP entries (both at position 100) are checked.
                using (var tsvEnumerator = tsvReader.GetItems("1").GetEnumerator())
                {
                    Assert.True(tsvEnumerator.MoveNext());
                    Assert.Equal("\"ids\":[\"rs123456\"]", tsvEnumerator.Current.JsonStrings[0]);
                    Assert.True(tsvEnumerator.MoveNext());
                    Assert.Equal("\"ids\":[\"rs123458\"]", tsvEnumerator.Current.JsonStrings[0]);
                }

                var globalAlleleReader = new ParallelSaTsvReader(globalAlleleFile);

                using (var globalAlleleEnumerator = globalAlleleReader.GetItems("1").GetEnumerator())
                {
                    Assert.True(globalAlleleEnumerator.MoveNext());
                    Assert.Equal(100, globalAlleleEnumerator.Current.Position);
                    Assert.Equal("\"globalMinorAllele\":\"T\",\"globalMinorAlleleFrequency\":0.4", globalAlleleEnumerator.Current.JsonStrings[0]);
                    Assert.True(globalAlleleEnumerator.MoveNext());
                    Assert.Equal(103, globalAlleleEnumerator.Current.Position);
                    Assert.Equal("\"globalMinorAllele\":\"A\",\"globalMinorAlleleFrequency\":0.5", globalAlleleEnumerator.Current.JsonStrings[0]);

                    // The item at 104 was created with double.MinValue as its first frequency; an empty JSON string is expected for it.
                    Assert.True(globalAlleleEnumerator.MoveNext());
                    Assert.Equal(104, globalAlleleEnumerator.Current.Position);
                    Assert.Equal("", globalAlleleEnumerator.Current.JsonStrings[0]);

                    Assert.True(globalAlleleEnumerator.MoveNext());
                    Assert.Equal(106, globalAlleleEnumerator.Current.Position);
                    Assert.Equal("\"globalMinorAllele\":\"T\",\"globalMinorAlleleFrequency\":0.3", globalAlleleEnumerator.Current.JsonStrings[0]);
                }
            }
            finally
            {
                // File.Delete does not throw when the file does not exist.
                File.Delete(dbsnpFile);
                File.Delete(globalAlleleFile);
            }
        }
Ejemplo n.º 15
0
        // Returns a GRCh37 SimpleSequenceProvider over a short synthetic sequence, using the
        // shared ChromosomeUtilities.RefNameToChromosome dictionary.
        private static ISequenceProvider GetSequenceProvider()
        {
            // 99 'A's, "TAGTCGGTTAA", 89 'A's, then "GCCCAT".
            var bases     = new string('A', 99) + "TAGTCGGTTAA" + new string('A', 89) + "GCCCAT";
            var reference = new SimpleSequence(bases);

            return new SimpleSequenceProvider(GenomeAssembly.GRCh37, reference, ChromosomeUtilities.RefNameToChromosome);
        }