コード例 #1
0
        // Creates a VariantAnnotationsParser over the supplied custom TSV stream and then gives it a
        // ReferenceSequenceProvider built from the URL that LambdaUrlHelper.GetRefUrl returns for the
        // parser's assembly.
        private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
        {
            var tsvStream         = GZipUtilities.GetAppropriateStream(customTsvStream);
            var tsvReader         = new StreamReader(tsvStream);
            var annotationsParser = VariantAnnotationsParser.Create(tsvReader);

            // The provider depends on annotationsParser.Assembly, so it can only be built once Create has returned.
            var referenceUrl    = LambdaUrlHelper.GetRefUrl(annotationsParser.Assembly);
            var referenceStream = PersistentStreamUtils.GetReadStream(referenceUrl);
            annotationsParser.SequenceProvider = new ReferenceSequenceProvider(referenceStream);

            return annotationsParser;
        }
コード例 #2
0
        // CheckAltAndEndColumns is expected to throw a UserErrorException when the tag list
        // contains neither an "ALT" nor an "END" entry.
        public void CheckAltAndEndColumns_NoAltAndEnd_ThrowException()
        {
            var parser = new VariantAnnotationsParser(null, null);
            parser.Tags = new[] { "#CHROM", "POS", "REF", "Note" };

            Assert.Throws<UserErrorException>(() => parser.CheckAltAndEndColumns());
        }
コード例 #3
0
        // CheckPosAndRefColumns is expected to throw a UserErrorException for each of two tag lists:
        // one whose second entry is empty, and one whose third entry is "REFERENCE" rather than "REF".
        public void CheckPosAndRefColumns_InvalidPosOrRef_ThrowException()
        {
            var parser = new VariantAnnotationsParser(null, null);

            var invalidTagSets = new[]
            {
                new[] { "#CHROM", "", "REF", "ALT" },
                new[] { "#CHROM", "POS", "REFERENCE", "ALT" }
            };

            // Same parser instance is reused; only its Tags are swapped between checks.
            foreach (var invalidTags in invalidTagSets)
            {
                parser.Tags = invalidTags;
                Assert.Throws<UserErrorException>(() => parser.CheckPosAndRefColumns());
            }
        }
コード例 #4
0
        // VariantAnnotationsParser.Create is expected to throw a UserErrorException for a header
        // whose assembly line reads "#assembly-hg20" instead of using the "#assembly=" form.
        public void ParseGenomeAssembly_IncorrectFormat_ThrowException()
        {
            // Metadata lines; the second one is the line under test.
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly-hg20\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string malformedTsv = metadata + columns;

            Assert.Throws<UserErrorException>(
                () => VariantAnnotationsParser.Create(GetReadStream(malformedTsv), SequenceProvider));
        }
コード例 #5
0
        // VariantAnnotationsParser.Create is expected to throw a UserErrorException when the
        // header title is "topmed".
        // NOTE(review): judging by the test name, "topmed" presumably collides with an existing
        // JSON tag - confirm against the parser implementation.
        public void ParseTitle_Conflict_JsonTag()
        {
            const string metadata = "#title=topmed\n" +
                                    "#assembly=GRCh38\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string conflictingTsv = metadata + columns;

            Assert.Throws<UserErrorException>(
                () => VariantAnnotationsParser.Create(GetReadStream(conflictingTsv), SequenceProvider));
        }
コード例 #6
0
        // ParseHeaderLines is expected to throw a UserErrorException when the #CHROM, #categories,
        // #descriptions and #type rows do not all have the same number of tab-separated fields.
        public void ParseHeaderLines_InconsistentFields()
        {
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly=GRCh38\n";

            // Each of the four rows below has a different field count.
            const string mismatchedRows = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\n" +
                                          "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\tMore\n" +
                                          "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\n" +
                                          "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring";

            const string invalidHeaderLines = metadata + mismatchedRows;

            using (var headerParser = new VariantAnnotationsParser(GetReadStream(invalidHeaderLines), null))
            {
                Assert.Throws<UserErrorException>(() => headerParser.ParseHeaderLines());
            }
        }
コード例 #7
0
        // Enumerating GetItems is expected to throw a UserErrorException for a data row whose
        // "customFilter" column (category "Filter") holds a long free-text value.
        // NOTE(review): the exact length limit being exceeded is not visible here - confirm in the parser.
        public void GetItems_ExtractCustomFilters_failsOnLargeText()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=allele\n" +
                                  "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\tanyNumber\tcustomFilter\n" +
                                  "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\t.\tFilter\n" +
                                  "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\tnumber\tstring\n";

            const string rowWithLongFilter = "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\t1.000\tthe good variant, the bad variant and the ugly variant\n";

            const string tsv = header + rowWithLongFilter;

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                // ToArray forces the enumeration of GetItems inside the asserted delegate.
                Assert.Throws<UserErrorException>(() => parser.GetItems().ToArray());
            }
        }
コード例 #8
0
        // After ParseHeaderLines reads a header containing "#matchVariantsBy=sv", the parser's
        // ReportFor is expected to be ReportFor.StructuralVariants.
        public void ParseHeaderLines_matchBy_sv()
        {
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly=GRCh38\n" +
                                    "#matchVariantsBy=sv\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring";

            const string svHeader = metadata + columns;

            using (var parser = new VariantAnnotationsParser(GetReadStream(svHeader), null))
            {
                parser.ParseHeaderLines();

                Assert.Equal(ReportFor.StructuralVariants, parser.ReportFor);
            }
        }
コード例 #9
0
        // Runs ParseHeaderLines on a well-formed header and checks the resulting JsonTag, Assembly,
        // JsonKeys, IntervalJsonKeys, Categories, Descriptions and ValueTypes.
        public void ParseHeaderLines_AsExpected()
        {
            // The title line ends with a space and the assembly line with a tab; the JsonTag and
            // Assembly assertions below expect values without that trailing whitespace.
            const string metadata = "#title=IcslAlleleFrequencies \n" +
                                    "#assembly=GRCh38\t\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tdeNovoQual\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\tScore\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tnumber\tstring";

            const string wellFormedHeader = metadata + columns;

            using (var parser = new VariantAnnotationsParser(GetReadStream(wellFormedHeader), null))
            {
                parser.ParseHeaderLines();

                // Title and assembly.
                Assert.Equal("IcslAlleleFrequencies", parser.JsonTag);
                Assert.Equal(GenomeAssembly.GRCh38, parser.Assembly);

                // JSON keys: allele-based entries lead with refAllele/altAllele, interval entries with start/end.
                var alleleKeys = new[]
                {
                    "refAllele", "altAllele", "allAc", "allAn", "allAf", "failedFilter", "pathogenicity", "deNovoQual", "notes"
                };
                Assert.Equal(alleleKeys, parser.JsonKeys);

                var intervalKeys = new[]
                {
                    "start", "end", "allAc", "allAn", "allAf", "failedFilter", "pathogenicity", "deNovoQual", "notes"
                };
                Assert.Equal(intervalKeys, parser.IntervalJsonKeys);

                // Per-column metadata for the seven annotation columns (allAc .. notes).
                var categories = new[]
                {
                    CustomAnnotationCategories.AlleleCount,
                    CustomAnnotationCategories.AlleleNumber,
                    CustomAnnotationCategories.AlleleFrequency,
                    CustomAnnotationCategories.Unknown,
                    CustomAnnotationCategories.Prediction,
                    CustomAnnotationCategories.Score,
                    CustomAnnotationCategories.Unknown
                };
                Assert.Equal(categories, parser.Categories);

                // A "." description in the header is expected to surface as null.
                var descriptions = new[] { "ALL", "ALL", "ALL", null, null, null, null };
                Assert.Equal(descriptions, parser.Descriptions);

                var valueTypes = new[]
                {
                    SaJsonValueType.Number, SaJsonValueType.Number, SaJsonValueType.Number,
                    SaJsonValueType.Bool,
                    SaJsonValueType.String,
                    SaJsonValueType.Number,
                    SaJsonValueType.String
                };
                Assert.Equal(valueTypes, parser.ValueTypes);
            }
        }
コード例 #10
0
        // ExtractItems on a row with POS 12783, REF "A" and ALT "ATA" is expected to return an item
        // with Position 12782, an empty RefAllele and AltAllele "TA".
        public void ExtractItems_TrimmedAndLeftShifted()
        {
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly=GRCh38\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string headerOnly   = metadata + columns;
            const string insertionRow = "chr1\t12783\tA\tATA\t.\t20\t125568\t0.000159\ttrue\tVUS\t";

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerOnly), SequenceProvider))
            {
                var extracted = annotationsParser.ExtractItems(insertionRow);

                Assert.Equal(12782, extracted.Position);
                Assert.Equal("", extracted.RefAllele);
                Assert.Equal("TA", extracted.AltAllele);
            }
        }
コード例 #11
0
        // A parser created from a header with "#version" and "#description" lines is expected to
        // expose them as Version and DataSourceDescription, without the trailing tab each line carries.
        public void ParseHeader_version_and_description()
        {
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly=GRCh38\n" +
                                    "#version=v4.5\t\n" +
                                    "#description=Internal allele frequencies\t\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string headerWithVersion = metadata + columns;

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerWithVersion), SequenceProvider))
            {
                Assert.Equal("v4.5", annotationsParser.Version);
                Assert.Equal("Internal allele frequencies", annotationsParser.DataSourceDescription);
            }
        }
コード例 #12
0
        // After ExtractItems processes a row whose ALT is the symbolic allele "<DEL>" (POS 12783,
        // END 24486), GetCustomIntervals is expected to hold exactly one interval, with Start 12784
        // and End 24486.
        public void Extract_symbolic_alleles()
        {
            const string metadata = "#title=IcslAlleleFrequencies\n" +
                                    "#assembly=GRCh38\n" +
                                    "#matchVariantsBy=allele\n";

            const string columns = "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                   "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                   "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                   "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string headerOnly  = metadata + columns;
            const string deletionRow = "chr1\t12783\tA\t<DEL>\t24486\t20\t125568\t0.000159\ttrue\tVUS\t";

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerOnly), SequenceProvider))
            {
                // The return value is deliberately ignored; only the recorded interval is inspected.
                annotationsParser.ExtractItems(deletionRow);

                var customIntervals = annotationsParser.GetCustomIntervals();
                Assert.Single(customIntervals);

                var deletion = customIntervals[0];
                Assert.Equal(12784, deletion.Start);
                Assert.Equal(24486, deletion.End);
            }
        }
コード例 #13
0
        // Enumerating GetItems is expected to throw a UserErrorException when the data rows are not
        // in ascending position order (chr1:3302 follows chr1:12783).
        public void GetItems_UnsortedData_ThrowException()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=allele\n" +
                                  "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\tanyNumber\n" +
                                  "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\t.\n" +
                                  "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\tnumber\n";

            // The second row's position is lower than the first row's.
            const string unsortedRows = "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\t1.000\n" +
                                        "chr1\t3302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\t3\n" +
                                        "chr1\t18972\tT\tC\t.\t10\t1000\t0.01\tfalse\t.\t\t100.1234567\n" +
                                        "chr1\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t\t3.1415926";

            const string tsv = header + unsortedRows;

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                // ToArray forces the enumeration of GetItems inside the asserted delegate.
                Assert.Throws<UserErrorException>(() => parser.GetItems().ToArray());
            }
        }
コード例 #14
0
        // With a header that has no ALT column, a row carrying an END value is expected to produce
        // no items and exactly one custom interval whose JSON contains start, end and notes.
        public void GetIntervals_noALT()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=allele\n" +
                                  "#CHROM\tPOS\tREF\tEND\tnotes\n" +
                                  "#categories\t.\t.\t.\t.\n" +
                                  "#descriptions\t.\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\tstring\n";

            const string regionRow = "chr16\t20000000\tT\t70000000\tLots of false positives in this region";

            const string tsv = header + regionRow;

            const string expectedJson = "\"start\":20000000,\"end\":70000000,\"notes\":\"Lots of false positives in this region\"";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                // GetItems is enumerated first; the intervals are read only afterwards.
                var smallVariants = parser.GetItems().ToArray();
                Assert.Empty(smallVariants);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Single(customIntervals);
                Assert.Equal(expectedJson, customIntervals[0].GetJsonString());
            }
        }
コード例 #15
0
        // Three data rows (two with plain alleles, one with ALT "<DEL>") are expected to yield two
        // items from GetItems, each serialising to the expected JSON fragment.
        public void GetItems()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=allele\n" +
                                  "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                  "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                  "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string rows = "chr1\t14783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\n" +
                                "chr2\t10302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\n" +
                                "chr2\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t";

            const string tsv = header + rows;

            // The second fragment has no failedFilter or pathogenicity entry (row values "false" and ".").
            var expectedJson = new[]
            {
                "\"refAllele\":\"G\",\"altAllele\":\"A\",\"allAc\":20,\"allAn\":125568,\"allAf\":0.000159,\"failedFilter\":true,\"pathogenicity\":\"VUS\"",
                "\"refAllele\":\"C\",\"altAllele\":\"A\",\"allAc\":53,\"allAn\":8928,\"allAf\":0.001421"
            };

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                var smallVariants = parser.GetItems().ToArray();
                Assert.Equal(2, smallVariants.Length);

                for (var i = 0; i < expectedJson.Length; i++)
                {
                    Assert.Equal(expectedJson[i], smallVariants[i].GetJsonString());
                }
            }
        }
コード例 #16
0
        // Two rows share POS 10510818 and END 10699435 but differ in ALT: "." is expected to give an
        // interval starting at 10510818, "<DEL>" one starting at 10510819. Neither row yields an item.
        public void GetIntervals_start()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=allele\n" +
                                  "#CHROM\tPOS\tREF\tALT\tEND\tnotes\n" +
                                  "#categories\t.\t.\t.\t.\t.\n" +
                                  "#descriptions\t.\t.\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\t.\tstring\n";

            const string rows = "chr21\t10510818\tT\t.\t10699435\tinterval 1\n" +
                                "chr21\t10510818\tT\t<DEL>\t10699435\tinterval 2";

            const string tsv = header + rows;

            var expectedJson = new[]
            {
                "\"start\":10510818,\"end\":10699435,\"notes\":\"interval 1\"",
                "\"start\":10510819,\"end\":10699435,\"notes\":\"interval 2\""
            };

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                // GetItems is enumerated first; the intervals are read only afterwards.
                var smallVariants = parser.GetItems().ToArray();
                Assert.Empty(smallVariants);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Equal(2, customIntervals.Count);

                for (var i = 0; i < expectedJson.Length; i++)
                {
                    Assert.Equal(expectedJson[i], customIntervals[i].GetJsonString());
                }
            }
        }
コード例 #17
0
        // With "#matchVariantsBy=sv", three data rows are expected to give ReportFor.StructuralVariants,
        // two items, and a single custom interval (from the "<DEL>" row) with the expected JSON.
        public void GetIntervals()
        {
            const string header = "#title=IcslAlleleFrequencies\n" +
                                  "#assembly=GRCh38\n" +
                                  "#matchVariantsBy=sv\n" +
                                  "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n" +
                                  "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n" +
                                  "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n" +
                                  "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string rows = "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\n" +
                                "chr1\t13302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\n" +
                                "chr1\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t";

            const string tsv = header + rows;

            // Start equals the row's POS here, and no failedFilter entry appears (row value "false").
            const string expectedIntervalJson = "\"start\":46993,\"end\":50879,\"allAc\":50,\"allAn\":250,\"allAf\":0.001,\"pathogenicity\":\"benign\"";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsv), SequenceProvider))
            {
                var smallVariants = parser.GetItems().ToArray();

                Assert.Equal(ReportFor.StructuralVariants, parser.ReportFor);
                Assert.Equal(2, smallVariants.Length);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Single(customIntervals);
                Assert.Equal(expectedIntervalJson, customIntervals[0].GetJsonString());
            }
        }
コード例 #18
0
 // Checks VariantAnnotationsParser.IsValidAltAllele: a mixed-case base string and a base string
 // followed by a bracketed "[chr3:153444911[" suffix are accepted; a string containing '-' is rejected.
 // NOTE(review): the test name mentions IsValidNucleotideSequence, but the method exercised here
 // is IsValidAltAllele.
 public void IsValidNucleotideSequence_IsValidSequence_Pass()
 {
     const string mixedCaseBases    = "actgnACTGN";
     const string basesWithBrackets = "AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[";
     const string basesWithDash     = "AC-GT";

     Assert.True(VariantAnnotationsParser.IsValidAltAllele(mixedCaseBases));
     Assert.True(VariantAnnotationsParser.IsValidAltAllele(basesWithBrackets));
     Assert.False(VariantAnnotationsParser.IsValidAltAllele(basesWithDash));
 }
コード例 #19
0
 // Checks VariantAnnotationsParser.IsValidNucleotideSequence: a mixed-case string of
 // a/c/t/g/n characters is accepted, while a string containing '-' is rejected.
 public void IsValidNucleotideSequence_IsValidSequence_Pass()
 {
     const string mixedCaseBases = "actgnACTGN";
     const string basesWithDash  = "AC-GT";

     Assert.True(VariantAnnotationsParser.IsValidNucleotideSequence(mixedCaseBases));
     Assert.False(VariantAnnotationsParser.IsValidNucleotideSequence(basesWithDash));
 }