Example #1
0
        /// <summary>
        /// Creates a <see cref="VariantAnnotationsParser"/> that reads from the supplied custom TSV stream
        /// and assigns it a reference sequence provider looked up from the assembly the parser exposes.
        /// </summary>
        /// <param name="customTsvStream">Stream holding the custom annotation TSV; it is passed through
        /// <c>GZipUtilities.GetAppropriateStream</c> before being read.</param>
        /// <returns>The parser with its <c>SequenceProvider</c> property set.</returns>
        private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)
        {
            var tsvStream         = GZipUtilities.GetAppropriateStream(customTsvStream);
            var tsvReader         = new StreamReader(tsvStream);
            var annotationsParser = VariantAnnotationsParser.Create(tsvReader);

            // The reference URL depends on the assembly, so the parser has to exist first.
            var referenceUrl    = LambdaUrlHelper.GetRefUrl(annotationsParser.Assembly);
            var referenceStream = PersistentStreamUtils.GetReadStream(referenceUrl);
            annotationsParser.SequenceProvider = new ReferenceSequenceProvider(referenceStream);

            return annotationsParser;
        }
Example #2
0
        // Expects VariantAnnotationsParser.Create to throw UserErrorException when the header's
        // assembly line is written as "#assembly-hg20".
        public void ParseGenomeAssembly_IncorrectFormat_ThrowException()
        {
            const string headerText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly-hg20\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            Assert.Throws<UserErrorException>(
                () => VariantAnnotationsParser.Create(GetReadStream(headerText), SequenceProvider));
        }
Example #3
0
        // Expects VariantAnnotationsParser.Create to throw UserErrorException for a header titled "topmed".
        // NOTE(review): per the test name, the title presumably collides with an existing JSON tag - confirm.
        public void ParseTitle_Conflict_JsonTag()
        {
            const string headerText =
                "#title=topmed\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            Assert.Throws<UserErrorException>(
                () => VariantAnnotationsParser.Create(GetReadStream(headerText), SequenceProvider));
        }
Example #4
0
        // Expects enumerating GetItems() to throw UserErrorException for a row whose "customFilter"
        // column (category "Filter") holds a long free-text value.
        public void GetItems_ExtractCustomFilters_failsOnLargeText()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\tanyNumber\tcustomFilter\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\t.\tFilter\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\tnumber\tstring\n"
                + "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\t1.000\tthe good variant, the bad variant and the ugly variant\n";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                // ToArray() forces the enumeration inside the lambda.
                Assert.Throws<UserErrorException>(
                    () => parser.GetItems().ToArray());
            }
        }
Example #5
0
        // Checks that the "#version" and "#description" header lines (each written with a trailing tab)
        // are exposed as "v4.5" and "Internal allele frequencies" respectively.
        public void ParseHeader_version_and_description()
        {
            const string headerText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#version=v4.5\t\n"
                + "#description=Internal allele frequencies\t\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string expectedVersion     = "v4.5";
            const string expectedDescription = "Internal allele frequencies";

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerText), SequenceProvider))
            {
                Assert.Equal(expectedVersion, annotationsParser.Version);
                Assert.Equal(expectedDescription, annotationsParser.DataSourceDescription);
            }
        }
Example #6
0
        // Feeds an A -> ATA record at position 12783 to ExtractItems and checks that the returned item
        // has position 12782, an empty reference allele and "TA" as the alternate allele.
        public void ExtractItems_TrimmedAndLeftShifted()
        {
            const string headerText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string insertionLine = "chr1\t12783\tA\tATA\t.\t20\t125568\t0.000159\ttrue\tVUS\t";

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerText), SequenceProvider))
            {
                var extracted = annotationsParser.ExtractItems(insertionLine);

                Assert.Equal(12782, extracted.Position);
                Assert.Equal("", extracted.RefAllele);
                Assert.Equal("TA", extracted.AltAllele);
            }
        }
Example #7
0
        // Feeds a <DEL> record (POS 12783, END 24486) to ExtractItems and checks that exactly one custom
        // interval results, spanning 12784..24486.
        public void Extract_symbolic_alleles()
        {
            const string headerText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n";

            const string deletionLine = "chr1\t12783\tA\t<DEL>\t24486\t20\t125568\t0.000159\ttrue\tVUS\t";

            using (var annotationsParser = VariantAnnotationsParser.Create(GetReadStream(headerText), SequenceProvider))
            {
                // The return value is intentionally ignored; only the recorded intervals are inspected.
                annotationsParser.ExtractItems(deletionLine);

                var customIntervals = annotationsParser.GetCustomIntervals();
                Assert.Single(customIntervals);

                var deletion = customIntervals[0];
                Assert.Equal(12784, deletion.Start);
                Assert.Equal(24486, deletion.End);
            }
        }
Example #8
0
        // Expects enumerating GetItems() to throw UserErrorException when the chr1 rows are not in
        // ascending position order (12783 is followed by 3302).
        public void GetItems_UnsortedData_ThrowException()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\tanyNumber\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\tnumber\n"
                + "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\t1.000\n"
                + "chr1\t3302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\t3\n"
                + "chr1\t18972\tT\tC\t.\t10\t1000\t0.01\tfalse\t.\t\t100.1234567\n"
                + "chr1\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t\t3.1415926";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                // ToArray() forces the enumeration inside the lambda.
                Assert.Throws<UserErrorException>(
                    () => parser.GetItems().ToArray());
            }
        }
Example #9
0
        // Uses a header without an ALT column and checks that the single data row yields no items and
        // exactly one custom interval with the expected JSON.
        public void GetIntervals_noALT()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tEND\tnotes\n"
                + "#categories\t.\t.\t.\t.\n"
                + "#descriptions\t.\t.\t.\t.\n"
                + "#type\t.\t.\t.\tstring\n"
                + "chr16\t20000000\tT\t70000000\tLots of false positives in this region";

            const string expectedJson =
                "\"start\":20000000,\"end\":70000000,\"notes\":\"Lots of false positives in this region\"";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                // Items are materialised before the intervals are requested, as in the other interval tests.
                var parsedItems = parser.GetItems().ToArray();
                Assert.Empty(parsedItems);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Single(customIntervals);
                Assert.Equal(expectedJson, customIntervals[0].GetJsonString());
            }
        }
Example #10
0
        // Parses two small variants and one <DEL> row and checks that GetItems() returns two items
        // with the expected JSON for each.
        public void GetItems()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n"
                + "chr1\t14783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\n"
                + "chr2\t10302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\n"
                + "chr2\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t";

            const string expectedFirstJson =
                "\"refAllele\":\"G\",\"altAllele\":\"A\",\"allAc\":20,\"allAn\":125568,\"allAf\":0.000159,\"failedFilter\":true,\"pathogenicity\":\"VUS\"";
            const string expectedSecondJson =
                "\"refAllele\":\"C\",\"altAllele\":\"A\",\"allAc\":53,\"allAn\":8928,\"allAf\":0.001421";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                var parsedItems = parser.GetItems().ToArray();

                Assert.Equal(2, parsedItems.Length);
                Assert.Equal(expectedFirstJson, parsedItems[0].GetJsonString());
                Assert.Equal(expectedSecondJson, parsedItems[1].GetJsonString());
            }
        }
Example #11
0
        // Parses two rows at the same POS/END, one with ALT "." and one with ALT "<DEL>", and checks the
        // reported interval starts: 10510818 for the "." row and 10510819 for the "<DEL>" row.
        public void GetIntervals_start()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=allele\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tnotes\n"
                + "#categories\t.\t.\t.\t.\t.\n"
                + "#descriptions\t.\t.\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tstring\n"
                + "chr21\t10510818\tT\t.\t10699435\tinterval 1\n"
                + "chr21\t10510818\tT\t<DEL>\t10699435\tinterval 2";

            const string expectedFirstJson  = "\"start\":10510818,\"end\":10699435,\"notes\":\"interval 1\"";
            const string expectedSecondJson = "\"start\":10510819,\"end\":10699435,\"notes\":\"interval 2\"";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                var parsedItems = parser.GetItems().ToArray();
                Assert.Empty(parsedItems);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Equal(2, customIntervals.Count);
                Assert.Equal(expectedFirstJson, customIntervals[0].GetJsonString());
                Assert.Equal(expectedSecondJson, customIntervals[1].GetJsonString());
            }
        }
Example #12
0
        // With "#matchVariantsBy=sv", checks that the parser reports ReportFor.StructuralVariants, yields
        // two items, and exposes the <DEL> row as a single custom interval with the expected JSON.
        public void GetIntervals()
        {
            const string tsvText =
                "#title=IcslAlleleFrequencies\n"
                + "#assembly=GRCh38\n"
                + "#matchVariantsBy=sv\n"
                + "#CHROM\tPOS\tREF\tALT\tEND\tallAc\tallAn\tallAf\tfailedFilter\tpathogenicity\tnotes\n"
                + "#categories\t.\t.\t.\t.\tAlleleCount\tAlleleNumber\tAlleleFrequency\t.\tPrediction\t.\n"
                + "#descriptions\t.\t.\t.\t.\tALL\tALL\tALL\t.\t.\t.\n"
                + "#type\t.\t.\t.\t.\tnumber\tnumber\tnumber\tbool\tstring\tstring\n"
                + "chr1\t12783\tG\tA\t.\t20\t125568\t0.000159\ttrue\tVUS\t\n"
                + "chr1\t13302\tC\tA\t.\t53\t8928\t0.001421\tfalse\t.\t\n"
                + "chr1\t46993\tA\t<DEL>\t50879\t50\t250\t0.001\tfalse\tbenign\t";

            const string expectedIntervalJson =
                "\"start\":46993,\"end\":50879,\"allAc\":50,\"allAn\":250,\"allAf\":0.001,\"pathogenicity\":\"benign\"";

            using (var parser = VariantAnnotationsParser.Create(GetReadStream(tsvText), SequenceProvider))
            {
                var parsedItems = parser.GetItems().ToArray();
                Assert.Equal(ReportFor.StructuralVariants, parser.ReportFor);
                Assert.Equal(2, parsedItems.Length);

                var customIntervals = parser.GetCustomIntervals();
                Assert.Single(customIntervals);
                Assert.Equal(expectedIntervalJson, customIntervals[0].GetJsonString());
            }
        }