[InlineData("C", ".", "DP:AU:CU:GU:TU", "75:0,0:72,77:0,0:0,2")] // ref minor (AC)
// Checks that the single extracted sample exposes no variant frequencies
// for the supplied ref/alt alleles and FORMAT/sample columns.
public void VariantFrequency_ReturnNull(string refAllele, string altAllele, string formatCol, string sampleCol)
{
    // Assemble a ten-column record and break it back into its columns.
    string[] columns = $"chr1\t5592503\t.\t{refAllele}\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extractedSamples);
    Assert.Null(extractedSamples[0].VariantFrequencies);
}
// Checks that a sample column consisting only of "." yields a sample whose IsEmpty flag is set.
public void EmptySample()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    const string vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t.";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];
    Assert.True(sample.IsEmpty);
}
// Checks that the DQ value in the sample column (20) is surfaced as DeNovoQuality.
public void DeNovoQuality()
{
    const string vcfLine = "chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:DQ\t0/1:20";

    var extractedSamples = new SampleFieldExtractor(vcfLine.Split('\t')).ExtractSamples();

    Assert.Single(extractedSamples);
    Assert.Equal(20, extractedSamples[0].DeNovoQuality);
}
// Checks that the first sample reports a TotalDepth of 7882 when the extractor
// is constructed with 7882 as its second argument (the record's INFO column also has DP=7882).
public void PiscesTotalDepth()
{
    const int expectedTotalDepth = 7882;
    const string vcfLine = "chr1\t115251293\t.\tGA\tG\t100\tSB;LowVariantFreq\tDP=7882\tGT:GQ:AD:VF:NL:SB:GQX\t0/1:100:7588,294:0:20:-100.0000:100";

    string[] columns = vcfLine.Split('\t');
    var fieldExtractor = new SampleFieldExtractor(columns, 7882);
    var firstSample = fieldExtractor.ExtractSamples()[0];

    var observedTotalDepth = firstSample.TotalDepth;
    Assert.Equal(expectedTotalDepth, observedTotalDepth);
}
// Checks that the single extracted sample reports the expected (possibly null) TotalDepth
// for the supplied FORMAT and sample columns.
public void TotalDepth(string formatCol, string sampleCol, int? expectedTotalDepth)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();
    Assert.Single(extractedSamples);

    var actualDepth = extractedSamples[0]?.TotalDepth;
    Assert.Equal(expectedTotalDepth, actualDepth);
}
// Checks that the single extracted sample reports the expected Genotype
// for the supplied sample column (FORMAT is fixed to GT:GQ:GQX:DP:DPF:AD).
public void Genotype(string sampleCol, string expectedGenotype)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:AD\t{sampleCol}".Split('\t');

    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();
    Assert.Single(extractedSamples);

    var actualGenotype = extractedSamples[0]?.Genotype;
    Assert.Equal(expectedGenotype, actualGenotype);
}
// Checks that the single extracted sample reports the expected (possibly null) FailedFilter
// for the supplied sample column (FORMAT is fixed to GT:GQ:GQX:DP:DPF:FT).
public void FailedFilter(string sampleCol, bool? expectedFailedFilter)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:FT\t{sampleCol}".Split('\t');

    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();
    Assert.Single(extractedSamples);

    var actualFailedFilter = extractedSamples[0]?.FailedFilter;
    Assert.Equal(expectedFailedFilter, actualFailedFilter);
}
// Checks the VariantFrequency reported for a record with two alternate alleles (T,A)
// given the supplied FORMAT and sample columns.
public void VariantFrequencyMultiAllelic(string formatCol, string sampleCol, string expectedVariantFrequency)
{
    string vcfLine = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    // Assert.Single gives a clearer failure message than comparing a count to 1
    // and does not depend on the collection exposing a Count property.
    Assert.Single(samples);

    var sample = samples[0];
    var observedVariantFrequency = sample?.VariantFrequency;
    Assert.Equal(expectedVariantFrequency, observedVariantFrequency);
}
// Checks the AlleleDepths reported for a record with two alternate alleles (T,A)
// given the supplied FORMAT and sample columns.
public void AlleleDepthsMultiAllelic(string formatCol, string sampleCol, int[] expectedAlleleDepths)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();
    Assert.Single(extractedSamples);

    var actualAlleleDepths = extractedSamples[0]?.AlleleDepths;
    Assert.Equal(expectedAlleleDepths, actualAlleleDepths);
}
// Checks the GenotypeQuality reported by the single extracted sample
// for the supplied FORMAT and sample columns.
public void GenotypeQuality(string formatCol, string sampleCol, string expectedGenotypeQuality)
{
    string vcfLine = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    // Assert.Single gives a clearer failure message than comparing a count to 1
    // and does not depend on the collection exposing a Count property.
    Assert.Single(samples);

    var sample = samples[0];
    var observedGenotypeQuality = sample?.GenotypeQuality;
    Assert.Equal(expectedGenotypeQuality, observedGenotypeQuality);
}
// Checks that the AQ and LQ entries of the sample column (3.366 and 0.001) are surfaced as
// ArtifactAdjustedQualityScore and LikelihoodRatioQualityScore.
public void ArtifactAdjustedQualityScore_LikelihoodRatioQualityScore()
{
    const string vcfLine = "chr1\t2488109\t.\tG\tA\t5\tLowSupport\tDP=339\tGT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ\t0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.001";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];

    Assert.NotNull(sample.ArtifactAdjustedQualityScore);
    Assert.NotNull(sample.LikelihoodRatioQualityScore);

    // Format with the invariant culture so the expected "3.366"/"0.001" strings match
    // even when the test runs under a culture that uses ',' as the decimal separator.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
    Assert.Equal("3.366", sample.ArtifactAdjustedQualityScore.Value.ToString("0.###", invariantCulture));
    Assert.Equal("0.001", sample.LikelihoodRatioQualityScore.Value.ToString("0.###", invariantCulture));
}
[InlineData("T", "GT:AD:DP:VF", "0/1:317,200:517:0.38685", "0.3869")] // VF (rounding issue)
// Checks the variant frequencies of the single extracted sample: each value is formatted
// with up to four decimals and the values are joined with ',' before comparison.
public void VariantFrequency_Nominal(string altAllele, string formatCol, string sampleCol, string expectedResults)
{
    string vcfLine = $"chr1\t5592503\t.\tC\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];
    Assert.NotNull(sample?.VariantFrequencies);

    // Format with the invariant culture: the expected strings use '.' as the decimal
    // separator, and ',' is already used here to join the individual frequencies.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
    var observedResults = string.Join(',', sample.VariantFrequencies.Select(x => x.ToString("0.####", invariantCulture)));

    Assert.Equal(expectedResults, observedResults);
}
// Checks that, in a four-sample record, the second sample column "./." is extracted
// with genotype "./." and no variant frequencies.
public void EmptySamples()
{
    // for NIR-1306
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    const string vcfLine = "chrX\t2735147\t.\tG\tA\t38.25\tVQSRTrancheSNP99.90to100.00\tAC=3;AF=0.500;AN=6;BaseQRankSum=-0.602;DP=56;Dels=0.00;FS=30.019;HaplotypeScore=7.7259;MLEAC=3;MLEAF=0.500;MQ=41.18;MQ0=0;MQRankSum=0.098;QD=1.06;ReadPosRankSum=0.266;SB=-8.681e-03;VQSLOD=-6.0901;culprit=QD\tGT:AD:DP:GQ:PL\t0:7,0:7:3:0,3,39\t./.\t0/1:14,3:17:35:35,0,35\t1/1:9,10:19:3:41,3,0";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(4, samples.Length);

    // The second sample is the "./." column.
    var sample = samples[1];
    var observedGenotype = sample.Genotype;
    var observedVariantFrequency = sample.VariantFrequencies;

    Assert.Equal("./.", observedGenotype);
    Assert.Null(observedVariantFrequency);
}
// Checks that the second sample of a two-sample CNV record (RC:BC:CN:MCC = 151:108:6:4)
// is not flagged as loss of heterozygosity.
public void MajorChromosomeCopyTest()
{
    // data from NIR-1095
    // for NIR-1218
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    const string vcfLine = "1\t9314202\tCanvas:GAIN:1:9314202:9404148\tN\t<CNV>\t36\tPASS\tSVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621\tRC:BC:CN:MCC\t.\t151:108:6:4";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Length);

    var sample = samples[1];
    var observedMcc = sample?.IsLossOfHeterozygosity;
    Assert.False(observedMcc);
}
// Checks the DST, DID, DCS, SCH, PCN, PLG and MAD entries of the sample column against the
// corresponding properties of the single extracted sample.
public void Smn1()
{
    const string vcfLine = "5\t70247773\t.\tC\tT\t366\tPASS\tSNVHPOL=4;MQ=60\tGT:DST:DID:DCS:SCH:PCN:PLG:MAD:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:-:70:Orphanet:-:3,3:6606,6607:41,49:368:364:81:11:39,42:21,20:18,22:-41.0:PASS:370,0,365";

    string[] columns = vcfLine.Split('\t');
    var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extractedSamples);
    var firstSample = extractedSamples[0];

    // String-valued fields.
    Assert.Equal(new[] { "-" }, firstSample?.DiseaseAffectedStatus);
    Assert.Equal(new[] { "70" }, firstSample?.DiseaseIds);
    Assert.Equal(new[] { "Orphanet" }, firstSample?.DiseaseClassificationSources);
    Assert.Equal("-", firstSample?.SilentCarrierHaplotype);

    // Integer-valued fields.
    Assert.Equal(new[] { 3, 3 }, firstSample?.ParalogousGeneCopyNumbers);
    Assert.Equal(new[] { 6606, 6607 }, firstSample?.ParalogousEntrezGeneIds);
    Assert.Equal(new[] { 41, 49 }, firstSample?.MpileupAlleleDepths);
}
// Checks that the PR and SR entries of both sample columns are extracted as integer
// PairEndReadCounts and SplitReadCounts.
public void SplitReadCounts()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    const string vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t78,0:65,0\t157,42:252,63";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Length);

    var sample1 = samples[0];
    Assert.Equal(new[] { 78, 0 }, sample1.PairEndReadCounts);
    Assert.Equal(new[] { 65, 0 }, sample1.SplitReadCounts);

    var sample2 = samples[1];
    Assert.Equal(new[] { 157, 42 }, sample2.PairEndReadCounts);
    Assert.Equal(new[] { 252, 63 }, sample2.SplitReadCounts);
}
// Checks that the PR and SR entries of both sample columns are extracted as string
// PairEndReadCounts and SplitReadCounts.
public void SplitReadCounts()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    var vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t78,0:65,0\t157,42:252,63";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Count);

    var sample1 = samples[0];
    Assert.Equal(new[] { "78", "0" }, sample1.PairEndReadCounts);
    Assert.Equal(new[] { "65", "0" }, sample1.SplitReadCounts);

    var sample2 = samples[1];
    Assert.Equal(new[] { "157", "42" }, sample2.PairEndReadCounts);
    Assert.Equal(new[] { "252", "63" }, sample2.SplitReadCounts);
}
// Checks the VariantFrequency ("0.4231") and AlleleDepths ("15", "11") extracted from a record
// whose ALT column is "T,<NON_REF>", when ExtractSamples is called with true.
public void GatkGenomeVcfSample()
{
    // For NIR-1320
    // the original AD= 15,11,0
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    var vcfLine = "1\t30923\trs140337953\tG\tT,<NON_REF>\t264.77\tPASS\tBaseQRankSum=0.259;DB;DP=26;MLEAC=1,0;MLEAF=0.500,0.00;MQ=43.87;MQ0=0;MQRankSum=-0.830;ReadPosRankSum=-0.156\tGT:AD:GQ:PL:SB\t0/1:15,11,20:99:293,0,330,337,363,700:8,7,3,8";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples(true);

    // Assert.Single gives a clearer failure message than comparing a count to 1.
    Assert.Single(samples);
    var sample = samples[0];

    var observedVariantFrequency = sample?.VariantFrequency;
    var expectedVariantFrequency = "0.4231";
    Assert.Equal(expectedVariantFrequency, observedVariantFrequency);

    var expectedAlleleDepths = new[] { "15", "11" };
    var observedAlleleDepths = sample?.AlleleDepths;
    Assert.Equal(expectedAlleleDepths, observedAlleleDepths);
}
// Annotates a two-sample CNV record, compares the annotated allele against the expected JSON,
// and checks that the second sample's CopyNumber is "6".
public void OutputCanvasCnvRelevantField()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t' below,
    // so a space-separated literal would collapse into a single column.
    var vcfLine = "1\t9314201\tCanvas:GAIN:1:9314202:9404148\tN\t<CNV>\t36\tPASS\tSVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621\tRC:BC:CN:MCC\t.\t151:108:6:4";

    var annotatedVariant = DataUtilities.GetVariant(Resources.CacheGRCh37("ENST00000377403_chr1_Ensembl84"), null, vcfLine);
    Assert.NotNull(annotatedVariant);

    JsonUtilities.AlleleEquals(annotatedVariant, "{\"altAllele\":\"CNV\",\"refAllele\":\"N\",\"begin\":9314202,\"chromosome\":\"1\",\"end\":9404148,\"variantType\":\"copy_number_variation\",\"vid\":\"1:9314202:9404148:6\",\"overlappingGenes\":[\"H6PD\"],\"transcripts\":{\"ensembl\":[{\"transcript\":\"ENST00000377403.2\",\"bioType\":\"protein_coding\",\"exons\":\"4-5/5\",\"introns\":\"3-4/4\",\"geneId\":\"ENSG00000049239\",\"hgnc\":\"H6PD\",\"consequence\":[\"copy_number_increase\"],\"isCanonical\":true,\"proteinId\":\"ENSP00000366620.1\"}]}}");

    var cols = vcfLine.Split('\t');
    var extractor = new SampleFieldExtractor(cols);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Count);

    var sample = samples[1];
    var observedCn = sample?.CopyNumber;
    Assert.Equal("6", observedCn);
}