// A null sample column handed to SampleFieldExtractor.ExtractSample must yield
// a sample whose IsEmpty flag is set.
public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
{
    var indices  = new FormatIndices();
    var position = GetSimplePositionUsingAlleleNum(1);

    var extracted = SampleFieldExtractor.ExtractSample(null, indices, position, null);

    Assert.True(extracted.IsEmpty);
}
// A null sample column (with allele count 1) must be reported as an empty sample.
public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
{
    var indices = new FormatIndices();

    var extracted = SampleFieldExtractor.ExtractSample(null, indices, 1);

    Assert.True(extracted.IsEmpty);
}
// A sample column consisting only of "." must be reported as an empty sample.
public void ExtractSample_DotInSampleColumn_ReturnEmptySample()
{
    const string sampleColumn = ".";
    var indices = new FormatIndices();

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, 1, false);

    Assert.True(extracted.IsEmpty);
}
// For the supplied FORMAT / sample column pair, the extracted sample must be
// flagged as loss of heterozygosity.
public void ExtractSample_DragenCNV_MCN_LOH(string formatField, string sampleField)
{
    var indices = new FormatIndices();
    indices.Set(formatField);
    var position = GetSimplePositionUsingAlleleNum(1);

    var extracted = SampleFieldExtractor.ExtractSample(sampleField, indices, position, null);

    Assert.True(extracted.IsLossOfHeterozygosity);
}
// Parses a sample column laid out as GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC and
// checks the genotype and the repeat unit counts that come back.
public void ExtractSample_ExpansionHunter()
{
    const string formatColumn = "GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC";
    const string sampleColumn = "1/1:SPANNING/SPANNING:15/15:15-15/15-15:22/22:23/23:0/0:38.270270";

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, GetSimplePositionUsingAlleleNum(1), null);

    Assert.Equal("1/1", extracted.Genotype);
    Assert.Equal(new[] { 15, 15 }, extracted.RepeatUnitCounts);
}
// Parses a GT:CN:MCN sample column and checks genotype, copy number and
// minor haplotype copy number.
public void ExtractSample_DragenCNV_AsExpected()
{
    const string formatColumn = "GT:CN:MCN";
    const string sampleColumn = "0|1:3:1";

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, GetSimplePositionUsingAlleleNum(1), null);

    Assert.Equal("0|1", extracted.Genotype);
    Assert.Equal(3, extracted.CopyNumber);
    Assert.Equal(1, extracted.MinorHaplotypeCopyNumber);
}
// Parses a GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS sample column and checks genotype,
// somatic quality, allele depths, total depth and the variant frequency
// (expected to equal 8 / 41.0 for this input).
public void ExtractSample_DragenSomatic_AsExpected()
{
    const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
    const string sampleColumn = "0|1:3.96:33,8:0.195:13,6:20,2:41:17,16,4,4:13,20,4,4:534234";

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, GetSimplePositionUsingAlleleNum(1), null);

    Assert.Equal("0|1", extracted.Genotype);
    Assert.Equal(3.96, extracted.SomaticQuality);
    Assert.Equal(new[] { 33, 8 }, extracted.AlleleDepths);
    Assert.Equal(41, extracted.TotalDepth);
    Assert.Equal(new[] { 8 / 41.0 }, extracted.VariantFrequencies);
}
[InlineData("C", ".", "DP:AU:CU:GU:TU", "75:0,0:72,77:0,0:0,2")] // ref minor (AC)
// Builds a single-sample VCF line from the supplied columns and expects the
// extracted sample to carry no variant frequencies.
public void VariantFrequency_ReturnNull(string refAllele, string altAllele, string formatCol, string sampleCol)
{
    string[] columns = $"chr1\t5592503\t.\t{refAllele}\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    Assert.Null(extracted[0].VariantFrequencies);
}
// A single sample column containing only "." must be extracted as one empty sample.
public void EmptySample()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    const string vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t.";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];
    Assert.True(sample.IsEmpty);
}
// A GT:DQ sample of "0/1:20" must expose a de novo quality of 20.
public void DeNovoQuality()
{
    string[] columns = "chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:DQ\t0/1:20".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    Assert.Equal(20, extracted[0].DeNovoQuality);
}
// Parses a GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ sample column and checks genotype,
// genotype quality, allele depths, total depth, variant frequency and the
// AQ / LQ quality scores.
public void ExtractSample_PEPE()
{
    const string formatColumn = "GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ";
    const string sampleColumn = "0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.000";

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, GetSimplePositionUsingAlleleNum(1), null);

    Assert.Equal("0/1", extracted.Genotype);
    Assert.Equal(5, extracted.GenotypeQuality);
    Assert.Equal(new[] { 338, 1 }, extracted.AlleleDepths);
    Assert.Equal(339, extracted.TotalDepth);
    Assert.Equal(new[] { 0.00295 }, extracted.VariantFrequencies);
    Assert.Equal(3.366f, extracted.ArtifactAdjustedQualityScore);
    Assert.Equal(0.000f, extracted.LikelihoodRatioQualityScore);
}
// Extracts the single sample from a GT:GQ:GQX:DP:DPF:FT line and compares its
// FailedFilter value with the expected one.
public void FailedFilter(string sampleCol, bool? expectedFailedFilter)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:FT\t{sampleCol}".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    var first = extracted[0];
    Assert.Equal(expectedFailedFilter, first?.FailedFilter);
}
// Extracts the single sample from a line with ALT "T,A" and compares its
// allele depths with the expected array.
public void AlleleDepthsMultiAllelic(string formatCol, string sampleCol, int[] expectedAlleleDepths)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    var first = extracted[0];
    Assert.Equal(expectedAlleleDepths, first?.AlleleDepths);
}
// Extracts the single sample from a line with ALT "T,A" and compares its
// variant frequency with the expected value.
public void VariantFrequencyMultiAllelic(string formatCol, string sampleCol, string expectedVariantFrequency)
{
    string vcfLine = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    // Assert.Single states the intent directly and gives a clearer failure
    // message than comparing the collection size with 1.
    Assert.Single(samples);

    var sample = samples[0];
    var observedVariantFrequency = sample?.VariantFrequency;
    Assert.Equal(expectedVariantFrequency, observedVariantFrequency);
}
// The sample column has no DP field; the extractor is constructed with 7882 as
// its second argument and the first sample's total depth is expected to be 7882.
public void PiscesTotalDepth()
{
    const int expectedDepth = 7882;
    string[] columns = "chr1\t115251293\t.\tGA\tG\t100\tSB;LowVariantFreq\tDP=7882\tGT:GQ:AD:VF:NL:SB:GQX\t0/1:100:7588,294:0:20:-100.0000:100".Split('\t');

    var extracted = new SampleFieldExtractor(columns, 7882).ExtractSamples();
    var first = extracted[0];

    Assert.Equal(expectedDepth, first.TotalDepth);
}
// Extracts the single sample built from the supplied FORMAT / sample columns
// and compares its total depth with the expected (possibly null) value.
public void TotalDepth(string formatCol, string sampleCol, int? expectedTotalDepth)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    var first = extracted[0];
    Assert.Equal(expectedTotalDepth, first?.TotalDepth);
}
// Extracts the single sample from a GT:GQ:GQX:DP:DPF:AD line and compares its
// genotype with the expected string.
public void Genotype(string sampleCol, string expectedGenotype)
{
    string[] columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:AD\t{sampleCol}".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    var first = extracted[0];
    Assert.Equal(expectedGenotype, first?.Genotype);
}
// Extracts the single sample built from the supplied FORMAT / sample columns
// and compares its genotype quality with the expected value.
public void GenotypeQuality(string formatCol, string sampleCol, string expectedGenotypeQuality)
{
    string vcfLine = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    // Assert.Single states the intent directly and gives a clearer failure
    // message than comparing the collection size with 1.
    Assert.Single(samples);

    var sample = samples[0];
    var observedGenotypeQuality = sample?.GenotypeQuality;
    Assert.Equal(expectedGenotypeQuality, observedGenotypeQuality);
}
// Extracts the single sample from a line carrying AQ and LQ fields and checks
// that both scores are present and format to "3.366" and "0.001" respectively.
public void ArtifactAdjustedQualityScore_LikelihoodRatioQualityScore()
{
    const string vcfLine = "chr1\t2488109\t.\tG\tA\t5\tLowSupport\tDP=339\tGT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ\t0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.001";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];

    Assert.NotNull(sample.ArtifactAdjustedQualityScore);
    Assert.NotNull(sample.LikelihoodRatioQualityScore);

    // Format with the invariant culture: the expected strings use '.' as the
    // decimal separator, which the current culture does not guarantee.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    Assert.Equal("3.366", sample.ArtifactAdjustedQualityScore.Value.ToString("0.###", invariant));
    Assert.Equal("0.001", sample.LikelihoodRatioQualityScore.Value.ToString("0.###", invariant));
}
[InlineData("T", "GT:AD:DP:VF", "0/1:317,200:517:0.38685", "0.3869")] // VF (rounding issue)
// Extracts the single sample, formats each variant frequency with up to four
// decimals, joins them with ',' and compares the result with the expected string.
public void VariantFrequency_Nominal(string altAllele, string formatCol, string sampleCol, string expectedResults)
{
    string vcfLine = $"chr1\t5592503\t.\tC\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Single(samples);
    var sample = samples[0];
    Assert.NotNull(sample?.VariantFrequencies);

    // Format with the invariant culture: the expected strings use '.' as the
    // decimal separator, and a comma-decimal culture would also clash with the
    // ',' used to join the values.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var observedResults = string.Join(',', sample.VariantFrequencies.Select(x => x.ToString("0.####", invariant)));

    Assert.Equal(expectedResults, observedResults);
}
// Registers heteroplasmy data for alleles "C" and "G" at position 1, extracts a
// sample at chrM:1 A>C,T, and checks the variant frequencies together with the
// heteroplasmy percentiles ("14.29" for the first alt allele, "null" for the second).
public void ExtractSample_MitoHeteroplasmy_AsExpected()
{
    const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
    const string sampleColumn = "1|2:3.96:0,15,85:0.195:13,6:20,2:100:17,16,4,4:13,20,4,4:534234";

    var heteroplasmy = new MitoHeteroplasmyProvider();
    heteroplasmy.Add(1, "C", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 2, 4 });
    heteroplasmy.Add(1, "G", new[] { 0.101, 0.201 }, new[] { 1, 2 });

    var position = new SimplePosition(ChromosomeUtilities.ChrM, 1, "A", new[] { "C", "T" });

    var indices = new FormatIndices();
    indices.Set(formatColumn);

    var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, heteroplasmy);

    Assert.Equal(new[] { 15 / 100.0, 85 / 100.0 }, extracted.VariantFrequencies);
    Assert.Equal(new[] { "14.29", "null" }, extracted.HeteroplasmyPercentile);
}
// Of four sample columns, the second is "./."; it must be extracted with
// genotype "./." and no variant frequencies.
public void EmptySamples()
{
    // for NIR-1306
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    const string vcfLine = "chrX\t2735147\t.\tG\tA\t38.25\tVQSRTrancheSNP99.90to100.00\tAC=3;AF=0.500;AN=6;BaseQRankSum=-0.602;DP=56;Dels=0.00;FS=30.019;HaplotypeScore=7.7259;MLEAC=3;MLEAF=0.500;MQ=41.18;MQ0=0;MQRankSum=0.098;QD=1.06;ReadPosRankSum=0.266;SB=-8.681e-03;VQSLOD=-6.0901;culprit=QD\tGT:AD:DP:GQ:PL\t0:7,0:7:3:0,3,39\t./.\t0/1:14,3:17:35:35,0,35\t1/1:9,10:19:3:41,3,0";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(4, samples.Length);

    var sample = samples[1];
    var observedGenotype = sample.Genotype;
    var observedVariantFrequency = sample.VariantFrequencies;

    Assert.Equal("./.", observedGenotype);
    Assert.Null(observedVariantFrequency);
}
// Extracts two samples from an RC:BC:CN:MCC line and checks that the second one
// (151:108:6:4) is not flagged as loss of heterozygosity.
public void MajorChromosomeCopyTest()
{
    // data from NIR-1095
    // for NIR-1218
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    const string vcfLine = "1\t9314202\tCanvas:GAIN:1:9314202:9404148\tN\t<CNV>\t36\tPASS\tSVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621\tRC:BC:CN:MCC\t.\t151:108:6:4";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Length);

    var sample = samples[1];
    var observedMcc = sample?.IsLossOfHeterozygosity;
    Assert.False(observedMcc);
}
// Extracts the single sample from a line carrying the DST, DID, DCS, SCH, PCN,
// PLG and MAD fields and checks each corresponding sample property.
public void Smn1()
{
    string[] columns = "5\t70247773\t.\tC\tT\t366\tPASS\tSNVHPOL=4;MQ=60\tGT:DST:DID:DCS:SCH:PCN:PLG:MAD:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:-:70:Orphanet:-:3,3:6606,6607:41,49:368:364:81:11:39,42:21,20:18,22:-41.0:PASS:370,0,365".Split('\t');

    var extracted = new SampleFieldExtractor(columns).ExtractSamples();

    Assert.Single(extracted);
    var first = extracted[0];

    // string-valued disease annotations
    Assert.Equal(new[] { "-" }, first?.DiseaseAffectedStatus);
    Assert.Equal(new[] { "70" }, first?.DiseaseIds);
    Assert.Equal(new[] { "Orphanet" }, first?.DiseaseClassificationSources);
    Assert.Equal("-", first?.SilentCarrierHaplotype);

    // integer-valued paralog and pileup fields
    Assert.Equal(new[] { 3, 3 }, first?.ParalogousGeneCopyNumbers);
    Assert.Equal(new[] { 6606, 6607 }, first?.ParalogousEntrezGeneIds);
    Assert.Equal(new[] { 41, 49 }, first?.MpileupAlleleDepths);
}
// Extracts two PR:SR samples and checks the paired-end and split read counts
// of each as integer arrays.
public void SplitReadCounts()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    const string vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t78,0:65,0\t157,42:252,63";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Length);

    var sample1 = samples[0];
    Assert.Equal(new[] { 78, 0 }, sample1.PairEndReadCounts);
    Assert.Equal(new[] { 65, 0 }, sample1.SplitReadCounts);

    var sample2 = samples[1];
    Assert.Equal(new[] { 157, 42 }, sample2.PairEndReadCounts);
    Assert.Equal(new[] { 252, 63 }, sample2.SplitReadCounts);
}
// Extracts two PR:SR samples and checks the paired-end and split read counts
// of each as string arrays.
public void SplitReadCounts()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    var vcfLine = "chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t78,0:65,0\t157,42:252,63";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Count);

    var sample1 = samples[0];
    Assert.Equal(new[] { "78", "0" }, sample1.PairEndReadCounts);
    Assert.Equal(new[] { "65", "0" }, sample1.SplitReadCounts);

    var sample2 = samples[1];
    Assert.Equal(new[] { "157", "42" }, sample2.PairEndReadCounts);
    Assert.Equal(new[] { "252", "63" }, sample2.SplitReadCounts);
}
/// <summary>
/// Builds a <see cref="Position"/> from a simple position by parsing its VCF fields:
/// the INFO column, the ID, the ALT alleles, QUAL, the FILTER entries, the samples
/// and the variants created by <paramref name="variantFactory"/>.
/// </summary>
/// <param name="simplePosition">Source position; supplies chromosome, start, reference allele, VCF fields and the decomposed/recomposed flags.</param>
/// <param name="variantFactory">Factory used to create the variants for this position.</param>
/// <returns>The populated position, or <c>null</c> when <paramref name="simplePosition"/> is <c>null</c>.</returns>
public static Position CreatFromSimplePosition(ISimplePosition simplePosition, VariantFactory variantFactory)
{
    if (simplePosition == null)
    {
        return null;
    }

    var vcfFields = simplePosition.VcfFields;
    var infoData = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex]);
    var id = vcfFields[VcfCommon.IdIndex];

    // re-calculate the end by checking INFO field
    int end = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);

    // string.Split already returns a freshly allocated string[]; no extra copy is needed
    string[] altAlleles = vcfFields[VcfCommon.AltIndex].Split(',');
    double? quality = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);
    string[] filters = vcfFields[VcfCommon.FilterIndex].Split(';');

    // the depth parsed from INFO is handed to the sample extractor
    var samples = new SampleFieldExtractor(vcfFields, infoData.Depth).ExtractSamples();

    var variants = variantFactory.CreateVariants(
        simplePosition.Chromosome,
        id,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        infoData,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);

    return new Position(
        simplePosition.Chromosome,
        simplePosition.Start,
        end,
        simplePosition.RefAllele,
        altAlleles,
        quality,
        filters,
        variants,
        samples,
        infoData,
        vcfFields,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);
}
// Extracts the single sample from a line whose ALT is "T,<NON_REF>" (calling
// ExtractSamples(true)) and checks the variant frequency ("0.4231") and the
// allele depths ("15", "11").
public void GatkGenomeVcfSample()
{
    // For NIR-1320
    // the original AD= 15,11,0
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    var vcfLine = "1\t30923\trs140337953\tG\tT,<NON_REF>\t264.77\tPASS\tBaseQRankSum=0.259;DB;DP=26;MLEAC=1,0;MLEAF=0.500,0.00;MQ=43.87;MQ0=0;MQRankSum=-0.830;ReadPosRankSum=-0.156\tGT:AD:GQ:PL:SB\t0/1:15,11,20:99:293,0,330,337,363,700:8,7,3,8";
    var vcfColumns = vcfLine.Split('\t');

    var extractor = new SampleFieldExtractor(vcfColumns);
    var samples = extractor.ExtractSamples(true);

    // Assert.Single states the intent directly and gives a clearer failure
    // message than comparing the collection size with 1.
    Assert.Single(samples);
    var sample = samples[0];

    var observedVariantFrequency = sample?.VariantFrequency;
    var expectedVariantFrequency = "0.4231";
    Assert.Equal(expectedVariantFrequency, observedVariantFrequency);

    var expectedAlleleDepths = new[] { "15", "11" };
    var observedAlleleDepths = sample?.AlleleDepths;
    Assert.Equal(expectedAlleleDepths, observedAlleleDepths);
}
// Annotates a Canvas CNV line, compares the annotated allele with the expected
// JSON, then extracts the two samples and checks that the second one reports a
// copy number of "6".
public void OutputCanvasCnvRelevantField()
{
    // Columns are separated with explicit \t escapes: the line is split on '\t',
    // so the delimiter has to be a real tab rather than whitespace that merely looks like one.
    var vcfLine = "1\t9314201\tCanvas:GAIN:1:9314202:9404148\tN\t<CNV>\t36\tPASS\tSVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621\tRC:BC:CN:MCC\t.\t151:108:6:4";

    var annotatedVariant = DataUtilities.GetVariant(Resources.CacheGRCh37("ENST00000377403_chr1_Ensembl84"), null, vcfLine);
    Assert.NotNull(annotatedVariant);

    JsonUtilities.AlleleEquals(annotatedVariant,
        "{\"altAllele\":\"CNV\",\"refAllele\":\"N\",\"begin\":9314202,\"chromosome\":\"1\",\"end\":9404148,\"variantType\":\"copy_number_variation\",\"vid\":\"1:9314202:9404148:6\",\"overlappingGenes\":[\"H6PD\"],\"transcripts\":{\"ensembl\":[{\"transcript\":\"ENST00000377403.2\",\"bioType\":\"protein_coding\",\"exons\":\"4-5/5\",\"introns\":\"3-4/4\",\"geneId\":\"ENSG00000049239\",\"hgnc\":\"H6PD\",\"consequence\":[\"copy_number_increase\"],\"isCanonical\":true,\"proteinId\":\"ENSP00000366620.1\"}]}}");

    var cols = vcfLine.Split('\t');
    var extractor = new SampleFieldExtractor(cols);
    var samples = extractor.ExtractSamples();

    Assert.Equal(2, samples.Count);

    var sample = samples[1];
    var observedCn = sample?.CopyNumber;
    Assert.Equal("6", observedCn);
}