Exemplo n.º 1
0
        // A null sample column must yield a sample whose IsEmpty flag is set.
        public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
        {
            // no FORMAT keys are registered on the indices before extraction
            var indices  = new FormatIndices();
            var position = GetSimplePositionUsingAlleleNum(1);

            var extracted = SampleFieldExtractor.ExtractSample(null, indices, position, null);

            Assert.True(extracted.IsEmpty);
        }
        // A null sample column (three-argument call, third argument 1) must yield an empty sample.
        public void ExtractSample_EmptySampleColumn_ReturnEmptySample()
        {
            // no FORMAT keys are registered on the indices before extraction
            var indices = new FormatIndices();

            var extracted = SampleFieldExtractor.ExtractSample(null, indices, 1);

            Assert.True(extracted.IsEmpty);
        }
        // A sample column consisting only of "." must yield a sample whose IsEmpty flag is set.
        public void ExtractSample_DotInSampleColumn_ReturnEmptySample()
        {
            // no FORMAT keys are registered on the indices before extraction
            var indices = new FormatIndices();

            var extracted = SampleFieldExtractor.ExtractSample(".", indices, 1, false);

            Assert.True(extracted.IsEmpty);
        }
Exemplo n.º 4
0
        // Registers the FORMAT keys from formatField, extracts sampleField and expects the
        // resulting sample to be flagged as loss of heterozygosity.
        public void ExtractSample_DragenCNV_MCN_LOH(string formatField, string sampleField)
        {
            var indices = new FormatIndices();
            indices.Set(formatField);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleField, indices, position, null);

            Assert.True(extracted.IsLossOfHeterozygosity);
        }
Exemplo n.º 5
0
        // Extracts a sample column carrying the GT and REPCN keys and checks the genotype
        // and the repeat unit counts.
        public void ExtractSample_ExpansionHunter()
        {
            const string formatColumn = "GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC";
            const string sampleColumn = "1/1:SPANNING/SPANNING:15/15:15-15/15-15:22/22:23/23:0/0:38.270270";

            var indices = new FormatIndices();
            indices.Set(formatColumn);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

            Assert.Equal("1/1", extracted.Genotype);
            // REPCN is "15/15" in the sample column
            Assert.Equal(new[] { 15, 15 }, extracted.RepeatUnitCounts);
        }
Exemplo n.º 6
0
        // Extracts a GT:CN:MCN sample column and checks genotype, copy number and
        // minor haplotype copy number.
        public void ExtractSample_DragenCNV_AsExpected()
        {
            const string formatColumn = "GT:CN:MCN";
            const string sampleColumn = "0|1:3:1";

            var indices = new FormatIndices();
            indices.Set(formatColumn);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

            Assert.Equal("0|1", extracted.Genotype);
            Assert.Equal(3, extracted.CopyNumber);
            Assert.Equal(1, extracted.MinorHaplotypeCopyNumber);
        }
Exemplo n.º 7
0
        // Extracts a sample column with GT, SQ, AD, AF and DP keys (among others) and checks
        // genotype, somatic quality, allele depths, total depth and variant frequencies.
        public void ExtractSample_DragenSomatic_AsExpected()
        {
            const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
            const string sampleColumn = "0|1:3.96:33,8:0.195:13,6:20,2:41:17,16,4,4:13,20,4,4:534234";

            var indices = new FormatIndices();
            indices.Set(formatColumn);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

            Assert.Equal("0|1", extracted.Genotype);
            Assert.Equal(3.96, extracted.SomaticQuality);
            Assert.Equal(new[] { 33, 8 }, extracted.AlleleDepths);
            Assert.Equal(41, extracted.TotalDepth);
            // expected frequency is alt depth over total depth (8 of 41), not the AF value 0.195
            Assert.Equal(new[] { 8 / 41.0 }, extracted.VariantFrequencies);
        }
Exemplo n.º 8
0
        [InlineData("C", ".", "DP:AU:CU:GU:TU", "75:0,0:72,77:0,0:0,2")]                // ref minor (AC)
        public void VariantFrequency_ReturnNull(string refAllele, string altAllele, string formatCol, string sampleCol)
        {
            // Builds a one-sample VCF line from the arguments and expects no variant
            // frequencies to be reported for that sample.
            var columns = $"chr1\t5592503\t.\t{refAllele}\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Null(onlySample.VariantFrequencies);
        }
Exemplo n.º 9
0
        // A VCF line whose only sample column is "." must produce exactly one sample, and it must be empty.
        public void EmptySample()
        {
            const string line = "chr7	127717248	MantaINV:267944:0:1:2:0:0	T	<INV>	.	PASS	END=140789466;SVTYPE=INV;SVLEN=13072218;INV5	PR:SR	.";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.True(onlySample.IsEmpty);
        }
Exemplo n.º 10
0
        // The DQ value (20) in a GT:DQ sample column must be exposed as DeNovoQuality.
        public void DeNovoQuality()
        {
            const string line = "chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:DQ\t0/1:20";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Equal(20, onlySample.DeNovoQuality);
        }
Exemplo n.º 11
0
        // Extracts a sample column that includes the AQ and LQ keys and checks genotype,
        // genotype quality, depths, variant frequency and both quality scores.
        public void ExtractSample_PEPE()
        {
            const string formatColumn = "GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ";
            const string sampleColumn = "0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.000";

            var indices = new FormatIndices();
            indices.Set(formatColumn);

            var position  = GetSimplePositionUsingAlleleNum(1);
            var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, null);

            Assert.Equal("0/1", extracted.Genotype);
            Assert.Equal(5, extracted.GenotypeQuality);
            Assert.Equal(new[] { 338, 1 }, extracted.AlleleDepths);
            Assert.Equal(339, extracted.TotalDepth);
            Assert.Equal(new[] { 0.00295 }, extracted.VariantFrequencies);
            // AQ and LQ are the last two sample fields
            Assert.Equal(3.366f, extracted.ArtifactAdjustedQualityScore);
            Assert.Equal(0.000f, extracted.LikelihoodRatioQualityScore);
        }
Exemplo n.º 12
0
        // Places sampleCol under the FORMAT GT:GQ:GQX:DP:DPF:FT and compares the sample's
        // FailedFilter value with the expectation.
        public void FailedFilter(string sampleCol, bool? expectedFailedFilter)
        {
            var columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:FT\t{sampleCol}".Split('\t');

            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Equal(expectedFailedFilter, onlySample?.FailedFilter);
        }
Exemplo n.º 13
0
        // Builds a line with two alternate alleles (T,A) and compares the extracted allele
        // depths with the expectation.
        public void AlleleDepthsMultiAllelic(string formatCol, string sampleCol, int[] expectedAlleleDepths)
        {
            var columns = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Equal(expectedAlleleDepths, onlySample?.AlleleDepths);
        }
Exemplo n.º 14
0
        // Builds a line with two alternate alleles (T,A) and compares the extracted variant
        // frequency string with the expectation.
        public void VariantFrequencyMultiAllelic(string formatCol, string sampleCol, string expectedVariantFrequency)
        {
            string vcfLine    = $"chr1\t5592503\t.\tC\tT,A\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
            var    vcfColumns = vcfLine.Split('\t');

            var extractor = new SampleFieldExtractor(vcfColumns);
            var samples   = extractor.ExtractSamples();

            // Assert.Single reports the actual contents on failure and returns the only element,
            // matching how the sibling tests check the sample count
            var sample = Assert.Single(samples);

            var observedVariantFrequency = sample?.VariantFrequency;

            Assert.Equal(expectedVariantFrequency, observedVariantFrequency);
        }
Exemplo n.º 15
0
        // The FORMAT column has no DP key; the depth handed to the extractor constructor
        // (7882, also present as DP=7882 in the INFO column) must be reported as TotalDepth.
        public void PiscesTotalDepth()
        {
            const int    expectedDepth = 7882;
            const string line          =
                "chr1\t115251293\t.\tGA\tG\t100\tSB;LowVariantFreq\tDP=7882\tGT:GQ:AD:VF:NL:SB:GQX\t0/1:100:7588,294:0:20:-100.0000:100";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns, 7882).ExtractSamples();

            var firstSample = extractedSamples[0];

            Assert.Equal(expectedDepth, firstSample.TotalDepth);
        }
Exemplo n.º 16
0
        // Builds a one-sample line from the FORMAT/sample columns and compares the sample's
        // TotalDepth with the expectation.
        public void TotalDepth(string formatCol, string sampleCol, int? expectedTotalDepth)
        {
            var columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}".Split('\t');

            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Equal(expectedTotalDepth, onlySample?.TotalDepth);
        }
Exemplo n.º 17
0
        // Places sampleCol under the FORMAT GT:GQ:GQX:DP:DPF:AD and compares the sample's
        // Genotype with the expectation.
        public void Genotype(string sampleCol, string expectedGenotype)
        {
            var columns = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\tGT:GQ:GQX:DP:DPF:AD\t{sampleCol}".Split('\t');

            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            Assert.Equal(expectedGenotype, onlySample?.Genotype);
        }
Exemplo n.º 18
0
        // Builds a one-sample line from the FORMAT/sample columns and compares the sample's
        // GenotypeQuality string with the expectation.
        public void GenotypeQuality(string formatCol, string sampleCol, string expectedGenotypeQuality)
        {
            string vcfLine    = $"chr1\t5592503\t.\tC\tT\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
            var    vcfColumns = vcfLine.Split('\t');

            var extractor = new SampleFieldExtractor(vcfColumns);
            var samples   = extractor.ExtractSamples();

            // Assert.Single reports the actual contents on failure and returns the only element,
            // matching how the sibling tests check the sample count
            var sample = Assert.Single(samples);

            var observedGenotypeQuality = sample?.GenotypeQuality;

            Assert.Equal(expectedGenotypeQuality, observedGenotypeQuality);
        }
Exemplo n.º 19
0
        // The AQ (3.366) and LQ (0.001) sample fields must both be populated and, rounded to
        // three decimals, match the values in the VCF line.
        public void ArtifactAdjustedQualityScore_LikelihoodRatioQualityScore()
        {
            const string vcfLine    = "chr1\t2488109\t.\tG\tA\t5\tLowSupport\tDP=339\tGT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ\t0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.001";
            var          vcfColumns = vcfLine.Split('\t');

            var extractor = new SampleFieldExtractor(vcfColumns);
            var samples   = extractor.ExtractSamples();

            Assert.Single(samples);

            var sample = samples[0];

            Assert.NotNull(sample.ArtifactAdjustedQualityScore);
            Assert.NotNull(sample.LikelihoodRatioQualityScore);

            // format with the invariant culture: the expected strings use '.' as the decimal
            // separator, which the current culture does not guarantee
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            Assert.Equal("3.366", sample.ArtifactAdjustedQualityScore.Value.ToString("0.###", invariant));
            Assert.Equal("0.001", sample.LikelihoodRatioQualityScore.Value.ToString("0.###", invariant));
        }
Exemplo n.º 20
0
        [InlineData("T", "GT:AD:DP:VF", "0/1:317,200:517:0.38685", "0.3869")]            // VF (rounding issue)
        public void VariantFrequency_Nominal(string altAllele, string formatCol, string sampleCol, string expectedResults)
        {
            // Builds a one-sample line, formats each extracted variant frequency to four
            // decimals and compares the comma-joined result with the expectation.
            string vcfLine    = $"chr1\t5592503\t.\tC\t{altAllele}\t900.00\tPASS\t.\t{formatCol}\t{sampleCol}";
            var    vcfColumns = vcfLine.Split('\t');

            var extractor = new SampleFieldExtractor(vcfColumns);
            var samples   = extractor.ExtractSamples();

            Assert.Single(samples);

            var sample = samples[0];

            Assert.NotNull(sample?.VariantFrequencies);

            // format with the invariant culture: the expected strings use '.' as the decimal
            // separator, which the current culture does not guarantee
            var invariant       = System.Globalization.CultureInfo.InvariantCulture;
            var observedResults = string.Join(',', sample.VariantFrequencies.Select(x => x.ToString("0.####", invariant)));

            Assert.Equal(expectedResults, observedResults);
        }
Exemplo n.º 21
0
        // Extracts a sample at a chrM position with two alternate alleles (C, T) using a
        // heteroplasmy provider that holds entries for C and G at position 1, then checks the
        // variant frequencies and the heteroplasmy percentiles.
        public void ExtractSample_MitoHeteroplasmy_AsExpected()
        {
            const string formatColumn = "GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS";
            const string sampleColumn = "1|2:3.96:0,15,85:0.195:13,6:20,2:100:17,16,4,4:13,20,4,4:534234";

            var heteroplasmyProvider = new MitoHeteroplasmyProvider();
            heteroplasmyProvider.Add(1, "C", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 2, 4 });
            heteroplasmyProvider.Add(1, "G", new[] { 0.101, 0.201 }, new[] { 1, 2 });

            var position = new SimplePosition(ChromosomeUtilities.ChrM, 1, "A", new[] { "C", "T" });

            var indices = new FormatIndices();
            indices.Set(formatColumn);

            var extracted = SampleFieldExtractor.ExtractSample(sampleColumn, indices, position, heteroplasmyProvider);

            // AD is 0,15,85 with DP 100
            Assert.Equal(new[] { 15 / 100.0, 85 / 100.0 }, extracted.VariantFrequencies);
            // the provider has no entry for allele T, hence the "null" string in second place
            Assert.Equal(new[] { "14.29", "null" }, extracted.HeteroplasmyPercentile);
        }
Exemplo n.º 22
0
        // Four sample columns, the second being "./.": that sample must keep the "./."
        // genotype and report no variant frequencies.
        public void EmptySamples()
        {
            // for NIR-1306
            const string line = "chrX	2735147	.	G	A	38.25	VQSRTrancheSNP99.90to100.00	AC=3;AF=0.500;AN=6;BaseQRankSum=-0.602;DP=56;Dels=0.00;FS=30.019;HaplotypeScore=7.7259;MLEAC=3;MLEAF=0.500;MQ=41.18;MQ0=0;MQRankSum=0.098;QD=1.06;ReadPosRankSum=0.266;SB=-8.681e-03;VQSLOD=-6.0901;culprit=QD	GT:AD:DP:GQ:PL	0:7,0:7:3:0,3,39	./.	0/1:14,3:17:35:35,0,35	1/1:9,10:19:3:41,3,0";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            Assert.Equal(4, extractedSamples.Length);

            // index 1 is the "./." column
            var secondSample = extractedSamples[1];
            var genotype     = secondSample.Genotype;
            var frequencies  = secondSample.VariantFrequencies;

            Assert.Equal("./.", genotype);
            Assert.Null(frequencies);
        }
Exemplo n.º 23
0
        // Two sample columns under FORMAT RC:BC:CN:MCC ("." and "151:108:6:4"): the second
        // sample must not be flagged as loss of heterozygosity.
        public void MajorChromosomeCopyTest()
        {
            // data from NIR-1095
            // for NIR-1218
            const string line = "1	9314202	Canvas:GAIN:1:9314202:9404148	N	<CNV>	36	PASS	SVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621	RC:BC:CN:MCC	.	151:108:6:4";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            Assert.Equal(2, extractedSamples.Length);

            var secondSample = extractedSamples[1];

            Assert.False(secondSample?.IsLossOfHeterozygosity);
        }
Exemplo n.º 24
0
        // Extracts a sample carrying the DST, DID, DCS, SCH, PCN, PLG and MAD keys and checks
        // the corresponding sample properties.
        public void Smn1()
        {
            const string line = "5\t70247773\t.\tC\tT\t366\tPASS\tSNVHPOL=4;MQ=60\tGT:DST:DID:DCS:SCH:PCN:PLG:MAD:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:-:70:Orphanet:-:3,3:6606,6607:41,49:368:364:81:11:39,42:21,20:18,22:-41.0:PASS:370,0,365";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            // Assert.Single hands back the only element after checking the count
            var onlySample = Assert.Single(extractedSamples);

            // string-valued fields, in FORMAT order: DST, DID, DCS, SCH
            Assert.Equal(new[] { "-" }, onlySample?.DiseaseAffectedStatus);
            Assert.Equal(new[] { "70" }, onlySample?.DiseaseIds);
            Assert.Equal(new[] { "Orphanet" }, onlySample?.DiseaseClassificationSources);
            Assert.Equal("-", onlySample?.SilentCarrierHaplotype);

            // integer-array fields, in FORMAT order: PCN, PLG, MAD
            Assert.Equal(new[] { 3, 3 }, onlySample?.ParalogousGeneCopyNumbers);
            Assert.Equal(new[] { 6606, 6607 }, onlySample?.ParalogousEntrezGeneIds);
            Assert.Equal(new[] { 41, 49 }, onlySample?.MpileupAlleleDepths);
        }
Exemplo n.º 25
0
        // Two PR:SR sample columns: each sample must expose its paired-end and split-read
        // counts as integer arrays.
        public void SplitReadCounts()
        {
            const string line = "chr7	127717248	MantaINV:267944:0:1:2:0:0	T	<INV>	.	PASS	END=140789466;SVTYPE=INV;SVLEN=13072218;INV5	PR:SR	78,0:65,0	157,42:252,63";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            Assert.Equal(2, extractedSamples.Length);

            // first sample column: 78,0:65,0
            var first = extractedSamples[0];
            Assert.Equal(new[] { 78, 0 }, first.PairEndReadCounts);
            Assert.Equal(new[] { 65, 0 }, first.SplitReadCounts);

            // second sample column: 157,42:252,63
            var second = extractedSamples[1];
            Assert.Equal(new[] { 157, 42 }, second.PairEndReadCounts);
            Assert.Equal(new[] { 252, 63 }, second.SplitReadCounts);
        }
Exemplo n.º 26
0
        // Two PR:SR sample columns: each sample must expose its paired-end and split-read
        // counts as string arrays.
        public void SplitReadCounts()
        {
            const string line =
                "chr7	127717248	MantaINV:267944:0:1:2:0:0	T	<INV>	.	PASS	END=140789466;SVTYPE=INV;SVLEN=13072218;INV5	PR:SR	78,0:65,0	157,42:252,63";

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            Assert.Equal(2, extractedSamples.Count);

            // first sample column: 78,0:65,0
            var first = extractedSamples[0];
            Assert.Equal(new[] { "78", "0" }, first.PairEndReadCounts);
            Assert.Equal(new[] { "65", "0" }, first.SplitReadCounts);

            // second sample column: 157,42:252,63
            var second = extractedSamples[1];
            Assert.Equal(new[] { "157", "42" }, second.PairEndReadCounts);
            Assert.Equal(new[] { "252", "63" }, second.SplitReadCounts);
        }
Exemplo n.º 27
0
        /// <summary>
        /// Builds a <see cref="Position"/> from a simple position by parsing its VCF columns
        /// (INFO, ID, ALT, QUAL, FILTER and the sample columns) and creating its variants.
        /// </summary>
        /// <param name="simplePosition">Source position; when null, null is returned.</param>
        /// <param name="variantFactory">Factory used to create the variants for the alternate alleles.</param>
        /// <returns>The populated position, or null when <paramref name="simplePosition"/> is null.</returns>
        public static Position CreatFromSimplePosition(ISimplePosition simplePosition, VariantFactory variantFactory)
        {
            if (simplePosition == null)
            {
                return null;
            }

            var vcfFields = simplePosition.VcfFields;
            var infoData  = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex]);
            var id        = vcfFields[VcfCommon.IdIndex];

            // re-calculate the end by checking INFO field
            int end = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);

            // string.Split already returns a new string[], so no additional copy is needed
            string[] altAlleles = vcfFields[VcfCommon.AltIndex].Split(',');
            double?  quality    = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);

            string[] filters = vcfFields[VcfCommon.FilterIndex].Split(';');

            // the depth parsed from INFO is handed to the sample extractor
            var samples = new SampleFieldExtractor(vcfFields, infoData.Depth).ExtractSamples();

            var variants = variantFactory.CreateVariants(simplePosition.Chromosome, id, simplePosition.Start, end, simplePosition.RefAllele, altAlleles, infoData, simplePosition.IsDecomposed, simplePosition.IsRecomposed);

            return new Position(simplePosition.Chromosome, simplePosition.Start, end, simplePosition.RefAllele, altAlleles, quality, filters, variants, samples,
                                infoData, vcfFields, simplePosition.IsDecomposed, simplePosition.IsRecomposed);
        }
Exemplo n.º 28
0
        // Extracts the single sample of a line whose ALT column is "T,<NON_REF>" (passing true
        // to ExtractSamples) and checks the variant frequency string and the allele depths.
        public void GatkGenomeVcfSample()
        {
            // For NIR-1320
            // the original AD= 15,11,0
            var vcfLine =
                "1	30923	rs140337953	G	T,<NON_REF>	264.77	PASS	BaseQRankSum=0.259;DB;DP=26;MLEAC=1,0;MLEAF=0.500,0.00;MQ=43.87;MQ0=0;MQRankSum=-0.830;ReadPosRankSum=-0.156	GT:AD:GQ:PL:SB	0/1:15,11,20:99:293,0,330,337,363,700:8,7,3,8";
            var vcfColumns = vcfLine.Split('\t');

            var extractor = new SampleFieldExtractor(vcfColumns);
            var samples   = extractor.ExtractSamples(true);

            // Assert.Single reports the actual contents on failure and returns the only element
            var sample = Assert.Single(samples);

            var observedVariantFrequency = sample?.VariantFrequency;
            var expectedVariantFrequency = "0.4231";

            Assert.Equal(expectedVariantFrequency, observedVariantFrequency);

            // the third AD value (20) is expected to be absent from the reported depths
            var expectedAlleleDepths = new[] { "15", "11" };
            var observedAlleleDepths = sample?.AlleleDepths;

            Assert.Equal(expectedAlleleDepths, observedAlleleDepths);
        }
Exemplo n.º 29
0
        // Annotates a Canvas CNV line, compares the annotated allele with the expected JSON,
        // then extracts the samples from the same line and checks the copy number of the second one.
        public void OutputCanvasCnvRelevantField()
        {
            const string line = "1	9314201	Canvas:GAIN:1:9314202:9404148	N	<CNV>	36	PASS	SVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621	RC:BC:CN:MCC	.	151:108:6:4";
            const string expectedJson =
                "{\"altAllele\":\"CNV\",\"refAllele\":\"N\",\"begin\":9314202,\"chromosome\":\"1\",\"end\":9404148,\"variantType\":\"copy_number_variation\",\"vid\":\"1:9314202:9404148:6\",\"overlappingGenes\":[\"H6PD\"],\"transcripts\":{\"ensembl\":[{\"transcript\":\"ENST00000377403.2\",\"bioType\":\"protein_coding\",\"exons\":\"4-5/5\",\"introns\":\"3-4/4\",\"geneId\":\"ENSG00000049239\",\"hgnc\":\"H6PD\",\"consequence\":[\"copy_number_increase\"],\"isCanonical\":true,\"proteinId\":\"ENSP00000366620.1\"}]}}";

            var annotated = DataUtilities.GetVariant(Resources.CacheGRCh37("ENST00000377403_chr1_Ensembl84"), null, line);

            Assert.NotNull(annotated);

            // NOTE(review): any return value of AlleleEquals is ignored here - confirm it asserts internally
            JsonUtilities.AlleleEquals(annotated, expectedJson);

            var columns          = line.Split('\t');
            var extractedSamples = new SampleFieldExtractor(columns).ExtractSamples();

            Assert.Equal(2, extractedSamples.Count);

            // the second sample column is 151:108:6:4 under FORMAT RC:BC:CN:MCC
            var secondSample = extractedSamples[1];

            Assert.Equal("6", secondSample?.CopyNumber);
        }