Exemplo n.º 1
0
        public void CallThroughAnEmptyNbhd()
        {
            // Runs the caller over a neighborhood that holds two variant sites but has no
            // accepted phased variants staged. Expectation: no MNVs are called and both
            // original positions come back as homozygous-reference calls.

            // Two adjacent dummy A>T alleles on chr1 (same trailing arguments 1000 / 156 for both).
            var firstAllele  = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var secondAllele = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var firstSite    = new VariantSite(firstAllele);
            var secondSite   = new VariantSite(secondAllele);

            var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", firstSite, secondSite, "");
            neighborhood.SetRangeOfInterest();

            caller.CallMNVs(neighborhood);
            caller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            // Nothing was phased, so nothing may be reported as a variant ...
            Assert.Equal(0, calledVariants.Count);

            // ... and each of the two input positions must be present as a ref call,
            // keyed by (and carrying) its own coordinate.
            Assert.Equal(2, calledRefs.Count);

            foreach (var position in new[] { 123, 124 })
            {
                Assert.Equal(Genotype.HomozygousRef, calledRefs[position].Genotype);
                Assert.Equal(position, calledRefs[position].Coordinate);
            }
        }
Exemplo n.º 2
0
        private List<VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
        {
            // Produces expectedNumberOfThreads identically configured neighborhoods
            // (none when the count is zero or negative). Each one is constructed from
            // sites 120 and 121 on chr1 and then has its site list replaced by a single
            // site at 123 that carries a dummy A>T allele.
            var result    = new List<VcfNeighborhood>();
            var remaining = expectedNumberOfThreads;

            while (remaining > 0)
            {
                var nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

                var onlySite = new VariantSite(123);
                onlySite.ReferenceName         = "chr1";
                onlySite.OriginalAlleleFromVcf = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);

                // Overwrites whatever site list the constructor set up.
                nbhd.VcfVariantSites = new List<VariantSite> { onlySite };

                result.Add(nbhd);
                remaining--;
            }

            return result;
        }
Exemplo n.º 3
0
        public void WriteHeader()
        {
            // The writer should reproduce the original header and add a line naming the
            // phaser; once disposed it must reject any further header or variant writes.

            var writer = InitializeWriter(false);
            writer.WriteHeader();
            writer.Dispose();

            // Any use after Dispose is expected to throw.
            Assert.Throws<Exception>(() => { writer.WriteHeader(); });
            Assert.Throws<Exception>(() =>
            {
                writer.Write(new List<CalledAllele>
                {
                    PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
                });
            });

            // A second Dispose is issued on purpose; the test only passes if it does not throw.
            writer.Dispose();

            Assert.True(File.Exists(_outputFile));
            var written = File.ReadAllLines(_outputFile);

            // The first two original header lines come through untouched.
            for (var i = 0; i < 2; i++)
            {
                Assert.Equal(_origHeader[i], written[i]);
            }

            // The third written line is the inserted phaser line, not the original third line.
            var phaserLine = written[2];
            Assert.NotEqual(_origHeader[2], phaserLine);
            Assert.True(phaserLine.StartsWith("##VariantPhaser=Scylla"));

            // The original third line is expected at index 4; what the writer puts at
            // index 3 is not checked here.
            Assert.Equal(_origHeader[2], written[4]);
        }
Exemplo n.º 4
0
        public void VarCallsBecomeRefsAndNulls()
        {
            // Stages one accepted phased SNV at chr1:123 in a neighborhood made of the
            // sites 123 and 124, then re-runs the caller with increasing amounts of
            // reference support at 124 marked as already used. Position 124 should be
            // reported as a ref call while ref support remains, and as a no-call once
            // all of it has been used.
            var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123   = new VariantSite(alleleAt123);
            var siteAt124   = new VariantSite(alleleAt124);

            var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            var phasedSnv = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1", Coordinate = 123, Reference = "A", Alternate = "T", VariantQscore = 35, AlleleSupport = 10, TotalCoverage = 50
            };
            neighborhood.AddAcceptedPhasedVariant(phasedSnv);

            // One entry per scenario, in the order they are run:
            //   0) nothing used at 124      -> 0/0 with AD 844 (the 1000 total minus the 156 alt)
            //   1) 100 ref counts used      -> 0/0 with AD 744
            //   2) 1000 ref counts used     -> ./. with AD 0
            var usedRefCountScenarios = new[]
            {
                new Dictionary<int, int>(),
                new Dictionary<int, int> { { 124, 100 } },
                new Dictionary<int, int> { { 124, 1000 } }
            };
            var expectedGenotypeAt124   = new[] { "0/0", "0/0", "./." };
            var expectedRefSupportAt124 = new[] { "844", "744", "0" };

            for (var scenario = 0; scenario < usedRefCountScenarios.Length; scenario++)
            {
                neighborhood.UsedRefCountsLookup = usedRefCountScenarios[scenario];

                caller.CallMNVs(neighborhood);
                caller.CallRefs(neighborhood);

                var calledVariants = neighborhood.CalledVariants;
                var calledRefs     = neighborhood.CalledRefs;

                // Always exactly one variant position (123) holding one allele,
                // and two entries in the ref lookup.
                Assert.Equal(1, calledVariants.Count);
                Assert.Equal(1, calledVariants[123].Count);
                Assert.Equal(2, calledRefs.Count);

                var expectedAt124 = new VcfVariant
                {
                    ReferenceName     = "chr1",
                    ReferencePosition = 124,
                    ReferenceAllele   = "A",
                    VariantAlleles    = new[] { "." },
                    Genotypes         = new List<Dictionary<string, string>>
                    {
                        new Dictionary<string, string>
                        {
                            { "GT", expectedGenotypeAt124[scenario] },
                            { "DP", "1000" },
                            { "AD", expectedRefSupportAt124[scenario] }
                        }
                    },
                };

                VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
                VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);
            }
        }
Exemplo n.º 5
0
        public void CallAVariantInANewLocation()
        {
            // Stages a phased variant at a coordinate (129) where the input VCF had no
            // variant, marks all 1000 counts at 124 as used, and checks that the caller
            // reports the new variant at 129, refs at 123 and 234, and a no-call at 124.

            // Original VCF variants. The fourth one shares position 234 with the third.
            var alleleAt123       = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124       = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var alleleAt234       = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var secondAlleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

            var siteAt123       = new VariantSite(alleleAt123);
            var siteAt124       = new VariantSite(alleleAt124);
            var siteAt234       = new VariantSite(alleleAt234);
            var secondSiteAt234 = new VariantSite(secondAlleleAt234);

            var caller       = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");

            // Only the first site at 234 joins the neighborhood. The second one is
            // deliberately left out: it stands for a variant that is not used for
            // phasing (for instance, one that failed filters).
            neighborhood.AddVariantSite(siteAt234, "RRRRR");
            neighborhood.SetRangeOfInterest();

            // Stage a single candidate MNV at the new location.
            var stagedMnv = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1", Coordinate = 129, Reference = "A", Alternate = "TT"
            };
            neighborhood.AddAcceptedPhasedVariant(stagedMnv);

            neighborhood.UsedRefCountsLookup = new Dictionary<int, int> { { 124, 1000 } };

            caller.CallMNVs(neighborhood);
            caller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            // Expected outputs at the three original positions.
            var expectedRefAt123 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/0" } }
                },
            };

            var expectedRefAt234 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 234,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/0" } }
                },
            };

            var expectedNoCallAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "./." } }
                },
            };

            // One variant position, holding exactly the staged MNV.
            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[129].Count);

            // Three ref-lookup entries: 123, 124 and 234.
            Assert.Equal(3, calledRefs.Count);

            VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
            VcfMergerTests.CheckVariantsMatch(expectedNoCallAt124, calledRefs[124]);
            VcfMergerTests.CheckVariantsMatch(stagedMnv, calledVariants[129][0]);
            VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
        }
Exemplo n.º 6
0
        public void FilterHeader()
        {
            // Verifies how PhasedVcfWriter rewrites the header of an existing VCF:
            //  * a "##VariantPhaser=" line is inserted after the Pisces command line,
            //  * the writer's own ##FILTER lines are merged with the ones already present.
            // Two configurations are checked against the same output file, one after the other.

            // Lines that recur verbatim in every header below.
            const string piscesCmdLine    = "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout";
            const string columnHeaderLine = "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam";

            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileWriterTests.vcf");

            File.Delete(outputFilePath);

            // Scenario 1: diploid model, quality threshold 30, input header already has FILTER lines.
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Diploid,
            };

            //note, scylla has no SB or RMxN or R8 filters.

            var variants = new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
            };

            variants[0].Filters.AddRange(new List<FilterType> {
                FilterType.RMxN, FilterType.LowDepth, FilterType.LowVariantFrequency
            });
            variants[1].Filters.AddRange(new List<FilterType> {
                FilterType.IndelRepeatLength, FilterType.LowVariantQscore, FilterType.StrandBias
            });

            var originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                piscesCmdLine,
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                columnHeaderLine
            };

            // The writer is given an empty input context; the header content comes from originalHeader.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);
            try
            {
                writer.WriteHeader();
                writer.Write(variants);
            }
            finally
            {
                // Release the output file even if writing throws, so the failure
                // does not leave the shared file locked.
                writer.Dispose();
            }

            // Expected: the original FILTER lines are kept and the writer's q30 / LowDP /
            // LowVariantFreq / MultiAllelicSite lines follow them, ahead of the FORMAT lines.
            var expectedHeader1 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                piscesCmdLine,
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
                "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                columnHeaderLine
            };

            AssertFilterHeaderMatches(expectedHeader1, ReadWrittenHeader(outputFilePath));

            // Scenario 2: somatic model, quality threshold 22, input header has no FILTER lines.
            config = new VcfWriterConfig
            {
                DepthFilterThreshold          = 500,
                VariantQualityFilterThreshold = 22,
                FrequencyFilterThreshold      = 0.007f,
                EstimatedBaseCallQuality      = 23,
                PloidyModel = PloidyModel.Somatic,
            };

            originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                piscesCmdLine,
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                columnHeaderLine
            };

            // Expected: with no FILTER section to extend, the writer's q22 / LowDP /
            // LowVariantFreq lines land directly above the column header line.
            var expectedHeader2 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                piscesCmdLine,
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "##FILTER=<ID=q22,Description=\"Quality score less than 22\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
                columnHeaderLine,
            };

            // The second pass writes the same variants with their filter lists cleared.
            variants[0].Filters = new List<FilterType>();
            variants[1].Filters = new List<FilterType>();

            writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);
            try
            {
                writer.WriteHeader();
                writer.Write(variants);
            }
            finally
            {
                writer.Dispose();
            }

            AssertFilterHeaderMatches(expectedHeader2, ReadWrittenHeader(outputFilePath));
        }

        // Reads the header lines of the VCF at vcfPath, disposing the reader even if the read throws.
        private static List<string> ReadWrittenHeader(string vcfPath)
        {
            var reader = new VcfReader(vcfPath);
            try
            {
                return reader.HeaderLines;
            }
            finally
            {
                reader.Dispose();
            }
        }

        // Asserts that two headers have the same length and match line by line. For the
        // "##VariantPhaser=" line only the prefix is compared; the text after it in the
        // expected list ("Scylla 1.0.0.0") is deliberately not checked.
        private static void AssertFilterHeaderMatches(List<string> expected, List<string> written)
        {
            Assert.Equal(expected.Count, written.Count);

            for (var i = 0; i < expected.Count; i++)
            {
                if (expected[i].StartsWith("##VariantPhaser="))
                {
                    Assert.StartsWith("##VariantPhaser=", written[i]);
                    continue;
                }

                Assert.Equal(expected[i], written[i]);
            }
        }
Exemplo n.º 7
0
        public void Write()
        {
            // Part 1: a regular (uncrushed) VCF. The writer is expected to sort the alleles
            // by chromosome, coordinate, reference and then alternate before writing them.
            var vcfWriter = InitializeWriter(false);

            var alleles = new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "A", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr8", 123, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156),
                PhasedVariantTestUtilities.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156),
            };

            // The lines in the order they should appear in the file.
            var expectedLines = new List<string>
            {
                "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t123\t.\tT\tA\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t123\t.\tT\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
            };

            vcfWriter.Write(alleles);
            vcfWriter.Dispose();

            // A disposed writer must refuse both header and variant writes ...
            Assert.Throws<Exception>(() => { vcfWriter.WriteHeader(); });
            Assert.Throws<Exception>(() =>
            {
                vcfWriter.Write(new List<CalledAllele>
                {
                    PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
                });
            });
            // ... while a repeated Dispose has to go through without throwing.
            vcfWriter.Dispose();

            var writtenLines = File.ReadAllLines(_outputFile);

            // One line per allele, in the expected order.
            Assert.Equal(alleles.Count, writtenLines.Length);

            var lineIndex = 0;
            foreach (var expectedLine in expectedLines)
            {
                Assert.Equal(expectedLine, writtenLines[lineIndex]);
                lineIndex++;
            }

            // Part 2: a crushed VCF. The three chr9:123 alleles are expected on a single line.
            vcfWriter = InitializeWriter(true);
            vcfWriter.Write(alleles);
            vcfWriter.Dispose();
            writtenLines = File.ReadAllLines(_outputFile);

            expectedLines = new List<string>
            {
                "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t123\t.\tA\tC,A,C\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
                "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
            };

            // Only one line per distinct position remains.
            Assert.Equal(6, writtenLines.Length);

            lineIndex = 0;
            foreach (var expectedLine in expectedLines)
            {
                Assert.Equal(expectedLine, writtenLines[lineIndex]);
                lineIndex++;
            }
        }
Exemplo n.º 8
0
        public void GetAcceptedVariants_MergeVariants()
        {
            // Feeds VcfMerger.GetMergedListOfVariants a mocked neighborhood plus the full
            // list of staged VCF variants and checks the merged output: the three variants
            // the neighborhood consumed are replaced by its ref / no-call results, the new
            // MNV is included, and the variant the neighborhood never used passes through.

            // ---- staged input ------------------------------------------------------
            var alleleAt123      = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124      = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var alleleAt234      = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var otherAlleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "C", 1000, 156);

            // Everything that was in the VCF ...
            var allStagedVariants = new List<CalledAllele>
            {
                alleleAt123, alleleAt124, alleleAt234, otherAlleleAt234
            };

            // ... versus the subset the neighborhood reports as its original variants.
            var variantsOwnedByNbhd = new List<CalledAllele>
            {
                alleleAt123, alleleAt124, alleleAt234
            };

            var phasedMnv = new CalledAllele()
            {
                Chromosome = "chr1",
                Coordinate = 229,
                Reference  = "AA",
                Alternate  = "T",
                Genotype   = Genotype.HeterozygousAltRef
            };

            var calledVariantsByPosition = new Dictionary<int, List<CalledAllele>>
            {
                { phasedMnv.Coordinate, new List<CalledAllele> { phasedMnv } }
            };

            // The position that was fully used up (124) has to be staged as a no-call
            // here already, because the merger cannot perform that conversion itself.
            var calledRefsByPosition = new Dictionary<int, CalledAllele>
            {
                {
                    123, new CalledAllele(AlleleCategory.Reference)
                    {
                        Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
                    }
                },
                {
                    124, new CalledAllele(AlleleCategory.Reference)
                    {
                        Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.RefLikeNoCall
                    }
                },
                {
                    234, new CalledAllele(AlleleCategory.Reference)
                    {
                        Coordinate = 234, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.HomozygousRef
                    }
                }
            };

            var nbhdMock = new Mock<IVcfNeighborhood>();
            nbhdMock.Setup(n => n.GetOriginalVcfVariants()).Returns(variantsOwnedByNbhd.ToList());
            nbhdMock.Setup(n => n.CalledVariants).Returns(calledVariantsByPosition);
            nbhdMock.Setup(n => n.CalledRefs).Returns(calledRefsByPosition);

            // ---- expectations ------------------------------------------------------
            var expectedRefAt123 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/0" } }
                },
            };

            var expectedNoCallAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "./." } }
                },
            };

            var expectedRefAt234 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 234,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/0" } }
                },
            };

            // ---- act ---------------------------------------------------------------
            var merged = VcfMerger.GetMergedListOfVariants(nbhdMock.Object, allStagedVariants.ToList());

            // ---- assert ------------------------------------------------------------
            // Expected order: 123 (ref), 124 (no-call), 229 (MNV), 234 (ref), 234 (untouched A>C).
            Assert.Equal(5, merged.Count);

            CheckVariantsMatch(expectedRefAt123, merged[0]);
            CheckVariantsMatch(expectedNoCallAt124, merged[1]);
            CheckVariantsMatch(phasedMnv, merged[2]);
            CheckVariantsMatch(expectedRefAt234, merged[3]);
            CheckVariantsMatch(otherAlleleAt234, merged[4]);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Stages two neighborhoods (one on chr2, one on chr7) with pre-set called variants,
        /// merges them with the variants read from MergerInput.vcf through a
        /// <c>VcfMerger</c>, writes everything with a <c>PhasedVcfWriter</c>, and checks the
        /// produced file line-by-line against MergerOutput.vcf.
        /// </summary>
        public void WriteANbhd()
        {
            var outputFilePath   = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerOutput.vcf");

            // Remove any output left by an earlier run before the writer is created.
            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };

            // NOTE(review): the writer has always been handed an empty input context here; a populated
            // context local used to be built in this test but was never passed to anything, so it was removed.
            // Presumably MergerOutput.vcf matches the empty-context output - confirm before changing this argument.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>(), null);

            try
            {
                var reader = new VcfReader(inputFilePath, true);

                //set up the original variants
                var originalVcfVariant1 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
                var originalVcfVariant2 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
                var originalVcfVariant4 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
                var originalVcfVariant5 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

                var vs1 = new VariantSite(originalVcfVariant1);
                var vs2 = new VariantSite(originalVcfVariant2);
                var vs4 = new VariantSite(originalVcfVariant4);
                var vs5 = new VariantSite(originalVcfVariant5);

                //have to replace variants at position 116380048 (we call two new MNVs here)
                var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), "chr2", vs1, vs2, "");
                nbhd1.SetRangeOfInterest();

                //have to replace variants at position 116380051 and 52 (we call one new MNV at 51)
                var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), "chr7", vs4, vs5, "");
                nbhd2.SetRangeOfInterest();

                VcfMerger          merger         = new VcfMerger(reader);
                List<CalledAllele> allelesPastNbh = new List<CalledAllele>();

                // Stage the calls directly on the neighborhoods; no caller is run in this test.
                nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
                {
                    { originalVcfVariant1.Coordinate, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
                };
                nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
                {
                    { originalVcfVariant4.Coordinate, new List<CalledAllele> { originalVcfVariant4 } }
                };

                // Each merger call returns the alleles it carried past the point it wrote up to;
                // that list is handed to the next call.
                allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);
                allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

                allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);
                allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

                merger.WriteRemainingVariants(writer, allelesPastNbh);
            }
            finally
            {
                // Dispose even when a merge step throws, and always before the output file is read back below.
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Runs <c>VcfMerger.GetMergedListOfVariants</c> twice against a mocked neighborhood:
        /// first with a plain reference call staged at position 124, then with that reference
        /// call staged as <c>Genotype.RefLikeNoCall</c>. In both passes three merged entries are
        /// expected, with the middle one compared against a hand-built <c>VcfVariant</c>.
        /// </summary>
        public void GetAcceptedVariants_MergeNull()
        {
            var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var alleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

            // Everything staged from the vcf.
            var vcfAlleles = new List<CalledAllele>();
            vcfAlleles.Add(alleleAt123);
            vcfAlleles.Add(alleleAt124);
            vcfAlleles.Add(alleleAt234);

            // The subset the mocked neighborhood reports as its original variants.
            var nbhdOriginals = new List<CalledAllele>();
            nbhdOriginals.Add(alleleAt123);
            nbhdOriginals.Add(alleleAt124);

            var calledSnv = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1", Coordinate = 123, Reference = "A", Alternate = "T"
            };

            var firstPassCalls = new Dictionary<int, List<CalledAllele>>();
            firstPassCalls.Add(calledSnv.Coordinate, new List<CalledAllele> { calledSnv });

            var firstPassRefs = new Dictionary<int, CalledAllele>();
            firstPassRefs.Add(123, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
            });
            firstPassRefs.Add(124, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = "."
            });

            // Position 124 was staged with 156 alt / 1000 total, i.e. 844 original ref calls.
            // With 100 of those taken up, 744 / 1000 remain for the reference,
            // which is still enough for a confident ref call.

            var nbhdMock = new Mock<IVcfNeighborhood>();

            nbhdMock.Setup(nbhd => nbhd.GetOriginalVcfVariants()).Returns(new List<CalledAllele>(nbhdOriginals));
            nbhdMock.Setup(nbhd => nbhd.CalledVariants).Returns(firstPassCalls);
            nbhdMock.Setup(nbhd => nbhd.CalledRefs).Returns(firstPassRefs);

            // First pass: the merger gets its own copy of the staged vcf alleles.
            var accepted = VcfMerger.GetMergedListOfVariants(nbhdMock.Object, new List<CalledAllele>(vcfAlleles));

            Assert.Equal(3, accepted.Count);

            var refCallFields = new Dictionary<string, string>();
            refCallFields.Add("GT", "0/0");
            refCallFields.Add("DP", "1000");
            refCallFields.Add("AD", "744");

            var expectedRefAt124 = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>> { refCallFields },
            };

            CheckVariantsMatch(alleleAt123, accepted[0]);
            CheckVariantsMatch(expectedRefAt124, accepted[1]);
            CheckVariantsMatch(alleleAt234, accepted[2]);

            // Second pass: re-stage the MNVs in a fresh dictionary.
            var secondPassCalls = new Dictionary<int, List<CalledAllele>>();
            secondPassCalls.Add(calledSnv.Coordinate, new List<CalledAllele> { calledSnv });

            nbhdMock.Setup(nbhd => nbhd.CalledVariants).Returns(secondPassCalls);

            // When a reference has been used up entirely it should be output as a no-call.
            // It has to be staged as a no-call already, because the merger cannot do that conversion.
            var secondPassRefs = new Dictionary<int, CalledAllele>();
            secondPassRefs.Add(123, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 123, Chromosome = "chr1", Reference = "A", Alternate = "."
            });
            secondPassRefs.Add(124, new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 124, Chromosome = "chr1", Reference = "A", Alternate = ".", Genotype = Genotype.RefLikeNoCall
            });

            nbhdMock.Setup(nbhd => nbhd.CalledRefs).Returns(secondPassRefs);

            // This time the staged list itself is passed, not a copy.
            accepted = VcfMerger.GetMergedListOfVariants(nbhdMock.Object, vcfAlleles);

            Assert.Equal(3, accepted.Count);

            var noCallFields = new Dictionary<string, string>();
            noCallFields.Add("GT", "./.");

            var expectedNoCallAt124 = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List<Dictionary<string, string>> { noCallFields },
            };

            CheckVariantsMatch(alleleAt123, accepted[0]);
            CheckVariantsMatch(expectedNoCallAt124, accepted[1]);
            CheckVariantsMatch(alleleAt234, accepted[2]);
        }