// Runs MNV calling and reference calling over a two-site neighborhood (chr1:123 and
// chr1:124) for which no phased variant has been accepted, then verifies that no
// variants are called and that both original sites come back as homozygous-reference.
public void CallThroughAnEmptyNbhd()
{
    var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);
    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    // Nothing is staged on the neighborhood beyond its two sites: no accepted phased
    // variants and no used-reference counts. Presumably the dummy alleles carry
    // 156 alt reads out of 1000 total (verify against CreateDummyAllele), which
    // should leave plenty of reference support for a confident ref call at each site.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(0, calledVariants.Count);
    Assert.Equal(2, calledRefs.Count);

    var refAt123 = calledRefs[123];
    var refAt124 = calledRefs[124];

    Assert.Equal(Genotype.HomozygousRef, refAt123.Genotype);
    Assert.Equal(Genotype.HomozygousRef, refAt124.Genotype);
    Assert.Equal(123, refAt123.Coordinate);
    Assert.Equal(124, refAt124.Coordinate);
}
// Builds a list containing one identical test neighborhood per expected thread.
// Each neighborhood is constructed on chr1 from sites 120 and 121, and then has its
// VcfVariantSites replaced by a single site at position 123 backed by a dummy A>T allele.
// A non-positive expectedNumberOfThreads yields an empty list.
private List<VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
{
    var result = new List<VcfNeighborhood>();

    // One neighborhood is appended per iteration, so the count doubles as the loop index.
    while (result.Count < expectedNumberOfThreads)
    {
        var nbhd = new VcfNeighborhood(
            new VariantCallingParameters(),
            "chr1",
            new VariantSite(120),
            new VariantSite(121),
            "T");

        var siteAt123 = new VariantSite(123);
        siteAt123.ReferenceName = "chr1";
        siteAt123.OriginalAlleleFromVcf =
            PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156); // originally at index 0

        nbhd.VcfVariantSites = new List<VariantSite> { siteAt123 };

        result.Add(nbhd);
    }

    return result;
}
// Verifies header writing: the writer emits the original header with a
// "##VariantPhaser=Scylla..." line inserted at index 2, and once disposed it
// rejects both WriteHeader and Write with an Exception.
public void WriteHeader()
{
    var vcfWriter = InitializeWriter(false);
    vcfWriter.WriteHeader();
    vcfWriter.Dispose();

    // Any use after disposal must throw.
    Assert.Throws<Exception>(() => vcfWriter.WriteHeader());
    Assert.Throws<Exception>(
        () => vcfWriter.Write(
            new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
            }));

    // A second Dispose is issued on purpose, exactly as in the original sequence.
    vcfWriter.Dispose();

    Assert.True(File.Exists(_outputFile));

    var writtenLines = File.ReadAllLines(_outputFile);

    // The first two original header lines are carried over untouched.
    Assert.Equal(_origHeader[0], writtenLines[0]);
    Assert.Equal(_origHeader[1], writtenLines[1]);

    // Index 2 now holds the phaser line instead of the original third header line...
    var lineAtIndexTwo = writtenLines[2];
    Assert.NotEqual(_origHeader[2], lineAtIndexTwo);
    Assert.True(lineAtIndexTwo.StartsWith("##VariantPhaser=Scylla"));

    // ...and the original third header line is expected at index 4
    // (the content of index 3 is not checked by this test).
    Assert.Equal(_origHeader[2], writtenLines[4]);
}
// Stages one accepted phased SNV at chr1:123 in a two-site neighborhood (123, 124)
// and runs the caller three times with different used-reference counts at 124:
//   1. no counts used      -> 124 expected as 0/0 with AD 844
//   2. 100 ref counts used -> 124 expected as 0/0 with AD 744
//   3. 1000 ref counts used -> 124 expected as ./. with AD 0
// In every round the SNV at 123 must remain the only called variant.
public void VarCallsBecomeRefsAndNulls()
{
    var snvAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var snvAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(snvAt123);
    var siteAt124 = new VariantSite(snvAt124);
    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    var phasedSnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        Coordinate = 123,
        Reference = "A",
        Alternate = "T",
        VariantQscore = 35,
        AlleleSupport = 10,
        TotalCoverage = 50
    };
    neighborhood.AddAcceptedPhasedVariant(phasedSnv);

    // Round 1: nothing has been used up, so 124 keeps its full reference support.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, int>();
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(snvAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(BuildExpectedRefLikeCallAt124("0/0", "844"), calledRefs[124]);

    // Round 2: some reference counts were used up but some remain, so 124 is still a ref call.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, int> { { 124, 100 } };
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(snvAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(BuildExpectedRefLikeCallAt124("0/0", "744"), calledRefs[124]);

    // Round 3: everything at 124 was used up, so it must be reported as a no-call.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, int> { { 124, 1000 } };
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(snvAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(BuildExpectedRefLikeCallAt124("./.", "0"), calledRefs[124]);
}

// Builds the expected reference-like VCF record at chr1:124 (ref "A", alt ".") with
// DP fixed at "1000" and the supplied GT and AD values.
private static VcfVariant BuildExpectedRefLikeCallAt124(string genotype, string alleleDepth)
{
    var sampleFields = new Dictionary<string, string>
    {
        { "GT", genotype },
        { "DP", "1000" },
        { "AD", alleleDepth }
    };

    return new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>> { sampleFields },
    };
}
// Stages a phased variant at a coordinate (129) where no original VCF variant existed
// and marks all 1000 reference counts at 124 as used. Expects the new variant to be
// called at 129, sites 123 and 234 to be reported as 0/0, and site 124 as ./. .
public void CallAVariantInANewLocation()
{
    // Expected reference-like records for the three original sites in the neighborhood.
    var expectedRefAt123 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };

    var expectedRefAt234 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };

    var expectedNoCallAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "./." } }
        },
    };

    // Original variants as they would have come from the input VCF.
    var inputAllele1 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var inputAllele2 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var inputAllele3 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var inputAllele4 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var site1 = new VariantSite(inputAllele1);
    var site2 = new VariantSite(inputAllele2);
    var site3 = new VariantSite(inputAllele3);

    // The fourth site is deliberately never added to the neighborhood: it stands in for
    // a variant that failed filters and therefore is not going to be used for phasing.
    var site4 = new VariantSite(inputAllele4);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", site1, site2, "");
    neighborhood.AddVariantSite(site3, "RRRRR");
    neighborhood.SetRangeOfInterest();

    // Stage one candidate variant at a position that holds no original variant.
    var stagedVariantAt129 = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        Coordinate = 129,
        Reference = "A",
        Alternate = "TT"
    };
    neighborhood.AddAcceptedPhasedVariant(stagedVariantAt129);
    neighborhood.UsedRefCountsLookup = new Dictionary<int, int> { { 124, 1000 } };

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[129].Count);
    Assert.Equal(3, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
    VcfMergerTests.CheckVariantsMatch(expectedNoCallAt124, calledRefs[124]);
    VcfMergerTests.CheckVariantsMatch(stagedVariantAt129, calledVariants[129][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
}
// Writes the same two variants twice to the same output path, each time with a different
// writer configuration and original header, and checks the header that is read back:
//   * diploid config, quality threshold 30, original header already holding FILTER lines
//   * somatic config, quality threshold 22, original header without any FILTER lines
// The "##VariantPhaser=" line is only compared by prefix.
public void FilterHeader()
{
    var vcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileWriterTests.vcf");
    File.Delete(vcfPath);

    // NOTE(review): this context is populated but never handed to a writer; both writers
    // below receive a fresh, empty VcfWriterInputContext instead.
    var context = new VcfWriterInputContext
    {
        CommandLine = new[] { "myCommandLine" },
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    // ---------- first pass: diploid model, q30 ----------
    var writerConfig = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Diploid,
    };

    // Note: Scylla has no SB, RMxN or R8 filters of its own.
    var alleles = new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
    };
    alleles[0].Filters.AddRange(
        new List<FilterType> { FilterType.RMxN, FilterType.LowDepth, FilterType.LowVariantFrequency });
    alleles[1].Filters.AddRange(
        new List<FilterType> { FilterType.IndelRepeatLength, FilterType.LowVariantQscore, FilterType.StrandBias });

    var inputHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
        "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
        "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
        "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HD700n560_miseq1_S7.bam"
    };

    var phasedWriter = new PhasedVcfWriter(vcfPath, writerConfig, new VcfWriterInputContext(), inputHeader, null);
    phasedWriter.WriteHeader();
    phasedWriter.Write(alleles);
    phasedWriter.Dispose();

    VcfReader headerReader = new VcfReader(vcfPath);
    List<string> headerReadBack = headerReader.HeaderLines;
    headerReader.Dispose();

    // Original header, plus the phaser line after the command line and the
    // Scylla filter lines appended behind the pre-existing FILTER lines.
    var expectedDiploidHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
        "##VariantPhaser=Scylla 1.0.0.0",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
        "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
        "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
        "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
        "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
        "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
        "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
        "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HD700n560_miseq1_S7.bam"
    };

    AssertHeaderMatchesExceptPhaserVersion(expectedDiploidHeader, headerReadBack);

    // ---------- second pass: somatic model, q22, no FILTER lines in the input header ----------
    writerConfig = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 22,
        FrequencyFilterThreshold = 0.007f,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
    };

    inputHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HD700n560_miseq1_S7.bam"
    };

    phasedWriter = new PhasedVcfWriter(vcfPath, writerConfig, new VcfWriterInputContext(), inputHeader, null);

    // With no FILTER lines in the input, the filter lines are expected right before the column header.
    var expectedSomaticHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
        "##VariantPhaser=Scylla 1.0.0.0",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        "##FILTER=<ID=q22,Description=\"Quality score less than 22\">",
        "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
        "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HD700n560_miseq1_S7.bam",
    };

    // Clear the filters staged for the first pass.
    alleles[0].Filters = new List<FilterType>();
    alleles[1].Filters = new List<FilterType>();

    phasedWriter.WriteHeader();
    phasedWriter.Write(alleles);
    phasedWriter.Dispose();

    headerReader = new VcfReader(vcfPath);
    headerReadBack = headerReader.HeaderLines;
    headerReader.Dispose();

    AssertHeaderMatchesExceptPhaserVersion(expectedSomaticHeader, headerReadBack);
}

// Asserts that both headers have the same number of lines and match line by line.
// Where the expected line starts with "##VariantPhaser=", the written line only has
// to start with that same prefix; the rest of that line is not compared.
private static void AssertHeaderMatchesExceptPhaserVersion(List<string> expectedHeader, List<string> writtenHeader)
{
    Assert.Equal(expectedHeader.Count, writtenHeader.Count);

    for (var lineIndex = 0; lineIndex < expectedHeader.Count; lineIndex++)
    {
        var expectedLine = expectedHeader[lineIndex];

        if (!expectedLine.StartsWith("##VariantPhaser="))
        {
            Assert.Equal(expectedLine, writtenHeader[lineIndex]);
            continue;
        }

        Assert.True(writtenHeader[lineIndex].StartsWith("##VariantPhaser="));
    }
}
// Writes eight unsorted alleles through the writer twice - once as a normal VCF and
// once "crushed" - and compares the produced file line by line with the expected,
// ordered output. Also checks that a disposed writer throws on WriteHeader and Write.
public void Write()
{
    // Normal (uncrushed) expectation: the writer should order the variants by
    // chromosome, coordinate, reference and then alternate allele.
    var expectedUncrushed = new List<string>
    {
        "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tT\tA\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tT\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
    };

    // Crushed expectation: one line per position, with the three chr9:123 alleles merged.
    var expectedCrushed = new List<string>
    {
        "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tA\tC,A,C\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
    };

    // ----- normal vcf -----
    var vcfWriter = InitializeWriter(false);

    var unsortedAlleles = new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "A", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr8", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156),
    };

    vcfWriter.Write(unsortedAlleles);
    vcfWriter.Dispose();

    // A disposed writer must refuse further work.
    Assert.Throws<Exception>(() => vcfWriter.WriteHeader());
    Assert.Throws<Exception>(
        () => vcfWriter.Write(
            new List<CalledAllele>
            {
                PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
            }));

    // Second Dispose kept exactly as in the original sequence.
    vcfWriter.Dispose();

    var writtenLines = File.ReadAllLines(_outputFile);
    Assert.Equal(unsortedAlleles.Count, writtenLines.Length);

    for (var row = 0; row < expectedUncrushed.Count; row++)
    {
        Assert.Equal(expectedUncrushed[row], writtenLines[row]);
    }

    // ----- crushed vcf -----
    vcfWriter = InitializeWriter(true);
    vcfWriter.Write(unsortedAlleles);
    vcfWriter.Dispose();

    writtenLines = File.ReadAllLines(_outputFile);

    // Only variants at different positions get their own line.
    Assert.Equal(6, writtenLines.Length);

    for (var row = 0; row < expectedCrushed.Count; row++)
    {
        Assert.Equal(expectedCrushed[row], writtenLines[row]);
    }
}
// Merges the calls of a mocked neighborhood (one new MNV at 229, reference-like calls
// at 123, 124 and 234) with four staged VCF variants, of which only the first three
// were used by the caller. Expects five merged records in this order:
// ref at 123, no-call at 124, the MNV at 229, ref at 234, and the untouched
// fourth original variant (234 A>C).
public void GetAcceptedVariants_MergeVariants()
{
    var inputAllele1 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var inputAllele2 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var inputAllele3 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var inputAllele4 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "C", 1000, 156);

    // Expected reference-like output records.
    var expectedRefAt123 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };

    var expectedRefAt234 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };

    var expectedNoCallAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "./." } }
        },
    };

    var mnvAt229 = new CalledAllele
    {
        Chromosome = "chr1",
        Coordinate = 229,
        Reference = "AA",
        Alternate = "T",
        Genotype = Genotype.HeterozygousAltRef
    };

    var allStagedAlleles = new List<CalledAllele> { inputAllele1, inputAllele2, inputAllele3, inputAllele4 };
    var allelesUsedByCaller = new List<CalledAllele> { inputAllele1, inputAllele2, inputAllele3 };

    var neighborhoodMock = new Mock<IVcfNeighborhood>();
    neighborhoodMock.Setup(n => n.GetOriginalVcfVariants()).Returns(allelesUsedByCaller.ToList());

    var calledVariantsByPosition = new Dictionary<int, List<CalledAllele>>
    {
        { mnvAt229.Coordinate, new List<CalledAllele> { mnvAt229 } }
    };
    neighborhoodMock.Setup(n => n.CalledVariants).Returns(calledVariantsByPosition);

    // A site that has been used up completely should come out as a no-call. It has to be
    // staged as a no-call already, because the merger cannot do that conversion itself.
    var refAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 123,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = "."
    };
    var noCallAt124 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 124,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = ".",
        Genotype = Genotype.RefLikeNoCall
    };
    var refAt234 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 234,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = ".",
        Genotype = Genotype.HomozygousRef
    };
    var calledRefsByPosition = new Dictionary<int, CalledAllele>
    {
        { 123, refAt123 },
        { 124, noCallAt124 },
        { 234, refAt234 }
    };
    neighborhoodMock.Setup(n => n.CalledRefs).Returns(calledRefsByPosition);

    var merged = VcfMerger.GetMergedListOfVariants(neighborhoodMock.Object, allStagedAlleles.ToList());

    Assert.Equal(5, merged.Count);
    CheckVariantsMatch(expectedRefAt123, merged[0]);
    CheckVariantsMatch(expectedNoCallAt124, merged[1]);
    CheckVariantsMatch(mnvAt229, merged[2]);
    CheckVariantsMatch(expectedRefAt234, merged[3]);
    CheckVariantsMatch(inputAllele4, merged[4]);
}
// End-to-end merge test: reads MergerInput.vcf, substitutes the staged calls of two
// neighborhoods (chr2:116380048 and chr7:116380051-52) via VcfMerger, writes the result
// with a PhasedVcfWriter, and compares the output file line by line with MergerOutput.vcf.
//
// The reader and writer are released in finally blocks so that a failure during the
// merge does not leave file handles open on the test-data files.
public void WriteANbhd()
{
    var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
    var inputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerInput.vcf");
    var expectedFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerOutput.vcf");

    File.Delete(outputFilePath);

    var config = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
        AllowMultipleVcfLinesPerLoci = true
    };

    // The writer is given an empty input context and an empty original header.
    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>() { }, null);
    try
    {
        var reader = new VcfReader(inputFilePath, true);
        try
        {
            // Set up the original variants.
            var originalVcfVariant1 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
            var originalVcfVariant2 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
            var originalVcfVariant4 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
            var originalVcfVariant5 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);
            var vs4 = new VariantSite(originalVcfVariant4);
            var vs5 = new VariantSite(originalVcfVariant5);

            // Variants at position 116380048 get replaced (two new MNVs are called there).
            var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), "chr2", vs1, vs2, "");
            nbhd1.SetRangeOfInterest();

            // Variants at positions 116380051 and 116380052 get replaced (one new MNV is called at 51).
            var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), "chr7", vs4, vs5, "");
            nbhd2.SetRangeOfInterest();

            VcfMerger merger = new VcfMerger(reader);
            List<CalledAllele> allelesPastNbh = new List<CalledAllele>();

            nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { originalVcfVariant1.Coordinate, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
            };
            nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { originalVcfVariant4.Coordinate, new List<CalledAllele> { originalVcfVariant4 } }
            };

            // Walk the input in order: up to each neighborhood's chromosome, through the
            // neighborhood itself, and finally everything that remains.
            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);
            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);
            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);
            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);
            merger.WriteRemainingVariants(writer, allelesPastNbh);
        }
        finally
        {
            // Previously the input reader was never released.
            reader.Dispose();
        }
    }
    finally
    {
        // Must happen before the output file is read back below.
        writer.Dispose();
    }

    var expectedLines = File.ReadLines(expectedFilePath).ToList();
    var outputLines = File.ReadLines(outputFilePath).ToList();

    Assert.Equal(expectedLines.Count, outputLines.Count);

    for (int i = 0; i < expectedLines.Count; i++)
    {
        Assert.Equal(expectedLines[i], outputLines[i]);
    }
}
// Merges a mocked neighborhood's calls (an SNV at 123 plus reference-like calls at 123
// and 124) with three staged VCF variants, of which only the first two were used by the
// caller. Run twice:
//   1. the call at 124 is staged as a plain reference call -> merged record compared to a 0/0 expectation
//   2. the call at 124 is staged as RefLikeNoCall          -> merged record compared to a ./. expectation
// In both runs the variant at 123 and the untouched variant at 234 must be kept.
public void GetAcceptedVariants_MergeNull()
{
    var inputAllele1 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var inputAllele2 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var inputAllele3 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var allStagedAlleles = new List<CalledAllele> { inputAllele1, inputAllele2, inputAllele3 };
    var allelesUsedByCaller = new List<CalledAllele> { inputAllele1, inputAllele2 };

    var calledSnvAt123 = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        Coordinate = 123,
        Reference = "A",
        Alternate = "T"
    };

    // ---------- run 1: 124 staged as an ordinary reference call ----------
    var firstCalledVariants = new Dictionary<int, List<CalledAllele>>
    {
        { calledSnvAt123.Coordinate, new List<CalledAllele> { calledSnvAt123 } }
    };

    var firstCalledRefs = new Dictionary<int, CalledAllele>
    {
        {
            123,
            new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 123,
                Chromosome = "chr1",
                Reference = "A",
                Alternate = "."
            }
        },
        {
            124,
            new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 124,
                Chromosome = "chr1",
                Reference = "A",
                Alternate = "."
            }
        }
    };

    var neighborhoodMock = new Mock<IVcfNeighborhood>();
    neighborhoodMock.Setup(n => n.GetOriginalVcfVariants()).Returns(allelesUsedByCaller.ToList());
    neighborhoodMock.Setup(n => n.CalledVariants).Returns(firstCalledVariants);
    neighborhoodMock.Setup(n => n.CalledRefs).Returns(firstCalledRefs);

    var merged = VcfMerger.GetMergedListOfVariants(neighborhoodMock.Object, allStagedAlleles.ToList());

    Assert.Equal(3, merged.Count);

    // Expectation for a still-confident reference call at 124.
    // NOTE(review): the DP/AD values mirror a scenario where 100 of the 844 reference
    // reads were used up; nothing in this test stages that, so presumably
    // CheckVariantsMatch does not compare those fields - confirm.
    var expectedRefAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" }, { "DP", "1000" }, { "AD", "744" } }
        },
    };

    CheckVariantsMatch(inputAllele1, merged[0]);
    CheckVariantsMatch(expectedRefAt124, merged[1]);
    CheckVariantsMatch(inputAllele3, merged[2]);

    // ---------- run 2: 124 staged as a no-call ----------
    // Re-stage the called variants.
    var secondCalledVariants = new Dictionary<int, List<CalledAllele>>
    {
        { calledSnvAt123.Coordinate, new List<CalledAllele> { calledSnvAt123 } }
    };
    neighborhoodMock.Setup(n => n.CalledVariants).Returns(secondCalledVariants);

    // A site that has been used up completely should come out as a no-call. It has to be
    // staged as a no-call already, because the merger cannot do that conversion itself.
    var secondCalledRefs = new Dictionary<int, CalledAllele>
    {
        {
            123,
            new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 123,
                Chromosome = "chr1",
                Reference = "A",
                Alternate = "."
            }
        },
        {
            124,
            new CalledAllele(AlleleCategory.Reference)
            {
                Coordinate = 124,
                Chromosome = "chr1",
                Reference = "A",
                Alternate = ".",
                Genotype = Genotype.RefLikeNoCall
            }
        }
    };
    neighborhoodMock.Setup(n => n.CalledRefs).Returns(secondCalledRefs);

    merged = VcfMerger.GetMergedListOfVariants(neighborhoodMock.Object, allStagedAlleles);

    Assert.Equal(3, merged.Count);

    var expectedNoCallAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "./." } }
        },
    };

    CheckVariantsMatch(inputAllele1, merged[0]);
    CheckVariantsMatch(expectedNoCallAt124, merged[1]);
    CheckVariantsMatch(inputAllele3, merged[2]);
}