// Runs the pairwise Venn on control_S15.vcf / control_S18.vcf (with a command line recorded in
// the options) and hands the consensus output and ExpectedConsensus.vcf to TestHelper.CompareFiles.
public void VennVcf_FxnlTest_HG19()
{
    var scratchDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    var inputVcfs = new string[]
    {
        Path.Combine(dataDir, "control_S15.vcf"),
        Path.Combine(dataDir, "control_S18.vcf")
    };
    string consensusPath = Path.Combine(scratchDir, "Consensus.vcf");
    string expectedPath = Path.Combine(dataDir, "ExpectedConsensus.vcf");

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = consensusPath;
    options.OutputDirectory = scratchDir;
    options.DebugMode = true;
    options.CommandLineArguments = new string[] { "testcase commandline" };

    var processor = new VennProcessor(inputVcfs, options);
    processor.DoPairwiseVenn();

    TestHelper.CompareFiles(consensusPath, expectedPath);
}
// Runs the pairwise Venn on Empty_S1.vcf / Empty_S2.vcf and checks that a consensus file is
// written and that reading it back yields no variants.
public void VennVcf_EmptyInputTest()
{
    // NOTE(review): unlike the sibling tests, output is written to LocalTestDataDirectory rather
    // than LocalScratchDirectory - confirm this is intentional.
    var outDir = TestPaths.LocalTestDataDirectory;
    var dataDir = _TestDataPath;

    string vcfA = Path.Combine(dataDir, "Empty_S1.vcf");
    string vcfB = Path.Combine(dataDir, "Empty_S2.vcf");
    string outputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

    // Remove output left behind by an earlier run so the existence check below can only be
    // satisfied by a file written during this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = outputPath;
    options.OutputDirectory = outDir;
    options.DebugMode = true;

    var processor = new VennProcessor(new string[] { vcfA, vcfB }, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);
    Assert.Empty(observedVariants);
}
// Runs the pairwise Venn on gtTests_S15.vcf / gtTests_S18.vcf and requires the consensus output
// to contain the same number of variants as gtConsensus.vcf, with identical string forms at
// every index.
public void VennVcf_GtTest()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string vcfA = Path.Combine(dataDir, "gtTests_S15.vcf");
    string vcfB = Path.Combine(dataDir, "gtTests_S18.vcf");
    string outputPath = Path.Combine(outDir, "gtConsensusOut.vcf");
    string expectedPath = Path.Combine(dataDir, "gtConsensus.vcf");

    // Remove output left behind by an earlier run so the existence check below can only be
    // satisfied by a file written during this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = outputPath;
    options.OutputDirectory = outDir;

    var processor = new VennProcessor(new string[] { vcfA, vcfB }, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var expectedVariants = AlleleReader.GetAllVariantsInFile(expectedPath);
    var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

    // Counts are checked first so the index-wise comparison below cannot run off either list.
    Assert.Equal(expectedVariants.Count, observedVariants.Count);

    for (int i = 0; i < expectedVariants.Count; i++)
    {
        Assert.Equal(expectedVariants[i].ToString(), observedVariants[i].ToString());
    }
}
// Regression test for an issue where one pool had multiple co-located variants at
// chr15:92604460 and the other pool had only a reference call there. The consensus answer
// should be a single ref call at that position (and not multiple ref calls).
public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string poolAPath = Path.Combine(dataDir, "C64-Ct-4_S17.genome.vcf");
    string poolBPath = Path.Combine(dataDir, "C64-Ct-4_S18.genome.vcf");
    string expectedConsensusPath = Path.Combine(dataDir, "ExpectedConsensus2.vcf");
    string outputPath = Path.Combine(outDir, "Consensus2.vcf");

    // Start from a clean slate so the existence check below reflects this run only.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.InputFiles = new string[] { poolAPath, poolBPath };
    options.OutputDirectory = outDir;
    options.ConsensusFileName = outputPath;

    var processor = new VennProcessor(options.InputFiles, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);
    var expectedVariants = AlleleReader.GetAllVariantsInFile(expectedConsensusPath);

    // Counts are checked first so the index-wise comparison below cannot run off either list.
    Assert.Equal(expectedVariants.Count, combinedVariants.Count);

    int callsAtTargetPosition = 0;
    for (int i = 0; i < expectedVariants.Count; i++)
    {
        var expected = expectedVariants[i];
        var observed = combinedVariants[i];

        if (observed.ReferencePosition == 92604460 && observed.Chromosome == "chr15")
        {
            callsAtTargetPosition++;
        }

        Assert.Equal(expected.ToString(), observed.ToString());
    }

    // Expected value goes first so a failure reports "Expected: 1".
    Assert.Equal(1, callsAtTargetPosition);
}
// Runs the pairwise Venn (DoPairwiseVenn(false)) on control_S15.vcf / control_S18.vcf, then
// walks the consensus output and ExpectedConsensus.vcf in lock-step with VcfReader. Both files
// must run out of records at the same time and each pair of records must have identical
// string forms.
public void VennVcf_FxnlTest()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string vcfA = Path.Combine(dataDir, "control_S15.vcf");
    string vcfB = Path.Combine(dataDir, "control_S18.vcf");
    string outputPath = Path.Combine(outDir, "Consensus.vcf");
    string expectedPath = Path.Combine(dataDir, "ExpectedConsensus.vcf");

    // Other tests in this file also write "Consensus.vcf" to the scratch directory. Remove any
    // leftover so the existence check below can only be satisfied by this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = outputPath;
    options.OutputDirectory = outDir;
    options.DebugMode = true;

    var processor = new VennProcessor(new string[] { vcfA, vcfB }, options);
    processor.DoPairwiseVenn(false);

    Assert.True(File.Exists(outputPath));

    using (VcfReader expectedReader = new VcfReader(expectedPath))
    using (VcfReader outputReader = new VcfReader(outputPath))
    {
        // The same two VcfVariant instances are passed to GetNextVariant on every iteration.
        VcfVariant expectedVariant = new VcfVariant();
        VcfVariant outputVariant = new VcfVariant();

        while (true)
        {
            bool hasExpected = expectedReader.GetNextVariant(expectedVariant);
            bool hasOutput = outputReader.GetNextVariant(outputVariant);

            Assert.Equal(hasExpected, hasOutput);

            // The two flags are equal past the assertion above, so testing one is sufficient.
            if (!hasExpected)
            {
                break;
            }

            Assert.Equal(expectedVariant.ToString(), outputVariant.ToString());
        }
    }
}
// Runs the pairwise Venn on the two GRCH37 genome VCFs into a freshly recreated scratch
// sub-directory, then hands each of five produced files (the consensus plus four and/not Venn
// files) to TestHelper.CompareFiles together with its Expected_* counterpart.
public void VennVcf_FxnlTest_GRCH37()
{
    var outDir = Path.Combine(TestPaths.LocalScratchDirectory, "GRCH37_fxnl");
    TestHelper.RecreateDirectory(outDir);

    var dataDir = TestPaths.LocalTestDataDirectory;
    string vcfA = Path.Combine(dataDir, "GRCH37_S25.bam.genome.vcf");
    string vcfB = Path.Combine(dataDir, "GRCH37_S30.bam.genome.vcf");
    string consensusPath = Path.Combine(outDir, "Consensus.vcf");

    // producedVcfs[i] is compared against expectedVcfs[i]; keep the two arrays aligned.
    string[] producedVcfs =
    {
        consensusPath,
        Path.Combine(outDir, "GRCH37_S25.bam_and_GRCH37_S30.bam.vcf"),
        Path.Combine(outDir, "GRCH37_S25.bam_not_GRCH37_S30.bam.vcf"),
        Path.Combine(outDir, "GRCH37_S30.bam_and_GRCH37_S25.bam.vcf"),
        Path.Combine(outDir, "GRCH37_S30.bam_not_GRCH37_S25.bam.vcf")
    };
    string[] expectedVcfs =
    {
        Path.Combine(dataDir, "Expected_GRCH37_Consensus.vcf"),
        Path.Combine(dataDir, "Expected_GRCH37_S25.bam_and_GRCH37_S30.bam.vcf"),
        Path.Combine(dataDir, "Expected_GRCH37_S25.bam_not_GRCH37_S30.bam.vcf"),
        Path.Combine(dataDir, "Expected_GRCH37_S30.bam_and_GRCH37_S25.bam.vcf"),
        Path.Combine(dataDir, "Expected_GRCH37_S30.bam_not_GRCH37_S25.bam.vcf")
    };

    var options = new VennVcfOptions();
    options.ConsensusFileName = consensusPath;
    options.OutputDirectory = outDir;
    options.DebugMode = true;
    options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

    var processor = new VennProcessor(new string[] { vcfA, vcfB }, options);
    processor.DoPairwiseVenn();

    for (int i = 0; i < producedVcfs.Length; i++)
    {
        TestHelper.CompareFiles(producedVcfs[i], expectedVcfs[i]);
    }
}
// Rule "E": an alt call plus a ref call converge to a ref, and a ref call follows it. If we end
//           up with multiple REF calls for the same locus, those VCF lines are combined into
//           one ref call.
// Rule "F": various alt calls all end up as no-calls. If we end up with multiple NOCALL calls
//           for the same locus, those VCF lines are left separate (three no-call records at
//           25378562 are asserted below).
public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string poolAPath = Path.Combine(dataDir, "RulesEandF_S1.genome.vcf");
    string poolBPath = Path.Combine(dataDir, "RulesEandF_S2.genome.vcf");
    string outputPath = Path.Combine(outDir, "outEandF.vcf");

    // Start from a clean slate so the existence check below reflects this run only.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = outputPath;
    options.OutputDirectory = outDir;

    var processor = new VennProcessor(new string[] { poolAPath, poolBPath }, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var poolAVariants = AlleleReader.GetAllVariantsInFile(poolAPath);
    var poolBVariants = AlleleReader.GetAllVariantsInFile(poolBPath);
    var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

    // ---- Rule "E": multiple REF calls at one locus are combined into a single ref call ----

    var variantA1 = poolAVariants[0];
    Assert.Equal(Genotype.HomozygousRef, variantA1.Genotype);
    Assert.Equal(1.0 - 0.0021, variantA1.Frequency, 4); // note: the frequency here is the ref freq
    Assert.Equal(100, variantA1.VariantQscore);
    Assert.Equal(0, variantA1.Filters.Count);
    Assert.Equal(25378561, variantA1.ReferencePosition);

    var variantA2 = poolAVariants[1];
    Assert.Equal(25378562, variantA2.ReferencePosition);

    var variantB1 = poolBVariants[0];
    Assert.Equal(Genotype.HeterozygousAltRef, variantB1.Genotype); // "0/1"
    Assert.Equal(0.0173, variantB1.Frequency, 4);
    Assert.Equal(100, variantB1.VariantQscore);
    Assert.Equal(0, variantB1.Filters.Count);
    Assert.Equal(25378561, variantB1.ReferencePosition);

    var variantB2 = poolBVariants[1];
    Assert.Equal(Genotype.HomozygousRef, variantB2.Genotype);
    Assert.Equal(0.9827, variantB2.Frequency, 4); // note: the frequency here is the ref freq
    Assert.Equal(100, variantB2.VariantQscore);
    Assert.Equal(0, variantB2.Filters.Count);
    Assert.Equal(25378561, variantB2.ReferencePosition);

    var consensus1 = combinedVariants[0];
    Assert.Equal(Genotype.HomozygousRef, consensus1.Genotype);
    Assert.Equal(0.9907, consensus1.Frequency, 4); // slightly improved from .008; this is the ref freq
    Assert.Equal(100, consensus1.VariantQscore);
    Assert.Equal(0, consensus1.Filters.Count); // low VF tag is NOT added by post-processing because this is a ref call
    Assert.Equal(25378561, consensus1.ReferencePosition);

    var consensus2 = combinedVariants[1];
    Assert.Equal(25378562, consensus2.ReferencePosition);

    // ---- Rule "F": multiple NOCALL calls at one locus are left as separate lines ----

    variantA1 = poolAVariants[1];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA1.Genotype); // "0/1"
    Assert.Equal(0.0776, variantA1.Frequency, 4);
    Assert.Equal(100, variantA1.VariantQscore);
    Assert.Equal(0, variantA1.Filters.Count);
    Assert.Equal(25378562, variantA1.ReferencePosition);

    variantA2 = poolAVariants[2];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA2.Genotype); // "0/1"
    Assert.Equal(0.0776, variantA2.Frequency, 4);
    Assert.Equal(100, variantA2.VariantQscore);
    Assert.Equal(0, variantA2.Filters.Count);
    Assert.Equal(25378562, variantA2.ReferencePosition);

    var variantA3 = poolAVariants[3];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA3.Genotype); // "0/1"
    Assert.Equal(0.0776, variantA3.Frequency, 4);
    Assert.Equal(100, variantA3.VariantQscore);
    Assert.Equal(0, variantA3.Filters.Count);
    Assert.Equal(25378562, variantA3.ReferencePosition);

    variantB1 = poolBVariants[2];
    Assert.Equal(Genotype.HomozygousRef, variantB1.Genotype);
    Assert.Equal(0.9989, variantB1.Frequency, 4);
    Assert.Equal(100, variantB1.VariantQscore);
    Assert.Equal(0, variantB1.Filters.Count);
    Assert.Equal(25378562, variantB1.ReferencePosition);

    variantB2 = poolBVariants[3];
    Assert.Equal(25378563, variantB2.ReferencePosition);

    // For the three consensus no-calls below, PoolBias is the only filter expected at this
    // point; per the original note, a low-VF tag also gets added later by post-processing.
    consensus1 = combinedVariants[1];
    Assert.Equal(25378562, consensus1.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus1.Genotype);
    Assert.Equal(0.0069, consensus1.Frequency, 4);
    Assert.Equal(0, consensus1.VariantQscore);
    Assert.Equal(1, consensus1.Filters.Count);
    Assert.Equal(FilterType.PoolBias, consensus1.Filters[0]);
    Assert.Equal("C", consensus1.ReferenceAllele);
    Assert.Equal("T", consensus1.AlternateAllele);

    consensus2 = combinedVariants[2];
    Assert.Equal(25378562, consensus2.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus2.Genotype);
    Assert.Equal(0.0069, consensus2.Frequency, 4);
    Assert.Equal(0, consensus2.VariantQscore);
    // Checks this record's own filters (previously the first no-call's filters were re-checked here).
    Assert.Equal(1, consensus2.Filters.Count);
    Assert.Equal(FilterType.PoolBias, consensus2.Filters[0]);
    Assert.Equal("C", consensus2.ReferenceAllele);
    Assert.Equal("TT", consensus2.AlternateAllele);

    var consensus3 = combinedVariants[3];
    Assert.Equal(25378562, consensus3.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus3.Genotype);
    Assert.Equal(0.0069, consensus3.Frequency, 4);
    Assert.Equal(0, consensus3.VariantQscore);
    // Checks this record's own filters (previously the first no-call's filters were re-checked here).
    Assert.Equal(1, consensus3.Filters.Count);
    Assert.Equal(FilterType.PoolBias, consensus3.Filters[0]);
    Assert.Equal("CC", consensus3.ReferenceAllele);
    Assert.Equal("T", consensus3.AlternateAllele);

    var consensus4 = combinedVariants[4];
    Assert.Equal(25378563, consensus4.ReferencePosition);

    // Clean up the consensus file; only reached when every assertion above passed.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }
}
// Regression test from a reported issue where a variant was in one pool at 1%, in the other at
// 0%, and showed up as 6% in the combined pool. Also checks the contents of the four
// and/not Venn output files.
public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string poolAPath = Path.Combine(dataDir, "small_S14.genome.vcf");
    string poolBPath = Path.Combine(dataDir, "small_S17.genome.vcf");
    string consensusPath = Path.Combine(outDir, "Consensus.vcf");

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = consensusPath;
    options.OutputDirectory = outDir;

    // Start from a clean slate so the existence check below reflects this run only.
    if (File.Exists(consensusPath))
    {
        File.Delete(consensusPath);
    }

    var processor = new VennProcessor(new string[] { poolAPath, poolBPath }, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(consensusPath));

    var combinedVariants = AlleleReader.GetAllVariantsInFile(consensusPath);
    var aAndBVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
    var bAndAVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
    var aNotBVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
    var bNotAVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

    //poolA
    //chr1 115258743 . A . 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/0:100:30256:0.1442:20:-100.0000
    //chr1 115258743 . AC TT 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/1:100:30720,4634:0.1311:20:-100.0000
    //chr1 115258744 . C . 100 PASS DP=35253 GT:GQ:AD:VF:NL:SB 0/0:100:30277:0.1412:20:-100.0000
    //chr1 115258745 . C . 100 PASS DP=35160 GT:GQ:AD:VF:NL:SB 0/0:100:35130:0.0009:20:-100.0000

    //poolB
    //chr1 115258743 . AC TT 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:44202,5410:0.1090:20:-100.0000
    //chr1 115258743 . A T 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:43362,670:0.0135:20:-46.0807
    //chr1 115258744 . C T 24 PASS DP=49902 GT:GQ:AD:VF:NL:SB 0/1:24:43905,560:0.0112:20:-8.3857

    //when we had the bug:
    //chr1 115258743 . AC TT 100.00 PASS DP=84966 GT:GQ:AD:VF:NL: 0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
    //chr1 115258743 . A T 100.00 PB;LowVF DP=49612 GT:GQ:AD:VF:NL: ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
    //chr1 115258743 . A . 100.00 PASS DP=35354 GT:GQ:AD:VF:NL: 0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
    //chr1 115258744 . C T 100.00 PB DP=85155 GT:GQ:AD:VF:NL: 0/1:100:74182,5536:0.0650:20:-

    //(issue#1) at 743 we had an A->. in only one pool. It should be marked as BIAS and not PASS.
    //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

    // Issue #1: this used to be a reference call marked PASS, even though it was only called
    // in one pool.
    var refCallInOnePool = combinedVariants[3];
    Assert.Equal(0.8558, refCallInOnePool.Frequency, 4);
    Assert.Equal(1, refCallInOnePool.Filters.Count);
    Assert.Equal(FilterType.PoolBias, refCallInOnePool.Filters[0]);
    Assert.Equal("A", refCallInOnePool.ReferenceAllele);
    Assert.Equal(".", refCallInOnePool.AlternateAllele);

    // Issue #2: when we had the bug, this used to get called at 6%.
    var formerlyInflatedCall = combinedVariants[6];
    Assert.Equal(115258744, formerlyInflatedCall.ReferencePosition);
    Assert.Equal(0.8711, formerlyInflatedCall.Frequency, 4);
    Assert.Equal(0, formerlyInflatedCall.Filters.Count);
    Assert.Equal("C", formerlyInflatedCall.ReferenceAllele);
    Assert.Equal(".", formerlyInflatedCall.AlternateAllele);

    // Now check the Venn functionality.
    Assert.Equal(2, aAndBVariants.Count);
    Assert.Equal(2, bAndAVariants.Count);
    Assert.Equal(2, aNotBVariants.Count);
    Assert.Equal(0, bNotAVariants.Count);

    Assert.Equal(115258743, aAndBVariants[0].ReferencePosition);
    Assert.Equal("AC", aAndBVariants[0].ReferenceAllele);
    Assert.Equal("TT", aAndBVariants[0].AlternateAllele);

    Assert.Equal(115258747, aAndBVariants[1].ReferencePosition);
    Assert.Equal("C", aAndBVariants[1].ReferenceAllele);
    Assert.Equal("T", aAndBVariants[1].AlternateAllele);

    Assert.Equal(115258743, bAndAVariants[0].ReferencePosition);
    Assert.Equal("AC", bAndAVariants[0].ReferenceAllele);
    Assert.Equal("TT", bAndAVariants[0].AlternateAllele);

    Assert.Equal(115258747, bAndAVariants[1].ReferencePosition);
    Assert.Equal("C", bAndAVariants[1].ReferenceAllele);
    Assert.Equal("T", bAndAVariants[1].AlternateAllele);

    Assert.Equal(115258743, aNotBVariants[0].ReferencePosition);
    Assert.Equal("A", aNotBVariants[0].ReferenceAllele);
    Assert.Equal("T", aNotBVariants[0].AlternateAllele);

    Assert.Equal(115258744, aNotBVariants[1].ReferencePosition);
    Assert.Equal("C", aNotBVariants[1].ReferenceAllele);
    Assert.Equal("T", aNotBVariants[1].AlternateAllele);
}
// Rule "E": an alt call plus a ref call converge to a ref, and a ref call follows it. If we end
//           up with multiple REF calls for the same locus, those VCF lines are combined into
//           one ref call.
// Rule "F": various alt calls all end up as no-calls. If we end up with multiple NOCALL calls
//           for the same locus, those VCF lines are left separate (three no-call records at
//           25378562 are asserted below).
// This variant of the test reads records with VcfReader and checks the raw VCF field strings.
public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var dataDir = _TestDataPath;

    string poolAPath = Path.Combine(dataDir, "RulesEandF_S1.genome.vcf");
    string poolBPath = Path.Combine(dataDir, "RulesEandF_S2.genome.vcf");
    string outputPath = Path.Combine(outDir, "outEandF.vcf");

    // Start from a clean slate so the existence check below reflects this run only.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = outputPath;
    options.OutputDirectory = outDir;

    var processor = new VennProcessor(new string[] { poolAPath, poolBPath }, options);
    processor.DoPairwiseVenn(false);

    Assert.True(File.Exists(outputPath));

    List<VcfVariant> poolAVariants = VcfReader.GetAllVariantsInFile(poolAPath);
    List<VcfVariant> poolBVariants = VcfReader.GetAllVariantsInFile(poolBPath);
    List<VcfVariant> combinedVariants = VcfReader.GetAllVariantsInFile(outputPath);

    // ---- Rule "E": multiple REF calls at one locus are combined into a single ref call ----

    VcfVariant variantA1 = poolAVariants[0];
    Assert.Equal("0/0", variantA1.Genotypes[0]["GT"]);
    Assert.Equal("0.0021", variantA1.Genotypes[0]["VF"]);
    Assert.Equal(100, variantA1.Quality);
    Assert.Equal("PASS", variantA1.Filters);
    Assert.Equal(25378561, variantA1.ReferencePosition);

    VcfVariant variantA2 = poolAVariants[1];
    Assert.Equal(25378562, variantA2.ReferencePosition);

    VcfVariant variantB1 = poolBVariants[0];
    Assert.Equal("0/1", variantB1.Genotypes[0]["GT"]);
    Assert.Equal("0.0173", variantB1.Genotypes[0]["VF"]);
    Assert.Equal(100, variantB1.Quality);
    Assert.Equal("PASS", variantB1.Filters);
    Assert.Equal(25378561, variantB1.ReferencePosition);

    VcfVariant variantB2 = poolBVariants[1];
    Assert.Equal("0/0", variantB2.Genotypes[0]["GT"]);
    Assert.Equal("0.0021", variantB2.Genotypes[0]["VF"]);
    Assert.Equal(100, variantB2.Quality);
    Assert.Equal("PASS", variantB2.Filters);
    Assert.Equal(25378561, variantB2.ReferencePosition);

    VcfVariant consensus1 = combinedVariants[0];
    Assert.Equal("0/0", consensus1.Genotypes[0]["GT"]);
    Assert.Equal("0.009", consensus1.Genotypes[0]["VF"]); // slightly improved from .008
    Assert.Equal(100, consensus1.Quality);
    Assert.Equal("PASS", consensus1.Filters); // low VF tag is NOT added by post-processing because this is a ref call
    Assert.Equal(25378561, consensus1.ReferencePosition);

    VcfVariant consensus2 = combinedVariants[1];
    Assert.Equal(25378562, consensus2.ReferencePosition);

    // ---- Rule "F": multiple NOCALL calls at one locus are left as separate lines ----

    variantA1 = poolAVariants[1];
    Assert.Equal("0/1", variantA1.Genotypes[0]["GT"]);
    Assert.Equal("0.0725", variantA1.Genotypes[0]["VF"]);
    Assert.Equal(100, variantA1.Quality);
    Assert.Equal("PASS", variantA1.Filters);
    Assert.Equal(25378562, variantA1.ReferencePosition);

    variantA2 = poolAVariants[2];
    Assert.Equal("0/1", variantA2.Genotypes[0]["GT"]);
    Assert.Equal("0.0725", variantA2.Genotypes[0]["VF"]);
    Assert.Equal(100, variantA2.Quality);
    Assert.Equal("PASS", variantA2.Filters);
    Assert.Equal(25378562, variantA2.ReferencePosition);

    VcfVariant variantA3 = poolAVariants[3];
    Assert.Equal("0/1", variantA3.Genotypes[0]["GT"]);
    Assert.Equal("0.0725", variantA3.Genotypes[0]["VF"]);
    Assert.Equal(100, variantA3.Quality);
    Assert.Equal("PASS", variantA3.Filters);
    Assert.Equal(25378562, variantA3.ReferencePosition);

    variantB1 = poolBVariants[2];
    Assert.Equal("0/0", variantB1.Genotypes[0]["GT"]);
    Assert.Equal("0.0024", variantB1.Genotypes[0]["VF"]);
    Assert.Equal(100, variantB1.Quality);
    Assert.Equal("PASS", variantB1.Filters);
    Assert.Equal(25378562, variantB1.ReferencePosition);

    variantB2 = poolBVariants[3];
    Assert.Equal(25378563, variantB2.ReferencePosition);

    // For the three consensus no-calls below, "PB" is the only filter expected at this point;
    // per the original note, a low-VF tag also gets added later by post-processing.
    consensus1 = combinedVariants[1];
    Assert.Equal(25378562, consensus1.ReferencePosition);
    Assert.Equal("./.", consensus1.Genotypes[0]["GT"]);
    Assert.Equal("0.007", consensus1.Genotypes[0]["VF"]);
    Assert.Equal(0, consensus1.Quality);
    Assert.Equal("PB", consensus1.Filters);
    Assert.Equal("C", consensus1.ReferenceAllele);
    Assert.Equal("T", consensus1.VariantAlleles[0]);

    consensus2 = combinedVariants[2];
    Assert.Equal(25378562, consensus2.ReferencePosition);
    Assert.Equal("./.", consensus2.Genotypes[0]["GT"]);
    Assert.Equal("0.007", consensus2.Genotypes[0]["VF"]);
    Assert.Equal(0, consensus2.Quality);
    Assert.Equal("PB", consensus2.Filters);
    Assert.Equal("C", consensus2.ReferenceAllele);
    Assert.Equal("TT", consensus2.VariantAlleles[0]);

    VcfVariant consensus3 = combinedVariants[3];
    Assert.Equal(25378562, consensus3.ReferencePosition);
    Assert.Equal("./.", consensus3.Genotypes[0]["GT"]);
    Assert.Equal("0.007", consensus3.Genotypes[0]["VF"]);
    Assert.Equal(0, consensus3.Quality);
    Assert.Equal("PB", consensus3.Filters);
    Assert.Equal("CC", consensus3.ReferenceAllele);
    Assert.Equal("T", consensus3.VariantAlleles[0]);

    VcfVariant consensus4 = combinedVariants[4];
    Assert.Equal(25378563, consensus4.ReferencePosition);

    // Clean up the consensus file; only reached when every assertion above passed.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }
}