Esempio n. 1
0
        /// <summary>
        /// Functional test: runs a pairwise Venn over control_S15.vcf and control_S18.vcf
        /// and hands the consensus file written to the scratch directory, together with
        /// ExpectedConsensus.vcf, to <c>TestHelper.CompareFiles</c>.
        /// </summary>
        public void VennVcf_FxnlTest_HG19()
        {
            var scratchDir  = TestPaths.LocalScratchDirectory;
            var testDataDir = _TestDataPath;

            // Inputs come from the test-data folder; the consensus is written to scratch.
            string[] inputVcfs =
            {
                Path.Combine(testDataDir, "control_S15.vcf"),
                Path.Combine(testDataDir, "control_S18.vcf")
            };
            string consensusPath = Path.Combine(scratchDir, "Consensus.vcf");
            string expectedPath  = Path.Combine(testDataDir, "ExpectedConsensus.vcf");

            var options = new VennVcfOptions();

            options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            options.VariantCallingParams.MinimumFrequency       = 0.01f;
            options.ConsensusFileName    = consensusPath;
            options.OutputDirectory      = scratchDir;
            options.DebugMode            = true;
            options.CommandLineArguments = new string[] { "testcase commandline" };

            var processor = new VennProcessor(inputVcfs, options);

            processor.DoPairwiseVenn();

            TestHelper.CompareFiles(consensusPath, expectedPath);
        }
Esempio n. 2
0
        /// <summary>
        /// Runs a pairwise Venn over Empty_S1.vcf and Empty_S2.vcf and verifies that a
        /// consensus file is still written and that it contains no variants.
        /// </summary>
        public void VennVcf_EmptyInputTest()
        {
            // NOTE(review): unlike the sibling tests, this one writes its output into the
            // test-data directory rather than the scratch directory - confirm that is intended.
            var outDir      = TestPaths.LocalTestDataDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA       = Path.Combine(VcfPathRoot, "Empty_S1.vcf");
            string VcfB       = Path.Combine(VcfPathRoot, "Empty_S2.vcf");
            string OutputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

            // Remove any output left behind by a previous run; otherwise the
            // File.Exists assertion below could pass without this run writing anything.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;
            parameters.DebugMode         = true;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var observedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);

            // Assert.Empty reports the unexpected items on failure, unlike a bare count check.
            Assert.Empty(observedVariants);
        }
Esempio n. 3
0
        /// <summary>
        /// Runs a pairwise Venn over gtTests_S15.vcf and gtTests_S18.vcf and verifies that
        /// the consensus output matches gtConsensus.vcf variant-by-variant (compared via
        /// each variant's <c>ToString()</c>).
        /// </summary>
        public void VennVcf_GtTest()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA         = Path.Combine(VcfPathRoot, "gtTests_S15.vcf");
            string VcfB         = Path.Combine(VcfPathRoot, "gtTests_S18.vcf");
            string OutputPath   = Path.Combine(outDir, "gtConsensusOut.vcf");
            string ExpectedPath = Path.Combine(VcfPathRoot, "gtConsensus.vcf");

            // Remove any output left behind by a previous run; otherwise the
            // File.Exists assertion below could pass without this run writing anything.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var expectedVariants = AlleleReader.GetAllVariantsInFile(ExpectedPath);
            var observedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);

            // Counts must agree first so the index-aligned comparison below is valid.
            Assert.Equal(expectedVariants.Count, observedVariants.Count);

            for (int i = 0; i < expectedVariants.Count; i++)
            {
                Assert.Equal(expectedVariants[i].ToString(), observedVariants[i].ToString());
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Regression test for an issue where one pool had multiple co-located variants
        /// and the other pool had only a reference call, at chr15:92604460. The consensus
        /// must match ExpectedConsensus2.vcf and contain exactly one call at that position
        /// (not multiple reference calls).
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string VcfPath_PoolA     = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
            string VcfPath_PoolB     = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
            string VcfPath_Consensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");

            string OutputPath = Path.Combine(outDir, "Consensus2.vcf");

            // Start from a clean slate so the existence check below proves this run wrote the file.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.InputFiles        = new string[] { VcfPath_PoolA, VcfPath_PoolB };
            parameters.OutputDirectory   = outDir;
            parameters.ConsensusFileName = OutputPath;
            VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var CombinedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);
            var ExpectedVariants = AlleleReader.GetAllVariantsInFile(VcfPath_Consensus);

            // Counts must agree first so the index-aligned comparison below is valid.
            Assert.Equal(ExpectedVariants.Count, CombinedVariants.Count);

            int NumVariantsAtPos92604460 = 0;

            for (int i = 0; i < ExpectedVariants.Count; i++)
            {
                var EVariant = ExpectedVariants[i];
                var Variant  = CombinedVariants[i];

                if ((Variant.ReferencePosition == 92604460) &&
                    (Variant.Chromosome == "chr15"))
                {
                    NumVariantsAtPos92604460++;
                }

                Assert.Equal(EVariant.ToString(), Variant.ToString());
            }

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.Equal(1, NumVariantsAtPos92604460);
        }
        /// <summary>
        /// Functional test: runs <c>DoPairwiseVenn(false)</c> over control_S15.vcf and
        /// control_S18.vcf, then reads the produced consensus and ExpectedConsensus.vcf
        /// side by side with <c>VcfReader</c>, requiring both to end together and every
        /// pair of records to have the same <c>ToString()</c> text.
        /// </summary>
        public void VennVcf_FxnlTest()
        {
            var scratchDir  = TestPaths.LocalScratchDirectory;
            var testDataDir = _TestDataPath;

            string[] inputVcfs =
            {
                Path.Combine(testDataDir, "control_S15.vcf"),
                Path.Combine(testDataDir, "control_S18.vcf")
            };
            string consensusPath = Path.Combine(scratchDir, "Consensus.vcf");
            string expectedPath  = Path.Combine(testDataDir, "ExpectedConsensus.vcf");

            var options = new VennVcfOptions();

            options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            options.VariantCallingParams.MinimumFrequency       = 0.01f;
            options.ConsensusFileName = consensusPath;
            options.OutputDirectory   = scratchDir;
            options.DebugMode         = true;

            var processor = new VennProcessor(inputVcfs, options);

            processor.DoPairwiseVenn(false);

            Assert.True(File.Exists(consensusPath));

            using (var expectedReader = new VcfReader(expectedPath))
            using (var observedReader = new VcfReader(consensusPath))
            {
                // The readers fill these two record objects in place on each call.
                var expectedRecord = new VcfVariant();
                var observedRecord = new VcfVariant();

                bool expectedHasMore;
                bool observedHasMore;

                do
                {
                    expectedHasMore = expectedReader.GetNextVariant(expectedRecord);
                    observedHasMore = observedReader.GetNextVariant(observedRecord);

                    // Both files must run out of records at the same time.
                    Assert.Equal(expectedHasMore, observedHasMore);

                    if (expectedHasMore && observedHasMore)
                    {
                        Assert.Equal(expectedRecord.ToString(), observedRecord.ToString());
                    }
                }
                while (expectedHasMore && observedHasMore);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Functional test: runs a pairwise Venn over the two GRCH37 genome VCFs into a
        /// freshly recreated "GRCH37_fxnl" scratch sub-directory, then passes the consensus
        /// file and the four and/not Venn files, each paired with its expected counterpart,
        /// to <c>TestHelper.CompareFiles</c>.
        /// </summary>
        public void VennVcf_FxnlTest_GRCH37()
        {
            var testDataDir = TestPaths.LocalTestDataDirectory;
            var resultDir   = Path.Combine(TestPaths.LocalScratchDirectory, "GRCH37_fxnl");

            TestHelper.RecreateDirectory(resultDir);

            string[] inputVcfs =
            {
                Path.Combine(testDataDir, "GRCH37_S25.bam.genome.vcf"),
                Path.Combine(testDataDir, "GRCH37_S30.bam.genome.vcf")
            };

            string consensusPath = Path.Combine(resultDir, "Consensus.vcf");

            // producedVcfs[i] is checked against expectedVcfs[i]; keep the two arrays aligned.
            string[] producedVcfs =
            {
                consensusPath,
                Path.Combine(resultDir, "GRCH37_S25.bam_and_GRCH37_S30.bam.vcf"),
                Path.Combine(resultDir, "GRCH37_S25.bam_not_GRCH37_S30.bam.vcf"),
                Path.Combine(resultDir, "GRCH37_S30.bam_and_GRCH37_S25.bam.vcf"),
                Path.Combine(resultDir, "GRCH37_S30.bam_not_GRCH37_S25.bam.vcf")
            };

            string[] expectedVcfs =
            {
                Path.Combine(testDataDir, "Expected_GRCH37_Consensus.vcf"),
                Path.Combine(testDataDir, "Expected_GRCH37_S25.bam_and_GRCH37_S30.bam.vcf"),
                Path.Combine(testDataDir, "Expected_GRCH37_S25.bam_not_GRCH37_S30.bam.vcf"),
                Path.Combine(testDataDir, "Expected_GRCH37_S30.bam_and_GRCH37_S25.bam.vcf"),
                Path.Combine(testDataDir, "Expected_GRCH37_S30.bam_not_GRCH37_S25.bam.vcf")
            };

            var options = new VennVcfOptions();

            options.ConsensusFileName = consensusPath;
            options.OutputDirectory   = resultDir;
            options.DebugMode         = true;
            options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

            var processor = new VennProcessor(inputVcfs, options);

            processor.DoPairwiseVenn();

            for (int index = 0; index < producedVcfs.Length; index++)
            {
                TestHelper.CompareFiles(producedVcfs[index], expectedVcfs[index]);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Exercises consensus rules "E" and "F" when combining two pools.
        /// Rule E: if several REF calls end up at the same locus (e.g. an alt+ref pair
        /// converges to ref and a ref call follows it), they are combined into one ref call.
        /// Rule F: if several alt calls at the same locus all end up as no-calls, those
        /// VCF lines are left separate.
        /// The test first pins the relevant input records of both pools, then checks the
        /// consensus records written for positions 25378561 and 25378562.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "RulesEandF_S1.genome.vcf");
            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "RulesEandF_S2.genome.vcf");

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            // Start from a clean slate so the existence check below proves this run wrote the file.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;
            VennProcessor VennVcf = new VennProcessor(
                new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);

            VennVcf.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));

            var PoolAVariants    = AlleleReader.GetAllVariantsInFile(VcfPath_PoolA);
            var PoolBVariants    = AlleleReader.GetAllVariantsInFile(VcfPath_PoolB);
            var CombinedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);

            // ---- Rule "E": multiple REF calls at one locus collapse into a single ref call ----
            // All assertions below use xUnit's (expected, actual) argument order.

            var VariantA_1 = PoolAVariants[0];

            Assert.Equal(Genotype.HomozygousRef, VariantA_1.Genotype);
            Assert.Equal(1.0 - 0.0021, VariantA_1.Frequency, 4); // note: Frequency here is the ref frequency
            Assert.Equal(100, VariantA_1.VariantQscore);
            Assert.Equal(0, VariantA_1.Filters.Count);
            Assert.Equal(25378561, VariantA_1.ReferencePosition);

            var VariantA_2 = PoolAVariants[1];

            Assert.Equal(25378562, VariantA_2.ReferencePosition);

            var VariantB_1 = PoolBVariants[0];

            Assert.Equal(Genotype.HeterozygousAltRef, VariantB_1.Genotype); // "0/1"
            Assert.Equal(0.0173, VariantB_1.Frequency, 4);
            Assert.Equal(100, VariantB_1.VariantQscore);
            Assert.Equal(0, VariantB_1.Filters.Count);
            Assert.Equal(25378561, VariantB_1.ReferencePosition);

            var VariantB_2 = PoolBVariants[1];

            Assert.Equal(Genotype.HomozygousRef, VariantB_2.Genotype);
            Assert.Equal(0.9827, VariantB_2.Frequency, 4); // note: Frequency here is the ref frequency
            Assert.Equal(100, VariantB_2.VariantQscore);
            Assert.Equal(0, VariantB_2.Filters.Count);
            Assert.Equal(25378561, VariantB_2.ReferencePosition);

            var Consensus_1 = CombinedVariants[0];

            Assert.Equal(Genotype.HomozygousRef, Consensus_1.Genotype);
            Assert.Equal(0.9907, Consensus_1.Frequency, 4); // note: Frequency here is the ref frequency
            Assert.Equal(100, Consensus_1.VariantQscore);
            Assert.Equal(0, Consensus_1.Filters.Count);     // a ref call gets no low-VF filter
            Assert.Equal(25378561, Consensus_1.ReferencePosition);

            // The next consensus record is already at the following position,
            // i.e. there is only one consensus call at 25378561.
            var Consensus_2 = CombinedVariants[1];

            Assert.Equal(25378562, Consensus_2.ReferencePosition);

            // ---- Rule "F": multiple NOCALLs at one locus stay as separate VCF lines ----

            VariantA_1 = PoolAVariants[1];
            Assert.Equal(Genotype.HeterozygousAltRef, VariantA_1.Genotype); // "0/1"
            Assert.Equal(0.0776, VariantA_1.Frequency, 4);
            Assert.Equal(100, VariantA_1.VariantQscore);
            Assert.Equal(0, VariantA_1.Filters.Count);
            Assert.Equal(25378562, VariantA_1.ReferencePosition);

            VariantA_2 = PoolAVariants[2];
            Assert.Equal(Genotype.HeterozygousAltRef, VariantA_2.Genotype); // "0/1"
            Assert.Equal(0.0776, VariantA_2.Frequency, 4);
            Assert.Equal(100, VariantA_2.VariantQscore);
            Assert.Equal(0, VariantA_2.Filters.Count);
            Assert.Equal(25378562, VariantA_2.ReferencePosition);

            var VariantA_3 = PoolAVariants[3];

            Assert.Equal(Genotype.HeterozygousAltRef, VariantA_3.Genotype); // "0/1"
            Assert.Equal(0.0776, VariantA_3.Frequency, 4);
            Assert.Equal(100, VariantA_3.VariantQscore);
            Assert.Equal(0, VariantA_3.Filters.Count);
            Assert.Equal(25378562, VariantA_3.ReferencePosition);

            VariantB_1 = PoolBVariants[2];
            Assert.Equal(Genotype.HomozygousRef, VariantB_1.Genotype);
            Assert.Equal(0.9989, VariantB_1.Frequency, 4);
            Assert.Equal(100, VariantB_1.VariantQscore);
            Assert.Equal(0, VariantB_1.Filters.Count);
            Assert.Equal(25378562, VariantB_1.ReferencePosition);

            VariantB_2 = PoolBVariants[3];
            Assert.Equal(25378563, VariantB_2.ReferencePosition);

            Consensus_1 = CombinedVariants[1];
            Assert.Equal(25378562, Consensus_1.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, Consensus_1.Genotype);
            Assert.Equal(0.0069, Consensus_1.Frequency, 4);
            Assert.Equal(0, Consensus_1.VariantQscore);
            Assert.Equal(1, Consensus_1.Filters.Count);                // exactly one filter is expected ...
            Assert.Equal(FilterType.PoolBias, Consensus_1.Filters[0]); // ... and it is the pool-bias filter
            Assert.Equal("C", Consensus_1.ReferenceAllele);
            Assert.Equal("T", Consensus_1.AlternateAllele);

            Consensus_2 = CombinedVariants[2];
            Assert.Equal(25378562, Consensus_2.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, Consensus_2.Genotype);
            Assert.Equal(0.0069, Consensus_2.Frequency, 4);
            Assert.Equal(0, Consensus_2.VariantQscore);
            // Check this record's own filters (previously Consensus_1 was re-checked by mistake).
            Assert.Equal(1, Consensus_2.Filters.Count);
            Assert.Equal(FilterType.PoolBias, Consensus_2.Filters[0]);
            Assert.Equal("C", Consensus_2.ReferenceAllele);
            Assert.Equal("TT", Consensus_2.AlternateAllele);

            var Consensus_3 = CombinedVariants[3];

            Assert.Equal(25378562, Consensus_3.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, Consensus_3.Genotype);
            Assert.Equal(0.0069, Consensus_3.Frequency, 4);
            Assert.Equal(0, Consensus_3.VariantQscore);
            // Check this record's own filters (previously Consensus_1 was re-checked by mistake).
            Assert.Equal(1, Consensus_3.Filters.Count);
            Assert.Equal(FilterType.PoolBias, Consensus_3.Filters[0]);
            Assert.Equal("CC", Consensus_3.ReferenceAllele);
            Assert.Equal("T", Consensus_3.AlternateAllele);

            var Consensus_4 = CombinedVariants[4];

            Assert.Equal(25378563, Consensus_4.ReferencePosition);

            // Clean up the output on success.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Regression test for an issue where a variant present at 1% in one pool and 0% in
        /// the other showed up at 6% in the combined pool. Checks two consensus records
        /// around chr1:115258743-115258744 and then the contents of the four and/not Venn
        /// output files.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "small_S14.genome.vcf");
            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "small_S17.genome.vcf");

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = Path.Combine(outDir, "Consensus.vcf");
            parameters.OutputDirectory   = outDir;

            // Start from a clean slate so the existence check below proves this run wrote the file.
            if (File.Exists(parameters.ConsensusFileName))
            {
                File.Delete(parameters.ConsensusFileName);
            }

            VennProcessor Venn = new VennProcessor(new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);

            Venn.DoPairwiseVenn();

            Assert.True(File.Exists(parameters.ConsensusFileName));

            var CombinedVariants = AlleleReader.GetAllVariantsInFile(parameters.ConsensusFileName);
            var AandBVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
            var BandAVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
            var AnotBVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
            var BnotAVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

            //poolA
            //chr1       115258743           .               A             .               100         PASS      DP=35354            GT:GQ:AD:VF:NL:SB                0/0:100:30256:0.1442:20:-100.0000
            //chr1       115258743           .               AC          TT           100         PASS      DP=35354            GT:GQ:AD:VF:NL:SB                0/1:100:30720,4634:0.1311:20:-100.0000
            //chr1       115258744           .               C             .               100         PASS      DP=35253            GT:GQ:AD:VF:NL:SB                0/0:100:30277:0.1412:20:-100.0000
            //chr1       115258745           .               C             .               100         PASS      DP=35160            GT:GQ:AD:VF:NL:SB                0/0:100:35130:0.0009:20:-100.0000


            //poolB
            //chr1       115258743           .               AC          TT           100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:44202,5410:0.1090:20:-100.0000
            //chr1       115258743           .               A             T              100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:43362,670:0.0135:20:-46.0807
            //chr1       115258744           .               C             T              24           PASS      DP=49902            GT:GQ:AD:VF:NL:SB                0/1:24:43905,560:0.0112:20:-8.3857


            //when we had the bug:
            //chr1       115258743           .               AC          TT           100.00   PASS      DP=84966            GT:GQ:AD:VF:NL:                0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
            //chr1       115258743           .               A             T              100.00   PB;LowVF            DP=49612            GT:GQ:AD:VF:NL:                ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
            //chr1       115258743           .               A             .               100.00   PASS      DP=35354            GT:GQ:AD:VF:NL:                0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
            //chr1       115258744           .               C             T              100.00   PB           DP=85155            GT:GQ:AD:VF:NL:                0/1:100:74182,5536:0.0650:20:-
            //(issue#1) at 743 we had a A->. in only one pool. It should be marked as BIAS and not PASS.
            //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

            // All assertions below use xUnit's (expected, actual) argument order.

            // Issue #1: this used to be a passing reference call even though it was only called in one pool.
            var FunnyResult0 = CombinedVariants[3];

            Assert.Equal(0.8558, FunnyResult0.Frequency, 4);
            Assert.Equal(1, FunnyResult0.Filters.Count);
            Assert.Equal(FilterType.PoolBias, FunnyResult0.Filters[0]);
            Assert.Equal("A", FunnyResult0.ReferenceAllele);
            Assert.Equal(".", FunnyResult0.AlternateAllele);

            // Issue #2: when we had the bug, this used to get called as C->T at 6%.
            var FunnyResult = CombinedVariants[6];

            Assert.Equal(115258744, FunnyResult.ReferencePosition);
            Assert.Equal(0.8711, FunnyResult.Frequency, 4);
            Assert.Equal(0, FunnyResult.Filters.Count);
            Assert.Equal("C", FunnyResult.ReferenceAllele);
            Assert.Equal(".", FunnyResult.AlternateAllele);

            // Now check the Venn functionality.
            Assert.Equal(2, AandBVariants.Count);
            Assert.Equal(2, BandAVariants.Count);
            Assert.Equal(2, AnotBVariants.Count);
            Assert.Equal(0, BnotAVariants.Count);

            Assert.Equal(115258743, AandBVariants[0].ReferencePosition);
            Assert.Equal("AC", AandBVariants[0].ReferenceAllele);
            Assert.Equal("TT", AandBVariants[0].AlternateAllele);

            Assert.Equal(115258747, AandBVariants[1].ReferencePosition);
            Assert.Equal("C", AandBVariants[1].ReferenceAllele);
            Assert.Equal("T", AandBVariants[1].AlternateAllele);

            Assert.Equal(115258743, BandAVariants[0].ReferencePosition);
            Assert.Equal("AC", BandAVariants[0].ReferenceAllele);
            Assert.Equal("TT", BandAVariants[0].AlternateAllele);

            Assert.Equal(115258747, BandAVariants[1].ReferencePosition);
            Assert.Equal("C", BandAVariants[1].ReferenceAllele);
            Assert.Equal("T", BandAVariants[1].AlternateAllele);

            Assert.Equal(115258743, AnotBVariants[0].ReferencePosition);
            Assert.Equal("A", AnotBVariants[0].ReferenceAllele);
            Assert.Equal("T", AnotBVariants[0].AlternateAllele);

            Assert.Equal(115258744, AnotBVariants[1].ReferencePosition);
            Assert.Equal("C", AnotBVariants[1].ReferenceAllele);
            Assert.Equal("T", AnotBVariants[1].AlternateAllele);
        }
        /// <summary>
        /// Exercises consensus rules "E" and "F" when combining two pools, reading the
        /// inputs and the output through <c>VcfReader</c> and checking raw VCF field text.
        /// Rule E: if several REF calls end up at the same locus (e.g. an alt+ref pair
        /// converges to ref and a ref call follows it), they are combined into one ref call.
        /// Rule F: if several alt calls at the same locus all end up as no-calls, those
        /// VCF lines are left separate.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "RulesEandF_S1.genome.vcf");
            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "RulesEandF_S2.genome.vcf");

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            // Start from a clean slate so the existence check below proves this run wrote the file.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;
            VennProcessor VennVcf = new VennProcessor(
                new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);

            VennVcf.DoPairwiseVenn(false);

            Assert.True(File.Exists(OutputPath));

            List<VcfVariant> PoolAVariants    = VcfReader.GetAllVariantsInFile(VcfPath_PoolA);
            List<VcfVariant> PoolBVariants    = VcfReader.GetAllVariantsInFile(VcfPath_PoolB);
            List<VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(OutputPath);

            // ---- Rule "E": multiple REF calls at one locus collapse into a single ref call ----
            // All assertions below use xUnit's (expected, actual) argument order.

            VcfVariant VariantA_1 = PoolAVariants[0];

            Assert.Equal("0/0", VariantA_1.Genotypes[0]["GT"]);
            Assert.Equal("0.0021", VariantA_1.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantA_1.Quality);
            Assert.Equal("PASS", VariantA_1.Filters);
            Assert.Equal(25378561, VariantA_1.ReferencePosition);

            VcfVariant VariantA_2 = PoolAVariants[1];

            Assert.Equal(25378562, VariantA_2.ReferencePosition);

            VcfVariant VariantB_1 = PoolBVariants[0];

            Assert.Equal("0/1", VariantB_1.Genotypes[0]["GT"]);
            Assert.Equal("0.0173", VariantB_1.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantB_1.Quality);
            Assert.Equal("PASS", VariantB_1.Filters);
            Assert.Equal(25378561, VariantB_1.ReferencePosition);

            VcfVariant VariantB_2 = PoolBVariants[1];

            Assert.Equal("0/0", VariantB_2.Genotypes[0]["GT"]);
            Assert.Equal("0.0021", VariantB_2.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantB_2.Quality);
            Assert.Equal("PASS", VariantB_2.Filters);
            Assert.Equal(25378561, VariantB_2.ReferencePosition);

            VcfVariant Consensus_1 = CombinedVariants[0];

            Assert.Equal("0/0", Consensus_1.Genotypes[0]["GT"]);
            Assert.Equal("0.009", Consensus_1.Genotypes[0]["VF"]); // slightly improved from .008
            Assert.Equal(100, Consensus_1.Quality);
            Assert.Equal("PASS", Consensus_1.Filters);             // a ref call gets no low-VF filter
            Assert.Equal(25378561, Consensus_1.ReferencePosition);

            // The next consensus record is already at the following position,
            // i.e. there is only one consensus call at 25378561.
            VcfVariant Consensus_2 = CombinedVariants[1];

            Assert.Equal(25378562, Consensus_2.ReferencePosition);

            // ---- Rule "F": multiple NOCALLs at one locus stay as separate VCF lines ----

            VariantA_1 = PoolAVariants[1];
            Assert.Equal("0/1", VariantA_1.Genotypes[0]["GT"]);
            Assert.Equal("0.0725", VariantA_1.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantA_1.Quality);
            Assert.Equal("PASS", VariantA_1.Filters);
            Assert.Equal(25378562, VariantA_1.ReferencePosition);

            VariantA_2 = PoolAVariants[2];
            Assert.Equal("0/1", VariantA_2.Genotypes[0]["GT"]);
            Assert.Equal("0.0725", VariantA_2.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantA_2.Quality);
            Assert.Equal("PASS", VariantA_2.Filters);
            Assert.Equal(25378562, VariantA_2.ReferencePosition);

            VcfVariant VariantA_3 = PoolAVariants[3];

            Assert.Equal("0/1", VariantA_3.Genotypes[0]["GT"]);
            Assert.Equal("0.0725", VariantA_3.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantA_3.Quality);
            Assert.Equal("PASS", VariantA_3.Filters);
            Assert.Equal(25378562, VariantA_3.ReferencePosition);

            VariantB_1 = PoolBVariants[2];
            Assert.Equal("0/0", VariantB_1.Genotypes[0]["GT"]);
            Assert.Equal("0.0024", VariantB_1.Genotypes[0]["VF"]);
            Assert.Equal(100, VariantB_1.Quality);
            Assert.Equal("PASS", VariantB_1.Filters);
            Assert.Equal(25378562, VariantB_1.ReferencePosition);

            VariantB_2 = PoolBVariants[3];
            Assert.Equal(25378563, VariantB_2.ReferencePosition);

            Consensus_1 = CombinedVariants[1];
            Assert.Equal(25378562, Consensus_1.ReferencePosition);
            Assert.Equal("./.", Consensus_1.Genotypes[0]["GT"]);
            Assert.Equal("0.007", Consensus_1.Genotypes[0]["VF"]);
            Assert.Equal(0, Consensus_1.Quality);
            Assert.Equal("PB", Consensus_1.Filters); // only the pool-bias tag is expected in the filter field
            Assert.Equal("C", Consensus_1.ReferenceAllele);
            Assert.Equal("T", Consensus_1.VariantAlleles[0]);

            Consensus_2 = CombinedVariants[2];
            Assert.Equal(25378562, Consensus_2.ReferencePosition);
            Assert.Equal("./.", Consensus_2.Genotypes[0]["GT"]);
            Assert.Equal("0.007", Consensus_2.Genotypes[0]["VF"]);
            Assert.Equal(0, Consensus_2.Quality);
            Assert.Equal("PB", Consensus_2.Filters); // only the pool-bias tag is expected in the filter field
            Assert.Equal("C", Consensus_2.ReferenceAllele);
            Assert.Equal("TT", Consensus_2.VariantAlleles[0]);

            VcfVariant Consensus_3 = CombinedVariants[3];

            Assert.Equal(25378562, Consensus_3.ReferencePosition);
            Assert.Equal("./.", Consensus_3.Genotypes[0]["GT"]);
            Assert.Equal("0.007", Consensus_3.Genotypes[0]["VF"]);
            Assert.Equal(0, Consensus_3.Quality);
            Assert.Equal("PB", Consensus_3.Filters); // only the pool-bias tag is expected in the filter field
            Assert.Equal("CC", Consensus_3.ReferenceAllele);
            Assert.Equal("T", Consensus_3.VariantAlleles[0]);

            VcfVariant Consensus_4 = CombinedVariants[4];

            Assert.Equal(25378563, Consensus_4.ReferencePosition);

            // Clean up the output on success.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }
        }