コード例 #1
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        /// <summary>
        /// Runs a pairwise venn over the "Empty_S1.vcf" / "Empty_S2.vcf" inputs and
        /// checks that a consensus VCF is written and that it contains no alleles.
        /// </summary>
        public void VennVcf_EmptyInputTest()
        {
            var outDir      = TestPaths.LocalTestDataDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfA       = Path.Combine(vcfPathRoot, "Empty_S1.vcf");
            string vcfB       = Path.Combine(vcfPathRoot, "Empty_S2.vcf");
            string outputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

            // Remove any consensus file left behind by an earlier run; otherwise the
            // File.Exists assertion below could pass without this run writing anything.
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = outputPath;
            parameters.OutputDirectory   = outDir;
            parameters.DebugMode         = true;

            VennProcessor venn = new VennProcessor(new string[] { vcfA, vcfB }, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(outputPath));
            var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

            Assert.Equal(0, observedVariants.Count);
        }
コード例 #2
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        /// <summary>
        /// Runs a pairwise venn over "gtTests_S15.vcf" and "gtTests_S18.vcf" and checks
        /// that the consensus output matches "gtConsensus.vcf" allele by allele,
        /// comparing the string form of each allele.
        /// </summary>
        public void VennVcf_GtTest()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfA         = Path.Combine(vcfPathRoot, "gtTests_S15.vcf");
            string vcfB         = Path.Combine(vcfPathRoot, "gtTests_S18.vcf");
            string outputPath   = Path.Combine(outDir, "gtConsensusOut.vcf");
            string expectedPath = Path.Combine(vcfPathRoot, "gtConsensus.vcf");

            // Remove any output left behind by an earlier run so the comparison below
            // can only see a file produced by this run.
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = outputPath;
            parameters.OutputDirectory   = outDir;

            VennProcessor venn = new VennProcessor(new string[] { vcfA, vcfB }, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(outputPath));
            var expectedVariants = AlleleReader.GetAllVariantsInFile(expectedPath);
            var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

            Assert.Equal(expectedVariants.Count, observedVariants.Count);

            // Counts are equal at this point, so indexing both lists with i is safe.
            for (int i = 0; i < expectedVariants.Count; i++)
            {
                Assert.Equal(expectedVariants[i].ToString(), observedVariants[i].ToString());
            }
        }
        /// <summary>
        /// Asserts that two VCF files hold the same number of alleles and that the
        /// alleles at each index have identical string representations.
        /// </summary>
        /// <param name="expectedResultsFilePath">Path of the VCF holding the expected alleles.</param>
        /// <param name="actualResultsFilePath">Path of the VCF produced by the code under test.</param>
        private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
        {
            List<CalledAllele> results  = AlleleReader.GetAllVariantsInFile(actualResultsFilePath);
            List<CalledAllele> expected = AlleleReader.GetAllVariantsInFile(expectedResultsFilePath);

            // xUnit takes (expected, actual); the reversed order produced misleading
            // "Expected/Actual" labels when the counts differed.
            Assert.Equal(expected.Count, results.Count);

            for (int i = 0; i < expected.Count; i++)
            {
                Assert.Equal(expected[i].ToString(), results[i].ToString());
            }
        }
コード例 #4
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        /// <summary>
        /// Regression test for an issue where one pool had multiple co-located variants
        /// at chr15 92604460 and the other pool had only a reference call there.
        /// The consensus should contain a single ref call at that position
        /// (and not multiple ref calls).
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfPathPoolA     = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
            string vcfPathPoolB     = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
            string vcfPathConsensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");

            string outputPath = Path.Combine(outDir, "Consensus2.vcf");

            // Start from a clean slate so the existence check below is meaningful.
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.InputFiles        = new string[] { vcfPathPoolA, vcfPathPoolB };
            parameters.OutputDirectory   = outDir;
            parameters.ConsensusFileName = outputPath;
            VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(outputPath));
            var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);
            var expectedVariants = AlleleReader.GetAllVariantsInFile(vcfPathConsensus);

            Assert.Equal(expectedVariants.Count, combinedVariants.Count);

            int numVariantsAtPos92604460 = 0;

            for (int i = 0; i < expectedVariants.Count; i++)
            {
                var expectedVariant = expectedVariants[i];
                var combinedVariant = combinedVariants[i];

                if ((combinedVariant.ReferencePosition == 92604460) &&
                    (combinedVariant.Chromosome == "chr15"))
                {
                    numVariantsAtPos92604460++;
                }

                Assert.Equal(expectedVariant.ToString(), combinedVariant.ToString());
            }

            // Exactly one consensus line is expected at the problem position.
            Assert.Equal(1, numVariantsAtPos92604460);
        }
コード例 #5
0
        /// <summary>
        /// Reads two "crushed" VCF files through AlleleReader and checks that the
        /// multi-allelic lines are unpacked into one allele each, with the expected
        /// genotype, support counts, frequency, position and allele strings.
        /// </summary>
        public void UnpackAlleles()
        {
            //two example vcf files that have been "crushed".
            var crushedVcf1 = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
            var crushedVcf2 = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

            var unpackedVariants1 = AlleleReader.GetAllVariantsInFile(crushedVcf1);
            var unpackedVariants2 = AlleleReader.GetAllVariantsInFile(crushedVcf2);

            Assert.Equal(8, unpackedVariants1.Count);  //7 lines, but 8 alleles
            Assert.Equal(91, unpackedVariants2.Count); //90 lines, but 91 alleles

            var hetAlt1     = unpackedVariants1[5];
            var hetAlt1next = unpackedVariants1[6];
            var hetAlt2     = unpackedVariants2[3];
            var hetAlt2next = unpackedVariants2[4];

            //example one:
            //total depth = 5394, total variant count = 2387 + 2000 = 4387
            //so, ref counts ~1007.

            Assert.Equal(Genotype.HeterozygousAlt1Alt2, hetAlt1.Genotype);
            Assert.Equal(1007, hetAlt1.ReferenceSupport);
            Assert.Equal(2387, hetAlt1.AlleleSupport);
            Assert.Equal(0.4425, hetAlt1.Frequency, 4);
            Assert.Equal(10, hetAlt1.ReferencePosition);
            Assert.Equal("AA", hetAlt1.ReferenceAllele);
            Assert.Equal("GA", hetAlt1.AlternateAllele);

            Assert.Equal(1007, hetAlt1next.ReferenceSupport);
            Assert.Equal(2000, hetAlt1next.AlleleSupport);
            Assert.Equal(0.3708, hetAlt1next.Frequency, 4);
            Assert.Equal(10, hetAlt1next.ReferencePosition);
            Assert.Equal("G", hetAlt1next.AlternateAllele);

            //example two:
            //total depth = 532, total variant count = 254 + 254 = 508
            //so, ref counts ~24.

            Assert.Equal(Genotype.HeterozygousAlt1Alt2, hetAlt2.Genotype);
            Assert.Equal(24, hetAlt2.ReferenceSupport);
            Assert.Equal(254, hetAlt2.AlleleSupport);
            Assert.Equal(223906731, hetAlt2.ReferencePosition);

            Assert.Equal(24, hetAlt2next.ReferenceSupport);
            Assert.Equal(254, hetAlt2next.AlleleSupport);
            Assert.Equal(223906731, hetAlt2next.ReferencePosition);
        }
        /// <summary>
        /// Runs a GenomeProcessor with the given (or default-constructed) options, reads
        /// the resulting VCF back through AlleleReader and optionally checks the called
        /// variants and the reference calls.
        /// </summary>
        /// <param name="bamFilePath">BAM to process; only used when <paramref name="applicationOptions"/> is null.</param>
        /// <param name="vcfFilePath">VCF to read the results from. When <paramref name="doCheckReferences"/> is true its extension is replaced with "genome.vcf".</param>
        /// <param name="intervalPath">Optional interval file; only used when <paramref name="applicationOptions"/> is null.</param>
        /// <param name="expectedVariants">Variants passed to CheckVariants when a full variant check is requested.</param>
        /// <param name="fakeReferences">When non-null, a MockGenome built from these references replaces the factory's reference genome.</param>
        /// <param name="doCheckVariants">Whether the non-reference calls are checked at all.</param>
        /// <param name="doCheckReferences">Whether the reference calls are checked.</param>
        /// <param name="expectedNumCoveredPositions">Not referenced by this method body.</param>
        /// <param name="threadByChr">Selects the GenomeProcessor constructor overload that receives an explicit <c>false</c> third argument.</param>
        /// <param name="doCountsOnly">When greater than zero, only the number of variant calls is compared against this value.</param>
        /// <param name="doLog">Default-options value for DebugMode.</param>
        /// <param name="callMnvs">Default-options value for CallMNVs.</param>
        /// <param name="applicationOptions">Options to run with; when null a default set is built from the other arguments.</param>
        /// <param name="collapse">Default-options value for Collapse.</param>
        public void Execute(
            string bamFilePath,
            string vcfFilePath,
            string intervalPath,
            List <CalledAllele> expectedVariants,
            List <ChrReference> fakeReferences = null,
            bool doCheckVariants            = true,
            bool doCheckReferences          = false,
            int expectedNumCoveredPositions = 0,
            bool threadByChr = false,
            int doCountsOnly = 0,
            bool doLog       = false,
            bool callMnvs    = true,
            PiscesApplicationOptions applicationOptions = null,
            bool collapse = true)
        {
            if (doCheckReferences)
            {
                vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
            }

            if (applicationOptions == null)
            {
                applicationOptions = new PiscesApplicationOptions
                {
                    BAMPaths            = new[] { bamFilePath },
                    IntervalPaths       = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
                    GenomePaths         = new[] { GenomeDirectory },
                    OutputBiasFiles     = true,
                    DebugMode           = doLog,
                    CallMNVs            = callMnvs,
                    MaxGapBetweenMNV    = 10,
                    MaxSizeMNV          = 15,
                    Collapse            = collapse,
                    BamFilterParameters = new BamFilterParameters()
                    {
                        MinimumBaseCallQuality = 20
                    },
                    VariantCallingParameters = new VariantCallingParameters(),
                    VcfWritingParameters     = new VcfWritingParameters()
                    {
                        OutputGvcfFile = doCheckReferences,
                    },
                    CommandLineArguments = new string[] { "some", "cmds" }
                };
            }

            // The output directory is always overridden, even for caller-supplied options.
            applicationOptions.OutputDirectory = OutputDirectory;

            var factory = GetFactory(applicationOptions);

            IGenome genome;

            if (fakeReferences == null)
            {
                genome = factory.GetReferenceGenome(GenomeDirectory);
            }
            else
            {
                genome = new MockGenome(fakeReferences, GenomeDirectory);
            }

            // The two modes differ only in the constructor overload used.
            var processor = threadByChr
                ? new GenomeProcessor(factory, genome, false)
                : new GenomeProcessor(factory, genome);

            processor.Execute(1);

            var alleles      = AlleleReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

            if (doCheckVariants)
            {
                if (doCountsOnly > 0)
                {
                    // xUnit takes (expected, actual).
                    Assert.Equal(doCountsOnly, variantCalls.Count);
                }
                else
                {
                    CheckVariants(variantCalls, expectedVariants);
                }
            }

            if (doCheckReferences)
            {
                var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

                // Index the variant positions once instead of re-projecting the whole
                // variant list for every allele.
                var variantPositions = variantCalls.ToLookup(v => v.ReferencePosition);
                var allelesAwayFromVariants = alleles.Count(a => !variantPositions.Contains(a.ReferencePosition));

                // make sure no reference calls at variant positions
                Assert.Equal(referenceAlleles.Count, allelesAwayFromVariants);
            }
        }
        /// <summary>
        /// Runs the functional test setup on "collapsed.test.stitched.bam" against a mock
        /// chr1 reference and compares the resulting genome VCF with
        /// "test_truth.stitched.genome.vcf" located next to the BAM.
        /// </summary>
        public void StitchedCollapsedBamGroundTruth()
        {
            // SNP ground truth from TingTing
            var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");
            var runner      = new SomaticVariantCallerFunctionalTestSetup();

            runner.GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1");

            var bamFilters = new BamFilterParameters()
            {
                MinimumBaseCallQuality = 20,
                MinimumMapQuality      = 1,
                OnlyUseProperPairs     = false,
            };

            var callingParameters = new VariantCallingParameters()
            {
                MaximumVariantQScore                  = 100,
                MinimumVariantQScoreFilter            = 30,
                MinimumVariantQScore                  = 20,
                MinimumCoverage                       = 10,
                MinimumFrequency                      = 0.01f,
                FilterOutVariantsPresentOnlyOneStrand = false,
                ForcedNoiseLevel                      = -1,
                NoiseModel                  = NoiseModel.Flat,
                StrandBiasModel             = StrandBiasModel.Extended,
                AmpliconBiasFilterThreshold = 0.01F
            };

            var writingParameters = new Domain.Options.VcfWritingParameters()
            {
                OutputGvcfFile = true,
                ReportRcCounts = true,
                ReportTsCounts = true
            };

            var appOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                IntervalPaths        = null,
                GenomePaths          = new[] { Path.Combine(TestPaths.SharedGenomesDirectory, "chr1") },
                OutputBiasFiles      = true,
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                MaxSizeMNV           = 100,
                MaxGapBetweenMNV     = 10,
                NoiseModelHalfWindow = 1,
                BamFilterParameters      = bamFilters,
                VariantCallingParameters = callingParameters,
                VcfWritingParameters     = writingParameters
            };

            // Fake chr1: N-padding up to the region of interest, followed by the real bases
            // (position 9770498 ~ 9770669).
            var chr1Reference = new ChrReference()
            {
                Name     = "chr1",
                Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
            };
            var mockChrRef = new List <ChrReference>() { chr1Reference };

            var expectedSnv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 9770596,
                ReferenceAllele   = "C",
                AlternateAllele   = "A",
                Chromosome        = "chr1"
            };
            var expectedAlleles = new List <CalledAllele> { expectedSnv };

            runner.Execute(bamFilePath, Path.ChangeExtension(bamFilePath, "genome.vcf"), null, expectedAlleles, mockChrRef, applicationOptions: appOptions);

            var bamDirectory     = Path.GetDirectoryName(appOptions.BAMPaths[0]);
            var truthVcfFilePath = Path.Combine(bamDirectory, "test_truth.stitched.genome.vcf");
            var resultVcfPath    = Path.ChangeExtension(bamFilePath, "genome.vcf");

            // Both files are also loaded through AlleleReader; the parsed lists themselves
            // are not inspected here.
            var truthAlleles  = AlleleReader.GetAllVariantsInFile(truthVcfFilePath);
            var resultAlleles = AlleleReader.GetAllVariantsInFile(resultVcfPath);

            TestUtilities.TestHelper.CompareFiles(truthVcfFilePath, resultVcfPath);
        }
        /// <summary>
        /// Runs the GenomeProcessor twice over the same small BAM against a mock chr19
        /// reference: once with MinimumCoverage = 1000 and gVCF output enabled, and once
        /// without an explicit coverage setting and with gVCF output disabled. After each
        /// run the alleles are read back from the "genome.vcf" path derived from the BAM.
        /// </summary>
        public void Pisces_LowDepthTest()
        {
            // NOTE(review): this method loads results after each run but asserts nothing
            // about them - confirm which comparisons were intended.
            var mockChr19 = new ChrReference()
            {
                Name     = "chr19",
                Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
            };
            var chrRef = new List <ChrReference>() { mockChr19 };

            // First run: high coverage threshold, gVCF output.
            var highCoverageOptions = new PiscesApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                //IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode           = true,
                CallMNVs            = true,
                UseMNVReallocation  = false,
                MaxSizeMNV          = 100,
                OutputDirectory     = TestPaths.LocalTestDataDirectory,
                BamFilterParameters = new Domain.Options.BamFilterParameters()
                {
                    MinimumBaseCallQuality = 20
                },
                VariantCallingParameters = new Domain.Options.VariantCallingParameters()
                {
                    MinimumVariantQScore = 20,
                    MinimumCoverage      = 1000,
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true,
                }
            };

            var vcfFilePath = Path.ChangeExtension(highCoverageOptions.BAMPaths[0], "genome.vcf");

            var     highCoverageFactory = new Factory(highCoverageOptions);
            IGenome genomeRef           = new MockGenome(chrRef, _genomeChr19);

            var highCoverageProcessor = new GenomeProcessor(highCoverageFactory, genomeRef);

            highCoverageProcessor.Execute(1);
            var coverage1000results = AlleleReader.GetAllVariantsInFile(vcfFilePath);

            // Second run: default calling parameters, gVCF output switched off.
            // NOTE(review): the results are still read from the same "genome.vcf" path as
            // the first run - confirm this is the file the second run actually writes.
            var defaultCoverageOptions = new PiscesApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                // IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                OutputDirectory      = TestPaths.LocalTestDataDirectory,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = false,
                }
            };

            var defaultCoverageFactory   = new Factory(defaultCoverageOptions);
            var defaultCoverageProcessor = new GenomeProcessor(defaultCoverageFactory, genomeRef);

            defaultCoverageProcessor.Execute(1);
            var coverage10results = AlleleReader.GetAllVariantsInFile(vcfFilePath);
        }
コード例 #9
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        /// <summary>
        /// Checks consensus rules "E" and "F" on the RulesEandF_S1 / RulesEandF_S2 pools:
        /// E - multiple REF calls for the same locus are combined into one ref call;
        /// F - multiple NOCALL calls for the same locus are left as separate VCF lines.
        /// The inputs are asserted first so that a change in the fixture files is
        /// reported separately from a change in the consensus logic.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfPathPoolA = Path.Combine(vcfPathRoot, "RulesEandF_S1.genome.vcf");
            string vcfPathPoolB = Path.Combine(vcfPathRoot, "RulesEandF_S2.genome.vcf");

            string outputPath = Path.Combine(outDir, "outEandF.vcf");

            // Start from a clean slate so the existence check below is meaningful.
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = outputPath;
            parameters.OutputDirectory   = outDir;
            VennProcessor vennVcf = new VennProcessor(
                new string[] { vcfPathPoolA, vcfPathPoolB }, parameters);

            vennVcf.DoPairwiseVenn();

            Assert.True(File.Exists(outputPath));

            var poolAVariants    = AlleleReader.GetAllVariantsInFile(vcfPathPoolA);
            var poolBVariants    = AlleleReader.GetAllVariantsInFile(vcfPathPoolB);
            var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

            //Rule "E" test    (ie an Alt+ref call converges to a REF, and we also had a ref call following it)
            //E	if we end up with multiple REF calls for the same loci, combine those .VCF lines into one ref call.

            var ruleE_PoolA_0 = poolAVariants[0];

            Assert.Equal(Genotype.HomozygousRef, ruleE_PoolA_0.Genotype);
            Assert.Equal(1.0 - 0.0021, ruleE_PoolA_0.Frequency, 4); //note Vf here is the ref freq
            Assert.Equal(100, ruleE_PoolA_0.VariantQscore);
            Assert.Equal(0, ruleE_PoolA_0.Filters.Count);
            Assert.Equal(25378561, ruleE_PoolA_0.ReferencePosition);

            var ruleE_PoolA_1 = poolAVariants[1];

            Assert.Equal(25378562, ruleE_PoolA_1.ReferencePosition);

            var ruleE_PoolB_0 = poolBVariants[0];

            Assert.Equal(Genotype.HeterozygousAltRef, ruleE_PoolB_0.Genotype); //"0/1");
            Assert.Equal(0.0173, ruleE_PoolB_0.Frequency, 4);
            Assert.Equal(100, ruleE_PoolB_0.VariantQscore);
            Assert.Equal(0, ruleE_PoolB_0.Filters.Count);
            Assert.Equal(25378561, ruleE_PoolB_0.ReferencePosition);

            var ruleE_PoolB_1 = poolBVariants[1];

            Assert.Equal(Genotype.HomozygousRef, ruleE_PoolB_1.Genotype);
            Assert.Equal(0.9827, ruleE_PoolB_1.Frequency, 4);  //note Vf here is the ref freq
            Assert.Equal(100, ruleE_PoolB_1.VariantQscore);
            Assert.Equal(0, ruleE_PoolB_1.Filters.Count);
            Assert.Equal(25378561, ruleE_PoolB_1.ReferencePosition);

            var ruleE_Consensus_0 = combinedVariants[0];

            Assert.Equal(Genotype.HomozygousRef, ruleE_Consensus_0.Genotype);
            Assert.Equal(0.9907, ruleE_Consensus_0.Frequency, 4); //slightly improved from .008.  //note Vf here is the ref freq
            Assert.Equal(100, ruleE_Consensus_0.VariantQscore);
            Assert.Equal(0, ruleE_Consensus_0.Filters.Count);     //<-low VF tag will NOT be added by post-processing b/c is ref call
            Assert.Equal(25378561, ruleE_Consensus_0.ReferencePosition);

            var ruleE_Consensus_1 = combinedVariants[1];

            Assert.Equal(25378562, ruleE_Consensus_1.ReferencePosition);

            //Rule "F" test    (ie various alt calls all ended up as no-call.
            //F	if we end up with multiple NOCALL calls for the same loci, leave those .VCF lines separate

            var ruleF_PoolA_1 = poolAVariants[1];

            Assert.Equal(Genotype.HeterozygousAltRef, ruleF_PoolA_1.Genotype); //"0/1");
            Assert.Equal(0.0776, ruleF_PoolA_1.Frequency, 4);
            Assert.Equal(100, ruleF_PoolA_1.VariantQscore);
            Assert.Equal(0, ruleF_PoolA_1.Filters.Count);
            Assert.Equal(25378562, ruleF_PoolA_1.ReferencePosition);

            var ruleF_PoolA_2 = poolAVariants[2];

            Assert.Equal(Genotype.HeterozygousAltRef, ruleF_PoolA_2.Genotype); //"0/1");
            Assert.Equal(0.0776, ruleF_PoolA_2.Frequency, 4);
            Assert.Equal(100, ruleF_PoolA_2.VariantQscore);
            Assert.Equal(0, ruleF_PoolA_2.Filters.Count);
            Assert.Equal(25378562, ruleF_PoolA_2.ReferencePosition);

            var ruleF_PoolA_3 = poolAVariants[3];

            Assert.Equal(Genotype.HeterozygousAltRef, ruleF_PoolA_3.Genotype); //"0/1");
            Assert.Equal(0.0776, ruleF_PoolA_3.Frequency, 4);
            Assert.Equal(100, ruleF_PoolA_3.VariantQscore);
            Assert.Equal(0, ruleF_PoolA_3.Filters.Count);
            Assert.Equal(25378562, ruleF_PoolA_3.ReferencePosition);

            var ruleF_PoolB_2 = poolBVariants[2];

            Assert.Equal(Genotype.HomozygousRef, ruleF_PoolB_2.Genotype);
            Assert.Equal(0.9989, ruleF_PoolB_2.Frequency, 4);
            Assert.Equal(100, ruleF_PoolB_2.VariantQscore);
            Assert.Equal(0, ruleF_PoolB_2.Filters.Count);
            Assert.Equal(25378562, ruleF_PoolB_2.ReferencePosition);

            var ruleF_PoolB_3 = poolBVariants[3];

            Assert.Equal(25378563, ruleF_PoolB_3.ReferencePosition);

            var ruleF_Consensus_1 = combinedVariants[1];

            Assert.Equal(25378562, ruleF_Consensus_1.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, ruleF_Consensus_1.Genotype);
            Assert.Equal(0.0069, ruleF_Consensus_1.Frequency, 4);
            Assert.Equal(0, ruleF_Consensus_1.VariantQscore);
            Assert.Equal(1, ruleF_Consensus_1.Filters.Count);                //<-low VF tag will also get added by post-processing
            Assert.Equal(FilterType.PoolBias, ruleF_Consensus_1.Filters[0]); //<-low VF tag will also get added by post-processing
            Assert.Equal("C", ruleF_Consensus_1.ReferenceAllele);
            Assert.Equal("T", ruleF_Consensus_1.AlternateAllele);

            var ruleF_Consensus_2 = combinedVariants[2];

            Assert.Equal(25378562, ruleF_Consensus_2.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, ruleF_Consensus_2.Genotype);
            Assert.Equal(0.0069, ruleF_Consensus_2.Frequency, 4);
            Assert.Equal(0, ruleF_Consensus_2.VariantQscore);
            // These two assertions previously re-checked the first consensus line,
            // leaving this line's filters unverified.
            Assert.Equal(1, ruleF_Consensus_2.Filters.Count);                //<-low VF tag will also get added by post-processing
            Assert.Equal(FilterType.PoolBias, ruleF_Consensus_2.Filters[0]); //<-low VF tag will also get added by post-processing
            Assert.Equal("C", ruleF_Consensus_2.ReferenceAllele);
            Assert.Equal("TT", ruleF_Consensus_2.AlternateAllele);

            var ruleF_Consensus_3 = combinedVariants[3];

            Assert.Equal(25378562, ruleF_Consensus_3.ReferencePosition);
            Assert.Equal(Genotype.AltLikeNoCall, ruleF_Consensus_3.Genotype);
            Assert.Equal(0.0069, ruleF_Consensus_3.Frequency, 4);
            Assert.Equal(0, ruleF_Consensus_3.VariantQscore);
            // Same correction as above: check this line's own filters.
            Assert.Equal(1, ruleF_Consensus_3.Filters.Count);                //<-low VF tag will also get added by post-processing
            Assert.Equal(FilterType.PoolBias, ruleF_Consensus_3.Filters[0]); //<-low VF tag will also get added by post-processing
            Assert.Equal("CC", ruleF_Consensus_3.ReferenceAllele);
            Assert.Equal("T", ruleF_Consensus_3.AlternateAllele);

            var ruleF_Consensus_4 = combinedVariants[4];

            Assert.Equal(25378563, ruleF_Consensus_4.ReferencePosition);

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }
        }
コード例 #10
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        public void VennVcf_CombineTwoPoolVariants_RulesAthroughD_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S7.genome.vcf");
            var    PoolAVariants = (AlleleReader.GetAllVariantsInFile(VcfPath_PoolA)).ToList();

            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S8.genome.vcf");
            var    PoolBVariants = (AlleleReader.GetAllVariantsInFile(VcfPath_PoolB)).ToList();

            CalledAllele VariantA = PoolAVariants[0];
            CalledAllele VariantB = PoolBVariants[0];

            List <CalledAllele[]> pairs = VennProcessor.SelectPairs(
                new List <CalledAllele>()
            {
                VariantA
            },
                new List <CalledAllele>
            {
                VariantB
            });

            VariantComparisonCase ComparisonCase   = VennProcessor.GetComparisonCase(pairs[0][0], pairs[0][1]);
            ConsensusBuilder      consensusBuilder = new ConsensusBuilder("", parameters);
            CalledAllele          Consensus        = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            //Rule "A" test
            //A	if combined VF<1% and less than 2.6% in each pool, call REF
            //(note, we were Alt in one pool and ref in another)

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantA.Frequency, 0.9979, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0173, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(Consensus.Frequency, 0.9907, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will NOT added by post-processing b/c is ref call

            //B	if combined VF<1% and more than 2.6% in one pool, call NO CALL

            VariantA = PoolAVariants[1];
            VariantB = PoolBVariants[1];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0776, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9989, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0070, 4);
            Assert.Equal(Consensus.VariantQscore, 0);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType>
            {
                Pisces.Domain.Types.FilterType.PoolBias
            });                                          //<-low VF tag will also get added by post-processing

            //Rule "Ca" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and less than 1% in the other, call NO CALL w/PB

            VariantA = PoolAVariants[2];
            VariantB = PoolBVariants[2];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0367, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9976, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0117, 4);
            Assert.Equal(Consensus.VariantQscore, 23);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
                Pisces.Domain.Types.FilterType.PoolBias
            });
            //Rule "Cb" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and between 1% and 2.6% in the other, call NO CALL w/ no PB

            VariantA = PoolAVariants[3];
            VariantB = PoolBVariants[3];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.01725, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.03667, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.02347, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get added by post-processing

            //Rule "D" test
            //D	if combined VF>=2.6% call VARIANT (PB if only present in one pool, using 1% as the cutoff)

            VariantA = PoolAVariants[4];
            VariantB = PoolBVariants[4];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.2509, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0367, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(Consensus.Frequency, 0.1716, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get set by post processor
        }
コード例 #11
0
ファイル: VennProcessorTests.cs プロジェクト: tamsen/Pisces
        /// <summary>
        /// Runs a pairwise Venn on the small_S14 / small_S17 genome VCFs, then checks selected
        /// consensus calls and the contents of the four Venn set files read back from the output directory.
        /// </summary>
        /// <remarks>
        /// Regression test for a reported issue where a variant was in one pool at 1%, in the other
        /// at 0%, and showed up as 6% in the combined pool.
        /// </remarks>
        public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfPathPoolA = Path.Combine(vcfPathRoot, "small_S14.genome.vcf");
            string vcfPathPoolB = Path.Combine(vcfPathRoot, "small_S17.genome.vcf");

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = Path.Combine(outDir, "Consensus.vcf");
            parameters.OutputDirectory   = outDir;

            //remove a consensus file left over from an earlier run, so the existence check below means something.
            if (File.Exists(parameters.ConsensusFileName))
            {
                File.Delete(parameters.ConsensusFileName);
            }

            VennProcessor venn = new VennProcessor(new string[] { vcfPathPoolA, vcfPathPoolB }, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(parameters.ConsensusFileName));

            var combinedVariants = AlleleReader.GetAllVariantsInFile(parameters.ConsensusFileName);
            var aAndBVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
            var bAndAVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
            var aNotBVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
            var bNotAVariants    = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

            //poolA
            //chr1	     115258743	.	A	.	100	PASS	DP=35354	GT:GQ:AD:VF:NL:SB	0/0:100:30256:0.1442:20:-100.0000
            //chr1       115258743           .               AC          TT           100         PASS      DP=35354            GT:GQ:AD:VF:NL:SB                0/1:100:30720,4634:0.1311:20:-100.0000
            //chr1       115258744           .               C             .               100         PASS      DP=35253            GT:GQ:AD:VF:NL:SB                0/0:100:30277:0.1412:20:-100.0000
            //chr1       115258745           .               C             .               100         PASS      DP=35160            GT:GQ:AD:VF:NL:SB                0/0:100:35130:0.0009:20:-100.0000


            //poolB
            //chr1       115258743           .               AC          TT           100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:44202,5410:0.1090:20:-100.0000
            //chr1       115258743           .               A             T              100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:43362,670:0.0135:20:-46.0807
            //chr1       115258744           .               C             T              24           PASS      DP=49902            GT:GQ:AD:VF:NL:SB                0/1:24:43905,560:0.0112:20:-8.3857


            //when we had bug:
            //chr1       115258743           .               AC          TT           100.00   PASS      DP=84966            GT:GQ:AD:VF:NL:                0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
            //chr1       115258743           .               A             T              100.00   PB;LowVF            DP=49612            GT:GQ:AD:VF:NL:                ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
            //chr1       115258743           .               A             .               100.00   PASS      DP=35354            GT:GQ:AD:VF:NL:                0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
            //chr1       115258744           .               C             T              100.00   PB           DP=85155            GT:GQ:AD:VF:NL:                0/1:100:74182,5536:0.0650:20:-
            //(issue#1) at 743 we had a A->. in only one pool. It should be marked as BIAS and not PASS.
            //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

            //All assertions below use xUnit's (expected, actual) order so failure messages label the values correctly.

            var poolBiasedRefCall = combinedVariants[3];

            Assert.Equal(0.8558, poolBiasedRefCall.Frequency, 4);
            Assert.Equal(1, poolBiasedRefCall.Filters.Count);
            Assert.Equal(FilterType.PoolBias, poolBiasedRefCall.Filters[0]);
            Assert.Equal("A", poolBiasedRefCall.ReferenceAllele);
            Assert.Equal(".", poolBiasedRefCall.AlternateAllele);
            //this used to be a reference as a pass, even though it was only called in one pool.

            var refCallAt744 = combinedVariants[6];

            Assert.Equal(115258744, refCallAt744.ReferencePosition);
            Assert.Equal(0.8711, refCallAt744.Frequency, 4);
            Assert.Equal(0, refCallAt744.Filters.Count);
            Assert.Equal("C", refCallAt744.ReferenceAllele);
            Assert.Equal(".", refCallAt744.AlternateAllele);
            //when we had the bug, this used to get called at 6%.


            //now, check the Venn functionality:
            Assert.Equal(2, aAndBVariants.Count);
            Assert.Equal(2, bAndAVariants.Count);
            Assert.Equal(2, aNotBVariants.Count);
            Assert.Equal(0, bNotAVariants.Count);

            Assert.Equal(115258743, aAndBVariants[0].ReferencePosition);
            Assert.Equal("AC", aAndBVariants[0].ReferenceAllele);
            Assert.Equal("TT", aAndBVariants[0].AlternateAllele);

            Assert.Equal(115258747, aAndBVariants[1].ReferencePosition);
            Assert.Equal("C", aAndBVariants[1].ReferenceAllele);
            Assert.Equal("T", aAndBVariants[1].AlternateAllele);

            Assert.Equal(115258743, bAndAVariants[0].ReferencePosition);
            Assert.Equal("AC", bAndAVariants[0].ReferenceAllele);
            Assert.Equal("TT", bAndAVariants[0].AlternateAllele);

            Assert.Equal(115258747, bAndAVariants[1].ReferencePosition);
            Assert.Equal("C", bAndAVariants[1].ReferenceAllele);
            Assert.Equal("T", bAndAVariants[1].AlternateAllele);

            Assert.Equal(115258743, aNotBVariants[0].ReferencePosition);
            Assert.Equal("A", aNotBVariants[0].ReferenceAllele);
            Assert.Equal("T", aNotBVariants[0].AlternateAllele);

            Assert.Equal(115258744, aNotBVariants[1].ReferencePosition);
            Assert.Equal("C", aNotBVariants[1].ReferenceAllele);
            Assert.Equal("T", aNotBVariants[1].AlternateAllele);
        }
コード例 #12
0
        /// <summary>
        /// Stages a stitched variant read and a stitched reference read for one scenario, calls variants
        /// on that mock data, and writes one comma-separated result line (or a "Fail" line holding the
        /// exception) to the strand-bias summary file in <c>Options.OutputDirectory</c>.
        /// </summary>
        /// <param name="scenario">
        /// Scenario to run. Nothing is done unless both <c>ShouldRefStitch</c> and <c>ShouldStitch</c> are true.
        /// </param>
        /// <param name="resultFile">Not referenced in the body of this method.</param>
        public void TestForStrandBiasOnStitchingScenarios(StitchingScenario scenario, string resultFile)
        {
            //limit the scope of concern for now.
            if (scenario.ShouldRefStitch != true)
            {
                return;
            }


            //limit the scope of concern for now.
            if (scenario.ShouldStitch != true)
            {
                return;
            }

            var resultsSummary = Path.Combine(Options.OutputDirectory, StrandBiasSummaryFileName);

            //The summary file is opened inside the output directory, so the directory has to exist
            //before the stream is created. (Previously it was only created after the stream was opened,
            //so a fresh output directory made the FileStream constructor throw outside the try block.)
            if (!string.IsNullOrEmpty(Options.OutputDirectory) && !Directory.Exists(Options.OutputDirectory))
            {
                Directory.CreateDirectory(Options.OutputDirectory);
            }

            //NOTE(review): FileMode.OpenOrCreate neither truncates nor appends, so each call writes from
            //the start of any existing summary file - confirm whether Append or Create was intended.
            using (StreamWriter sw = new StreamWriter(new FileStream(resultsSummary, FileMode.OpenOrCreate)))
            {
                //sample the clock once so the day and time columns always describe the same instant.
                var now  = DateTime.Now;
                var day  = now.ToString("d"); //.net core
                var time = now.ToString("t"); //.net core

                var sb = new StringBuilder(
                    string.Join(",", day, time,
                                scenario.Category, scenario.Id));

                try
                {
                    var factory = new AmpliconTestFactory(new string('A', 100), sourceIsStitched: true);

                    byte qualityForAll      = 30;
                    int  numVariantCounts   = 2; // 10;
                    int  numReferenceCounts = 2; // 90;
                    var  varRead            = BuildRead(scenario.OutputRead1, qualityForAll, StageMNVdata(scenario));
                    var  refRead            = BuildRead(scenario.OutputRefRead1, qualityForAll, NoMNVdata(scenario));

                    //without a reference read there is nothing to stage; no summary line is written.
                    if (refRead == null)
                    {
                        return;
                    }

                    factory.StageStitchedVariant(
                        varRead, numVariantCounts,
                        refRead, numReferenceCounts);

                    var outputFileName = string.Format("{0}_{1}.vcf", scenario.Category, scenario.Id);
                    var vcfOutputPath  = Path.Combine(Options.OutputDirectory, outputFileName);
                    var biasOutputPath = StrandBiasFileWriter.GetBiasFilePath(vcfOutputPath);

                    //clear outputs from earlier runs so only fresh results are read back.
                    File.Delete(vcfOutputPath);
                    File.Delete(biasOutputPath);

                    StitchedReadBiasHelper.CallStrandedVariantsWithMockData(vcfOutputPath, Options, factory);
                    var varResults  = StitchedReadBiasHelper.GetResults(AlleleReader.GetAllVariantsInFile(vcfOutputPath));
                    var biasResults = StitchedReadBiasHelper.GetStrandResultsFromFile(biasOutputPath);

                    //"0" / "FN" are the placeholders reported when no variant / no bias result was found;
                    //otherwise the observed values are joined with ';'.
                    var observedFrequency = (varResults.Count == 0) ? "0" : "";
                    var observedSB        = (biasResults.Count == 0) ? "FN" : "";

                    for (int i = 0; i < varResults.Count; i++)
                    {
                        var varResult = varResults[i];
                        if (i != 0)
                        {
                            observedFrequency += ";";
                        }
                        observedFrequency += varResult.VariantFrequency;
                    }

                    for (int i = 0; i < biasResults.Count; i++)
                    {
                        var biasResult = biasResults[i];
                        if (i != 0)
                        {
                            observedSB += ";";
                        }
                        observedSB += biasResult.HasStrandBias;

                        //there should be no SB on our current set of stitched scenarios.
                        Assert.True(!biasResult.HasStrandBias);
                    }

                    var expectedValues = new List <string>()
                    {
                        "1", scenario.Frequency, scenario.ShouldBias
                    };

                    var observedValues = new List <string>()
                    {
                        varResults.Count.ToString(), observedFrequency, observedSB
                    };

                    sb.Append(GetResultString(expectedValues, observedValues));

                    sw.WriteLine(sb.ToString());
                }
                catch (Exception ex)
                {
                    //NOTE(review): this also catches the exception thrown by the Assert above, so a
                    //strand-bias assertion failure is only recorded in the summary file and is not
                    //propagated to the caller - confirm that is intended.
                    sb.Append(",Fail:  " + ex);
                    sw.WriteLine(sb.ToString());
                }
            }
        }
コード例 #13
0
        /// <summary>
        /// Runs the processor on SBWriter_Sample_S1.bam with <c>OutputBiasFiles</c> enabled and checks that
        /// the bias file exists, has exactly one row for every non-reference allele in the VCF, has no row
        /// for any reference call, and matches the expected bias file line for line. Then reruns with
        /// <c>OutputBiasFiles</c> disabled and checks that no bias file is produced.
        /// </summary>
        /// <param name="threadByChr">
        /// Passed through to <c>CreateAndExecuteProcessor</c>; when true the bias file is looked for
        /// under the "_chr19"-suffixed name.
        /// </param>
        private void Write_InFlow(bool threadByChr)
        {
            var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.bam");

            var vcfFilePath  = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.vcf");
            var biasFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

            if (threadByChr)
            {
                //Currently when threading by chrom we are outputting one bias file per chromosome.
                //This is not a customer-facing deliverable and is a low-priority feature.
                biasFilePath = biasFilePath + "_chr19";
            }
            var expectedBiasResultsPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr19");

            var applicationOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                IntervalPaths        = null,
                GenomePaths          = new[] { genomeDirectory },
                OutputBiasFiles      = true,
                DebugMode            = true,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            // Using GenomeProcessor
            //If OutputBiasFiles is true, should output one bias file per vcf
            var factory = new MockFactoryWithDefaults(applicationOptions);
            var genome  = factory.GetReferenceGenome(genomeDirectory);

            CreateAndExecuteProcessor(threadByChr, factory, genome);

            Assert.True(File.Exists(biasFilePath));

            //All variants that are present in VCF where ref!=alt should be included
            var biasFileContents = File.ReadAllLines(biasFilePath);

            //split every line once up front instead of re-splitting it for each column of each allele.
            var biasFileColumns = biasFileContents.Select(l => l.Split('\t')).ToList();

            var alleles      = AlleleReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls = alleles.Where(a => a.AlternateAllele != ".").ToList();

            foreach (var variantCall in variantCalls)
            {
                Console.WriteLine(variantCall);

                var position    = variantCall.ReferencePosition.ToString();
                var matchingRows = biasFileColumns.Count(cols => cols[0] == variantCall.Chromosome &&
                                                         cols[1] == position &&
                                                         cols[2] == variantCall.ReferenceAllele &&
                                                         cols[3] == variantCall.AlternateAllele);

                //each variant call must appear exactly once.
                Assert.Equal(1, matchingRows);
            }

            foreach (var refCall in alleles.Where(a => a.AlternateAllele == ".").ToList())
            {
                var position = refCall.ReferencePosition.ToString();

                //reference calls must not appear at all. (The earlier check only rejected a count of
                //exactly one, so two or more matching rows would have slipped through.)
                Assert.False(biasFileColumns.Any(cols => cols[0] == refCall.Chromosome &&
                                                 cols[1] == position &&
                                                 cols[2] == refCall.ReferenceAllele &&
                                                 cols[3] == refCall.AlternateAllele));
            }

            //Bias files should have expected contents
            var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);

            Assert.Equal(expectedBiasFileContents, biasFileContents);

            //If OutputBiasFiles is false, should not output any bias files
            File.Delete(biasFilePath);

            applicationOptions.OutputBiasFiles = false;
            factory = new MockFactoryWithDefaults(applicationOptions);
            genome  = factory.GetReferenceGenome(genomeDirectory);
            CreateAndExecuteProcessor(threadByChr, factory, genome);
            Assert.False(File.Exists(biasFilePath));
        }
コード例 #14
0
        /// <summary>
        /// Rewrites crushed.genome.vcf six times through <c>VcfUpdater.UpdateVcfAlleleByAllele</c>, each time
        /// with a different combination of update and skip/delete callbacks, and compares every rewritten
        /// file with its expected counterpart. The sixth rewrite is additionally spot-checked allele by allele.
        /// </summary>
        public void UpdateVcfTest_TestOnSingleAlleleAction()
        {
            var scratchDir = Path.Combine(TestPaths.LocalScratchDirectory, "VcfUpdaterTestsOutDir");
            var dataDir    = Path.Combine(TestPaths.LocalTestDataDirectory);
            var sourceVcf  = Path.Combine(dataDir, "crushed.genome.vcf");

            //rewritten files, in the order the scenarios are run below
            var rewrittenPaths = new[]
            {
                Path.Combine(scratchDir, "RewriteExample1.vcf"),
                Path.Combine(scratchDir, "RewriteExample2.vcf"),
                Path.Combine(scratchDir, "RewriteExample3.vcf"),
                Path.Combine(scratchDir, "RewriteExample4.vcf"),
                Path.Combine(scratchDir, "RewriteExample5.vcf"),
                Path.Combine(scratchDir, "RewriteExample6.vcf")
            };

            //expected result for each rewritten file, index for index
            var expectedPaths = new[]
            {
                Path.Combine(dataDir, "VcfReWriter_NoChangeToVariants.vcf"),
                Path.Combine(dataDir, "VcfReWriter_AllChangeToVariants.vcf"),
                Path.Combine(dataDir, "VcfReWriter_SomeChangeToVariants.vcf"),
                Path.Combine(dataDir, "VcfReWriter_RemoveAllVariants.vcf"),
                Path.Combine(dataDir, "VcfReWriter_RemoveSomeVariants.vcf"),
                Path.Combine(dataDir, "VcfReWriter_ComplexChangesVariants.vcf")
            };

            TestUtilities.TestHelper.RecreateDirectory(scratchDir);

            var myData  = new SomeData();
            var options = new VcfConsumerAppOptions();

            options.VcfPath = sourceVcf;

            //The AB filter is switched off because these tests predate it; that keeps the pre-existing vcf headers unchanged.
            options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

            //1: edit NO lines
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[0], options, true, myData, UpdateChrToFrog, CanAlwaysSkipVcfLine, GetVcfFileWriter);

            //2: edit ALL lines
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[1], options, true, myData, UpdateChrToFrog, CanNeverSkipVcfLine, GetVcfFileWriter);

            //3: do something silly to lines with a "C" allele
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[2], options, true, myData, UpdateChrToFrog, CanSometimesSkipVcfLine, GetVcfFileWriter);

            //4: remove all vcf entries
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[3], options, true, myData, UpdateChrToFrog, CanAlwaysDeleteVcfLine, GetVcfFileWriter);

            //5: remove all vcf entries with a "C" allele
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[4], options, true, myData, UpdateChrToFrog, CanSometimesDeleteVcfLine, GetVcfFileWriter);

            //6: the complicated one. Only lines with a "C" allele (ref or alt) are looked at:
            //   - if such a line has T as an alt, its chromosome is rewritten (the checks at the end expect "FrogChr");
            //   - if it does NOT have T as an alt, the line is deleted entirely.
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                rewrittenPaths[5], options, true, myData, UpdateChrToFrogOrDelete, CanSometimesSkipVcfLine, GetVcfFileWriter);

            //so, this one is left as is;
            //chr1    223906730.G.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000

            //for this one, the C->A should get removed, and the C->T should get the rewritten chromosome.
            //chr1    223906731.C   A,T 100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    1 / 2:100:254,254:532:0.95:20:-100.0000

            // these are also all removed
            //chr1    223906744.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    228526603.C.   100 PASS DP = 536  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:536:536:0.00:20:-100.0000
            //chr1    228526606.C.   100 PASS DP = 536  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:536:536:0.00:20:-100.0000
            //chr1    247812092.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812094.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812096.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812099.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812108.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr2    55862775.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000

            //check every rewritten file against its expected file, first to last
            for (int i = 0; i < rewrittenPaths.Length; i++)
            {
                TestUtilities.TestHelper.CompareFiles(rewrittenPaths[i], expectedPaths[i]);
            }

            //explicit checks for the complicated one, so users can see what we are looking for:
            var complexRewrite = AlleleReader.GetAllVariantsInFile(rewrittenPaths[5]);
            var untouchedInput = AlleleReader.GetAllVariantsInFile(sourceVcf);

            Assert.Equal(91, untouchedInput.Count());
            Assert.Equal(91 - 10, complexRewrite.Count()); //accounting for removed lines

            //first allele: identical in input and rewrite
            Assert.Equal(223906728, untouchedInput[0].ReferencePosition);
            Assert.Equal("chr1", untouchedInput[0].Chromosome);

            Assert.Equal(223906728, complexRewrite[0].ReferencePosition);
            Assert.Equal("chr1", complexRewrite[0].Chromosome);

            //fourth allele: same position, but the chromosome has been rewritten
            Assert.Equal(223906731, untouchedInput[3].ReferencePosition);
            Assert.Equal("chr1", untouchedInput[3].Chromosome);

            Assert.Equal(223906731, complexRewrite[3].ReferencePosition);
            Assert.Equal("FrogChr", complexRewrite[3].Chromosome);
        }