// Runs a pairwise Venn over two empty input VCFs and checks that a consensus
// file is written and that it contains no variants.
public void VennVcf_EmptyInputTest()
{
    var outDir = TestPaths.LocalTestDataDirectory;
    var vcfPathRoot = _TestDataPath;

    string vcfA = Path.Combine(vcfPathRoot, "Empty_S1.vcf");
    string vcfB = Path.Combine(vcfPathRoot, "Empty_S2.vcf");
    string outputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

    // Remove any consensus file left behind by an earlier run, so the
    // existence check below can only be satisfied by this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = outputPath;
    parameters.OutputDirectory = outDir;
    parameters.DebugMode = true;

    VennProcessor venn = new VennProcessor(new string[] { vcfA, vcfB }, parameters);
    venn.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);
    Assert.Empty(observedVariants);
}
// Runs a pairwise Venn over the two gtTests pool VCFs and checks that the
// consensus output matches the expected consensus file, allele by allele
// (compared through their string representation).
public void VennVcf_GtTest()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;

    string vcfA = Path.Combine(vcfPathRoot, "gtTests_S15.vcf");
    string vcfB = Path.Combine(vcfPathRoot, "gtTests_S18.vcf");
    string outputPath = Path.Combine(outDir, "gtConsensusOut.vcf");
    string expectedPath = Path.Combine(vcfPathRoot, "gtConsensus.vcf");

    // Remove any output left behind by an earlier run, so the checks below
    // can only be satisfied by the file this run produces.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = outputPath;
    parameters.OutputDirectory = outDir;

    VennProcessor venn = new VennProcessor(new string[] { vcfA, vcfB }, parameters);
    venn.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var expectedVariants = AlleleReader.GetAllVariantsInFile(expectedPath);
    var observedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

    Assert.Equal(expectedVariants.Count, observedVariants.Count);

    for (int i = 0; i < expectedVariants.Count; i++)
    {
        Assert.Equal(expectedVariants[i].ToString(), observedVariants[i].ToString());
    }
}
// Asserts that the two VCF files hold the same number of alleles and that the
// alleles match pairwise, in file order, by their string representation.
//
// expectedResultsFilePath: path to the VCF holding the expected alleles.
// actualResultsFilePath:   path to the VCF produced by the code under test.
private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
{
    List<CalledAllele> results = AlleleReader.GetAllVariantsInFile(actualResultsFilePath);
    List<CalledAllele> expected = AlleleReader.GetAllVariantsInFile(expectedResultsFilePath);

    // xUnit takes the expected value first; passing it second produces
    // misleading "Expected/Actual" text when the assertion fails.
    Assert.Equal(expected.Count, results.Count);

    for (int i = 0; i < expected.Count; i++)
    {
        Assert.Equal(expected[i].ToString(), results[i].ToString());
    }
}
// This is from an issue where there were multiple co-located variants in one pool,
// and just ref in the other, at chr15 92604460. The consensus answer should be
// a single ref call (and not multiple ref calls!).
public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;

    string vcfPathPoolA = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
    string vcfPathPoolB = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
    string vcfPathConsensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");
    string outputPath = Path.Combine(outDir, "Consensus2.vcf");

    // Start from a clean slate so the existence check below reflects this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.InputFiles = new string[] { vcfPathPoolA, vcfPathPoolB };
    parameters.OutputDirectory = outDir;
    parameters.ConsensusFileName = outputPath;

    VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);
    venn.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);
    var expectedVariants = AlleleReader.GetAllVariantsInFile(vcfPathConsensus);

    Assert.Equal(expectedVariants.Count, combinedVariants.Count);

    int numVariantsAtPos92604460 = 0;

    for (int i = 0; i < expectedVariants.Count; i++)
    {
        var expectedVariant = expectedVariants[i];
        var combinedVariant = combinedVariants[i];

        if ((combinedVariant.ReferencePosition == 92604460) && (combinedVariant.Chromosome == "chr15"))
        {
            numVariantsAtPos92604460++;
        }

        Assert.Equal(expectedVariant.ToString(), combinedVariant.ToString());
    }

    // Exactly one call is allowed at the problem locus.
    Assert.Equal(1, numVariantsAtPos92604460);
}
// Reads two "crushed" VCF files and checks the allele counts, plus the
// genotype, support, frequency and position values of the two alleles read
// from a 1/2 (two-alternate) site in each file.
public void UnpackAlleles()
{
    // Two example vcf files that have been "crushed".
    var paddedVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
    var genomeVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

    var paddedAlleles = AlleleReader.GetAllVariantsInFile(paddedVcfPath);
    var genomeAlleles = AlleleReader.GetAllVariantsInFile(genomeVcfPath);

    Assert.Equal(8, paddedAlleles.Count);  // 7 lines, but 8 alleles
    Assert.Equal(91, genomeAlleles.Count); // 90 lines, but 91 alleles

    var paddedFirstAlt = paddedAlleles[5];
    var genomeFirstAlt = genomeAlleles[3];
    var paddedSecondAlt = paddedAlleles[6];
    var genomeSecondAlt = genomeAlleles[4];

    // Example one:
    //   total depth = 5394, total variant count = 2387 + 2000 = 4387
    //   so, ref counts ~1007.
    // Example two:
    //   total depth = 532, total variant count = 254 + 254 = 508
    //   so, ref counts ~24.
    Assert.Equal(Genotype.HeterozygousAlt1Alt2, paddedFirstAlt.Genotype);
    Assert.Equal(Genotype.HeterozygousAlt1Alt2, genomeFirstAlt.Genotype);

    Assert.Equal(1007, paddedFirstAlt.ReferenceSupport);
    Assert.Equal(2387, paddedFirstAlt.AlleleSupport);
    Assert.Equal(0.4425, paddedFirstAlt.Frequency, 4);

    Assert.Equal(24, genomeFirstAlt.ReferenceSupport);
    Assert.Equal(254, genomeFirstAlt.AlleleSupport);

    Assert.Equal(10, paddedFirstAlt.ReferencePosition);
    Assert.Equal("AA", paddedFirstAlt.ReferenceAllele);
    Assert.Equal("GA", paddedFirstAlt.AlternateAllele);
    Assert.Equal(223906731, genomeFirstAlt.ReferencePosition);

    Assert.Equal(1007, paddedSecondAlt.ReferenceSupport);
    Assert.Equal(2000, paddedSecondAlt.AlleleSupport);
    Assert.Equal("G", paddedSecondAlt.AlternateAllele);
    Assert.Equal(0.3708, paddedSecondAlt.Frequency, 4);

    Assert.Equal(24, genomeSecondAlt.ReferenceSupport);
    Assert.Equal(254, genomeSecondAlt.AlleleSupport);

    Assert.Equal(223906731, genomeFirstAlt.ReferencePosition);
    Assert.Equal(10, paddedSecondAlt.ReferencePosition);
    Assert.Equal(223906731, genomeSecondAlt.ReferencePosition);
}
// Runs the variant caller for one BAM file and checks the VCF it writes.
//
// bamFilePath / intervalPath: used only when applicationOptions is null, to build
//     the default options (intervalPath may be null or empty for "no intervals").
// vcfFilePath: VCF to read back after the run; when doCheckReferences is true its
//     extension is switched to "genome.vcf" first.
// expectedVariants: passed to CheckVariants when doCheckVariants is true and
//     doCountsOnly is not positive.
// fakeReferences: when non-null, a MockGenome built from these replaces the genome
//     loaded from GenomeDirectory.
// doCheckReferences: also checks that the number of reference calls equals the
//     number of alleles that do not sit at a variant position.
// expectedNumCoveredPositions: not used by this method.
// threadByChr: selects which GenomeProcessor constructor overload is used.
// doCountsOnly: when > 0, only the number of non-reference calls is compared to it.
// doLog / callMnvs / collapse: copied into the default options (DebugMode, CallMNVs,
//     Collapse); ignored when applicationOptions is supplied.
// applicationOptions: caller-supplied options; OutputDirectory is always overwritten
//     with this runner's OutputDirectory.
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<CalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    PiscesApplicationOptions applicationOptions = null,
    bool collapse = true)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new PiscesApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputBiasFiles = true,
            DebugMode = doLog,
            CallMNVs = callMnvs,
            MaxGapBetweenMNV = 10,
            MaxSizeMNV = 15,
            Collapse = collapse,
            BamFilterParameters = new BamFilterParameters()
            {
                MinimumBaseCallQuality = 20
            },
            VariantCallingParameters = new VariantCallingParameters(),
            VcfWritingParameters = new VcfWritingParameters()
            {
                OutputGvcfFile = doCheckReferences,
            },
            CommandLineArguments = new string[] { "some", "cmds" }
        };
    }

    applicationOptions.OutputDirectory = OutputDirectory;

    var factory = GetFactory(applicationOptions);

    IGenome genome;
    if (fakeReferences == null)
    {
        genome = factory.GetReferenceGenome(GenomeDirectory);
    }
    else
    {
        genome = new MockGenome(fakeReferences, GenomeDirectory);
    }

    if (threadByChr)
    {
        var processor = new GenomeProcessor(factory, genome, false);
        processor.Execute(1);
    }
    else
    {
        var processor = new GenomeProcessor(factory, genome);
        processor.Execute(1);
    }

    var alleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

    if (doCheckVariants)
    {
        if (doCountsOnly > 0)
        {
            // Expected value goes first so failure messages read correctly.
            Assert.Equal(doCountsOnly, variantCalls.Count);
        }
        else
        {
            CheckVariants(variantCalls, expectedVariants);
        }
    }

    if (doCheckReferences)
    {
        var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

        // Build the set of variant positions once instead of re-projecting the
        // variant list for every allele.
        var variantPositions = new HashSet<int>(variantCalls.Select(v => v.ReferencePosition));
        var allelesAwayFromVariants = alleles.Count(a => !variantPositions.Contains(a.ReferencePosition));

        // make sure no reference calls at variant positions
        Assert.Equal(allelesAwayFromVariants, referenceAlleles.Count);
    }
}
// Calls variants on a stitched, collapsed BAM against a mock chr1 reference,
// checks the expected SNV through the functional test runner, and then
// compares the resulting genome VCF with the stored truth file.
public void StitchedCollapsedBamGroundTruth()
{
    // SNP ground truth from TingTing
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");
    var chr1GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1");
    var resultFilePath = Path.ChangeExtension(bamFilePath, "genome.vcf");

    var functionalTestRunner = new SomaticVariantCallerFunctionalTestSetup
    {
        GenomeDirectory = chr1GenomeDirectory
    };

    var bamFilters = new BamFilterParameters()
    {
        MinimumBaseCallQuality = 20,
        MinimumMapQuality = 1,
        OnlyUseProperPairs = false,
    };

    var callingParameters = new VariantCallingParameters()
    {
        MaximumVariantQScore = 100,
        MinimumVariantQScoreFilter = 30,
        MinimumVariantQScore = 20,
        MinimumCoverage = 10,
        MinimumFrequency = 0.01f,
        FilterOutVariantsPresentOnlyOneStrand = false,
        ForcedNoiseLevel = -1,
        NoiseModel = NoiseModel.Flat,
        StrandBiasModel = StrandBiasModel.Extended,
        AmpliconBiasFilterThreshold = 0.01F
    };

    var writingParameters = new Domain.Options.VcfWritingParameters()
    {
        OutputGvcfFile = true,
        ReportRcCounts = true,
        ReportTsCounts = true
    };

    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { chr1GenomeDirectory },
        OutputBiasFiles = true,
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        MaxGapBetweenMNV = 10,
        NoiseModelHalfWindow = 1,
        BamFilterParameters = bamFilters,
        VariantCallingParameters = callingParameters,
        VcfWritingParameters = writingParameters
    };

    // Time to build the fake sequences for testing.
    // The real bases cover position 9770498 ~ 9770669; everything before is padded with 'N'.
    var chr1Reference = new ChrReference()
    {
        Name = "chr1",
        Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
    };
    var mockChrRef = new List<ChrReference>() { chr1Reference };

    var expectedSnv = new CalledAllele(AlleleCategory.Snv)
    {
        ReferencePosition = 9770596,
        ReferenceAllele = "C",
        AlternateAllele = "A",
        Chromosome = "chr1"
    };
    var expectedAlleles = new List<CalledAllele> { expectedSnv };

    functionalTestRunner.Execute(bamFilePath, resultFilePath, null, expectedAlleles, mockChrRef, applicationOptions: appOptions);

    var truthDirectory = Path.GetDirectoryName(appOptions.BAMPaths[0]);
    var truthvcfFilePath = Path.Combine(truthDirectory, "test_truth.stitched.genome.vcf");

    // Both files are parsed here; the parsed alleles are not inspected further.
    var stitchedCollapsedTruth = AlleleReader.GetAllVariantsInFile(truthvcfFilePath);
    var stitchedCollapsedResults = AlleleReader.GetAllVariantsInFile(resultFilePath);

    TestUtilities.TestHelper.CompareFiles(truthvcfFilePath, resultFilePath);
}
// Runs the genome processor twice over the same small BAM with a mock chr19
// reference: first with MinimumCoverage = 1000 and gVCF output enabled, then
// with default calling parameters and gVCF output disabled. After each run
// the VCF at the "genome.vcf" path derived from the BAM name is read back.
// NOTE(review): neither parsed result is asserted on, so this only checks
// that both runs and both reads complete without throwing.
public void Pisces_LowDepthTest()
{
    var chr19 = new ChrReference()
    {
        Name = "chr19",
        Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
    };
    List<ChrReference> chrRef = new List<ChrReference>() { chr19 };

    // First run: high coverage threshold, gVCF output.
    var options = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        //IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        BamFilterParameters = new Domain.Options.BamFilterParameters()
        {
            MinimumBaseCallQuality = 20
        },
        VariantCallingParameters = new Domain.Options.VariantCallingParameters()
        {
            MinimumVariantQScore = 20,
            MinimumCoverage = 1000,
        },
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true,
        }
    };

    var vcfFilePath = Path.ChangeExtension(options.BAMPaths[0], "genome.vcf");

    var factory = new Factory(options);
    IGenome genomeRef = new MockGenome(chrRef, _genomeChr19);

    var bp = new GenomeProcessor(factory, genomeRef);
    bp.Execute(1);

    var coverage1000results = AlleleReader.GetAllVariantsInFile(vcfFilePath);

    // Second run: default filter/calling parameters, gVCF output switched off.
    options = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        // IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    factory = new Factory(options);
    bp = new GenomeProcessor(factory, genomeRef);
    bp.Execute(1);

    var coverage10results = AlleleReader.GetAllVariantsInFile(vcfFilePath);
}
// Runs a pairwise Venn over the RulesEandF pool VCFs and checks the inputs and
// the consensus output against consensus rules "E" and "F".
public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
{
    //Rule "E" test (ie an Alt+ref call converges to a REf, and we also had a ref call following it)
    //E	if we end up with multiple REF calls for the same loci, combine those .VCF lines into one ref call.

    //Rule "F" test (ie various alt calls all ended up as no-call. we dont want multiple no call lines in the vcf.)
    //F	if we end up with multiple NOCALL calls for the same loci, leave those .VCF lines separate

    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;

    string vcfPathPoolA = Path.Combine(vcfPathRoot, "RulesEandF_S1.genome.vcf");
    string vcfPathPoolB = Path.Combine(vcfPathRoot, "RulesEandF_S2.genome.vcf");
    string outputPath = Path.Combine(outDir, "outEandF.vcf");

    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = outputPath;
    parameters.OutputDirectory = outDir;

    VennProcessor vennVcf = new VennProcessor(new string[] { vcfPathPoolA, vcfPathPoolB }, parameters);
    vennVcf.DoPairwiseVenn();

    Assert.True(File.Exists(outputPath));

    var poolAVariants = AlleleReader.GetAllVariantsInFile(vcfPathPoolA);
    var poolBVariants = AlleleReader.GetAllVariantsInFile(vcfPathPoolB);
    var combinedVariants = AlleleReader.GetAllVariantsInFile(outputPath);

    //Rule "E" test (ie an Alt+ref call converges to a REf, and we also had a ref call following it)
    //E	if we end up with multiple REF calls for the same loci, combine those .VCF lines into one ref call.

    var variantA_1 = poolAVariants[0];
    Assert.Equal(Genotype.HomozygousRef, variantA_1.Genotype);
    Assert.Equal(1.0 - 0.0021, variantA_1.Frequency, 4); //note Vf here is the ref freq
    Assert.Equal(100, variantA_1.VariantQscore);
    Assert.Empty(variantA_1.Filters);
    Assert.Equal(25378561, variantA_1.ReferencePosition);

    var variantA_2 = poolAVariants[1];
    Assert.Equal(25378562, variantA_2.ReferencePosition);

    var variantB_1 = poolBVariants[0];
    Assert.Equal(Genotype.HeterozygousAltRef, variantB_1.Genotype); //"0/1"
    Assert.Equal(0.0173, variantB_1.Frequency, 4);
    Assert.Equal(100, variantB_1.VariantQscore);
    Assert.Empty(variantB_1.Filters);
    Assert.Equal(25378561, variantB_1.ReferencePosition);

    var variantB_2 = poolBVariants[1];
    Assert.Equal(Genotype.HomozygousRef, variantB_2.Genotype);
    Assert.Equal(0.9827, variantB_2.Frequency, 4); //note Vf here is the ref freq
    Assert.Equal(100, variantB_2.VariantQscore);
    Assert.Empty(variantB_2.Filters);
    Assert.Equal(25378561, variantB_2.ReferencePosition);

    var consensus_1 = combinedVariants[0];
    Assert.Equal(Genotype.HomozygousRef, consensus_1.Genotype);
    Assert.Equal(0.9907, consensus_1.Frequency, 4); //slightly improved from .008. //note Vf here is the ref freq
    Assert.Equal(100, consensus_1.VariantQscore);
    Assert.Empty(consensus_1.Filters); //<-low VF tag will NOT added by post-processing b/c is ref call
    Assert.Equal(25378561, consensus_1.ReferencePosition);

    var consensus_2 = combinedVariants[1];
    Assert.Equal(25378562, consensus_2.ReferencePosition);

    //Rule "F" test (ie various alt calls all ended up as no-call.
    //F	if we end up with multiple NOCALL calls for the same loci, leave those .VCF lines separate

    variantA_1 = poolAVariants[1];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA_1.Genotype); //"0/1"
    Assert.Equal(0.0776, variantA_1.Frequency, 4);
    Assert.Equal(100, variantA_1.VariantQscore);
    Assert.Empty(variantA_1.Filters);
    Assert.Equal(25378562, variantA_1.ReferencePosition);

    variantA_2 = poolAVariants[2];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA_2.Genotype); //"0/1"
    Assert.Equal(0.0776, variantA_2.Frequency, 4);
    Assert.Equal(100, variantA_2.VariantQscore);
    Assert.Empty(variantA_2.Filters);
    Assert.Equal(25378562, variantA_2.ReferencePosition);

    var variantA_3 = poolAVariants[3];
    Assert.Equal(Genotype.HeterozygousAltRef, variantA_3.Genotype); //"0/1"
    Assert.Equal(0.0776, variantA_3.Frequency, 4);
    Assert.Equal(100, variantA_3.VariantQscore);
    Assert.Empty(variantA_3.Filters);
    Assert.Equal(25378562, variantA_3.ReferencePosition);

    variantB_1 = poolBVariants[2];
    Assert.Equal(Genotype.HomozygousRef, variantB_1.Genotype);
    Assert.Equal(0.9989, variantB_1.Frequency, 4);
    Assert.Equal(100, variantB_1.VariantQscore);
    Assert.Empty(variantB_1.Filters);
    Assert.Equal(25378562, variantB_1.ReferencePosition);

    variantB_2 = poolBVariants[3];
    Assert.Equal(25378563, variantB_2.ReferencePosition);

    consensus_1 = combinedVariants[1];
    Assert.Equal(25378562, consensus_1.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus_1.Genotype);
    Assert.Equal(0.0069, consensus_1.Frequency, 4);
    Assert.Equal(0, consensus_1.VariantQscore);
    Assert.Equal(1, consensus_1.Filters.Count); //<-low VF tag will also get added by post-processing
    Assert.Equal(FilterType.PoolBias, consensus_1.Filters[0]);
    Assert.Equal("C", consensus_1.ReferenceAllele);
    Assert.Equal("T", consensus_1.AlternateAllele);

    // The filter checks for the second and third no-call are made on their own
    // alleles; previously they re-checked the first no-call by copy/paste.
    consensus_2 = combinedVariants[2];
    Assert.Equal(25378562, consensus_2.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus_2.Genotype);
    Assert.Equal(0.0069, consensus_2.Frequency, 4);
    Assert.Equal(0, consensus_2.VariantQscore);
    Assert.Equal(1, consensus_2.Filters.Count); //<-low VF tag will also get added by post-processing
    Assert.Equal(FilterType.PoolBias, consensus_2.Filters[0]);
    Assert.Equal("C", consensus_2.ReferenceAllele);
    Assert.Equal("TT", consensus_2.AlternateAllele);

    var consensus_3 = combinedVariants[3];
    Assert.Equal(25378562, consensus_3.ReferencePosition);
    Assert.Equal(Genotype.AltLikeNoCall, consensus_3.Genotype);
    Assert.Equal(0.0069, consensus_3.Frequency, 4);
    Assert.Equal(0, consensus_3.VariantQscore);
    Assert.Equal(1, consensus_3.Filters.Count); //<-low VF tag will also get added by post-processing
    Assert.Equal(FilterType.PoolBias, consensus_3.Filters[0]);
    Assert.Equal("CC", consensus_3.ReferenceAllele);
    Assert.Equal("T", consensus_3.AlternateAllele);

    var consensus_4 = combinedVariants[4];
    Assert.Equal(25378563, consensus_4.ReferencePosition);

    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }
}
// Combines the first five allele pairs of two pool VCFs one pair at a time with
// ConsensusBuilder.CombineVariants, and checks the inputs, the comparison case
// and the consensus allele against consensus rules "A" through "D".
public void VennVcf_CombineTwoPoolVariants_RulesAthroughD_Tests()
{
    var vcfPathRoot = _TestDataPath;
    string outputPath = Path.Combine(TestPaths.LocalScratchDirectory, "outEandF.vcf");

    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = outputPath;

    string vcfPathPoolA = Path.Combine(vcfPathRoot, "09H-03403-MT1-1_S7.genome.vcf");
    var poolAVariants = AlleleReader.GetAllVariantsInFile(vcfPathPoolA);

    string vcfPathPoolB = Path.Combine(vcfPathRoot, "09H-03403-MT1-1_S8.genome.vcf");
    var poolBVariants = AlleleReader.GetAllVariantsInFile(vcfPathPoolB);

    CalledAllele variantA = poolAVariants[0];
    CalledAllele variantB = poolBVariants[0];

    // The first pair goes through SelectPairs; the later pairs are compared directly.
    List<CalledAllele[]> pairs = VennProcessor.SelectPairs(
        new List<CalledAllele>() { variantA },
        new List<CalledAllele> { variantB });

    VariantComparisonCase comparisonCase = VennProcessor.GetComparisonCase(pairs[0][0], pairs[0][1]);
    ConsensusBuilder consensusBuilder = new ConsensusBuilder("", parameters);
    CalledAllele consensus = consensusBuilder.CombineVariants(variantA, variantB, comparisonCase);

    //Rule "A" test
    //A	if combined VF<1% and less than 2.6% in each pool, call REF
    //(note, we were Alt in one pool and ref in another)

    Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, variantA.Genotype);
    Assert.Equal(0.9979, variantA.Frequency, 4);
    Assert.Equal(100, variantA.VariantQscore);
    Assert.Empty(variantA.Filters);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantB.Genotype);
    Assert.Equal(0.0173, variantB.Frequency, 4);
    Assert.Equal(100, variantB.VariantQscore);
    Assert.Empty(variantB.Filters);

    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, comparisonCase);
    Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, consensus.Genotype);
    Assert.Equal(0.9907, consensus.Frequency, 4);
    Assert.Equal(100, consensus.VariantQscore);
    Assert.Empty(consensus.Filters); //<-low VF tag will NOT added by post-processing b/c is ref call

    //Rule "B" test
    //B	if combined VF<1% and more than 2.6% in one pool, call NO CALL

    variantA = poolAVariants[1];
    variantB = poolBVariants[1];

    comparisonCase = VennProcessor.GetComparisonCase(variantA, variantB);
    consensus = consensusBuilder.CombineVariants(variantA, variantB, comparisonCase);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantA.Genotype);
    Assert.Equal(0.0776, variantA.Frequency, 4);
    Assert.Equal(100, variantA.VariantQscore);
    Assert.Empty(variantA.Filters);

    Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, variantB.Genotype);
    Assert.Equal(0.9989, variantB.Frequency, 4);
    Assert.Equal(100, variantB.VariantQscore);
    Assert.Empty(variantB.Filters);

    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, comparisonCase);
    Assert.Equal(Pisces.Domain.Types.Genotype.AltLikeNoCall, consensus.Genotype);
    Assert.Equal(0.0070, consensus.Frequency, 4);
    Assert.Equal(0, consensus.VariantQscore);
    Assert.Equal(
        new List<Pisces.Domain.Types.FilterType> { Pisces.Domain.Types.FilterType.PoolBias },
        consensus.Filters); //<-low VF tag will also get added by post-processing

    //Rule "Ca" test
    //C-a	if combined 1%<VF<2.6%
    // and more than 2.6% in one pool and less than 1% in the other, call NO CALL w/PB

    variantA = poolAVariants[2];
    variantB = poolBVariants[2];

    comparisonCase = VennProcessor.GetComparisonCase(variantA, variantB);
    consensus = consensusBuilder.CombineVariants(variantA, variantB, comparisonCase);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantA.Genotype);
    Assert.Equal(0.0367, variantA.Frequency, 4);
    Assert.Equal(100, variantA.VariantQscore);
    Assert.Empty(variantA.Filters);

    Assert.Equal(Pisces.Domain.Types.Genotype.HomozygousRef, variantB.Genotype);
    Assert.Equal(0.9976, variantB.Frequency, 4);
    Assert.Equal(100, variantB.VariantQscore);
    Assert.Empty(variantB.Filters);

    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, comparisonCase);
    Assert.Equal(Pisces.Domain.Types.Genotype.AltLikeNoCall, consensus.Genotype);
    Assert.Equal(0.0117, consensus.Frequency, 4);
    Assert.Equal(23, consensus.VariantQscore);
    Assert.Equal(
        new List<Pisces.Domain.Types.FilterType> { Pisces.Domain.Types.FilterType.PoolBias },
        consensus.Filters);

    //Rule "Cb" test
    //C-b	if combined 1%<VF<2.6%
    // and more than 2.6% in one pool and between 1% and 2.6% in the other, call NO CALL w/ no PB

    variantA = poolAVariants[3];
    variantB = poolBVariants[3];

    comparisonCase = VennProcessor.GetComparisonCase(variantA, variantB);
    consensus = consensusBuilder.CombineVariants(variantA, variantB, comparisonCase);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantA.Genotype);
    Assert.Equal(0.01725, variantA.Frequency, 4);
    Assert.Equal(100, variantA.VariantQscore);
    Assert.Empty(variantA.Filters);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantB.Genotype);
    Assert.Equal(0.03667, variantB.Frequency, 4);
    Assert.Equal(100, variantB.VariantQscore);
    Assert.Empty(variantB.Filters);

    Assert.Equal(VariantComparisonCase.AgreedOnAlternate, comparisonCase);
    Assert.Equal(Pisces.Domain.Types.Genotype.AltLikeNoCall, consensus.Genotype);
    Assert.Equal(0.02347, consensus.Frequency, 4);
    Assert.Equal(100, consensus.VariantQscore);
    Assert.Empty(consensus.Filters); //<-low VF tag will also get added by post-processing

    //Rule "D" test
    //D	if combined VF>=2.6% call VARIANT (PB if only present in one pool, using 1% as the cutoff)

    variantA = poolAVariants[4];
    variantB = poolBVariants[4];

    comparisonCase = VennProcessor.GetComparisonCase(variantA, variantB);
    consensus = consensusBuilder.CombineVariants(variantA, variantB, comparisonCase);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantA.Genotype);
    Assert.Equal(0.2509, variantA.Frequency, 4);
    Assert.Equal(100, variantA.VariantQscore);
    Assert.Empty(variantA.Filters);

    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, variantB.Genotype);
    Assert.Equal(0.0367, variantB.Frequency, 4);
    Assert.Equal(100, variantB.VariantQscore);
    Assert.Empty(variantB.Filters);

    Assert.Equal(VariantComparisonCase.AgreedOnAlternate, comparisonCase);
    Assert.Equal(Pisces.Domain.Types.Genotype.HeterozygousAltRef, consensus.Genotype);
    Assert.Equal(0.1716, consensus.Frequency, 4);
    Assert.Equal(100, consensus.VariantQscore);
    Assert.Empty(consensus.Filters); //<-low VF tag will also get set by post processor
}
// Runs a pairwise Venn over the small_S14 / small_S17 pool VCFs, then checks two
// previously mis-called consensus alleles and the contents of the four Venn
// output files (A-and-B, B-and-A, A-not-B, B-not-A).
public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
{
    //this is from an issue anita had where a variant was in one pool at 1%, the other at 0%, and showed up as 6% in the combined pool.

    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;

    string vcfPathPoolA = Path.Combine(vcfPathRoot, "small_S14.genome.vcf");
    string vcfPathPoolB = Path.Combine(vcfPathRoot, "small_S17.genome.vcf");

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = Path.Combine(outDir, "Consensus.vcf");
    parameters.OutputDirectory = outDir;

    if (File.Exists(parameters.ConsensusFileName))
    {
        File.Delete(parameters.ConsensusFileName);
    }

    VennProcessor venn = new VennProcessor(new string[] { vcfPathPoolA, vcfPathPoolB }, parameters);
    venn.DoPairwiseVenn();

    Assert.True(File.Exists(parameters.ConsensusFileName));

    var combinedVariants = AlleleReader.GetAllVariantsInFile(parameters.ConsensusFileName);
    var aAndBVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
    var bAndAVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
    var aNotBVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
    var bNotAVariants = AlleleReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

    //poolA
    //chr1 115258743 . A . 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/0:100:30256:0.1442:20:-100.0000
    //chr1 115258743 . AC TT 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/1:100:30720,4634:0.1311:20:-100.0000
    //chr1 115258744 . C . 100 PASS DP=35253 GT:GQ:AD:VF:NL:SB 0/0:100:30277:0.1412:20:-100.0000
    //chr1 115258745 . C . 100 PASS DP=35160 GT:GQ:AD:VF:NL:SB 0/0:100:35130:0.0009:20:-100.0000

    //poolB
    //chr1 115258743 . AC TT 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:44202,5410:0.1090:20:-100.0000
    //chr1 115258743 . A T 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:43362,670:0.0135:20:-46.0807
    //chr1 115258744 . C T 24 PASS DP=49902 GT:GQ:AD:VF:NL:SB 0/1:24:43905,560:0.0112:20:-8.3857

    //when we had bug:
    //chr1 115258743 . AC TT 100.00 PASS DP=84966 GT:GQ:AD:VF:NL: 0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
    //chr1 115258743 . A T 100.00 PB;LowVF DP=49612 GT:GQ:AD:VF:NL: ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
    //chr1 115258743 . A . 100.00 PASS DP=35354 GT:GQ:AD:VF:NL: 0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
    //chr1 115258744 . C T 100.00 PB DP=85155 GT:GQ:AD:VF:NL: 0/1:100:74182,5536:0.0650:20:-

    //(issue#1) at 743 we had a A->. in only one pool. It should be marked as BIAS and not PASS.
    //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

    var funnyResult0 = combinedVariants[3];
    Assert.Equal(0.8558, funnyResult0.Frequency, 4);
    Assert.Equal(1, funnyResult0.Filters.Count);
    Assert.Equal(FilterType.PoolBias, funnyResult0.Filters[0]);
    Assert.Equal("A", funnyResult0.ReferenceAllele);
    Assert.Equal(".", funnyResult0.AlternateAllele);
    //this used to be a reference as a pass, even though it was only called in one pool.

    var funnyResult = combinedVariants[6];
    Assert.Equal(115258744, funnyResult.ReferencePosition);
    Assert.Equal(0.8711, funnyResult.Frequency, 4);
    Assert.Empty(funnyResult.Filters);
    Assert.Equal("C", funnyResult.ReferenceAllele);
    Assert.Equal(".", funnyResult.AlternateAllele);
    //when we had the bug, this used to get called at 6%.

    //now, check the Venn functionality:
    Assert.Equal(2, aAndBVariants.Count);
    Assert.Equal(2, bAndAVariants.Count);
    Assert.Equal(2, aNotBVariants.Count);
    Assert.Empty(bNotAVariants);

    Assert.Equal(115258743, aAndBVariants[0].ReferencePosition);
    Assert.Equal("AC", aAndBVariants[0].ReferenceAllele);
    Assert.Equal("TT", aAndBVariants[0].AlternateAllele);

    Assert.Equal(115258747, aAndBVariants[1].ReferencePosition);
    Assert.Equal("C", aAndBVariants[1].ReferenceAllele);
    Assert.Equal("T", aAndBVariants[1].AlternateAllele);

    Assert.Equal(115258743, bAndAVariants[0].ReferencePosition);
    Assert.Equal("AC", bAndAVariants[0].ReferenceAllele);
    Assert.Equal("TT", bAndAVariants[0].AlternateAllele);

    Assert.Equal(115258747, bAndAVariants[1].ReferencePosition);
    Assert.Equal("C", bAndAVariants[1].ReferenceAllele);
    Assert.Equal("T", bAndAVariants[1].AlternateAllele);

    Assert.Equal(115258743, aNotBVariants[0].ReferencePosition);
    Assert.Equal("A", aNotBVariants[0].ReferenceAllele);
    Assert.Equal("T", aNotBVariants[0].AlternateAllele);

    Assert.Equal(115258744, aNotBVariants[1].ReferencePosition);
    Assert.Equal("C", aNotBVariants[1].ReferenceAllele);
    Assert.Equal("T", aNotBVariants[1].AlternateAllele);
}
/// <summary>
/// Stages one stitching scenario as mock stitched reads, runs the stranded variant caller on it,
/// asserts that none of the strand-bias results read back from the bias file report strand bias,
/// and writes one comma-separated result line to the strand-bias summary file in
/// <c>Options.OutputDirectory</c>.
/// </summary>
/// <param name="scenario">
/// Scenario to stage. Nothing is done (and nothing is written) unless both
/// <c>ShouldRefStitch</c> and <c>ShouldStitch</c> are true.
/// </param>
/// <param name="resultFile">Not used by this method.</param>
public void TestForStrandBiasOnStitchingScenarios(StitchingScenario scenario, string resultFile)
{
    //limit the scope of concern for now: only scenarios where both the
    //reference read and the variant read are expected to stitch.
    if (scenario.ShouldRefStitch != true || scenario.ShouldStitch != true)
    {
        return;
    }

    //The summary file is opened inside the output directory, so the directory has to
    //exist before the stream is created. CreateDirectory does nothing if it already exists.
    Directory.CreateDirectory(Options.OutputDirectory);

    var resultsSummary = Path.Combine(Options.OutputDirectory, StrandBiasSummaryFileName);

    //NOTE(review): FileMode.OpenOrCreate starts writing at offset 0 and does not truncate,
    //so each call overwrites the beginning of any existing summary - confirm whether
    //FileMode.Append (or Create) was intended.
    using (StreamWriter sw = new StreamWriter(new FileStream(resultsSummary, FileMode.OpenOrCreate)))
    {
        var day = DateTime.Now.ToString("d"); //.net core
        var time = DateTime.Now.ToString("t"); //.net core
        var sb = new StringBuilder(string.Join(",", day, time, scenario.Category, scenario.Id));

        try
        {
            var factory = new AmpliconTestFactory(new string('A', 100), sourceIsStitched: true);

            byte qualityForAll = 30;
            int numVariantCounts = 2; // 10;
            int numReferenceCounts = 2; // 90;

            var varRead = BuildRead(scenario.OutputRead1, qualityForAll, StageMNVdata(scenario));
            var refRead = BuildRead(scenario.OutputRefRead1, qualityForAll, NoMNVdata(scenario));

            //without a reference read there is nothing to stage; leave without writing a summary line.
            if (refRead == null)
            {
                return;
            }

            factory.StageStitchedVariant(varRead, numVariantCounts, refRead, numReferenceCounts);

            var outputFileName = string.Format("{0}_{1}.vcf", scenario.Category, scenario.Id);
            var vcfOutputPath = Path.Combine(Options.OutputDirectory, outputFileName);
            var biasOutputPath = StrandBiasFileWriter.GetBiasFilePath(vcfOutputPath);

            //remove leftovers from earlier runs so the results read back below come from this run.
            File.Delete(vcfOutputPath);
            File.Delete(biasOutputPath);

            StitchedReadBiasHelper.CallStrandedVariantsWithMockData(vcfOutputPath, Options, factory);

            var varResults = StitchedReadBiasHelper.GetResults(AlleleReader.GetAllVariantsInFile(vcfOutputPath));
            var biasResults = StitchedReadBiasHelper.GetStrandResultsFromFile(biasOutputPath);

            //observed values are reported as ';'-separated lists; "0" / "FN" stand in for "nothing found".
            var observedFrequency = (varResults.Count == 0) ? "0" : "";
            var observedSB = (biasResults.Count == 0) ? "FN" : "";

            for (int i = 0; i < varResults.Count; i++)
            {
                if (i != 0)
                {
                    observedFrequency += ";";
                }

                observedFrequency += varResults[i].VariantFrequency;
            }

            for (int i = 0; i < biasResults.Count; i++)
            {
                var biasResult = biasResults[i];

                if (i != 0)
                {
                    observedSB += ";";
                }

                observedSB += biasResult.HasStrandBias;

                //there should be no SB on our current set of stitched scenarios.
                Assert.True(!biasResult.HasStrandBias);
            }

            var expectedValues = new List<string>() { "1", scenario.Frequency, scenario.ShouldBias };
            var observedValues = new List<string>() { varResults.Count.ToString(), observedFrequency, observedSB };

            sb.Append(GetResultString(expectedValues, observedValues));
            sw.WriteLine(sb.ToString());
        }
        catch (Exception ex)
        {
            //Any failure, including the assertion above, is recorded in the summary line and not rethrown.
            //NOTE(review): this means a failed assertion does not fail the calling test - confirm that is intended.
            sb.Append(",Fail: " + ex);
            sw.WriteLine(sb.ToString());
        }
    }
}
/// <summary>
/// Runs the processor over the SBWriter sample BAM with bias-file output enabled and checks the
/// strand-bias file it produces, then repeats the run with bias-file output disabled and checks
/// that no bias file is written.
/// </summary>
/// <param name="threadByChr">
/// Passed through to <c>CreateAndExecuteProcessor</c>. When true, the bias file is expected
/// under the usual name with a "_chr19" suffix.
/// </param>
private void Write_InFlow(bool threadByChr)
{
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.bam");
    var vcfFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.vcf");
    var biasFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

    if (threadByChr)
    {
        //Currently when threading by chrom we are outputting one bias file per chromosome.
        //This is not a customer-facing deliverable and is a low-priority feature.
        biasFilePath = biasFilePath + "_chr19";
    }

    var expectedBiasResultsPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr19");

    var applicationOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { genomeDirectory },
        OutputBiasFiles = true,
        DebugMode = true,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    // Using GenomeProcessor
    //If OutputBiasFiles is true, should output one bias file per vcf
    var factory = new MockFactoryWithDefaults(applicationOptions);
    var genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.True(File.Exists(biasFilePath));

    var biasFileContents = File.ReadAllLines(biasFilePath);

    //Split every bias line into its tab-separated columns once, instead of re-splitting
    //the same line for each column of each allele comparison.
    var biasRows = biasFileContents.Select(line => line.Split('\t')).ToList();

    var alleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);

    //All variants that are present in VCF where ref!=alt should be included, exactly once each.
    foreach (var variantCall in alleles.Where(a => a.AlternateAllele != ".").ToList())
    {
        Console.WriteLine(variantCall);

        var position = variantCall.ReferencePosition.ToString();
        var matchingRows = biasRows.Count(row =>
            row[0] == variantCall.Chromosome &&
            row[1] == position &&
            row[2] == variantCall.ReferenceAllele &&
            row[3] == variantCall.AlternateAllele);

        Assert.Equal(1, matchingRows);
    }

    //Reference calls must not appear in the bias file at all. Asserting on zero (rather than
    //"not exactly one") also catches a reference call that was written more than once.
    foreach (var refCall in alleles.Where(a => a.AlternateAllele == ".").ToList())
    {
        var position = refCall.ReferencePosition.ToString();
        var matchingRows = biasRows.Count(row =>
            row[0] == refCall.Chromosome &&
            row[1] == position &&
            row[2] == refCall.ReferenceAllele &&
            row[3] == refCall.AlternateAllele);

        Assert.Equal(0, matchingRows);
    }

    //Bias files should have expected contents
    var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);
    Assert.Equal(expectedBiasFileContents, biasFileContents);

    //If OutputBiasFiles is false, should not output any bias files
    File.Delete(biasFilePath);

    applicationOptions.OutputBiasFiles = false;
    factory = new MockFactoryWithDefaults(applicationOptions);
    genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.False(File.Exists(biasFilePath));
}
/// <summary>
/// Rewrites "crushed.genome.vcf" six times through <c>VcfUpdater.UpdateVcfAlleleByAllele</c>, each
/// time with a different update/skip/delete callback combination, compares every rewritten file
/// with its expected counterpart, and then spot-checks the variants of the sixth rewrite.
/// </summary>
public void UpdateVcfTest_TestOnSingleAlleleAction()
{
    var outDir = Path.Combine(TestPaths.LocalScratchDirectory, "VcfUpdaterTestsOutDir");
    var inputDir = Path.Combine(TestPaths.LocalTestDataDirectory);
    var inputVcfFilePath = Path.Combine(inputDir, "crushed.genome.vcf");

    //rewritten files, one per scenario, in scenario order
    var rewrittenPaths = new[]
    {
        Path.Combine(outDir, "RewriteExample1.vcf"),
        Path.Combine(outDir, "RewriteExample2.vcf"),
        Path.Combine(outDir, "RewriteExample3.vcf"),
        Path.Combine(outDir, "RewriteExample4.vcf"),
        Path.Combine(outDir, "RewriteExample5.vcf"),
        Path.Combine(outDir, "RewriteExample6.vcf")
    };

    //expected files, index-aligned with rewrittenPaths
    var expectedPaths = new[]
    {
        Path.Combine(inputDir, "VcfReWriter_NoChangeToVariants.vcf"),
        Path.Combine(inputDir, "VcfReWriter_AllChangeToVariants.vcf"),
        Path.Combine(inputDir, "VcfReWriter_SomeChangeToVariants.vcf"),
        Path.Combine(inputDir, "VcfReWriter_RemoveAllVariants.vcf"),
        Path.Combine(inputDir, "VcfReWriter_RemoveSomeVariants.vcf"),
        Path.Combine(inputDir, "VcfReWriter_ComplexChangesVariants.vcf")
    };

    TestUtilities.TestHelper.RecreateDirectory(outDir);

    var payload = new SomeData();
    var appOptions = new VcfConsumerAppOptions();
    appOptions.VcfPath = inputVcfFilePath;

    //The AB filter is switched off because these tests predate it;
    //this lets the pre-existing vcf headers stay the same.
    appOptions.VariantCallingParams.AmpliconBiasFilterThreshold = null;

    //scenario 1: edit NO lines
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[0], appOptions, true, payload, UpdateChrToFrog, CanAlwaysSkipVcfLine, GetVcfFileWriter);

    //scenario 2: edit ALL lines
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[1], appOptions, true, payload, UpdateChrToFrog, CanNeverSkipVcfLine, GetVcfFileWriter);

    //scenario 3: do something silly to lines with a "C" allele
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[2], appOptions, true, payload, UpdateChrToFrog, CanSometimesSkipVcfLine, GetVcfFileWriter);

    //scenario 4: remove all vcf entries
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[3], appOptions, true, payload, UpdateChrToFrog, CanAlwaysDeleteVcfLine, GetVcfFileWriter);

    //scenario 5: remove all vcf entries with a "C" allele
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[4], appOptions, true, payload, UpdateChrToFrog, CanSometimesDeleteVcfLine, GetVcfFileWriter);

    //scenario 6: look only at lines with a "C" allele (ref or alt).
    //  - if such a line has T as an alt, its chromosome is rewritten (an older note here called the
    //    new value "MadeAChangeHERE"; the assertions at the end of this test expect "FrogChr").
    //  - if such a line does NOT have T as an alt, the line is deleted entirely.
    //Worked through on the input:
    //  chr1:223906730 G>.   has no C allele, so it is left as is.
    //  chr1:223906731 C>A,T the C>A allele is removed; the C>T allele is kept with the rewritten chr.
    //  the C>. reference calls at chr1:223906744, chr1:228526603, chr1:228526606, chr1:247812092,
    //  chr1:247812094, chr1:247812096, chr1:247812099, chr1:247812108 and chr2:55862775 are all removed.
    //That is ten removed alleles in total.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        rewrittenPaths[5], appOptions, true, payload, UpdateChrToFrogOrDelete, CanSometimesSkipVcfLine, GetVcfFileWriter);

    //check files
    for (var scenarioIndex = 0; scenarioIndex < rewrittenPaths.Length; scenarioIndex++)
    {
        TestUtilities.TestHelper.CompareFiles(rewrittenPaths[scenarioIndex], expectedPaths[scenarioIndex]);
    }

    //explicit checks for the complicated one, so users can see what we are looking for:
    var rewrittenVariants = AlleleReader.GetAllVariantsInFile(rewrittenPaths[5]);
    var inputVariants = AlleleReader.GetAllVariantsInFile(inputVcfFilePath);

    Assert.Equal(91, inputVariants.Count);
    Assert.Equal(91 - 10, rewrittenVariants.Count); //accounting for removed lines

    //first allele: untouched by the rewrite
    Assert.Equal(223906728, inputVariants[0].ReferencePosition);
    Assert.Equal("chr1", inputVariants[0].Chromosome);
    Assert.Equal(223906728, rewrittenVariants[0].ReferencePosition);
    Assert.Equal("chr1", rewrittenVariants[0].Chromosome);

    //fourth allele: same position, chromosome rewritten in the output
    Assert.Equal(223906731, inputVariants[3].ReferencePosition);
    Assert.Equal("chr1", inputVariants[3].Chromosome);
    Assert.Equal(223906731, rewrittenVariants[3].ReferencePosition);
    Assert.Equal("FrogChr", rewrittenVariants[3].Chromosome);
}