/// <summary>
/// Runs the pairwise Venn over two empty VCF inputs and checks that the
/// consensus file exists afterwards and contains no variants.
/// </summary>
public void VennVcf_EmptyInputTest()
{
    var outputFolder = TestPaths.LocalTestDataDirectory;
    var inputFolder = _TestDataPath;
    var inputVcfs = new string[]
    {
        Path.Combine(inputFolder, "Empty_S1.vcf"),
        Path.Combine(inputFolder, "Empty_S2.vcf")
    };

    // The consensus file name handed to the processor and the path checked
    // below are the same file.
    var consensusPath = Path.Combine(outputFolder, "EmptyConsensus.vcf");

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = consensusPath;
    options.OutputDirectory = outputFolder;
    options.DebugMode = true;

    var processor = new VennProcessor(inputVcfs, options);
    processor.DoPairwiseVenn(false);

    Assert.True(File.Exists(consensusPath));

    var observed = VcfReader.GetAllVariantsInFile(consensusPath);
    Assert.Equal(0, observed.Count);
}
/// <summary>
/// Runs the pairwise Venn on the two genotype test VCFs and compares the
/// consensus output, variant by variant, with the expected consensus file.
/// </summary>
public void VennVcf_GtTest()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var VcfPathRoot = _TestDataPath;
    string VcfA = Path.Combine(VcfPathRoot, "gtTests_S15.vcf");
    string VcfB = Path.Combine(VcfPathRoot, "gtTests_S18.vcf");
    string OutputPath = Path.Combine(outDir, "gtConsensusOut.vcf");
    string ExpectedPath = Path.Combine(VcfPathRoot, "gtConsensus.vcf");

    // Remove any consensus left in the scratch folder by an earlier run, so
    // the File.Exists check below can only pass on output from this run.
    if (File.Exists(OutputPath))
    {
        File.Delete(OutputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = OutputPath;
    parameters.OutputDirectory = outDir;

    VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);
    Venn.DoPairwiseVenn(false);

    Assert.True(File.Exists(OutputPath));

    var expectedVariants = VcfReader.GetAllVariantsInFile(ExpectedPath);
    var observedVariants = VcfReader.GetAllVariantsInFile(OutputPath);
    Assert.Equal(expectedVariants.Count, observedVariants.Count);

    // Compare the string form of each variant so any field difference shows up.
    for (int i = 0; i < expectedVariants.Count; i++)
    {
        var ExpectedVariant = expectedVariants[i];
        var OutputVariant = observedVariants[i];
        Assert.Equal(ExpectedVariant.ToString(), OutputVariant.ToString());
    }
}
/// <summary>
/// Runs the BAM processor twice over the same small BAM against a mock chr19
/// genome: once with MinimumCoverage = 1000 and once without setting it, and
/// reads the resulting genome VCF after each run.
/// </summary>
/// <remarks>
/// The assertions on the two result sets are currently commented out, so this
/// test only verifies that both runs and both VCF reads complete.
/// </remarks>
public void CallSomaticVariants_LowDepthTest()
{
    var chr19 = new ChrReference()
    {
        Name = "chr19",
        Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
    };
    List<ChrReference> chrRef = new List<ChrReference>() { chr19 };

    // First run: high minimum coverage.
    var options = new ApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        //IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputgVCFFiles = true,
        MinimumCoverage = 1000,
        OutputFolder = UnitTestPaths.TestDataDirectory
    };

    // Both runs are read back from the same path, derived from the BAM path.
    var vcfFilePath = Path.ChangeExtension(options.BAMPaths[0], "genome.vcf");

    var factory = new Factory(options);
    IGenome genomeRef = new MockGenome(chrRef);

    var processor = new BamProcessor(factory, genomeRef);
    processor.Execute(1);
    List<VcfVariant> coverage1000results = VcfReader.GetAllVariantsInFile(vcfFilePath);

    // Second run: MinimumCoverage and MaxSizeMNV are left unset.
    options = new ApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        // IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputgVCFFiles = true,
        OutputFolder = UnitTestPaths.TestDataDirectory
    };

    factory = new Factory(options);
    processor = new BamProcessor(factory, genomeRef);
    processor.Execute(1);
    List<VcfVariant> coverage10results = VcfReader.GetAllVariantsInFile(vcfFilePath);

    // Assert.NotEqual(coverage1000results.Count, coverage10results.Count);
    // Assert.Equal(coverage1000results.Count, 84);
    // Assert.Equal(coverage10results.Count, 100);
}
/// <summary>
/// Asserts that two VCF files contain the same variants in the same order,
/// comparing the string form of each variant.
/// </summary>
/// <param name="expectedResultsFilePath">Path of the VCF holding the expected variants.</param>
/// <param name="actualResultsFilePath">Path of the VCF produced by the code under test.</param>
private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
{
    List<VcfVariant> results = VcfReader.GetAllVariantsInFile(actualResultsFilePath);
    List<VcfVariant> expected = VcfReader.GetAllVariantsInFile(expectedResultsFilePath);

    // xUnit's Assert.Equal takes (expected, actual); the failure message labels
    // the two numbers accordingly.
    Assert.Equal(expected.Count, results.Count);

    for (int i = 0; i < results.Count; i++)
    {
        Assert.Equal(expected[i].ToString(), results[i].ToString());
    }
}
/// <summary>
/// Regression test: one pool had multiple co-located variants at
/// chr15:92604460 and the other pool had only a reference call there. The
/// consensus must match the expected file and contain exactly one call at
/// that position.
/// </summary>
public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
{
    //this is from an issue where there were multiple co-located variants in one pool,
    //and just ref in the other, at chr15 92604460. The consensus answer should be
    // a single ref call (and not multiple ref calls!).
    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;
    string VcfPath_PoolA = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
    string VcfPath_PoolB = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
    string VcfPath_Consensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");
    string OutputPath = Path.Combine(outDir, "Consensus2.vcf");

    // Start from a clean slate so the existence check reflects this run only.
    if (File.Exists(OutputPath))
    {
        File.Delete(OutputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.InputFiles = new string[] { VcfPath_PoolA, VcfPath_PoolB };
    parameters.OutputDirectory = outDir; //Path.Combine(outDir, "RefMergeOut.vcf");
    parameters.ConsensusFileName = OutputPath;

    VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);
    venn.DoPairwiseVenn(false);

    Assert.True(File.Exists(OutputPath));

    List<VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(OutputPath);
    List<VcfVariant> ExpectedVariants = VcfReader.GetAllVariantsInFile(VcfPath_Consensus);
    Assert.Equal(ExpectedVariants.Count, CombinedVariants.Count);

    int NumVariantsAtPos92604460 = 0;
    for (int i = 0; i < ExpectedVariants.Count; i++)
    {
        VcfVariant EVariant = ExpectedVariants[i];
        VcfVariant Variant = CombinedVariants[i];

        if ((Variant.ReferencePosition == 92604460) && (Variant.ReferenceName == "chr15"))
        {
            NumVariantsAtPos92604460++;
        }

        Assert.Equal(EVariant.ToString(), Variant.ToString());
    }

    // Expected value first, so a failure reports "Expected: 1".
    Assert.Equal(1, NumVariantsAtPos92604460);
}
/// <summary>
/// Reads every variant in a VCF file and, for calls on chr1 or chr12 whose
/// position appears in <c>TableDataByChrAndPos</c>, records the first panel
/// entry at that position that <c>CheckForMatch</c> accepts.
/// </summary>
/// <param name="file">Path of the VCF file to read.</param>
/// <returns>
/// Mappings keyed by chromosome and then position. "chr12" and "chr1" are
/// always present as outer keys. A position that is in the panel table gets an
/// entry even when no panel entry matched, in which case its list is empty.
/// </returns>
public Dictionary<string, Dictionary<int, List<Mapping>>> IntersectVcfWithPanelVar(string file)
{
    List<VcfVariant> vcfCalls = VcfReader.GetAllVariantsInFile(file);

    var hitsByChrAndPos = new Dictionary<string, Dictionary<int, List<Mapping>>>();
    hitsByChrAndPos["chr12"] = new Dictionary<int, List<Mapping>>();
    hitsByChrAndPos["chr1"] = new Dictionary<int, List<Mapping>>();

    foreach (VcfVariant call in vcfCalls)
    {
        string chr = call.ReferenceName;
        int pos = call.ReferencePosition;

        // Only these two chromosomes are of interest.
        bool isTrackedChr = (chr == "chr12") || (chr == "chr1");
        if (!isTrackedChr)
        {
            continue;
        }

        if (!TableDataByChrAndPos.ContainsKey(chr))
        {
            continue;
        }

        var panelByPosition = TableDataByChrAndPos[chr];
        if (!panelByPosition.ContainsKey(pos))
        {
            continue;
        }

        // We have a variant or reference call at a position we care about.
        List<RefPanelEntry> panelEntries = panelByPosition[pos];
        var hitsForChr = hitsByChrAndPos[chr];
        if (!hitsForChr.ContainsKey(pos))
        {
            hitsForChr[pos] = new List<Mapping>();
        }

        // Keep only the first matching panel entry for this call.
        foreach (RefPanelEntry candidate in panelEntries)
        {
            if (!CheckForMatch(candidate, call))
            {
                continue;
            }

            hitsForChr[pos].Add(new Mapping { Var = call, Entry = candidate });
            break;
        }
    }

    return hitsByChrAndPos;
}
/// <summary>
/// Checks that a builder over <c>_sourceVcf0</c> yields no neighborhoods in
/// batch 0, and that a builder over <c>_sourceVcf_Mutated</c> yields exactly
/// one neighborhood with 12 variant sites.
/// </summary>
public void GetNeighborhoodsFromMessyVCF()
{
    var vcfFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "verymutated.genome.vcf");

    var neighborhoodManager = CreateNbhdBuilder(_sourceVcf0);
    Assert.Empty(neighborhoodManager.GetBatchOfNeighborhoods(0));

    // The parsed list is not consumed by the builder below; the read is kept so
    // the test still exercises VcfReader on this file as it did before.
    VcfReader.GetAllVariantsInFile(vcfFilePath);

    neighborhoodManager = CreateNbhdBuilder(_sourceVcf_Mutated);
    var neighborhoods = neighborhoodManager.GetBatchOfNeighborhoods(0);

    // Assert.Single checks the count and returns the element in one pass,
    // instead of enumerating once for Count() and again for First().
    var neighborhood = Assert.Single(neighborhoods);
    Assert.Equal(12, neighborhood.VcfVariantSites.Count());
}
/// <summary>
/// Checks that a builder over an empty variant list yields no neighborhoods,
/// and that a builder over the variants read from "verymutated.genome.vcf"
/// yields exactly one neighborhood with 12 variant sites.
/// </summary>
public void GetNeighborhoodsFromMessyVCF()
{
    var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "verymutated.genome.vcf");

    var neighborhoodManager = CreateBuilder(new List<VcfVariant>() { });
    Assert.Empty(neighborhoodManager.GetNeighborhoods());

    List<VcfVariant> LotsOfCoLocatedVariants = VcfReader.GetAllVariantsInFile(vcfFilePath);
    neighborhoodManager = CreateBuilder(LotsOfCoLocatedVariants);
    var neighborhoods = neighborhoodManager.GetNeighborhoods();

    // Assert.Single checks the count and returns the element in one pass,
    // instead of enumerating once for Count() and again for First().
    var neighborhood = Assert.Single(neighborhoods);
    Assert.Equal(12, neighborhood.VcfVariantSites.Count());
}
/// <summary>
/// Runs the genome processor twice over the same small BAM against a mock
/// chr19 genome: first with MinimumCoverage = 1000 and gVCF output enabled,
/// then with default calling parameters and gVCF output disabled. The genome
/// VCF path is read back after each run.
/// </summary>
/// <remarks>
/// The assertions on the two result sets are currently commented out, so this
/// test only verifies that both runs and both VCF reads complete.
/// NOTE(review): the second run sets OutputGvcfFile = false yet the same
/// "genome.vcf" path is read again - presumably that is the file left by the
/// first run; confirm before re-enabling the assertions.
/// </remarks>
public void Pisces_LowDepthTest()
{
    var chr19 = new ChrReference()
    {
        Name = "chr19",
        Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
    };
    List<ChrReference> chrRef = new List<ChrReference>() { chr19 };

    // First run: high minimum coverage, gVCF output on.
    var options = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        //IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        BamFilterParameters = new Domain.Options.BamFilterParameters()
        {
            MinimumBaseCallQuality = 20
        },
        VariantCallingParameters = new Domain.Options.VariantCallingParameters()
        {
            MinimumVariantQScore = 20,
            MinimumCoverage = 1000,
        },
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true,
        }
    };

    // Both runs are read back from the same path, derived from the BAM path.
    var vcfFilePath = Path.ChangeExtension(options.BAMPaths[0], "genome.vcf");

    var factory = new Factory(options);
    IGenome genomeRef = new MockGenome(chrRef, _genomeChr19);

    var processor = new GenomeProcessor(factory, genomeRef);
    processor.Execute(1);
    List<VcfVariant> coverage1000results = VcfReader.GetAllVariantsInFile(vcfFilePath);

    // Second run: default filtering/calling parameters, gVCF output off.
    options = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        // IntervalPaths = new[] { _intervalsChr17Chr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    factory = new Factory(options);
    processor = new GenomeProcessor(factory, genomeRef);
    processor.Execute(1);
    List<VcfVariant> coverage10results = VcfReader.GetAllVariantsInFile(vcfFilePath);

    // Assert.NotEqual(coverage1000results.Count, coverage10results.Count);
    // Assert.Equal(coverage1000results.Count, 84);
    // Assert.Equal(coverage10results.Count, 100);
}
/// <summary>
/// End-to-end check of strand-bias file output: with OutputBiasFiles enabled
/// the bias file must exist, list every variant call from the VCF exactly
/// once, not list reference calls exactly once, and match the expected file;
/// with OutputBiasFiles disabled no bias file may be written.
/// </summary>
/// <param name="threadByChr">
/// Passed through to <c>CreateAndExecuteProcessor</c>; when true the bias file
/// path carries a "_chr19" suffix.
/// </param>
private void Write_InFlow(bool threadByChr)
{
    var bamFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.bam");
    var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.genome.vcf");
    var biasFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

    if (threadByChr)
    {
        //Currently when threading by chrom we are outputting one bias file per chromosome.
        //This is not a customer-facing deliverable and is a low-priority feature.
        biasFilePath = biasFilePath + "_chr19";
    }

    var expectedBiasResultsPath = Path.Combine(UnitTestPaths.TestDataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");
    var genomeDirectory = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr19");

    var applicationOptions = new ApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { genomeDirectory },
        OutputBiasFiles = true,
        DebugMode = true,
        OutputgVCFFiles = true,
    };

    // Using GenomeProcessor
    //If OutputBiasFiles is true, should output one bias file per vcf
    var factory = new MockFactoryWithDefaults(applicationOptions);
    var genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.True(File.Exists(biasFilePath));

    //All variants that are present in VCF where ref!=alt should be included
    var biasFileContents = File.ReadAllLines(biasFilePath);

    // Split each bias line once up front rather than re-splitting it for every
    // column comparison of every allele.
    var biasRows = biasFileContents.Select(line => line.Split('\t')).ToList();

    var alleles = VcfReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

    foreach (var variantCall in variantCalls)
    {
        Console.WriteLine(variantCall);

        var position = variantCall.ReferencePosition.ToString();
        var matchingRows = biasRows.Count(row =>
            row[0] == variantCall.ReferenceName &&
            row[1] == position &&
            row[2] == variantCall.ReferenceAllele &&
            row[3] == variantCall.VariantAlleles.First());

        Assert.Equal(1, matchingRows);
    }

    foreach (var refCall in alleles.Where(a => a.VariantAlleles[0] == ".").ToList())
    {
        var position = refCall.ReferencePosition.ToString();
        var matchingRows = biasRows.Count(row =>
            row[0] == refCall.ReferenceName &&
            row[1] == position &&
            row[2] == refCall.ReferenceAllele &&
            row[3] == refCall.VariantAlleles.First());

        // Same condition as before: a reference call must not appear exactly once.
        Assert.NotEqual(1, matchingRows);
    }

    //Bias files should have expected contents
    var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);
    Assert.Equal(expectedBiasFileContents, biasFileContents);

    //If OutputBiasFiles is false, should not output any bias files
    File.Delete(biasFilePath);
    applicationOptions.OutputBiasFiles = false;

    factory = new MockFactoryWithDefaults(applicationOptions);
    genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.False(File.Exists(biasFilePath));
}
/// <summary>
/// Runs the genome processor over a BAM file and verifies the resulting VCF.
/// </summary>
/// <param name="bamFilePath">BAM to process (used only when <paramref name="applicationOptions"/> is null).</param>
/// <param name="vcfFilePath">VCF to read back; its extension is changed to "genome.vcf" when <paramref name="doCheckReferences"/> is true.</param>
/// <param name="intervalPath">Optional interval file; null or empty means no intervals.</param>
/// <param name="expectedVariants">Variants passed to <c>CheckVariants</c> when counts-only mode is off.</param>
/// <param name="fakeReferences">When non-null, a <c>MockGenome</c> built from these references replaces the factory's genome.</param>
/// <param name="doCheckVariants">Whether to verify the variant calls.</param>
/// <param name="doCheckReferences">Whether to request gVCF output and verify that no reference call shares a position with a variant call.</param>
/// <param name="expectedNumCoveredPositions">Not used by this method.</param>
/// <param name="threadByChr">Selects the three-argument <c>GenomeProcessor</c> constructor (with <c>false</c>) instead of the two-argument one.</param>
/// <param name="doCountsOnly">When greater than zero, only the number of variant calls is compared against this value.</param>
/// <param name="doLog">Forwarded to <c>DebugMode</c> of the default options.</param>
/// <param name="callMnvs">Forwarded to <c>CallMNVs</c> of the default options.</param>
/// <param name="applicationOptions">Options to use as-is; when null, defaults are built from the other arguments.</param>
/// <param name="collapse">Forwarded to <c>Collapse</c> of the default options.</param>
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<CalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    ApplicationOptions applicationOptions = null,
    bool collapse = true)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new ApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputgVCFFiles = doCheckReferences,
            OutputBiasFiles = true,
            DebugMode = doLog,
            MinimumBaseCallQuality = 20,
            CallMNVs = callMnvs,
            MaxGapBetweenMNV = 10,
            MaxSizeMNV = 15,
            Collapse = collapse
        };
    }

    Logger.TryOpenLog(applicationOptions.LogFolder, applicationOptions.LogFileName);
    try
    {
        var factory = GetFactory(applicationOptions);

        IGenome genome;
        if (fakeReferences == null)
        {
            genome = factory.GetReferenceGenome(GenomeDirectory);
        }
        else
        {
            genome = new MockGenome(fakeReferences, GenomeDirectory);
        }

        if (threadByChr)
        {
            var processor = new GenomeProcessor(factory, genome, false);
            processor.Execute(1);
        }
        else
        {
            var processor = new GenomeProcessor(factory, genome);
            processor.Execute(1);
        }
    }
    finally
    {
        // Close the log even when processing throws, so a failing run does not
        // leave it open for whatever runs next.
        Logger.TryCloseLog();
    }

    var alleles = VcfReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

    if (doCheckVariants)
    {
        if (doCountsOnly > 0)
        {
            // Expected count first, per xUnit's (expected, actual) convention.
            Assert.Equal(doCountsOnly, variantCalls.Count);
        }
        else
        {
            CheckVariants(variantCalls, expectedVariants);
        }
    }

    if (doCheckReferences)
    {
        var referenceAlleles = alleles.Where(a => a.VariantAlleles[0] == ".").ToList();

        // make sure no reference calls at variant positions
        // (positions collected once into a set instead of rebuilding the
        // projection for every allele)
        var variantPositions = new HashSet<int>(variantCalls.Select(v => v.ReferencePosition));
        Assert.Equal(
            referenceAlleles.Count,
            alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
    }
}
// Disabled test, kept for reference: MNV within overlap region's edge.
//[Fact]
//[Trait("Category", "ReadStitching")]
//public void MNV_PartialOverlap()
//{
//    var outputFileName = "Mnv-PartialOverlap_S1.genome.vcf";
//    var test = new AmpliconMnvTest()
//    {
//        StitchPairedReads = true,
//        RequireXCTagToStitch = true,
//        ReadLength = 30,
//        ReferenceSequenceRelative = "GTTGGTCTTC" + "TATTTTATGCGAATTCTTCT" + "AAGATTCCCA",
//        VariantPositionRelative = 9,
//        ChangedSequence = "AAG",
//        VariantDepth = 25,
//        ReferenceDepth = 25
//    };
//    var expectedResults = new AmpliconTestResult()
//    {
//        VariantFrequency = 0.5f,
//        TotalDepth = 50,
//        VariantDepth = 25,
//        ReferenceDepth = 25
//    };
//    ExecuteTest(test, outputFileName, expectedResults);
//    test.RequireXCTagToStitch = false;
//    ExecuteTest(test, outputFileName, expectedResults);
//    // Note: don't test without stitch reads
//    // this test would fail because the two reads would detect different variants
//    // test.StitchPairedReads = false;
//    // ExecuteTest(test, outputFileName, expectedResults);
//    test.VariantDepth = 10;
//    test.ReferenceDepth = 90;
//    test.RequireXCTagToStitch = true;
//    expectedResults = new AmpliconTestResult()
//    {
//        VariantFrequency = 0.1f,
//        TotalDepth = 100,
//        VariantDepth = 10,
//        ReferenceDepth = 90
//    };
//    ExecuteTest(test, outputFileName, expectedResults);
//    test.RequireXCTagToStitch = false;
//    ExecuteTest(test, outputFileName, expectedResults);
//}

#region Helpers

/// <summary>
/// Stages mock reads for one amplicon scenario (insertion, deletion or MNV),
/// calls variants into a VCF, and checks that exactly one passing variant was
/// called with the expected allele, position, frequency and depths.
/// </summary>
/// <param name="test">Scenario to stage; its <c>ChrOffset</c> is overwritten with <c>CHR_OFFSET</c>.</param>
/// <param name="outputFileName">VCF file name, created under the test data directory.</param>
/// <param name="expectedResult">Expected frequency and depth values.</param>
private void ExecuteTest(AmpliconTest test, string outputFileName, AmpliconTestResult expectedResult)
{
    var appOptions = new ApplicationOptions()
    {
        BAMPaths = new[] { string.Empty },
        GenomePaths = new[] { _genomeChr19 },
        OutputFolder = UnitTestPaths.TestDataDirectory,
        OutputgVCFFiles = true,
        StitchReads = test.StitchPairedReads,
        RequireXCTagToStitch = test.RequireXCTagToStitch,
        CallMNVs = true,
        MaxSizeMNV = 3,
        MaxGapBetweenMNV = 1
    };

    var vcfOutputPath = Path.Combine(appOptions.OutputFolder, outputFileName);
    File.Delete(vcfOutputPath);

    test.ChrOffset = CHR_OFFSET;

    // test execution
    var factory = new AmpliconTestFactory(test.ReferenceSequenceAbsolute, test.StitchPairedReads);
    factory.ChrOffset = CHR_OFFSET;

    // Resolve the concrete scenario type once; at most one of these is used,
    // checked in the same insertion / deletion / MNV order as the staging below.
    var insertionTest = test as AmpliconInsertionTest;
    var deletionTest = test as AmpliconDeletionTest;
    var mnvTest = test as AmpliconMnvTest;

    if (insertionTest != null)
    {
        factory.StageInsertion(
            test.ReadLength,
            insertionTest.InsertionSequence,
            test.VariantPositionAbsolute,
            test.VariantDepth,
            test.ReferenceDepth);
    }
    else if (deletionTest != null)
    {
        factory.StageDeletion(
            test.ReadLength,
            deletionTest.NumberDeletedBases,
            test.VariantPositionAbsolute,
            test.VariantDepth,
            test.ReferenceDepth);
    }
    else if (mnvTest != null)
    {
        factory.StageMnv(
            test.ReadLength,
            mnvTest.ChangedSequence,
            test.VariantPositionAbsolute,
            test.VariantDepth,
            test.ReferenceDepth);
    }

    CallVariantsWithMockData(vcfOutputPath, appOptions, factory);

    var results = GetResults(VcfReader.GetAllVariantsInFile(vcfOutputPath));
    Assert.True(results.Count == 1);

    var result = results[0];

    if (insertionTest != null)
    {
        // The variant allele is compared from index 1 onward, i.e. without its first base.
        Assert.True(result.VariantAllele.Substring(1) == insertionTest.InsertionSequence);
    }
    else if (deletionTest != null)
    {
        // The reference allele length is the number of deleted bases plus one.
        Assert.True(result.ReferenceAllele.Length == deletionTest.NumberDeletedBases + 1);
    }
    else if (mnvTest != null)
    {
        Assert.True(result.VariantAllele == mnvTest.ChangedSequence);
    }

    Assert.True(result.Filters == "PASS");
    Assert.True(result.Filters.Split(',').Count() == 1);
    Assert.True(result.Position == test.VariantPositionAbsolute);

    VerifyEqual(result.VariantFrequency, expectedResult.VariantFrequency, 0.1f);
    VerifyEqual(result.TotalDepth, expectedResult.TotalDepth, 1f);
    VerifyEqual(result.VariantDepth, expectedResult.VariantDepth, 1f);
    VerifyEqual(result.ReferenceDepth, expectedResult.ReferenceDepth, 1f);
}
/// <summary>
/// For a stitching scenario in which both the variant and the reference reads
/// should stitch, stages mock stitched reads, calls variants, and appends a
/// comma-separated line of expected vs observed results to the strand-bias
/// summary file in <c>Options.OutputFolder</c>.
/// </summary>
/// <param name="scenario">Scenario describing the reads and the expected frequency / bias.</param>
/// <param name="resultFile">Not used by this method.</param>
/// <remarks>
/// Any exception raised while processing the scenario (including a failed
/// strand-bias assertion) is caught and recorded in the summary line as
/// ",Fail: ..." instead of propagating.
/// </remarks>
public void TestForStrandBiasOnStitchingScenarios(StitchingScenario scenario, string resultFile)
{
    //limit the scope of concern for now.
    if (scenario.ShouldRefStitch != true)
    {
        return;
    }

    //limit the scope of concern for now.
    if (scenario.ShouldStitch != true)
    {
        return;
    }

    // The summary file lives inside the output folder, so the folder has to
    // exist before the writer is opened. Creating it only after the writer
    // was already constructed could never help: the constructor would have
    // thrown first.
    if (!Directory.Exists(Options.OutputFolder))
    {
        Directory.CreateDirectory(Options.OutputFolder);
    }

    var resultsSummary = Path.Combine(Options.OutputFolder, StrandBiasSummaryFileName);

    using (StreamWriter sw = new StreamWriter(resultsSummary, true))
    {
        var sb = new StringBuilder(
            string.Join(",",
                DateTime.Today.ToShortDateString(),
                DateTime.Now.ToLongTimeString(),
                scenario.Category,
                scenario.Id));

        try
        {
            var factory = new AmpliconTestFactory(new string('A', 100), sourceIsStitched: true);

            byte qualityForAll = 30;
            int numVariantCounts = 2; // 10;
            int numReferenceCounts = 2; // 90;

            var varRead = BuildRead(scenario.OutputRead1, qualityForAll, StageMNVdata(scenario));
            var refRead = BuildRead(scenario.OutputRefRead1, qualityForAll, NoMNVdata(scenario));

            // No reference read to stage: nothing is recorded for this scenario.
            if (refRead == null)
            {
                return;
            }

            factory.StageStitchedVariant(
                varRead, numVariantCounts,
                refRead, numReferenceCounts);

            var outputFileName = string.Format("{0}_{1}.vcf", scenario.Category, scenario.Id);
            var vcfOutputPath = Path.Combine(Options.OutputFolder, outputFileName);
            var biasOutputPath = StrandBiasFileWriter.GetBiasFilePath(vcfOutputPath);

            File.Delete(vcfOutputPath);
            File.Delete(biasOutputPath);

            StitchedReadBiasHelper.CallStrandedVariantsWithMockData(vcfOutputPath, Options, factory);

            var varResults = StitchedReadBiasHelper.GetResults(VcfReader.GetAllVariantsInFile(vcfOutputPath));
            var biasResults = StitchedReadBiasHelper.GetStrandResultsFromFile(biasOutputPath);

            // Placeholders used when nothing was called / no bias row was written.
            var observedFrequency = (varResults.Count == 0) ? "0" : "";
            var observedSB = (biasResults.Count == 0) ? "FN" : "";

            for (int i = 0; i < varResults.Count; i++)
            {
                var varResult = varResults[i];
                if (i != 0)
                {
                    observedFrequency += ";";
                }
                observedFrequency += varResult.VariantFrequency;
            }

            for (int i = 0; i < biasResults.Count; i++)
            {
                var biasResult = biasResults[i];
                if (i != 0)
                {
                    observedSB += ";";
                }
                observedSB += biasResult.HasStrandBias;

                //there should be no SB on our current set of stitched scenarios.
                Assert.True(!biasResult.HasStrandBias);
            }

            var expectedValues = new List<string>() { "1", scenario.Frequency, scenario.ShouldBias };
            var observedValues = new List<string>() { varResults.Count.ToString(), observedFrequency, observedSB };

            sb.Append(GetResultString(expectedValues, observedValues));
            sw.WriteLine(sb.ToString());
        }
        catch (Exception ex)
        {
            sb.Append(",Fail: " + ex);
            sw.WriteLine(sb.ToString());
        }
    }
}
/// <summary>
/// Reads two "crushed" VCF files, checks the raw 1/2 (two-alt) records and the
/// records that follow them, then runs <c>VcfVariantUtilities.UnpackVariants</c>
/// and checks that each two-alt record has become two single-alt records at the
/// same position with the expected AD / VF values.
/// </summary>
public void UnpackAlleles()
{
    const int paddedPosition = 10;
    const int genomePosition = 223906731;

    //two example vcf files that have been "crushed".
    var paddedVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
    var genomeVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

    var paddedCrushed = VcfReader.GetAllVariantsInFile(paddedVcfPath);
    var genomeCrushed = VcfReader.GetAllVariantsInFile(genomeVcfPath);

    Assert.Equal(7, paddedCrushed.Count);
    Assert.Equal(90, genomeCrushed.Count);

    // 1/2 variants, and the record that follows each of them
    var crushedPadded = paddedCrushed[5];
    var crushedGenome = genomeCrushed[3];
    var crushedPaddedNext = paddedCrushed[6];
    var crushedGenomeNext = genomeCrushed[4];

    Assert.Equal(1, crushedPadded.Genotypes.Count);
    Assert.Equal(1, crushedGenome.Genotypes.Count);
    Assert.Equal(2, crushedPadded.VariantAlleles.Count());
    Assert.Equal(2, crushedGenome.VariantAlleles.Count());

    Assert.Equal("2387,2000", crushedPadded.Genotypes[0]["AD"]);
    Assert.Equal("0.8133", crushedPadded.Genotypes[0]["VF"]);
    Assert.Equal("254,254", crushedGenome.Genotypes[0]["AD"]);

    Assert.Equal("AA", crushedPadded.ReferenceAllele);
    Assert.Equal("GA", crushedPadded.VariantAlleles[0]);
    Assert.Equal("G", crushedPadded.VariantAlleles[1]);
    Assert.Equal(".", crushedPaddedNext.VariantAlleles[0]);

    Assert.Equal("0", crushedPaddedNext.Genotypes[0]["AD"]);
    Assert.Equal("532", crushedGenomeNext.Genotypes[0]["AD"]);

    // Before unpacking, the following record sits one position further on.
    Assert.Equal(paddedPosition, crushedPadded.ReferencePosition);
    Assert.Equal(genomePosition, crushedGenome.ReferencePosition);
    Assert.Equal(paddedPosition + 1, crushedPaddedNext.ReferencePosition);
    Assert.Equal(genomePosition + 1, crushedGenomeNext.ReferencePosition);

    var paddedUnpacked = VcfVariantUtilities.UnpackVariants(paddedCrushed);
    var genomeUnpacked = VcfVariantUtilities.UnpackVariants(genomeCrushed);

    // Each file gains exactly one record.
    Assert.Equal(8, paddedUnpacked.Count);
    Assert.Equal(91, genomeUnpacked.Count);

    var firstPadded = paddedUnpacked[5];
    var firstGenome = genomeUnpacked[3];
    var secondPadded = paddedUnpacked[6];
    var secondGenome = genomeUnpacked[4];

    //example one:
    //total depth = 5394, total variant count = 2387 + 2000 = 4387
    //so, ref counts ~1007.
    //example two:
    //total depth = 532, total variant count = 254 + 254 = 508
    //so, ref counts ~24.
    Assert.Equal(1, firstPadded.Genotypes.Count);
    Assert.Equal(1, firstGenome.Genotypes.Count);

    Assert.Equal("1007,2387", firstPadded.Genotypes[0]["AD"]);
    Assert.Equal("24,254", firstGenome.Genotypes[0]["AD"]);
    Assert.Equal("0.4425", firstPadded.Genotypes[0]["VF"]);

    Assert.Equal(1, firstPadded.VariantAlleles.Count());
    Assert.Equal(1, firstGenome.VariantAlleles.Count());
    Assert.Equal(1, secondPadded.VariantAlleles.Count());
    Assert.Equal(1, secondGenome.VariantAlleles.Count());

    Assert.Equal("1007,2000", secondPadded.Genotypes[0]["AD"]);
    Assert.Equal("24,254", secondGenome.Genotypes[0]["AD"]);

    Assert.Equal("AA", firstPadded.ReferenceAllele);
    Assert.Equal("GA", firstPadded.VariantAlleles[0]);
    Assert.Equal("G", secondPadded.VariantAlleles[0]);
    Assert.Equal("0.3708", secondPadded.Genotypes[0]["VF"]);

    // After unpacking, both halves of a pair share the original position.
    Assert.Equal(paddedPosition, firstPadded.ReferencePosition);
    Assert.Equal(genomePosition, firstGenome.ReferencePosition);
    Assert.Equal(paddedPosition, secondPadded.ReferencePosition);
    Assert.Equal(genomePosition, secondGenome.ReferencePosition);
}
/// <summary>
/// Regression test for pool-bias handling: combines two small pool VCFs and
/// checks two specific consensus records (a one-pool reference call that must
/// be flagged PB, and a position that must come out as a passing reference
/// call), then checks the contents of the four pairwise Venn output files.
/// </summary>
public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
{
    //this is from an issue anita had where a variant was in one pool at 1%, the other at 0%, and showed up as 6% in the combined pool.
    var outDir = TestPaths.LocalScratchDirectory;
    var VcfPathRoot = _TestDataPath;
    string VcfPath_PoolA = Path.Combine(VcfPathRoot, "small_S14.genome.vcf");
    string VcfPath_PoolB = Path.Combine(VcfPathRoot, "small_S17.genome.vcf");

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = Path.Combine(outDir, "Consensus.vcf");
    parameters.OutputDirectory = outDir;

    // Remove a consensus left by an earlier run so the existence check is meaningful.
    if (File.Exists(parameters.ConsensusFileName))
    {
        File.Delete(parameters.ConsensusFileName);
    }

    VennProcessor Venn = new VennProcessor(new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);
    Venn.DoPairwiseVenn(false);

    Assert.True(File.Exists(parameters.ConsensusFileName));

    List<VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(parameters.ConsensusFileName);
    List<VcfVariant> AandBVariants = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
    List<VcfVariant> BandAVariants = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
    List<VcfVariant> AnotBVariants = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
    List<VcfVariant> BnotAVariants = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

    //poolA
    //chr1 115258743 . A . 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/0:100:30256:0.1442:20:-100.0000
    //chr1 115258743 . AC TT 100 PASS DP=35354 GT:GQ:AD:VF:NL:SB 0/1:100:30720,4634:0.1311:20:-100.0000
    //chr1 115258744 . C . 100 PASS DP=35253 GT:GQ:AD:VF:NL:SB 0/0:100:30277:0.1412:20:-100.0000
    //chr1 115258745 . C . 100 PASS DP=35160 GT:GQ:AD:VF:NL:SB 0/0:100:35130:0.0009:20:-100.0000

    //poolB
    //chr1 115258743 . AC TT 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:44202,5410:0.1090:20:-100.0000
    //chr1 115258743 . A T 100 PASS DP=49612 GT:GQ:AD:VF:NL:SB 0/1:100:43362,670:0.0135:20:-46.0807
    //chr1 115258744 . C T 24 PASS DP=49902 GT:GQ:AD:VF:NL:SB 0/1:24:43905,560:0.0112:20:-8.3857

    //when we had bug:
    //chr1 115258743 . AC TT 100.00 PASS DP=84966 GT:GQ:AD:VF:NL: 0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
    //chr1 115258743 . A T 100.00 PB;LowVF DP=49612 GT:GQ:AD:VF:NL: ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
    //chr1 115258743 . A . 100.00 PASS DP=35354 GT:GQ:AD:VF:NL: 0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
    //chr1 115258744 . C T 100.00 PB DP=85155 GT:GQ:AD:VF:NL: 0/1:100:74182,5536:0.0650:20:-

    //(issue#1) at 743 we had a A->. in only one pool. It should be marked as BIAS and not PASS.
    //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

    // All assertions below use xUnit's (expected, actual) order so that a
    // failure message labels the two values correctly.
    VcfVariant FunnyResult0 = CombinedVariants[3];
    string VFstring = FunnyResult0.Genotypes[0]["VF"];
    Assert.Equal("0.144", VFstring);
    Assert.Equal("PB", FunnyResult0.Filters);
    Assert.Equal("A", FunnyResult0.ReferenceAllele);
    Assert.Equal(".", FunnyResult0.VariantAlleles[0]);
    //this used to be a reference as a pass, even though it was only called in one pool.

    VcfVariant FunnyResult = CombinedVariants[6];
    Assert.Equal(115258744, FunnyResult.ReferencePosition);
    VFstring = FunnyResult.Genotypes[0]["VF"];
    Assert.Equal("0.129", VFstring);
    Assert.Equal("PASS", FunnyResult.Filters);
    Assert.Equal("C", FunnyResult.ReferenceAllele);
    Assert.Equal(".", FunnyResult.VariantAlleles[0]);
    //when we had the bug, this used to get called at 6%.

    //now, check the Venn functionality:
    Assert.Equal(2, AandBVariants.Count);
    Assert.Equal(2, BandAVariants.Count);
    Assert.Equal(2, AnotBVariants.Count);
    Assert.Equal(0, BnotAVariants.Count);

    Assert.Equal(115258743, AandBVariants[0].ReferencePosition);
    Assert.Equal("AC", AandBVariants[0].ReferenceAllele);
    Assert.Equal("TT", AandBVariants[0].VariantAlleles[0]);
    Assert.Equal(115258747, AandBVariants[1].ReferencePosition);
    Assert.Equal("C", AandBVariants[1].ReferenceAllele);
    Assert.Equal("T", AandBVariants[1].VariantAlleles[0]);

    Assert.Equal(115258743, BandAVariants[0].ReferencePosition);
    Assert.Equal("AC", BandAVariants[0].ReferenceAllele);
    Assert.Equal("TT", BandAVariants[0].VariantAlleles[0]);
    Assert.Equal(115258747, BandAVariants[1].ReferencePosition);
    Assert.Equal("C", BandAVariants[1].ReferenceAllele);
    Assert.Equal("T", BandAVariants[1].VariantAlleles[0]);

    Assert.Equal(115258743, AnotBVariants[0].ReferencePosition);
    Assert.Equal("A", AnotBVariants[0].ReferenceAllele);
    Assert.Equal("T", AnotBVariants[0].VariantAlleles[0]);
    Assert.Equal(115258744, AnotBVariants[1].ReferencePosition);
    Assert.Equal("C", AnotBVariants[1].ReferenceAllele);
    Assert.Equal("T", AnotBVariants[1].VariantAlleles[0]);
}
/// <summary>
/// Exercises <c>ConsensusBuilder.CombineVariants</c> on the first five variant
/// pairs of two pool VCFs, one pair per combination rule (A, B, C-a, C-b, D),
/// checking each input allele, the comparison case, and the consensus allele.
/// </summary>
public void VennVcf_CombineTwoPoolVariants_RulesAthroughD_Tests()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var VcfPathRoot = _TestDataPath;
    string OutputPath = Path.Combine(outDir, "outEandF.vcf");

    if (File.Exists(OutputPath))
    {
        File.Delete(OutputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = OutputPath;

    string VcfPath_PoolA = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S7.genome.vcf");
    List<CalledAllele> PoolAVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolA)).ToList();

    string VcfPath_PoolB = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S8.genome.vcf");
    List<CalledAllele> PoolBVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolB)).ToList();

    CalledAllele VariantA = PoolAVariants[0];
    CalledAllele VariantB = PoolBVariants[0];

    // The first pair goes through SelectPairs so that pairing is exercised too.
    List<CalledAllele[]> pairs = VennProcessor.SelectPairs(
        new List<CalledAllele>() { VariantA },
        new List<CalledAllele> { VariantB });

    VariantComparisonCase ComparisonCase = VennProcessor.GetComparisonCase(pairs[0][0], pairs[0][1]);
    ConsensusBuilder consensusBuilder = new ConsensusBuilder("", parameters);
    CalledAllele Consensus = consensusBuilder.CombineVariants(VariantA, VariantB, ComparisonCase);

    //Rule "A" test
    //A if combined VF<1% and less than 2.6% in each pool, call REF
    //(note, we were Alt in one pool and ref in another)
    AssertAlleleMatches(VariantA, Pisces.Domain.Types.Genotype.HomozygousRef, 0.9979, 100);
    AssertAlleleMatches(VariantB, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.0173, 100);
    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, ComparisonCase);
    //<-low VF tag will NOT be added by post-processing b/c it is a ref call
    AssertAlleleMatches(Consensus, Pisces.Domain.Types.Genotype.HomozygousRef, 0.9907, 100);

    //Rule "B" test
    //B if combined VF<1% and more than 2.6% in one pool, call NO CALL
    VariantA = PoolAVariants[1];
    VariantB = PoolBVariants[1];
    ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
    Consensus = consensusBuilder.CombineVariants(VariantA, VariantB, ComparisonCase);

    AssertAlleleMatches(VariantA, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.0776, 100);
    AssertAlleleMatches(VariantB, Pisces.Domain.Types.Genotype.HomozygousRef, 0.9989, 100);
    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, ComparisonCase);
    //<-low VF tag will also get added by post-processing
    AssertAlleleMatches(Consensus, Pisces.Domain.Types.Genotype.AltLikeNoCall, 0.0070, 0,
        Pisces.Domain.Types.FilterType.PoolBias);

    //Rule "Ca" test
    //C-a if combined 1%<VF<2.6%
    // and more than 2.6% in one pool and less than 1% in the other, call NO CALL w/PB
    VariantA = PoolAVariants[2];
    VariantB = PoolBVariants[2];
    ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
    Consensus = consensusBuilder.CombineVariants(VariantA, VariantB, ComparisonCase);

    AssertAlleleMatches(VariantA, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.0367, 100);
    AssertAlleleMatches(VariantB, Pisces.Domain.Types.Genotype.HomozygousRef, 0.9976, 100);
    Assert.Equal(VariantComparisonCase.OneReferenceOneAlternate, ComparisonCase);
    AssertAlleleMatches(Consensus, Pisces.Domain.Types.Genotype.AltLikeNoCall, 0.0117, 23,
        Pisces.Domain.Types.FilterType.PoolBias);

    //Rule "Cb" test
    //C-b if combined 1%<VF<2.6%
    // and more than 2.6% in one pool and between 1% and 2.6% in the other, call NO CALL w/ no PB
    VariantA = PoolAVariants[3];
    VariantB = PoolBVariants[3];
    ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
    Consensus = consensusBuilder.CombineVariants(VariantA, VariantB, ComparisonCase);

    AssertAlleleMatches(VariantA, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.01725, 100);
    AssertAlleleMatches(VariantB, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.03667, 100);
    Assert.Equal(VariantComparisonCase.AgreedOnAlternate, ComparisonCase);
    //<-low VF tag will also get added by post-processing
    AssertAlleleMatches(Consensus, Pisces.Domain.Types.Genotype.AltLikeNoCall, 0.02347, 100);

    //Rule "D" test
    //D if combined VF>=2.6% call VARIANT (PB if only present in one pool, using 1% as the cutoff)
    VariantA = PoolAVariants[4];
    VariantB = PoolBVariants[4];
    ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
    Consensus = consensusBuilder.CombineVariants(VariantA, VariantB, ComparisonCase);

    AssertAlleleMatches(VariantA, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.2509, 100);
    AssertAlleleMatches(VariantB, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.0367, 100);
    Assert.Equal(VariantComparisonCase.AgreedOnAlternate, ComparisonCase);
    //<-low VF tag will also get set by post processor
    AssertAlleleMatches(Consensus, Pisces.Domain.Types.Genotype.HeterozygousAltRef, 0.1716, 100);
}

/// <summary>
/// Asserts the genotype, frequency (to 4 decimal places), variant Q-score and
/// filter list of one allele, passing the expected value first to each xUnit
/// assertion.
/// </summary>
/// <param name="allele">Allele under test.</param>
/// <param name="expectedGenotype">Expected genotype.</param>
/// <param name="expectedFrequency">Expected frequency, compared at 4 decimal places.</param>
/// <param name="expectedQscore">Expected variant Q-score.</param>
/// <param name="expectedFilters">Expected filters, in order; none means an empty filter list.</param>
private static void AssertAlleleMatches(
    CalledAllele allele,
    Pisces.Domain.Types.Genotype expectedGenotype,
    double expectedFrequency,
    int expectedQscore,
    params Pisces.Domain.Types.FilterType[] expectedFilters)
{
    Assert.Equal(expectedGenotype, allele.Genotype);
    Assert.Equal(expectedFrequency, allele.Frequency, 4);
    Assert.Equal(expectedQscore, allele.VariantQscore);
    Assert.Equal(new List<Pisces.Domain.Types.FilterType>(expectedFilters), allele.Filters);
}
/// <summary>
/// Functional test on the stitched, collapsed test BAM
/// ("collapsed.test.stitched.bam"): hands the BAM, a mock chr1 reference and one
/// expected SNV (chr1:9770596 C&gt;A) to the functional test runner, then compares the
/// "US" genotype field of every truth-VCF record against the record written at the
/// same reference position in the produced genome VCF.
/// </summary>
public void StitchedCollapsedBamGroundTruth()
{
    // SNP ground truth from TingTing
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1");

    // The result VCF sits next to the BAM, with the extension swapped for "genome.vcf".
    var outputVcfPath = Path.ChangeExtension(bamFilePath, "genome.vcf");

    var functionalTestRunner = new SomaticVariantCallerFunctionalTestSetup();
    functionalTestRunner.GenomeDirectory = genomeDirectory;

    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { genomeDirectory },
        OutputBiasFiles = true,
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        MaxGapBetweenMNV = 10,
        NoiseModelHalfWindow = 1,
        BamFilterParameters = new Domain.Options.BamFilterParameters()
        {
            MinimumBaseCallQuality = 20,
            MinimumMapQuality = 1,
            OnlyUseProperPairs = false,
        },
        VariantCallingParameters = new Domain.Options.VariantCallingParameters()
        {
            MaximumVariantQScore = 100,
            MinimumVariantQScoreFilter = 30,
            MinimumVariantQScore = 20,
            MinimumCoverage = 10,
            MinimumFrequency = 0.01f,
            FilterOutVariantsPresentOnlyOneStrand = false,
            ForcedNoiseLevel = -1,
            NoiseModel = NoiseModel.Flat,
            StrandBiasModel = StrandBiasModel.Extended,
        },
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true,
            ReportRcCounts = true,
            ReportTsCounts = true
        }
    };

    // Time to build the fake sequences for testing.
    var mockChrRef = new List<ChrReference>()
    {
        new ChrReference()
        {
            // position 9770498 ~ 9770669
            // The real bases are left-padded with 'N' so they start at position 9770498.
            Name = "chr1",
            Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
        }
    };

    var expectedAlleles = new List<CalledAllele>
    {
        new CalledAllele(AlleleCategory.Snv)
        {
            ReferencePosition = 9770596,
            ReferenceAllele = "C",
            AlternateAllele = "A",
            Chromosome = "chr1"
        }
    };

    // NOTE(review): Execute presumably runs the caller, writes outputVcfPath and checks
    // expectedAlleles itself - confirm against SomaticVariantCallerFunctionalTestSetup.
    functionalTestRunner.Execute(bamFilePath, outputVcfPath, null, expectedAlleles, mockChrRef, applicationOptions: appOptions);

    // The truth VCF is looked up in the same directory as the BAM.
    var truthvcfFilePath = Path.Combine(Path.GetDirectoryName(appOptions.BAMPaths[0]), "test_truth.stitched.genome.vcf");
    var stitchedCollapsedTruth = VcfReader.GetAllVariantsInFile(truthvcfFilePath);
    var stitchedCollapsedResults = VcfReader.GetAllVariantsInFile(outputVcfPath);

    foreach (var variantTruth in stitchedCollapsedTruth)
    {
        // FirstOrDefault (rather than First) so that a truth position missing from the
        // results is reported by the NotNull assertion instead of an
        // InvalidOperationException thrown from inside LINQ.
        var variantActual = stitchedCollapsedResults.FirstOrDefault(x => x.ReferencePosition == variantTruth.ReferencePosition);
        Assert.NotNull(variantActual);
        Assert.Equal(variantTruth.Genotypes[0]["US"], variantActual.Genotypes[0]["US"]);
    }
}
/// <summary>
/// Runs a pairwise Venn on the two "RulesEandF" pool VCFs and checks the consensus
/// file against consensus rules "E" and "F":
/// E - if we end up with multiple REF calls for the same locus, those VCF lines are
///     combined into one ref call;
/// F - if we end up with multiple NOCALL calls for the same locus, those VCF lines
///     are left separate.
/// The input pool records are asserted as well, so a changed fixture is detected
/// before the consensus expectations are evaluated.
/// </summary>
public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;
    string poolAPath = Path.Combine(vcfPathRoot, "RulesEandF_S1.genome.vcf");
    string poolBPath = Path.Combine(vcfPathRoot, "RulesEandF_S2.genome.vcf");
    string outputPath = Path.Combine(outDir, "outEandF.vcf");

    // Remove any output left by an earlier run so the existence check below can only
    // be satisfied by this run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.VariantCallingParams.MinimumFrequency = 0.01f;
    parameters.ConsensusFileName = outputPath;
    parameters.OutputDirectory = outDir;

    VennProcessor vennVcf = new VennProcessor(new string[] { poolAPath, poolBPath }, parameters);
    vennVcf.DoPairwiseVenn(false);

    Assert.True(File.Exists(outputPath));

    List<VcfVariant> poolAVariants = VcfReader.GetAllVariantsInFile(poolAPath);
    List<VcfVariant> poolBVariants = VcfReader.GetAllVariantsInFile(poolBPath);
    List<VcfVariant> combinedVariants = VcfReader.GetAllVariantsInFile(outputPath);

    // Rule "E" test (ie an alt + ref call converges to a ref, and we also had a ref
    // call following it).
    // E: if we end up with multiple REF calls for the same locus, combine those VCF
    // lines into one ref call.
    AssertRulesEandFCall(poolAVariants[0], 25378561, "0/0", "0.0021", 100, "PASS");
    Assert.Equal(25378562, poolAVariants[1].ReferencePosition);

    AssertRulesEandFCall(poolBVariants[0], 25378561, "0/1", "0.0173", 100, "PASS");
    AssertRulesEandFCall(poolBVariants[1], 25378561, "0/0", "0.0021", 100, "PASS");

    // VF slightly improved from .008.
    // The low-VF tag will NOT be added by post-processing because this is a ref call.
    AssertRulesEandFCall(combinedVariants[0], 25378561, "0/0", "0.009", 100, "PASS");
    // The very next consensus record is already at the next position, i.e. only one
    // line was written for 25378561.
    Assert.Equal(25378562, combinedVariants[1].ReferencePosition);

    // Rule "F" test (ie various alt calls all ended up as no-call).
    // F: if we end up with multiple NOCALL calls for the same locus, leave those VCF
    // lines separate.
    AssertRulesEandFCall(poolAVariants[1], 25378562, "0/1", "0.0725", 100, "PASS");
    AssertRulesEandFCall(poolAVariants[2], 25378562, "0/1", "0.0725", 100, "PASS");
    AssertRulesEandFCall(poolAVariants[3], 25378562, "0/1", "0.0725", 100, "PASS");

    AssertRulesEandFCall(poolBVariants[2], 25378562, "0/0", "0.0024", 100, "PASS");
    Assert.Equal(25378563, poolBVariants[3].ReferencePosition);

    // Three separate no-call consensus lines at 25378562, one per alternate allele.
    // (The low-VF tag will also get added to each of them by post-processing.)
    AssertRulesEandFCall(combinedVariants[1], 25378562, "./.", "0.007", 0, "PB");
    Assert.Equal("C", combinedVariants[1].ReferenceAllele);
    Assert.Equal("T", combinedVariants[1].VariantAlleles[0]);

    AssertRulesEandFCall(combinedVariants[2], 25378562, "./.", "0.007", 0, "PB");
    Assert.Equal("C", combinedVariants[2].ReferenceAllele);
    Assert.Equal("TT", combinedVariants[2].VariantAlleles[0]);

    AssertRulesEandFCall(combinedVariants[3], 25378562, "./.", "0.007", 0, "PB");
    Assert.Equal("CC", combinedVariants[3].ReferenceAllele);
    Assert.Equal("T", combinedVariants[3].VariantAlleles[0]);

    Assert.Equal(25378563, combinedVariants[4].ReferencePosition);

    // Clean up the scratch output. This is only reached when every assertion passed;
    // after a failure the file is left on disk and removed at the start of the next run.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }
}

/// <summary>
/// Asserts the reference position, the "GT" and "VF" fields of the first genotype
/// column, the quality and the filter string of one VCF record. Expected values are
/// passed to Assert.Equal first, so a failure message labels them correctly.
/// </summary>
/// <param name="variant">The VCF record under test.</param>
/// <param name="expectedPosition">Expected <c>ReferencePosition</c>.</param>
/// <param name="expectedGenotype">Expected "GT" value, e.g. "0/1" or "./.".</param>
/// <param name="expectedFrequency">Expected "VF" value, compared as text exactly as written in the VCF.</param>
/// <param name="expectedQuality">Expected <c>Quality</c>.</param>
/// <param name="expectedFilters">Expected <c>Filters</c> string, e.g. "PASS" or "PB".</param>
private static void AssertRulesEandFCall(VcfVariant variant, int expectedPosition, string expectedGenotype, string expectedFrequency, double expectedQuality, string expectedFilters)
{
    Assert.Equal(expectedPosition, variant.ReferencePosition);
    Assert.Equal(expectedGenotype, variant.Genotypes[0]["GT"]);
    Assert.Equal(expectedFrequency, variant.Genotypes[0]["VF"]);
    Assert.Equal(expectedQuality, variant.Quality);
    Assert.Equal(expectedFilters, variant.Filters);
}