// Runs the variant caller over a neighborhood in which no phased variant has been staged.
// Expects no MNV calls and a homozygous-reference call at each of the two original sites.
public void CallThroughAnEmptyNbhd()
{
    // Two adjacent dummy SNVs on chr1 (156 alt calls out of 1000 total each).
    var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    // Nothing is added to the neighborhood beyond the two sites, and no used-ref counts are set,
    // so the test expects both positions to be reported as confident reference calls.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledMnvs = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(0, calledMnvs.Count);
    Assert.Equal(2, calledRefs.Count);

    Assert.Equal(Genotype.HomozygousRef, calledRefs[123].Genotype);
    Assert.Equal(Genotype.HomozygousRef, calledRefs[124].Genotype);

    Assert.Equal(123, calledRefs[123].Coordinate);
    Assert.Equal(124, calledRefs[124].Coordinate);
}
// Builds numVeadGroups vead groups, each made of numVeads two-site veads.
// useAlternateVariantSites switches the per-site calls from (A>T, G>C) to (C>C, G>A);
// prefix is prepended to every generated read name.
public static List<VeadGroup> GetSampleVeadGroups(int numVeads = 4, int numVeadGroups = 1, bool useAlternateVariantSites = false, string prefix = "")
{
    var groups = new List<VeadGroup>();

    for (var groupIndex = 0; groupIndex < numVeadGroups; groupIndex++)
    {
        var members = new List<Vead>();

        for (var veadIndex = 0; veadIndex < numVeads; veadIndex++)
        {
            // NOTE(review): the name suffix is the product of the two indices, so names repeat
            // (every vead in group 0 is "<prefix>r0"). Confirm whether unique names were intended.
            var readName = prefix + "r" + groupIndex * veadIndex;

            var siteCalls = useAlternateVariantSites
                ? new[,] { { "C", "C" }, { "G", "A" } }
                : new[,] { { "A", "T" }, { "G", "C" } };

            members.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, siteCalls));
        }

        groups.Add(PhasedVariantTestUtilities.CreateVeadGroup(members));
    }

    return groups;
}
// Feeds a mocked cluster of three identical three-site veads into a neighborhood, then checks
// the first candidate allele produced and the depth reported at each site.
public void AddMnvsFromClusters()
{
    //TODO even with mock cluster this takes too much setting up.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

    // Three reads, each reporting C>A, G>A, T>A.
    var reads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3" })
    {
        reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }));
    }

    neighborhood.ReferenceSequence = "CGT";

    var cluster = new Mock<ICluster>();
    cluster.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

    var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(reads);
    cluster.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
    cluster.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

    neighborhood.AddMnvsFromClusters(new List<ICluster>() { cluster.Object }, 20, 100);

    var candidate = neighborhood.CandidateVariants.First();
    Assert.Equal(6, candidate.TotalCoverage);
    Assert.Equal(6, candidate.AlleleSupport);
    Assert.Equal("CGT", candidate.Reference);
    Assert.Equal("AAA", candidate.Alternate);

    var siteDepths = neighborhood.DepthAtSites(new List<ICluster>() { cluster.Object });
    Assert.Equal(3, siteDepths.Length);
    for (var site = 0; site < 3; site++)
    {
        Assert.Equal(3, siteDepths[site]);
    }
}
// Builds a neighborhood and three identical three-site veads.
// NOTE(review): this method only constructs fixtures; nothing is invoked on them and nothing
// is asserted, so it currently verifies only that construction does not throw.
public void SetDepthAtSites()
{
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");

    // Three reads, each reporting C>A, G>A, T>A.
    var reads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3" })
    {
        reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }));
    }
}
// Creates a mocked IVeadGroupSource whose GetVeadGroups returns the same three single-vead
// groups (reads r1, r2 and r5) for any neighborhood.
private Mock<IVeadGroupSource> MockVeadSource()
{
    var cannedGroups = new List<VeadGroup>();
    foreach (var readName in new[] { "r1", "r2", "r5" })
    {
        // Six sites per read: two no-information sites (N>N) followed by four C>A calls.
        var vead = PhasedVariantTestUtilities.CreateVeadFromStringArray(
            readName,
            new[,] { { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" } });
        cannedGroups.Add(new VeadGroup(vead));
    }

    var source = new Mock<IVeadGroupSource>();
    source.Setup(s => s.GetVeadGroups(It.IsAny<VcfNeighborhood>())).Returns(cannedGroups);
    return source;
}
// Returns expectedNumberOfThreads neighborhoods on chr1. Each one has its VcfVariantSites
// replaced by a single site at position 123 backed by a dummy A>T allele.
private List<VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
{
    var result = new List<VcfNeighborhood>();

    for (var index = 0; index < expectedNumberOfThreads; index++)
    {
        var nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

        // The only site the neighborhood ends up holding.
        var site = new VariantSite(123);
        site.ReferenceName = "chr1";
        site.OriginalAlleleFromVcf = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);

        nbhd.VcfVariantSites = new List<VariantSite> { site };

        result.Add(nbhd);
    }

    return result;
}
// WriteHeader should write the original header and add a line about the phaser used
// right before the column headers. Also checks that a disposed writer rejects further use.
public void WriteHeader()
{
    var writer = InitializeWriter(false);
    writer.WriteHeader();
    writer.Dispose();

    // Once disposed, both header and variant writes must throw.
    Assert.Throws<Exception>(() => writer.WriteHeader());
    Assert.Throws<Exception>(() => writer.Write(new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
    }));

    // A second Dispose is expected to be harmless.
    writer.Dispose();

    Assert.True(File.Exists(_outputFile));
    var fileLines = File.ReadAllLines(_outputFile);

    // The first two original header lines are carried over unchanged.
    Assert.Equal(_origHeader[0], fileLines[0]);
    Assert.Equal(_origHeader[1], fileLines[1]);

    // The third output line is the inserted phaser line, not the original third header line.
    Assert.NotEqual(_origHeader[2], fileLines[2]);
    // Assert.StartsWith reports the actual line on failure, unlike Assert.True(x.StartsWith(..)).
    Assert.StartsWith("##VariantPhaser=Scylla", fileLines[2]);

    // The original third header line is expected two lines further down.
    Assert.Equal(_origHeader[2], fileLines[4]);
}
// Stages one accepted phased SNV at position 123, then re-runs the caller with different
// used-reference counts at position 124 and checks what is reported there each time:
//   no counts used  -> reference call, AD 844
//   100 counts used -> reference call, AD 744
//   1000 counts used -> no-call ("./."), AD 0
public void VarCallsBecomeRefsAndNulls()
{
    var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();
    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            Coordinate = 123,
            Reference = "A",
            Alternate = "T",
            VariantQscore = 35,
            AlleleSupport = 10,
            TotalCoverage = 50
        });

    // Position 124 starts with 156 alt calls out of 1000, i.e. 844 reference calls; each scenario
    // states how many of those were used up and what the remaining call should look like.
    var scenarios = new[]
    {
        new { UsedRefCounts = new Dictionary<int, int>(), Gt = "0/0", Ad = "844" },
        new { UsedRefCounts = new Dictionary<int, int>() { { 124, 100 } }, Gt = "0/0", Ad = "744" },
        new { UsedRefCounts = new Dictionary<int, int>() { { 124, 1000 } }, Gt = "./.", Ad = "0" },
    };

    foreach (var scenario in scenarios)
    {
        neighborhood.UsedRefCountsLookup = scenario.UsedRefCounts;

        variantCaller.CallMNVs(neighborhood);
        variantCaller.CallRefs(neighborhood);

        var calledMnvs = neighborhood.CalledVariants;
        var calledRefs = neighborhood.CalledRefs;

        Assert.Equal(1, calledMnvs.Count);
        Assert.Equal(1, calledMnvs[123].Count);
        Assert.Equal(2, calledRefs.Count);

        var expectedAt124 = new VcfVariant()
        {
            ReferenceName = "chr1",
            ReferencePosition = 124,
            ReferenceAllele = "A",
            VariantAlleles = new[] { "." },
            Genotypes = new List<Dictionary<string, string>>()
            {
                new Dictionary<string, string>() { { "GT", scenario.Gt }, { "DP", "1000" }, { "AD", scenario.Ad } }
            },
        };

        VcfMergerTests.CheckVariantsMatch(alleleAt123, calledMnvs[123][0]);
        VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);
    }
}
// Stages a phased variant at a coordinate (129) where no original variant exists and checks
// that it is called there, while the three original sites in the neighborhood come back as
// reference calls (123, 234) or a no-call (124, whose reference counts are fully used).
public void CallAVariantInANewLocation()
{
    // Original variants.
    var alleleAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var alleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var secondAlleleAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);
    var siteAt234 = new VariantSite(alleleAt234);
    // Deliberately never added to the neighborhood: it stands in for a variant that is not
    // used for phasing (for example one that failed filters).
    var siteLeftOut = new VariantSite(secondAlleleAt234);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", siteAt123, siteAt124, "");
    neighborhood.AddVariantSite(siteAt234, "RRRRR");
    neighborhood.SetRangeOfInterest();

    // Stage one candidate MNV at a new location.
    var stagedMnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        Coordinate = 129,
        Reference = "A",
        Alternate = "TT"
    };
    neighborhood.AddAcceptedPhasedVariant(stagedMnv);
    neighborhood.UsedRefCountsLookup = new Dictionary<int, int>() { { 124, 1000 } };

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledMnvs = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    var expectedRefAt123 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>() { new Dictionary<string, string>() { { "GT", "0/0" } } },
    };

    var expectedRefAt234 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>() { new Dictionary<string, string>() { { "GT", "0/0" } } },
    };

    var expectedNoCallAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>() { new Dictionary<string, string>() { { "GT", "./." } } },
    };

    Assert.Equal(1, calledMnvs.Count);
    Assert.Equal(1, calledMnvs[129].Count);
    Assert.Equal(3, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
    VcfMergerTests.CheckVariantsMatch(expectedNoCallAt124, calledRefs[124]);
    VcfMergerTests.CheckVariantsMatch(stagedMnv, calledMnvs[129][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
}
// Wraps a neighborhood in a CallableNeighborhood, creates MNVs from a mocked cluster of three
// identical three-site veads, and checks the first candidate allele and the per-site depths.
public void AddMnvsFromClusters()
{
    //TODO even with mock cluster this takes too much setting up.
    var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));

    // Three reads, each reporting C>A, G>A, T>A.
    var reads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3" })
    {
        reads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }));
    }

    // Give the three sites of every read the reference positions 1, 2 and 3.
    foreach (var read in reads)
    {
        for (var site = 0; site < 3; site++)
        {
            read.SiteResults[site].VcfReferencePosition = site + 1;
        }
    }

    var cluster = new Mock<ICluster>();
    cluster.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

    var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(reads);
    cluster.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
    cluster.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

    var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
    callable.NbhdReferenceSequenceSubstring = "CGT";
    callable.CreateMnvsFromClusters(new List<ICluster>() { cluster.Object }, 20);

    var candidate = callable.CandidateVariants.First();
    Assert.Equal(6, candidate.TotalCoverage);
    Assert.Equal(6, candidate.AlleleSupport);
    Assert.Equal("CGT", candidate.ReferenceAllele);
    Assert.Equal("AAA", candidate.AlternateAllele);

    // Both arrays are produced by DepthAtSites; only the depths are asserted on.
    int[] siteDepths;
    int[] siteNoCalls;
    callable.DepthAtSites(new List<ICluster>() { cluster.Object }, out siteDepths, out siteNoCalls);

    Assert.Equal(3, siteDepths.Length);
    for (var site = 0; site < 3; site++)
    {
        Assert.Equal(3, siteDepths[site]);
    }
}
// Merges two neighborhoods' called variants into the variants read from MergerInput.vcf,
// writes the result with a PhasedVcfWriter, and compares the output line by line against
// MergerOutput.vcf.
public void WriteANbhd()
{
    var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
    var inputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerInput.vcf");
    var expectedFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerOutput.vcf");

    // Start from a clean slate so a stale output file cannot satisfy the comparison.
    File.Delete(outputFilePath);

    var config = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
        AllowMultipleVcfLinesPerLoci = true
    };

    // The writer is given an empty input context.
    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>() { }, null);

    // NOTE(review): reader is never disposed in this test; confirm whether VcfReader holds the
    // input file open and should be wrapped in a using block.
    var reader = new VcfReader(inputFilePath, true);

    // Set up the original variants.
    var originalVcfVariant1 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
    var originalVcfVariant2 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);

    var originalVcfVariant4 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
    var originalVcfVariant5 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

    var vs1 = new VariantSite((originalVcfVariant1));
    var vs2 = new VariantSite((originalVcfVariant2));
    var vs4 = new VariantSite((originalVcfVariant4));
    var vs5 = new VariantSite((originalVcfVariant5));

    // Have to replace variants at position 116380048 (we call two new MNVs here).
    var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), "chr2", vs1, vs2, "");
    nbhd1.SetRangeOfInterest();

    // Have to replace variants at positions 116380051 and 52 (we call one new MNV at 51).
    var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), "chr7", vs4, vs5, "");
    nbhd2.SetRangeOfInterest();

    VcfMerger merger = new VcfMerger(reader);
    List<CalledAllele> allelesPastNbh = new List<CalledAllele>();

    nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
    {
        { originalVcfVariant1.Coordinate, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
    };
    nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
    {
        { originalVcfVariant4.Coordinate, new List<CalledAllele> { originalVcfVariant4 } }
    };

    // Walk the input in order: up to each neighborhood's chromosome, through the neighborhood,
    // then flush whatever remains. Each step hands back the alleles already read past it.
    allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);
    allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

    allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);
    allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

    merger.WriteRemainingVariants(writer, allelesPastNbh);

    writer.Dispose();

    var expectedLines = File.ReadLines(expectedFilePath).ToList();
    var outputLines = File.ReadLines(outputFilePath).ToList();

    Assert.Equal(expectedLines.Count, outputLines.Count);

    for (int i = 0; i < expectedLines.Count; i++)
    {
        Assert.Equal(expectedLines[i], outputLines[i]);
    }
}
// Writes the same unsorted set of alleles twice: once as a normal vcf (one line per allele)
// and once "crushed" (one line per position), and checks line order and content each time.
// Also checks that a disposed writer rejects further writes.
public void Write()
{
    // Normal (uncrushed) vcf.
    var writer = InitializeWriter(false);

    // Deliberately out of order: the writer should order by chrom, coord, ref, then alt.
    var unsortedAlleles = new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "T", "A", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr8", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156),
    };

    // Expected output order.
    var expectedLines = new List<string>
    {
        "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tT\tA\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tT\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
    };

    writer.Write(unsortedAlleles);
    writer.Dispose();

    // A disposed writer must refuse both header and variant writes.
    Assert.Throws<Exception>(() => writer.WriteHeader());
    Assert.Throws<Exception>(() => writer.Write(new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "G", 1000, 156)
    }));
    writer.Dispose();

    var writtenLines = File.ReadAllLines(_outputFile);

    Assert.Equal(unsortedAlleles.Count, writtenLines.Length);
    for (int lineIndex = 0; lineIndex < expectedLines.Count; lineIndex++)
    {
        Assert.Equal(expectedLines[lineIndex], writtenLines[lineIndex]);
    }

    // Crushed vcf: alleles sharing a position are collapsed onto one line.
    writer = InitializeWriter(true);

    writer.Write(unsortedAlleles);
    writer.Dispose();

    writtenLines = File.ReadAllLines(_outputFile);

    expectedLines = new List<string>
    {
        "chrM\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr8\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t123\t.\tA\tC,A,C\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr9\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chr10\t124\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000",
        "chrX\t123\t.\tA\tC\t100\tPASS\tDP=1000\tGT:GQ:AD:DP:VF:NL:SB:NC\t0/1:0:844,156:1000:0.156:0:0.0000:0.0000"
    };

    // Only one line per distinct position.
    Assert.Equal(6, writtenLines.Length);
    for (int lineIndex = 0; lineIndex < expectedLines.Count; lineIndex++)
    {
        Assert.Equal(expectedLines[lineIndex], writtenLines[lineIndex]);
    }
}
// Exercises Extensions.OrderVariants across numeric, chrX, chrM and nonstandard chromosome
// names, with the chrM-priority flag both on and off.
public void OrderVariants()
{
    // The third argument of OrderVariants: whether chrM is prioritized.
    const bool prioritizeChrM = true;
    const bool noChrMPriority = false;

    var chr10 = PhasedVariantTestUtilities.CreateDummyVariant("chr10", 123, "A", "C", 1000, 156);
    var chrX = PhasedVariantTestUtilities.CreateDummyVariant("chrX", 123, "A", "C", 1000, 156);
    var chrXSecond = PhasedVariantTestUtilities.CreateDummyVariant("chrX", 124, "A", "C", 1000, 156);
    var chrM = PhasedVariantTestUtilities.CreateDummyVariant("chrM", 123, "A", "C", 1000, 156);
    var chrMSecond = PhasedVariantTestUtilities.CreateDummyVariant("chrM", 124, "A", "C", 1000, 156);
    var chr9 = PhasedVariantTestUtilities.CreateDummyVariant("chr9", 123, "A", "C", 1000, 156);
    var chr9Second = PhasedVariantTestUtilities.CreateDummyVariant("chr9", 124, "A", "C", 1000, 156);
    var nonstandardChrZ = PhasedVariantTestUtilities.CreateDummyVariant("chrZ", 123, "A", "C", 1000, 156);
    var nonstandardChrA = PhasedVariantTestUtilities.CreateDummyVariant("chrA", 123, "A", "C", 1000, 156);

    // ---------------------------------------------------------------------------
    // When neither or both is on chrM, the chrM-priority flag should not matter
    // ---------------------------------------------------------------------------

    // Same chrom, different positions - numeric chrom
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chr9Second, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chr9Second, noChrMPriority));
    Assert.Equal(1, Extensions.OrderVariants(chr9Second, chr9, prioritizeChrM));
    Assert.Equal(1, Extensions.OrderVariants(chr9Second, chr9, noChrMPriority));

    // Same chrom, different positions - chrX
    Assert.Equal(-1, Extensions.OrderVariants(chrX, chrXSecond, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chrX, chrXSecond, noChrMPriority));
    Assert.Equal(1, Extensions.OrderVariants(chrXSecond, chrX, prioritizeChrM));
    Assert.Equal(1, Extensions.OrderVariants(chrXSecond, chrX, noChrMPriority));

    // Same chrom, different positions - chrM
    Assert.Equal(-1, Extensions.OrderVariants(chrM, chrMSecond, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chrM, chrMSecond, noChrMPriority));
    Assert.Equal(1, Extensions.OrderVariants(chrMSecond, chrM, prioritizeChrM));
    Assert.Equal(1, Extensions.OrderVariants(chrMSecond, chrM, noChrMPriority));

    // Different chroms, one is >=10 (a direct string compare would not sort these correctly)
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chr10, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chr10, noChrMPriority));

    // One numeric, one chrX
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chrX, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chrX, noChrMPriority));

    // Same chrom, same position
    Assert.Equal(0, Extensions.OrderVariants(chr9, chr9, prioritizeChrM));
    Assert.Equal(0, Extensions.OrderVariants(chrX, chrX, prioritizeChrM));
    Assert.Equal(0, Extensions.OrderVariants(chrM, chrM, prioritizeChrM));

    // ---------------------------------------------------------------------------
    // If exactly one is on chrM, the chrM-priority flag decides the order
    // ---------------------------------------------------------------------------

    // One numeric, one chrM
    Assert.Equal(1, Extensions.OrderVariants(chr9, chrM, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, chrM, noChrMPriority));

    // One chrX, one chrM
    Assert.Equal(1, Extensions.OrderVariants(chrX, chrM, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chrX, chrM, noChrMPriority));

    // ---------------------------------------------------------------------------
    // Nonstandard chroms should be below numerics and then ordered alphabetically
    // ---------------------------------------------------------------------------

    // One numeric, one nonstandard (chrZ)
    Assert.Equal(-1, Extensions.OrderVariants(chr9, nonstandardChrZ, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, nonstandardChrZ, noChrMPriority));

    // One chrX, one nonstandard (chrZ)
    Assert.Equal(-1, Extensions.OrderVariants(chrX, nonstandardChrZ, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chrX, nonstandardChrZ, noChrMPriority));

    // One chrM, one nonstandard (chrZ)
    // NOTE(review): this case passes chrX, not chrM, so it repeats the chrX case above and the
    // chrM-vs-nonstandard ordering is not actually covered. Confirm the expected chrM results
    // before switching the argument.
    Assert.Equal(-1, Extensions.OrderVariants(chrX, nonstandardChrZ, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chrX, nonstandardChrZ, noChrMPriority));

    // One numeric, one nonstandard (chrA)
    Assert.Equal(-1, Extensions.OrderVariants(chr9, nonstandardChrA, prioritizeChrM));
    Assert.Equal(-1, Extensions.OrderVariants(chr9, nonstandardChrA, noChrMPriority));

    // One chrX, one nonstandard (chrA)
    Assert.Equal(1, Extensions.OrderVariants(chrX, nonstandardChrA, prioritizeChrM));
    Assert.Equal(1, Extensions.OrderVariants(chrX, nonstandardChrA, noChrMPriority));

    // One chrM, one nonstandard (chrA)
    // NOTE(review): as above, chrX is passed here instead of chrM.
    Assert.Equal(1, Extensions.OrderVariants(chrX, nonstandardChrA, prioritizeChrM));
    Assert.Equal(1, Extensions.OrderVariants(chrX, nonstandardChrA, noChrMPriority));
}
// Converts the same dummy vcf variant several times, changing its genotype and filter strings
// between conversions, and checks the genotype, filters and basic fields of each result.
public void Convert()
{
    var source = PhasedVariantTestUtilities.CreateDummyVariant("chr10", 123, "A", "C", 1000, 156);

    // "0/1", no filters -> heterozygous alt/ref with an empty filter list.
    source.Genotypes[0]["GT"] = "0/1";
    var converted = Extensions.ConvertUnpackedVariant(source);
    Assert.Equal(source.ReferenceName, converted.Chromosome);
    Assert.Equal(source.VariantAlleles[0], converted.Alternate);
    Assert.Equal(source.ReferenceAllele, converted.Reference);
    Assert.Equal(source.ReferencePosition, converted.Coordinate);
    Assert.Equal(new List<FilterType>() { }, converted.Filters);
    Assert.Equal(Genotype.HeterozygousAltRef, converted.Genotype);
    Assert.Equal(AlleleCategory.Snv, converted.Type);

    // "./." with an R5x9 filter -> alt-like no-call with the RMxN filter.
    source.Genotypes[0]["GT"] = "./.";
    source.Filters = "R5x9";
    converted = Extensions.ConvertUnpackedVariant(source);
    Assert.Equal(source.ReferenceName, converted.Chromosome);
    Assert.Equal(source.VariantAlleles[0], converted.Alternate);
    Assert.Equal(source.ReferenceAllele, converted.Reference);
    Assert.Equal(source.ReferencePosition, converted.Coordinate);
    Assert.Equal(new List<FilterType>() { FilterType.RMxN }, converted.Filters);
    Assert.Equal(Genotype.AltLikeNoCall, converted.Genotype);
    Assert.Equal(AlleleCategory.Snv, converted.Type);

    // "1/2" with two filters -> heterozygous alt1/alt2, filters kept in order.
    source.Genotypes[0]["GT"] = "1/2";
    source.Filters = "R5x9;SB";
    converted = Extensions.ConvertUnpackedVariant(source);
    Assert.Equal(source.ReferenceName, converted.Chromosome);
    Assert.Equal(source.VariantAlleles[0], converted.Alternate);
    Assert.Equal(source.ReferenceAllele, converted.Reference);
    Assert.Equal(source.ReferencePosition, converted.Coordinate);
    Assert.Equal(new List<FilterType>() { FilterType.RMxN, FilterType.StrandBias }, converted.Filters);
    Assert.Equal(Genotype.HeterozygousAlt1Alt2, converted.Genotype);
    Assert.Equal(AlleleCategory.Snv, converted.Type);

    // "1/1" with repeat-length and quality filters.
    source.Genotypes[0]["GT"] = "1/1";
    source.Filters = "R8;q30";
    converted = Extensions.ConvertUnpackedVariant(source);
    Assert.Equal(source.ReferenceName, converted.Chromosome);
    Assert.Equal(new List<FilterType>() { FilterType.IndelRepeatLength, FilterType.LowVariantQscore }, converted.Filters);
    Assert.Equal(Genotype.HomozygousAlt, converted.Genotype);
    Assert.Equal(AlleleCategory.Snv, converted.Type);

    // "1/1" with filter names spelled out in lower case.
    source.Genotypes[0]["GT"] = "1/1";
    source.Filters = "lowvariantfreq;multiallelicsite";
    converted = Extensions.ConvertUnpackedVariant(source);
    Assert.Equal(source.ReferenceName, converted.Chromosome);
    Assert.Equal(new List<FilterType>() { FilterType.LowVariantFrequency, FilterType.MultiAllelicSite }, converted.Filters);
    Assert.Equal(Genotype.HomozygousAlt, converted.Genotype);
    Assert.Equal(AlleleCategory.Snv, converted.Type);
}
// Runs ExecuteClusteringTest over three fixtures: a single group whose second site is N,
// a six-read two-site data set, and a ten-read six-site data set that is clustered with no
// ploidy constraint and then with ploidy constraints of 3, 2 and 1.
public void ClusterVeadGroups()
{
    // ----------------------------------------------------
    // Four Ns
    //  - This is from original "FourNs Test"
    // ----------------------------------------------------
    var veads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3", "r4" })
    {
        veads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new string[2, 2] { { "C", "C" }, { "G", "N" } }));
    }

    var veadgroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);

    ExecuteClusteringTest(
        new List<VeadGroup>() { veadgroup },
        new List<List<VeadGroup>> { new List<VeadGroup> { veadgroup } },
        new List<string>() { "C>C,G>N" },
        1);

    // ----------------------------------------------------
    // Real Data
    //  - This data is from Sample 129 (original "Sample129Test")
    // ----------------------------------------------------
    // This list is built but not passed on; the groups below are built from their own veads.
    veads = new List<Vead>()
    {
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new string[2, 2] { { "A", "G" }, { "N", "N" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new string[2, 2] { { "A", "G" }, { "C", "C" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new string[2, 2] { { "A", "A" }, { "C", "C" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r4", new string[2, 2] { { "A", "G" }, { "C", "A" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r5", new string[2, 2] { { "N", "N" }, { "C", "C" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r6", new string[2, 2] { { "N", "N" }, { "C", "A" } }),
    };

    var group1 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new string[2, 2] { { "A", "G" }, { "N", "N" } }));
    var group4 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r4", new string[2, 2] { { "A", "G" }, { "C", "A" } }));
    var group6 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r6", new string[2, 2] { { "N", "N" }, { "C", "A" } }));
    var group2 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new string[2, 2] { { "A", "G" }, { "C", "C" } }));
    var group3 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new string[2, 2] { { "A", "A" }, { "C", "C" } }));
    var group5 = new VeadGroup(PhasedVariantTestUtilities.CreateVeadFromStringArray("r5", new string[2, 2] { { "N", "N" }, { "C", "C" } }));

    ExecuteClusteringTest(
        new List<VeadGroup>() { group1, group2, group3, group4, group5, group6 },
        new List<List<VeadGroup>>
        {
            new List<VeadGroup> { group4, group6, group1 },
            new List<VeadGroup> { group3, group5 },
            new List<VeadGroup> { group2 },
        },
        new List<string>() { "A>G,C>A", "A>G,C>C", "A>A,C>C" },
        1, 0);

    // ----------------------------------------------------
    // Ten grouped reads
    //  - This is from original "10 ReadsTest"
    // ----------------------------------------------------

    // r1, r2, r5: no information at the first two sites, C>A at the last four.
    var groupMembers = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r5" })
    {
        groupMembers.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName,
            new string[6, 2] { { "N", "N" }, { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" } }));
    }
    group1 = PhasedVariantTestUtilities.CreateVeadGroup(groupMembers);

    // r3, r4, r7, r8, r9: no information at the first and fifth sites, C>A elsewhere.
    groupMembers = new List<Vead>();
    foreach (var readName in new[] { "r3", "r4", "r7", "r8", "r9" })
    {
        groupMembers.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName,
            new string[6, 2] { { "N", "N" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }));
    }
    group2 = PhasedVariantTestUtilities.CreateVeadGroup(groupMembers);

    // r10: C>A everywhere except no information at the fifth site.
    group3 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
    {
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r10",
            new string[6, 2] { { "C", "A" }, { "C", "A" }, { "C", "A" }, { "C", "A" }, { "N", "N" }, { "C", "A" } }),
    });

    // r6: reference (C>C) at every site.
    group4 = PhasedVariantTestUtilities.CreateVeadGroup(new List<Vead>
    {
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r6",
            new string[6, 2] { { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" }, { "C", "C" } }),
    });

    // No ploidy constraint: three clusters expected.
    ExecuteClusteringTest(
        new List<VeadGroup>() { group1, group2, group3, group4 },
        new List<List<VeadGroup>>
        {
            new List<VeadGroup> { group1 },
            new List<VeadGroup> { group2, group3 },
            new List<VeadGroup> { group4 },
        },
        new List<string>() { "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C" },
        4, 0);

    // Ploidy constraint of 3: the same three clusters are expected.
    ExecuteClusteringTest(
        new List<VeadGroup>() { group1, group2, group3, group4 },
        new List<List<VeadGroup>>
        {
            new List<VeadGroup> { group1 },
            new List<VeadGroup> { group2, group3 },
            new List<VeadGroup> { group4 },
        },
        new List<string>() { "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C" },
        4, 0, ploidyConstraint: 3);

    // Ploidy constraint of 2: the cluster holding only group4 is no longer expected.
    ExecuteClusteringTest(
        new List<VeadGroup>() { group1, group2, group3, group4 },
        new List<List<VeadGroup>>
        {
            new List<VeadGroup> { group1 },
            new List<VeadGroup> { group2, group3 },
        },
        new List<string>() { "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C" },
        4, 0, ploidyConstraint: 2);

    // Ploidy constraint of 1: only the cluster holding group1 is expected.
    ExecuteClusteringTest(
        new List<VeadGroup>() { group1, group2, group3, group4 },
        new List<List<VeadGroup>>
        {
            new List<VeadGroup> { group1 },
        },
        new List<string>() { "N>N,N>N,C>A,C>A,C>A,C>A", "C>A,C>A,C>A,C>A,N>N,C>A", "C>C,C>C,C>C,C>C,C>C,C>C" },
        4, 0, ploidyConstraint: 1);
}
// Merges a mocked neighborhood (one called MNV at 229 plus reference-like calls at 123, 124
// and 234) into four staged variants and checks the five merged records one by one.
public void GetAcceptedVariants_MergeVariants()
{
    // Staged input: two adjacent SNVs and two different alts that share position 234.
    var snvAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var snvAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var altTAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var altCAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "C", 1000, 156);
    var allStagedVariants = new List<CalledAllele> { snvAt123, snvAt124, altTAt234, altCAt234 };

    // The neighborhood reports only the first three as its original variants;
    // the A>C call at 234 is not among them.
    var neighborhoodOriginals = new List<CalledAllele> { snvAt123, snvAt124, altTAt234 };

    var calledMnv = new CalledAllele
    {
        Chromosome = "chr1",
        Coordinate = 229,
        Reference = "AA",
        Alternate = "T",
        Genotype = Genotype.HeterozygousAltRef
    };
    var mnvsByPosition = new Dictionary<int, List<CalledAllele>>();
    mnvsByPosition.Add(calledMnv.Coordinate, new List<CalledAllele> { calledMnv });

    // A site that was consumed completely should come out as a no-call. Per the original
    // author's note the merger cannot do that conversion itself, so the reference at 124
    // is staged with the no-call genotype up front.
    var refAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 123,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = "."
    };
    var noCallAt124 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 124,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = ".",
        Genotype = Genotype.RefLikeNoCall
    };
    var homRefAt234 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 234,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = ".",
        Genotype = Genotype.HomozygousRef
    };
    var refsByPosition = new Dictionary<int, CalledAllele>
    {
        { 123, refAt123 },
        { 124, noCallAt124 },
        { 234, homRefAt234 }
    };

    var neighborhood = new Mock<IVcfNeighborhood>();
    neighborhood.Setup(n => n.GetOriginalVcfVariants()).Returns(neighborhoodOriginals.ToList());
    neighborhood.Setup(n => n.CalledVariants).Returns(mnvsByPosition);
    neighborhood.Setup(n => n.CalledRefs).Returns(refsByPosition);

    var merged = VcfMerger.GetMergedListOfVariants(neighborhood.Object, allStagedVariants.ToList());

    Assert.Equal(5, merged.Count);

    // Expected records for the three reference-like sites.
    var expectedRefAt123 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };
    var expectedNoCallAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "./." } }
        },
    };
    var expectedRefAt234 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" } }
        },
    };

    // Asserted order: ref at 123, no-call at 124, the MNV at 229, ref at 234,
    // then the A>C call at 234 that the neighborhood did not list.
    CheckVariantsMatch(expectedRefAt123, merged[0]);
    CheckVariantsMatch(expectedNoCallAt124, merged[1]);
    CheckVariantsMatch(calledMnv, merged[2]);
    CheckVariantsMatch(expectedRefAt234, merged[3]);
    CheckVariantsMatch(altCAt234, merged[4]);
}
// Verifies the header PhasedVcfWriter emits for two configurations: the original header lines
// must be kept, a "##VariantPhaser=" line must be inserted, and the FILTER lines implied by
// the writer config must be added.
public void FilterHeader()
{
    // The two longest header lines are identical in every input and expected header below.
    const string piscesCommandLine = "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout";
    const string columnHeader = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HD700n560_miseq1_S7.bam";

    var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileWriterTests.vcf");
    File.Delete(outputFilePath);

    var variants = new List<CalledAllele>
    {
        PhasedVariantTestUtilities.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
        PhasedVariantTestUtilities.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
    };
    variants[0].Filters.AddRange(new List<FilterType> { FilterType.RMxN, FilterType.LowDepth, FilterType.LowVariantFrequency });
    variants[1].Filters.AddRange(new List<FilterType> { FilterType.IndelRepeatLength, FilterType.LowVariantQscore, FilterType.StrandBias });

    // ---- Round 1: diploid config, input header that already carries FILTER lines ----
    var diploidConfig = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Diploid,
    };

    var originalHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        piscesCommandLine,
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
        "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
        "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
        "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        columnHeader
    };

    // Original author's note: Scylla has no SB, RMxN or R8 filters of its own. The SB/R8/R5x9
    // lines expected below are the ones already present in the input header; the q30, LowDP,
    // LowVariantFreq and MultiAllelicSite lines are the additions expected from the writer.
    var expectedHeader1 = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        piscesCommandLine,
        "##VariantPhaser=Scylla 1.0.0.0",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
        "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
        "##FILTER=<ID=R8,Description=\"Indel repeat greater than or equal to 8\">",
        "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
        "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
        "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
        "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
        "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        columnHeader
    };

    var writtenHeader = WriteVariantsAndReadHeader(outputFilePath, diploidConfig, originalHeader, variants);
    AssertHeaderLinesMatch(expectedHeader1, writtenHeader);

    // ---- Round 2: somatic config, input header without any FILTER lines ----
    var somaticConfig = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 22,
        FrequencyFilterThreshold = 0.007f,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
    };

    originalHeader = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        piscesCommandLine,
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        columnHeader
    };

    var expectedHeader2 = new List<string>
    {
        "##fileformat=VCFv4.1",
        "##fileDate=20160620",
        "##source=Pisces 1.0.0.0",
        piscesCommandLine,
        "##VariantPhaser=Scylla 1.0.0.0",
        "##reference=WholeGenomeFASTA",
        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
        "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
        "##FILTER=<ID=q22,Description=\"Quality score less than 22\">",
        "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called\">",
        "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070\">",
        columnHeader,
    };

    // The second round writes the same two variants with their filter lists cleared.
    variants[0].Filters = new List<FilterType>();
    variants[1].Filters = new List<FilterType>();

    writtenHeader = WriteVariantsAndReadHeader(outputFilePath, somaticConfig, originalHeader, variants);
    AssertHeaderLinesMatch(expectedHeader2, writtenHeader);
}

// Writes the header and the given variants to outputFilePath through a new PhasedVcfWriter
// (empty input context, no trailing argument), then returns the header lines read back with
// VcfReader. try/finally releases both objects even when a write or read throws, so a failure
// here does not leave the output file locked.
private static List<string> WriteVariantsAndReadHeader(string outputFilePath, VcfWriterConfig config, List<string> originalHeader, List<CalledAllele> variants)
{
    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);
    try
    {
        writer.WriteHeader();
        writer.Write(variants);
    }
    finally
    {
        writer.Dispose();
    }

    VcfReader reader = new VcfReader(outputFilePath);
    try
    {
        return reader.HeaderLines;
    }
    finally
    {
        reader.Dispose();
    }
}

// Asserts that both headers have the same number of lines and match line by line. The
// "##VariantPhaser=" line is compared by prefix only (presumably because the version suffix
// depends on the build - confirm against the writer).
private static void AssertHeaderLinesMatch(List<string> expectedHeader, List<string> writtenHeader)
{
    Assert.Equal(expectedHeader.Count, writtenHeader.Count);

    for (int i = 0; i < expectedHeader.Count; i++)
    {
        if (expectedHeader[i].StartsWith("##VariantPhaser=", StringComparison.Ordinal))
        {
            Assert.True(writtenHeader[i].StartsWith("##VariantPhaser=", StringComparison.Ordinal));
            continue;
        }

        Assert.Equal(expectedHeader[i], writtenHeader[i]);
    }
}
// Runs the merger twice against the same mocked neighborhood: first with a plain reference
// call staged at position 124, then with that reference staged as a no-call, and checks the
// three merged records each time.
public void GetAcceptedVariants_MergeNull()
{
    var snvAt123 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var snvAt124 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var snvAt234 = PhasedVariantTestUtilities.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var allStagedVariants = new List<CalledAllele> { snvAt123, snvAt124, snvAt234 };
    // The neighborhood reports only the two adjacent SNVs as its original variants.
    var neighborhoodOriginals = new List<CalledAllele> { snvAt123, snvAt124 };

    var calledSnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        Coordinate = 123,
        Reference = "A",
        Alternate = "T"
    };

    // ---- Round 1: plain reference calls staged at 123 and 124 ----
    var firstRoundCalls = new Dictionary<int, List<CalledAllele>>();
    firstRoundCalls.Add(calledSnv.Coordinate, new List<CalledAllele> { calledSnv });

    var firstRoundRefAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 123,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = "."
    };
    var firstRoundRefAt124 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 124,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = "."
    };
    var firstRoundRefs = new Dictionary<int, CalledAllele>
    {
        { 123, firstRoundRefAt123 },
        { 124, firstRoundRefAt124 }
    };

    // Original author's reasoning: position 124 was called with 156 alt reads out of 1000,
    // i.e. 844 reference reads. 100 of those are taken to be consumed, which leaves 744/1000
    // for the reference - still enough for a confident reference call.
    var neighborhood = new Mock<IVcfNeighborhood>();
    neighborhood.Setup(n => n.GetOriginalVcfVariants()).Returns(neighborhoodOriginals.ToList());
    neighborhood.Setup(n => n.CalledVariants).Returns(firstRoundCalls);
    neighborhood.Setup(n => n.CalledRefs).Returns(firstRoundRefs);

    var merged = VcfMerger.GetMergedListOfVariants(neighborhood.Object, allStagedVariants.ToList());

    Assert.Equal(3, merged.Count);

    var expectedRefAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "0/0" }, { "DP", "1000" }, { "AD", "744" } }
        },
    };
    CheckVariantsMatch(snvAt123, merged[0]);
    CheckVariantsMatch(expectedRefAt124, merged[1]);
    CheckVariantsMatch(snvAt234, merged[2]);

    // ---- Round 2: re-stage the calls, this time with 124 already marked as a no-call ----
    var secondRoundCalls = new Dictionary<int, List<CalledAllele>>();
    secondRoundCalls.Add(calledSnv.Coordinate, new List<CalledAllele> { calledSnv });
    neighborhood.Setup(n => n.CalledVariants).Returns(secondRoundCalls);

    // A site that was consumed completely should come out as a no-call. Per the original
    // author's note the merger cannot do that conversion itself, so the reference is staged
    // with the no-call genotype up front.
    var secondRoundRefAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 123,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = "."
    };
    var secondRoundNoCallAt124 = new CalledAllele(AlleleCategory.Reference)
    {
        Coordinate = 124,
        Chromosome = "chr1",
        Reference = "A",
        Alternate = ".",
        Genotype = Genotype.RefLikeNoCall
    };
    var secondRoundRefs = new Dictionary<int, CalledAllele>
    {
        { 123, secondRoundRefAt123 },
        { 124, secondRoundNoCallAt124 }
    };
    neighborhood.Setup(n => n.CalledRefs).Returns(secondRoundRefs);

    // This call hands over the staged list itself rather than a copy, as the original did.
    merged = VcfMerger.GetMergedListOfVariants(neighborhood.Object, allStagedVariants);

    Assert.Equal(3, merged.Count);

    var expectedNoCallAt124 = new VcfVariant
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>
        {
            new Dictionary<string, string> { { "GT", "./." } }
        },
    };
    CheckVariantsMatch(snvAt123, merged[0]);
    CheckVariantsMatch(expectedNoCallAt124, merged[1]);
    CheckVariantsMatch(snvAt234, merged[2]);
}