public void GetAcceptedVariants_MergeNull() { var originalVcfVariant = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156); var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156); var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156); var stagedVcfVariants = new List <CalledAllele> { originalVcfVariant, originalVcfVariant2, originalVcfVariant3 }; var variantsUsedByCaller = new List <CalledAllele>() { originalVcfVariant, originalVcfVariant2 }; var stagedCalledMNV = new CalledAllele(AlleleCategory.Snv) { Chromosome = "chr1", ReferencePosition = 123, ReferenceAllele = "A", AlternateAllele = "T" }; var stagedCalledMNVs = new Dictionary <int, List <CalledAllele> >() { { stagedCalledMNV.ReferencePosition, new List <CalledAllele>() { stagedCalledMNV } } }; var stagedCalledRefs = new Dictionary <int, CalledAllele>() { { 123, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 123, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = "." } }, { 124, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 124, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = "." } } }; //since there is an alt at position 124 ( a call of 156 alt / 1000 total, that means 844 original ref calls. //Of which we said, 100 will get sucked up. So that leaves 744 / 1000 calls for a reference. //So, we can still make a confident ref call. var mockNeighborhood = new Mock <IVcfNeighborhood>(); mockNeighborhood.Setup(n => n.GetOriginalVcfVariants()).Returns(variantsUsedByCaller.ToList()); mockNeighborhood.Setup(n => n.CalledVariants).Returns(stagedCalledMNVs); mockNeighborhood.Setup(n => n.CalledRefs).Returns(stagedCalledRefs); var accepted = VcfMerger.GetMergedListOfVariants(mockNeighborhood.Object, stagedVcfVariants.ToList()); Assert.Equal(3, accepted.Count); var vcfVariant2asNull = new VcfVariant() { ReferenceName = "chr1", ReferencePosition = 124, ReferenceAllele = "A", VariantAlleles = new[] { "." }, Genotypes = new List <Dictionary <string, string> >() { new Dictionary <string, string>() { { "GT", "0/0" }, { "DP", "1000" }, { "AD", "744" } } }, }; CheckVariantsMatch(originalVcfVariant, accepted[0]); CheckVariantsMatch(vcfVariant2asNull, accepted[1]); CheckVariantsMatch(originalVcfVariant3, accepted[2]); //re-stage the MNVs var stagedCalledMNVs2 = new Dictionary <int, List <CalledAllele> >() { { stagedCalledMNV.ReferencePosition, new List <CalledAllele>() { stagedCalledMNV } } }; mockNeighborhood.Setup(n => n.CalledVariants).Returns(stagedCalledMNVs2); // If one has been sucked up all the way, we should output it as a nocall // (but we have to statge it already as a no call allready, becasue the merger can't do the conversion. var stagedCalledRefs2 = new Dictionary <int, CalledAllele>() { { 123, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 123, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = "." } }, { 124, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 124, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = ".", Genotype = Genotype.RefLikeNoCall } } }; mockNeighborhood.Setup(n => n.CalledRefs).Returns(stagedCalledRefs2); accepted = VcfMerger.GetMergedListOfVariants(mockNeighborhood.Object, stagedVcfVariants); Assert.Equal(3, accepted.Count); vcfVariant2asNull = new VcfVariant() { ReferenceName = "chr1", ReferencePosition = 124, ReferenceAllele = "A", VariantAlleles = new[] { "." }, Genotypes = new List <Dictionary <string, string> >() { new Dictionary <string, string>() { { "GT", "./." } } }, }; CheckVariantsMatch(originalVcfVariant, accepted[0]); CheckVariantsMatch(vcfVariant2asNull, accepted[1]); CheckVariantsMatch(originalVcfVariant3, accepted[2]); }
public void GetAcceptedVariants_MergeVariants() { var originalVcfVariant = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156); var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156); var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156); var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr1", 234, "A", "C", 1000, 156); var vcfVariant0asRef = new VcfVariant() { ReferenceName = "chr1", ReferencePosition = 123, ReferenceAllele = "A", VariantAlleles = new[] { "." }, Genotypes = new List <Dictionary <string, string> >() { new Dictionary <string, string>() { { "GT", "0/0" } } }, }; var vcfVariant3asRef = new VcfVariant() { ReferenceName = "chr1", ReferencePosition = 234, ReferenceAllele = "A", VariantAlleles = new[] { "." }, Genotypes = new List <Dictionary <string, string> >() { new Dictionary <string, string>() { { "GT", "0/0" } } }, }; var vcfVariant2asNull = new VcfVariant() { ReferenceName = "chr1", ReferencePosition = 124, ReferenceAllele = "A", VariantAlleles = new[] { "." }, Genotypes = new List <Dictionary <string, string> >() { new Dictionary <string, string>() { { "GT", "./." } } }, }; var newMNV = new CalledAllele() { Chromosome = "chr1", ReferencePosition = 229, ReferenceAllele = "AA", AlternateAllele = "T", Genotype = Genotype.HeterozygousAltRef }; var stagedVcfVariants = new List <CalledAllele> { originalVcfVariant, originalVcfVariant2, originalVcfVariant3, originalVcfVariant4 }; var variantsUsedByCaller2 = new List <CalledAllele>() { originalVcfVariant, originalVcfVariant2, originalVcfVariant3 }; var nbhd = new Mock <IVcfNeighborhood>(); nbhd.Setup(n => n.GetOriginalVcfVariants()).Returns(variantsUsedByCaller2.ToList()); var stagedCalledMNVs2 = new Dictionary <int, List <CalledAllele> >() { { newMNV.ReferencePosition, new List <CalledAllele>() { newMNV } } }; nbhd.Setup(n => n.CalledVariants).Returns(stagedCalledMNVs2); // If one has been sucked up all the way, we should output it as a nocall // (but we have to statge it already as a no call allready, becasue the merger can't do the conversion. var stagedCalledRefs2 = new Dictionary <int, CalledAllele>() { { 123, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 123, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = "." } }, { 124, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 124, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = ".", Genotype = Genotype.RefLikeNoCall } }, { 234, new CalledAllele(AlleleCategory.Reference) { ReferencePosition = 234, Chromosome = "chr1", ReferenceAllele = "A", AlternateAllele = ".", Genotype = Genotype.HomozygousRef } } }; nbhd.Setup(n => n.CalledRefs).Returns(stagedCalledRefs2); var accepted = VcfMerger.GetMergedListOfVariants(nbhd.Object, stagedVcfVariants.ToList()); Assert.Equal(5, accepted.Count); CheckVariantsMatch(vcfVariant0asRef, accepted[0]); CheckVariantsMatch(vcfVariant2asNull, accepted[1]); CheckVariantsMatch(newMNV, accepted[2]); CheckVariantsMatch(vcfVariant3asRef, accepted[3]); CheckVariantsMatch(originalVcfVariant4, accepted[4]); }
public void GetMergedListOfVariants_LeaveUntouchedAsIs() { //chr7 55242464 . A G 6 LowSupport DP=287 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:286,1:287:0.00348:30:-7.4908:0.0304:0,0,0,0,0,1,56,17,49,56,69,40:4.294:0.000 //chr7 55242464 . AGGAATTAAGAGAAGC A 100 PASS DP=298 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:100:284,14:298:0.04698:30:-75.6792:0.0000:1,0,1,4,5,3,58,18,49,55,71,41:100.000:100.000 //chr7 55242481 . A T 6 LowSupport DP=306 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:305,1:306:0.00327:30:-7.4622:0.0556:0,0,0,0,0,1,63,20,54,52,69,48:3.669:0.000 //chr7 55242487 . C T 6 LowSupport DP=325 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:324,1:325:0.00308:30:-7.1283:0.0469:0,0,0,1,0,0,67,24,61,53,68,52:1.954:0.000 //chr7 55242489 . G T 6 LowSupport DP=327 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:326,1:327:0.00306:30:-7.0226:0.0411:0,0,1,0,0,0,71,23,60,54,67,52:2.177:0.000 var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr7", 55242464, "A", "G", 287, 1); originalVcfVariant1.ReferenceSupport = 286; var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 55242464, "AGGAATTAAGAGAAGC", "A", 298, 14); originalVcfVariant2.ReferenceSupport = 284; var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr7", 55242481, "A", "T", 306, 1); originalVcfVariant3.ReferenceSupport = 305; var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1); originalVcfVariant4.ReferenceSupport = 324; var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1); originalVcfVariant5.ReferenceSupport = 326; //#2mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGC A //chr7 55242464 . AGGAATTAAGAGAAGC A 100 PASS DP=286 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:100:272,13:286:0.04545:30:-100.0000:0.3024:0,0,0,0,0,0,0,0,0,0,0,0 //#3mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGCAA GAT. //chr7 55242464 . AGGAATTAAGAGAAGCAA GAT 6 PASS DP=293 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:6:226,1:293:0.00341:30:-100.0000:0.2854:0,0,0,0,0,0,0,0,0,0,0,0 var mnv1 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGC", "A", 286, 13); mnv1.ReferenceSupport = 272; var mnv2 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGCAA", "GAT", 293, 1); mnv2.ReferenceSupport = 226; //#4mnv accepted: chr7 55242487 . C T. var mnv3 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1); mnv3.ReferenceSupport = 324; //#5mnv accepted: chr7 55242489 . G T. var mnv4 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1); mnv4.ReferenceSupport = 326; var vs1 = new VariantSite((originalVcfVariant1)); var vs2 = new VariantSite((originalVcfVariant2)); var vs3 = new VariantSite((originalVcfVariant3)); var vs4 = new VariantSite((originalVcfVariant4)); var vs5 = new VariantSite((originalVcfVariant5)); var nbhd1 = new VcfNeighborhood(0, "chr7", vs1, vs2); nbhd1.AddVariantSite(vs3); nbhd1.AddVariantSite(vs4); nbhd1.AddVariantSite(vs5); var calledNbhd = new CallableNeighborhood(nbhd1, new VariantCallingParameters()); calledNbhd.CalledVariants = new Dictionary <int, List <CalledAllele> > { { mnv1.ReferencePosition, new List <CalledAllele>() { mnv1, mnv2 } }, { mnv3.ReferencePosition, new List <CalledAllele>() { mnv3 } }, { mnv4.ReferencePosition, new List <CalledAllele>() { mnv4 } }, }; //Became ref //chr7 55242481 . A . 100 PASS DP=306 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/.:100:305:306:0.00327:30:-100.0000:0.0556:0,0,0,0,0,0,0,0,0,0,0,0 var var3AsRef = TestHelper.CreateDummyAllele("chr7", 55242481, "A", ".", 306, 0); calledNbhd.CalledRefs = new Dictionary <int, CalledAllele>() { { var3AsRef.ReferencePosition, var3AsRef } }; var origAlleles = new List <Tuple <CalledAllele, string> >(); origAlleles.Add(new Tuple <CalledAllele, string>(originalVcfVariant1, "Variant1")); origAlleles.Add(new Tuple <CalledAllele, string>(originalVcfVariant2, "Variant2")); origAlleles.Add(new Tuple <CalledAllele, string>(originalVcfVariant3, "Variant3")); origAlleles.Add(new Tuple <CalledAllele, string>(originalVcfVariant4, "Variant4")); origAlleles.Add(new Tuple <CalledAllele, string>(originalVcfVariant5, "Variant5")); var mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles); Assert.Equal(5, mergedList.Count); // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple. Assert.Equal(3, mergedList.Count(x => x.Item2 == "")); // Variant4 and 5 should be retained as-is because after being spat out of phasing nothing has changed in terms of allele, ref support, allele support, or coverage Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant4")); Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5")); //Should take new one if anything is changed // Pretend mnv3 had a ref base sucked up by other MNV mnv3.ReferenceSupport = originalVcfVariant4.ReferenceSupport - 1; calledNbhd.CalledVariants = new Dictionary <int, List <CalledAllele> > { { mnv1.ReferencePosition, new List <CalledAllele>() { mnv1, mnv2 } }, { mnv3.ReferencePosition, new List <CalledAllele>() { mnv3 } }, { mnv4.ReferencePosition, new List <CalledAllele>() { mnv4 } }, }; mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles); Assert.Equal(5, mergedList.Count); // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple. Assert.Equal(4, mergedList.Count(x => x.Item2 == "")); // Only Variant5 should be retained as-is because after being spat out of phasing nothing has changed in terms of allele, ref support, allele support, or coverage // Variant 4 has changed in terms of ref support. Assert.Equal(0, mergedList.Count(x => x.Item2 == "Variant4")); Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5")); // Pretend mnv3 had coverage changed (not sure this is realistic, but to cover all bases adding test) mnv3.ReferenceSupport = originalVcfVariant4.ReferenceSupport; mnv3.TotalCoverage = originalVcfVariant4.TotalCoverage - 1; calledNbhd.CalledVariants = new Dictionary <int, List <CalledAllele> > { { mnv1.ReferencePosition, new List <CalledAllele>() { mnv1, mnv2 } }, { mnv3.ReferencePosition, new List <CalledAllele>() { mnv3 } }, { mnv4.ReferencePosition, new List <CalledAllele>() { mnv4 } }, }; mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles); Assert.Equal(5, mergedList.Count); // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple. Assert.Equal(4, mergedList.Count(x => x.Item2 == "")); // Only Variant5 should be retained as-is because after being spat out of phasing nothing has changed in terms of allele, ref support, allele support, or coverage // Variant 4 has changed in terms of ref support. Assert.Equal(0, mergedList.Count(x => x.Item2 == "Variant4")); Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5")); // Pretend mnv3 had allele support changed (not sure this is realistic, but to cover all bases adding test) mnv3.TotalCoverage = originalVcfVariant4.TotalCoverage; mnv3.AlleleSupport = originalVcfVariant4.AlleleSupport - 1; calledNbhd.CalledVariants = new Dictionary <int, List <CalledAllele> > { { mnv1.ReferencePosition, new List <CalledAllele>() { mnv1, mnv2 } }, { mnv3.ReferencePosition, new List <CalledAllele>() { mnv3 } }, { mnv4.ReferencePosition, new List <CalledAllele>() { mnv4 } }, }; mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles); Assert.Equal(5, mergedList.Count); // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple. Assert.Equal(4, mergedList.Count(x => x.Item2 == "")); // Only Variant5 should be retained as-is because after being spat out of phasing nothing has changed in terms of allele, ref support, allele support, or coverage // Variant 4 has changed in terms of ref support. Assert.Equal(0, mergedList.Count(x => x.Item2 == "Variant4")); Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5")); }