/// <summary>
/// Runs the processing steps for one neighborhood: fetches its vead groups, clusters them,
/// turns the clusters into MNV candidates, sets genotypes / prunes excess alleles, and then
/// calls MNVs and references on the neighborhood.
/// </summary>
/// <param name="neighborhood">
/// The neighborhood to process. Nothing is returned; every step below operates on this instance.
/// </param>
/// <remarks>
/// Any exception thrown inside the processing steps is written to the log and is not rethrown.
/// </remarks>
private void ProcessNeighborhood(VcfNeighborhood neighborhood)
{
    Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

    try
    {
        var neighborhoodClusterer = _factory.CreateNeighborhoodClusterer();
        var groupSource = _factory.CreateVeadGroupSource();
        var veadGroups = groupSource.GetVeadGroups(neighborhood);

        // Step 1: cluster the vead groups of this neighborhood.
        var clusterResult = neighborhoodClusterer.ClusterVeadGroups(veadGroups.ToList(), neighborhood.Id);

        // The vead groups are not read again below, so let go of the references.
        groupSource = null;
        veadGroups = null;

        // Variants are crushed to the same position exactly when multiple VCF lines per locus are not allowed.
        bool crushVariantsToSamePosition = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

        // Step 2: turn the clusters into MNV candidates.
        var candidateClusters = clusterResult.Clusters;
        var minimumBaseCallQuality = _factory.Options.BamFilterParams.MinimumBaseCallQuality;
        var maximumVariantQScore = _factory.Options.VariantCallingParams.MaximumVariantQScore;

        neighborhood.CreateMnvsFromClusters(
            candidateClusters,
            minimumBaseCallQuality,
            maximumVariantQScore,
            crushVariantsToSamePosition);

        neighborhood.SetGenotypesAndPruneExcessAlleles();

        // Step 3: variant-call the candidates (MNVs first, then references).
        var mnvCaller = _factory.CreateVariantCaller();
        mnvCaller.CallMNVs(neighborhood);
        mnvCaller.CallRefs(neighborhood);

        // Nothing is written here; writing waits until the VCF is ready.
    }
    catch (Exception ex)
    {
        // Best effort: record the failure for this neighborhood and carry on.
        Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
        Logger.WriteExceptionToLog(ex);
    }
}
/// <summary>
/// Builds a neighborhood and a mocked cluster whose consensus comes from three identical veads,
/// then checks the first candidate variant produced by <c>CreateMnvsFromClusters</c> and the
/// per-site depths reported by <c>DepthAtSites</c>.
/// </summary>
public void AddMnvsFromClusters()
{
    //TODO even with mock cluster this takes too much setting up.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");

    // Three reads (r1..r3), each created from the same three-row string array.
    var veads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3" })
    {
        veads.Add(PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }));
    }

    // Give the three sites of every read the VCF reference positions 1, 2 and 3.
    foreach (var vead in veads)
    {
        vead.SiteResults[0].VcfReferencePosition = 1;
        vead.SiteResults[1].VcfReferencePosition = 2;
        vead.SiteResults[2].VcfReferencePosition = 3;
    }

    neighborhood.ReferenceSequence = "CGT";

    // Mocked cluster: fixed counts per site, consensus and vead groups taken from the reads above.
    var clusterMock = new Mock<ICluster>();
    clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

    var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);
    clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
    clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup> { consensusGroup });

    neighborhood.CreateMnvsFromClusters(new List<ICluster> { clusterMock.Object }, 20, 100);

    // The first candidate is expected to be the CGT -> AAA variant.
    var firstCandidate = neighborhood.CandidateVariants.First();
    Assert.Equal(6, firstCandidate.TotalCoverage);
    Assert.Equal(6, firstCandidate.AlleleSupport);
    Assert.Equal("CGT", firstCandidate.ReferenceAllele);
    Assert.Equal("AAA", firstCandidate.AlternateAllele);

    // Both arrays are produced by DepthAtSites through its out parameters.
    int[] depths;
    int[] nocalls;
    neighborhood.DepthAtSites(new List<ICluster> { clusterMock.Object }, out depths, out nocalls);

    // Three sites, each expected to have depth 3.
    Assert.Equal(3, depths.Length);
    foreach (var depth in depths)
    {
        Assert.Equal(3, depth);
    }
}
// Regression test written after bug ScyllaLoosingRefCalls_PICS-723 was found.
// A 1/. genotype was reported where 1/0 was expected, because all of the
// reference calls (the "0"s) had been incorrectly sucked up.
// Example: MNV ACG -> AG claimed 50 refs, and those 50 refs were then (incorrectly)
// subtracted from that very MNV. Ref counts must never be subtracted from the
// same MNV that claimed them; that was not the intent of the algorithm.
//
// The affected method is CreateMnvsFromClusters in VcfNbhd.
public void CreateMnvsFromClusters_TakeUpRefCount()
{
    const int position = 123;
    const int totalDepth = 1000;
    const int snvSupport = 200;
    const int mnvSupport = 300;
    const int insertionSupport = 250;
    const int claimedRefCount = 100;

    var dummyAllele1 = TestHelper.CreateDummyAllele("chr1", position, "ACG", "AT", 1000, 156);
    var dummyAllele2 = TestHelper.CreateDummyAllele("chr1", position, "A", "TTTTTT", 1000, 200);
    // Created alongside the other two, but not referenced by the rest of this test.
    var dummyAllele3 = TestHelper.CreateDummyAllele("chr1", position, "AC", "TT", 1000, 100);

    var firstSite = new VariantSite(dummyAllele1);
    var secondSite = new VariantSite(dummyAllele2);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", firstSite, secondSite, "");
    neighborhood.SetRangeOfInterest();

    // Three accepted phased variants at the same position: an SNV, an MNV and an insertion.
    var phasedVariants = new[]
    {
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = position,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = totalDepth,
            AlleleSupport = snvSupport
        },
        new CalledAllele(AlleleCategory.Mnv)
        {
            Chromosome = "chr1",
            ReferencePosition = position,
            ReferenceAllele = "ACG",
            AlternateAllele = "AT",
            VariantQscore = 100,
            TotalCoverage = totalDepth,
            AlleleSupport = mnvSupport
        },
        new CalledAllele(AlleleCategory.Insertion)
        {
            Chromosome = "chr1",
            ReferencePosition = position,
            ReferenceAllele = "A",
            AlternateAllele = "AAAAA",
            VariantQscore = 100,
            TotalCoverage = totalDepth,
            AlleleSupport = insertionSupport
        }
    };

    foreach (var phasedVariant in phasedVariants)
    {
        neighborhood.AddAcceptedPhasedVariant(phasedVariant);
    }

    // Default behavior: the lookup is empty, so nothing gets sucked up.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    // A single-read cluster built around the first site.
    firstSite.VcfReferencePosition = position;
    var vead = new Vead("dummy", new[] { firstSite });
    var veadGroup = new VeadGroup(vead);
    var cluster = new Cluster("test", new List<VeadGroup> { veadGroup });
    cluster.ResetConsensus();

    // ---- Pass 1: no ref counts claimed ----
    neighborhood.CreateMnvsFromClusters(new List<Cluster> { cluster }, 20, 100);
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(2, calledVariants.Count);
    var variantsAtPosition = calledVariants[position];
    Assert.Equal(3, variantsAtPosition.Count);
    Assert.Equal(1, calledRefs.Count);

    // Ref support on every variant is simply total depth minus allele support
    // (overly simple for now); nothing should be sucked up.
    Assert.Equal(totalDepth - snvSupport, variantsAtPosition[0].ReferenceSupport);
    Assert.Equal(totalDepth - mnvSupport, variantsAtPosition[1].ReferenceSupport);
    Assert.Equal(totalDepth - insertionSupport, variantsAtPosition[2].ReferenceSupport);

    // ---- Pass 2: variant 0 now sucks up 100 ref calls at this position ----
    var claimedByFirstCandidate = new SuckedUpRefRecord
    {
        Counts = claimedRefCount,
        AlleleThatClaimedIt = neighborhood.CandidateVariants[0]
    };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>
    {
        { position, claimedByFirstCandidate }
    };

    neighborhood.CreateMnvsFromClusters(new List<Cluster> { cluster }, 20, 100);
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;
    variantsAtPosition = calledVariants[position];

    // The refs should only be taken up by the first variant, so its own ref support
    // stays at total depth minus allele support.
    Assert.Equal(totalDepth - snvSupport, variantsAtPosition[0].ReferenceSupport);

    // The other two lose the sucked-up refs as well: total depth - allele support - sucked-up refs.
    // (The old, buggy expectations for these two were 1000 - 300 and 1000 - 250.)
    Assert.Equal(totalDepth - mnvSupport - claimedRefCount, variantsAtPosition[1].ReferenceSupport);
    Assert.Equal(totalDepth - insertionSupport - claimedRefCount, variantsAtPosition[2].ReferenceSupport);
}