/// <summary>
/// Reads alignments for the reference sequence of <paramref name="neighborhood"/>, turns each
/// usable read into a <see cref="Vead"/>, and groups the veads by their variant sequence.
/// </summary>
/// <remarks>
/// Every read that is examined is also recorded in a per-neighborhood debug log
/// (tab-separated: read name, disposition, IsFirstMate, CIGAR, position).
/// Reading stops at the first read reported as past the neighborhood, or when the
/// alignment extractor has no more reads.
/// </remarks>
/// <param name="neighborhood">
/// Neighborhood whose variant sites are evaluated against each read. The reference name is
/// taken from its first variant site.
/// </param>
/// <returns>One <see cref="VeadGroup"/> per distinct variant sequence found among the reads used.</returns>
public IEnumerable<VeadGroup> GetVeadGroups(VcfNeighborhood neighborhood)
{
    var veadGroups = new Dictionary<string, VeadGroup>();
    var neighbors = neighborhood.VcfVariantSites;
    var refName = neighbors.First().ReferenceName;

    _alignmentExtractor.Jump(refName);

    var veadMaker = new VeadFinder(_options);

    var debugLog = Path.Combine(_debugLogRoot, refName + "_" + neighborhood.Id + "_ReadsInNbhd.txt");
    WriteToReadLog(debugLog, string.Join("\t", "ReadName", "used?", "IsFirstMate", "CigarData", "Read.Position"));

    // The same Read instance is handed to the extractor on every iteration.
    var read = new Read();

    // Keep reading the alignments until the extractor runs out of reads
    // or we have moved past the neighborhood.
    while (_alignmentExtractor.GetNextAlignment(read))
    {
        if (ShouldSkipRead(read, neighborhood))
        {
            WriteToReadLog(debugLog, string.Join("\t", read.Name, "skipped", read.IsFirstMate, read.CigarData.ToString(), read.Position));
            continue;
        }

        if (PastNeighborhood(read, neighborhood))
        {
            WriteToReadLog(debugLog, string.Join("\t", read.Name, "past nbhd", read.IsFirstMate, read.CigarData.ToString(), read.Position));
            break;
        }

        // Make a vead name that encodes the mate (fwd/rev) and the read position.
        var readName = read.Name + "_" + (read.IsFirstMate ? "fwd_" : "rev_") + read.Position;

        // Logged before the site lookup, so a read may be logged as "will use"
        // and still be dropped below when it yields no site results.
        WriteToReadLog(debugLog, string.Join("\t", read.Name, "will use", read.IsFirstMate, read.CigarData.ToString(), read.Position));

        // Map from bases to ref position.
        var vead = new Vead(readName, veadMaker.FindVariantResults(neighbors, read));

        if (vead.SiteResults == null || !vead.SiteResults.Any())
        {
            continue;
        }

        // Add the vead to the group that shares its variant sequence,
        // creating that group on first sight. Single lookup via TryGetValue.
        var hash = vead.ToVariantSequence();
        VeadGroup existingGroup;
        if (veadGroups.TryGetValue(hash, out existingGroup))
        {
            existingGroup.AddSupport(vead);
        }
        else
        {
            veadGroups.Add(hash, new VeadGroup(vead));
        }
    }

    return veadGroups.Values;
}
// Regression test written after bug ScyllaLoosingRefCalls_PICS-723 was found.
// A 1/. genotype was reported where 1/0 was expected, because all of the
// reference calls (the "0"s) had been incorrectly sucked up.
// I.e. MNV ACG -> AG claimed 50 refs, and we then (incorrectly) subtracted
// those 50 refs from that very same MNV. The ref counts must never be
// subtracted from the allele that claimed them; that was not the intent
// of the algorithm.
//
// The affected method is: CreateMnvsFromClusters in VcfNbhd.
public void CreateMnvsFromClusters_TakeUpRefCount()
{
    const string chromosome = "chr1";
    const int position = 123;
    const int explicitRefSupport = 350;
    const int claimedRefCount = 100;

    // Two original VCF variants at the same position form the neighborhood.
    var firstSite = new VariantSite(TestHelper.CreateDummyAllele(chromosome, position, "ACG", "AT", 1000, 156));
    var secondSite = new VariantSite(TestHelper.CreateDummyAllele(chromosome, position, "A", "TTTTTT", 1000, 200));

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var vcfNeighborhood = new VcfNeighborhood(0, chromosome, firstSite, secondSite);
    var callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    // Three accepted phased variants, each with an explicitly set reference support.
    var acceptedSnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = chromosome,
        ReferencePosition = position,
        ReferenceAllele = "A",
        AlternateAllele = "A",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 200,
        ReferenceSupport = explicitRefSupport
    };
    callableNeighborhood.AddAcceptedPhasedVariant(acceptedSnv);

    var acceptedMnv = new CalledAllele(AlleleCategory.Mnv)
    {
        Chromosome = chromosome,
        ReferencePosition = position,
        ReferenceAllele = "ACG",
        AlternateAllele = "AT",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 300,
        ReferenceSupport = explicitRefSupport
    };
    callableNeighborhood.AddAcceptedPhasedVariant(acceptedMnv);

    var acceptedInsertion = new CalledAllele(AlleleCategory.Insertion)
    {
        Chromosome = chromosome,
        ReferencePosition = position,
        ReferenceAllele = "A",
        AlternateAllele = "AAAAA",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 250,
        ReferenceSupport = explicitRefSupport
    };
    callableNeighborhood.AddAcceptedPhasedVariant(acceptedInsertion);

    // --- Pass 1: default behavior, nothing gets sucked up. ---
    callableNeighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    firstSite.VcfReferencePosition = position;
    var dummyVead = new Vead("dummy", new VariantSite[] { firstSite });
    var dummyGroup = new VeadGroup(dummyVead);
    var cluster = new Cluster("test", new List<VeadGroup> { dummyGroup });
    cluster.ResetConsensus();

    callableNeighborhood.CreateMnvsFromClusters(new List<Cluster> { cluster }, 20);
    variantCaller.CallMNVs(callableNeighborhood);
    variantCaller.CallRefs(callableNeighborhood);

    var calledVariants = callableNeighborhood.CalledVariants;
    var calledRefs = callableNeighborhood.CalledRefs;

    Assert.Equal(2, calledVariants.Count);
    Assert.Equal(3, calledVariants[position].Count);
    Assert.Equal(1, calledRefs.Count);

    // Check the ref counts on all the MNVs. Nothing should be sucked up.
    // (Previously ref support was "total depth - allele support", which was
    // overly simple; now it is the explicitly set ref support.)
    var allelesAtSite = calledVariants[position];
    Assert.Equal(explicitRefSupport, allelesAtSite[0].ReferenceSupport);
    Assert.Equal(explicitRefSupport, allelesAtSite[1].ReferenceSupport);
    Assert.Equal(explicitRefSupport, allelesAtSite[2].ReferenceSupport);

    // --- Pass 2: variant 0 now sucks up 100 ref calls. ---
    var claimedByFirstCandidate = new SuckedUpRefRecord
    {
        Counts = claimedRefCount,
        AlleleThatClaimedIt = callableNeighborhood.CandidateVariants[0]
    };
    var usedRefCounts = new Dictionary<int, SuckedUpRefRecord>();
    usedRefCounts.Add(position, claimedByFirstCandidate);
    callableNeighborhood.UsedRefCountsLookup = usedRefCounts;

    callableNeighborhood.CreateMnvsFromClusters(new List<Cluster> { cluster }, 20);
    variantCaller.CallMNVs(callableNeighborhood);
    variantCaller.CallRefs(callableNeighborhood);

    calledVariants = callableNeighborhood.CalledVariants;
    calledRefs = callableNeighborhood.CalledRefs;

    // Check the ref counts on all the MNVs. The allele that claimed the refs
    // keeps its full ref support; the claimed count is subtracted only from
    // the other alleles.
    // Old (buggy) expectations were 1000 - 300 for [1] and 1000 - 250 for [2],
    // i.e. total depth - allele support.
    allelesAtSite = calledVariants[position];
    Assert.Equal(explicitRefSupport, allelesAtSite[0].ReferenceSupport);
    Assert.Equal(explicitRefSupport - claimedRefCount, allelesAtSite[1].ReferenceSupport); // refSupport - sucked up ref
    Assert.Equal(explicitRefSupport - claimedRefCount, allelesAtSite[2].ReferenceSupport); // refSupport - sucked up ref
}