/// <summary>
/// Shared driver for grouping tests: builds a neighborhood from the given variant tuples,
/// runs the reads through a <c>VeadGroupSource</c>, and checks the size of each resulting group.
/// </summary>
/// <param name="reads">Reads served by the mock alignment extractor.</param>
/// <param name="expectedGroupMemberships">Expected <c>NumVeads</c> per group, in output order.</param>
/// <param name="variants">Tuples of (position, reference allele, alternate allele).</param>
private void ExecuteGroupingTest(List<Read> reads, List<int> expectedGroupMemberships, IEnumerable<Tuple<int, string, string>> variants)
{
    // Project every (position, ref, alt) tuple into a variant site.
    var sites = variants
        .Select(v => new VariantSite(v.Item1) { VcfReferenceAllele = v.Item2, VcfAlternateAllele = v.Item3 })
        .ToList();

    var extractor = new MockAlignmentExtractor(reads);
    var filterParameters = new BamFilterParameters() { MinimumMapQuality = 20 };
    var groupSource = new VeadGroupSource(extractor, filterParameters, false, "");

    var vcfNbhd = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121))
    {
        VcfVariantSites = sites
    };
    var callableNbhd = new CallableNeighborhood(vcfNbhd, new VariantCallingParameters());

    var groups = groupSource.GetVeadGroups(callableNbhd).ToList();

    Assert.Equal(expectedGroupMemberships.Count, groups.Count);

    var index = 0;
    foreach (var group in groups)
    {
        Assert.Equal(expectedGroupMemberships[index], group.NumVeads);
        index++;
    }
}
/// <summary>
/// Decides whether a read should be ignored for the given neighborhood.
/// </summary>
/// <param name="read">The read under consideration.</param>
/// <param name="neighborhood">The neighborhood currently being processed.</param>
/// <returns>
/// True when the read is a PCR duplicate (and duplicates are being removed), is not a proper pair
/// (and only proper pairs are wanted), has a map quality below the configured minimum, or ends
/// before the neighborhood's first position of interest; false otherwise.
/// </returns>
public bool ShouldSkipRead(Read read, CallableNeighborhood neighborhood)
{
    // Evaluated left to right with short-circuiting, mirroring the order of the individual checks.
    return (_options.RemoveDuplicates && read.IsPcrDuplicate)
        || (_options.OnlyUseProperPairs && !read.IsProperPair)
        || read.MapQuality < _options.MinimumMapQuality
        || read.EndPosition < neighborhood.FirstPositionOfInterest;
}
/// <summary>
/// With no phased variants staged, calling a two-site neighborhood should yield no MNVs
/// and one reference call per original site.
/// </summary>
public void CallThroughAnEmptyNbhd()
{
    // Two adjacent dummy SNVs; each is created with the arguments (1000, 156).
    var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
    var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    variantCaller.CallMNVs(callable);
    variantCaller.CallRefs(callable);

    var calledMnvs = callable.CalledVariants;
    var calledRefs = callable.CalledRefs;

    Assert.Equal(0, calledMnvs.Count);
    Assert.Equal(2, calledRefs.Count);

    // Both sites are expected to come back as "0/." (reference plus no-call), i.e. a reference
    // call that is known not to be homozygous reference because an alt was reported there.
    var positions = new[] { 123, 124 };
    foreach (var position in positions)
    {
        Assert.Equal(Genotype.RefAndNoCall, calledRefs[position].Genotype);
    }

    foreach (var position in positions)
    {
        Assert.Equal(position, calledRefs[position].ReferencePosition);
    }
}
/// <summary>
/// Checks that <c>GetOriginalVcfVariants</c> hands back the original alleles attached to the
/// neighborhood's variant sites, in site order.
/// </summary>
public void GetOriginalVcfIndexes()
{
    var alleleAtPosition1 = new CalledAllele() { ReferencePosition = 1 };
    var alleleAtPosition10 = new CalledAllele() { ReferencePosition = 10 };

    // Both sites are created at 123; only the attached original alleles differ.
    var firstSite = new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition1 };
    var secondSite = new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition10 };

    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);
    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    var recovered = callable.GetOriginalVcfVariants();

    Assert.Equal(2, recovered.Count);
    Assert.Equal(1, recovered[0].ReferencePosition);
    Assert.Equal(10, recovered[1].ReferencePosition);
}
/// <summary>
/// Feeds a mix of ordinary and "clipped" reads (names prefixed with "clip_") through the
/// soft-clip support finder and checks that the candidate's allele support grows from 50 to 57.
/// Cigar data is not involved: the comparator is mocked and keys purely off the read name.
/// </summary>
public void SupplementSupportWithClippedReads()
{
    // The mocked comparator reports support for any read whose name begins with 'c'.
    var comparatorMock = new Mock<IMNVClippedReadComparator>();
    comparatorMock
        .Setup(x => x.DoesClippedReadSupportMNV(It.IsAny<Read>(), It.IsAny<CalledAllele>()))
        .Returns((Read read, CalledAllele allele) => read.Name[0] == 'c');

    var reads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 3, "read4"),
        // +1: counted purely because the mocked comparator says so.
        CreateRead("chr1", "ACGT", 3, "clip_read4", matePosition: 3),

        CreateRead("chr1", "ACGT", 12, "read1", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read2", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read1", read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read_notmapped", isMapped: false, isProperPair: false, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read3", isProperPair: false, read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read2", read2: true, matePosition: 10),

        // Six clipped reads at position 12, each contributing +1.
        CreateRead("chr1", "ACGT", 12, "clip_read1", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "clip_read2", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "clip_read1", read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "clip_read_notmapped", isMapped: false, isProperPair: false, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "clip_read3", isProperPair: false, read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "clip_read2", read2: true, matePosition: 10),

        CreateRead("chr1", "ACGT", 30, "read5"),
        // Positioned after the neighborhood; expected not to be counted.
        CreateRead("chr1", "ACGT", 30, "clip_read5", matePosition: 30),
    };

    var extractor = new MockAlignmentExtractor(reads);

    int noiseLevel = 20;
    int qScoreCap = 100;
    int minimumMnvSize = 6;
    MNVSoftClipSupportFinder supportFinder = new MNVSoftClipSupportFinder(
        extractor,
        comparatorMock.Object,
        noiseLevel,
        qScoreCap,
        minimumMnvSize);

    var candidateMnv = TestHelper.CreateDummyAllele("chr1", 10, "AAAAAA", "CCC", 2000, 50);

    var vcfNeighborhood = new VcfNeighborhood(0, "chr", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C", ReferenceName = "chr" },
            new VariantSite(25) { VcfReferenceAllele = "T", VcfAlternateAllele = "G", ReferenceName = "chr" },
        },
    };

    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters(), null);
    callable.AddAcceptedPhasedVariant(candidateMnv);

    // Before: only the support the dummy allele was created with.
    Assert.Equal(50, callable.CandidateVariants[0].AlleleSupport);

    supportFinder.SupplementSupportWithClippedReads(callable);

    // After: seven clipped reads have been added on top.
    Assert.Equal(57, callable.CandidateVariants[0].AlleleSupport);
}
/// <summary>
/// Configures the caller with deliberately strict thresholds and verifies that the resulting
/// variant and reference calls carry the corresponding filter flags.
/// </summary>
public void CheckAddingFilters()
{
    var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
    var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));

    // Thresholds chosen so that every call is guaranteed to trip them.
    var strictParameters = new VariantCallingParameters
    {
        LowDepthFilter = 2000,
        MinimumFrequencyFilter = 0.80F,
        MinimumVariantQScoreFilter = 300
    };
    var variantCaller = new VariantCaller(strictParameters, new BamFilterParameters());

    // Note: the neighborhood itself is built with default parameters; only the caller is strict.
    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    var stagedVariant = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "T",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 500
    };
    callable.AddAcceptedPhasedVariant(stagedVariant);
    callable.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    variantCaller.CallMNVs(callable);
    variantCaller.CallRefs(callable);

    var calledMnvs = callable.CalledVariants;
    var calledRefs = callable.CalledRefs;

    Assert.Equal(1, calledMnvs.Count);
    Assert.Equal(1, calledMnvs[123].Count);

    var variantFilters = calledMnvs[123][0].Filters;
    Assert.True(variantFilters.Contains(FilterType.LowDepth));
    Assert.True(variantFilters.Contains(FilterType.LowVariantFrequency));
    Assert.True(variantFilters.Contains(FilterType.LowVariantQscore));

    Assert.Equal(2, calledRefs.Count);

    // Reference calls are not checked for LowVariantFrequency; only depth and q-score flags are asserted.
    var referenceFilters = calledRefs[123].Filters;
    Assert.True(referenceFilters.Contains(FilterType.LowDepth));
    Assert.True(referenceFilters.Contains(FilterType.LowVariantQscore));
}
/// <summary>
/// Walks the alignments on the neighborhood's reference and, for every candidate variant that is
/// large enough to qualify for clip rescue, adds one unit of support per read that the clipped-read
/// comparator reports as supporting it. Each candidate's Q score is then recomputed.
/// </summary>
/// <param name="neighborhood">
/// Neighborhood whose <c>CandidateVariants</c> are updated in place. Its <c>VcfVariantSites</c>
/// must contain at least one site, because the reference name is taken from the first one.
/// </param>
public void SupplementSupportWithClippedReads(CallableNeighborhood neighborhood)
{
    var refName = neighborhood.VcfVariantSites.First().ReferenceName;
    _alignmentExtractor.Jump(refName);

    Logger.WriteToLog("Supplementing candidate variant support with soft clipped reads.");

    var read = new Read();

    // Stop when the extractor has no more reads.
    while (_alignmentExtractor.GetNextAlignment(read))
    {
        // Check whether this read supports any candidate variant.
        foreach (var mnv in neighborhood.CandidateVariants)
        {
            // Do not boost support for SNVs and short MNVs.
            if (mnv.ReferenceAllele.Length + mnv.AlternateAllele.Length < _minSizeForClipRescue)
            {
                continue;
            }

            if (_mnvClippedReadComparator.DoesClippedReadSupportMNV(read, mnv))
            {
                // The same read may support several candidates; it is counted for each of them.
                // Known risk: a read supporting one MNV may also support other candidates,
                // which can lead to false positives.
                mnv.AlleleSupport++;
                mnv.SoftClipAlleleSupport++;
            }
        }

        // The position check comes after the comparison, so the first read beyond the
        // look-ahead boundary is still compared before the scan stops.
        if (read.Position > neighborhood.LastPositionOfInterestWithLookAhead)
        {
            break;
        }
    }

    // Refresh the Q score now that support may have changed. Some variants will therefore be
    // scored twice (once when created, once here).
    // NOTE(review): the second argument is ReferenceSupport - confirm that this is the value
    // AssignPoissonQScore expects in that position.
    foreach (var mnv in neighborhood.CandidateVariants)
    {
        mnv.VariantQscore = VariantQualityCalculator.AssignPoissonQScore(
            mnv.AlleleSupport,
            mnv.ReferenceSupport,
            _qNoiseLevel,
            _maxQscore);

        // Report the clipped-read contribution itself. The previous expression
        // (AlleleSupport - SoftClipAlleleSupport) logged the support that did NOT come from clips.
        Logger.WriteToLog("Added soft clip support of {0} to MNV: {1}.", mnv.SoftClipAlleleSupport, mnv.ToString());
    }
}
/// <summary>
/// Reports whether a read has a soft clip whose boundary lies inside the neighborhood's
/// soft-clip window, i.e. between <c>SoftClipEndBeforeNbhd</c> and <c>SoftClipPosAfterNbhd</c> inclusive.
/// </summary>
/// <param name="read">The read to examine.</param>
/// <param name="neighborhood">The neighborhood supplying the window bounds.</param>
/// <returns>
/// True if the read starts with a soft clip and its <c>Position</c> is inside the window, or
/// ends with a soft clip and its <c>EndPosition</c> is inside the window; otherwise false.
/// </returns>
public bool IsClippedWithinNeighborhood(Read read, CallableNeighborhood neighborhood)
{
    // Leading clip: the read's position (where the clip ends) must fall inside the window.
    var leadingClipInside = read.StartsWithSoftClip
        && read.Position >= neighborhood.SoftClipEndBeforeNbhd
        && read.Position <= neighborhood.SoftClipPosAfterNbhd;
    if (leadingClipInside)
    {
        return true;
    }

    // Trailing clip: the read's end position (where the clip begins) must fall inside the window.
    var trailingClipInside = read.EndsWithSoftClip
        && read.EndPosition >= neighborhood.SoftClipEndBeforeNbhd
        && read.EndPosition <= neighborhood.SoftClipPosAfterNbhd;
    return trailingClipInside;
}
/// <summary>
/// Runs the full per-neighborhood pipeline: cluster the vead groups, turn clusters into MNV
/// candidates, optionally add soft-clip support, assign genotypes, and call variants and references.
/// Any exception is logged and swallowed; it is not rethrown to the caller.
/// </summary>
/// <param name="neighborhood">The neighborhood to process; it is updated in place.</param>
private void CallMnvsForNeighborhood(CallableNeighborhood neighborhood)
{
    Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

    try
    {
        var neighborhoodClusterer = _factory.CreateNeighborhoodClusterer();
        var groupSource = _factory.CreateVeadGroupSource();
        var veadGroups = groupSource.GetVeadGroups(neighborhood);

        // Step 1: cluster the vead groups.
        var clusterSet = neighborhoodClusterer.ClusterVeadGroups(veadGroups.ToList(), neighborhood.Id);

        // The source and the raw groups are no longer needed; drop the references.
        groupSource = null;
        veadGroups = null;

        // When multiple VCF lines per locus are not allowed, variants are crushed to the same position.
        bool crushToSamePosition = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;
        var minimumBaseCallQuality = _factory.Options.BamFilterParams.MinimumBaseCallQuality;

        // Step 2: convert clusters into MNV candidates.
        neighborhood.CreateMnvsFromClusters(clusterSet.Clusters, minimumBaseCallQuality, crushToSamePosition);

        var useClippedReads = neighborhood.NumberClippedReads > 0
            && _factory.Options.SoftClipSupportParams.UseSoftClippedReads;
        if (useClippedReads)
        {
            _factory.CreateSoftClipSupportFinder().SupplementSupportWithClippedReads(neighborhood);
        }

        neighborhood.SetGenotypesAndPruneExcessAlleles();

        // Step 3: call the candidates. Writing is deferred until the VCF is ready.
        var caller = _factory.CreateVariantCaller();
        caller.CallMNVs(neighborhood);
        caller.CallRefs(neighborhood);
    }
    catch (Exception ex)
    {
        Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
        Logger.WriteExceptionToLog(ex);
    }
}
/// <summary>
/// Verifies the reference-sequence substring and the first/last positions of interest that are
/// established when a <c>CallableNeighborhood</c> is built, both without and with a genome.
/// </summary>
public void SetRangeOfInterestTests()
{
    var chromosome = "chr";

    // Case 1: no genome supplied.
    var vcfNeighborhood = new VcfNeighborhood(0, chromosome, new VariantSite(120), new VariantSite(121));

    // Until a callable neighborhood is built, every position of interest reads -1.
    Assert.Equal(-1, vcfNeighborhood.FirstPositionOfInterest);
    Assert.Equal(-1, vcfNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(-1, vcfNeighborhood.LastPositionOfInterestWithLookAhead);

    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    // Without a genome the reference substring is expected to be placeholder "R" characters.
    Assert.Equal("RR", callable.NbhdReferenceSequenceSubstring);
    Assert.Equal(120, vcfNeighborhood.FirstPositionOfInterest);
    Assert.Equal(121, vcfNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(122, vcfNeighborhood.LastPositionOfInterestWithLookAhead);

    // Case 2: a genome is supplied.
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
    Genome genome = new Genome(genomeDirectory, new List<string>() { chromosome });
    ChrReference chrReference = genome.GetChrReference(chromosome);

    vcfNeighborhood = new VcfNeighborhood(0, chromosome, new VariantSite(120), new VariantSite(121));

    Assert.Equal(-1, vcfNeighborhood.FirstPositionOfInterest);
    Assert.Equal(-1, vcfNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(-1, vcfNeighborhood.LastPositionOfInterestWithLookAhead);

    callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters(), chrReference);

    // With a genome the real bases are expected.
    Assert.Equal("TG", callable.NbhdReferenceSequenceSubstring);
    Assert.Equal(120, callable.FirstPositionOfInterest);
    Assert.Equal(121, callable.LastPositionOfInterestInVcf);
    Assert.Equal(122, callable.LastPositionOfInterestWithLookAhead);
}
/// <summary>
/// Exercises the positional part of <c>NeighborhoodReadFilter.ShouldSkipRead</c> against a
/// neighborhood whose variant sites sit at 10 and 15.
/// </summary>
public void ShouldSkipReadTest()
{
    var readFilter = new NeighborhoodReadFilter(new BamFilterParameters() { MinimumMapQuality = 20 });

    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
        },
    };
    vcfNeighborhood.SetRangeOfInterest();

    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters(), null);

    // A 4-base read starting at 6 ends before the first variant site: skipped.
    var readEndingBefore = TestHelper.CreateRead("chr1", "ACGT", 6);
    Assert.True(readFilter.ShouldSkipRead(readEndingBefore, callable));

    // Starting at 7, the read reaches one base of the neighborhood: kept.
    var readTouchingStart = TestHelper.CreateRead("chr1", "ACGT", 7);
    Assert.False(readFilter.ShouldSkipRead(readTouchingStart, callable));

    // Starting at 12, the read partially covers the neighborhood: kept.
    var readInside = TestHelper.CreateRead("chr1", "ACGT", 12);
    Assert.False(readFilter.ShouldSkipRead(readInside, callable));

    // Starting at 16, the read begins after the neighborhood; it is still not skipped.
    var readStartingAfter = TestHelper.CreateRead("chr1", "ACGT", 16);
    Assert.False(readFilter.ShouldSkipRead(readStartingAfter, callable));

    // Not covered here: PCR-duplicate, low map-quality and non-proper-pair reads. CreateRead
    // would need extending to build those; the conditions were judged simple enough to leave out.
}
/// <summary>
/// Builds a mocked cluster from three identical veads, converts it into an MNV candidate, and
/// checks the candidate's alleles, coverage and support as well as the per-site depths.
/// </summary>
public void AddMnvsFromClusters()
{
    // TODO: even with a mocked cluster this needs a lot of setup.
    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));

    // Three reads, each showing C>A, G>A, T>A at VCF positions 1, 2 and 3.
    var veads = new List<Vead>();
    foreach (var readName in new[] { "r1", "r2", "r3" })
    {
        var vead = PhasedVariantTestUtilities.CreateVeadFromStringArray(
            readName,
            new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } });

        for (var siteIndex = 0; siteIndex < 3; siteIndex++)
        {
            vead.SiteResults[siteIndex].VcfReferencePosition = siteIndex + 1;
        }

        veads.Add(vead);
    }

    var clusterMock = new Mock<ICluster>();
    clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

    var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);
    clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
    clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

    var callable = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());
    callable.NbhdReferenceSequenceSubstring = "CGT";

    callable.CreateMnvsFromClusters(new List<ICluster>() { clusterMock.Object }, 20);

    var candidate = callable.CandidateVariants.First();
    Assert.Equal(6, candidate.TotalCoverage);
    Assert.Equal(6, candidate.AlleleSupport);
    Assert.Equal("CGT", candidate.ReferenceAllele);
    Assert.Equal("AAA", candidate.AlternateAllele);

    int[] siteDepths;
    int[] siteNoCalls;
    callable.DepthAtSites(new List<ICluster>() { clusterMock.Object }, out siteDepths, out siteNoCalls);

    // One depth entry per site, each equal to the number of veads.
    Assert.Equal(3, siteDepths.Length);
    Assert.Equal(3, siteDepths[0]);
    Assert.Equal(3, siteDepths[1]);
    Assert.Equal(3, siteDepths[2]);
}
/// <summary>
/// Stages a phased variant at a position (129) that had no original VCF entry and checks that it
/// is called there, while the original sites come back as reference or no-call records.
/// </summary>
public void CallAVariantInANewLocation()
{
    // Original VCF variants. The fourth one is deliberately never added to the neighborhood:
    // it stands in for a variant that failed filters and so is not used for phasing.
    var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
    var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));
    var siteAt234 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156));
    var unusedSiteAt234 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156));

    var callingParameters = new VariantCallingParameters();
    callingParameters.Validate();
    var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());

    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
    vcfNeighborhood.AddVariantSite(siteAt234);

    var callable = new CallableNeighborhood(vcfNeighborhood, callingParameters, null);

    // Stage a single candidate at the new location.
    var stagedMnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 129,
        ReferenceAllele = "A",
        AlternateAllele = "TT",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 500
    };
    callable.AddAcceptedPhasedVariant(stagedMnv);

    // All 1000 reference counts at 124 are marked as already claimed.
    var fullyClaimedAt124 = new SuckedUpRefRecord()
    {
        Counts = 1000,
        AlleleThatClaimedIt = new CalledAllele()
    };
    callable.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, fullyClaimedAt124 } };

    variantCaller.CallMNVs(callable);
    variantCaller.CallRefs(callable);

    var calledMnvs = callable.CalledVariants;
    var calledRefs = callable.CalledRefs;

    // Expected records: "0/." at 123 and 234, "./." at 124.
    var expectedRefAt123 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "0/." } }
        },
    };

    var expectedRefAt234 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "0/." } }
        },
    };

    var expectedNoCallAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "./." } }
        },
    };

    Assert.Equal(1, calledMnvs.Count);
    Assert.Equal(1, calledMnvs[129].Count);
    Assert.Equal(3, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
    VcfMergerTests.CheckVariantsMatch(expectedNoCallAt124, calledRefs[124]);
    VcfMergerTests.CheckVariantsMatch(stagedMnv, calledMnvs[129][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
}
/// <summary>
/// Checks the soft-clip window computed for SNV, deletion and insertion neighborhoods, and
/// exercises <c>IsClippedWithinNeighborhood</c> with unclipped, leading-clipped and
/// trailing-clipped reads around the window edges.
/// </summary>
public void ClippedReadCountTest()
{
    // SNV-only neighborhood with sites at 10, 15 and 25.
    var snvNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
            new VariantSite(25) { VcfReferenceAllele = "T", VcfAlternateAllele = "G" },
        },
    };
    snvNeighborhood.SetRangeOfInterest();

    Assert.Equal(9, snvNeighborhood.SoftClipEndBeforeNbhd);
    Assert.Equal(26, snvNeighborhood.SoftClipPosAfterNbhd);

    var callableSnvNeighborhood = new CallableNeighborhood(snvNeighborhood, new VariantCallingParameters(), null);
    var readFilter = new NeighborhoodReadFilter(new BamFilterParameters() { MinimumMapQuality = 20 });

    // --- Reads without any clipping are never "clipped within the neighborhood" ---
    var allMatchCigar = new CigarAlignment("4M");

    // Ends before the neighborhood starts.
    var unclippedBefore = TestHelper.CreateRead("chr1", "ACGT", 6, allMatchCigar);
    Assert.False(readFilter.IsClippedWithinNeighborhood(unclippedBefore, callableSnvNeighborhood));

    // Partially covers the neighborhood.
    var unclippedOverlapping = TestHelper.CreateRead("chr1", "ACGT", 8, allMatchCigar);
    Assert.False(readFilter.IsClippedWithinNeighborhood(unclippedOverlapping, callableSnvNeighborhood));

    // Entirely inside the neighborhood.
    var unclippedInside = TestHelper.CreateRead("chr1", "ACGT", 15, allMatchCigar);
    Assert.False(readFilter.IsClippedWithinNeighborhood(unclippedInside, callableSnvNeighborhood));

    // --- Trailing clips near the start of the neighborhood ---

    // Clip begins before the neighborhood -> not within.
    //   POS   8  9 10 11
    //   Read  M  S  S  S
    var trailingClipTooEarly = TestHelper.CreateRead("chr1", "ACGT", 8, new CigarAlignment("1M3S"));
    Assert.False(readFilter.IsClippedWithinNeighborhood(trailingClipTooEarly, callableSnvNeighborhood));

    // Clip begins on the first variant site -> within.
    //   POS   8  9 10 11
    //   Read  M  M  S  S
    var trailingClipAtFirstSite = TestHelper.CreateRead("chr1", "ACGT", 8, new CigarAlignment("2M2S"));
    Assert.True(readFilter.IsClippedWithinNeighborhood(trailingClipAtFirstSite, callableSnvNeighborhood));

    // Clip begins after the first variant site but before the neighborhood ends -> within.
    //   POS   8  9 10 11
    //   Read  M  M  M  S
    var trailingClipInside = TestHelper.CreateRead("chr1", "ACGT", 8, new CigarAlignment("3M1S"));
    Assert.True(readFilter.IsClippedWithinNeighborhood(trailingClipInside, callableSnvNeighborhood));

    // --- Leading clips near the end of the neighborhood ---

    // Clip ends before the end of the neighborhood -> within.
    //   POS  24 25 26 27
    //   Read  S  M  M  M
    var leadingClipInside = TestHelper.CreateRead("chr1", "ACGT", 25, new CigarAlignment("1S3M"));
    Assert.True(readFilter.IsClippedWithinNeighborhood(leadingClipInside, callableSnvNeighborhood));

    // Clip ends at the last variant site -> within.
    //   POS  24 25 26 27
    //   Read  S  S  M  M
    var leadingClipAtLastSite = TestHelper.CreateRead("chr1", "ACGT", 26, new CigarAlignment("2S2M"));
    Assert.True(readFilter.IsClippedWithinNeighborhood(leadingClipAtLastSite, callableSnvNeighborhood));

    // Clip ends after the last variant site -> not within.
    //   POS  24 25 26 27
    //   Read  S  S  S  M
    var leadingClipTooLate = TestHelper.CreateRead("chr1", "ACGT", 27, new CigarAlignment("3S1M"));
    Assert.False(readFilter.IsClippedWithinNeighborhood(leadingClipTooLate, callableSnvNeighborhood));

    // TODO (maybe later): the exact borders matter little, because the first pass over clipped
    // reads does not check for an exact match.

    // --- Soft-clip window for a neighborhood made of deletions ---
    var deletionNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "ACC", VcfAlternateAllele = "A" },
            new VariantSite(25) { VcfReferenceAllele = "TCC", VcfAlternateAllele = "T" },
        },
    };
    deletionNeighborhood.SetRangeOfInterest();

    Assert.Equal(10, deletionNeighborhood.SoftClipEndBeforeNbhd);
    Assert.Equal(28, deletionNeighborhood.SoftClipPosAfterNbhd);

    // --- Soft-clip window for a neighborhood made of insertions ---
    var insertionNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "ACC" },
            new VariantSite(25) { VcfReferenceAllele = "T", VcfAlternateAllele = "TCC" },
        },
    };
    insertionNeighborhood.SetRangeOfInterest();

    Assert.Equal(10, insertionNeighborhood.SoftClipEndBeforeNbhd);
    Assert.Equal(26, insertionNeighborhood.SoftClipPosAfterNbhd);
}
/// <summary>
/// Tells whether a read starts beyond the neighborhood's look-ahead boundary.
/// </summary>
/// <param name="read">The read to test.</param>
/// <param name="neighborhood">The neighborhood supplying the boundary.</param>
/// <returns>
/// True when <c>read.Position</c> is strictly greater than
/// <c>neighborhood.LastPositionOfInterestWithLookAhead</c>.
/// </returns>
public bool PastNeighborhood(Read read, CallableNeighborhood neighborhood)
{
    var readStart = read.Position;
    var boundary = neighborhood.LastPositionOfInterestWithLookAhead;
    return readStart > boundary;
}
/// <summary>
/// Verifies which reads <c>VeadGroupSource.GetVeadGroups</c> collects for a neighborhood:
/// low-quality reads and reads ending before the neighborhood are skipped, and collection stops
/// once the neighborhood has been passed. Also covers a long deletion that extends the
/// look-ahead boundary, and the boundary case of a read ending exactly on the first site.
/// </summary>
public void GetVeads()
{
    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(100) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(400) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(505) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(703) { VcfReferenceAllele = "A", VcfAlternateAllele = "T" },
            new VariantSite(800) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
        }
    };

    var callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    var reads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 10),                     // Before neighborhood
        CreateRead("chr1", "ACGT", 96),                     // Ends right before the first variant site
        CreateRead("chr1", "ACGT", 100),                    // Match (100)
        CreateRead("chr1", "ACGT", 300),                    // Within neighborhood but no variant site
        CreateRead("chr1", "ACGT", 400, qualityForAll: 19), // Within neighborhood but low quals
        CreateRead("chr1", "ACGT", 500),                    // Within neighborhood, ends right before 505
        CreateRead("chr1", "ACGT", 700),                    // Match (703)
        CreateRead("chr1", "ACGT", 800),                    // Match (800)
        CreateRead("chr1", "ACGT", 805),                    // Past neighborhood
        CreateRead("chr1", "ACGT", 900),                    // Past neighborhood
        CreateRead("chr2", "ACGT", 100),                    // Wrong chromosome
    };

    var alignmentExtractor = new MockAlignmentExtractor(reads);
    var veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");

    // Materialize once: the result is inspected many times below, and re-enumerating the
    // source's return value for every assertion would repeat the work (or, for a lazy
    // sequence backed by the extractor, yield different results on later passes).
    var veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

    // Collect all reads that could relate to the neighborhood:
    // - skip anything with quality below MinimumMapQuality
    // - skip anything that ends before the neighborhood begins
    // - stop collecting once the end of the neighborhood has been passed
    // That should leave the reads at 100, 700 and 800.
    Assert.Equal(801, callableNeighborhood.LastPositionOfInterestWithLookAhead);
    Assert.Equal(3, veadGroups.Count);
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
    Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
    Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));

    foreach (var veadGroup in veadGroups)
    {
        Assert.Equal(1, veadGroup.NumVeads);
    }

    // Add a long deletion at 790; it pushes the look-ahead boundary out to 809.
    vcfNeighborhood.VcfVariantSites.Add(
        new VariantSite(790) { VcfReferenceAllele = "ACAGTGAAAGACTTGTGAC", VcfAlternateAllele = "C" });

    callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());
    Assert.Equal(809, callableNeighborhood.LastPositionOfInterestWithLookAhead);

    alignmentExtractor = new MockAlignmentExtractor(reads);
    veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");
    veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

    Assert.Equal(3, veadGroups.Count);
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
    Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
    Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));

    // Boundary case: a read that ends exactly at the neighborhood's first variant site.
    reads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 10), // Before neighborhood
        CreateRead("chr1", "ACGT", 96), // Ends right before the first variant site
        CreateRead("chr1", "ACGT", 97), // Ends exactly at the first variant site
    };

    alignmentExtractor = new MockAlignmentExtractor(reads);
    veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");
    veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

    // The vead group for the read at 97 should be the only one.
    Assert.Equal(1, veadGroups.Count);
    Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("97")));

    foreach (var veadGroup in veadGroups)
    {
        Assert.Equal(1, veadGroup.NumVeads);
    }
}
/// <summary>
/// Merges two called neighborhoods (chr2 and chr7) into the variants read from MergerInput.vcf,
/// writes the result with a <c>PhasedVcfWriter</c>, and compares the output line by line with
/// MergerOutput.vcf.
/// </summary>
public void WriteANbhd()
{
    var outputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
    var inputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
    var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

    File.Delete(outputFilePath);

    var config = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
        AllowMultipleVcfLinesPerLoci = true
    };

    // NOTE(review): this test previously built a populated VcfWriterInputContext (command line,
    // sample name, contigs) but never handed it to the writer. The writer has always been given
    // an empty context, and that behavior is kept; only the unused local was removed.
    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>() { }, null);

    try
    {
        var reader = new AlleleReader(inputFilePath, true);

        // Set up the original variants.
        var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
        var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
        var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
        var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

        var vs1 = new VariantSite(originalVcfVariant1);
        var vs2 = new VariantSite(originalVcfVariant2);
        var vs4 = new VariantSite(originalVcfVariant4);
        var vs5 = new VariantSite(originalVcfVariant5);

        // Variants at position 116380048 are replaced (two new MNVs are called here).
        var nbhd1 = new VcfNeighborhood(0, "chr2", vs1, vs2);
        var calledNbh1 = new CallableNeighborhood(nbhd1, new VariantCallingParameters());

        // Variants at positions 116380051 and 116380052 are replaced (one new MNV is called at 51).
        var nbhd2 = new VcfNeighborhood(0, "chr7", vs4, vs5);
        var calledNbh2 = new CallableNeighborhood(nbhd2, new VariantCallingParameters());

        VcfMerger merger = new VcfMerger(reader);
        List<Tuple<CalledAllele, string>> alleleTuplesPastNbhd = new List<Tuple<CalledAllele, string>>();

        calledNbh1.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { originalVcfVariant1.ReferencePosition, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
        };
        calledNbh2.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { originalVcfVariant4.ReferencePosition, new List<CalledAllele> { originalVcfVariant4 } }
        };

        // Each step returns the alleles that lie past what was written; feed them to the next step.
        alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd1.ReferenceName);
        alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh1);
        alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd2.ReferenceName);
        alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh2);
        merger.WriteRemainingVariants(writer, alleleTuplesPastNbhd);
    }
    finally
    {
        // Always release the output file, even if merging throws; otherwise the file stays open
        // and the File.Delete at the top of the next run can fail.
        writer.Dispose();
    }

    var expectedLines = File.ReadLines(expectedFilePath).ToList();
    var outputLines = File.ReadLines(outputFilePath).ToList();

    Assert.Equal(expectedLines.Count, outputLines.Count);

    for (int i = 0; i < expectedLines.Count; i++)
    {
        Assert.Equal(expectedLines[i], outputLines[i]);
    }
}
// Asserts that the neighborhood holds exactly as many variant sites as expected, and that every
// expected site has a counterpart with the same reference name, position, ref allele and alt allele.
public static void CheckNeighborhoodVariants(List<VariantSite> expectedVariantSites, CallableNeighborhood neighborhood)
{
    Assert.Equal(expectedVariantSites.Count, neighborhood.VcfVariantSites.Count);

    foreach (var wanted in expectedVariantSites)
    {
        var isPresent = neighborhood.VcfVariantSites.Any(candidate =>
            candidate.ReferenceName == wanted.ReferenceName
            && candidate.VcfReferencePosition == wanted.VcfReferencePosition
            && candidate.VcfReferenceAllele == wanted.VcfReferenceAllele
            && candidate.VcfAlternateAllele == wanted.VcfAlternateAllele);

        Assert.True(isPresent);
    }
}
// Overload for vcf variants: converts each expected variant into a VariantSite (using only its
// first alternate allele) and delegates to the VariantSite-based check.
public static void CheckNeighborhoodVariants(List<VcfVariant> expectedVariants, CallableNeighborhood neighborhood)
{
    var expectedSites =
        (from vcfVariant in expectedVariants
         select new VariantSite()
         {
             VcfReferencePosition = vcfVariant.ReferencePosition,
             ReferenceName = vcfVariant.ReferenceName,
             VcfReferenceAllele = vcfVariant.ReferenceAllele,
             VcfAlternateAllele = vcfVariant.VariantAlleles.First()
         }).ToList();

    CheckNeighborhoodVariants(expectedSites, neighborhood);
}
// Diploid variant of the merger/writer test: one called neighborhood on chr1 is merged into the
// variants read from TinyDiploid.vcf, and the written file is compared with TinyDiploidOutput.vcf.
public void WriteADiploidNbhd()
{
    var outputDir = Path.Combine(TestPaths.LocalScratchDirectory, "MergerWriteADiploidNbhd");
    var outputFilePath = Path.Combine(outputDir, "TinyDiploid.Phased.vcf");
    var inputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploid.vcf");
    var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploidOutput.vcf");

    TestHelper.RecreateDirectory(outputDir);

    // NOTE(review): this context is built but never passed to the writer below (which receives an
    // empty VcfWriterInputContext). Left untouched because the expected file presumably depends
    // on that -- confirm before wiring it in.
    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chr22", 51304566),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var config = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.DiploidByThresholding,
        AllowMultipleVcfLinesPerLoci = false
    };

    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>() { }, null);
    try
    {
        var reader = new AlleleReader(inputFilePath, true);

        //set up the original variants
        var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 1, "A", "G", 1000, 156);
        var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 1, "A", "T", 1000, 156);
        var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "G", 1000, 156);
        var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "GTCT", 1000, 156);

        var vs1 = new VariantSite(originalVcfVariant1);
        var vs2 = new VariantSite(originalVcfVariant2);
        var vs4 = new VariantSite(originalVcfVariant4);
        var vs5 = new VariantSite(originalVcfVariant5);

        //the only neighborhood covers the two variants at chr1 position 1
        var nbhd1 = new VcfNeighborhood(0, "chr1", vs1, vs2);
        var calledNbh1 = new CallableNeighborhood(nbhd1, new VariantCallingParameters());

        VcfMerger merger = new VcfMerger(reader);
        List<Tuple<CalledAllele, string>> alleleTuplesPastNbhd = new List<Tuple<CalledAllele, string>>();

        //we will just say, we called the variants that were in the original vcf. Ie, we agree with it.
        calledNbh1.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { originalVcfVariant1.ReferencePosition, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
        };

        //Realizes the first nbhd starts at chr1. We have to do something with the first lines of the vcf (chr1 1 . A G,T)
        //so, alleleTuplesPastNbhd = chr1 1 . A G,T
        alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd1.ReferenceName);

        Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant1));
        Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant2));

        //This method writes everything up to the end of nbhd 1,
        //so "(chr1 1 . A G,T)" from the vcf and the variants scylla detected "(chr1 1 . A G,T)" need to be dealt with.
        //Since these 4 variants are actually the same two, we need to remove the vcf ones and only write the scylla ones.
        //Then we peek into the vcf and see the next line is "chr22 1230237 . GTC G,GTCT", clearly outside nbh1.
        //so we write out everything we need for nbhd1, and save the peeked line
        alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh1);

        Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant4));
        Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant5));

        //now write out
        //chr22 1230237.GTC G,GTCT 50 DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US 1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
        //chrX 79.CG GTG,AA 50 DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US 1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
        merger.WriteRemainingVariants(writer, alleleTuplesPastNbhd);
    }
    finally
    {
        // Dispose even when an assertion or merger step above throws, so the output file handle
        // is never left open. It must also happen before the output file is read back below.
        writer.Dispose();
    }

    var expectedLines = File.ReadLines(expectedFilePath).ToList();
    var outputLines = File.ReadLines(outputFilePath).ToList();

    Assert.Equal(expectedLines.Count, outputLines.Count);

    for (int i = 0; i < expectedLines.Count; i++)
    {
        Assert.Equal(expectedLines[i], outputLines[i]);
    }
}
// Checks what is reported at position 124 once the SNV at 123 has been accepted as a phased
// variant, under three amounts of reference counts already claimed ("sucked up") at 124.
public void VarCallsBecomeRefsAndNulls()
{
    var snvAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var snvAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var site123 = new VariantSite(snvAt123);
    var site124 = new VariantSite(snvAt124);

    var callingParameters = new VariantCallingParameters();
    callingParameters.Validate();
    var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());

    var vcfNbhd = new VcfNeighborhood(0, "chr1", site123, site124);
    var neighborhood = new CallableNeighborhood(vcfNbhd, callingParameters);

    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 500
        });

    // The alt at position 124 is called with 156 alt / 1000 total, i.e. 844 original ref calls.
    // Scenario 0: nothing sucked up          -> ref call, 844 ref reads.
    // Scenario 1: 100 ref reads sucked up    -> 744 / 1000 left, still a confident ref call.
    // Scenario 2: 1000 sucked up (all of it) -> nothing left, reported as a null call.
    var usedRefCountsPerScenario = new[]
    {
        new Dictionary<int, SuckedUpRefRecord>() { },
        new Dictionary<int, SuckedUpRefRecord>()
        {
            { 124, new SuckedUpRefRecord() { Counts = 100, AlleleThatClaimedIt = new CalledAllele() } }
        },
        new Dictionary<int, SuckedUpRefRecord>()
        {
            { 124, new SuckedUpRefRecord() { Counts = 1000, AlleleThatClaimedIt = new CalledAllele() } }
        }
    };
    var expectedGenotypeAt124 = new[] { "0/.", "0/.", "./." };
    var expectedRefDepthAt124 = new[] { "844", "744", "0" };

    for (var scenario = 0; scenario < usedRefCountsPerScenario.Length; scenario++)
    {
        neighborhood.UsedRefCountsLookup = usedRefCountsPerScenario[scenario];

        variantCaller.CallMNVs(neighborhood);
        variantCaller.CallRefs(neighborhood);

        var calledVariants = neighborhood.CalledVariants;
        var calledRefs = neighborhood.CalledRefs;

        Assert.Equal(1, calledVariants.Count);
        Assert.Equal(1, calledVariants[123].Count);
        Assert.Equal(2, calledRefs.Count);

        var expectedAt124 = new VcfVariant()
        {
            ReferenceName = "chr1",
            ReferencePosition = 124,
            ReferenceAllele = "A",
            VariantAlleles = new[] { "." },
            Genotypes = new List<Dictionary<string, string>>()
            {
                new Dictionary<string, string>()
                {
                    { "GT", expectedGenotypeAt124[scenario] },
                    { "DP", "1000" },
                    { "AD", expectedRefDepthAt124[scenario] }
                }
            },
        };

        VcfMergerTests.CheckVariantsMatch(snvAt123, calledVariants[123][0]);
        VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);
    }
}
//This unit test was made after we found bug ScyllaLoosingRefCalls_PICS-723.
//We had a 1/. GT reported when it should be 1/0.
//The reason for this is that all the refs (the "0"s) got incorrectly sucked up.
//Ie, MNV ACG-> AG claimed 50 refs, so we (incorrectly) subtracted 50 refs from it.
//The bug is that the ref counts got subtracted from the exact same mnv that claimed them.
//This should never happen, and was not the intent of the alg.
//
//The affected method is: CreateMnvsFromClusters in VcfNbhd
public void CreateMnvsFromClusters_TakeUpRefCount()
{
    var vcfMnv = TestHelper.CreateDummyAllele("chr1", 123, "ACG", "AT", 1000, 156);
    var vcfInsertion = TestHelper.CreateDummyAllele("chr1", 123, "A", "TTTTTT", 1000, 200);

    var mnvSite = new VariantSite(vcfMnv);
    var insertionSite = new VariantSite(vcfInsertion);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var vcfNbhd = new VcfNeighborhood(0, "chr1", mnvSite, insertionSite);
    var neighborhood = new CallableNeighborhood(vcfNbhd, new VariantCallingParameters());

    // Three accepted alleles at position 123, each carrying an explicit reference support of 350.
    var acceptedSnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "A",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 200,
        ReferenceSupport = 350
    };
    neighborhood.AddAcceptedPhasedVariant(acceptedSnv);

    var acceptedMnv = new CalledAllele(AlleleCategory.Mnv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "ACG",
        AlternateAllele = "AT",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 300,
        ReferenceSupport = 350
    };
    neighborhood.AddAcceptedPhasedVariant(acceptedMnv);

    var acceptedInsertion = new CalledAllele(AlleleCategory.Insertion)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "AAAAA",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 250,
        ReferenceSupport = 350
    };
    neighborhood.AddAcceptedPhasedVariant(acceptedInsertion);

    // Default behavior: nothing gets sucked up.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { };

    mnvSite.VcfReferencePosition = 123;
    var dummyVead = new Vead("dummy", new VariantSite[] { mnvSite });
    var dummyGroup = new VeadGroup(dummyVead);
    var fakeCluster = new Cluster("test", new List<VeadGroup>() { dummyGroup });
    fakeCluster.ResetConsensus();

    neighborhood.CreateMnvsFromClusters(new List<Cluster> { fakeCluster }, 20);
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(2, calledVariants.Count);
    Assert.Equal(3, calledVariants[123].Count);
    Assert.Equal(1, calledRefs.Count);

    // Check the ref counts on all the MNVs. Nothing should be sucked up.
    // (Previously ref support was derived as total depth - allele support; now it is set explicitly.)
    Assert.Equal(350, calledVariants[123][0].ReferenceSupport);
    Assert.Equal(350, calledVariants[123][1].ReferenceSupport);
    Assert.Equal(350, calledVariants[123][2].ReferenceSupport);

    // Now variant 0 will suck up 100 ref calls.
    var claimedByVariant0 = new SuckedUpRefRecord()
    {
        Counts = 100,
        AlleleThatClaimedIt = neighborhood.CandidateVariants[0]
    };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 123, claimedByVariant0 } };

    neighborhood.CreateMnvsFromClusters(new List<Cluster> { fakeCluster }, 20);
    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    // The claiming variant keeps its full ref support...
    Assert.Equal(350, calledVariants[123][0].ReferenceSupport);

    // ...while the other two lose the 100 claimed refs (refSupport - sucked up ref).
    // The old, buggy expectations here were 1000 - 300 and 1000 - 250 (total depth - allele support).
    Assert.Equal(350 - 100, calledVariants[123][1].ReferenceSupport);
    Assert.Equal(350 - 100, calledVariants[123][2].ReferenceSupport);
}
//This unit test was made after we found bug ScyllaShouldMergeClusters_PICS-1122.
//We had an output vcf with the following lines
//chr11 64577365 . C . 100 PASS DP=1429 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/0:1:1429:1429:0.00000:65:-100.0000:0.0592:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577366 . A T 78 PASS DP = 559 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577366 . A T 78 PASS DP = 559 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577367 . G. 100 PASS DP = 1411 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/0:1:1411:1411:0.00000:65:-100.0000:0.0741:0,0,0,0,0,0,0,0,0,0,0,0
//The affected methods are "AddAcceptedPhasedVariant" and "AddRejectedPhasedVariant".
//The fix merges the added variant if it is the same as a variant that already exists.
public void AddAcceptedAndRejectedPhasedVariantTests()
{
    // Four variants are added (A>T twice, A>G, A>AG) and three ref calls (position 123 twice,
    // position 124 once). The duplicates should be merged, so the assertions below expect
    // 3 candidate variants and 2 refs.
    var vcfVariantLowDepth = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 500, 156);
    var vcfVariantHighDepth = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 200);

    var siteLowDepth = new VariantSite(vcfVariantLowDepth);
    var siteHighDepth = new VariantSite(vcfVariantHighDepth);

    var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var vcfNbhd = new VcfNeighborhood(0, "chr1", siteLowDepth, siteHighDepth);
    var neighborhood = new CallableNeighborhood(vcfNbhd, new VariantCallingParameters());

    // --- variants ---
    var firstAtoT = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "T",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 200,
        ReferenceSupport = 350,
        NoiseLevelApplied = 20
    };
    neighborhood.AddAcceptedPhasedVariant(firstAtoT);

    var secondAtoT = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "T",
        VariantQscore = 20,
        TotalCoverage = 500,
        AlleleSupport = 300,
        ReferenceSupport = 50,
        NoiseLevelApplied = 20
    };
    neighborhood.AddAcceptedPhasedVariant(secondAtoT);

    var onlyAtoG = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "G",
        VariantQscore = 20,
        TotalCoverage = 500,
        AlleleSupport = 300,
        ReferenceSupport = 50,
        NoiseLevelApplied = 20
    };
    neighborhood.AddAcceptedPhasedVariant(onlyAtoG);

    var onlyAtoAG = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "AG",
        VariantQscore = 20,
        TotalCoverage = 500,
        AlleleSupport = 300,
        ReferenceSupport = 50,
        NoiseLevelApplied = 20
    };
    neighborhood.AddAcceptedPhasedVariant(onlyAtoAG);

    // --- refs ---
    var firstRefAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = ".",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 200,
        ReferenceSupport = 350,
        NoiseLevelApplied = 20
    };
    neighborhood.AddRejectedPhasedVariant(firstRefAt123);

    var secondRefAt123 = new CalledAllele(AlleleCategory.Reference)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = ".",
        VariantQscore = 20,
        TotalCoverage = 500,
        AlleleSupport = 300,
        ReferenceSupport = 50,
        NoiseLevelApplied = 20
    };
    neighborhood.AddRejectedPhasedVariant(secondRefAt123);

    var onlyRefAt124 = new CalledAllele(AlleleCategory.Reference)
    {
        Chromosome = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        AlternateAllele = ".",
        VariantQscore = 20,
        TotalCoverage = 500,
        AlleleSupport = 300,
        ReferenceSupport = 50,
        NoiseLevelApplied = 20
    };
    neighborhood.AddRejectedPhasedVariant(onlyRefAt124);

    // --- check results ---
    // Right number of entries after merging.
    Assert.Equal(3, neighborhood.CandidateVariants.Count);
    Assert.Equal(2, neighborhood.Refs.Count);

    // The two A>T calls were combined: support is summed, coverage and ref support are averaged,
    // and the Q score expected here is the higher of the two inputs.
    var mergedAtoT = neighborhood.CandidateVariants[0];
    Assert.Equal(123, mergedAtoT.ReferencePosition);
    Assert.Equal("chr1", mergedAtoT.Chromosome);
    Assert.Equal("A", mergedAtoT.ReferenceAllele);
    Assert.Equal("T", mergedAtoT.AlternateAllele);
    Assert.Equal(200 + 300, mergedAtoT.AlleleSupport);
    Assert.Equal(0, mergedAtoT.NumNoCalls);
    Assert.Equal(100, mergedAtoT.VariantQscore);
    Assert.Equal((1000 + 500) / 2, mergedAtoT.TotalCoverage);
    Assert.Equal((350 + 50) / 2, mergedAtoT.ReferenceSupport);
    Assert.Equal(AlleleCategory.Snv, mergedAtoT.Type);
    Assert.Equal(20, mergedAtoT.NoiseLevelApplied);

    // The A>G call had nothing to merge with, so its values should not have changed.
    var untouchedAtoG = neighborhood.CandidateVariants[1];
    Assert.Equal(123, untouchedAtoG.ReferencePosition);
    Assert.Equal("chr1", untouchedAtoG.Chromosome);
    Assert.Equal("A", untouchedAtoG.ReferenceAllele);
    Assert.Equal("G", untouchedAtoG.AlternateAllele);
    Assert.Equal(300, untouchedAtoG.AlleleSupport);
    Assert.Equal(0, untouchedAtoG.NumNoCalls);
    Assert.Equal(20, untouchedAtoG.VariantQscore);
    Assert.Equal(500, untouchedAtoG.TotalCoverage);
    Assert.Equal(50, untouchedAtoG.ReferenceSupport);
    Assert.Equal(AlleleCategory.Snv, untouchedAtoG.Type);
    Assert.Equal(20, untouchedAtoG.NoiseLevelApplied);
}
// Checks that VcfMerger.GetMergedListOfVariants keeps an original vcf entry (non-empty Item2) only
// when the matching called variant is unchanged, and takes the new one (empty Item2) otherwise.
public void GetMergedListOfVariants_LeaveUntouchedAsIs()
{
    //chr7 55242464 . A G 6 LowSupport DP=287 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:286,1:287:0.00348:30:-7.4908:0.0304:0,0,0,0,0,1,56,17,49,56,69,40:4.294:0.000
    //chr7 55242464 . AGGAATTAAGAGAAGC A 100 PASS DP=298 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:100:284,14:298:0.04698:30:-75.6792:0.0000:1,0,1,4,5,3,58,18,49,55,71,41:100.000:100.000
    //chr7 55242481 . A T 6 LowSupport DP=306 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:305,1:306:0.00327:30:-7.4622:0.0556:0,0,0,0,0,1,63,20,54,52,69,48:3.669:0.000
    //chr7 55242487 . C T 6 LowSupport DP=325 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:324,1:325:0.00308:30:-7.1283:0.0469:0,0,0,1,0,0,67,24,61,53,68,52:1.954:0.000
    //chr7 55242489 . G T 6 LowSupport DP=327 GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ 0/1:6:326,1:327:0.00306:30:-7.0226:0.0411:0,0,1,0,0,0,71,23,60,54,67,52:2.177:0.000
    var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr7", 55242464, "A", "G", 287, 1);
    originalVcfVariant1.ReferenceSupport = 286;

    // NOTE(review): this is the only allele created on "chr2"; the vcf line it mirrors above is chr7.
    // Possibly a typo, left unchanged because the assertions were written against it -- confirm.
    var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 55242464, "AGGAATTAAGAGAAGC", "A", 298, 14);
    originalVcfVariant2.ReferenceSupport = 284;

    var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr7", 55242481, "A", "T", 306, 1);
    originalVcfVariant3.ReferenceSupport = 305;

    var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1);
    originalVcfVariant4.ReferenceSupport = 324;

    var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1);
    originalVcfVariant5.ReferenceSupport = 326;

    //#2mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGC A
    //chr7 55242464 . AGGAATTAAGAGAAGC A 100 PASS DP=286 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:100:272,13:286:0.04545:30:-100.0000:0.3024:0,0,0,0,0,0,0,0,0,0,0,0
    //#3mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGCAA GAT.
    //chr7 55242464 . AGGAATTAAGAGAAGCAA GAT 6 PASS DP=293 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:6:226,1:293:0.00341:30:-100.0000:0.2854:0,0,0,0,0,0,0,0,0,0,0,0
    var mnv1 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGC", "A", 286, 13);
    mnv1.ReferenceSupport = 272;
    var mnv2 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGCAA", "GAT", 293, 1);
    mnv2.ReferenceSupport = 226;

    //#4mnv accepted: chr7 55242487 . C T.
    var mnv3 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1);
    mnv3.ReferenceSupport = 324;

    //#5mnv accepted: chr7 55242489 . G T.
    var mnv4 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1);
    mnv4.ReferenceSupport = 326;

    var vs1 = new VariantSite(originalVcfVariant1);
    var vs2 = new VariantSite(originalVcfVariant2);
    var vs3 = new VariantSite(originalVcfVariant3);
    var vs4 = new VariantSite(originalVcfVariant4);
    var vs5 = new VariantSite(originalVcfVariant5);

    var nbhd1 = new VcfNeighborhood(0, "chr7", vs1, vs2);
    nbhd1.AddVariantSite(vs3);
    nbhd1.AddVariantSite(vs4);
    nbhd1.AddVariantSite(vs5);

    var calledNbhd = new CallableNeighborhood(nbhd1, new VariantCallingParameters());
    calledNbhd.CalledVariants = new Dictionary<int, List<CalledAllele>>
    {
        { mnv1.ReferencePosition, new List<CalledAllele>() { mnv1, mnv2 } },
        { mnv3.ReferencePosition, new List<CalledAllele>() { mnv3 } },
        { mnv4.ReferencePosition, new List<CalledAllele>() { mnv4 } },
    };

    //Became ref
    //chr7 55242481 . A . 100 PASS DP=306 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/.:100:305:306:0.00327:30:-100.0000:0.0556:0,0,0,0,0,0,0,0,0,0,0,0
    var var3AsRef = TestHelper.CreateDummyAllele("chr7", 55242481, "A", ".", 306, 0);
    calledNbhd.CalledRefs = new Dictionary<int, CalledAllele>() { { var3AsRef.ReferencePosition, var3AsRef } };

    var origAlleles = new List<Tuple<CalledAllele, string>>
    {
        new Tuple<CalledAllele, string>(originalVcfVariant1, "Variant1"),
        new Tuple<CalledAllele, string>(originalVcfVariant2, "Variant2"),
        new Tuple<CalledAllele, string>(originalVcfVariant3, "Variant3"),
        new Tuple<CalledAllele, string>(originalVcfVariant4, "Variant4"),
        new Tuple<CalledAllele, string>(originalVcfVariant5, "Variant5"),
    };

    // Baseline: mnv3 and mnv4 are identical to Variant4 and Variant5.
    var mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);
    Assert.Equal(5, mergedList.Count);

    // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple.
    Assert.Equal(3, mergedList.Count(x => x.Item2 == ""));

    // Variant4 and 5 should be retained as-is because after being spat out of phasing nothing has
    // changed in terms of allele, ref support, allele support, or coverage.
    Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant4"));
    Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5"));

    // Should take the new one if anything is changed. Each step restores what the previous step
    // altered and then changes exactly one property of mnv3, so they must run in this order.
    var perturbations = new Action[]
    {
        // Pretend mnv3 had a ref base sucked up by another MNV.
        () =>
        {
            mnv3.ReferenceSupport = originalVcfVariant4.ReferenceSupport - 1;
        },
        // Pretend mnv3 had coverage changed (not sure this is realistic, but to cover all bases adding test).
        () =>
        {
            mnv3.ReferenceSupport = originalVcfVariant4.ReferenceSupport;
            mnv3.TotalCoverage = originalVcfVariant4.TotalCoverage - 1;
        },
        // Pretend mnv3 had allele support changed (not sure this is realistic, but to cover all bases adding test).
        () =>
        {
            mnv3.TotalCoverage = originalVcfVariant4.TotalCoverage;
            mnv3.AlleleSupport = originalVcfVariant4.AlleleSupport - 1;
        },
    };

    foreach (var perturb in perturbations)
    {
        perturb();

        calledNbhd.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { mnv1.ReferencePosition, new List<CalledAllele>() { mnv1, mnv2 } },
            { mnv3.ReferencePosition, new List<CalledAllele>() { mnv3 } },
            { mnv4.ReferencePosition, new List<CalledAllele>() { mnv4 } },
        };

        mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);
        Assert.Equal(5, mergedList.Count);

        // Anything that is new from phasing (real MNV, ref conversion) should have empty string portion of the tuple.
        Assert.Equal(4, mergedList.Count(x => x.Item2 == ""));

        // Only Variant5 should be retained as-is; Variant4 no longer matches mnv3 in the property
        // this step changed (ref support, coverage, or allele support).
        Assert.Equal(0, mergedList.Count(x => x.Item2 == "Variant4"));
        Assert.Equal(1, mergedList.Count(x => x.Item2 == "Variant5"));
    }
}
// Reads alignments from the reference sequence of the neighborhood's first variant site, turns
// each usable read into a vead, and groups veads that share the same variant sequence.
// Returns the resulting groups. Also increments neighborhood.NumberClippedReads for every read
// the filter reports as clipped within the neighborhood.
public IEnumerable<VeadGroup> GetVeadGroups(CallableNeighborhood neighborhood)
{
    var groupsBySequence = new Dictionary<string, VeadGroup>();
    var variantSites = neighborhood.VcfVariantSites;
    var chromosome = variantSites.First().ReferenceName;

    _alignmentExtractor.Jump(chromosome);

    var veadFinder = new VeadFinder(_options);

    var readLogPath = Path.Combine(_debugLogRoot, chromosome + "_" + neighborhood.Id + "_ReadsInNbhd.txt");
    WriteToReadLog(readLogPath, string.Join("\t", "ReadName", "used?", "IsFirstMate", "CigarData", "Read.Position"));

    // The same Read instance is handed to the extractor on every iteration.
    var read = new Read();

    // Keep reading alignments until the extractor has no more or we move past the neighborhood.
    while (_alignmentExtractor.GetNextAlignment(read))
    {
        // Clipped reads are only counted here; they are not skipped on that basis.
        if (_readfilter.IsClippedWithinNeighborhood(read, neighborhood))
        {
            neighborhood.NumberClippedReads++;
        }

        if (_readfilter.ShouldSkipRead(read, neighborhood))
        {
            continue;
        }

        if (_readfilter.PastNeighborhood(read, neighborhood))
        {
            break;
        }

        // Name the vead after the read, its mate orientation and its position.
        var veadName = read.Name + "_" + (read.IsFirstMate ? "fwd_" : "rev_") + read.Position;

        WriteToReadLog(readLogPath, (string.Join("\t", read.Name, "will use", read.IsFirstMate, read.CigarData.ToString(), read.Position, read.Sequence, string.Join(",", read.Qualities))));

        // Map from bases to ref position.
        var vead = new Vead(veadName, veadFinder.FindVariantResults(variantSites, read));

        var hasSiteResults = vead.SiteResults != null && vead.SiteResults.Any();
        if (!hasSiteResults)
        {
            continue;
        }

        // Add the vead to the group for its variant sequence, creating the group on first sight.
        var sequenceKey = vead.ToVariantSequence();
        VeadGroup existingGroup;
        if (groupsBySequence.TryGetValue(sequenceKey, out existingGroup))
        {
            existingGroup.AddSupport(vead);
        }
        else
        {
            groupsBySequence.Add(sequenceKey, new VeadGroup(vead));
        }
    }

    LogVeadGroupInfo(groupsBySequence.Values);

    return groupsBySequence.Values;
}
// Verifies NeighborhoodReadFilter.PastNeighborhood for reads starting at various positions
// relative to four neighborhoods: two SNVs, SNV + insertion, SNV + deletion, and a long
// insertion at the first site followed by an SNV.
public void PastNeighborhoodTest()
{
    var nbhdReadFilter = new NeighborhoodReadFilter(new BamFilterParameters() { MinimumMapQuality = 20 });

    // Creates an "ACGT" read on chr1 at the given position and checks the filter's answer for it.
    Action<CallableNeighborhood, int, bool> expectPast = (nbhd, readPosition, expected) =>
    {
        var candidate = TestHelper.CreateRead("chr1", "ACGT", readPosition);
        Assert.Equal(expected, nbhdReadFilter.PastNeighborhood(candidate, nbhd));
    };

    // Scenario 1: neighborhood with 2 SNVs
    var snvSites = new List<VariantSite>
    {
        new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
        new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
    };
    var neighbor1 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = snvSites,
    };
    var callableNeighbor1 = new CallableNeighborhood(neighbor1, new VariantCallingParameters());

    expectPast(callableNeighbor1, 6, false);   // ends before neighborhood starts
    expectPast(callableNeighbor1, 8, false);   // read partially covers neighborhood from left
    expectPast(callableNeighbor1, 11, false);  // read enclosed in neighborhood
    expectPast(callableNeighbor1, 14, false);  // read partially sticks out of neighborhood from right
    expectPast(callableNeighbor1, 15, false);  // read starts on last variant site
    // Read starts right after neighborhood.
    // Nima: Minimum lookahead is pos+1, so this is still not considered past neighborhood
    // (maybe we should -1 but it doesn't really make a huge diff?)
    expectPast(callableNeighbor1, 16, false);
    expectPast(callableNeighbor1, 17, true);   // read starts after neighborhood lookahead

    // Scenario 2: neighborhood with one SNV, and one insertion (should extend lookahead)
    var insertionSites = new List<VariantSite>
    {
        new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
        new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "GAAA" },
    };
    var neighbor2 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = insertionSites,
    };
    var callableNeighbor2 = new CallableNeighborhood(neighbor2, new VariantCallingParameters(), null);

    expectPast(callableNeighbor2, 15, false);  // read starts at the last variant site
    expectPast(callableNeighbor2, 16, false);  // read starts after last variant position, but before lookahead
    expectPast(callableNeighbor2, 17, false);  // read starts after last variant position, but before lookahead
    expectPast(callableNeighbor2, 18, false);  // read starts after last variant position, but before lookahead
    // Read starts after last variant position, but before lookahead.
    // Nima: Minimum lookahead is pos+4, so this is still not considered past neighborhood
    // (maybe we should -1 but it doesn't really make a huge diff?)
    expectPast(callableNeighbor2, 19, false);
    expectPast(callableNeighbor2, 20, true);   // read starts after lookahead

    // Scenario 3: neighborhood with one SNV, and one deletion (similar to Scenario 2)
    var deletionSites = new List<VariantSite>
    {
        new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
        new VariantSite(15) { VcfReferenceAllele = "GAAA", VcfAlternateAllele = "G" },
    };
    var neighbor3 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = deletionSites,
    };
    var callableNeighbor3 = new CallableNeighborhood(neighbor3, new VariantCallingParameters(), null);

    expectPast(callableNeighbor3, 18, false);  // read starts after last variant position, but before lookahead
    // Read starts after last variant position, but before lookahead.
    // Nima: Minimum lookahead is pos+4, so this is still not considered past neighborhood
    // (maybe we should -1 but it doesn't really make a huge diff?)
    expectPast(callableNeighbor3, 19, false);
    expectPast(callableNeighbor3, 20, true);   // read starts after lookahead

    // Scenario 4: long indel variant in the beginning of neighborhood can extend lookahead
    var longIndelSites = new List<VariantSite>
    {
        new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "ATTTTTTT" },
        new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
    };
    var neighbor4 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = longIndelSites,
    };
    var callableNeighbor4 = new CallableNeighborhood(neighbor4, new VariantCallingParameters(), null);

    expectPast(callableNeighbor4, 16, false);  // read starts after last variant position, but before lookahead from first variant
    expectPast(callableNeighbor4, 17, false);  // read starts after last variant position, but before lookahead from first variant
    expectPast(callableNeighbor4, 18, false);  // read starts after last variant position, but before lookahead from first variant
    expectPast(callableNeighbor4, 20, true);   // read starts after lookahead of first variant
}