/// <summary>
/// Checks the Id and ReferenceName exposed by a neighborhood built from
/// neighborhood number 0, chromosome "chr1" and two variant sites.
/// </summary>
public void VcfNeighborhood()
{
    var firstSite = new VariantSite(123);
    var secondSite = new VariantSite(124);

    var neighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);

    Assert.Equal("NbhdNum0_chr1_123", neighborhood.Id);
    Assert.Equal("chr1", neighborhood.ReferenceName);
}
/// <summary>
/// Creates a neighborhood from two variant sites plus the reference string between them
/// and appends it to <c>_neighborhoods</c>.
/// </summary>
/// <param name="lastVariantSite">First site handed to the neighborhood constructor.</param>
/// <param name="currentVariantSite">Second site; its ReferenceName is used as the neighborhood's reference name.</param>
/// <param name="referenceStringBetweenVariants">Reference string passed through to the constructor.</param>
private void ProcessNewNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, string referenceStringBetweenVariants)
{
    _neighborhoods.Add(
        new VcfNeighborhood(
            _variantCallingParams,
            currentVariantSite.ReferenceName,
            lastVariantSite,
            currentVariantSite,
            referenceStringBetweenVariants));
}
/// <summary>
/// Shared test driver: builds variant sites from the given tuples, runs the reads through a
/// VeadGroupSource over a fixed "chr1" neighborhood, and checks the resulting group sizes.
/// </summary>
/// <param name="reads">Reads served by the mock alignment extractor.</param>
/// <param name="expectedGroupMemberships">Expected NumVeads per resulting group, in order.</param>
/// <param name="variants">Tuples of (site position, reference allele, alternate allele).</param>
private void ExecuteGroupingTest(List<Read> reads, List<int> expectedGroupMemberships, IEnumerable<Tuple<int, string, string>> variants)
{
    var sites = variants
        .Select(v => new VariantSite(v.Item1)
        {
            VcfReferenceAllele = v.Item2,
            VcfAlternateAllele = v.Item3
        })
        .ToList();

    var extractor = new MockAlignmentExtractor(reads);
    var groupSource = new VeadGroupSource(extractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");

    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T")
    {
        VcfVariantSites = sites
    };
    neighborhood.SetRangeOfInterest();

    var groups = groupSource.GetVeadGroups(neighborhood).ToList();

    Assert.Equal(expectedGroupMemberships.Count, groups.Count);

    for (var index = 0; index < groups.Count; index++)
    {
        Assert.Equal(expectedGroupMemberships[index], groups[index].NumVeads);
    }
}
/// <summary>
/// Clusters the collapsed reads for a neighborhood, adds MNVs built from those clusters to the
/// neighborhood, and then sets genotypes / prunes excess alleles.
/// Any exception raised along the way is logged and swallowed so the caller keeps running.
/// </summary>
/// <param name="neighborhood">Neighborhood to update.</param>
/// <param name="clusterer">Clusterer used to group the vead groups.</param>
/// <param name="collapsedReads">Vead groups for this neighborhood; materialized once with ToList().</param>
private void ProcessNeighborhood(VcfNeighborhood neighborhood, NeighborhoodClusterer clusterer, IEnumerable<VeadGroup> collapsedReads)
{
    Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

    try
    {
        var clusters = clusterer.ClusterVeadGroups(collapsedReads.ToList());

        // The clusterer result may be null. Without this guard, the dereference of
        // clusters.Clusters below throws a NullReferenceException that is then reported
        // as a generic processing error. With no clusters there is nothing to add, so stop here.
        if (clusters == null)
        {
            Logger.WriteToLog("No clusters returned for neighborhood {0}; skipping MNV creation.", neighborhood.Id);
            return;
        }

        Logger.WriteToLog("Found " + clusters.Clusters.Length + " clusters in Nbhd " + neighborhood.Id);

        // Phasing probabilities (VariantPhaser.GetPhasingProbabilities) are intentionally not
        // computed here: the results are currently never used and skipping them saves time.

        bool crushNbhdVariantsToSamePositon = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

        neighborhood.AddMnvsFromClusters(
            clusters.Clusters,
            _factory.Options.BamFilterParams.MinimumBaseCallQuality,
            _factory.Options.VariantCallingParams.MaximumVariantQScore,
            crushNbhdVariantsToSamePositon);

        neighborhood.SetGenotypesAndPruneExcessAlleles();
    }
    catch (Exception ex)
    {
        // Best-effort: a failure in one neighborhood is logged, not propagated.
        Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
        Logger.WriteExceptionToLog(ex);
    }
}
/// <summary>
/// Checks the Id and ReferenceName exposed by a neighborhood built with calling parameters,
/// neighborhood number 0, chromosome "chr1", two variant sites and the reference string "A".
/// </summary>
public void VcfNeighborhood()
{
    var callingParameters = new VariantCallingParameters();
    var firstSite = new VariantSite(123);
    var secondSite = new VariantSite(124);

    var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", firstSite, secondSite, "A");

    Assert.Equal("NbhdNum0_chr1_123", neighborhood.Id);
    Assert.Equal("chr1", neighborhood.ReferenceName);
}
/// <summary>
/// Decides whether a read should be ignored for the given neighborhood.
/// </summary>
/// <param name="read">Read under consideration.</param>
/// <param name="neighborhood">Neighborhood whose first variant site bounds the reads of interest.</param>
/// <returns>
/// true when the read is a PCR duplicate (and duplicates are being removed), is not a proper pair
/// (and only proper pairs are wanted), has a map quality below the configured minimum, or ends
/// before the reference position of the neighborhood's first variant site; otherwise false.
/// </returns>
private bool ShouldSkipRead(Read read, VcfNeighborhood neighborhood)
{
    if (_options.RemoveDuplicates && read.IsPcrDuplicate)
    {
        return true;
    }

    if (_options.OnlyUseProperPairs && !read.IsProperPair)
    {
        return true;
    }

    if (read.MapQuality < _options.MinimumMapQuality)
    {
        return true;
    }

    // Last check: the read finishes before the first variant site starts.
    return read.EndPosition < neighborhood.VcfVariantSites.First().VcfReferencePosition;
}
/// <summary>
/// Checks that GetOriginalVcfVariants returns the original alleles attached to the
/// neighborhood's two variant sites, in order.
/// </summary>
public void GetOriginalVcfIndexes()
{
    var alleleAtPosition1 = new Pisces.Domain.Models.Alleles.CalledAllele() { ReferencePosition = 1 };
    var alleleAtPosition10 = new Pisces.Domain.Models.Alleles.CalledAllele() { ReferencePosition = 10 };

    var neighborhood = new VcfNeighborhood(
        new VariantCallingParameters(),
        0,
        "chr1",
        new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition1 },
        new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition10 },
        "T");

    var originalVariants = neighborhood.GetOriginalVcfVariants();

    Assert.Equal(2, originalVariants.Count);
    Assert.Equal(1, originalVariants[0].ReferencePosition);
    Assert.Equal(10, originalVariants[1].ReferencePosition);
}
/// <summary>
/// Checks that a CallableNeighborhood wrapping the neighborhood returns the original alleles
/// attached to its two variant sites, in order.
/// </summary>
public void GetOriginalVcfIndexes()
{
    var alleleAtPosition1 = new CalledAllele() { ReferencePosition = 1 };
    var alleleAtPosition10 = new CalledAllele() { ReferencePosition = 10 };

    var neighborhood = new VcfNeighborhood(
        0,
        "chr1",
        new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition1 },
        new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition10 });

    var callableNeighborhood = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
    var originalVariants = callableNeighborhood.GetOriginalVcfVariants();

    Assert.Equal(2, originalVariants.Count);
    Assert.Equal(1, originalVariants[0].ReferencePosition);
    Assert.Equal(10, originalVariants[1].ReferencePosition);
}
/// <summary>
/// Runs the variant caller over a neighborhood with two variant sites and no staged phased
/// variants, and expects no MNV calls and two RefAndNoCall reference calls (positions 123, 124).
/// </summary>
public void CallThroughAnEmptyNbhd()
{
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    // Since there is an alt at position 124 (a call of 156 alt / 1000 total), that means 844 original ref calls.
    // Of which we said 100 will get sucked up. So that leaves 744 / 1000 calls for a reference.
    // So we can still make a confident ref call. (We will call it 0/., since we know it is not a homozygous ref.)
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(0, calledVariants.Count);
    Assert.Equal(2, calledRefs.Count);

    Assert.Equal(Genotype.RefAndNoCall, calledRefs[123].Genotype);
    Assert.Equal(Genotype.RefAndNoCall, calledRefs[124].Genotype);

    Assert.Equal(123, calledRefs[123].ReferencePosition);
    Assert.Equal(124, calledRefs[124].ReferencePosition);
}
/// <summary>
/// Checks that SupplementSupportWithClippedReads raises the allele support of a staged MNV
/// from 50 to 57 when the mocked comparator accepts every read whose name starts with 'c'.
/// </summary>
public void SupplementSupportWithClippedReads()
{
    // Reads here are either normal or clipped (identified by "clip_" in their name).
    // This test does not take cigar data into account.
    var comparatorMock = new Mock<IMNVClippedReadComparator>();

    // The mocked comparator reports support whenever the read name starts with 'c'.
    comparatorMock
        .Setup(x => x.DoesClippedReadSupportMNV(It.IsAny<Read>(), It.IsAny<CalledAllele>()))
        .Returns((Read read, CalledAllele allele) => read.Name[0] == 'c');

    var reads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 3, "read4"),
        CreateRead("chr1", "ACGT", 3, "clip_read4", matePosition: 3), // +1 not in neighborhood, but still gets counted because mocked ClippedReadComparator

        CreateRead("chr1", "ACGT", 12, "read1", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read2", matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read1", read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read_notmapped", isMapped: false, isProperPair: false, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read3", isProperPair: false, read2: true, matePosition: 10),
        CreateRead("chr1", "ACGT", 12, "read2", read2: true, matePosition: 10),

        CreateRead("chr1", "ACGT", 12, "clip_read1", matePosition: 10), // +1 clipped read
        CreateRead("chr1", "ACGT", 12, "clip_read2", matePosition: 10), // +1 clipped read
        CreateRead("chr1", "ACGT", 12, "clip_read1", read2: true, matePosition: 10), // +1 clipped read
        CreateRead("chr1", "ACGT", 12, "clip_read_notmapped", isMapped: false, isProperPair: false, matePosition: 10), // +1 clipped read
        CreateRead("chr1", "ACGT", 12, "clip_read3", isProperPair: false, read2: true, matePosition: 10), // +1 clipped read
        CreateRead("chr1", "ACGT", 12, "clip_read2", read2: true, matePosition: 10), // +1 clipped read

        CreateRead("chr1", "ACGT", 30, "read5"),
        CreateRead("chr1", "ACGT", 30, "clip_read5", matePosition: 30), // not in neighborhood, not counted
    };

    var extractor = new MockAlignmentExtractor(reads);

    int qNoiseLevel = 20;
    int maxQscore = 100;
    int minMNVsize = 6;
    var supportFinder = new MNVSoftClipSupportFinder(extractor, comparatorMock.Object, qNoiseLevel, maxQscore, minMNVsize);

    var stagedMnv = TestHelper.CreateDummyAllele("chr1", 10, "AAAAAA", "CCC", 2000, 50);

    var neighborhood = new VcfNeighborhood(0, "chr", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C", ReferenceName = "chr" },
            new VariantSite(25) { VcfReferenceAllele = "T", VcfAlternateAllele = "G", ReferenceName = "chr" },
        },
    };

    var callableNeighborhood = new CallableNeighborhood(neighborhood, new VariantCallingParameters(), null);
    callableNeighborhood.AddAcceptedPhasedVariant(stagedMnv);

    Assert.Equal(50, callableNeighborhood.CandidateVariants[0].AlleleSupport);

    supportFinder.SupplementSupportWithClippedReads(callableNeighborhood);

    // 50 original + 7 reads accepted by the mocked comparator (see the "+1" markers above).
    Assert.Equal(57, callableNeighborhood.CandidateVariants[0].AlleleSupport);
}
/// <summary>
/// Creates a neighborhood numbered <c>numNbhdsSoFar + _maxNumNbhdsInBatch</c> and stores it in
/// <c>_unfinshedNeighborhoods</c>, buffering it for the next call to "GetBatchOfNeighborhoods".
/// </summary>
/// <param name="lastVariantSite">First site handed to the neighborhood constructor.</param>
/// <param name="currentVariantSite">Second site; its ReferenceName names the neighborhood's reference.</param>
/// <param name="numNbhdsSoFar">Count added to the batch size to form the neighborhood number.</param>
private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
{
    var neighborhoodNumber = numNbhdsSoFar + _maxNumNbhdsInBatch;

    _unfinshedNeighborhoods.Add(
        new VcfNeighborhood(neighborhoodNumber, currentVariantSite.ReferenceName, lastVariantSite, currentVariantSite));
}
/// <summary>
/// Creates a neighborhood numbered <c>numNbhdsSoFar</c> plus the current size of the batch and
/// appends it to <c>_nextBatchOfVcfNeighborhoods</c>.
/// </summary>
/// <param name="lastVariantSite">First site handed to the neighborhood constructor.</param>
/// <param name="currentVariantSite">Second site; its ReferenceName names the neighborhood's reference.</param>
/// <param name="numNbhdsSoFar">Count added to the current batch size to form the neighborhood number.</param>
private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
{
    // Read the batch size before adding, so the new neighborhood gets the next free number.
    var neighborhoodNumber = numNbhdsSoFar + _nextBatchOfVcfNeighborhoods.Count;

    _nextBatchOfVcfNeighborhoods.Add(
        new VcfNeighborhood(neighborhoodNumber, currentVariantSite.ReferenceName, lastVariantSite, currentVariantSite));
}
/// <summary>
/// Creates a neighborhood numbered <c>numNbhdsSoFar</c> plus the current size of the batch and
/// appends it to <c>_nextBatchOfNeighborhoods</c>.
/// </summary>
/// <param name="lastVariantSite">First site handed to the neighborhood constructor.</param>
/// <param name="currentVariantSite">Second site; its ReferenceName names the neighborhood's reference.</param>
/// <param name="referenceStringBetweenVariants">Reference string passed through to the constructor.</param>
/// <param name="numNbhdsSoFar">Count added to the current batch size to form the neighborhood number.</param>
private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite, string referenceStringBetweenVariants, int numNbhdsSoFar)
{
    // Read the batch size before adding, so the new neighborhood gets the next free number.
    var neighborhoodNumber = numNbhdsSoFar + _nextBatchOfNeighborhoods.Count;

    _nextBatchOfNeighborhoods.Add(
        new VcfNeighborhood(
            _variantCallingParams,
            neighborhoodNumber,
            currentVariantSite.ReferenceName,
            lastVariantSite,
            currentVariantSite,
            referenceStringBetweenVariants));
}
/// <summary>
/// Creates a neighborhood numbered <c>numNbhdsSoFar + _maxNumNbhdsInBatch</c> and stores it in
/// <c>_unfinshedNeighborhoods</c>, buffering it for the next call to "GetBatchOfNeighborhoods".
/// </summary>
/// <param name="lastVariantSite">First site handed to the neighborhood constructor.</param>
/// <param name="currentVariantSite">Second site; its ReferenceName names the neighborhood's reference.</param>
/// <param name="referenceStringBetweenVariants">Reference string passed through to the constructor.</param>
/// <param name="numNbhdsSoFar">Count added to the batch size to form the neighborhood number.</param>
private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, string referenceStringBetweenVariants, int numNbhdsSoFar)
{
    var neighborhoodNumber = numNbhdsSoFar + _maxNumNbhdsInBatch;

    _unfinshedNeighborhoods.Add(
        new VcfNeighborhood(
            _variantCallingParams,
            neighborhoodNumber,
            currentVariantSite.ReferenceName,
            lastVariantSite,
            currentVariantSite,
            referenceStringBetweenVariants));
}
/// <summary>
/// Configures the caller with filter thresholds the staged calls cannot meet and checks that
/// the expected filter flags are attached to the called MNV and to the reference call at 123.
/// </summary>
public void CheckAddingFilters()
{
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    // Set up filters so calls are sure to trigger them.
    var strictParameters = new VariantCallingParameters
    {
        LowDepthFilter = 2000,
        MinimumFrequencyFilter = 0.80F,
        MinimumVariantQScoreFilter = 300
    };

    var variantCaller = new VariantCaller(strictParameters, new BamFilterParameters());

    // NOTE(review): the neighborhood is given default parameters, not strictParameters - confirm this is intended.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    var stagedVariant = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "T",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 500
    };
    neighborhood.AddAcceptedPhasedVariant(stagedVariant);
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { };

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);

    Assert.True(calledVariants[123][0].Filters.Contains(FilterType.LowDepth));
    Assert.True(calledVariants[123][0].Filters.Contains(FilterType.LowVariantFrequency));
    Assert.True(calledVariants[123][0].Filters.Contains(FilterType.LowVariantQscore));

    Assert.Equal(2, calledRefs.Count);

    Assert.True(calledRefs[123].Filters.Contains(FilterType.LowDepth));
    Assert.True(calledRefs[123].Filters.Contains(FilterType.LowVariantQscore));
    // Note: reference calls do not get the "LowVariantFrequency" flag.
}
/// <summary>
/// Feeds one mocked cluster (three identical veads, C>A / G>A / T>A) into AddMnvsFromClusters
/// and checks the first candidate variant plus the depths reported by DepthAtSites.
/// </summary>
public void AddMnvsFromClusters()
{
    // TODO: even with a mock cluster this takes too much setting up.
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

    var veads = new List<Vead>
    {
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } })
    };

    neighborhood.ReferenceSequence = "CGT";

    var clusterMock = new Mock<ICluster>();
    clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });

    var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);
    clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
    clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup>() { consensusGroup });

    neighborhood.AddMnvsFromClusters(new List<ICluster>() { clusterMock.Object }, 20, 100);

    var candidate = neighborhood.CandidateVariants.First();
    Assert.Equal(6, candidate.TotalCoverage);
    Assert.Equal(6, candidate.AlleleSupport);
    Assert.Equal("CGT", candidate.Reference);
    Assert.Equal("AAA", candidate.Alternate);

    var siteDepths = neighborhood.DepthAtSites(new List<ICluster>() { clusterMock.Object });
    Assert.Equal(3, siteDepths.Length);
    Assert.Equal(3, siteDepths[0]);
    Assert.Equal(3, siteDepths[1]);
    Assert.Equal(3, siteDepths[2]);
}
/// <summary>
/// After a site at 123 is added, LastPositionIsNotMatch must be false for a site at 123
/// and true for a site at 124.
/// </summary>
public void LastPositionIsNotMatch()
{
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");
    neighborhood.AddVariantSite(new VariantSite(123), "ATCG");

    var samePosition = new VariantSite(123);
    var differentPosition = new VariantSite(124);

    Assert.False(neighborhood.LastPositionIsNotMatch(samePosition));
    Assert.True(neighborhood.LastPositionIsNotMatch(differentPosition));
}
/// <summary>
/// Adding a third variant site must grow VcfVariantSites to three entries and leave the Id unchanged.
/// </summary>
public void AddVariantSite()
{
    var firstSite = new VariantSite(120) { VcfReferenceAllele = "A" };
    var secondSite = new VariantSite(121);

    var neighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);
    Assert.Equal("NbhdNum0_chr1_120", neighborhood.Id);

    neighborhood.AddVariantSite(new VariantSite(123));

    Assert.Equal(3, neighborhood.VcfVariantSites.Count);
    Assert.Equal("NbhdNum0_chr1_120", neighborhood.Id);
}
/// <summary>
/// After a site at 123 is added, LastPositionIsNotMatch must be false for a site at 123
/// and true for a site at 124.
/// </summary>
public void LastPositionIsNotMatch()
{
    var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));
    neighborhood.AddVariantSite(new VariantSite(123));

    var samePosition = new VariantSite(123);
    var differentPosition = new VariantSite(124);

    Assert.False(neighborhood.LastPositionIsNotMatch(samePosition));
    Assert.True(neighborhood.LastPositionIsNotMatch(differentPosition));
}
/// <summary>
/// Adding a third variant site with reference string "ATCG" must yield the reference sequence
/// "ATATCG", three variant sites, and an unchanged Id.
/// </summary>
public void AddVariantSite()
{
    var callingParameters = new VariantCallingParameters();
    var firstSite = new VariantSite(120) { VcfReferenceAllele = "A" };
    var secondSite = new VariantSite(121);

    var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", firstSite, secondSite, "T");
    Assert.Equal("NbhdNum0_chr1_120", neighborhood.Id);

    neighborhood.AddVariantSite(new VariantSite(123), "ATCG");

    Assert.Equal("ATATCG", neighborhood.ReferenceSequence);
    Assert.Equal(3, neighborhood.VcfVariantSites.Count);
    Assert.Equal("NbhdNum0_chr1_120", neighborhood.Id);
}
/// <summary>
/// Builds a neighborhood and three identical veads (C>A / G>A / T>A).
/// NOTE(review): this test currently makes no assertions and never uses the objects it builds.
/// </summary>
public void SetDepthAtSites()
{
    var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");

    var veads = new List<Vead>
    {
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } }),
        PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new[,] { { "C", "A" }, { "G", "A" }, { "T", "A" } })
    };
}
/// <summary>
/// Checks the reference-sequence substring and the first/last positions of interest that are
/// set when a CallableNeighborhood is built, both without and with a genome reference.
/// </summary>
public void SetRangeOfInterestTests()
{
    var refName = "chr";

    // Scenario 1: no genome given.
    var bareNeighborhood = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

    Assert.Equal(-1, bareNeighborhood.FirstPositionOfInterest);
    Assert.Equal(-1, bareNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(-1, bareNeighborhood.LastPositionOfInterestWithLookAhead);

    var callableWithoutGenome = new CallableNeighborhood(bareNeighborhood, new VariantCallingParameters());

    Assert.Equal("RR", callableWithoutGenome.NbhdReferenceSequenceSubstring);
    Assert.Equal(120, bareNeighborhood.FirstPositionOfInterest);
    Assert.Equal(121, bareNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(122, bareNeighborhood.LastPositionOfInterestWithLookAhead);

    // Scenario 2: a genome is given.
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
    Genome genome = new Genome(genomePath, new List<string>() { refName });
    ChrReference chrReference = genome.GetChrReference(refName);

    var genomeNeighborhood = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

    Assert.Equal(-1, genomeNeighborhood.FirstPositionOfInterest);
    Assert.Equal(-1, genomeNeighborhood.LastPositionOfInterestInVcf);
    Assert.Equal(-1, genomeNeighborhood.LastPositionOfInterestWithLookAhead);

    var callableWithGenome = new CallableNeighborhood(genomeNeighborhood, new VariantCallingParameters(), chrReference);

    Assert.Equal("TG", callableWithGenome.NbhdReferenceSequenceSubstring);
    Assert.Equal(120, callableWithGenome.FirstPositionOfInterest);
    Assert.Equal(121, callableWithGenome.LastPositionOfInterestInVcf);
    Assert.Equal(122, callableWithGenome.LastPositionOfInterestWithLookAhead);
}
/// <summary>
/// Exercises NeighborhoodReadFilter.ShouldSkipRead with reads placed before, at the edge of,
/// inside, and after a neighborhood whose variant sites are at positions 10 and 15.
/// </summary>
public void ShouldSkipReadTest()
{
    var readFilter = new NeighborhoodReadFilter(new BamFilterParameters() { MinimumMapQuality = 20 });

    var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
        },
    };
    neighborhood.SetRangeOfInterest();

    var callableNeighborhood = new CallableNeighborhood(neighborhood, new VariantCallingParameters(), null);

    // Read ends before first variant
    var readBeforeNeighborhood = TestHelper.CreateRead("chr1", "ACGT", 6);
    Assert.True(readFilter.ShouldSkipRead(readBeforeNeighborhood, callableNeighborhood));

    // Read covers 1 base of the nbhd
    var readTouchingNeighborhood = TestHelper.CreateRead("chr1", "ACGT", 7);
    Assert.False(readFilter.ShouldSkipRead(readTouchingNeighborhood, callableNeighborhood));

    // Read partially covers neighborhood
    var readInsideNeighborhood = TestHelper.CreateRead("chr1", "ACGT", 12);
    Assert.False(readFilter.ShouldSkipRead(readInsideNeighborhood, callableNeighborhood));

    // Read starts after neighborhood (still not skipped by the filter)
    var readAfterNeighborhood = TestHelper.CreateRead("chr1", "ACGT", 16);
    Assert.False(readFilter.ShouldSkipRead(readAfterNeighborhood, callableNeighborhood));

    // Nima: we could add features to CreateRead to create PCR duplicate, low mapQ, and non-proper-pair reads,
    // but these conditions are somewhat trivial, so this may not be necessary.
}
/// <summary>
/// Processes one neighborhood: (1) clusters its vead groups, (2) turns the clusters into MNV
/// candidates and sets genotypes, (3) calls MNVs and refs. Any exception is logged and swallowed.
/// </summary>
/// <param name="neighborhood">Neighborhood to process; it is updated in place.</param>
private void ProcessNeighborhood(VcfNeighborhood neighborhood)
{
    Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

    try
    {
        var neighborhoodClusterer = _factory.CreateNeighborhoodClusterer();
        var groupSource = _factory.CreateVeadGroupSource();
        var veadGroups = groupSource.GetVeadGroups(neighborhood);

        // (1) Get clusters
        var clusteringResult = neighborhoodClusterer.ClusterVeadGroups(veadGroups.ToList(), neighborhood.Id);

        // Drop the references to the vead groups; they are not needed any more.
        groupSource = null;
        veadGroups = null;

        bool crushNbhdVariantsToSamePositon = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

        // (2) Turn clusters into MNV candidates
        neighborhood.CreateMnvsFromClusters(
            clusteringResult.Clusters,
            _factory.Options.BamFilterParams.MinimumBaseCallQuality,
            _factory.Options.VariantCallingParams.MaximumVariantQScore,
            crushNbhdVariantsToSamePositon);
        neighborhood.SetGenotypesAndPruneExcessAlleles();

        // (3) Variant call the candidates
        var caller = _factory.CreateVariantCaller();
        caller.CallMNVs(neighborhood);
        caller.CallRefs(neighborhood);

        // Nothing is written here; wait until the vcf is ready to write.
    }
    catch (Exception ex)
    {
        Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
        Logger.WriteExceptionToLog(ex);
    }
}
/// <summary>
/// Builds <paramref name="expectedNumberOfThreads"/> identically configured test neighborhoods,
/// each with a single variant site at position 123 on "chr1".
/// </summary>
/// <param name="expectedNumberOfThreads">Number of neighborhoods to create.</param>
/// <returns>The list of created neighborhoods (empty when the count is zero or negative).</returns>
private List<VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
{
    var result = new List<VcfNeighborhood>();

    // Every neighborhood is created with neighborhood number 0.
    for (var created = 0; created < expectedNumberOfThreads; created++)
    {
        var onlySite = new VariantSite(123)
        {
            ReferenceName = "chr1",
            OriginalAlleleFromVcf = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156) // originally at index 0
        };

        result.Add(
            new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T")
            {
                VcfVariantSites = new List<VariantSite> { onlySite }
            });
    }

    return result;
}
/// <summary>
/// Calls the same neighborhood three times with 0, 100 and 1000 "sucked up" reference counts at
/// position 124 and checks that the site is reported as a ref (AD 844), a ref (AD 744) and a
/// null call (AD 0) respectively, while the staged variant at 123 is always called.
/// </summary>
public void VarCallsBecomeRefsAndNulls()
{
    var originalVcfVariant = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(originalVcfVariant);
    var siteAt124 = new VariantSite(originalVcfVariant2);

    var vcParams = new VariantCallingParameters();
    vcParams.Validate();
    var caller = new VariantCaller(vcParams, new BamFilterParameters());

    // Since there is an alt at position 124 (a call of 156 alt / 1000 total), that means 844 original ref calls.
    // Of which we said 100 will get sucked up. So that leaves 744 / 1000 calls for a reference.
    // So we can still make a confident ref call.
    var nbhd = new VcfNeighborhood(vcParams, 0, "chr1", siteAt123, siteAt124, "");
    nbhd.SetRangeOfInterest();

    nbhd.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 500
        });

    // Runs the caller and checks the outcome; position 124 is expected to carry the given GT / AD values.
    System.Action<string, string> callAndVerify = (expectedGenotype, expectedAlleleDepth) =>
    {
        caller.CallMNVs(nbhd);
        caller.CallRefs(nbhd);

        var calledVariants = nbhd.CalledVariants;
        var calledRefs = nbhd.CalledRefs;

        Assert.Equal(1, calledVariants.Count);
        Assert.Equal(1, calledVariants[123].Count);
        Assert.Equal(2, calledRefs.Count);

        var expectedAt124 = new VcfVariant()
        {
            ReferenceName = "chr1",
            ReferencePosition = 124,
            ReferenceAllele = "A",
            VariantAlleles = new[] { "." },
            Genotypes = new List<Dictionary<string, string>>()
            {
                new Dictionary<string, string>() { { "GT", expectedGenotype }, { "DP", "1000" }, { "AD", expectedAlleleDepth } }
            },
        };

        VcfMergerTests.CheckVariantsMatch(originalVcfVariant, calledVariants[123][0]);
        VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);
    };

    // Nothing sucked up: 844 refs remain.
    nbhd.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { };
    callAndVerify("0/.", "844");

    // If one has been sucked up and there are refs remaining, we should output it as a ref.
    var suckedUpRefRecord100 = new SuckedUpRefRecord() { Counts = 100, AlleleThatClaimedIt = new CalledAllele() };
    nbhd.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, suckedUpRefRecord100 } };
    callAndVerify("0/.", "744");

    // If one has been sucked up all the way, we should output it as a null.
    var suckedUpRefRecord1000 = new SuckedUpRefRecord() { Counts = 1000, AlleleThatClaimedIt = new CalledAllele() };
    nbhd.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, suckedUpRefRecord1000 } };
    callAndVerify("./.", "0");
}
/// <summary>
/// Adds three more variant sites to a two-site neighborhood, checks their insertion order, then
/// checks the order and contents after OrderVariantSitesByFirstTrueStartPosition.
/// </summary>
public void SortSites()
{
    var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120) { VcfReferenceAllele = "A" }, new VariantSite(121), "T");

    var insertionSite = new VariantSite(123)
    {
        VcfReferencePosition = 140453137,
        VcfReferenceAllele = "C",
        VcfAlternateAllele = "CGTA",
        OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele() { ReferencePosition = 7 }
    };
    nbhd.AddVariantSite(insertionSite, "ATCG");

    var snvAtSamePosition = new VariantSite
    {
        VcfReferencePosition = 140453137,
        VcfReferenceAllele = "C",
        VcfAlternateAllele = "T",
        OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele() { ReferencePosition = 8 }
    };
    nbhd.AddVariantSite(snvAtSamePosition, "");

    var earlierSnv = new VariantSite
    {
        VcfReferencePosition = 140453130,
        VcfReferenceAllele = "C",
        VcfAlternateAllele = "T",
        OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele() { ReferencePosition = 9 }
    };
    nbhd.AddVariantSite(earlierSnv, "");

    // Asserts position, true first base of diff, reference allele and alternate allele of one site.
    System.Action<int, int, int, string, string> assertSite = (index, position, trueFirstBase, referenceAllele, alternateAllele) =>
    {
        Assert.Equal(position, nbhd.VcfVariantSites[index].VcfReferencePosition);
        Assert.Equal(trueFirstBase, nbhd.VcfVariantSites[index].TrueFirstBaseOfDiff);
        Assert.Equal(referenceAllele, nbhd.VcfVariantSites[index].VcfReferenceAllele);
        Assert.Equal(alternateAllele, nbhd.VcfVariantSites[index].VcfAlternateAllele);
    };

    // Before sorting: sites are in insertion order.
    Assert.Equal(5, nbhd.VcfVariantSites.Count);
    assertSite(0, 120, 120, "A", "N");
    assertSite(1, 121, 121, "N", "N");
    assertSite(2, 140453137, 140453138, "C", "CGTA");
    assertSite(3, 140453137, 140453137, "C", "T");
    assertSite(4, 140453130, 140453130, "C", "T");

    nbhd.OrderVariantSitesByFirstTrueStartPosition();

    // After sorting.
    assertSite(0, 120, 120, "A", "N");
    assertSite(1, 121, 121, "N", "N");

    assertSite(2, 140453130, 140453130, "C", "T");
    Assert.Equal(7, nbhd.VcfVariantSites[2].OriginalAlleleFromVcf.ReferencePosition);

    assertSite(3, 140453137, 140453137, "C", "T");
    Assert.Equal(8, nbhd.VcfVariantSites[3].OriginalAlleleFromVcf.ReferencePosition);

    assertSite(4, 140453137, 140453138, "C", "CGTA");
    Assert.Equal(9, nbhd.VcfVariantSites[4].OriginalAlleleFromVcf.ReferencePosition);
}
/// <summary>
/// Checks which reads VeadGroupSource.GetVeadGroups collects for a neighborhood with variant
/// sites at 100, 400, 505, 703 and 800, including after a long deletion site is added at 790
/// and for a read that ends exactly at the first variant site.
/// </summary>
public void GetVeads()
{
    var vcfNeighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(10000), new VariantSite(200000), "T")
    {
        VcfVariantSites = new List<VariantSite>
        {
            new VariantSite(100) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(400) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(505) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
            new VariantSite(703) { VcfReferenceAllele = "A", VcfAlternateAllele = "T" },
            new VariantSite(800) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
        }
    };

    var reads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 10), // Before neighborhood
        CreateRead("chr1", "ACGT", 96), // Ends right before neighborhood's first variant site
        CreateRead("chr1", "ACGT", 100), // Match (100)
        CreateRead("chr1", "ACGT", 300), // Within neighborhood but no VariantSite
        CreateRead("chr1", "ACGT", 400, qualityForAll: 19), // Within neighborhood but low quals
        CreateRead("chr1", "ACGT", 500), // Within neighborhood but no VariantSite (ends right before 505)
        CreateRead("chr1", "ACGT", 700), // Match (703)
        CreateRead("chr1", "ACGT", 800), // Match (800)
        CreateRead("chr1", "ACGT", 805), // Past neighborhood
        CreateRead("chr1", "ACGT", 900), // Past neighborhood
        CreateRead("chr2", "ACGT", 100), // Wrong chromosome
    };

    vcfNeighborhood.SetRangeOfInterest();

    var extractor = new MockAlignmentExtractor(reads);
    var groupSource = new VeadGroupSource(extractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");
    var groups = groupSource.GetVeadGroups(vcfNeighborhood);

    // Counts the groups whose representative vead name ends with the given suffix.
    // It reads the current value of "groups", which is reassigned further down.
    System.Func<string, int> countNamedLike = suffix => groups.Count(v => v.RepresentativeVead.Name.EndsWith(suffix));

    // Collect all reads that could relate to the neighborhood
    // - Skip anything that has quality less than MinimumMapQuality
    // - Skip anything that ends before neighborhood begins
    // - Stop collecting once we've passed the end of the neighborhood

    // We should have collected the reads at 100, 700, and 800.
    Assert.Equal(801, vcfNeighborhood.LastPositionOfInterestWithLookAhead);
    Assert.Equal(3, groups.Count());
    Assert.Equal(1, countNamedLike("100"));
    Assert.Equal(1, countNamedLike("700"));
    Assert.Equal(1, countNamedLike("800"));
    Assert.Equal(0, countNamedLike("805"));
    Assert.Equal(0, countNamedLike("900"));

    foreach (var group in groups)
    {
        Assert.Equal(1, group.NumVeads);
    }

    // Add a long deletion site; the look-ahead range is expected to extend to 809.
    vcfNeighborhood.VcfVariantSites.Add(
        new VariantSite(790) { VcfReferenceAllele = "ACAGTGAAAGACTTGTGAC", VcfAlternateAllele = "C" });

    vcfNeighborhood.SetRangeOfInterest();
    Assert.Equal(809, vcfNeighborhood.LastPositionOfInterestWithLookAhead);

    extractor = new MockAlignmentExtractor(reads);
    groupSource = new VeadGroupSource(extractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");
    groups = groupSource.GetVeadGroups(vcfNeighborhood);

    Assert.Equal(3, groups.Count());
    Assert.Equal(1, countNamedLike("100"));
    Assert.Equal(1, countNamedLike("700"));
    Assert.Equal(1, countNamedLike("800"));
    Assert.Equal(0, countNamedLike("805"));
    Assert.Equal(0, countNamedLike("900"));

    // Boundary case - read ends exactly at neighborhood's first variant site
    var boundaryReads = new List<Read>
    {
        CreateRead("chr1", "ACGT", 10), // Before neighborhood
        CreateRead("chr1", "ACGT", 96), // Ends right before neighborhood's first variant site
        CreateRead("chr1", "ACGT", 97), // Ends exactly at neighborhood's first variant site
    };

    extractor = new MockAlignmentExtractor(boundaryReads);
    groupSource = new VeadGroupSource(extractor, new BamFilterParameters() { MinimumMapQuality = 20 }, false, "");
    groups = groupSource.GetVeadGroups(vcfNeighborhood);

    // The veadgroup for 97 should be the only one
    Assert.Equal(1, groups.Count());
    Assert.Equal(1, countNamedLike("97"));

    foreach (var group in groups)
    {
        Assert.Equal(1, group.NumVeads);
    }
}
/// <summary>
/// Stages a single phased MNV at position 129 - a position where no original variant
/// was present - inside a neighborhood built from the sites at 123, 124 and 234, runs the
/// variant caller over it, and checks the called MNV plus the three reference records.
/// </summary>
public void CallAVariantInANewLocation()
{
    // Original variants. Two alleles share position 234; only the first is placed in the neighborhood.
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var alleleAt234 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var duplicateAlleleAt234 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var site123 = new VariantSite(alleleAt123);
    var site124 = new VariantSite(alleleAt124);
    var site234 = new VariantSite(alleleAt234);

    // Deliberately never added to the neighborhood: it stands in for a variant that
    // failed filters and therefore is not used for phasing.
    var filteredSite234 = new VariantSite(duplicateAlleleAt234);

    var callingParameters = new VariantCallingParameters();
    callingParameters.Validate();
    var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", site123, site124, "");
    neighborhood.AddVariantSite(site234, "RRRRR");
    neighborhood.SetRangeOfInterest();

    // Stage one candidate MNV.
    var stagedMnv = new CalledAllele(AlleleCategory.Snv);
    stagedMnv.Chromosome = "chr1";
    stagedMnv.ReferencePosition = 129;
    stagedMnv.ReferenceAllele = "A";
    stagedMnv.AlternateAllele = "TT";
    stagedMnv.VariantQscore = 100;
    stagedMnv.TotalCoverage = 1000;
    stagedMnv.AlleleSupport = 500;
    neighborhood.AddAcceptedPhasedVariant(stagedMnv);

    // Register 1000 used reference counts at position 124.
    var usedRefRecord = new SuckedUpRefRecord();
    usedRefRecord.Counts = 1000;
    usedRefRecord.AlleleThatClaimedIt = new CalledAllele();

    var usedRefCounts = new Dictionary<int, SuckedUpRefRecord>();
    usedRefCounts[124] = usedRefRecord;
    neighborhood.UsedRefCountsLookup = usedRefCounts;

    variantCaller.CallMNVs(neighborhood);
    variantCaller.CallRefs(neighborhood);

    var calledMnvs = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    // Builds the expected chr1 record (reference "A", alternate ".") with the given GT value.
    Func<int, string, VcfVariant> expectedRecordAt = (position, genotype) =>
    {
        var record = new VcfVariant();
        record.ReferenceName = "chr1";
        record.ReferencePosition = position;
        record.ReferenceAllele = "A";
        record.VariantAlleles = new[] { "." };

        var genotypeFields = new Dictionary<string, string>();
        genotypeFields.Add("GT", genotype);
        record.Genotypes = new List<Dictionary<string, string>> { genotypeFields };
        return record;
    };

    var expectedRefAt123 = expectedRecordAt(123, "0/.");
    var expectedRefAt234 = expectedRecordAt(234, "0/.");
    var expectedNullAt124 = expectedRecordAt(124, "./.");

    Assert.Equal(1, calledMnvs.Count);
    Assert.Equal(1, calledMnvs[129].Count);
    Assert.Equal(3, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
    VcfMergerTests.CheckVariantsMatch(expectedNullAt124, calledRefs[124]);
    VcfMergerTests.CheckVariantsMatch(stagedMnv, calledMnvs[129][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
}
/// <summary>
/// Merges the called variants of two neighborhoods (chr2 and chr7) with the records read
/// from MergerInput.vcf, writes the result through a <c>PhasedVcfWriter</c>, and compares the
/// written file line by line against MergerOutput.vcf.
/// </summary>
public void WriteANbhd()
{
    var outputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
    var inputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
    var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

    // Start from a clean slate so a stale output file cannot satisfy the comparison.
    File.Delete(outputFilePath);

    // NOTE(review): this context is built but never handed to the writer below, which
    // receives a default VcfWriterInputContext instead - confirm whether that is intended.
    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var config = new VcfWriterConfig
    {
        DepthFilterThreshold = 500,
        VariantQualityFilterThreshold = 30,
        FrequencyFilterThreshold = 0.007f,
        ShouldOutputNoCallFraction = true,
        ShouldOutputStrandBiasAndNoiseLevel = true,
        EstimatedBaseCallQuality = 23,
        PloidyModel = PloidyModel.Somatic,
        AllowMultipleVcfLinesPerLoci = true
    };

    var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>() { }, null);
    try
    {
        var reader = new VcfReader(inputFilePath, true);

        // Set up the original variants.
        var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
        var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
        var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
        var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

        var vs1 = new VariantSite(originalVcfVariant1);
        var vs2 = new VariantSite(originalVcfVariant2);
        var vs4 = new VariantSite(originalVcfVariant4);
        var vs5 = new VariantSite(originalVcfVariant5);

        // Have to replace variants at position 116380048 (we call two new MNVs here).
        var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr2", vs1, vs2, "");
        nbhd1.SetRangeOfInterest();

        // Have to replace variants at positions 116380051 and 52 (we call one new MNV at 51).
        var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr7", vs4, vs5, "");
        nbhd2.SetRangeOfInterest();

        VcfMerger merger = new VcfMerger(reader);
        List<CalledAllele> allelesPastNbh = new List<CalledAllele>();

        nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { originalVcfVariant1.ReferencePosition, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
        };
        nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
        {
            { originalVcfVariant4.ReferencePosition, new List<CalledAllele> { originalVcfVariant4 } }
        };

        // Each call returns the alleles carried forward into the next call.
        allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);
        allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);
        allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);
        allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);
        merger.WriteRemainingVariants(writer, allelesPastNbh);
    }
    finally
    {
        // Release the writer even when a merge step throws, so a failure does not leave
        // it undisposed; it is also disposed before the output is read back below.
        writer.Dispose();
    }

    var expectedLines = File.ReadLines(expectedFilePath).ToList();
    var outputLines = File.ReadLines(outputFilePath).ToList();

    Assert.Equal(expectedLines.Count, outputLines.Count);
    for (int i = 0; i < expectedLines.Count; i++)
    {
        Assert.Equal(expectedLines[i], outputLines[i]);
    }
}