// Checks that VariantCaller.LeftAlignIndelsAndCallVariants throws a FormatException
// for two hand-built alignments: one whose query ends in a gap, and one whose query
// has a gap at its third position.
public static void TestExceptionThrownForUnclippedAlignment()
{
    // Case 1: the last column of the query is a gap.
    var trailingGapRef = new Sequence(DnaAlphabet.Instance, "ACAATATA");
    var trailingGapQuery = new Sequence(DnaAlphabet.Instance, "ACAATAT-");
    var trailingGapAlignment = new PairwiseSequenceAlignment(trailingGapRef, trailingGapQuery);
    trailingGapAlignment.Add(new PairwiseAlignedSequence
    {
        FirstSequence = trailingGapRef,
        SecondSequence = trailingGapQuery
    });
    Assert.Throws<FormatException>(
        () => VariantCaller.LeftAlignIndelsAndCallVariants(trailingGapAlignment, true));

    // Case 2: the gap sits in the third column, right after two matching 'A' bases.
    var leadingGapRef = new Sequence(DnaAlphabet.Instance, "AAACAATATA");
    var leadingGapQuery = new Sequence(DnaAlphabet.Instance, "AA-CAATATA");
    var leadingGapAlignment = new PairwiseSequenceAlignment(leadingGapRef, leadingGapQuery);
    leadingGapAlignment.Add(new PairwiseAlignedSequence
    {
        FirstSequence = leadingGapRef,
        SecondSequence = leadingGapQuery
    });
    Assert.Throws<FormatException>(
        () => VariantCaller.LeftAlignIndelsAndCallVariants(leadingGapAlignment, true));
}
// Runs MNV calling and reference calling over a neighborhood built from two variant
// sites (chr1:123 and chr1:124) with no phased variants staged. Expects no called
// variants and a RefAndNoCall reference call at each of the two positions.
public void CallThroughAnEmptyNbhd()
{
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

    // Author's rationale (carried over from the original comment): with 156 alt calls
    // out of 1000 total there are 844 original ref calls, which is still enough for a
    // confident reference call. It is reported as 0/. because the site is known not
    // to be homozygous reference.
    var neighborhood = new VcfNeighborhood(
        new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(0, calledVariants.Count);
    Assert.Equal(2, calledRefs.Count);

    Assert.Equal(Genotype.RefAndNoCall, calledRefs[123].Genotype);
    Assert.Equal(Genotype.RefAndNoCall, calledRefs[124].Genotype);

    Assert.Equal(123, calledRefs[123].ReferencePosition);
    Assert.Equal(124, calledRefs[124].ReferencePosition);
}
// Aligns a query that is missing one 'C' from the reference's four-base C run and
// checks that exactly one variant is called: a single-base deletion of "C" inside a
// homopolymer of length 4, carrying the low quality value (2) from the query.
public static void Test1BPDeletionCall()
{
    string seq1seq = "ATACCCCTT";
    // The '-' only marks where the base was removed; it is stripped before aligning.
    string seq2seq = "ATA-CCCTT".Replace("-", String.Empty);
    int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30 };

    var refseq = new Sequence(AmbiguousDnaAlphabet.Instance, seq1seq, false);
    var query = new Sequence(AmbiguousDnaAlphabet.Instance, seq2seq, false);

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(refseq, query).First();

    // Need to add in the QV values.
    ConvertAlignedSequenceToQualSeq(aln, seq2qual);

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that a failure message reports the right sides.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(2, variant.QV);
    Assert.AreEqual(2, variant.StartPosition);
    Assert.AreEqual(VariantType.INDEL, variant.Type);

    var vi = variant as IndelVariant;
    // Fail with a clear message instead of a NullReferenceException on the next line.
    Assert.IsNotNull(vi, "Called variant is not an IndelVariant");
    Assert.AreEqual("C", vi.InsertedOrDeletedBases);
    Assert.AreEqual('C', vi.HomopolymerBase);
    Assert.AreEqual(4, vi.HomopolymerLengthInReference);
    Assert.AreEqual(true, vi.InHomopolymer);
    Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
}
// Calls variants on the reverse complement of a reference/query pair that differ at
// the middle base, with the query marked as reverse-complemented. Expects one SNP
// whose quality value is the low score (2) attached to the mismatching base.
//
// Author's note (from the original comment): the QV is normally flipped for a
// homopolymer, but in this case it is not; the whole notion of flipping is poorly
// defined.
public static void TestTrickyQVInversions()
{
    string referenceBases = "ATTGC";
    string queryBases = "ATAGC";
    int[] queryQualities = new int[] { 30, 30, 2, 30, 30 };

    var forwardReference = new Sequence(DnaAlphabet.Instance, referenceBases);
    var forwardQuery = new Sequence(DnaAlphabet.Instance, queryBases);
    var reverseReference = forwardReference.GetReverseComplementedSequence();
    var reverseQuery = forwardQuery.GetReverseComplementedSequence();

    NeedlemanWunschAligner nwAligner = new NeedlemanWunschAligner();
    var alignment = nwAligner.Align(reverseReference, reverseQuery).First();

    // The qualities are reversed to line up with the reverse-complemented query.
    int[] reversedQualities = queryQualities.Reverse().ToArray();
    VariantCallTests.ConvertAlignedSequenceToQualSeq(alignment, reversedQualities);
    alignment.PairwiseAlignedSequences[0].Sequences[1].MarkAsReverseComplement();

    var calledVariants = VariantCaller.CallVariants(alignment);
    Assert.AreEqual(1, calledVariants.Count);

    var onlyVariant = calledVariants.First();
    Assert.AreEqual(VariantType.SNP, onlyVariant.Type);
    Assert.AreEqual(2, onlyVariant.QV);

    var snp = onlyVariant as SNPVariant;
    Assert.AreEqual('T', snp.AltBP);
    Assert.AreEqual('A', snp.RefBP);
}
// Aligns a query that differs from the reference only at its first base (C -> T) and
// checks that a single SNP is called at position 0, flagged as being at the end of
// the alignment, with the quality value (10) of the first query base.
public static void TestSNPCallAtStart()
{
    string seq1seq = "CTCCCCCTT";
    string seq2seq = "TTCCCCCTT";
    int[] seq2qual = new int[] { 10, 30, 30, 30, 5, 3, 30, 30, 10 };

    var refseq = new Sequence(DnaAlphabet.Instance, seq1seq);
    var query = new Sequence(DnaAlphabet.Instance, seq2seq);

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(refseq, query).First();
    ConvertAlignedSequenceToQualSeq(aln, seq2qual);

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that a failure message reports the right sides.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(10, variant.QV);
    Assert.AreEqual(0, variant.StartPosition);
    Assert.AreEqual(VariantType.SNP, variant.Type);

    var vi = variant as SNPVariant;
    // Fail with a clear message instead of a NullReferenceException on the next line.
    Assert.IsNotNull(vi, "Called variant is not an SNPVariant");
    Assert.AreEqual(1, vi.Length);
    Assert.AreEqual('T', vi.AltBP);
    Assert.AreEqual('C', vi.RefBP);
    Assert.AreEqual(VariantType.SNP, vi.Type);
    Assert.AreEqual(true, vi.AtEndOfAlignment);
}
// Configures the caller with filter thresholds (depth 2000, frequency 0.80, variant
// Q-score 300) that the staged calls cannot meet, then checks that the called
// variant carries the LowDepth, LowVariantFrequency and LowVariantQscore filters and
// that the reference call at 123 carries LowDepth and LowVariantQscore.
public void CheckAddingFilters()
{
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    // Set up filters so calls are sure to trigger them.
    var strictParameters = new VariantCallingParameters
    {
        LowDepthFilter = 2000,
        MinimumFrequencyFilter = 0.80F,
        MinimumVariantQScoreFilter = 300
    };
    var caller = new VariantCaller(strictParameters, new BamFilterParameters());

    // The neighborhood itself is built with default calling parameters.
    var neighborhood = new VcfNeighborhood(
        new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();

    var stagedSnv = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        AlternateAllele = "T",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 500
    };
    neighborhood.AddAcceptedPhasedVariant(stagedSnv);
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);

    var variantFilters = calledVariants[123][0].Filters;
    Assert.True(variantFilters.Contains(FilterType.LowDepth));
    Assert.True(variantFilters.Contains(FilterType.LowVariantFrequency));
    Assert.True(variantFilters.Contains(FilterType.LowVariantQscore));

    Assert.Equal(2, calledRefs.Count);

    // Note: reference calls do not receive the LowVariantFrequency flag.
    var refFilters = calledRefs[123].Filters;
    Assert.True(refFilters.Contains(FilterType.LowDepth));
    Assert.True(refFilters.Contains(FilterType.LowVariantQscore));
}
// Feeds a hand-built gapped alignment through
// VariantCaller.LeftAlignIndelsAndCallVariants and checks both the left-aligned
// sequences it returns and the three indel variants it calls.
public static void TestLeftAlignmentStep()
{
    var inputReference = new Sequence(DnaAlphabet.Instance, "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT");
    var inputQuery = new Sequence(DnaAlphabet.Instance, "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT");

    var alignment = new PairwiseSequenceAlignment(inputReference, inputQuery);
    alignment.Add(new PairwiseAlignedSequence
    {
        FirstSequence = inputReference,
        SecondSequence = inputQuery
    });

    var result = VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true);

    // Check the left alignment.
    alignment = result.Item1 as PairwiseSequenceAlignment;
    var leftAlignedPair = alignment.PairwiseAlignedSequences[0];
    var leftAlignedReference = leftAlignedPair.FirstSequence.ConvertToString();
    var leftAlignedQuery = leftAlignedPair.SecondSequence.ConvertToString();

    Assert.AreEqual("ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT", leftAlignedReference);
    Assert.AreEqual("ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT", leftAlignedQuery);

    // And it's hard, so we might as well check the variants too.
    var calledVariants = result.Item2;
    Assert.AreEqual(3, calledVariants.Count);

    // Expected properties of the three indels, index-aligned with calledVariants.
    string[] expectedBases = new string[] { "A", "GCGC", "TA" };
    char[] expectedHomopolymerBases = new char[] { 'A', 'G', 'T' };
    bool[] expectedInHomopolymer = new bool[] { true, false, false };
    int[] expectedLengths = new int[] { 1, 4, 2 };
    int[] expectedStarts = new int[] { 4, 8, 24 };
    IndelType[] expectedTypes = new IndelType[]
    {
        IndelType.Deletion, IndelType.Deletion, IndelType.Insertion
    };

    for (int idx = 0; idx < expectedBases.Length; idx++)
    {
        Assert.AreEqual(VariantType.INDEL, calledVariants[idx].Type);

        var indel = calledVariants[idx] as IndelVariant;
        Assert.AreEqual(expectedHomopolymerBases[idx], indel.HomopolymerBase);
        Assert.AreEqual(expectedStarts[idx], indel.StartPosition);
        Assert.AreEqual(expectedLengths[idx], indel.Length);
        Assert.AreEqual(expectedBases[idx], indel.InsertedOrDeletedBases);
        Assert.AreEqual(expectedInHomopolymer[idx], indel.InHomopolymer);
        Assert.AreEqual(expectedTypes[idx], indel.InsertionOrDeletion);
    }
}
// Calls VariantCaller.GetGtLogLikelihoodScore on two sets of observed B-allele counts
// against the phased genotypes whose total copy number is 3, and checks which
// genotype index is selected: PhasedGenotype(3, 0) for the first set and
// PhasedGenotype(2, 1) for the second.
public void TestGetGtLogLikelihoodScore()
{
    var modelFactory = new HaplotypeCopyNumberModelFactory();
    var copyNumberModel = modelFactory.CreateModel(
        numCnStates: 5, maxCoverage: 200, meanCoverage: 100, diploidAlleleMeanCounts: 50.0);

    var simulatedCn = 3;
    var candidateGenotypes = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5)
        .Where(gt => gt.TotalCopyNumber == simulatedCn)
        .Select(gt => gt.PhasedGenotype)
        .ToList();

    // Scenario 1: second Ballele argument is 0-2, third is 73-76.
    var lopsidedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 1, 73),
        new Ballele(100, 2, 74),
        new Ballele(200, 1, 76),
        new Ballele(300, 0, 74),
        new Ballele(400, 1, 75),
        new Ballele(500, 2, 74)
    });
    var expectedGenotype3And0 = new PhasedGenotype(3, 0);
    int? selectedGtState = null;
    // Author's note (from the original comment): the variant caller only calls MCC, so
    // only the upper triangle of CN genotypes is selected - i.e. CNa=3,CNb=0 from
    // [CNa=3,CNb=0,CNa=0,CNb=3].
    double logLikelihoodScore = VariantCaller.GetGtLogLikelihoodScore(
        lopsidedCounts, candidateGenotypes, ref selectedGtState, copyNumberModel);
    Assert.Equal(candidateGenotypes.IndexOf(expectedGenotype3And0), selectedGtState);

    // Scenario 2: second Ballele argument is 22-26, third is 46-54.
    var mixedCounts = new Balleles(new List<Ballele>
    {
        new Ballele(1, 23, 53),
        new Ballele(100, 22, 54),
        new Ballele(200, 25, 46),
        new Ballele(300, 24, 50),
        new Ballele(400, 26, 51),
        new Ballele(500, 24, 51)
    });
    var expectedGenotype2And1 = new PhasedGenotype(2, 1);
    selectedGtState = null;
    logLikelihoodScore = VariantCaller.GetGtLogLikelihoodScore(
        mixedCounts, candidateGenotypes, ref selectedGtState, copyNumberModel);
    Assert.Equal(candidateGenotypes.IndexOf(expectedGenotype2And1), selectedGtState);
}
// Same single-base homopolymer deletion as the forward-strand case, but aligned on
// the reverse complement with the query marked as reverse-complemented. Expects one
// deletion of "G" in a G homopolymer of length 4, spanning positions 5 to 6, with
// the low quality value (2).
public static void TestReverseComplement1BPIndelCall()
{
    string seq1seq = "ATACCCCTTGCGC";
    // The '-' only marks where the base was removed; it is stripped before aligning.
    string seq2seq = "ATA-CCCTTGCGC".Replace("-", String.Empty);
    int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30, 30, 30, 30, 30 };

    var refseq = new Sequence(DnaAlphabet.Instance, seq1seq);
    var query = new Sequence(DnaAlphabet.Instance, seq2seq);
    var s1rc = refseq.GetReverseComplementedSequence();
    var s2rc = query.GetReverseComplementedSequence();

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    var aln = aligner.Align(s1rc, s2rc).First();

    // Qualities are reversed to line up with the reverse-complemented query.
    VariantCallTests.ConvertAlignedSequenceToQualSeq(aln, seq2qual.Reverse().ToArray());
    aln.PairwiseAlignedSequences[0].Sequences[1].MarkAsReverseComplement();

    var variants = VariantCaller.CallVariants(aln);

    // Expected value goes first so that a failure message reports the right sides.
    Assert.AreEqual(1, variants.Count);

    var variant = variants.First();
    Assert.AreEqual(2, variant.QV);
    Assert.AreEqual(5, variant.StartPosition);
    Assert.AreEqual(VariantType.INDEL, variant.Type);

    var vi = variant as IndelVariant;
    // Fail with a clear message instead of a NullReferenceException on the next line.
    Assert.IsNotNull(vi, "Called variant is not an IndelVariant");
    Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
    Assert.AreEqual('G', vi.HomopolymerBase);
    Assert.AreEqual(1, vi.Length);
    Assert.AreEqual(4, vi.HomopolymerLengthInReference);
    Assert.AreEqual(true, vi.InHomopolymer);
    Assert.AreEqual("G", vi.InsertedOrDeletedBases);
    Assert.AreEqual(false, vi.AtEndOfAlignment);
    Assert.AreEqual(6, vi.EndPosition);
}
// Stages one accepted SNV at chr1:123 in a two-site neighborhood and re-runs the
// caller under three "sucked up" reference-count settings for position 124:
//   1. nothing sucked up     -> ref call 0/. with AD 844
//   2. 100 counts sucked up  -> ref call 0/. with AD 744
//   3. 1000 counts sucked up -> null call ./. with AD 0
// In every scenario the variant at 123 must still match the original allele.
public void VarCallsBecomeRefsAndNulls()
{
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);

    var callingParameters = new VariantCallingParameters();
    callingParameters.Validate();
    var caller = new VariantCaller(callingParameters, new BamFilterParameters());

    // Author's rationale: position 124 has 156 alt / 1000 total, i.e. 844 original ref
    // calls. Even after 100 of them are sucked up, 744 / 1000 remain, so a confident
    // reference call can still be made.
    var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.SetRangeOfInterest();
    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 500
        });

    // ---- Scenario 1: nothing has been sucked up. ----
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    var expectedRefAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>()
            {
                { "GT", "0/." },
                { "DP", "1000" },
                { "AD", "844" }
            }
        },
    };

    VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt124, calledRefs[124]);

    // ---- Scenario 2: some refs were sucked up but others remain -> still a ref. ----
    var partialClaim = new SuckedUpRefRecord() { Counts = 100, AlleleThatClaimedIt = new CalledAllele() };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, partialClaim } };

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    expectedRefAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>()
            {
                { "GT", "0/." },
                { "DP", "1000" },
                { "AD", "744" }
            }
        },
    };

    VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt124, calledRefs[124]);

    // ---- Scenario 3: everything was sucked up -> output a null call. ----
    var fullClaim = new SuckedUpRefRecord() { Counts = 1000, AlleleThatClaimedIt = new CalledAllele() };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, fullClaim } };

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[123].Count);
    Assert.Equal(2, calledRefs.Count);

    var expectedNullAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>()
            {
                { "GT", "./." },
                { "DP", "1000" },
                { "AD", "0" }
            }
        },
    };

    VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
    VcfMergerTests.CheckVariantsMatch(expectedNullAt124, calledRefs[124]);
}
// Builds a neighborhood from variant sites at chr1:123, 124 and 234, stages a
// candidate variant at a position (129) that has no original site, and marks all
// 1000 counts at 124 as sucked up. Expects the new variant to be called at 129,
// reference calls (0/.) at 123 and 234, and a null call (./.) at 124.
public void CallAVariantInANewLocation()
{
    // Set up the original variants.
    var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
    var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
    var alleleAt234 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
    var unusedAlleleAt234 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

    var siteAt123 = new VariantSite(alleleAt123);
    var siteAt124 = new VariantSite(alleleAt124);
    var siteAt234 = new VariantSite(alleleAt234);
    var unusedSiteAt234 = new VariantSite(unusedAlleleAt234);

    var callingParameters = new VariantCallingParameters();
    callingParameters.Validate();
    var caller = new VariantCaller(callingParameters, new BamFilterParameters());

    var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", siteAt123, siteAt124, "");
    neighborhood.AddVariantSite(siteAt234, "RRRRR");
    // The fourth site is deliberately not added: it is not going to be used for
    // phasing. Suppose it is a variant that failed filters.
    neighborhood.SetRangeOfInterest();

    // Now stage one candidate MNV.
    var stagedVariant = new CalledAllele(AlleleCategory.Snv)
    {
        Chromosome = "chr1",
        ReferencePosition = 129,
        ReferenceAllele = "A",
        AlternateAllele = "TT",
        VariantQscore = 100,
        TotalCoverage = 1000,
        AlleleSupport = 500
    };
    neighborhood.AddAcceptedPhasedVariant(stagedVariant);

    var fullClaim = new SuckedUpRefRecord() { Counts = 1000, AlleleThatClaimedIt = new CalledAllele() };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 124, fullClaim } };

    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    var expectedRefAt123 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 123,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "0/." } }
        },
    };

    var expectedRefAt234 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 234,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "0/." } }
        },
    };

    var expectedNullAt124 = new VcfVariant()
    {
        ReferenceName = "chr1",
        ReferencePosition = 124,
        ReferenceAllele = "A",
        VariantAlleles = new[] { "." },
        Genotypes = new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>() { { "GT", "./." } }
        },
    };

    Assert.Equal(1, calledVariants.Count);
    Assert.Equal(1, calledVariants[129].Count);
    Assert.Equal(3, calledRefs.Count);

    VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
    VcfMergerTests.CheckVariantsMatch(expectedNullAt124, calledRefs[124]);
    VcfMergerTests.CheckVariantsMatch(stagedVariant, calledVariants[129][0]);
    VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
}
// Regression test written after bug ScyllaLoosingRefCalls_PICS-723 was found.
// A 1/. genotype was reported where 1/0 was expected, because all of the reference
// calls (the "0"s) had been incorrectly sucked up. For example, MNV ACG -> AG claimed
// 50 refs, and those 50 refs were then (incorrectly) subtracted from that same MNV.
// Ref counts must never be subtracted from the very allele that claimed them; that
// was not the intent of the algorithm.
//
// The affected method is CreateMnvsFromClusters in VcfNbhd.
public void CreateMnvsFromClusters_TakeUpRefCount()
{
    var mnvAllele = TestHelper.CreateDummyAllele("chr1", 123, "ACG", "AT", 1000, 156);
    var insertionAllele = TestHelper.CreateDummyAllele("chr1", 123, "A", "TTTTTT", 1000, 200);
    var spareAllele = TestHelper.CreateDummyAllele("chr1", 123, "AC", "TT", 1000, 100);

    var firstSite = new VariantSite(mnvAllele);
    var secondSite = new VariantSite(insertionAllele);

    var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var neighborhood = new VcfNeighborhood(
        new VariantCallingParameters(), 0, "chr1", firstSite, secondSite, "");
    neighborhood.SetRangeOfInterest();

    // Stage three accepted variants at position 123 with allele support 200, 300, 250.
    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 200
        });
    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Mnv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "ACG",
            AlternateAllele = "AT",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 300
        });
    neighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Insertion)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "AAAAA",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 250
        });

    // Default behavior: nothing gets sucked up.
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

    // Build a single-read, single-group cluster over the first site.
    firstSite.VcfReferencePosition = 123;
    var read = new Vead("dummy", new VariantSite[] { firstSite });
    var readGroup = new VeadGroup(read);
    var fakeCluster = new Cluster("test", new List<VeadGroup>() { readGroup });
    fakeCluster.ResetConsensus();

    neighborhood.CreateMnvsFromClusters(new List<Cluster> { fakeCluster }, 20, 100);
    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    var calledVariants = neighborhood.CalledVariants;
    var calledRefs = neighborhood.CalledRefs;

    Assert.Equal(2, calledVariants.Count);
    Assert.Equal(3, calledVariants[123].Count);
    Assert.Equal(1, calledRefs.Count);

    // Check the ref counts on all the MNVs. Nothing should be sucked up, so each is
    // total depth minus allele support (overly simple for now).
    Assert.Equal(1000 - 200, calledVariants[123][0].ReferenceSupport);
    Assert.Equal(1000 - 300, calledVariants[123][1].ReferenceSupport);
    Assert.Equal(1000 - 250, calledVariants[123][2].ReferenceSupport);

    // Now variant 0 will suck up 100 ref calls.
    var claimOf100 = new SuckedUpRefRecord()
    {
        Counts = 100,
        AlleleThatClaimedIt = neighborhood.CandidateVariants[0]
    };
    neighborhood.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>() { { 123, claimOf100 } };

    neighborhood.CreateMnvsFromClusters(new List<Cluster> { fakeCluster }, 20, 100);
    caller.CallMNVs(neighborhood);
    caller.CallRefs(neighborhood);

    calledVariants = neighborhood.CalledVariants;
    calledRefs = neighborhood.CalledRefs;

    // Check the ref counts on all the MNVs. The claiming allele (index 0) keeps its
    // full count: total depth minus allele support.
    Assert.Equal(1000 - 200, calledVariants[123][0].ReferenceSupport);

    // Before the fix, the buggy expectations for the other two alleles were
    // 1000 - 300 and 1000 - 250. With the fix they also lose the 100 sucked-up refs:
    // total depth minus allele support minus sucked-up refs.
    Assert.Equal(1000 - 300 - 100, calledVariants[123][1].ReferenceSupport);
    Assert.Equal(1000 - 250 - 100, calledVariants[123][2].ReferenceSupport);
}
// Regression test written after bug ScyllaShouldMergeClusters_PICS-1122 was found.
// An output vcf contained the same variant line twice:
//chr11 64577365 . C . 100 PASS DP=1429 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/0:1:1429:1429:0.00000:65:-100.0000:0.0592:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577366 . A T 78 PASS DP = 559 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577366 . A T 78 PASS DP = 559 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
//chr11 64577367 . G. 100 PASS DP = 1411 GT:GQ:AD:DP:VF:NL:SB:NC:US 0/0:1:1411:1411:0.00000:65:-100.0000:0.0741:0,0,0,0,0,0,0,0,0,0,0,0
// The affected methods are "AddAcceptedPhasedVariant" and "AddRejectedPhasedVariant".
// With the fix, an added variant is merged into an existing one when they are the same.
public void AddAcceptedAndRejectedPhasedVariantTests()
{
    // This test adds four accepted variants at chr1:123 (two identical A>T, one A>G,
    // one A>AG) and three rejected reference calls (two at 123, one at 124).
    // It then expects three candidate variants and two refs to remain.
    var firstAllele = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 500, 156);
    var secondAllele = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 200);
    var firstSite = new VariantSite(firstAllele);
    var secondSite = new VariantSite(secondAllele);

    var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
    var vcfNeighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);
    var callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

    // ---- Variants ----
    callableNeighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 200,
            ReferenceSupport = 350,
            NoiseLevelApplied = 20
        });
    callableNeighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "T",
            VariantQscore = 20,
            TotalCoverage = 500,
            AlleleSupport = 300,
            ReferenceSupport = 50,
            NoiseLevelApplied = 20
        });
    callableNeighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "G",
            VariantQscore = 20,
            TotalCoverage = 500,
            AlleleSupport = 300,
            ReferenceSupport = 50,
            NoiseLevelApplied = 20
        });
    callableNeighborhood.AddAcceptedPhasedVariant(
        new CalledAllele(AlleleCategory.Snv)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = "AG",
            VariantQscore = 20,
            TotalCoverage = 500,
            AlleleSupport = 300,
            ReferenceSupport = 50,
            NoiseLevelApplied = 20
        });

    // ---- Refs ----
    callableNeighborhood.AddRejectedPhasedVariant(
        new CalledAllele(AlleleCategory.Reference)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = ".",
            VariantQscore = 100,
            TotalCoverage = 1000,
            AlleleSupport = 200,
            ReferenceSupport = 350,
            NoiseLevelApplied = 20
        });
    callableNeighborhood.AddRejectedPhasedVariant(
        new CalledAllele(AlleleCategory.Reference)
        {
            Chromosome = "chr1",
            ReferencePosition = 123,
            ReferenceAllele = "A",
            AlternateAllele = ".",
            VariantQscore = 20,
            TotalCoverage = 500,
            AlleleSupport = 300,
            ReferenceSupport = 50,
            NoiseLevelApplied = 20
        });
    callableNeighborhood.AddRejectedPhasedVariant(
        new CalledAllele(AlleleCategory.Reference)
        {
            Chromosome = "chr1",
            ReferencePosition = 124,
            ReferenceAllele = "A",
            AlternateAllele = ".",
            VariantQscore = 20,
            TotalCoverage = 500,
            AlleleSupport = 300,
            ReferenceSupport = 50,
            NoiseLevelApplied = 20
        });

    // ---- Check results ----
    // Check we got the right number of results.
    Assert.Equal(3, callableNeighborhood.CandidateVariants.Count);
    Assert.Equal(2, callableNeighborhood.Refs.Count);

    // The two identical A>T SNVs are expected to be combined: allele support is
    // summed, coverage and reference support are averaged, and the Q-score is 100.
    var mergedSnv = callableNeighborhood.CandidateVariants[0];
    Assert.Equal(123, mergedSnv.ReferencePosition);
    Assert.Equal("chr1", mergedSnv.Chromosome);
    Assert.Equal("A", mergedSnv.ReferenceAllele);
    Assert.Equal("T", mergedSnv.AlternateAllele);
    Assert.Equal(200 + 300, mergedSnv.AlleleSupport);
    Assert.Equal(0, mergedSnv.NumNoCalls);
    Assert.Equal(100, mergedSnv.VariantQscore);
    Assert.Equal((1000 + 500) / 2, mergedSnv.TotalCoverage);
    Assert.Equal((350 + 50) / 2, mergedSnv.ReferenceSupport);
    Assert.Equal(AlleleCategory.Snv, mergedSnv.Type);
    Assert.Equal(20, mergedSnv.NoiseLevelApplied);

    // The A>G SNV had nothing to merge with, so its values should not have changed.
    var untouchedSnv = callableNeighborhood.CandidateVariants[1];
    Assert.Equal(123, untouchedSnv.ReferencePosition);
    Assert.Equal("chr1", untouchedSnv.Chromosome);
    Assert.Equal("A", untouchedSnv.ReferenceAllele);
    Assert.Equal("G", untouchedSnv.AlternateAllele);
    Assert.Equal(300, untouchedSnv.AlleleSupport);
    Assert.Equal(0, untouchedSnv.NumNoCalls);
    Assert.Equal(20, untouchedSnv.VariantQscore);
    Assert.Equal(500, untouchedSnv.TotalCoverage);
    Assert.Equal(50, untouchedSnv.ReferenceSupport);
    Assert.Equal(AlleleCategory.Snv, untouchedSnv.Type);
    Assert.Equal(20, untouchedSnv.NoiseLevelApplied);
}
// Command-line entry point.
//
// Arguments:
//   args[0] - input reads file; parsed as FASTQ when the name ends in ".fastq"
//             (case-insensitive), otherwise with the PacBio CCS BAM reader.
//   args[1] - output directory; must not already exist, and is created here.
//   args[2] - optional reference file; when given, reads are aligned with BWA and
//             variants are called.
//
// A producer task parses (and optionally aligns) the reads in parallel and pushes
// them into a blocking collection, while the calling thread drains that collection
// into every metrics outputter. All failures are reported on the console; nothing is
// rethrown to the caller.
public static void Main(string[] args)
{
    try
    {
        PlatformManager.Services.MaxSequenceSize = int.MaxValue;
        PlatformManager.Services.DefaultBufferSize = 4096;
        PlatformManager.Services.Is64BitProcessType = true;

        // The help request is checked BEFORE the argument count is validated, so that
        // a lone "-h" / "help" shows the help text instead of "Not enough arguments".
        if (args.Length > 0
            && (args[0] == "h" || args[0] == "help" || args[0] == "?" || args[0] == "-h"))
        {
            DisplayHelp();
        }
        else if (args.Length > 3)
        {
            Console.WriteLine("Too many arguments");
            DisplayHelp();
        }
        else if (args.Length < 2)
        {
            Console.WriteLine("Not enough arguments");
            DisplayHelp();
        }
        else
        {
            string bamName = args[0];
            string outDir = args[1];
            string refName = args.Length > 2 ? args[2] : null;

            if (!File.Exists(bamName))
            {
                Console.WriteLine("Can't find file: " + bamName);
                return;
            }
            if (refName != null && !File.Exists(refName))
            {
                Console.WriteLine("Can't find file: " + refName);
                return;
            }
            if (Directory.Exists(outDir))
            {
                Console.WriteLine("The output directory already exists, please specify a new directory or delete the old one.");
                return;
            }
            Directory.CreateDirectory(outDir);

            List<CCSReadMetricsOutputter> outputters = new List<CCSReadMetricsOutputter>()
            {
                new ZmwOutputFile(outDir),
                new ZScoreOutputter(outDir),
                new VariantOutputter(outDir),
                new SNROutputFile(outDir),
                new QVCalibration(outDir)
            };

            ISequenceParser reader;
            if (bamName.EndsWith(".fastq", StringComparison.OrdinalIgnoreCase))
            {
                reader = new FastQCCSReader();
            }
            else
            {
                reader = new PacBioCCSBamReader();
            }

            // Alignment and variant calling only happen when a reference was supplied.
            BWAPairwiseAligner bwa = null;
            bool callVariants = refName != null;
            if (callVariants)
            {
                bwa = new BWAPairwiseAligner(refName, false);
            }

            // Produce aligned reads with variants called in parallel.
            var reads = new BlockingCollection<Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>>();
            Task producer = Task.Factory.StartNew(() =>
            {
                try
                {
                    Parallel.ForEach(reader.Parse(bamName), parsed =>
                    {
                        var read = parsed as PacBioCCSRead;
                        try
                        {
                            BWAPairwiseAlignment aln = null;
                            List<Variant> variants = null;
                            if (callVariants)
                            {
                                aln = bwa.AlignRead(read.Sequence) as BWAPairwiseAlignment;
                                if (aln != null)
                                {
                                    variants = VariantCaller.CallVariants(aln);
                                    // Shift each variant by the alignment's SAM position
                                    // and tag it with the reference name.
                                    variants.ForEach(v =>
                                    {
                                        v.StartPosition += aln.AlignedSAMSequence.Pos;
                                        v.RefName = aln.Reference;
                                    });
                                }
                            }
                            var res = new Tuple<PacBioCCSRead, BWAPairwiseAlignment, List<Variant>>(read, aln, variants);
                            reads.Add(res);
                        }
                        catch (Exception thrown)
                        {
                            // A single bad read is reported and skipped.
                            Console.WriteLine("CCS READ FAIL: " + read.Sequence.ID);
                            Console.WriteLine(thrown.Message);
                        }
                    });
                }
                catch (Exception thrown)
                {
                    Console.WriteLine("Could not parse BAM file: " + thrown.Message);
                    while (thrown.InnerException != null)
                    {
                        Console.WriteLine(thrown.InnerException.Message);
                        thrown = thrown.InnerException;
                    }
                }
                finally
                {
                    // Always signal completion; otherwise the consumer loop below would
                    // block forever if the error reporting above itself failed.
                    reads.CompleteAdding();
                }
            });

            // Consume them into output files.
            foreach (var r in reads.GetConsumingEnumerable())
            {
                foreach (var outputter in outputters)
                {
                    outputter.ConsumeCCSRead(r.Item1, r.Item2, r.Item3);
                }
            }

            // Throw any exceptions (this should be used after putting the consumer on a separate thread).
            producer.Wait();

            // Close the files.
            outputters.ForEach(o => o.Finish());
        }
    }
    catch (DllNotFoundException thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results.");
        // Fixed: a space was missing between "libMonoPosixHelper" and "to".
        Console.WriteLine("A shared library was not found. To solve this, please add the folder"
            + " with the downloaded files libbwasharp and libMonoPosixHelper"
            + " to your environmental variables (LD_LIBRARY_PATH on Ubuntu, DYLD_LIBRARY_PATH on Mac OS X).");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
    }
    catch (Exception thrown)
    {
        Console.WriteLine("Error thrown when attempting to generate the CCS results");
        Console.WriteLine("Error: " + thrown.Message);
        Console.WriteLine(thrown.StackTrace);
        while (thrown.InnerException != null)
        {
            Console.WriteLine("Inner Exception: " + thrown.InnerException.Message);
            thrown = thrown.InnerException;
        }
    }
}