Exemple #1
0
        /// <summary>
        /// Checks that <c>VariantCaller.LeftAlignIndelsAndCallVariants</c> throws a
        /// <see cref="FormatException"/> for two hand-built alignments: one whose query ends
        /// in a gap, and one whose query has a gap at its third position.
        /// </summary>
        public static void TestExceptionThrownForUnclippedAlignment()
        {
            // Each row is { reference, query }; the rows are exercised in order.
            var gappedPairs = new[]
            {
                new[] { "ACAATATA", "ACAATAT-" },
                new[] { "AAACAATATA", "AA-CAATATA" }
            };

            foreach (var pair in gappedPairs)
            {
                var reference = new Sequence(DnaAlphabet.Instance, pair[0]);
                var query     = new Sequence(DnaAlphabet.Instance, pair[1]);
                var alignment = new PairwiseSequenceAlignment(reference, query);

                // The alignment is assembled by hand rather than produced by an aligner.
                var alignedPair = new PairwiseAlignedSequence
                {
                    FirstSequence  = reference,
                    SecondSequence = query
                };
                alignment.Add(alignedPair);

                Assert.Throws <FormatException> (() => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs MNV and reference calling on a neighborhood built from two adjacent sites
        /// (chr1:123 and chr1:124) to which no phased variant was added, and checks that
        /// nothing is called as a variant while both positions come back as ref/no-call.
        /// </summary>
        public void CallThroughAnEmptyNbhd()
        {
            var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123   = new VariantSite(alleleAt123);
            var siteAt124   = new VariantSite(alleleAt124);

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            // Author's rationale: with 156 alt calls out of 1000 at a site, most of the depth still
            // supports the reference, so a confident ref call can be made. It is reported as 0/.
            // rather than 0/0 because the site is known not to be homozygous reference.
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            // No variants, one reference record per input position.
            Assert.Equal(0, calledVariants.Count);
            Assert.Equal(2, calledRefs.Count);

            var refAt123 = calledRefs[123];
            var refAt124 = calledRefs[124];

            Assert.Equal(Genotype.RefAndNoCall, refAt123.Genotype);
            Assert.Equal(Genotype.RefAndNoCall, refAt124.Genotype);
            Assert.Equal(123, refAt123.ReferencePosition);
            Assert.Equal(124, refAt124.ReferencePosition);
        }
Exemple #3
0
        /// <summary>
        /// Aligns the reference "ATACCCCTT" against a read that is missing one C from the C run
        /// and checks that <c>VariantCaller.CallVariants</c> reports exactly one variant:
        /// a deletion of "C" inside a C homopolymer of reference length 4, carrying QV 2.
        /// </summary>
        public static void Test1BPDeletionCall()
        {
            string seq1seq = "ATACCCCTT";
            // The '-' only marks where the base was dropped; it is removed before alignment.
            string seq2seq = "ATA-CCCTT".Replace("-", String.Empty);

            // One quality value per query base; the single low value (2) is the QV expected on the call.
            int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30 };
            var   refseq   = new Sequence(AmbiguousDnaAlphabet.Instance, seq1seq, false);
            var   query    = new Sequence(AmbiguousDnaAlphabet.Instance, seq2seq, false);
            NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
            var aln = aligner.Align(refseq, query).First();

            // Need to add in the QV Values.
            ConvertAlignedSequenceToQualSeq(aln, seq2qual);
            var variants = VariantCaller.CallVariants(aln);

            // Expected value goes first so a failure message reads "expected 1 but was N".
            Assert.AreEqual(1, variants.Count);
            var variant = variants.First();

            Assert.AreEqual(2, variant.QV);
            Assert.AreEqual(2, variant.StartPosition);
            Assert.AreEqual(VariantType.INDEL, variant.Type);

            // Fail with a clear assertion instead of a NullReferenceException if the cast does not hold.
            var vi = variant as IndelVariant;
            Assert.IsNotNull(vi, "A variant of type INDEL should be an IndelVariant.");

            Assert.AreEqual("C", vi.InsertedOrDeletedBases);
            Assert.AreEqual('C', vi.HomopolymerBase);
            Assert.AreEqual(4, vi.HomopolymerLengthInReference);
            Assert.AreEqual(true, vi.InHomopolymer);
            Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
        }
Exemple #4
0
        /// <summary>
        /// Calls variants on the reverse-complemented alignment of "ATTGC" against "ATAGC" and checks
        /// that the single SNP keeps the low quality value (2) and is reported as ref 'A' / alt 'T'.
        /// </summary>
        public static void TestTrickyQVInversions()
        {
            // This is the hard case: normally the QV value is flipped for a homopolymer, but here it is not.
            // Note the whole notion of flipping is poorly defined.
            string seq1seq = "ATTGC";
            string seq2seq = "ATAGC";

            // One quality value per query base, in the original (forward) orientation.
            int[] seq2qual = new int[] { 30, 30, 2, 30, 30 };
            var   refseq   = new Sequence(DnaAlphabet.Instance, seq1seq);
            var   query    = new Sequence(DnaAlphabet.Instance, seq2seq);

            var s1rc = refseq.GetReverseComplementedSequence();
            var s2rc = query.GetReverseComplementedSequence();

            NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
            var aln = aligner.Align(s1rc, s2rc).First();

            // The qualities are reversed to line up with the reverse-complemented query.
            VariantCallTests.ConvertAlignedSequenceToQualSeq(aln, seq2qual.Reverse().ToArray());
            aln.PairwiseAlignedSequences [0].Sequences [1].MarkAsReverseComplement();
            var variants = VariantCaller.CallVariants(aln);

            Assert.AreEqual(1, variants.Count);
            var variant = variants.First();

            Assert.AreEqual(VariantType.SNP, variant.Type);
            Assert.AreEqual(2, variant.QV);

            // Fail with a clear assertion instead of a NullReferenceException if the cast does not hold.
            var vs = variant as SNPVariant;
            Assert.IsNotNull(vs, "A variant of type SNP should be an SNPVariant.");

            Assert.AreEqual('T', vs.AltBP);
            Assert.AreEqual('A', vs.RefBP);
        }
Exemple #5
0
        /// <summary>
        /// Aligns "CTCCCCCTT" against "TTCCCCCTT" and checks that the mismatch in the very first
        /// column is called as a single SNP (ref 'C', alt 'T') at position 0 with QV 10, and that
        /// the call is flagged as being at the end of the alignment.
        /// </summary>
        public static void TestSNPCallAtStart()
        {
            string seq1seq = "CTCCCCCTT";
            string seq2seq = "TTCCCCCTT";

            // One quality value per query base; the first value (10) is the QV expected on the call.
            int[] seq2qual = new int[] { 10, 30, 30, 30, 5, 3, 30, 30, 10 };
            var   refseq   = new Sequence(DnaAlphabet.Instance, seq1seq);
            var   query    = new Sequence(DnaAlphabet.Instance, seq2seq);

            NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
            var aln = aligner.Align(refseq, query).First();

            ConvertAlignedSequenceToQualSeq(aln, seq2qual);
            var variants = VariantCaller.CallVariants(aln);

            // Expected value goes first so a failure message reads "expected 1 but was N".
            Assert.AreEqual(1, variants.Count);
            var variant = variants.First();

            Assert.AreEqual(10, variant.QV);
            Assert.AreEqual(0, variant.StartPosition);
            Assert.AreEqual(VariantType.SNP, variant.Type);

            // Fail with a clear assertion instead of a NullReferenceException if the cast does not hold.
            var vi = variant as SNPVariant;
            Assert.IsNotNull(vi, "A variant of type SNP should be an SNPVariant.");

            Assert.AreEqual(1, vi.Length);
            Assert.AreEqual('T', vi.AltBP);
            Assert.AreEqual('C', vi.RefBP);
            Assert.AreEqual(VariantType.SNP, vi.Type);
            Assert.AreEqual(true, vi.AtEndOfAlignment);
        }
Exemple #6
0
        /// <summary>
        /// Configures filter thresholds that the staged calls cannot meet, runs MNV and reference
        /// calling, and checks that the called variant at chr1:123 and the reference call at
        /// chr1:123 carry the expected filter flags.
        /// </summary>
        public void CheckAddingFilters()
        {
            var originalVcfVariant  = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var vs1 = new VariantSite(originalVcfVariant);
            var vs2 = new VariantSite(originalVcfVariant2);

            var variantCallingParameters = new VariantCallingParameters();

            //Set up filters so calls are sure to trigger them.
            variantCallingParameters.LowDepthFilter             = 2000;
            variantCallingParameters.MinimumFrequencyFilter     = 0.80F;
            variantCallingParameters.MinimumVariantQScoreFilter = 300;

            var caller = new VariantCaller(variantCallingParameters, new BamFilterParameters());

            // NOTE(review): the neighborhood is given default parameters, not the configured
            // instance above; only the caller sees the strict thresholds. Confirm this is intended.
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", vs1, vs2, "");

            nbhd.SetRangeOfInterest();
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            });

            // Empty lookup: no reference counts are recorded as claimed by another allele.
            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>();

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            // Assert.Contains reports the actual collection contents on failure,
            // unlike Assert.True(collection.Contains(...)).
            var variantFilters = acceptedMNVs[123][0].Filters;
            Assert.Contains(FilterType.LowDepth, variantFilters);
            Assert.Contains(FilterType.LowVariantFrequency, variantFilters);
            Assert.Contains(FilterType.LowVariantQscore, variantFilters);

            Assert.Equal(2, acceptedRefs.Count);

            var refFilters = acceptedRefs[123].Filters;
            Assert.Contains(FilterType.LowDepth, refFilters);
            Assert.Contains(FilterType.LowVariantQscore, refFilters);
            //note: reference calls are not checked for the "LowVariantFrequency" flag.
        }
Exemple #7
0
        /// <summary>
        /// Feeds a hand-built alignment containing three indels to
        /// <c>VariantCaller.LeftAlignIndelsAndCallVariants</c> and checks both the returned
        /// alignment strings and the three indel calls (bases, start, length, homopolymer
        /// information and insertion/deletion type).
        /// </summary>
        public static void TestLeftAlignmentStep()
        {
            var refseq   = "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT";
            var queryseq = "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT";

            var r   = new Sequence(DnaAlphabet.Instance, refseq);
            var q   = new Sequence(DnaAlphabet.Instance, queryseq);
            var aln = new PairwiseSequenceAlignment(r, q);
            var pas = new PairwiseAlignedSequence();

            pas.FirstSequence  = r;
            pas.SecondSequence = q;
            aln.Add(pas);
            var tpl = VariantCaller.LeftAlignIndelsAndCallVariants(aln, true);

            // Check the left alignment
            aln = tpl.Item1 as PairwiseSequenceAlignment;
            Assert.IsNotNull(aln, "The returned alignment should be a PairwiseSequenceAlignment.");
            var lar         = aln.PairwiseAlignedSequences [0].FirstSequence.ConvertToString();
            var laq         = aln.PairwiseAlignedSequences [0].SecondSequence.ConvertToString();
            var exprefseq   = "ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT";
            var expqueryseq = "ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT";

            Assert.AreEqual(exprefseq, lar);
            Assert.AreEqual(expqueryseq, laq);

            // And it's hard, so we might as well check the variants
            var variants = tpl.Item2;

            // Expected values for each indel, in call order; index i of every array describes variant i.
            string[]    bases   = new string[] { "A", "GCGC", "TA" };
            char[]      hpbases = new char[] { 'A', 'G', 'T' };
            bool[]      inHp    = new bool[] { true, false, false };
            int[]       lengths = new int[] { 1, 4, 2 };
            int[]       starts  = new int[] { 4, 8, 24 };
            IndelType[] types   = new IndelType[] { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };

            // The expected count and the loop bound both come from the expectation table,
            // so they cannot drift apart.
            Assert.AreEqual(bases.Length, variants.Count);
            for (int i = 0; i < bases.Length; i++)
            {
                Assert.AreEqual(VariantType.INDEL, variants [i].Type);
                var vi = variants [i] as IndelVariant;
                Assert.IsNotNull(vi, "Variant " + i + " should be an IndelVariant.");
                Assert.AreEqual(hpbases[i], vi.HomopolymerBase);
                Assert.AreEqual(starts [i], vi.StartPosition);
                Assert.AreEqual(lengths [i], vi.Length);
                Assert.AreEqual(bases [i], vi.InsertedOrDeletedBases);
                Assert.AreEqual(inHp [i], vi.InHomopolymer);
                Assert.AreEqual(types [i], vi.InsertionOrDeletion);
            }
        }
Exemple #8
0
        /// <summary>
        /// Checks which genotype state <c>VariantCaller.GetGtLogLikelihoodScore</c> selects (through its
        /// <c>ref</c> argument) for two sets of observed allele counts, with the candidate genotypes
        /// restricted to those whose total copy number is 3.
        /// </summary>
        public void TestGetGtLogLikelihoodScore()
        {
            var copyNumberModelFactory = new HaplotypeCopyNumberModelFactory();
            var copyNumberModel        = copyNumberModelFactory.CreateModel(numCnStates: 5, maxCoverage: 200,
                                                                            meanCoverage: 100, diploidAlleleMeanCounts: 50.0);
            var simulatedCn   = 3;
            var gtModelCounts = PedigreeInfo.GeneratePhasedGenotype(numCnStates: 5).Where(gt => gt.TotalCopyNumber == simulatedCn)
                                .Select(gt => gt.PhasedGenotype).ToList();

            // First scenario: one of the two counts at every site is close to zero.
            // NOTE(review): the meaning of the three Ballele arguments is not visible here;
            // presumably (position, count, count) - confirm against the Ballele constructor.
            var gtObservedCounts = new Balleles(new List <Ballele>
            {
                new Ballele(1, 1, 73),
                new Ballele(100, 2, 74),
                new Ballele(200, 1, 76),
                new Ballele(300, 0, 74),
                new Ballele(400, 1, 75),
                new Ballele(500, 2, 74)
            });
            var expectedGt3_0   = new PhasedGenotype(3, 0);
            int?selectedGtState = null;

            // variant caller only calls MCC, only upper triangle of CN genotypes is selected - i.e. CNa=3,CNb=0 from [CNa=3,CNb=0,CNa=0,CNb=3]
            // The returned score is not checked; only the selected state is.
            VariantCaller.GetGtLogLikelihoodScore(gtObservedCounts, gtModelCounts, ref selectedGtState,
                                                  copyNumberModel);

            // IndexOf relies on PhasedGenotype equality to locate the expected genotype in the model list.
            Assert.Equal(gtModelCounts.IndexOf(expectedGt3_0), selectedGtState);

            // Second scenario: the two counts at every site are roughly in a 1:2 ratio.
            gtObservedCounts = new Balleles(new List <Ballele>
            {
                new Ballele(1, 23, 53),
                new Ballele(100, 22, 54),
                new Ballele(200, 25, 46),
                new Ballele(300, 24, 50),
                new Ballele(400, 26, 51),
                new Ballele(500, 24, 51)
            });
            var expectedGt2_1 = new PhasedGenotype(2, 1);

            selectedGtState = null;
            // Same upper-triangle rule as above: here the expected selection is CNa=2,CNb=1
            // rather than its mirror image CNa=1,CNb=2.
            VariantCaller.GetGtLogLikelihoodScore(gtObservedCounts, gtModelCounts, ref selectedGtState,
                                                  copyNumberModel);
            Assert.Equal(gtModelCounts.IndexOf(expectedGt2_1), selectedGtState);
        }
Exemple #9
0
        /// <summary>
        /// Reverse-complements "ATACCCCTTGCGC" and a read missing one C from the C run, aligns them,
        /// and checks that the single call is a 1 bp deletion of "G" inside a G homopolymer of
        /// reference length 4, spanning positions 5 to 6 with QV 2.
        /// </summary>
        public static void TestReverseComplement1BPIndelCall()
        {
            string seq1seq = "ATACCCCTTGCGC";
            // The '-' only marks where the base was dropped; it is removed before alignment.
            string seq2seq = "ATA-CCCTTGCGC".Replace("-", String.Empty);

            // One quality value per query base, in the original (forward) orientation.
            int[] seq2qual = new int[] { 30, 30, 30, 2, 30, 30, 30, 30, 30, 30, 30, 30 };
            var   refseq   = new Sequence(DnaAlphabet.Instance, seq1seq);
            var   query    = new Sequence(DnaAlphabet.Instance, seq2seq);

            var s1rc = refseq.GetReverseComplementedSequence();
            var s2rc = query.GetReverseComplementedSequence();

            NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
            var aln = aligner.Align(s1rc, s2rc).First();

            // The qualities are reversed to line up with the reverse-complemented query.
            VariantCallTests.ConvertAlignedSequenceToQualSeq(aln, seq2qual.Reverse().ToArray());
            aln.PairwiseAlignedSequences [0].Sequences [1].MarkAsReverseComplement();
            var variants = VariantCaller.CallVariants(aln);

            // Expected value goes first so a failure message reads "expected 1 but was N".
            Assert.AreEqual(1, variants.Count);
            var variant = variants.First();

            Assert.AreEqual(2, variant.QV);
            Assert.AreEqual(5, variant.StartPosition);
            Assert.AreEqual(VariantType.INDEL, variant.Type);

            // Fail with a clear assertion instead of a NullReferenceException if the cast does not hold.
            var vi = variant as IndelVariant;
            Assert.IsNotNull(vi, "A variant of type INDEL should be an IndelVariant.");

            Assert.AreEqual(IndelType.Deletion, vi.InsertionOrDeletion);
            Assert.AreEqual('G', vi.HomopolymerBase);
            Assert.AreEqual(1, vi.Length);
            Assert.AreEqual(4, vi.HomopolymerLengthInReference);
            Assert.AreEqual(true, vi.InHomopolymer);
            Assert.AreEqual("G", vi.InsertedOrDeletedBases);
            Assert.AreEqual(false, vi.AtEndOfAlignment);
            Assert.AreEqual(6, vi.EndPosition);
        }
Exemple #10
0
        /// <summary>
        /// Calls MNVs and refs three times on the same two-site neighborhood (chr1:123 and chr1:124),
        /// changing only how many reference counts at position 124 are recorded as claimed by
        /// another allele: none, 100, then 1000. The variant at 123 must be called every time;
        /// position 124 is expected as 0/. with AD 844, then 0/. with AD 744, then ./. with AD 0.
        /// </summary>
        public void VarCallsBecomeRefsAndNulls()
        {
            var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123   = new VariantSite(alleleAt123);
            var siteAt124   = new VariantSite(alleleAt124);

            var callingParameters = new VariantCallingParameters();
            callingParameters.Validate();
            var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());

            // Position 124 has 156 alt calls out of 1000, i.e. 844 reference calls to start from.
            var neighborhood = new VcfNeighborhood(callingParameters, 0, "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            var phasedSnv = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            };
            neighborhood.AddAcceptedPhasedVariant(phasedSnv);

            // ---- Round 1: nothing claimed, position 124 keeps all 844 reference calls. ----
            neighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>();

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[123].Count);
            Assert.Equal(2, calledRefs.Count);

            var untouchedGenotype = new Dictionary <string, string>
            {
                { "GT", "0/." }, { "DP", "1000" }, { "AD", "844" }
            };
            var expectedAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { untouchedGenotype }
            };

            VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
            VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);

            // ---- Round 2: 100 reference calls claimed; 744 remain, so it is still a ref call. ----
            var claimed100 = new SuckedUpRefRecord
            {
                Counts              = 100,
                AlleleThatClaimedIt = new CalledAllele()
            };
            neighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>
            {
                { 124, claimed100 }
            };

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            calledVariants = neighborhood.CalledVariants;
            calledRefs     = neighborhood.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[123].Count);
            Assert.Equal(2, calledRefs.Count);

            var reducedGenotype = new Dictionary <string, string>
            {
                { "GT", "0/." }, { "DP", "1000" }, { "AD", "744" }
            };
            expectedAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { reducedGenotype }
            };

            VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
            VcfMergerTests.CheckVariantsMatch(expectedAt124, calledRefs[124]);

            // ---- Round 3: all 1000 claimed; position 124 must be emitted as a null call. ----
            var claimed1000 = new SuckedUpRefRecord
            {
                Counts              = 1000,
                AlleleThatClaimedIt = new CalledAllele()
            };
            neighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>
            {
                { 124, claimed1000 }
            };

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            calledVariants = neighborhood.CalledVariants;
            calledRefs     = neighborhood.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[123].Count);
            Assert.Equal(2, calledRefs.Count);

            var nullGenotype = new Dictionary <string, string>
            {
                { "GT", "./." }, { "DP", "1000" }, { "AD", "0" }
            };
            var expectedNullAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { nullGenotype }
            };

            VcfMergerTests.CheckVariantsMatch(alleleAt123, calledVariants[123][0]);
            VcfMergerTests.CheckVariantsMatch(expectedNullAt124, calledRefs[124]);
        }
Exemple #11
0
        /// <summary>
        /// Stages one phased variant at chr1:129, a position that is not one of the neighborhood's
        /// input sites (123, 124, 234), marks all 1000 reference counts at 124 as claimed, and checks
        /// that calling yields the new variant at 129, ref calls (0/.) at 123 and 234, and a null
        /// call (./.) at 124.
        /// </summary>
        public void CallAVariantInANewLocation()
        {
            // Input alleles; the fourth duplicates the third's position and is deliberately left out below.
            var alleleAt123    = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124    = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var alleleAt234    = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var unusedAlleleAt234 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

            var siteAt123    = new VariantSite(alleleAt123);
            var siteAt124    = new VariantSite(alleleAt124);
            var siteAt234    = new VariantSite(alleleAt234);
            var unusedSite   = new VariantSite(unusedAlleleAt234);

            var callingParameters = new VariantCallingParameters();
            callingParameters.Validate();

            var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());
            var neighborhood  = new VcfNeighborhood(callingParameters, 0, "chr1", siteAt123, siteAt124, "");

            // unusedSite is never added: it stands in for a variant that failed filters
            // and therefore does not take part in phasing.
            neighborhood.AddVariantSite(siteAt234, "RRRRR");
            neighborhood.SetRangeOfInterest();

            // Stage the single candidate at the new location.
            var stagedVariant = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 129,
                ReferenceAllele   = "A",
                AlternateAllele   = "TT",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            };
            neighborhood.AddAcceptedPhasedVariant(stagedVariant);

            // Every reference count at 124 is recorded as claimed.
            var claimed1000 = new SuckedUpRefRecord
            {
                Counts              = 1000,
                AlleleThatClaimedIt = new CalledAllele()
            };
            neighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>
            {
                { 124, claimed1000 }
            };

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            // Expected records; only the GT field is specified for each.
            var refGenotypeAt123 = new Dictionary <string, string> { { "GT", "0/." } };
            var expectedRefAt123 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { refGenotypeAt123 }
            };

            var refGenotypeAt234 = new Dictionary <string, string> { { "GT", "0/." } };
            var expectedRefAt234 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 234,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { refGenotypeAt234 }
            };

            var nullGenotypeAt124 = new Dictionary <string, string> { { "GT", "./." } };
            var expectedNullAt124 = new VcfVariant
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> > { nullGenotypeAt124 }
            };

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[129].Count);

            Assert.Equal(3, calledRefs.Count);

            VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
            VcfMergerTests.CheckVariantsMatch(expectedNullAt124, calledRefs[124]);
            VcfMergerTests.CheckVariantsMatch(stagedVariant, calledVariants[129][0]);
            VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
        }
        // Regression test written after bug ScyllaLoosingRefCalls_PICS-723.
        // A 1/. genotype was reported where 1/0 was expected, because all of the reference
        // calls (the "0"s) had been incorrectly sucked up.
        // Example: MNV ACG -> AG claimed 50 refs, and those 50 refs were then (incorrectly)
        // subtracted from that very same MNV.
        // Reference counts must never be subtracted from the allele that claimed them;
        // that was not the intent of the algorithm.
        //
        // The affected method is CreateMnvsFromClusters in VcfNbhd.
        //
        // The test stages three phased variants at chr1:123, runs calling once with nothing claimed
        // and once with 100 refs claimed by candidate variant 0, and checks ReferenceSupport on each.
        public void CreateMnvsFromClusters_TakeUpRefCount()
        {
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 123, "ACG", "AT", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 123, "A", "TTTTTT", 1000, 200);
            // NOTE(review): originalVcfVariant3 is created but never used below.
            var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr1", 123, "AC", "TT", 1000, 100);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);

            // The caller and the neighborhood each receive their own default parameter object.
            var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
            var nbhd   = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", vs1, vs2, "");


            nbhd.SetRangeOfInterest();
            // Phased variant 1 of 3: SNV A -> T with allele support 200.
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 200
            });


            // Phased variant 2 of 3: MNV ACG -> AT with allele support 300.
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Mnv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "ACG",
                AlternateAllele   = "AT",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 300
            });

            // Phased variant 3 of 3: insertion A -> AAAAA with allele support 250.
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Insertion)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "AAAAA",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 250
            });


            // Default behavior: the lookup is empty, so nothing gets sucked up.
            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
            };
            vs1.VcfReferencePosition = 123;
            // Build a minimal single-site cluster to drive CreateMnvsFromClusters.
            var vead        = new Vead("dummy", new VariantSite[] { vs1 });
            var vg          = new VeadGroup(vead);
            var fakeCluster = new Cluster("test", new List <VeadGroup>()
            {
                vg
            });

            fakeCluster.ResetConsensus();
            nbhd.CreateMnvsFromClusters(new List <Cluster> {
                fakeCluster
            },
                                        20, 100);
            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(2, acceptedMNVs.Count);
            Assert.Equal(3, acceptedMNVs[123].Count);
            Assert.Equal(1, acceptedRefs.Count);

            // Check the ref counts on all the MNVs. Nothing should be sucked up.
            Assert.Equal(1000 - 200, acceptedMNVs[123][0].ReferenceSupport);  // total depth - allele support (overly simple for now)
            Assert.Equal(1000 - 300, acceptedMNVs[123][1].ReferenceSupport);  // total depth - allele support (overly simple for now)
            Assert.Equal(1000 - 250, acceptedMNVs[123][2].ReferenceSupport);  // total depth - allele support (overly simple for now)

            // Now candidate variant 0 sucks up 100 ref calls at position 123.
            var suckedUpRefRecord100 = new SuckedUpRefRecord()
            {
                Counts = 100, AlleleThatClaimedIt = nbhd.CandidateVariants[0]
            };

            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 123, suckedUpRefRecord100 }
            };
            // Re-run the same pipeline on the same neighborhood with the new lookup in place.
            nbhd.CreateMnvsFromClusters(new List <Cluster> {
                fakeCluster
            },
                                        20, 100);

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            acceptedMNVs = nbhd.CalledVariants;
            acceptedRefs = nbhd.CalledRefs;


            // Check the ref counts on all the MNVs. The first variant's own reference support is
            // expected to stay unreduced; the other two are expected to lose the 100 sucked-up refs.
            Assert.Equal(1000 - 200, acceptedMNVs[123][0].ReferenceSupport);  // total depth - allele support

            // Expectations from before the fix (buggy behavior), kept for reference:
            //Assert.Equal(1000 - 300, acceptedMNVs[123][1].ReferenceSupport);  //total depth - allele support - sucked up ref)
            //Assert.Equal(1000 - 250, acceptedMNVs[123][2].ReferenceSupport);  //total depth - allele support - sucked up ref)

            // Expectations after the fix:
            Assert.Equal(1000 - 300 - 100, acceptedMNVs[123][1].ReferenceSupport);  // total depth - allele support - sucked up ref
            Assert.Equal(1000 - 250 - 100, acceptedMNVs[123][2].ReferenceSupport);  // total depth - allele support - sucked up ref
        }
Exemple #13
0
        //this unit test was made after we found bug ScyllaShouldMergeClusters_PICS-1122.
        //We had an output vcf with the following lines
        //chr11	64577365	.	C	.	100	PASS	DP=1429	GT:GQ:AD:DP:VF:NL:SB:NC:US	0/0:1:1429:1429:0.00000:65:-100.0000:0.0592:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577366	.	A T	78	PASS DP = 559  GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577366	.	A T	78	PASS DP = 559  GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577367	.	G.   100	PASS DP = 1411 GT:GQ:AD:DP:VF:NL:SB:NC:US	0/0:1:1411:1411:0.00000:65:-100.0000:0.0741:0,0,0,0,0,0,0,0,0,0,0,0

        //The affected methods are "AddAcceptedPhasedVariant" and "AddRejectedPhasedVariant".
        //The fix merges the added variant if it is the same as a variant that already exists.
        public void AddAcceptedAndRejectedPhasedVariantTests()
        {
            // Scenario: four candidate variants are added at chr1:123 -- two identical A>T calls
            // (expected to be merged into one entry), one A>G and one A>AG (each expected to stay separate).
            // Three reference calls are added -- two at position 123 (expected to be merged) and one at 124.
            // So 7 alleles go in, and 3 candidate variants + 2 refs should come out in the lists.

            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 500, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 200);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);

            var nbhd = new VcfNeighborhood(0, "chr1", vs1, vs2);
            var callableNeighborhood = new CallableNeighborhood(nbhd, new VariantCallingParameters());

            //variants:

            // first A>T call
            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 200,
                ReferenceSupport  = 350,
                NoiseLevelApplied = 20
            });


            // second A>T call: same site and alleles as the first, so it should be merged into it
            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 20,
                TotalCoverage     = 500,
                AlleleSupport     = 300,
                ReferenceSupport  = 50,
                NoiseLevelApplied = 20
            });

            // A>G call: different alternate allele, should remain its own entry
            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "G",
                VariantQscore     = 20,
                TotalCoverage     = 500,
                AlleleSupport     = 300,
                ReferenceSupport  = 50,
                NoiseLevelApplied = 20
            });

            // A>AG call: different alternate allele again, should remain its own entry
            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "AG",
                VariantQscore     = 20,
                TotalCoverage     = 500,
                AlleleSupport     = 300,
                ReferenceSupport  = 50,
                NoiseLevelApplied = 20
            });

            //refs:

            // first reference call at 123
            callableNeighborhood.AddRejectedPhasedVariant(
                new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = ".",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 200,
                ReferenceSupport  = 350,
                NoiseLevelApplied = 20
            });


            // second reference call at 123: should be merged with the first
            callableNeighborhood.AddRejectedPhasedVariant(
                new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = ".",
                VariantQscore     = 20,
                TotalCoverage     = 500,
                AlleleSupport     = 300,
                ReferenceSupport  = 50,
                NoiseLevelApplied = 20
            });

            // reference call at 124: different position, should remain its own entry
            callableNeighborhood.AddRejectedPhasedVariant(
                new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome        = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                AlternateAllele   = ".",
                VariantQscore     = 20,
                TotalCoverage     = 500,
                AlleleSupport     = 300,
                ReferenceSupport  = 50,
                NoiseLevelApplied = 20
            });


            //check results.

            //check we got the right number of results
            Assert.Equal(3, callableNeighborhood.CandidateVariants.Count);
            Assert.Equal(2, callableNeighborhood.Refs.Count);

            //check the snps did what we expected

            // The merged A>T entry is expected to carry the summed allele support, the higher of the
            // two Q scores, and the mean of the two coverages / reference supports.
            var combinedSnp = callableNeighborhood.CandidateVariants[0];

            Assert.Equal(123, combinedSnp.ReferencePosition);
            Assert.Equal("chr1", combinedSnp.Chromosome);
            Assert.Equal("A", combinedSnp.ReferenceAllele);
            Assert.Equal("T", combinedSnp.AlternateAllele);
            Assert.Equal(200 + 300, combinedSnp.AlleleSupport);
            Assert.Equal(0, combinedSnp.NumNoCalls);
            Assert.Equal(100, combinedSnp.VariantQscore);
            Assert.Equal((1000 + 500) / 2, combinedSnp.TotalCoverage);
            Assert.Equal((350 + 50) / 2, combinedSnp.ReferenceSupport);
            Assert.Equal(AlleleCategory.Snv, combinedSnp.Type);
            Assert.Equal(20, combinedSnp.NoiseLevelApplied);

            //these values should not have changed
            var justAddedSnp = callableNeighborhood.CandidateVariants[1];

            Assert.Equal(123, justAddedSnp.ReferencePosition);
            Assert.Equal("chr1", justAddedSnp.Chromosome);
            Assert.Equal("A", justAddedSnp.ReferenceAllele);
            Assert.Equal("G", justAddedSnp.AlternateAllele);
            Assert.Equal(300, justAddedSnp.AlleleSupport);
            Assert.Equal(0, justAddedSnp.NumNoCalls);
            Assert.Equal(20, justAddedSnp.VariantQscore);
            Assert.Equal(500, justAddedSnp.TotalCoverage);
            Assert.Equal(50, justAddedSnp.ReferenceSupport);
            Assert.Equal(AlleleCategory.Snv, justAddedSnp.Type);
            Assert.Equal(20, justAddedSnp.NoiseLevelApplied);

            //the A>AG call was not merged with anything either, so its values should not have changed
            var justAddedInsertion = callableNeighborhood.CandidateVariants[2];

            Assert.Equal(123, justAddedInsertion.ReferencePosition);
            Assert.Equal("chr1", justAddedInsertion.Chromosome);
            Assert.Equal("A", justAddedInsertion.ReferenceAllele);
            Assert.Equal("AG", justAddedInsertion.AlternateAllele);
            Assert.Equal(300, justAddedInsertion.AlleleSupport);
            Assert.Equal(20, justAddedInsertion.VariantQscore);
            Assert.Equal(500, justAddedInsertion.TotalCoverage);
            Assert.Equal(50, justAddedInsertion.ReferenceSupport);
            Assert.Equal(20, justAddedInsertion.NoiseLevelApplied);
        }
Exemple #14
0
        /// <summary>
        /// Command-line entry point: reads CCS reads from an input file, optionally aligns them to a
        /// reference and calls variants, and feeds every read to a fixed set of metric outputters
        /// that write into a newly created output directory.
        /// </summary>
        /// <param name="args">
        /// args[0]: input file (parsed as FASTQ when the name ends in ".fastq", otherwise as a PacBio CCS BAM);
        /// args[1]: output directory, which must not exist yet;
        /// args[2] (optional): reference file; when supplied, reads are aligned and variants are called.
        /// A first argument of "h", "help", "?" or "-h" prints the help text instead.
        /// </param>
        public static void Main(string[] args)
        {
            try {
                PlatformManager.Services.MaxSequenceSize    = int.MaxValue;
                PlatformManager.Services.DefaultBufferSize  = 4096;
                PlatformManager.Services.Is64BitProcessType = true;

                // Check for a help request before validating the argument count, so that a lone
                // "-h" shows the help without a misleading "Not enough arguments" message.
                bool helpRequested = args.Length > 0 &&
                                     (args [0] == "h" || args [0] == "help" || args [0] == "?" || args [0] == "-h");

                if (helpRequested)
                {
                    DisplayHelp();
                }
                else if (args.Length > 3)
                {
                    Console.WriteLine("Too many arguments");
                    DisplayHelp();
                }
                else if (args.Length < 2)
                {
                    Console.WriteLine("Not enough arguments");
                    DisplayHelp();
                }
                else
                {
                    string bam_name = args [0];
                    string out_dir  = args [1];
                    string ref_name = args.Length > 2 ? args [2] : null;
                    if (!File.Exists(bam_name))
                    {
                        Console.WriteLine("Can't find file: " + bam_name);
                        return;
                    }
                    if (ref_name != null && !File.Exists(ref_name))
                    {
                        Console.WriteLine("Can't find file: " + ref_name);
                        return;
                    }
                    // Refuse to write into an existing directory rather than mixing with old output.
                    if (Directory.Exists(out_dir))
                    {
                        Console.WriteLine("The output directory already exists, please specify a new directory or delete the old one.");
                        return;
                    }

                    Directory.CreateDirectory(out_dir);

                    List <CCSReadMetricsOutputter> outputters = new List <CCSReadMetricsOutputter> ()
                    {
                        new ZmwOutputFile(out_dir),
                        new ZScoreOutputter(out_dir),
                        new VariantOutputter(out_dir),
                        new SNROutputFile(out_dir),
                        new QVCalibration(out_dir)
                    };

                    // Pick the parser from the input file extension.
                    ISequenceParser reader;
                    if (bam_name.EndsWith(".fastq", StringComparison.OrdinalIgnoreCase))
                    {
                        reader = new FastQCCSReader();
                    }
                    else
                    {
                        reader = new PacBioCCSBamReader();
                    }

                    // Alignment and variant calling only happen when a reference was supplied.
                    BWAPairwiseAligner bwa = null;
                    bool callVariants      = ref_name != null;
                    if (callVariants)
                    {
                        bwa = new BWAPairwiseAligner(ref_name, false);
                    }

                    // Produce aligned reads with variants called in parallel.
                    var  reads    = new BlockingCollection <Tuple <PacBioCCSRead, BWAPairwiseAlignment, List <Variant> > >();
                    Task producer = Task.Factory.StartNew(() =>
                    {
                        try
                        {
                            Parallel.ForEach(reader.Parse(bam_name), y => {
                                // NOTE(review): z is null if the parser yields something other than a
                                // PacBioCCSRead; that case is not guarded here.
                                var z = y as PacBioCCSRead;
                                try {
                                    BWAPairwiseAlignment aln = null;
                                    List <Variant> variants  = null;
                                    if (callVariants)
                                    {
                                        aln = bwa.AlignRead(z.Sequence) as BWAPairwiseAlignment;
                                        if (aln != null)
                                        {
                                            variants = VariantCaller.CallVariants(aln);
                                            // Offset each variant by the alignment position and
                                            // record which reference it was called against.
                                            variants.ForEach(p => {
                                                p.StartPosition += aln.AlignedSAMSequence.Pos;
                                                p.RefName        = aln.Reference;
                                            });
                                        }
                                    }
                                    var res = new Tuple <PacBioCCSRead, BWAPairwiseAlignment, List <Variant> >(z, aln, variants);
                                    reads.Add(res);
                                }
                                catch (Exception thrown) {
                                    // A failure on one read is reported and the read is skipped.
                                    Console.WriteLine("CCS READ FAIL: " + z.Sequence.ID);
                                    Console.WriteLine(thrown.Message);
                                }
                            });
                        } catch (Exception thrown) {
                            Console.WriteLine("Could not parse BAM file: " + thrown.Message);
                            while (thrown.InnerException != null)
                            {
                                Console.WriteLine(thrown.InnerException.Message);
                                thrown = thrown.InnerException;
                            }
                        } finally {
                            // Always mark the collection complete, even if the error reporting above
                            // fails, so the consuming loop below can never block forever.
                            reads.CompleteAdding();
                        }
                    });


                    // Consume them into output files.
                    foreach (var r in reads.GetConsumingEnumerable())
                    {
                        foreach (var outputter in outputters)
                        {
                            outputter.ConsumeCCSRead(r.Item1, r.Item2, r.Item3);
                        }
                    }

                    // throw any exceptions (this should be used after putting the consumer on a separate thread)
                    producer.Wait();

                    // Close the files
                    outputters.ForEach(z => z.Finish());
                }
            }
            catch (DllNotFoundException thrown) {
                Console.WriteLine("Error thrown when attempting to generate the CCS results.");
                Console.WriteLine("A shared library was not found.  To solve this, please add the folder" +
                                  " with the downloaded files libbwasharp and libMonoPosixHelper" +
                                  " to your environmental variables (LD_LIBRARY_PATH on Ubuntu, DYLD_LIBRARY_PATH on Mac OS X).");
                Console.WriteLine("Error: " + thrown.Message);
                Console.WriteLine(thrown.StackTrace);
            }
            catch (Exception thrown) {
                Console.WriteLine("Error thrown when attempting to generate the CCS results");
                Console.WriteLine("Error: " + thrown.Message);
                Console.WriteLine(thrown.StackTrace);
                // Walk the inner-exception chain so the root cause is visible too.
                while (thrown.InnerException != null)
                {
                    Console.WriteLine("Inner Exception: " + thrown.InnerException.Message);
                    thrown = thrown.InnerException;
                }
            }
        }