示例#1
0
        /// <summary>
        /// Drives a single read through a <see cref="VeadFinder"/>: collects the candidate variants
        /// found in the read, matches them against the supplied VCF variant sites, and finally calls
        /// <c>FindVariantResults</c>. The intermediate results are handed to the optional callbacks
        /// so that the calling test can assert on them.
        /// </summary>
        /// <param name="read">The alignment to process.</param>
        /// <param name="minBaseCallQuality">
        /// Used both as <c>MinimumBaseCallQuality</c> of the filter parameters and as the first argument
        /// of <c>SetCandidateVariantsFoundInRead</c>.
        /// </param>
        /// <param name="vsFromVcf">The variant sites (from the VCF) to match the read against.</param>
        /// <param name="setCandidatesAssertions">
        /// Optional callback that receives the candidate variants found in the read. Skipped when null.
        /// </param>
        /// <param name="matchVariantsAssertions">
        /// Optional callback that receives the matched variants. Skipped when null.
        /// </param>
        private void ExecuteTest(BamAlignment read, int minBaseCallQuality, List <VariantSite> vsFromVcf, Action <Dictionary <SubsequenceType, List <VariantSite> > > setCandidatesAssertions = null,
                                 Action <VariantSite[]> matchVariantsAssertions = null)
        {
            var readProcessor = new VeadFinder(new BamFilterParameters()
            {
                MinimumBaseCallQuality = minBaseCallQuality
            });
            int lastPos;
            var foundVariants
                = readProcessor.SetCandidateVariantsFoundInRead(minBaseCallQuality, read, out lastPos);

            // Both callbacks default to null, so they must be guarded before being invoked;
            // otherwise a caller relying on the defaults gets a NullReferenceException.
            if (setCandidatesAssertions != null)
            {
                setCandidatesAssertions(foundVariants);
            }

            // NOTE(review): "read.Position + 1" presumably converts a 0-based alignment position
            // to the 1-based coordinate used by the VCF sites - confirm against VeadFinder.
            var matchedVariants
                = readProcessor.MatchReadVariantsWithVcfVariants(vsFromVcf, foundVariants, read.Position + 1, lastPos);

            if (matchVariantsAssertions != null)
            {
                matchVariantsAssertions(matchedVariants);
            }

            // The result is intentionally not inspected here; the call only has to complete.
            readProcessor.FindVariantResults(vsFromVcf, read);
        }
示例#2
0
        /// <summary>
        /// Stress case for <c>CheckVariantSequenceForMatchInVariantSiteFromRead</c> using a
        /// pathological MNV, i.e. one whose alleles carry a prepended base (per the original
        /// author's note, MNVs do not really look like this inside Pisces/Scylla/etc.).
        /// The goal is to demonstrate that the method still requires the whole alternate allele
        /// sequence to match the read sequence being queried.
        /// </summary>
        private static void CheckWeCanFindAnMNVInARead_pathologicalMNV()
        {
            // The variant we are looking for: ATA -> ACG at position 4.
            var targetMnv = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "ATA",
                VcfAlternateAllele = "ACG"
            };

            // Case 1: a site mined from a read that contains the MNV exactly.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? ? A C G ? ? ?
            var exactSite = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "ATA",
                VcfAlternateAllele = "ACG"
            };

            var exactState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, exactSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, exactState);

            // Case 2: a site mined from a longer read that also contains the MNV.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? A A C G A ? ?
            var longerSite = new VariantSite
            {
                VcfReferencePosition = 3,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AACGA"
            };

            var longerState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, longerSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, longerState);

            // Case 3: a site mined from a shorter read that does NOT prove the MNV exists.
            // To end up with a section this short we must have seen an odd CIGAR string with a
            // single match (e.g. 4I1M8I), or we are near the end of the read with one base left
            // to check - perhaps the other bases failed filters or were soft-clipped.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? ? ? C ? ? ? ?
            var singleBaseSite = new VariantSite
            {
                VcfReferencePosition = 5,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "C"
            };

            var singleBaseState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, singleBaseSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, singleBaseState);

            // Case 4: a site mined from a read that shows the reference sequence.
            var referenceSite = new VariantSite
            {
                VcfReferencePosition = 3,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AATAA"
            };

            var referenceState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, referenceSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundReferenceVariant, referenceState);

            // Case 5: a site mined from a read that shows a different MNV.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? G G G G G ? ?
            var allGSite = new VariantSite
            {
                VcfReferencePosition = 3,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "GGGGG"
            };

            var allGState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, allGSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, allGState);

            // Case 6: another read-derived site that differs from the MNV we are looking for.
            var partialMatchSite = new VariantSite
            {
                VcfReferencePosition = 3,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AACAA"
            };

            var partialMatchState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, partialMatchSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, partialMatchState);

            // Case 7: we are looking for ATA -> ACG at position 4 and will never find it in a
            // sequence that starts at position 5. Either position 4 was found earlier, when the
            // method was called on a previous sequence (in which case we would have exited before
            // getting here), or all the bases at position 4 were clipped or filtered off, which is
            // why this sequence starts so late in the read. The expected outcome is therefore that
            // the bases for position 4 were not available to the query method.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? ? ? C A A ? ?
            var lateStartSite = new VariantSite
            {
                VcfReferencePosition = 5,
                VcfReferenceAllele = "TAA",
                VcfAlternateAllele = "CAA"
            };

            var lateStartState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, lateStartSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, lateStartState);

            // Case 8: same reasoning as case 7 - the sequence starts at position 5, so position 4
            // is missing - even though the bases that are present agree with the tail of the MNV.
            //
            //   position:      1 2 3 4 5 6 7 8 9
            //   looking for:   - - - A C G - - -
            //   read:          ? ? ? ? C G ? ? ?
            var lateStartTailSite = new VariantSite
            {
                VcfReferencePosition = 5,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "CG"
            };

            var lateStartTailState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, lateStartTailSite);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, lateStartTailState);
        }
示例#3
0
        /// <summary>
        /// Checks <c>CheckVariantSequenceForMatchInVariantSiteFromRead</c> against an ordinary MNV
        /// (TA -> CC at position 4), using read-derived sites that contain the MNV, a different
        /// MNV, the reference, a truncated reference, and no-call ("N") bases.
        /// </summary>
        private static void CheckWeCanFindAnMNVInARead_healthyMNV()
        {
            // The variant we are looking for.
            var targetMnv = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "CC"
            };

            // A site mined from a read that contains the MNV.
            var longSiteWithMnv = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AACCAA"
            };

            var longSiteWithMnvState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, longSiteWithMnv);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, longSiteWithMnvState);

            // A site mined from a shorter read that also contains the MNV.
            var shortSiteWithMnv = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "CC"
            };

            var shortSiteWithMnvState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, shortSiteWithMnv);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, shortSiteWithMnvState);

            // A site mined from a read that contains a different MNV.
            var longSiteOtherMnv = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAAA",
                VcfAlternateAllele = "AAGCAA"
            };

            var longSiteOtherMnvState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, longSiteOtherMnv);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, longSiteOtherMnvState);

            // A site mined from a shorter read with a different MNV.
            var shortSiteOtherMnv = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "GC"
            };

            var shortSiteOtherMnvState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, shortSiteOtherMnv);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, shortSiteOtherMnvState);

            // A site mined from a read that contains the reference.
            var longSiteWithRef = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AATAA"
            };

            var longSiteWithRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, longSiteWithRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundReferenceVariant, longSiteWithRefState);

            // A site mined from a shorter read with the reference, covering both MNV bases.
            var shortSiteWithRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "TA"
            };

            var shortSiteWithRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, shortSiteWithRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundReferenceVariant, shortSiteWithRefState);

            // A site mined from an even shorter read with the reference: only one base is present.
            // Here we do not claim to have found the full reference sequence we are looking for,
            // because we run off the end of the read.
            var truncatedSiteWithRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "T"
            };

            var truncatedSiteWithRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, truncatedSiteWithRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, truncatedSiteWithRefState);

            // A site mined from a read that contains a no-call / a base that failed filters (the "N").
            var longSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AANAA"
            };

            var longSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, longSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, longSiteWithNoCallState);

            // A site mined from a shorter read with no-calls / bases that failed filters.
            var shortSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "NN"
            };

            var shortSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetMnv, shortSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, shortSiteWithNoCallState);
        }
示例#4
0
        /// <summary>
        /// Checks <c>CheckVariantSequenceForMatchInVariantSiteFromRead</c> against a SNP
        /// (T -> C at position 4), using read-derived sites that contain the SNP, a different
        /// SNP, the reference, and a no-call ("N") base - each in a longer and a shorter form.
        /// </summary>
        private static void CheckWeCanFindASnpInARead()
        {
            // The variant we are looking for.
            var targetSnp = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "C"
            };

            // A site mined from a read that contains the SNP.
            var longSiteWithSnp = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AACAA"
            };

            var longSiteWithSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, longSiteWithSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, longSiteWithSnpState);

            // A site mined from a shorter read that also contains the SNP.
            var shortSiteWithSnp = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "C"
            };

            var shortSiteWithSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, shortSiteWithSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, shortSiteWithSnpState);

            // A site mined from a read that contains a different SNP.
            var longSiteOtherSnp = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AAGAA"
            };

            var longSiteOtherSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, longSiteOtherSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, longSiteOtherSnpState);

            // A site mined from a shorter read with a different SNP.
            var shortSiteOtherSnp = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "G"
            };

            var shortSiteOtherSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, shortSiteOtherSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, shortSiteOtherSnpState);

            // A site mined from a read that contains the reference.
            var longSiteWithRef = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AATAA"
            };

            var longSiteWithRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, longSiteWithRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundReferenceVariant, longSiteWithRefState);

            // A site mined from a shorter read with the reference.
            var shortSiteWithRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "T"
            };

            var shortSiteWithRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, shortSiteWithRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundReferenceVariant, shortSiteWithRefState);

            // A site mined from a read that contains a no-call / a base that failed filters.
            var longSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AANAA"
            };

            var longSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, longSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, longSiteWithNoCallState);

            // A site mined from a shorter read with a no-call / a base that failed filters.
            var shortSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "N"
            };

            var shortSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetSnp, shortSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, shortSiteWithNoCallState);
        }
示例#5
0
        /// <summary>
        /// Checks <c>CheckVariantSequenceForMatchInVariantSiteFromRead</c> when the variant being
        /// looked for *is* the reference (T -> T at position 4). For this case "found this variant"
        /// and "found reference variant" are interchangeable results; the assertions below expect
        /// <c>FoundThisVariant</c>.
        /// </summary>
        private static void CheckWeCanFindARefInARead()
        {
            // The "variant" we are looking for is the reference base itself.
            var targetRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "T"
            };

            // A site mined from a read that contains the reference base.
            var longSiteWithRefBase = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AATCAA"
            };

            var longSiteWithRefBaseState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, longSiteWithRefBase);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, longSiteWithRefBaseState);

            // A site mined from a shorter read that also contains the reference base.
            var shortSiteWithRefBase = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "TC"
            };

            var shortSiteWithRefBaseState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, shortSiteWithRefBase);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, shortSiteWithRefBaseState);

            // A site mined from a read that contains a different SNP.
            var longSiteOtherSnp = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAAA",
                VcfAlternateAllele = "AAGCAA"
            };

            var longSiteOtherSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, longSiteOtherSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, longSiteOtherSnpState);

            // A site mined from a shorter read with a different SNP.
            var shortSiteOtherSnp = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "GC"
            };

            var shortSiteOtherSnpState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, shortSiteOtherSnp);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundDifferentVariant, shortSiteOtherSnpState);

            // A site mined from a read that is entirely reference.
            var longSiteAllRef = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AATAA"
            };

            var longSiteAllRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, longSiteAllRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, longSiteAllRefState);

            // A site mined from a shorter read that is entirely reference (two bases).
            var twoBaseSiteAllRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "TA"
            };

            var twoBaseSiteAllRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, twoBaseSiteAllRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, twoBaseSiteAllRefState);

            // A site mined from a shorter read that is entirely reference (single base).
            var oneBaseSiteAllRef = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "T",
                VcfAlternateAllele = "T"
            };

            var oneBaseSiteAllRefState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, oneBaseSiteAllRef);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.FoundThisVariant, oneBaseSiteAllRefState);

            // A site mined from a read that contains a no-call / a base that failed filters (the "N").
            var longSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 2,
                VcfReferenceAllele = "AATAA",
                VcfAlternateAllele = "AANAA"
            };

            var longSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, longSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, longSiteWithNoCallState);

            // A site mined from a shorter read with no-calls / bases that failed filters.
            var shortSiteWithNoCall = new VariantSite
            {
                VcfReferencePosition = 4,
                VcfReferenceAllele = "TA",
                VcfAlternateAllele = "NN"
            };

            var shortSiteWithNoCallState = VeadFinder.CheckVariantSequenceForMatchInVariantSiteFromRead(targetRef, shortSiteWithNoCall);
            Assert.Equal(VeadFinder.StateOfPhasingSiteInRead.HaveInsufficientData, shortSiteWithNoCallState);
        }