예제 #1
0
        /// <summary>
        /// Creates a <c>VcfNeighborhood</c> from a pair of variant sites and appends it to <c>_neighborhoods</c>.
        /// </summary>
        /// <param name="lastVariantSite">The earlier of the two sites handed to the neighborhood constructor.</param>
        /// <param name="currentVariantSite">The later site; its <c>ReferenceName</c> is passed to the constructor.</param>
        /// <param name="referenceStringBetweenVariants">Reference string forwarded unchanged to the neighborhood constructor.</param>
        private void ProcessNewNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, string referenceStringBetweenVariants)
        {
            _neighborhoods.Add(
                new VcfNeighborhood(
                    _variantCallingParams,
                    currentVariantSite.ReferenceName,
                    lastVariantSite,
                    currentVariantSite,
                    referenceStringBetweenVariants));
        }
예제 #2
0
        /// <summary>
        /// Feeds <c>PhasedVariantExtractor.Extract</c> three cluster sites whose alleles overlap
        /// (two of them share position 28608287) and checks that one merged allele,
        /// AGGGA to CCTTT at 28608285, comes back with nothing listed in the refs to remove.
        /// </summary>
        public void CheckOverlappingMNVs()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();

            var sites = new[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "AGG", VcfAlternateAllele = "CCT" }, //5,6,7
                new VariantSite(28608287) { VcfReferenceAllele = "GGA", VcfAlternateAllele = "TTT" }, //7,8,9
                new VariantSite(28608287) { VcfReferenceAllele = "A", VcfAlternateAllele = "T" }
            };

            // Per-site inputs, index-aligned with the sites above.
            int[] depthAtSites   = { 100, 200, 200 };
            int[] noCallsAtSites = { 0, 0, 0 };
            int[] countsAtSites  = { 90, 190, 190 };

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele,
                sites,
                referenceSequence,
                depthAtSites,
                noCallsAtSites,
                countsAtSites,
                chromosome,
                20,
                100);

            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("AGGGA", allele.ReferenceAllele);
            Assert.Equal("CCTTT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
예제 #3
0
        /// <summary>
        /// Produces one character for every position in <paramref name="suckedUpReferenceCalls"/>,
        /// looked up in <paramref name="reference"/> relative to the variant site's reference position.
        /// </summary>
        /// <param name="reference">Reference string to read bases from; may be empty.</param>
        /// <param name="variantSite">Site whose <c>VcfReferencePosition</c> is subtracted from each position to get an index into <paramref name="reference"/>.</param>
        /// <param name="suckedUpReferenceCalls">Positions to emit a character for, in enumeration order.</param>
        /// <returns>
        /// The looked-up characters concatenated in order. A position yields the placeholder "R" when
        /// <paramref name="reference"/> is empty or when its index falls outside the reference string.
        /// </returns>
        private static string FillGapWithReferenceData(string reference,
                                                       VariantSite variantSite, IEnumerable <int> suckedUpReferenceCalls)
        {
            // A StringBuilder avoids re-copying the whole accumulated string on every appended base.
            var gapFiller = new System.Text.StringBuilder();

            foreach (var refPosition in suckedUpReferenceCalls)
            {
                var indexIntoRef = refPosition - variantSite.VcfReferencePosition;

                if (reference.Length == 0)
                {
                    // No reference available: emit the placeholder without logging.
                    gapFiller.Append("R");
                }
                else if ((indexIntoRef >= 0) && (indexIntoRef < reference.Length))
                {
                    gapFiller.Append(reference[indexIntoRef]);
                }
                else
                {
                    // Index is outside the supplied reference: log the details and emit the placeholder.
                    Logger.WriteToLog("Reference issue:");
                    Logger.WriteToLog("Reference:" + reference);
                    Logger.WriteToLog("Index:" + indexIntoRef);
                    Logger.WriteToLog("Start of nbhd:" + variantSite.VcfReferencePosition);
                    gapFiller.Append("R");
                }
            }

            return gapFiller.ToString();
        }
예제 #4
0
        /// <summary>
        /// Exercises <c>VariantPhasingResult.GetWeightedProbOfAGivenB</c> as support is added step by step
        /// for two tracked sites, and checks that an untracked site makes it throw.
        /// </summary>
        public void GetWeightedProbOfAGivenB()
        {
            var siteA         = new VariantSite(1);
            var siteB         = new VariantSite(2);
            var siteB2        = new VariantSite(3);
            var untrackedSite = new VariantSite(4);

            var trackedSites = new List <VariantSite> { siteB, siteB2 };
            var result       = new VariantPhasingResult(siteA, trackedSites, 100);

            // Nothing added yet: both tracked sites report 0.
            Assert.Equal(0, result.GetWeightedProbOfAGivenB(siteB));
            Assert.Equal(0, result.GetWeightedProbOfAGivenB(siteB2));

            // Support for B only (none for A-and-B) still reports 0.
            result.AddSupportForB(siteB, 20);
            Assert.Equal(0, result.GetWeightedProbOfAGivenB(siteB));

            // Support for A-and-B only (none for B) still reports 0.
            result.AddSupportForAandB(siteB2, 20);
            Assert.Equal(0, result.GetWeightedProbOfAGivenB(siteB2));

            // With both kinds of support present the expected value is weighted(A-and-B) / weighted(B):
            // 10 / 20 for the first site ...
            result.AddSupportForAandB(siteB, 10);
            var probForB = result.GetWeightedProbOfAGivenB(siteB);
            Assert.True(ApproximatelyEqual(0.5, probForB));

            // ... and 20 / 50 for the second.
            result.AddSupportForB(siteB2, 50);
            var probForB2 = result.GetWeightedProbOfAGivenB(siteB2);
            Assert.True(ApproximatelyEqual(0.4, probForB2));

            // A site that was never handed to the constructor must throw.
            Assert.Throws <Exception>(() => result.GetWeightedProbOfAGivenB(untrackedSite));
        }
예제 #5
0
        /// <summary>
        /// Runs the MNV and reference callers over a neighborhood that holds two adjacent sites
        /// (chr1:123 and chr1:124) and no accepted phased variants, and checks that no MNVs are
        /// called while both positions receive a RefAndNoCall reference call.
        /// </summary>
        public void CallThroughAnEmptyNbhd()
        {
            // NOTE(review): the trailing 1000 / 156 arguments look like total depth and alt support — confirm against TestHelper.
            var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123   = new VariantSite(alleleAt123);
            var siteAt124   = new VariantSite(alleleAt124);

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            Assert.Equal(0, calledVariants.Count);
            Assert.Equal(2, calledRefs.Count);

            // Both reference calls are expected to be "ref and no-call" rather than homozygous reference.
            Assert.Equal(Genotype.RefAndNoCall, calledRefs[123].Genotype);
            Assert.Equal(Genotype.RefAndNoCall, calledRefs[124].Genotype);
            Assert.Equal(123, calledRefs[123].ReferencePosition);
            Assert.Equal(124, calledRefs[124].ReferencePosition);
        }
예제 #6
0
        /// <summary>
        /// Extracts a phased variant from a site whose reference and alternate alleles share leading
        /// and trailing bases, passing -1 as the anchor position, and checks that the shared bases are
        /// trimmed and the reported position moves forward by two.
        /// </summary>
        public void CheckPrecedingAndTrailingBasesGetRemoved()
        {
            //anchored
            int anchorPosition = -1;

            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();

            // Only the first site carries alleles; the second is left as constructed.
            var sites = new[]
            {
                new VariantSite(28608285) { VcfReferenceAllele = "TCTCAAAAAACGT", VcfAlternateAllele = "TCGTACGT" },
                new VariantSite(28608287)
            };

            int[] depthAtSites   = { 100, 200 };
            int[] noCallsAtSites = { 0, 0 };
            int[] countsAtSites  = { 90, 190 };

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele,
                sites,
                referenceSequence,
                depthAtSites,
                noCallsAtSites,
                countsAtSites,
                chromosome,
                20,
                100,
                anchorPosition);

            Assert.Equal("TCAAAAA", allele.ReferenceAllele);
            Assert.Equal("GT", allele.AlternateAllele);
            Assert.Equal(28608285 + 2, allele.ReferencePosition);
        }
예제 #7
0
        /// <summary>
        /// Runs <c>PhasedVariantExtractor.Extract</c> twice with 28608285 as the final argument
        /// (presumably the anchor position, going by the test name — confirm against the Extract signature):
        /// first with a single deletion, then with two deletions separated by reference-only sites.
        /// </summary>
        public void CheckDeletionsWithAnchoring()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            // Per-site inputs, index-aligned with clusterVariantSites.
            var neighborhoodDepthAtSites   = new int[] { 100, 200 };
            var neighborhoodNoCallsAtSites = new int[] { 0, 0 };
            var clusterCountsAtSites       = new int[] { 90, 190 };

            // Scenario 1: only the first site has alleles set — a 19-base reference allele reduced to "A".
            clusterVariantSites[0].VcfReferenceAllele = "AGAAGTACTCATTATCTGT";
            clusterVariantSites[0].VcfAlternateAllele = "A";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            Assert.Equal(1, refsToRemove.Count);

            // The deletion is expected back unchanged, at the same position.
            Assert.Equal("AGAAGTACTCATTATCTGT", allele.ReferenceAllele);
            Assert.Equal("A", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);


            // Scenario 2: replace all inputs with a four-site cluster.
            neighborhoodDepthAtSites   = new int[] { 100, 200, 100, 200 };
            neighborhoodNoCallsAtSites = new int[] { 0, 0, 0, 0 };
            clusterCountsAtSites       = new int[] { 90, 190, 10, 20 };
            clusterVariantSites        = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287),
                new VariantSite(28608288), new VariantSite(28608291)
            };


            // Sites 0 and 2 are A>A (reference); sites 1 and 3 are the deletions.
            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "A";

            clusterVariantSites[1].VcfReferenceAllele = "AAG";
            clusterVariantSites[1].VcfAlternateAllele = "A";

            clusterVariantSites[2].VcfReferenceAllele = "A";
            clusterVariantSites[2].VcfAlternateAllele = "A";

            clusterVariantSites[3].VcfReferenceAllele = "ACTCAT";
            clusterVariantSites[3].VcfAlternateAllele = "A";

            // referenceSequence = "AGA[AG]TA[CTCAT]TATCTGAGGAGCCGGTCACCTGTACCA";
            // altSequence = "AGA[XX]TA[XXXXX]TATCTGAGGAGCCGGTCACCTGTACCA";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100, 28608285);

            Assert.Equal(5, refsToRemove.Count);

            // Both deletions are expected to be merged into one allele starting at the first site's position.
            Assert.Equal("AGAAGTACTCAT", allele.ReferenceAllele);
            Assert.Equal("AGATA", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
예제 #8
0
        /// <summary>
        /// Returns a deep copy of <paramref name="vcfVariant"/> whose reference and alternate alleles
        /// are explicitly set to those of the original.
        /// </summary>
        /// <param name="vcfVariant">The variant site to copy.</param>
        /// <returns>The copy produced by <c>DeepCopy</c>, with both allele properties reassigned from the source.</returns>
        private static VariantSite SetVariantMatch(VariantSite vcfVariant)
        {
            var matchedSite = vcfVariant.DeepCopy();

            // NOTE(review): DeepCopy may already carry the alleles over; the assignments are kept as in the original.
            matchedSite.VcfReferenceAllele = vcfVariant.VcfReferenceAllele;
            matchedSite.VcfAlternateAllele = vcfVariant.VcfAlternateAllele;

            return matchedSite;
        }
예제 #9
0
        /// <summary>
        /// Creates a neighborhood for the two given sites and appends it to <c>_nextBatchOfVcfNeighborhoods</c>.
        /// </summary>
        /// <param name="lastVariantSite">The earlier site of the pair.</param>
        /// <param name="currentVariantSite">The later site; its <c>ReferenceName</c> is passed to the constructor.</param>
        /// <param name="numNbhdsSoFar">Count added to the current batch size to form the neighborhood's number.</param>
        private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
        {
            // The number handed to the constructor is the running total plus the size of the batch before this addition.
            var neighborhoodNumber = numNbhdsSoFar + _nextBatchOfVcfNeighborhoods.Count;

            _nextBatchOfVcfNeighborhoods.Add(
                new VcfNeighborhood(
                    neighborhoodNumber,
                    currentVariantSite.ReferenceName,
                    lastVariantSite,
                    currentVariantSite));
        }
예제 #10
0
        /// <summary>
        /// Creates a neighborhood for the two given sites and stores it in <c>_unfinshedNeighborhoods</c>
        /// instead of the current batch, so it is buffered for the next call to "GetBatchOfNeighborhoods".
        /// </summary>
        /// <param name="lastVariantSite">The earlier site of the pair.</param>
        /// <param name="currentVariantSite">The later site; its <c>ReferenceName</c> is passed to the constructor.</param>
        /// <param name="numNbhdsSoFar">Count added to <c>_maxNumNbhdsInBatch</c> to form the neighborhood's number.</param>
        private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
        {
            var neighborhoodNumber = numNbhdsSoFar + _maxNumNbhdsInBatch;

            _unfinshedNeighborhoods.Add(
                new VcfNeighborhood(
                    neighborhoodNumber,
                    currentVariantSite.ReferenceName,
                    lastVariantSite,
                    currentVariantSite));
        }
예제 #11
0
        /// <summary>
        /// Runs <c>PhasedVariantExtractor.Extract</c> over a two-site cluster in three configurations:
        /// an SNV at the first site only, an SNV at the second site only, and SNVs at both sites.
        /// </summary>
        public void CheckSNVs()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            // Per-site inputs, index-aligned with clusterVariantSites; reused by all three Extract calls.
            var neighborhoodDepthAtSites   = new int[] { 100, 200 };
            var neighborhoodNoCallsAtSites = new int[] { 50, 100 };
            var clusterCountsAtSites       = new int[] { 90, 190 };

            // Scenario 1: only the first site has alleles set.
            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);


            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("A", allele.ReferenceAllele);
            Assert.Equal("C", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
            // Coverage and no-call numbers are expected to match the first site's inputs.
            Assert.Equal(100, allele.TotalCoverage);
            Assert.Equal(50, allele.NumNoCalls);
            // 1/3 is consistent with 50 / (100 + 50).
            // NOTE(review): this is an exact float comparison — confirm that is intended.
            Assert.Equal((1f / 3f), allele.FractionNoCalls);

            // Scenario 2: fresh sites, with alleles set on the second site only.
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            clusterVariantSites[1].VcfReferenceAllele = "G";
            clusterVariantSites[1].VcfAlternateAllele = "T";


            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("G", allele.ReferenceAllele);
            Assert.Equal("T", allele.AlternateAllele);
            Assert.Equal(28608287, allele.ReferencePosition);

            // Scenario 3: keep the second site's SNV from scenario 2 and add one at the first site as well.
            clusterVariantSites[0].VcfReferenceAllele = "A";
            clusterVariantSites[0].VcfAlternateAllele = "C";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, neighborhoodNoCallsAtSites, clusterCountsAtSites, chromosome, 20, 100);

            // The two SNVs are expected to come back as one three-base allele, with one entry in refsToRemove.
            Assert.Equal(1, refsToRemove.Count);
            Assert.Equal("AGG", allele.ReferenceAllele);
            Assert.Equal("CGT", allele.AlternateAllele);
            Assert.Equal(28608285, allele.ReferencePosition);
        }
예제 #12
0
        /// <summary>
        /// Walks every variant from <c>_vcfVariantSource</c>, groups eligible variants that are proximal
        /// to the previously accepted one into neighborhoods, and returns the collected neighborhoods.
        /// </summary>
        /// <returns><c>_neighborhoods</c>, after <c>PrepNbhdsForUse</c> has been applied to it.</returns>
        public IEnumerable <VcfNeighborhood> GetNeighborhoods()
        {
            var referenceStringBetweenVariants = "";

            // Placeholder "previous site" at position 0 so the first real variant has something to compare against.
            var lastVariantSite = new VariantSite(0)
            {
                ReferenceName      = "",
                VcfReferenceAllele = "",
                VcfAlternateAllele = "",
            };

            var tempRawVcfVariants = _vcfVariantSource.GetVariants();
            var rawVcfVariants     = Extensions.Convert(tempRawVcfVariants);

            foreach (var rawVcfVariant in rawVcfVariants)
            {
                var currentVariantSite = new VariantSite(rawVcfVariant);
                var refBase            = currentVariantSite.VcfReferenceAllele.Substring(0, 1);

                //append the next base, unless we have a repeated variant.
                if (currentVariantSite.VcfReferencePosition != lastVariantSite.VcfReferencePosition)
                {
                    referenceStringBetweenVariants += refBase;
                }

                // Ineligible variants still contribute their reference base above, but never become
                // the "last" site and never reset the accumulated reference string.
                if (!IsEligibleVariant(rawVcfVariant))
                {
                    continue;
                }

                //the current variant is close to the last one
                if (IsProximal(currentVariantSite, lastVariantSite, _phasingDistance))
                {
                    FitVariantsInNeighborhood(lastVariantSite, currentVariantSite, referenceStringBetweenVariants);
                }

                // Whether or not the pair was proximal, the accumulated reference restarts at this eligible site.
                referenceStringBetweenVariants = "";
                lastVariantSite = currentVariantSite;
            }

            //TODO debug log variant sites in all neighborhoods as "phaseables".

            PrepNbhdsForUse(_neighborhoods);

            return _neighborhoods;
        }
예제 #13
0
        /// <summary>
        /// Exercises <c>AddSupportForB</c> and <c>AddSupportForAandB</c> on a <c>VariantPhasingResult</c>
        /// and verifies their effect through <c>GetProbOfAGivenB</c> and <c>GetWeightedProbOfAGivenB</c>.
        /// </summary>
        public void AddSupport()
        {
            //TODO : Right now AddSupport checks the dictionary and adds first. Do we want to be restricting the variant sites to those we initialize with? Or do we want to not initialize the variant sites and just track those that have support?

            // This method tests AddSupportForB and AddSupportForAandB

            // Should not throw any exceptions (null ref or key not found are ones to look out for since we're tracking in dictionaries)
            // SupportOfB should be incremented by 1
            // WeightedSupportOfB should be incremented by weight

            var variantA             = new VariantSite(1);
            var variantB             = new VariantSite(2);
            var variantB2            = new VariantSite(3);
            var variantBOutsideGroup = new VariantSite(4);

            // We can verify that these were incremented by checking for the prob of A given B, since we know how many clusters we have
            var phasingResult = new VariantPhasingResult(variantA, new List <VariantSite> {
                variantB, variantB2
            }, 1);

            // Add support for a variant not being tracked already: should not throw an exception (see TODO above)
            phasingResult.AddSupportForB(variantBOutsideGroup, 30);

            // Add support for B on its own (without A), with weight 3
            phasingResult.AddSupportForB(variantB, 3);

            //Haven't added support for AandB yet, so we'll get a 0.
            Assert.Equal(0, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(0, phasingResult.GetWeightedProbOfAGivenB(variantB));

            //Adding support for AandB should bring us into the positive
            phasingResult.AddSupportForAandB(variantB, 12);
            //Now we should get 1/1 raw and 12/3 weighted
            Assert.Equal(1, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(4, phasingResult.GetWeightedProbOfAGivenB(variantB));

            //Adding more support for B alone should change our results
            phasingResult.AddSupportForB(variantB, 3);
            //Now we should get 1/2 raw and 12/6 weighted
            Assert.Equal(.5, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(2, phasingResult.GetWeightedProbOfAGivenB(variantB));

            //Adding more support for AandB should change our results
            phasingResult.AddSupportForAandB(variantB, 6);
            //Now we should get 2/2 raw and 18/6 weighted
            Assert.Equal(1, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(3, phasingResult.GetWeightedProbOfAGivenB(variantB));

            //Adding support to a different variant should not change our results
            phasingResult.AddSupportForB(variantB2, 5);
            Assert.Equal(1, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(3, phasingResult.GetWeightedProbOfAGivenB(variantB));
            phasingResult.AddSupportForAandB(variantB2, 5);
            Assert.Equal(1, phasingResult.GetProbOfAGivenB(variantB));
            Assert.Equal(3, phasingResult.GetWeightedProbOfAGivenB(variantB));
        }
예제 #14
0
        /// <summary>
        /// Creates a neighborhood for the two given sites and appends it to <c>_nextBatchOfNeighborhoods</c>.
        /// </summary>
        /// <param name="lastVariantSite">The earlier site of the pair.</param>
        /// <param name="currentVariantSite">The later site; its <c>ReferenceName</c> is passed to the constructor.</param>
        /// <param name="referenceStringBetweenVariants">Reference string forwarded unchanged to the neighborhood constructor.</param>
        /// <param name="numNbhdsSoFar">Count added to the current batch size to form the neighborhood's number.</param>
        private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite,
                                               string referenceStringBetweenVariants, int numNbhdsSoFar)
        {
            // The number handed to the constructor is the running total plus the size of the batch before this addition.
            var neighborhoodNumber = numNbhdsSoFar + _nextBatchOfNeighborhoods.Count;

            _nextBatchOfNeighborhoods.Add(
                new VcfNeighborhood(
                    _variantCallingParams,
                    neighborhoodNumber,
                    currentVariantSite.ReferenceName,
                    lastVariantSite,
                    currentVariantSite,
                    referenceStringBetweenVariants));
        }
예제 #15
0
        /// <summary>
        /// Creates a neighborhood for the two given sites and stores it in <c>_unfinshedNeighborhoods</c>
        /// instead of the current batch, so it is buffered for the next call to "GetBatchOfNeighborhoods".
        /// </summary>
        /// <param name="lastVariantSite">The earlier site of the pair.</param>
        /// <param name="currentVariantSite">The later site; its <c>ReferenceName</c> is passed to the constructor.</param>
        /// <param name="referenceStringBetweenVariants">Reference string forwarded unchanged to the neighborhood constructor.</param>
        /// <param name="numNbhdsSoFar">Count added to <c>_maxNumNbhdsInBatch</c> to form the neighborhood's number.</param>
        private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite,
                                              string referenceStringBetweenVariants, int numNbhdsSoFar)
        {
            var neighborhoodNumber = numNbhdsSoFar + _maxNumNbhdsInBatch;

            _unfinshedNeighborhoods.Add(
                new VcfNeighborhood(
                    _variantCallingParams,
                    neighborhoodNumber,
                    currentVariantSite.ReferenceName,
                    lastVariantSite,
                    currentVariantSite,
                    referenceStringBetweenVariants));
        }
예제 #16
0
        /// <summary>
        /// Runs <c>PhasedVariantExtractor.Extract</c> over a two-site cluster in three configurations:
        /// a two-base MNV at the first site only, one at the second site only, and MNVs at both sites.
        /// </summary>
        public void CheckMNVs()
        {
            var allele = new Pisces.Domain.Models.Alleles.CalledAllele();
            var clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            // Per-site inputs, index-aligned with clusterVariantSites; reused by all three Extract calls.
            var neighborhoodDepthAtSites = new List <int> {
                100, 200
            };
            var clusterCountsAtSites = new int[] { 90, 190 };

            // Scenario 1: only the first site has alleles set.
            clusterVariantSites[0].VcfReferenceAllele = "AG";
            clusterVariantSites[0].VcfAlternateAllele = "CC";

            var refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, clusterCountsAtSites, chromosome, 20, 100);


            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("AG", allele.Reference);
            Assert.Equal("CC", allele.Alternate);
            Assert.Equal(28608285, allele.Coordinate);


            // Scenario 2: fresh sites, with alleles set on the second site only.
            clusterVariantSites = new VariantSite[] {
                new VariantSite(28608285), new VariantSite(28608287)
            };

            clusterVariantSites[1].VcfReferenceAllele = "GA";
            clusterVariantSites[1].VcfAlternateAllele = "TT";


            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, clusterCountsAtSites, chromosome, 20, 100);

            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("GA", allele.Reference);
            Assert.Equal("TT", allele.Alternate);
            Assert.Equal(28608287, allele.Coordinate);

            // Scenario 3: keep the second site's MNV from scenario 2 and add one at the first site as well.
            clusterVariantSites[0].VcfReferenceAllele = "AG";
            clusterVariantSites[0].VcfAlternateAllele = "CC";

            refsToRemove = PhasedVariantExtractor.Extract(
                out allele, clusterVariantSites, referenceSequence,
                neighborhoodDepthAtSites, clusterCountsAtSites, chromosome, 20, 100);

            // The two adjacent MNVs are expected to come back as one four-base allele with nothing in refsToRemove.
            Assert.Equal(0, refsToRemove.Count);
            Assert.Equal("AGGA", allele.Reference);
            Assert.Equal("CCTT", allele.Alternate);
            Assert.Equal(28608285, allele.Coordinate);
        }
예제 #17
0
        /// <summary>
        /// Configures a <c>VariantCaller</c> with thresholds strict enough that its calls are sure to be
        /// filtered, runs it over a neighborhood with one accepted phased SNV, and checks which filters
        /// end up on the called variant and on the reference call at the same position.
        /// </summary>
        public void CheckAddingFilters()
        {
            var alleleAt123 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var alleleAt124 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var siteAt123   = new VariantSite(alleleAt123);
            var siteAt124   = new VariantSite(alleleAt124);

            //Set up filters so calls are sure to trigger them.
            var strictParameters = new VariantCallingParameters
            {
                LowDepthFilter             = 2000,
                MinimumFrequencyFilter     = 0.80F,
                MinimumVariantQScoreFilter = 300
            };

            var variantCaller = new VariantCaller(strictParameters, new BamFilterParameters());

            // The neighborhood itself is built with default calling parameters; only the caller gets the strict ones.
            var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", siteAt123, siteAt124, "");
            neighborhood.SetRangeOfInterest();

            var phasedSnv = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            };
            neighborhood.AddAcceptedPhasedVariant(phasedSnv);
            neighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>();

            variantCaller.CallMNVs(neighborhood);
            variantCaller.CallRefs(neighborhood);

            var calledVariants = neighborhood.CalledVariants;
            var calledRefs     = neighborhood.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[123].Count);

            // The called variant should carry all three filters.
            var variantFilters = calledVariants[123][0].Filters;
            Assert.True(variantFilters.Contains(FilterType.LowDepth));
            Assert.True(variantFilters.Contains(FilterType.LowVariantFrequency));
            Assert.True(variantFilters.Contains(FilterType.LowVariantQscore));

            Assert.Equal(2, calledRefs.Count);

            //note reference calls dont win the "LowVariantFrequency" flag.
            var refFilters = calledRefs[123].Filters;
            Assert.True(refFilters.Contains(FilterType.LowDepth));
            Assert.True(refFilters.Contains(FilterType.LowVariantQscore));
        }
예제 #18
0
        /// <summary>
        /// Checks that <c>VcfNeighborhoodBuilder.IsProximal</c> accepts two sites only when they are closer
        /// than the phasing distance, and that the answer does not depend on the order of the two sites.
        /// </summary>
        public void IsProximal()
        {
            // The two sites are 3 positions apart.
            var variantSite1 = new VariantSite(123);
            var variantSite2 = new VariantSite(126);

            // Must be less than phasing distance
            Assert.True(VcfNeighborhoodBuilder.IsProximal(variantSite1, variantSite2, 4));
            Assert.False(VcfNeighborhoodBuilder.IsProximal(variantSite1, variantSite2, 3));

            // Works both ways: the sites are passed in the opposite order here, so the
            // symmetry claim is actually exercised.
            Assert.True(VcfNeighborhoodBuilder.IsProximal(variantSite2, variantSite1, 4));
            Assert.False(VcfNeighborhoodBuilder.IsProximal(variantSite2, variantSite1, 3));
        }
예제 #19
0
        /// <summary>
        /// After a site at position 123 is added to a neighborhood, <c>LastPositionIsNotMatch</c>
        /// should be false for another site at 123 and true for a site at 124.
        /// </summary>
        public void LastPositionIsNotMatch()
        {
            var neighborhood = new VcfNeighborhood(
                new VariantCallingParameters(),
                0,
                "chr1",
                new VariantSite(120),
                new VariantSite(121),
                "T");

            neighborhood.AddVariantSite(new VariantSite(123), "ATCG");

            // Same position as the site just added: reported as a match.
            Assert.False(neighborhood.LastPositionIsNotMatch(new VariantSite(123)));

            // One position further along: reported as not a match.
            Assert.True(neighborhood.LastPositionIsNotMatch(new VariantSite(124)));
        }
        /// <summary>
        /// Builds a <c>Vead</c> from a two-dimensional string array in which each row holds a
        /// reference allele (column 0) and an alternate allele (column 1).
        /// </summary>
        /// <param name="name">Name passed to the <c>Vead</c> constructor.</param>
        /// <param name="variants">One row per variant site: <c>[row, 0]</c> is the reference allele, <c>[row, 1]</c> the alternate.</param>
        /// <returns>A <c>Vead</c> holding one <c>VariantSite</c> per row, in row order.</returns>
        public static Vead CreateVeadFromStringArray(string name, string[,] variants)
        {
            var sites = new VariantSite[variants.GetLength(0)];

            for (var row = 0; row < sites.Length; row++)
            {
                sites[row] = new VariantSite
                {
                    VcfReferenceAllele = variants[row, 0],
                    VcfAlternateAllele = variants[row, 1]
                };
            }

            return new Vead(name, sites);
        }
예제 #21
0
        /// <summary>
        /// Checks that adding a third site to a neighborhood grows <c>VcfVariantSites</c> to three
        /// entries and leaves the neighborhood's <c>Id</c> unchanged.
        /// </summary>
        public void AddVariantSite()
        {
            const string expectedId = "NbhdNum0_chr1_120";

            var firstSite    = new VariantSite(120) { VcfReferenceAllele = "A" };
            var secondSite   = new VariantSite(121);
            var neighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);

            Assert.Equal(expectedId, neighborhood.Id);

            neighborhood.AddVariantSite(new VariantSite(123));

            Assert.Equal(3, neighborhood.VcfVariantSites.Count);
            Assert.Equal(expectedId, neighborhood.Id);
        }
예제 #22
0
        /// <summary>
        /// After a site at position 123 is added to a neighborhood, <c>LastPositionIsNotMatch</c>
        /// should be false for another site at 123 and true for a site at 124.
        /// </summary>
        public void LastPositionIsNotMatch()
        {
            var neighborhood = new VcfNeighborhood(
                0,
                "chr1",
                new VariantSite(120),
                new VariantSite(121));

            neighborhood.AddVariantSite(new VariantSite(123));

            // Same position as the site just added: reported as a match.
            Assert.False(neighborhood.LastPositionIsNotMatch(new VariantSite(123)));

            // One position further along: reported as not a match.
            Assert.True(neighborhood.LastPositionIsNotMatch(new VariantSite(124)));
        }
예제 #23
0
        /// <summary>
        /// Places a pair of nearby variant sites into the batch of neighborhoods: the pair either
        /// extends the most recent neighborhood, starts a new one in the batch, or is buffered in
        /// a hanging neighborhood when the batch is full.
        /// </summary>
        /// <param name="lastVariantSite">The previously seen site of the pair.</param>
        /// <param name="currentVariantSite">The site being added.</param>
        /// <param name="referenceStringBetweenVariants">Reference string forwarded to whichever neighborhood receives the site.</param>
        /// <param name="numNbhdsSoFar">Running neighborhood count, forwarded when a new neighborhood is created.</param>
        /// <returns>
        /// <c>false</c> when the pair had to be buffered as a hanging neighborhood; for an empty batch,
        /// whether the batch size was below <c>_maxNumNbhdsInBatch</c> before the addition; otherwise <c>true</c>.
        /// </returns>
        public bool FitVariantsInNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite,
                                              string referenceStringBetweenVariants, int numNbhdsSoFar)
        {
            // Evaluated before anything is added, so it describes the batch as it was on entry.
            bool batchHasRoom = _nextBatchOfNeighborhoods.Count < _maxNumNbhdsInBatch;

            // Nothing in the batch yet: a neighborhood has to be created regardless.
            if (_nextBatchOfNeighborhoods.Count == 0)
            {
                AddNewNeighborhoodToBatch(lastVariantSite, currentVariantSite, referenceStringBetweenVariants,
                                          numNbhdsSoFar);

                return batchHasRoom;
            }

            var currentChain = _nextBatchOfNeighborhoods.Last();

            // No positions were skipped since the last addition: keep extending the existing chain.
            if (!currentChain.LastPositionIsNotMatch(lastVariantSite))
            {
                currentChain.AddVariantSite(currentVariantSite, referenceStringBetweenVariants);

                return true;
            }

            // A new chain is needed but the batch is full: leave the new neighborhood hanging for now.
            if (!batchHasRoom)
            {
                MakeAHangingNeighborhood(lastVariantSite, currentVariantSite, referenceStringBetweenVariants,
                                         numNbhdsSoFar);

                return false;
            }

            // A new chain is needed and the batch can still take one.
            AddNewNeighborhoodToBatch(lastVariantSite, currentVariantSite, referenceStringBetweenVariants,
                                      numNbhdsSoFar);

            return true;
        }
예제 #24
0
        /// <summary>
        /// Checks that adding a third site together with a reference string extends
        /// <c>ReferenceSequence</c> to "ATATCG", grows <c>VcfVariantSites</c> to three entries and
        /// leaves the neighborhood's <c>Id</c> unchanged.
        /// </summary>
        public void AddVariantSite()
        {
            const string expectedId = "NbhdNum0_chr1_120";

            var callingParameters = new VariantCallingParameters();
            var firstSite         = new VariantSite(120) { VcfReferenceAllele = "A" };
            var secondSite        = new VariantSite(121);
            var neighborhood      = new VcfNeighborhood(callingParameters, 0, "chr1", firstSite, secondSite, "T");

            Assert.Equal(expectedId, neighborhood.Id);

            neighborhood.AddVariantSite(new VariantSite(123), "ATCG");

            Assert.Equal("ATATCG", neighborhood.ReferenceSequence);
            Assert.Equal(3, neighborhood.VcfVariantSites.Count);
            Assert.Equal(expectedId, neighborhood.Id);
        }
예제 #25
0
        /// <summary>
        /// In this test, the read fits two MNVs, and we need to find both.
        /// The read is soft-clipped on both ends (2S8M4S); both variant sites fall inside the aligned
        /// part, and the read's bases equal the alternate allele at each site.
        /// </summary>
        public void FindMultipleMNVResults()
        {
            var read = new BamAlignment();

            read.Bases = "AA" + "ACGTACGT" + "GGGG";
            //vcf coords  12-345678910-11,12,13,14
            read.CigarData = new CigarAlignment("2S8M4S");
            read.Position  = 3 - 1;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 4;
            vs1.VcfReferenceAllele   = "TA";
            vs1.VcfAlternateAllele   = "CG"; //read should match ALT for this test

            var vs2 = new VariantSite();

            vs2.VcfReferencePosition = 7;
            vs2.VcfReferenceAllele   = "GG";
            vs2.VcfAlternateAllele   = "AC"; //read should match ALT here as well

            var vsFromVcf = new List <VariantSite>()
            {
                vs1, vs2
            };

            vsFromVcf.Sort();

            //given a variant site, is it in the read?

            // Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                Assert.Equal(1, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.DeletionSequence].Count);
            }, (matchedVariants) =>
            {
                // First MNV is returned as given.
                Assert.Equal(4, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("TA", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("CG", matchedVariants[0].VcfAlternateAllele);

                // Second MNV is returned as given too.
                Assert.Equal(7, matchedVariants[1].VcfReferencePosition);
                Assert.Equal("GG", matchedVariants[1].VcfReferenceAllele);
                Assert.Equal("AC", matchedVariants[1].VcfAlternateAllele);
            });
        }
예제 #26
0
        /// <summary>
        /// The read supports the first variant (an MNV) but not the second (a deletion);
        /// the unsupported deletion is expected to come back as a reference call (T&gt;T).
        /// </summary>
        public void FindVariantResults()
        {
            var read = new BamAlignment();

            read.Bases = "AA" + "ACGTACGT" + "GGGG";
            //vcf coords  12-345678910-11,12,13,14
            read.CigarData = new CigarAlignment("2S8M4S");
            read.Position  = 3 - 1;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 4;
            vs1.VcfReferenceAllele   = "TA";
            vs1.VcfAlternateAllele   = "CG"; //read should match ALT for this test

            var vs2 = new VariantSite();

            vs2.VcfReferencePosition = 10;
            vs2.VcfReferenceAllele   = "TTT";
            vs2.VcfAlternateAllele   = "T";

            var vsFromVcf = new List<VariantSite>()
            {
                vs1, vs2
            };

            //given a variant site, is it in the read?

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // failure messages report the right value as "Expected".
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                Assert.Equal(1, foundVariants[SomaticVariantType.SNP].Count);
                Assert.Equal(0, foundVariants[SomaticVariantType.Insertion].Count);
                Assert.Equal(0, foundVariants[SomaticVariantType.Deletion].Count);
            }, (matchedVariants) =>
            {
                Assert.Equal(4, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("TA", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("CG", matchedVariants[0].VcfAlternateAllele);

                //a deletion not supported by the reads, so a reference call is expected at this locus
                Assert.Equal(10, matchedVariants[1].VcfReferencePosition);
                Assert.Equal("T", matchedVariants[1].VcfReferenceAllele);
                Assert.Equal("T", matchedVariants[1].VcfAlternateAllele);
            });
        }
예제 #27
0
        /// <summary>
        /// Creates a neighborhood builder, storing its collaborators and starting with
        /// empty lists for the next batch of neighborhoods and the unfinished ones.
        /// </summary>
        /// <param name="phasableVariantCriteria">Criteria object stored for later use by the builder.</param>
        /// <param name="variantCallingParams">Variant calling parameters stored for later use by the builder.</param>
        /// <param name="vcfVariantSource">Variant source stored for later use by the builder.</param>
        /// <param name="batchSize">
        /// Stored as <c>_maxNumNbhdsInBatch</c>; presumably the cap on neighborhoods per batch (confirm against usage).
        /// </param>
        public VcfNeighborhoodBuilder(PhasableVariantCriteria phasableVariantCriteria, VariantCallingParameters variantCallingParams,
                                      IVcfVariantSource vcfVariantSource, int batchSize)
        {
            _variantCallingParams     = variantCallingParams;
            _phasableVariantCriteria  = phasableVariantCriteria;
            _vcfVariantSource         = vcfVariantSource;
            _nextBatchOfNeighborhoods = new List<VcfNeighborhood>();
            _unfinshedNeighborhoods   = new List<VcfNeighborhood>();
            _maxNumNbhdsInBatch       = batchSize;

            // The placeholder "lastVariantSite" local that used to be built here was
            // never read or stored, so it has been removed as dead code.
        }
예제 #28
0
        /// <summary>
        /// Checks the per-site vead counts reported by a cluster: the A&gt;T site at position 0
        /// is expected to have 9 supporting veads, the A&gt;C site at the same position none.
        /// </summary>
        public void GetVeadCountsInCluster()
        {
            // Sample groups come from the shared test helper; its (3, 3) arguments
            // presumably mean 3 groups of 3 veads, matching the expected 9 below (confirm in helper).
            var veadGroups  = ClusterTestHelpers.GetSampleVeadGroups(3, 3);
            var testCluster = new Cluster("test", veadGroups);

            var siteAtoT = new VariantSite(0);
            siteAtoT.VcfReferenceAllele = "A";
            siteAtoT.VcfAlternateAllele = "T";

            var siteAtoC = new VariantSite(0);
            siteAtoC.VcfReferenceAllele = "A";
            siteAtoC.VcfAlternateAllele = "C";

            var sitesToCount = new List<VariantSite>();
            sitesToCount.Add(siteAtoT);
            sitesToCount.Add(siteAtoC);

            // The counts are recomputed for each assertion, as in the original test.
            var countForAtoT = testCluster.GetVeadCountsInCluster(sitesToCount)[siteAtoT];
            Assert.Equal(9, countForAtoT);

            var countForAtoC = testCluster.GetVeadCountsInCluster(sitesToCount)[siteAtoC];
            Assert.Equal(0, countForAtoC);
        }
예제 #29
0
        /// <summary>
        /// A single read carrying an 18-base deletion (CIGAR 68M18D7M) is checked against a
        /// VCF deletion at the same locus; the variant is expected to be matched as given.
        /// </summary>
        public void ProcessOneDeletionReadTest()
        {
            //reads with deletions, S102
            //       16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1014/2_rev_121416520	121416520	75M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1484/2_fwd_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1320/2_rev_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //16187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-1076/2_rev_121416520	121416520	68M18D7M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG
            //416187-121416587:COSM21479:GCCAGCTGCAGACGGAGCTC:GT:chr12:121416607-121417007-850/2_rev_121416520	121416520	75M	AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTG

            var read = new BamAlignment();

            read.Bases     = "AGGCGGCTAGCGTGGTGGACCCGGGCCGCGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGTCTGGCG";
            read.CigarData = new CigarAlignment("68M18D7M");
            read.Position  = 121416520;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 121416588;
            vs1.VcfReferenceAllele   = "GCCAGCTGCAGACGGAGCT";
            vs1.VcfAlternateAllele   = "G"; //read should match ALT for this test


            var vsFromVcf = new List<VariantSite>()
            {
                vs1
            };

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // failure messages report the right value as "Expected".
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                // two matched stretches (68M and 7M) on either side of one deletion (18D)
                Assert.Equal(2, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(1, foundVariants[SubsequenceType.DeletionSequence].Count);
            },
                        (matchedVariants) =>
            {
                Assert.Equal(121416588, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("GCCAGCTGCAGACGGAGCT", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("G", matchedVariants[0].VcfAlternateAllele);
            });
        }
예제 #30
0
        /// <summary>
        /// A single read carrying a 3-base insertion (CIGAR 71M3I1M) is checked against a
        /// VCF insertion at the same locus; the variant is expected to be matched as given.
        /// </summary>
        public void ProcessInsertionReadTest()
        {
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1478/2_fwd	121432113	72M3S	CGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTA
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-662/2_fwd	121432113	72M3S	CGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTA
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1308/2_rev	121432114	71M3I1M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTAC-CTA-C
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-64/2_rev	121432114	    71M3I1M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTAC-TTA-C
            //chr12:121431782-121432182:COSM46441:TGC:TACCTA:chr12:121432185-121432585-1322/2_rev	121432114	75M	GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTGC-CCTC

            var read = new BamAlignment();

            read.Bases     = "GGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTACCTAC";
            read.CigarData = new CigarAlignment("71M3I1M");
            read.Position  = 121432114;
            read.Qualities = new byte[read.Bases.Length];

            var vs1 = new VariantSite();

            vs1.VcfReferencePosition = 121432185;
            vs1.VcfReferenceAllele   = "C";
            vs1.VcfAlternateAllele   = "CCTA"; //read should match ALT for this test


            var vsFromVcf = new List<VariantSite>()
            {
                vs1
            };

            //given a variant site, is it in the read?

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // failure messages report the right value as "Expected".
            ExecuteTest(read, 0, vsFromVcf, (foundVariants) =>
            {
                // two matched stretches (71M and 1M) on either side of one insertion (3I)
                Assert.Equal(2, foundVariants[SubsequenceType.MatchOrMismatchSequence].Count);
                Assert.Equal(1, foundVariants[SubsequenceType.InsertionSquence].Count);
                Assert.Equal(0, foundVariants[SubsequenceType.DeletionSequence].Count);
            }, (matchedVariants) =>
            {
                Assert.Equal(121432185, matchedVariants[0].VcfReferencePosition);
                Assert.Equal("C", matchedVariants[0].VcfReferenceAllele);
                Assert.Equal("CCTA", matchedVariants[0].VcfAlternateAllele);
            });
        }