예제 #1
0
        /// <summary>
        /// Builds a neighborhood from the supplied variants, collects vead groups from the
        /// mocked reads, and asserts that each group holds the expected number of veads.
        /// </summary>
        /// <param name="reads">Reads handed to the mock alignment extractor.</param>
        /// <param name="expectedGroupMemberships">Expected <c>NumVeads</c> for each vead group, in order.</param>
        /// <param name="variants">Tuples of (position, reference allele, alternate allele).</param>
        private void ExecuteGroupingTest(List<Read> reads, List<int> expectedGroupMemberships, IEnumerable<Tuple<int, string, string>> variants)
        {
            // One variant site per (position, ref, alt) tuple.
            var sites = variants
                .Select(v => new VariantSite(v.Item1)
                {
                    VcfReferenceAllele = v.Item2,
                    VcfAlternateAllele = v.Item3
                })
                .ToList();

            var extractor = new MockAlignmentExtractor(reads);
            var filterParameters = new BamFilterParameters { MinimumMapQuality = 20 };
            var groupSource = new VeadGroupSource(extractor, filterParameters, false, "");

            var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));
            neighborhood.VcfVariantSites = sites;

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
            var groups = groupSource.GetVeadGroups(callable).ToList();

            Assert.Equal(expectedGroupMemberships.Count, groups.Count);

            var index = 0;
            foreach (var veadGroup in groups)
            {
                Assert.Equal(expectedGroupMemberships[index], veadGroup.NumVeads);
                index++;
            }
        }
        /// <summary>
        /// Decides whether a read should be ignored for the given neighborhood.
        /// </summary>
        /// <param name="read">The read to evaluate.</param>
        /// <param name="neighborhood">Neighborhood whose first position of interest bounds the read.</param>
        /// <returns>
        /// <c>true</c> when the read is a PCR duplicate (and duplicate removal is on), is not a
        /// proper pair (and proper pairs are required), has a map quality below the configured
        /// minimum, or ends before the neighborhood's first position of interest; otherwise <c>false</c>.
        /// </returns>
        public bool ShouldSkipRead(Read read, CallableNeighborhood neighborhood)
        {
            // Optional filters: each one only applies when switched on in the options.
            if (_options.RemoveDuplicates && read.IsPcrDuplicate)
            {
                return true;
            }

            if (_options.OnlyUseProperPairs && !read.IsProperPair)
            {
                return true;
            }

            // Unconditional filters: mapping quality, then position relative to the neighborhood start.
            return read.MapQuality < _options.MinimumMapQuality
                   || read.EndPosition < neighborhood.FirstPositionOfInterest;
        }
        /// <summary>
        /// Runs the variant caller over a neighborhood that has two original VCF sites but no
        /// staged candidate variants, and checks that only reference calls come out.
        /// </summary>
        public void CallThroughAnEmptyNbhd()
        {
            var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
            var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));

            var variantCaller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());

            variantCaller.CallMNVs(callable);
            variantCaller.CallRefs(callable);

            var calledVariants = callable.CalledVariants;
            var calledRefs = callable.CalledRefs;

            // Nothing was staged, so no MNVs are expected; both original positions are expected
            // back as reference calls.
            Assert.Equal(0, calledVariants.Count);
            Assert.Equal(2, calledRefs.Count);

            // Each original site carried an alt allele (156 alt of 1000 total), so the expected
            // reference genotype is RefAndNoCall (0/.) rather than homozygous reference.
            foreach (var position in new[] { 123, 124 })
            {
                Assert.Equal(Genotype.RefAndNoCall, calledRefs[position].Genotype);
                Assert.Equal(position, calledRefs[position].ReferencePosition);
            }
        }
예제 #4
0
        /// <summary>
        /// Checks that <c>GetOriginalVcfVariants</c> returns the original alleles attached to the
        /// neighborhood's variant sites, in site order.
        /// </summary>
        public void GetOriginalVcfIndexes()
        {
            var alleleAtPosition1 = new CalledAllele { ReferencePosition = 1 };
            var alleleAtPosition10 = new CalledAllele { ReferencePosition = 10 };

            // Both sites are constructed at 123; the assertions below read the reference
            // positions of the attached original alleles.
            var firstSite = new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition1 };
            var secondSite = new VariantSite(123) { OriginalAlleleFromVcf = alleleAtPosition10 };
            var neighborhood = new VcfNeighborhood(0, "chr1", firstSite, secondSite);

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
            var originalVariants = callable.GetOriginalVcfVariants();

            Assert.Equal(2, originalVariants.Count);
            Assert.Equal(1, originalVariants[0].ReferencePosition);
            Assert.Equal(10, originalVariants[1].ReferencePosition);
        }
예제 #5
0
        /// <summary>
        /// Feeds a mix of normal and "clipped" reads (name prefixed with "clip_") through the
        /// soft-clip support finder with a mocked comparator, and checks that the candidate
        /// variant's allele support rises from 50 to 57.
        /// </summary>
        public void SupplementSupportWithClippedReads()
        {
            // Cigar data plays no role here: the mocked comparator reports support purely from
            // the read name, returning true when the name starts with 'c'.
            var clippedComparator = new Mock<IMNVClippedReadComparator>();
            clippedComparator
                .Setup(c => c.DoesClippedReadSupportMNV(It.IsAny<Read>(), It.IsAny<CalledAllele>()))
                .Returns((Read candidate, CalledAllele allele) => candidate.Name[0] == 'c');

            var reads = new List<Read>
            {
                CreateRead("chr1", "ACGT", 3, "read4"),
                CreateRead("chr1", "ACGT", 3, "clip_read4", matePosition: 3),  // +1 not in neighborhood, but still counted because of the mocked comparator
                CreateRead("chr1", "ACGT", 12, "read1", matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "read2", matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "read1", read2: true, matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "read_notmapped", isMapped: false, isProperPair: false, matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "read3", isProperPair: false, read2: true, matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "read2", read2: true, matePosition: 10),
                CreateRead("chr1", "ACGT", 12, "clip_read1", matePosition: 10),                                                // +1 clipped read
                CreateRead("chr1", "ACGT", 12, "clip_read2", matePosition: 10),                                                // +1 clipped read
                CreateRead("chr1", "ACGT", 12, "clip_read1", read2: true, matePosition: 10),                                   // +1 clipped read
                CreateRead("chr1", "ACGT", 12, "clip_read_notmapped", isMapped: false, isProperPair: false, matePosition: 10), // +1 clipped read
                CreateRead("chr1", "ACGT", 12, "clip_read3", isProperPair: false, read2: true, matePosition: 10),              // +1 clipped read
                CreateRead("chr1", "ACGT", 12, "clip_read2", read2: true, matePosition: 10),                                   // +1 clipped read
                CreateRead("chr1", "ACGT", 30, "read5"),
                CreateRead("chr1", "ACGT", 30, "clip_read5", matePosition: 30),                                                // not in neighborhood, not counted
            };

            const int noiseLevel = 20;
            const int maxQscore = 100;
            const int minMnvSize = 6;
            var supportFinder = new MNVSoftClipSupportFinder(
                new MockAlignmentExtractor(reads),
                clippedComparator.Object,
                noiseLevel,
                maxQscore,
                minMnvSize);

            var candidate = TestHelper.CreateDummyAllele("chr1", 10, "AAAAAA", "CCC", 2000, 50);

            var neighborhood = new VcfNeighborhood(0, "chr", new VariantSite(10000), new VariantSite(200000));
            neighborhood.VcfVariantSites = new List<VariantSite>
            {
                new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C", ReferenceName = "chr" },
                new VariantSite(25) { VcfReferenceAllele = "T", VcfAlternateAllele = "G", ReferenceName = "chr" },
            };

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters(), null);
            callable.AddAcceptedPhasedVariant(candidate);

            // Support before and after supplementing: seven reads carry the "+1" marks above.
            Assert.Equal(50, callable.CandidateVariants[0].AlleleSupport);
            supportFinder.SupplementSupportWithClippedReads(callable);
            Assert.Equal(57, callable.CandidateVariants[0].AlleleSupport);
        }
        /// <summary>
        /// Configures the caller with filter thresholds strict enough that the staged variant and
        /// the reference calls are guaranteed to trip them, then checks the resulting filter flags.
        /// </summary>
        public void CheckAddingFilters()
        {
            var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
            var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));

            // Thresholds chosen so that the calls below are sure to be filtered.
            var strictParameters = new VariantCallingParameters
            {
                LowDepthFilter = 2000,
                MinimumFrequencyFilter = 0.80F,
                MinimumVariantQScoreFilter = 300
            };

            var variantCaller = new VariantCaller(strictParameters, new BamFilterParameters());

            // The callable neighborhood itself is built with default calling parameters;
            // only the caller receives the strict ones.
            var neighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());

            var stagedVariant = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                VariantQscore = 100,
                TotalCoverage = 1000,
                AlleleSupport = 500
            };
            callable.AddAcceptedPhasedVariant(stagedVariant);
            callable.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>();

            variantCaller.CallMNVs(callable);
            variantCaller.CallRefs(callable);

            var calledVariants = callable.CalledVariants;
            var calledRefs = callable.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[123].Count);

            var variantFilters = calledVariants[123][0].Filters;
            Assert.True(variantFilters.Contains(FilterType.LowDepth));
            Assert.True(variantFilters.Contains(FilterType.LowVariantFrequency));
            Assert.True(variantFilters.Contains(FilterType.LowVariantQscore));

            Assert.Equal(2, calledRefs.Count);

            // Reference calls are not checked for the LowVariantFrequency flag.
            Assert.True(calledRefs[123].Filters.Contains(FilterType.LowDepth));
            Assert.True(calledRefs[123].Filters.Contains(FilterType.LowVariantQscore));
        }
        /// <summary>
        /// Walks the alignments on the neighborhood's reference and, for every candidate variant
        /// long enough to qualify, adds one unit of allele support each time the clipped-read
        /// comparator reports that a read supports it. Afterwards the Q score of every candidate
        /// is recomputed from the updated support.
        /// </summary>
        /// <param name="neighborhood">
        /// Neighborhood whose <c>CandidateVariants</c> are updated in place. The reference name is
        /// taken from the first entry of <c>VcfVariantSites</c>, so that collection must not be empty.
        /// </param>
        public void SupplementSupportWithClippedReads(CallableNeighborhood neighborhood)
        {
            var refName = neighborhood.VcfVariantSites.First().ReferenceName;

            _alignmentExtractor.Jump(refName);

            Logger.WriteToLog("Supplementing candidate variant support with soft clipped reads.");

            var read = new Read();

            // Loop ends when the extractor has no more reads, or on the position check below.
            while (_alignmentExtractor.GetNextAlignment(read))
            {
                // Check if clipped part matches alternate allele of any candidate variant
                foreach (var mnv in neighborhood.CandidateVariants)
                {
                    // Do not boost support for SNVs and short MNVs
                    if (mnv.ReferenceAllele.Length + mnv.AlternateAllele.Length < _minSizeForClipRescue)
                    {
                        continue;
                    }

                    if (_mnvClippedReadComparator.DoesClippedReadSupportMNV(read, mnv))
                    {
                        // Nima: in current implementation, same read can support multiple candidate variants.
                        // In future we may want to "assign" reads to only one candidate variant.
                        // Risk: reads that support an MNV, may also support candidate variants. This can lead to false positives.
                        mnv.AlleleSupport++;
                        mnv.SoftClipAlleleSupport++;
                    }
                }

                // The position check runs after the read has been evaluated, so the first read
                // past the look-ahead boundary is still compared before the scan stops.
                if (read.Position > neighborhood.LastPositionOfInterestWithLookAhead)
                {
                    break;
                }
            }

            // Update Q score before moving on
            // Nima: Q score will be calculated twice for some variants
            // (once in PhasedVariantExtractor.cs>Create() , and another time here)
            foreach (var mnv in neighborhood.CandidateVariants)
            {
                mnv.VariantQscore = VariantQualityCalculator.AssignPoissonQScore(mnv.AlleleSupport, mnv.ReferenceSupport, _qNoiseLevel, _maxQscore);

                // Report the soft-clip contribution itself; AlleleSupport minus SoftClipAlleleSupport
                // would be the support that did NOT come from soft clips.
                Logger.WriteToLog("Added soft clip support of {0} to MNV: {1}.", mnv.SoftClipAlleleSupport, mnv.ToString());
            }
        }
        /// <summary>
        /// Reports whether a read has a soft clip whose boundary lies inside the neighborhood's
        /// soft-clip window (<c>SoftClipEndBeforeNbhd</c> to <c>SoftClipPosAfterNbhd</c>, inclusive).
        /// </summary>
        /// <param name="read">The read to inspect.</param>
        /// <param name="neighborhood">Neighborhood supplying the window bounds.</param>
        /// <returns>
        /// <c>true</c> when the read starts with a soft clip and its position is inside the window,
        /// or ends with a soft clip and its end position is inside the window; otherwise <c>false</c>.
        /// </returns>
        public bool IsClippedWithinNeighborhood(Read read, CallableNeighborhood neighborhood)
        {
            // Leading clip: the read position (where the clip ends) must fall inside the window.
            // Trailing clip: the read end position (where the clip begins) must fall inside the window.
            return (read.StartsWithSoftClip
                    && read.Position >= neighborhood.SoftClipEndBeforeNbhd
                    && read.Position <= neighborhood.SoftClipPosAfterNbhd)
                   || (read.EndsWithSoftClip
                       && read.EndPosition >= neighborhood.SoftClipEndBeforeNbhd
                       && read.EndPosition <= neighborhood.SoftClipPosAfterNbhd);
        }
예제 #9
0
        /// <summary>
        /// Runs the phasing pipeline for one neighborhood: cluster the vead groups, turn the
        /// clusters into MNV candidates, optionally supplement support with soft-clipped reads,
        /// set genotypes, and finally call MNVs and references. Any exception is logged and
        /// swallowed, so a failure in one neighborhood does not propagate to the caller.
        /// </summary>
        /// <param name="neighborhood">The neighborhood to process; it is updated in place.</param>
        private void CallMnvsForNeighborhood(CallableNeighborhood neighborhood)
        {
            Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

            try
            {
                // (1) Get clusters. The vead group source and its groups are consumed inline so
                // that no local keeps referencing them once clustering is done.
                var clusterer = _factory.CreateNeighborhoodClusterer();
                var clusters = clusterer.ClusterVeadGroups(
                    _factory.CreateVeadGroupSource().GetVeadGroups(neighborhood).ToList(),
                    neighborhood.Id);

                // (2) Turn clusters into MNV candidates.
                var options = _factory.Options;
                var oneVcfLinePerLocus = !options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

                neighborhood.CreateMnvsFromClusters(
                    clusters.Clusters,
                    options.BamFilterParams.MinimumBaseCallQuality,
                    oneVcfLinePerLocus);

                var hasClippedReads = neighborhood.NumberClippedReads > 0;
                if (hasClippedReads && options.SoftClipSupportParams.UseSoftClippedReads)
                {
                    _factory.CreateSoftClipSupportFinder().SupplementSupportWithClippedReads(neighborhood);
                }

                neighborhood.SetGenotypesAndPruneExcessAlleles();

                // (3) Variant call the candidates. Writing to the VCF happens elsewhere.
                var variantCaller = _factory.CreateVariantCaller();
                variantCaller.CallMNVs(neighborhood);
                variantCaller.CallRefs(neighborhood);
            }
            catch (Exception ex)
            {
                Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
                Logger.WriteExceptionToLog(ex);
            }
        }
예제 #10
0
        /// <summary>
        /// Checks that constructing a <c>CallableNeighborhood</c> populates the reference sequence
        /// substring and the first/last positions of interest, both without and with a genome.
        /// </summary>
        public void SetRangeOfInterestTests()
        {
            const string refName = "chr";

            // Scenario 1: no genome given.
            var plainNbhd = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

            // Before a callable neighborhood is built, every position of interest reads -1.
            Assert.Equal(-1, plainNbhd.FirstPositionOfInterest);
            Assert.Equal(-1, plainNbhd.LastPositionOfInterestInVcf);
            Assert.Equal(-1, plainNbhd.LastPositionOfInterestWithLookAhead);

            var callableWithoutGenome = new CallableNeighborhood(plainNbhd, new VariantCallingParameters());

            // Without a reference the substring is expected to be "RR"; the positions are
            // checked on the underlying VcfNeighborhood.
            Assert.Equal("RR", callableWithoutGenome.NbhdReferenceSequenceSubstring);
            Assert.Equal(120, plainNbhd.FirstPositionOfInterest);
            Assert.Equal(121, plainNbhd.LastPositionOfInterestInVcf);
            Assert.Equal(122, plainNbhd.LastPositionOfInterestWithLookAhead);

            // Scenario 2: a genome is given.
            var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
            var genome = new Genome(genomePath, new List<string> { refName });
            ChrReference chrReference = genome.GetChrReference(refName);

            var genomeNbhd = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

            Assert.Equal(-1, genomeNbhd.FirstPositionOfInterest);
            Assert.Equal(-1, genomeNbhd.LastPositionOfInterestInVcf);
            Assert.Equal(-1, genomeNbhd.LastPositionOfInterestWithLookAhead);

            var callableWithGenome = new CallableNeighborhood(genomeNbhd, new VariantCallingParameters(), chrReference);

            // With a reference the substring comes from the genome; the positions are checked
            // on the callable neighborhood this time.
            Assert.Equal("TG", callableWithGenome.NbhdReferenceSequenceSubstring);
            Assert.Equal(120, callableWithGenome.FirstPositionOfInterest);
            Assert.Equal(121, callableWithGenome.LastPositionOfInterestInVcf);
            Assert.Equal(122, callableWithGenome.LastPositionOfInterestWithLookAhead);
        }
예제 #11
0
        /// <summary>
        /// Exercises the position-based part of <c>ShouldSkipRead</c> with four-base reads placed
        /// before, overlapping, and after a neighborhood whose variant sites are at 10 and 15.
        /// </summary>
        public void ShouldSkipReadTest()
        {
            var readFilter = new NeighborhoodReadFilter(new BamFilterParameters { MinimumMapQuality = 20 });

            var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000));
            neighborhood.VcfVariantSites = new List<VariantSite>
            {
                new VariantSite(10) { VcfReferenceAllele = "A", VcfAlternateAllele = "C" },
                new VariantSite(15) { VcfReferenceAllele = "G", VcfAlternateAllele = "A" },
            };
            neighborhood.SetRangeOfInterest();

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters(), null);

            // Read ends before first variant -> skipped.
            var readBeforeNbhd = TestHelper.CreateRead("chr1", "ACGT", 6);
            Assert.True(readFilter.ShouldSkipRead(readBeforeNbhd, callable));

            // Read covers 1 base of the neighborhood -> kept.
            var readTouchingNbhd = TestHelper.CreateRead("chr1", "ACGT", 7);
            Assert.False(readFilter.ShouldSkipRead(readTouchingNbhd, callable));

            // Read partially covers the neighborhood -> kept.
            var readInsideNbhd = TestHelper.CreateRead("chr1", "ACGT", 12);
            Assert.False(readFilter.ShouldSkipRead(readInsideNbhd, callable));

            // Read starts after the neighborhood -> still kept.
            var readAfterNbhd = TestHelper.CreateRead("chr1", "ACGT", 16);
            Assert.False(readFilter.ShouldSkipRead(readAfterNbhd, callable));

            // Nima: CreateRead could be extended to build PCR-duplicate, low-mapQ, and
            //       non-proper-pair reads, but those conditions are fairly trivial, so this
            //       may not be necessary.
        }
예제 #12
0
        /// <summary>
        /// Builds a mock cluster from three identical veads (C>A, G>A, T>A at positions 1-3),
        /// creates MNV candidates from it, and checks the resulting allele and per-site depths.
        /// </summary>
        public void AddMnvsFromClusters()
        {
            //TODO even with a mock cluster this takes too much setting up.
            var neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));

            // Three reads with the same three site results, at reference positions 1, 2 and 3.
            var veads = new List<Vead>();
            foreach (var readName in new[] { "r1", "r2", "r3" })
            {
                var vead = PhasedVariantTestUtilities.CreateVeadFromStringArray(readName, new[,]
                {
                    { "C", "A" }, { "G", "A" }, { "T", "A" }
                });

                for (var siteIndex = 0; siteIndex < 3; siteIndex++)
                {
                    vead.SiteResults[siteIndex].VcfReferencePosition = siteIndex + 1;
                }

                veads.Add(vead);
            }

            var consensusGroup = PhasedVariantTestUtilities.CreateVeadGroup(veads);

            var clusterMock = new Mock<ICluster>();
            clusterMock.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });
            clusterMock.Setup(c => c.GetConsensusSites()).Returns(consensusGroup.SiteResults);
            clusterMock.Setup(c => c.GetVeadGroups()).Returns(new List<VeadGroup> { consensusGroup });

            var callable = new CallableNeighborhood(neighborhood, new VariantCallingParameters());
            callable.NbhdReferenceSequenceSubstring = "CGT";
            callable.CreateMnvsFromClusters(new List<ICluster> { clusterMock.Object }, 20);

            var firstCandidate = callable.CandidateVariants.First();

            Assert.Equal(6, firstCandidate.TotalCoverage);
            Assert.Equal(6, firstCandidate.AlleleSupport);
            Assert.Equal("CGT", firstCandidate.ReferenceAllele);
            Assert.Equal("AAA", firstCandidate.AlternateAllele);

            // Both arrays are produced by DepthAtSites through its out parameters.
            int[] depths;
            int[] nocalls;
            callable.DepthAtSites(new List<ICluster> { clusterMock.Object }, out depths, out nocalls);

            Assert.Equal(3, depths.Length);
            foreach (var siteIndex in new[] { 0, 1, 2 })
            {
                Assert.Equal(3, depths[siteIndex]);
            }
        }
        /// <summary>
        /// Stages a single candidate variant at a position (129) that matches none of the original
        /// VCF sites (123, 124, 234) and checks that it is called there, alongside reference calls
        /// at the three original positions.
        /// </summary>
        public void CallAVariantInANewLocation()
        {
            // Original variants from the VCF.
            var siteAt123 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156));
            var siteAt124 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156));
            var siteAt234 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156));

            // A second site at 234 that is deliberately never added to the neighborhood: it
            // stands for a variant that failed filters and so is not used for phasing.
            var unusedSiteAt234 = new VariantSite(TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156));

            var callingParameters = new VariantCallingParameters();
            callingParameters.Validate();

            var variantCaller = new VariantCaller(callingParameters, new BamFilterParameters());

            var neighborhood = new VcfNeighborhood(0, "chr1", siteAt123, siteAt124);
            neighborhood.AddVariantSite(siteAt234);

            var callable = new CallableNeighborhood(neighborhood, callingParameters, null);

            // Stage one candidate MNV.
            var newMNV = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 129,
                ReferenceAllele = "A",
                AlternateAllele = "TT",
                VariantQscore = 100,
                TotalCoverage = 1000,
                AlleleSupport = 500
            };
            callable.AddAcceptedPhasedVariant(newMNV);

            // Mark 1000 reference counts at position 124 as already claimed by an allele.
            var claimedRefCounts = new SuckedUpRefRecord
            {
                Counts = 1000,
                AlleleThatClaimedIt = new CalledAllele()
            };
            callable.UsedRefCountsLookup = new Dictionary<int, SuckedUpRefRecord>
            {
                { 124, claimedRefCounts }
            };

            variantCaller.CallMNVs(callable);
            variantCaller.CallRefs(callable);

            var calledVariants = callable.CalledVariants;
            var calledRefs = callable.CalledRefs;

            Assert.Equal(1, calledVariants.Count);
            Assert.Equal(1, calledVariants[129].Count);
            Assert.Equal(3, calledRefs.Count);

            // Expected records: 123 and 234 as reference calls ("0/."), 124 as a no-call ("./.").
            var expectedRefAt123 = new VcfVariant
            {
                ReferenceName = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                VariantAlleles = new[] { "." },
                Genotypes = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/." } }
                },
            };

            var expectedNoCallAt124 = new VcfVariant
            {
                ReferenceName = "chr1",
                ReferencePosition = 124,
                ReferenceAllele = "A",
                VariantAlleles = new[] { "." },
                Genotypes = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "./." } }
                },
            };

            var expectedRefAt234 = new VcfVariant
            {
                ReferenceName = "chr1",
                ReferencePosition = 234,
                ReferenceAllele = "A",
                VariantAlleles = new[] { "." },
                Genotypes = new List<Dictionary<string, string>>
                {
                    new Dictionary<string, string> { { "GT", "0/." } }
                },
            };

            VcfMergerTests.CheckVariantsMatch(expectedRefAt123, calledRefs[123]);
            VcfMergerTests.CheckVariantsMatch(expectedNoCallAt124, calledRefs[124]);
            VcfMergerTests.CheckVariantsMatch(newMNV, calledVariants[129][0]);
            VcfMergerTests.CheckVariantsMatch(expectedRefAt234, calledRefs[234]);
        }
예제 #14
0
        public void ClippedReadCountTest()
        {
            var neighbor1 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "G", VcfAlternateAllele = "A"
                    },
                    new VariantSite(25)
                    {
                        VcfReferenceAllele = "T", VcfAlternateAllele = "G"
                    },
                },
            };

            neighbor1.SetRangeOfInterest();
            Assert.Equal(9, neighbor1.SoftClipEndBeforeNbhd);
            Assert.Equal(26, neighbor1.SoftClipPosAfterNbhd);

            var callableNeighbor1 = new CallableNeighborhood(neighbor1, new VariantCallingParameters(), null);

            var nbhdReadFilter = new NeighborhoodReadFilter(new BamFilterParameters()
            {
                MinimumMapQuality = 20
            });

            var cigarMatch = new CigarAlignment("4M");
            var read1      = TestHelper.CreateRead("chr1", "ACGT", 6, cigarMatch); // No clip, ends before neighborhood starts

            Assert.Equal(false, nbhdReadFilter.IsClippedWithinNeighborhood(read1, callableNeighbor1));
            var read2 = TestHelper.CreateRead("chr1", "ACGT", 8, cigarMatch);   // No clip, partially covers neighborhood

            Assert.Equal(false, nbhdReadFilter.IsClippedWithinNeighborhood(read2, callableNeighbor1));
            var read3 = TestHelper.CreateRead("chr1", "ACGT", 15, cigarMatch);   // No clip, inside neighborhood

            Assert.Equal(false, nbhdReadFilter.IsClippedWithinNeighborhood(read3, callableNeighbor1));

            // Clipped portion of read starts before neighborhood -> NOT within neighborhood
            //  POS     8  9  10 11
            //  Read    M  S  S  S
            var cigar21 = new CigarAlignment("1M3S");
            var read21  = TestHelper.CreateRead("chr1", "ACGT", 8, cigar21);

            Assert.Equal(false, nbhdReadFilter.IsClippedWithinNeighborhood(read21, callableNeighbor1));
            // Clipped portion of read starts on first variant site -> within neighborhood
            //  POS     8  9  10 11
            //  Read    M  M  S  S
            var cigar4 = new CigarAlignment("2M2S");
            var read4  = TestHelper.CreateRead("chr1", "ACGT", 8, cigar4);

            Assert.Equal(true, nbhdReadFilter.IsClippedWithinNeighborhood(read4, callableNeighbor1));
            // Clipped portion of read starts after first variant site but before end of neighborhood -> within neighborhood
            //  POS     8  9  10 11
            //  Read    M  M  M  S
            var cigar5 = new CigarAlignment("3M1S");
            var read5  = TestHelper.CreateRead("chr1", "ACGT", 8, cigar5);  // clipped end matches start of neighborhood

            Assert.Equal(true, nbhdReadFilter.IsClippedWithinNeighborhood(read5, callableNeighbor1));

            // Clipped portion of read ends before end of neighborhood -> within neighborhood
            //  POS     24 25 26 27
            //  Read    S  M  M  M
            var cigar22 = new CigarAlignment("1S3M");
            var read22  = TestHelper.CreateRead("chr1", "ACGT", 25, cigar22);

            Assert.Equal(true, nbhdReadFilter.IsClippedWithinNeighborhood(read22, callableNeighbor1));
            // Clipped portion of read ends at last variant site of neighborhood -> within neighborhood
            //  POS     24 25 26 27
            //  Read    S  S  M  M
            var cigar6 = new CigarAlignment("2S2M");
            var read6  = TestHelper.CreateRead("chr1", "ACGT", 26, cigar6);

            Assert.Equal(true, nbhdReadFilter.IsClippedWithinNeighborhood(read6, callableNeighbor1));

            // Clipped portion of read ends after neighborhood's last variant site -> NOT within neighborhood
            //  POS     24 25 26 27
            //  Read    S  S  S  M
            var cigar7 = new CigarAlignment("3S1M");
            var read7  = TestHelper.CreateRead("chr1", "ACGT", 27, cigar7);

            Assert.Equal(false, nbhdReadFilter.IsClippedWithinNeighborhood(read7, callableNeighbor1));

            // TODO (maybe test in future)
            // Nima: These borders are not very necessary given we don't check exact match in first pass over clipped reads.
            // Testing SoftClip position and End for neighborhoods with deletion
            var neighbor2 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "ACC", VcfAlternateAllele = "A"
                    },
                    new VariantSite(25)
                    {
                        VcfReferenceAllele = "TCC", VcfAlternateAllele = "T"
                    },
                },
            };

            neighbor2.SetRangeOfInterest();
            Assert.Equal(10, neighbor2.SoftClipEndBeforeNbhd);
            Assert.Equal(28, neighbor2.SoftClipPosAfterNbhd);

            // Testing SoftClip position and End for neighborhoods with insertion
            var neighbor3 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "ACC"
                    },
                    new VariantSite(25)
                    {
                        VcfReferenceAllele = "T", VcfAlternateAllele = "TCC"
                    },
                },
            };

            neighbor3.SetRangeOfInterest();
            Assert.Equal(10, neighbor3.SoftClipEndBeforeNbhd);
            Assert.Equal(26, neighbor3.SoftClipPosAfterNbhd);
        }
 /// <summary>
 /// Tells whether a read starts beyond the neighborhood's look-ahead boundary.
 /// </summary>
 /// <param name="read">Read whose <c>Position</c> is compared.</param>
 /// <param name="neighborhood">Neighborhood supplying <c>LastPositionOfInterestWithLookAhead</c>.</param>
 /// <returns>
 /// <c>true</c> when the read's position is strictly greater than the neighborhood's
 /// last position of interest (with look-ahead); otherwise <c>false</c>.
 /// </returns>
 public bool PastNeighborhood(Read read, CallableNeighborhood neighborhood)
 {
     var readStart = read.Position;
     var lookAheadBoundary = neighborhood.LastPositionOfInterestWithLookAhead;

     if (readStart > lookAheadBoundary)
     {
         return true;
     }

     return false;
 }
예제 #16
0
        /// <summary>
        /// Exercises <c>VeadGroupSource.GetVeadGroups</c> against a chr1 neighborhood and checks which of the
        /// mock reads come back as vead groups:
        /// first with five SNV sites, then after a long deletion site at 790 is added (which moves
        /// <c>LastPositionOfInterestWithLookAhead</c> from 801 to 809), and finally for the boundary case of
        /// reads ending just before / exactly at the first variant site.
        /// </summary>
        public void GetVeads()
        {
            var vcfNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(100)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(400)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(505)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(703)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "T"
                    },
                    new VariantSite(800)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                }
            };

            var callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

            var reads = new List <Read>
            {
                CreateRead("chr1", "ACGT", 10),                     // Before neighborhood
                CreateRead("chr1", "ACGT", 96),                     // Ends right before neighborhood's first variant site
                CreateRead("chr1", "ACGT", 100),                    // Match (100)
                CreateRead("chr1", "ACGT", 300),                    // Within neighborhood but no VariantSite
                CreateRead("chr1", "ACGT", 400, qualityForAll: 19), // Within neighborhood but low quals
                CreateRead("chr1", "ACGT", 500),                    // Within neighborhood but no VariantSite (ends right before 505)
                CreateRead("chr1", "ACGT", 700),                    // Match (703)
                CreateRead("chr1", "ACGT", 800),                    // Match (800)
                CreateRead("chr1", "ACGT", 805),                    // Past neighborhood
                CreateRead("chr1", "ACGT", 900),                    // Past neighborhood
                CreateRead("chr2", "ACGT", 100),                    // Wrong chromosome
            };

            var alignmentExtractor = new MockAlignmentExtractor(reads);

            var veadSource = new VeadGroupSource(alignmentExtractor,
                                                 new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            // Materialize once: the groups are counted and iterated several times below,
            // so the sequence must not be re-enumerated for every assertion.
            var veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

            // Collect all reads that could relate to the neighborhood
            // - Skip anything that has quality less than MinimumMapQuality
            // - Skip anything that ends before neighborhood begins
            // - Stop collecting once we've passed the end of the neighborhood

            // We should have collected the reads at 100, 700, and 800.
            Assert.Equal(801, callableNeighborhood.LastPositionOfInterestWithLookAhead);
            Assert.Equal(3, veadGroups.Count);
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));
            foreach (var veadGroup in veadGroups)
            {
                Assert.Equal(1, veadGroup.NumVeads);
            }

            // Add a long deletion starting at 790; the look-ahead boundary is expected to move out to 809.
            vcfNeighborhood.VcfVariantSites.Add(
                new VariantSite(790)
            {
                VcfReferenceAllele = "ACAGTGAAAGACTTGTGAC", VcfAlternateAllele = "C"
            });

            callableNeighborhood = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

            Assert.Equal(809, callableNeighborhood.LastPositionOfInterestWithLookAhead);

            alignmentExtractor = new MockAlignmentExtractor(reads);

            veadSource = new VeadGroupSource(alignmentExtractor,
                                             new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

            // Same three groups are expected even with the wider boundary.
            Assert.Equal(3, veadGroups.Count);
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));

            // Boundary case - read ends exactly at neighborhood's first variant site

            reads = new List <Read>
            {
                CreateRead("chr1", "ACGT", 10), // Before neighborhood
                CreateRead("chr1", "ACGT", 96), // Ends right before neighborhood's first variant site
                CreateRead("chr1", "ACGT", 97), // Ends exactly at neighborhood's first variant site
            };

            alignmentExtractor = new MockAlignmentExtractor(reads);

            veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            veadGroups = veadSource.GetVeadGroups(callableNeighborhood).ToList();

            // The veadgroup for 97 should be the only one
            Assert.Equal(1, veadGroups.Count);
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("97")));
            foreach (var veadGroup in veadGroups)
            {
                Assert.Equal(1, veadGroup.NumVeads);
            }
        }
예제 #17
0
        /// <summary>
        /// Merges two called neighborhoods (one on chr2, one on chr7) with the variants read from
        /// MergerInput.vcf, writes everything through a <c>PhasedVcfWriter</c>, and compares the
        /// resulting file line by line with MergerOutput.vcf.
        /// </summary>
        public void WriteANbhd()
        {
            var outputFilePath   = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

            // Start from a clean slate so a stale output file cannot satisfy the comparison.
            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };

            // The writer is given an empty input context, exactly as before.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List <string>(), null);

            // try/finally guarantees the output file handle is released even if merging throws,
            // and that the file is closed before it is read back below.
            try
            {
                var reader = new AlleleReader(inputFilePath, true);

                //set up the original variants
                var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
                var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
                var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
                var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

                var vs1 = new VariantSite((originalVcfVariant1));
                var vs2 = new VariantSite((originalVcfVariant2));
                var vs4 = new VariantSite((originalVcfVariant4));
                var vs5 = new VariantSite((originalVcfVariant5));

                //have to replace variants at position 116380048 (we call two new MNVs here)
                var nbhd1      = new VcfNeighborhood(0, "chr2", vs1, vs2);
                var calledNbh1 = new CallableNeighborhood(nbhd1, new VariantCallingParameters());

                //have to replace variants at position 116380051 and 52  (we call one new MNV at 51)
                var nbhd2      = new VcfNeighborhood(0, "chr7", vs4, vs5);
                var calledNbh2 = new CallableNeighborhood(nbhd2, new VariantCallingParameters());

                VcfMerger merger = new VcfMerger(reader);
                List <Tuple <CalledAllele, string> > alleleTuplesPastNbhd = new List <Tuple <CalledAllele, string> >();

                calledNbh1.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                    { originalVcfVariant1.ReferencePosition, new List <CalledAllele> {
                          originalVcfVariant1, originalVcfVariant2
                      } }
                };
                calledNbh2.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                    { originalVcfVariant4.ReferencePosition, new List <CalledAllele> {
                          originalVcfVariant4
                      } }
                };

                // Walk the input in order: up to each neighborhood's chromosome, then through the
                // neighborhood itself, carrying forward whatever was read past it.
                alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd1.ReferenceName);

                alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh1);

                alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd2.ReferenceName);

                alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh2);

                merger.WriteRemainingVariants(writer, alleleTuplesPastNbhd);
            }
            finally
            {
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
예제 #18
0
 /// <summary>
 /// Asserts that the neighborhood holds exactly as many variant sites as expected and that every
 /// expected site has a counterpart with the same reference name, position, reference allele and
 /// alternate allele.
 /// </summary>
 /// <param name="expectedVariantSites">Sites that must be present in the neighborhood.</param>
 /// <param name="neighborhood">Neighborhood whose <c>VcfVariantSites</c> are inspected.</param>
 public static void CheckNeighborhoodVariants(List <VariantSite> expectedVariantSites, CallableNeighborhood neighborhood)
 {
     Assert.Equal(expectedVariantSites.Count, neighborhood.VcfVariantSites.Count);

     for (var index = 0; index < expectedVariantSites.Count; index++)
     {
         var wanted = expectedVariantSites[index];

         // A site matches only when all four identifying fields agree.
         var hasMatch = neighborhood.VcfVariantSites.Any(
             candidate => candidate.ReferenceName == wanted.ReferenceName &&
                          candidate.VcfReferencePosition == wanted.VcfReferencePosition &&
                          candidate.VcfReferenceAllele == wanted.VcfReferenceAllele &&
                          candidate.VcfAlternateAllele == wanted.VcfAlternateAllele);

         Assert.True(hasMatch);
     }
 }
예제 #19
0
        /// <summary>
        /// Converts each expected <c>VcfVariant</c> into a <c>VariantSite</c> (using the first entry of
        /// its <c>VariantAlleles</c> as the alternate allele) and delegates to the
        /// <c>List&lt;VariantSite&gt;</c> overload to compare against the neighborhood.
        /// </summary>
        /// <param name="expectedVariants">Variants expected to be present in the neighborhood.</param>
        /// <param name="neighborhood">Neighborhood to check.</param>
        public static void CheckNeighborhoodVariants(List <VcfVariant> expectedVariants, CallableNeighborhood neighborhood)
        {
            var siteQuery =
                from vcfVariant in expectedVariants
                select new VariantSite()
                {
                    VcfReferencePosition = vcfVariant.ReferencePosition,
                    ReferenceName        = vcfVariant.ReferenceName,
                    VcfReferenceAllele   = vcfVariant.ReferenceAllele,
                    VcfAlternateAllele   = vcfVariant.VariantAlleles.First()
                };

            List <VariantSite> expectedSites = siteQuery.ToList();

            CheckNeighborhoodVariants(expectedSites, neighborhood);
        }
예제 #20
0
        /// <summary>
        /// Merges one called diploid neighborhood (two alleles at chr1:1) with the variants read from
        /// TinyDiploid.vcf, checks which alleles are carried past each merge step, and compares the
        /// written file line by line with TinyDiploidOutput.vcf.
        /// </summary>
        public void WriteADiploidNbhd()
        {
            var outputDir        = Path.Combine(TestPaths.LocalScratchDirectory, "MergerWriteADiploidNbhd");
            var outputFilePath   = Path.Combine(outputDir, "TinyDiploid.Phased.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploid.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploidOutput.vcf");

            TestHelper.RecreateDirectory(outputDir);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.DiploidByThresholding,
                AllowMultipleVcfLinesPerLoci = false
            };

            // The writer is given an empty input context, exactly as before.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List <string>(), null);

            // try/finally guarantees the output file handle is released even if merging or an
            // assertion throws, and that the file is closed before it is read back below.
            try
            {
                var reader = new AlleleReader(inputFilePath, true);

                //set up the original variants
                var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 1, "A", "G", 1000, 156);
                var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 1, "A", "T", 1000, 156);
                var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "G", 1000, 156);
                var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "GTCT", 1000, 156);

                var vs1 = new VariantSite((originalVcfVariant1));
                var vs2 = new VariantSite((originalVcfVariant2));
                var vs4 = new VariantSite((originalVcfVariant4));
                var vs5 = new VariantSite((originalVcfVariant5));

                //the only neighborhood in this test spans the two alleles at chr1 position 1
                var nbhd1      = new VcfNeighborhood(0, "chr1", vs1, vs2);
                var calledNbh1 = new CallableNeighborhood(nbhd1, new VariantCallingParameters());

                VcfMerger merger = new VcfMerger(reader);
                List <Tuple <CalledAllele, string> > alleleTuplesPastNbhd = new List <Tuple <CalledAllele, string> >();

                //we will just say, we called the variants that were in the original vcf. Ie, we agree with it.
                calledNbh1.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                    { originalVcfVariant1.ReferencePosition, new List <CalledAllele> {
                          originalVcfVariant1, originalVcfVariant2
                      } }
                };

                //Realizes the first nbhd starts at chr1 . We have to do something with the first lines of the vcf (chr1	1	.	A	G,T)
                //so, alleleTuplesPastNbhd = chr1	1	.	A	G,T
                alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd1.ReferenceName);
                Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant1));
                Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant2));

                //This method writes everything up to the end of nbhd 1,
                //so "(chr1	1	.	A	G,T)" from the vcf and the variants scylla detected "(chr1	1	.	A	G,T)" need to be dealt with.
                //Since these 4 variants are actually the same two, we need to remove the vcf ones and only write the scylla ones.
                //Then we peek into the vcf and see the next line is "chr22	1230237	.	GTC	G,GTCT", clearly outside nbh1.
                //so we write out everything we need for nbhd1, and save the peeked line
                alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh1);
                Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant4));
                Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant5));

                //now write out
                //chr22   1230237.GTC G,GTCT  50  DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US  1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
                //chrX    79.CG  GTG,AA  50  DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US  1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
                merger.WriteRemainingVariants(writer, alleleTuplesPastNbhd);
            }
            finally
            {
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
        /// <summary>
        /// Checks what is reported at position 124 of a two-site neighborhood (123 and 124 on chr1) when
        /// only the variant at 123 is accepted, under three settings of <c>UsedRefCountsLookup</c>:
        /// nothing sucked up (expects GT 0/. with AD 844), 100 counts sucked up (expects GT 0/. with AD 744),
        /// and 1000 counts sucked up (expects GT ./. with AD 0).
        /// The same neighborhood and caller are reused across the three scenarios, so order matters.
        /// </summary>
        public void VarCallsBecomeRefsAndNulls()
        {
            var originalVcfVariant  = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var vs1 = new VariantSite(originalVcfVariant);
            var vs2 = new VariantSite(originalVcfVariant2);

            var vcParams = new VariantCallingParameters();

            vcParams.Validate();
            var caller = new VariantCaller(vcParams, new BamFilterParameters());

            // Background for the expected AD values below:
            // since there is an alt at position 124 (a call of 156 alt / 1000 total), that means 844 original ref calls.
            // In the second scenario we say 100 of those get sucked up, which leaves 744 / 1000 calls for a reference.
            // So, we can still make a confident ref call.

            var nbhd = new VcfNeighborhood(0, "chr1", vs1, vs2);
            var callableNeighbor1 = new CallableNeighborhood(nbhd, vcParams);


            // Only the SNV at 123 is accepted as a phased variant; nothing is added for 124.
            callableNeighbor1.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            });

            // Scenario 1: no reference counts have been sucked up.
            callableNeighbor1.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
            };

            caller.CallMNVs(callableNeighbor1);
            caller.CallRefs(callableNeighbor1);

            var acceptedMNVs = callableNeighbor1.CalledVariants;
            var acceptedRefs = callableNeighbor1.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);


            // Expected record at 124: a ref call carrying all 844 original ref counts.
            var vcfVariant2asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }, { "DP", "1000" }, { "AD", "844" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asRef, acceptedRefs[124]);

            // Scenario 2: if one has been sucked up and there are refs remaining, we should output it as a ref.
            var suckedUpRefRecord100 = new SuckedUpRefRecord()
            {
                Counts = 100, AlleleThatClaimedIt = new CalledAllele()
            };

            callableNeighbor1.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 124, suckedUpRefRecord100 }
            };


            caller.CallMNVs(callableNeighbor1);
            caller.CallRefs(callableNeighbor1);

            acceptedMNVs = callableNeighbor1.CalledVariants;
            acceptedRefs = callableNeighbor1.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);

            // Expected record at 124: still a ref call, but with 844 - 100 = 744 ref counts.
            vcfVariant2asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }, { "DP", "1000" }, { "AD", "744" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asRef, acceptedRefs[124]);


            // Scenario 3: if one has been sucked up all the way
            // we should output it as a null.
            var suckedUpRefRecord1000 = new SuckedUpRefRecord()
            {
                Counts = 1000, AlleleThatClaimedIt = new CalledAllele()
            };

            callableNeighbor1.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 124, suckedUpRefRecord1000 }
            };

            caller.CallMNVs(callableNeighbor1);
            caller.CallRefs(callableNeighbor1);

            acceptedMNVs = callableNeighbor1.CalledVariants;
            acceptedRefs = callableNeighbor1.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);

            // Expected record at 124: a no-call (./.) with zero ref counts left.
            var vcfVariant2asNull = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "./." }, { "DP", "1000" }, { "AD", "0" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asNull, acceptedRefs[124]);
        }
예제 #22
0
        // This unit test was made after we found bug ScyllaLoosingRefCalls_PICS-723.
        // We had a 1/. GT reported when it should be 1/0.
        // The reason for this is that all the refs (the "0"s) got incorrectly sucked up.
        // I.e., MNV ACG -> AG claimed 50 refs, so we (incorrectly) subtracted 50 refs from it.
        // The bug is that the ref counts got subtracted from the exact same MNV that claimed them.
        // This should never happen, and was not the intent of the algorithm.
        //
        // The affected method is: CreateMnvsFromClusters in VcfNbhd
        //
        // The test runs CreateMnvsFromClusters / CallMNVs / CallRefs twice on the same neighborhood:
        // first with an empty UsedRefCountsLookup, then with 100 counts at position 123 claimed by
        // CandidateVariants[0], and checks ReferenceSupport on the three called alleles at 123 each time.
        public void CreateMnvsFromClusters_TakeUpRefCount()
        {
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 123, "ACG", "AT", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 123, "A", "TTTTTT", 1000, 200);
            // var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr1", 123, "AC", "TT", 1000, 100);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);

            var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());
            var nbhd   = new VcfNeighborhood(0, "chr1", vs1, vs2);
            var callableNeighborhood = new CallableNeighborhood(nbhd, new VariantCallingParameters());

            // Three accepted alleles, all at chr1:123 and all with ReferenceSupport = 350.
            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "A",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 200,
                ReferenceSupport  = 350
            });


            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Mnv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "ACG",
                AlternateAllele   = "AT",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 300,
                ReferenceSupport  = 350
            });

            callableNeighborhood.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Insertion)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "AAAAA",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 250,
                ReferenceSupport  = 350
            });


            //default behavior, nothing gets sucked up
            callableNeighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
            };
            vs1.VcfReferencePosition = 123;

            // Build a single-vead, single-group cluster to feed CreateMnvsFromClusters.
            var vead        = new Vead("dummy", new VariantSite[] { vs1 });
            var vg          = new VeadGroup(vead);
            var fakeCluster = new Cluster("test", new List <VeadGroup>()
            {
                vg
            });

            fakeCluster.ResetConsensus();
            callableNeighborhood.CreateMnvsFromClusters(new List <Cluster> {
                fakeCluster
            },
                                                        20);
            caller.CallMNVs(callableNeighborhood);
            caller.CallRefs(callableNeighborhood);

            var acceptedMNVs = callableNeighborhood.CalledVariants;
            var acceptedRefs = callableNeighborhood.CalledRefs;

            Assert.Equal(2, acceptedMNVs.Count);
            Assert.Equal(3, acceptedMNVs[123].Count);
            Assert.Equal(1, acceptedRefs.Count);

            //check the ref counts on all the MNVs. Nothing should be sucked up.
            Assert.Equal(350, acceptedMNVs[123][0].ReferenceSupport);  // Previously: total depth - allele support (overly simple for now)
            Assert.Equal(350, acceptedMNVs[123][1].ReferenceSupport);  // Now: explicitly set ref support
            Assert.Equal(350, acceptedMNVs[123][2].ReferenceSupport);  //

            // now variant 0 will suck up 100 ref calls:
            var suckedUpRefRecord100 = new SuckedUpRefRecord()
            {
                Counts = 100, AlleleThatClaimedIt = callableNeighborhood.CandidateVariants[0]
            };

            callableNeighborhood.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 123, suckedUpRefRecord100 }
            };
            callableNeighborhood.CreateMnvsFromClusters(new List <Cluster> {
                fakeCluster
            },
                                                        20);

            caller.CallMNVs(callableNeighborhood);
            caller.CallRefs(callableNeighborhood);

            acceptedMNVs = callableNeighborhood.CalledVariants;
            acceptedRefs = callableNeighborhood.CalledRefs;


            //check the ref counts on all the MNVs. The allele that claimed the refs (index 0) must keep
            //its full ref support; only the other two alleles lose the 100 claimed counts.
            Assert.Equal(350, acceptedMNVs[123][0].ReferenceSupport);  //Previously:  total depth - allele support (overly simple for now)

            //old result - has bug
            //Assert.Equal(1000 - 300, acceptedMNVs[123][1].ReferenceSupport);  // Previously:  total depth - allele suport - sucked up ref)
            //Assert.Equal(1000 - 250, acceptedMNVs[123][2].ReferenceSupport);  // Now: explicitly set ref support

            //new result, fixed
            Assert.Equal(350 - 100, acceptedMNVs[123][1].ReferenceSupport);  // refSupport - sucked up ref
            Assert.Equal(350 - 100, acceptedMNVs[123][2].ReferenceSupport);  // refSupport - sucked up ref
        }
예제 #23
0
        //this unit test was made after we found bug ScyllaShouldMergeClusters_PICS-1122.
        //We had an output vcf with the following lines
        //chr11	64577365	.	C	.	100	PASS	DP=1429	GT:GQ:AD:DP:VF:NL:SB:NC:US	0/0:1:1429:1429:0.00000:65:-100.0000:0.0592:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577366	.	A T	78	PASS DP = 559  GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577366	.	A T	78	PASS DP = 559  GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:78:538,2:559:0.00358:65:-100.0000:0.7509:0,0,0,0,0,0,0,0,0,0,0,0
        //chr11	64577367	.	G.   100	PASS DP = 1411 GT:GQ:AD:DP:VF:NL:SB:NC:US	0/0:1:1411:1411:0.00000:65:-100.0000:0.0741:0,0,0,0,0,0,0,0,0,0,0,0

        //The affected methods are "AddAcceptedPhasedVariant" and "AddRejectedPhasedVariant".
        //The fix merges the added variant if it is the same as a variant that already exists.
        /// <summary>
        /// Checks that <c>AddAcceptedPhasedVariant</c> and <c>AddRejectedPhasedVariant</c> fold an allele
        /// into an identical allele that is already present instead of storing a duplicate entry.
        /// </summary>
        public void AddAcceptedAndRejectedPhasedVariantTests()
        {
            // Four variant alleles go in at chr1:123: two identical A>T SNVs (expected to be combined),
            // one A>G SNV and one A>AG allele (both expected to stay separate).
            // Three ref calls go in: two at position 123 (expected to be combined) and one at 124.
            // So 7 alleles go in, but only 5 should come out: 3 candidate variants and 2 refs.

            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 500, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 200);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);

            var nbhd = new VcfNeighborhood(0, "chr1", vs1, vs2);
            var callableNeighborhood = new CallableNeighborhood(nbhd, new VariantCallingParameters());

            // Variants.

            // First A>T SNV.
            callableNeighborhood.AddAcceptedPhasedVariant(new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                VariantQscore = 100,
                TotalCoverage = 1000,
                AlleleSupport = 200,
                ReferenceSupport = 350,
                NoiseLevelApplied = 20
            });

            // Second A>T SNV: same site and alleles as the first, different counts.
            callableNeighborhood.AddAcceptedPhasedVariant(new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                VariantQscore = 20,
                TotalCoverage = 500,
                AlleleSupport = 300,
                ReferenceSupport = 50,
                NoiseLevelApplied = 20
            });

            // A>G SNV: same position, different alternate allele.
            callableNeighborhood.AddAcceptedPhasedVariant(new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = "G",
                VariantQscore = 20,
                TotalCoverage = 500,
                AlleleSupport = 300,
                ReferenceSupport = 50,
                NoiseLevelApplied = 20
            });

            // A>AG allele: same position, yet another alternate allele.
            callableNeighborhood.AddAcceptedPhasedVariant(new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = "AG",
                VariantQscore = 20,
                TotalCoverage = 500,
                AlleleSupport = 300,
                ReferenceSupport = 50,
                NoiseLevelApplied = 20
            });

            // Refs.

            // First ref call at 123.
            callableNeighborhood.AddRejectedPhasedVariant(new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = ".",
                VariantQscore = 100,
                TotalCoverage = 1000,
                AlleleSupport = 200,
                ReferenceSupport = 350,
                NoiseLevelApplied = 20
            });

            // Second ref call at 123: same site as the first, different counts.
            callableNeighborhood.AddRejectedPhasedVariant(new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome = "chr1",
                ReferencePosition = 123,
                ReferenceAllele = "A",
                AlternateAllele = ".",
                VariantQscore = 20,
                TotalCoverage = 500,
                AlleleSupport = 300,
                ReferenceSupport = 50,
                NoiseLevelApplied = 20
            });

            // Ref call at 124: a different position.
            callableNeighborhood.AddRejectedPhasedVariant(new CalledAllele(AlleleCategory.Reference)
            {
                Chromosome = "chr1",
                ReferencePosition = 124,
                ReferenceAllele = "A",
                AlternateAllele = ".",
                VariantQscore = 20,
                TotalCoverage = 500,
                AlleleSupport = 300,
                ReferenceSupport = 50,
                NoiseLevelApplied = 20
            });

            // Check we got the right number of results.
            Assert.Equal(3, callableNeighborhood.CandidateVariants.Count);
            Assert.Equal(2, callableNeighborhood.Refs.Count);

            // The two A>T SNVs should have been combined into the first entry: allele support is the
            // sum, coverage and reference support are the mean of the two inputs, and the Q-score is 100.
            var combinedSnp = callableNeighborhood.CandidateVariants[0];

            Assert.Equal(123, combinedSnp.ReferencePosition);
            Assert.Equal("chr1", combinedSnp.Chromosome);
            Assert.Equal("A", combinedSnp.ReferenceAllele);
            Assert.Equal("T", combinedSnp.AlternateAllele);
            Assert.Equal(200 + 300, combinedSnp.AlleleSupport);
            Assert.Equal(0, combinedSnp.NumNoCalls);
            Assert.Equal(100, combinedSnp.VariantQscore);
            Assert.Equal((1000 + 500) / 2, combinedSnp.TotalCoverage);
            Assert.Equal((350 + 50) / 2, combinedSnp.ReferenceSupport);
            Assert.Equal(AlleleCategory.Snv, combinedSnp.Type);
            Assert.Equal(20, combinedSnp.NoiseLevelApplied);

            // The A>G SNV had nothing to merge with, so its values should not have changed.
            var justAddedSnp = callableNeighborhood.CandidateVariants[1];

            Assert.Equal(123, justAddedSnp.ReferencePosition);
            Assert.Equal("chr1", justAddedSnp.Chromosome);
            Assert.Equal("A", justAddedSnp.ReferenceAllele);
            Assert.Equal("G", justAddedSnp.AlternateAllele);
            Assert.Equal(300, justAddedSnp.AlleleSupport);
            Assert.Equal(0, justAddedSnp.NumNoCalls);
            Assert.Equal(20, justAddedSnp.VariantQscore);
            Assert.Equal(500, justAddedSnp.TotalCoverage);
            Assert.Equal(50, justAddedSnp.ReferenceSupport);
            Assert.Equal(AlleleCategory.Snv, justAddedSnp.Type);
            Assert.Equal(20, justAddedSnp.NoiseLevelApplied);
        }
예제 #24
0
        /// <summary>
        /// Verifies that <c>VcfMerger.GetMergedListOfVariants</c> keeps an original VCF allele (tagged with its
        /// original string) only when the allele coming out of phasing is unchanged in reference support,
        /// total coverage and allele support; otherwise the phased allele is expected with an empty tag.
        /// </summary>
        public void GetMergedListOfVariants_LeaveUntouchedAsIs()
        {
            // Input VCF records modelled by this test:
            //chr7	55242464	.	A	G	6	LowSupport	DP=287	GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ	0/1:6:286,1:287:0.00348:30:-7.4908:0.0304:0,0,0,0,0,1,56,17,49,56,69,40:4.294:0.000
            //chr7	55242464	.	AGGAATTAAGAGAAGC	A	100	PASS	DP=298	GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ	0/1:100:284,14:298:0.04698:30:-75.6792:0.0000:1,0,1,4,5,3,58,18,49,55,71,41:100.000:100.000
            //chr7	55242481	.	A	T	6	LowSupport	DP=306	GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ	0/1:6:305,1:306:0.00327:30:-7.4622:0.0556:0,0,0,0,0,1,63,20,54,52,69,48:3.669:0.000
            //chr7	55242487	.	C	T	6	LowSupport	DP=325	GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ	0/1:6:324,1:325:0.00308:30:-7.1283:0.0469:0,0,0,1,0,0,67,24,61,53,68,52:1.954:0.000
            //chr7	55242489	.	G	T	6	LowSupport	DP=327	GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ	0/1:6:326,1:327:0.00306:30:-7.0226:0.0411:0,0,1,0,0,0,71,23,60,54,67,52:2.177:0.000

            var inputSnv464 = TestHelper.CreateDummyAllele("chr7", 55242464, "A", "G", 287, 1);
            inputSnv464.ReferenceSupport = 286;

            // NOTE(review): this allele is built on "chr2" although the VCF record above is on chr7 - confirm this is intentional.
            var inputDeletion464 = TestHelper.CreateDummyAllele("chr2", 55242464, "AGGAATTAAGAGAAGC", "A", 298, 14);
            inputDeletion464.ReferenceSupport = 284;

            var inputSnv481 = TestHelper.CreateDummyAllele("chr7", 55242481, "A", "T", 306, 1);
            inputSnv481.ReferenceSupport = 305;

            var inputSnv487 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1);
            inputSnv487.ReferenceSupport = 324;

            var inputSnv489 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1);
            inputSnv489.ReferenceSupport = 326;

            // Alleles as they come out of phasing:
            //#2mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGC A
            //chr7	55242464	.	AGGAATTAAGAGAAGC	A	100	PASS	DP=286	GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:100:272,13:286:0.04545:30:-100.0000:0.3024:0,0,0,0,0,0,0,0,0,0,0,0
            //#3mnv accepted: chr7 55242464 . AGGAATTAAGAGAAGCAA GAT.
            //chr7	55242464	.	AGGAATTAAGAGAAGCAA	GAT	6	PASS	DP=293	GT:GQ:AD:DP:VF:NL:SB:NC:US	0/1:6:226,1:293:0.00341:30:-100.0000:0.2854:0,0,0,0,0,0,0,0,0,0,0,0
            var phasedDeletion464 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGC", "A", 286, 13);
            phasedDeletion464.ReferenceSupport = 272;

            var phasedMnv464 = TestHelper.CreateDummyAllele("chr7", 55242464, "AGGAATTAAGAGAAGCAA", "GAT", 293, 1);
            phasedMnv464.ReferenceSupport = 226;

            //#4mnv accepted: chr7 55242487 . C T.
            var phasedSnv487 = TestHelper.CreateDummyAllele("chr7", 55242487, "C", "T", 325, 1);
            phasedSnv487.ReferenceSupport = 324;

            //#5mnv accepted: chr7 55242489 . G T.
            var phasedSnv489 = TestHelper.CreateDummyAllele("chr7", 55242489, "G", "T", 327, 1);
            phasedSnv489.ReferenceSupport = 326;

            // One variant site per input allele; the first two seed the neighborhood, the rest are appended.
            var sites = new[]
            {
                new VariantSite(inputSnv464),
                new VariantSite(inputDeletion464),
                new VariantSite(inputSnv481),
                new VariantSite(inputSnv487),
                new VariantSite(inputSnv489)
            };

            var vcfNeighborhood = new VcfNeighborhood(0, "chr7", sites[0], sites[1]);
            for (var siteIndex = 2; siteIndex < sites.Length; siteIndex++)
            {
                vcfNeighborhood.AddVariantSite(sites[siteIndex]);
            }

            var calledNbhd = new CallableNeighborhood(vcfNeighborhood, new VariantCallingParameters());

            // Builds a brand-new position -> called-alleles lookup each time it is invoked,
            // so every merge below starts from freshly created dictionary and list instances.
            Func<Dictionary<int, List<CalledAllele>>> buildCalledVariants = () => new Dictionary<int, List<CalledAllele>>
            {
                { phasedDeletion464.ReferencePosition, new List<CalledAllele> { phasedDeletion464, phasedMnv464 } },
                { phasedSnv487.ReferencePosition, new List<CalledAllele> { phasedSnv487 } },
                { phasedSnv489.ReferencePosition, new List<CalledAllele> { phasedSnv489 } }
            };

            calledNbhd.CalledVariants = buildCalledVariants();

            // The input SNV at 55242481 became a reference call:
            //chr7	55242481	.	A	.	100	PASS	DP=306	GT:GQ:AD:DP:VF:NL:SB:NC:US	0/.:100:305:306:0.00327:30:-100.0000:0.0556:0,0,0,0,0,0,0,0,0,0,0,0
            var refAt481 = TestHelper.CreateDummyAllele("chr7", 55242481, "A", ".", 306, 0);
            calledNbhd.CalledRefs = new Dictionary<int, CalledAllele>
            {
                { refAt481.ReferencePosition, refAt481 }
            };

            // Input alleles paired with the string that identifies them in the merged output.
            var origAlleles = new List<Tuple<CalledAllele, string>>
            {
                new Tuple<CalledAllele, string>(inputSnv464, "Variant1"),
                new Tuple<CalledAllele, string>(inputDeletion464, "Variant2"),
                new Tuple<CalledAllele, string>(inputSnv481, "Variant3"),
                new Tuple<CalledAllele, string>(inputSnv487, "Variant4"),
                new Tuple<CalledAllele, string>(inputSnv489, "Variant5")
            };

            // Round 1: the phased SNVs at 55242487 and 55242489 are identical to their inputs.
            var mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);

            Assert.Equal(5, mergedList.Count);
            // Entries that are new from phasing (real MNV, ref conversion) carry an empty string.
            Assert.Equal(3, mergedList.Count(entry => entry.Item2 == ""));
            // Variant4 and Variant5 are kept as-is: allele, ref support, allele support and coverage are all unchanged.
            Assert.Equal(1, mergedList.Count(entry => entry.Item2 == "Variant4"));
            Assert.Equal(1, mergedList.Count(entry => entry.Item2 == "Variant5"));

            // Round 2: the phased allele must win as soon as anything differs.
            // Pretend the phased SNV at 55242487 had one ref base sucked up by another MNV.
            phasedSnv487.ReferenceSupport = inputSnv487.ReferenceSupport - 1;
            calledNbhd.CalledVariants = buildCalledVariants();
            mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);

            Assert.Equal(5, mergedList.Count);
            Assert.Equal(4, mergedList.Count(entry => entry.Item2 == ""));
            // Variant4 now differs in ref support, so only Variant5 is kept as-is.
            Assert.Equal(0, mergedList.Count(entry => entry.Item2 == "Variant4"));
            Assert.Equal(1, mergedList.Count(entry => entry.Item2 == "Variant5"));

            // Round 3: restore ref support and change coverage instead
            // (not sure this is realistic, but added to cover all bases).
            phasedSnv487.ReferenceSupport = inputSnv487.ReferenceSupport;
            phasedSnv487.TotalCoverage = inputSnv487.TotalCoverage - 1;
            calledNbhd.CalledVariants = buildCalledVariants();
            mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);

            Assert.Equal(5, mergedList.Count);
            Assert.Equal(4, mergedList.Count(entry => entry.Item2 == ""));
            // Variant4 now differs in coverage, so only Variant5 is kept as-is.
            Assert.Equal(0, mergedList.Count(entry => entry.Item2 == "Variant4"));
            Assert.Equal(1, mergedList.Count(entry => entry.Item2 == "Variant5"));

            // Round 4: restore coverage and change allele support instead
            // (not sure this is realistic, but added to cover all bases).
            phasedSnv487.TotalCoverage = inputSnv487.TotalCoverage;
            phasedSnv487.AlleleSupport = inputSnv487.AlleleSupport - 1;
            calledNbhd.CalledVariants = buildCalledVariants();
            mergedList = VcfMerger.GetMergedListOfVariants(calledNbhd, origAlleles);

            Assert.Equal(5, mergedList.Count);
            Assert.Equal(4, mergedList.Count(entry => entry.Item2 == ""));
            // Variant4 now differs in allele support, so only Variant5 is kept as-is.
            Assert.Equal(0, mergedList.Count(entry => entry.Item2 == "Variant4"));
            Assert.Equal(1, mergedList.Count(entry => entry.Item2 == "Variant5"));
        }
예제 #25
0
        /// <summary>
        /// Reads alignments from the alignment extractor, turns each usable read into a <c>Vead</c>
        /// against the neighborhood's variant sites, and groups the veads by their variant sequence.
        /// </summary>
        /// <param name="neighborhood">
        /// Neighborhood whose <c>VcfVariantSites</c> are evaluated. Its <c>NumberClippedReads</c> counter is
        /// incremented for every read the read filter reports as clipped within the neighborhood.
        /// </param>
        /// <returns>One <c>VeadGroup</c> per distinct <c>Vead.ToVariantSequence()</c> value encountered.</returns>
        public IEnumerable <VeadGroup> GetVeadGroups(CallableNeighborhood neighborhood)
        {
            var veadGroups = new Dictionary <string, VeadGroup>();

            var neighbors = neighborhood.VcfVariantSites;
            // The reference name is taken from the first site; First() throws if there are no sites.
            var refName   = neighbors.First().ReferenceName;

            _alignmentExtractor.Jump(refName);

            var veadMaker = new VeadFinder(_options);
            var debugLog  = Path.Combine(_debugLogRoot, refName + "_" + neighborhood.Id + "_ReadsInNbhd.txt");

            WriteToReadLog(debugLog, string.Join("\t", "ReadName", "used?", "IsFirstMate", "CigarData", "Read.Position"));

            // A single Read instance is handed to the extractor on every iteration.
            var read = new Read();

            // Keep reading until the extractor has no more reads or we are past the neighborhood.
            while (_alignmentExtractor.GetNextAlignment(read))
            {
                if (_readfilter.IsClippedWithinNeighborhood(read, neighborhood))
                {
                    // Clipped reads are only counted here; they still go through the checks below.
                    neighborhood.NumberClippedReads++;
                }

                if (_readfilter.ShouldSkipRead(read, neighborhood))
                {
                    continue;
                }

                if (_readfilter.PastNeighborhood(read, neighborhood))
                {
                    break;
                }

                // Vead name: read name, mate tag and read position.
                var readName = read.Name + "_" + (read.IsFirstMate ? "fwd_" : "rev_") + read.Position;

                WriteToReadLog(debugLog, (string.Join("\t", read.Name, "will use", read.IsFirstMate, read.CigarData.ToString(),
                                                      read.Position, read.Sequence, string.Join(",", read.Qualities))));

                // Map from bases to ref position.
                var vead = new Vead(readName, veadMaker.FindVariantResults(neighbors, read));

                if (vead.SiteResults == null || !vead.SiteResults.Any())
                {
                    continue;
                }

                // Add the vead to the group keyed by its variant sequence (single dictionary lookup).
                var hash = vead.ToVariantSequence();
                VeadGroup existingGroup;
                if (veadGroups.TryGetValue(hash, out existingGroup))
                {
                    existingGroup.AddSupport(vead);
                }
                else
                {
                    veadGroups.Add(hash, new VeadGroup(vead));
                }
            }

            LogVeadGroupInfo(veadGroups.Values);

            return(veadGroups.Values);
        }
예제 #26
0
        /// <summary>
        /// Exercises <c>NeighborhoodReadFilter.PastNeighborhood</c> with reads starting at increasing positions
        /// around four neighborhoods (two SNVs, SNV + insertion, SNV + deletion, long insertion + SNV) and
        /// checks at which start position a read is first reported as past the neighborhood.
        /// </summary>
        public void PastNeighborhoodTest()
        {
            var nbhdReadFilter = new NeighborhoodReadFilter(new BamFilterParameters()
            {
                MinimumMapQuality = 20
            });

            // Scenario 1: neighborhood with 2 SNVs
            var neighbor1 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "G", VcfAlternateAllele = "A"
                    },
                },
            };

            var callableNeighbor1 = new CallableNeighborhood(neighbor1, new VariantCallingParameters());

            var read1 = TestHelper.CreateRead("chr1", "ACGT", 6);  // ends before neighborhood starts
            Assert.False(nbhdReadFilter.PastNeighborhood(read1, callableNeighbor1));

            var read2 = TestHelper.CreateRead("chr1", "ACGT", 8);  // read partially covers neighborhood from left
            Assert.False(nbhdReadFilter.PastNeighborhood(read2, callableNeighbor1));

            var read3 = TestHelper.CreateRead("chr1", "ACGT", 11); // read enclosed in neighborhood
            Assert.False(nbhdReadFilter.PastNeighborhood(read3, callableNeighbor1));

            var read4 = TestHelper.CreateRead("chr1", "ACGT", 14);  // read partially sticks out of neighborhood from right
            Assert.False(nbhdReadFilter.PastNeighborhood(read4, callableNeighbor1));

            var read5 = TestHelper.CreateRead("chr1", "ACGT", 15);  // read starts on last variant site
            Assert.False(nbhdReadFilter.PastNeighborhood(read5, callableNeighbor1));

            var read6 = TestHelper.CreateRead("chr1", "ACGT", 16);  // read starts right after neighborhood
            // Nima: Minimum lookahead is pos+1, so this is still not considered past neighborhood (maybe we should -1 but it doesn't really make a huge diff?)
            Assert.False(nbhdReadFilter.PastNeighborhood(read6, callableNeighbor1));

            var read7 = TestHelper.CreateRead("chr1", "ACGT", 17);  // read starts after neighborhood lookahead
            Assert.True(nbhdReadFilter.PastNeighborhood(read7, callableNeighbor1));


            // Scenario 2: neighborhood with one SNV, and one insertion (should extend lookahead)
            var neighbor2 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "G", VcfAlternateAllele = "GAAA"
                    },
                },
            };

            var callableNeighbor2 = new CallableNeighborhood(neighbor2, new VariantCallingParameters(), null);

            var read8 = TestHelper.CreateRead("chr1", "ACGT", 15);  // read starts at the last variant site
            Assert.False(nbhdReadFilter.PastNeighborhood(read8, callableNeighbor2));

            var read9 = TestHelper.CreateRead("chr1", "ACGT", 16);  // read starts after last variant position, but before lookahead
            Assert.False(nbhdReadFilter.PastNeighborhood(read9, callableNeighbor2));

            var read10 = TestHelper.CreateRead("chr1", "ACGT", 17); // read starts after last variant position, but before lookahead
            Assert.False(nbhdReadFilter.PastNeighborhood(read10, callableNeighbor2));

            var read11 = TestHelper.CreateRead("chr1", "ACGT", 18); // read starts after last variant position, but before lookahead
            Assert.False(nbhdReadFilter.PastNeighborhood(read11, callableNeighbor2));

            var read12 = TestHelper.CreateRead("chr1", "ACGT", 19); // read starts after last variant position, but before lookahead
            // Nima: Minimum lookahead is pos+4, so this is still not considered past neighborhood (maybe we should -1 but it doesn't really make a huge diff?)
            Assert.False(nbhdReadFilter.PastNeighborhood(read12, callableNeighbor2));

            var read13 = TestHelper.CreateRead("chr1", "ACGT", 20); // read starts after lookahead
            Assert.True(nbhdReadFilter.PastNeighborhood(read13, callableNeighbor2));


            // Scenario 3: neighborhood with one SNV, and one deletion (similar to Scenario 2)
            var neighbor3 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "GAAA", VcfAlternateAllele = "G"
                    },
                },
            };

            var callableNeighbor3 = new CallableNeighborhood(neighbor3, new VariantCallingParameters(), null);

            var read14 = TestHelper.CreateRead("chr1", "ACGT", 18); // read starts after last variant position, but before lookahead
            Assert.False(nbhdReadFilter.PastNeighborhood(read14, callableNeighbor3));

            var read15 = TestHelper.CreateRead("chr1", "ACGT", 19); // read starts after last variant position, but before lookahead
            // Nima: Minimum lookahead is pos+4, so this is still not considered past neighborhood (maybe we should -1 but it doesn't really make a huge diff?)
            Assert.False(nbhdReadFilter.PastNeighborhood(read15, callableNeighbor3));

            var read16 = TestHelper.CreateRead("chr1", "ACGT", 20); // read starts after lookahead
            Assert.True(nbhdReadFilter.PastNeighborhood(read16, callableNeighbor3));


            // Scenario 4: long indel variant in the beginning of neighborhood can extend lookahead
            var neighbor4 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "ATTTTTTT"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "G", VcfAlternateAllele = "A"
                    },
                },
            };

            var callableNeighbor4 = new CallableNeighborhood(neighbor4, new VariantCallingParameters(), null);

            var read17 = TestHelper.CreateRead("chr1", "ACGT", 16);  // read starts after last variant position, but before lookahead from first variant
            Assert.False(nbhdReadFilter.PastNeighborhood(read17, callableNeighbor4));

            var read18 = TestHelper.CreateRead("chr1", "ACGT", 17);  // read starts after last variant position, but before lookahead from first variant
            Assert.False(nbhdReadFilter.PastNeighborhood(read18, callableNeighbor4));

            var read19 = TestHelper.CreateRead("chr1", "ACGT", 18);  // read starts after last variant position, but before lookahead from first variant
            Assert.False(nbhdReadFilter.PastNeighborhood(read19, callableNeighbor4));

            var read20 = TestHelper.CreateRead("chr1", "ACGT", 20);  // read starts after lookahead of first variant
            Assert.True(nbhdReadFilter.PastNeighborhood(read20, callableNeighbor4));
        }