Пример #1
0
        public void VcfNeighborhood()
        {
            var nbhd = new VcfNeighborhood(0, "chr1", new VariantSite(123), new VariantSite(124));

            Assert.Equal("NbhdNum0_chr1_123", nbhd.Id);
            Assert.Equal("chr1", nbhd.ReferenceName);
        }
Пример #2
0
        private void ProcessNewNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, string referenceStringBetweenVariants)
        {
            var newNeighborhood = new VcfNeighborhood(_variantCallingParams, currentVariantSite.ReferenceName,
                                                      lastVariantSite, currentVariantSite, referenceStringBetweenVariants);

            _neighborhoods.Add(newNeighborhood);
        }
Пример #3
0
        private void ExecuteGroupingTest(List <Read> reads, List <int> expectedGroupMemberships, IEnumerable <Tuple <int, string, string> > variants)
        {
            var variantSites = new List <VariantSite>();

            foreach (var variant in variants)
            {
                variantSites.Add(new VariantSite(variant.Item1)
                {
                    VcfReferenceAllele = variant.Item2, VcfAlternateAllele = variant.Item3
                });
            }

            var alignmentExtractor = new MockAlignmentExtractor(reads);

            var veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");
            var vcfNeighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T")
            {
                VcfVariantSites = variantSites
            };

            vcfNeighborhood.SetRangeOfInterest();

            var veadGroups = veadSource.GetVeadGroups(vcfNeighborhood).ToList();

            Assert.Equal(expectedGroupMemberships.Count, veadGroups.Count());
            for (var i = 0; i < veadGroups.Count(); i++)
            {
                Assert.Equal(expectedGroupMemberships[i], veadGroups[i].NumVeads);
            }
        }
Пример #4
0
        private void ProcessNeighborhood(VcfNeighborhood neighborhood, NeighborhoodClusterer clusterer, IEnumerable <VeadGroup> collapsedReads)
        {
            Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

            try
            {
                var clusters = clusterer.ClusterVeadGroups(collapsedReads.ToList());

                if (clusters != null)
                {
                    Logger.WriteToLog("Found " + clusters.Clusters.Length + " clusters in Nbhd " + neighborhood.Id);

                    //tjd+
                    //Commenting out for speed. We currently never use these results
                    //neighborhood.PhasingProbabiltiies =
                    //   VariantPhaser.GetPhasingProbabilities(neighborhood.VcfVariantSites, clusters);
                    //tjd-
                }

                bool crushNbhdVariantsToSamePositon = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

                neighborhood.AddMnvsFromClusters(clusters.Clusters,
                                                 _factory.Options.BamFilterParams.MinimumBaseCallQuality, _factory.Options.VariantCallingParams.MaximumVariantQScore,
                                                 crushNbhdVariantsToSamePositon);
                neighborhood.SetGenotypesAndPruneExcessAlleles();
            }
            catch (Exception ex)
            {
                Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
                Logger.WriteExceptionToLog(ex);
            }
        }
Пример #5
0
        public void VcfNeighborhood()
        {
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(123), new VariantSite(124), "A");

            Assert.Equal("NbhdNum0_chr1_123", nbhd.Id);
            Assert.Equal("chr1", nbhd.ReferenceName);
        }
Пример #6
0
        private bool ShouldSkipRead(Read read, VcfNeighborhood neighborhood)
        {
            if (_options.RemoveDuplicates)
            {
                if (read.IsPcrDuplicate)
                {
                    return(true);
                }
            }

            if (_options.OnlyUseProperPairs)
            {
                if (!read.IsProperPair)
                {
                    return(true);
                }
            }

            if (read.MapQuality < _options.MinimumMapQuality)
            {
                return(true);
            }
            if (read.EndPosition < neighborhood.VcfVariantSites.First().VcfReferencePosition)
            {
                return(true);
            }

            return(false);
        }
Пример #7
0
        public void GetOriginalVcfIndexes()
        {
            var originalVar1 = new Pisces.Domain.Models.Alleles.CalledAllele()
            {
                ReferencePosition = 1
            };
            var originalVar10 = new Pisces.Domain.Models.Alleles.CalledAllele()
            {
                ReferencePosition = 10
            };

            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(123)
            {
                OriginalAlleleFromVcf = originalVar1
            }
                                           , new VariantSite(123)
            {
                OriginalAlleleFromVcf = originalVar10
            }, "T");

            var originalVcfIndexes = nbhd.GetOriginalVcfVariants();

            Assert.Equal(2, originalVcfIndexes.Count);
            Assert.Equal(1, originalVcfIndexes[0].ReferencePosition);
            Assert.Equal(10, originalVcfIndexes[1].ReferencePosition);
        }
Пример #8
0
        public void GetOriginalVcfIndexes()
        {
            var originalVar1 = new CalledAllele()
            {
                ReferencePosition = 1
            };
            var originalVar10 = new CalledAllele()
            {
                ReferencePosition = 10
            };

            var nbhd = new VcfNeighborhood(0, "chr1", new VariantSite(123)
            {
                OriginalAlleleFromVcf = originalVar1
            }
                                           , new VariantSite(123)
            {
                OriginalAlleleFromVcf = originalVar10
            });

            var originalVcfIndexes = new CallableNeighborhood(nbhd, new VariantCallingParameters()).GetOriginalVcfVariants();

            Assert.Equal(2, originalVcfIndexes.Count);
            Assert.Equal(1, originalVcfIndexes[0].ReferencePosition);
            Assert.Equal(10, originalVcfIndexes[1].ReferencePosition);
        }
Пример #9
0
        public void CallThroughAnEmptyNbhd()
        {
            var originalVcfVariant  = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var vs1 = new VariantSite(originalVcfVariant);
            var vs2 = new VariantSite(originalVcfVariant2);

            var caller = new VariantCaller(new VariantCallingParameters(), new BamFilterParameters());

            //since there is an alt at position 124 ( a call of 156 alt / 1000 total, that means 844 original ref calls.
            //Of which we said, 100 will get sucked up. So that leaves 744 / 1000 calls for a reference.
            //So, we can still make a confident ref call. (we will call it 0/., since we know its not a homozygous ref)

            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", vs1, vs2, "");

            nbhd.SetRangeOfInterest();

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(0, acceptedMNVs.Count);
            Assert.Equal(2, acceptedRefs.Count);

            Assert.Equal(Genotype.RefAndNoCall, acceptedRefs[123].Genotype);
            Assert.Equal(Genotype.RefAndNoCall, acceptedRefs[124].Genotype);
            Assert.Equal(123, acceptedRefs[123].ReferencePosition);
            Assert.Equal(124, acceptedRefs[124].ReferencePosition);
        }
Пример #10
0
        public void SupplementSupportWithClippedReads()
        {
            // In this test we create reads that are either normal or clipped (identified by "clip_" in their name)
            // This test does not take cigar data into account.

            var mockClippedReadComparator = new Mock <IMNVClippedReadComparator>();

            // Mock read comparator returns true if read name starts with c
            mockClippedReadComparator.Setup(x => x.DoesClippedReadSupportMNV(It.IsAny <Read>(), It.IsAny <CalledAllele>()))
            .Returns((Read read, CalledAllele allele) => read.Name[0] == 'c' ? true : false);

            var reads = new List <Read>();

            reads.Add(CreateRead("chr1", "ACGT", 3, "read4"));
            reads.Add(CreateRead("chr1", "ACGT", 3, "clip_read4", matePosition: 3));  // +1 not in neighborhood, but still gets counted because mocked ClippedReadComparator
            reads.Add(CreateRead("chr1", "ACGT", 12, "read1", matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "read2", matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "read1", read2: true, matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "read_notmapped", isMapped: false, isProperPair: false, matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "read3", isProperPair: false, read2: true, matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "read2", read2: true, matePosition: 10));
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read1", matePosition: 10));                                                // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read2", matePosition: 10));                                                // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read1", read2: true, matePosition: 10));                                   // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read_notmapped", isMapped: false, isProperPair: false, matePosition: 10)); // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read3", isProperPair: false, read2: true, matePosition: 10));              // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 12, "clip_read2", read2: true, matePosition: 10));                                   // +1 clipped read
            reads.Add(CreateRead("chr1", "ACGT", 30, "read5"));
            reads.Add(CreateRead("chr1", "ACGT", 30, "clip_read5", matePosition: 30));                                                // not in neighborhood, not counted
            var mockAlignmentExtractor = new MockAlignmentExtractor(reads);
            int qNoiseLevel            = 20;
            int maxQscore  = 100;
            int minMNVsize = 6;
            MNVSoftClipSupportFinder mnvClippedSupportFinder = new MNVSoftClipSupportFinder(mockAlignmentExtractor, mockClippedReadComparator.Object, qNoiseLevel, maxQscore, minMNVsize);

            var mnv1      = TestHelper.CreateDummyAllele("chr1", 10, "AAAAAA", "CCC", 2000, 50);
            var neighbor1 = new VcfNeighborhood(0, "chr", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C", ReferenceName = "chr"
                    },
                    new VariantSite(25)
                    {
                        VcfReferenceAllele = "T", VcfAlternateAllele = "G", ReferenceName = "chr"
                    },
                },
            };

            var callableNbhd = new CallableNeighborhood(neighbor1, new VariantCallingParameters(), null);

            callableNbhd.AddAcceptedPhasedVariant(mnv1);
            Assert.Equal(50, callableNbhd.CandidateVariants[0].AlleleSupport);
            mnvClippedSupportFinder.SupplementSupportWithClippedReads(callableNbhd);
            Assert.Equal(57, callableNbhd.CandidateVariants[0].AlleleSupport);
        }
Пример #11
0
        private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
        {
            //buffer this for our next call to "GetBatchOfNeighborhoods" .

            var newNeighborhood = new VcfNeighborhood(numNbhdsSoFar + _maxNumNbhdsInBatch, currentVariantSite.ReferenceName,
                                                      lastVariantSite, currentVariantSite);

            _unfinshedNeighborhoods.Add(newNeighborhood);
        }
Пример #12
0
        private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite, int numNbhdsSoFar)
        {
            int numNbhdInBatchSoFar = _nextBatchOfVcfNeighborhoods.Count;

            var newNeighborhood = new VcfNeighborhood(numNbhdsSoFar + numNbhdInBatchSoFar, currentVariantSite.ReferenceName,
                                                      lastVariantSite, currentVariantSite);

            _nextBatchOfVcfNeighborhoods.Add(newNeighborhood);
        }
Пример #13
0
        private void AddNewNeighborhoodToBatch(VariantSite lastVariantSite, VariantSite currentVariantSite,
                                               string referenceStringBetweenVariants, int numNbhdsSoFar)
        {
            int numNbhdInBatchSoFar = _nextBatchOfNeighborhoods.Count;

            var newNeighborhood = new VcfNeighborhood(_variantCallingParams, numNbhdsSoFar + numNbhdInBatchSoFar, currentVariantSite.ReferenceName,
                                                      lastVariantSite, currentVariantSite, referenceStringBetweenVariants);

            _nextBatchOfNeighborhoods.Add(newNeighborhood);
        }
Пример #14
0
        private void MakeAHangingNeighborhood(VariantSite lastVariantSite, VariantSite currentVariantSite,
                                              string referenceStringBetweenVariants, int numNbhdsSoFar)
        {
            //buffer this for our next call to "GetBatchOfNeighborhoods" .

            var newNeighborhood = new VcfNeighborhood(_variantCallingParams, numNbhdsSoFar + _maxNumNbhdsInBatch, currentVariantSite.ReferenceName,
                                                      lastVariantSite, currentVariantSite, referenceStringBetweenVariants);

            _unfinshedNeighborhoods.Add(newNeighborhood);
        }
Пример #15
0
        public void CheckAddingFilters()
        {
            var originalVcfVariant  = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var vs1 = new VariantSite(originalVcfVariant);
            var vs2 = new VariantSite(originalVcfVariant2);

            var variantCallingParameters = new VariantCallingParameters();

            //Set up filters so calls are sure to trigger them.
            variantCallingParameters.LowDepthFilter             = 2000;
            variantCallingParameters.MinimumFrequencyFilter     = 0.80F;
            variantCallingParameters.MinimumVariantQScoreFilter = 300;


            var caller = new VariantCaller(variantCallingParameters, new BamFilterParameters());


            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", vs1, vs2, "");

            nbhd.SetRangeOfInterest();
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            });
            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
            };

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.True(acceptedMNVs[123][0].Filters.Contains(FilterType.LowDepth));
            Assert.True(acceptedMNVs[123][0].Filters.Contains(FilterType.LowVariantFrequency));
            Assert.True(acceptedMNVs[123][0].Filters.Contains(FilterType.LowVariantQscore));

            Assert.Equal(2, acceptedRefs.Count);

            Assert.True(acceptedRefs[123].Filters.Contains(FilterType.LowDepth));
            Assert.True(acceptedRefs[123].Filters.Contains(FilterType.LowVariantQscore));
            //note reference calls dont win the "LowVariantFrequency" flag.
        }
Пример #16
0
        public void AddMnvsFromClusters()
        {
            //TODO even with mock cluster this takes too much setting up.
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(120), new VariantSite(121), "T");

            var vead = PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var vead2 = PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var vead3 = PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var veads = new List <Vead> {
                vead, vead2, vead3
            };

            nbhd.ReferenceSequence = "CGT";

            var mockCluster = new Mock <ICluster>();

            mockCluster.Setup(c => c.CountsAtSites).Returns(new[] { 10, 3, 5 });
            var consensus = PhasedVariantTestUtilities.CreateVeadGroup(veads);

            mockCluster.Setup(c => c.GetConsensusSites()).Returns(consensus.SiteResults);
            mockCluster.Setup(c => c.GetVeadGroups()).Returns(new List <VeadGroup>()
            {
                consensus
            });
            nbhd.AddMnvsFromClusters(new List <ICluster>()
            {
                mockCluster.Object
            }, 20, 100);

            var allele = nbhd.CandidateVariants.First();

            Assert.Equal(6, allele.TotalCoverage);
            Assert.Equal(6, allele.AlleleSupport);
            Assert.Equal("CGT", allele.Reference);
            Assert.Equal("AAA", allele.Alternate);

            var depths = nbhd.DepthAtSites(new List <ICluster>()
            {
                mockCluster.Object
            });

            Assert.Equal(3, depths.Length);
            Assert.Equal(3, depths[0]);
            Assert.Equal(3, depths[1]);
            Assert.Equal(3, depths[2]);
        }
Пример #17
0
        public void LastPositionIsNotMatch()
        {
            var nbhd        = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");
            var variantSite = new VariantSite(123);

            nbhd.AddVariantSite(variantSite, "ATCG");

            var vsPositionMatch = new VariantSite(123);

            Assert.False(nbhd.LastPositionIsNotMatch(vsPositionMatch));

            var vsPositionMismatch = new VariantSite(124);

            Assert.True(nbhd.LastPositionIsNotMatch(vsPositionMismatch));
        }
Пример #18
0
        public void AddVariantSite()
        {
            var nbhd = new VcfNeighborhood(0, "chr1", new VariantSite(120)
            {
                VcfReferenceAllele = "A"
            }, new VariantSite(121));

            Assert.Equal("NbhdNum0_chr1_120", nbhd.Id);

            var variantSite = new VariantSite(123);

            nbhd.AddVariantSite(variantSite);
            Assert.Equal(3, nbhd.VcfVariantSites.Count);
            Assert.Equal("NbhdNum0_chr1_120", nbhd.Id);
        }
Пример #19
0
        public void LastPositionIsNotMatch()
        {
            var nbhd        = new VcfNeighborhood(0, "chr1", new VariantSite(120), new VariantSite(121));
            var variantSite = new VariantSite(123);

            nbhd.AddVariantSite(variantSite);

            var vsPositionMatch = new VariantSite(123);

            Assert.False(nbhd.LastPositionIsNotMatch(vsPositionMatch));

            var vsPositionMismatch = new VariantSite(124);

            Assert.True(nbhd.LastPositionIsNotMatch(vsPositionMismatch));
        }
Пример #20
0
        public void AddVariantSite()
        {
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120)
            {
                VcfReferenceAllele = "A"
            }, new VariantSite(121), "T");

            Assert.Equal("NbhdNum0_chr1_120", nbhd.Id);

            var variantSite = new VariantSite(123);

            nbhd.AddVariantSite(variantSite, "ATCG");
            Assert.Equal("ATATCG", nbhd.ReferenceSequence);
            Assert.Equal(3, nbhd.VcfVariantSites.Count);
            Assert.Equal("NbhdNum0_chr1_120", nbhd.Id);
        }
Пример #21
0
        public void SetDepthAtSites()
        {
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T");

            var vead = PhasedVariantTestUtilities.CreateVeadFromStringArray("r1", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var vead2 = PhasedVariantTestUtilities.CreateVeadFromStringArray("r2", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var vead3 = PhasedVariantTestUtilities.CreateVeadFromStringArray("r3", new[, ] {
                { "C", "A" }, { "G", "A" }, { "T", "A" }
            });
            var veads = new List <Vead> {
                vead, vead2, vead3
            };
        }
Пример #22
0
        public void SetRangeOfInterestTests()
        {
            /// <summary>
            /// This method sets the NbdhReferenceSequenceSubstring, and the first/last positions of interest
            /// </summary>

            var refName = "chr";

            //test with no Genome given
            var nbhd = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

            Assert.Equal(-1, nbhd.FirstPositionOfInterest);
            Assert.Equal(-1, nbhd.LastPositionOfInterestInVcf);
            Assert.Equal(-1, nbhd.LastPositionOfInterestWithLookAhead);

            var readyNbhd = new CallableNeighborhood(nbhd, new VariantCallingParameters());

            Assert.Equal("RR", readyNbhd.NbhdReferenceSequenceSubstring);
            Assert.Equal(120, nbhd.FirstPositionOfInterest);
            Assert.Equal(121, nbhd.LastPositionOfInterestInVcf);
            Assert.Equal(122, nbhd.LastPositionOfInterestWithLookAhead);

            //test with a genome given
            var    genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
            Genome genome     = new Genome(genomePath, new List <string>()
            {
                refName
            });
            ChrReference chrReference = genome.GetChrReference(refName);

            nbhd = new VcfNeighborhood(0, refName, new VariantSite(120), new VariantSite(121));

            Assert.Equal(-1, nbhd.FirstPositionOfInterest);
            Assert.Equal(-1, nbhd.LastPositionOfInterestInVcf);
            Assert.Equal(-1, nbhd.LastPositionOfInterestWithLookAhead);

            readyNbhd = new CallableNeighborhood(nbhd, new VariantCallingParameters(), chrReference);

            Assert.Equal("TG", readyNbhd.NbhdReferenceSequenceSubstring);
            Assert.Equal(120, readyNbhd.FirstPositionOfInterest);
            Assert.Equal(121, readyNbhd.LastPositionOfInterestInVcf);
            Assert.Equal(122, readyNbhd.LastPositionOfInterestWithLookAhead);
        }
Пример #23
0
        public void ShouldSkipReadTest()
        {
            var nbhdReadFilter = new NeighborhoodReadFilter(new BamFilterParameters()
            {
                MinimumMapQuality = 20
            });

            var neighbor1 = new VcfNeighborhood(0, "chr1", new VariantSite(10000), new VariantSite(200000))
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(10)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(15)
                    {
                        VcfReferenceAllele = "G", VcfAlternateAllele = "A"
                    },
                },
            };

            neighbor1.SetRangeOfInterest();
            var callableNeighbor1 = new CallableNeighborhood(neighbor1, new VariantCallingParameters(), null);


            var read1 = TestHelper.CreateRead("chr1", "ACGT", 6);      // Read ends before first variant

            Assert.Equal(true, nbhdReadFilter.ShouldSkipRead(read1, callableNeighbor1));
            var read2 = TestHelper.CreateRead("chr1", "ACGT", 7);      // Read covers 1 base of the nbhd

            Assert.Equal(false, nbhdReadFilter.ShouldSkipRead(read2, callableNeighbor1));
            var read3 = TestHelper.CreateRead("chr1", "ACGT", 12);      // Read partially covers neighborhood

            Assert.Equal(false, nbhdReadFilter.ShouldSkipRead(read3, callableNeighbor1));
            var read4 = TestHelper.CreateRead("chr1", "ACGT", 16);      // Read starts after neighborhood

            Assert.Equal(false, nbhdReadFilter.ShouldSkipRead(read4, callableNeighbor1));

            // Nima: we can maybe add features to CreateRead to be able to create PCR duplicate, low mapQ, and non proper pair reads
            //       but i think these conditions are somewhat trivial, and this may not be necessary.
        }
Пример #24
0
        private void ProcessNeighborhood(VcfNeighborhood neighborhood)
        {
            Logger.WriteToLog("Processing Neighborhood {0}.", neighborhood.Id);

            try
            {
                var clusterer       = _factory.CreateNeighborhoodClusterer();
                var veadGroupSource = _factory.CreateVeadGroupSource();
                var collapsedReads  = veadGroupSource.GetVeadGroups(neighborhood);

                //(1) Get CLUSTERS
                var clusters = clusterer.ClusterVeadGroups(collapsedReads.ToList(), neighborhood.Id);


                //clean out vg, we dont need them any more
                veadGroupSource = null;
                collapsedReads  = null;

                bool crushNbhdVariantsToSamePositon = !_factory.Options.VcfWritingParams.AllowMultipleVcfLinesPerLoci;

                //(2) Turn clusters into MNV candidates
                neighborhood.CreateMnvsFromClusters(clusters.Clusters,
                                                    _factory.Options.BamFilterParams.MinimumBaseCallQuality, _factory.Options.VariantCallingParams.MaximumVariantQScore,
                                                    crushNbhdVariantsToSamePositon);
                neighborhood.SetGenotypesAndPruneExcessAlleles();

                // (3) Variant call the candidates
                var variantCaller = _factory.CreateVariantCaller();
                variantCaller.CallMNVs(neighborhood);
                variantCaller.CallRefs(neighborhood);

                //wait untill vcf is ready to write...
            }
            catch (Exception ex)
            {
                Logger.WriteToLog("Error processing neighborhood {0}", neighborhood.Id);
                Logger.WriteExceptionToLog(ex);
            }
        }
Пример #25
0
        private List <VcfNeighborhood> GetNeighborhoods(int expectedNumberOfThreads)
        {
            var neighborhoods = new List <VcfNeighborhood>();

            for (var i = 0; i < expectedNumberOfThreads; i++)
            {
                var neighborhood = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120), new VariantSite(121), "T")
                {
                    VcfVariantSites = new List <VariantSite>
                    {
                        new VariantSite(123)
                        {
                            ReferenceName         = "chr1",
                            OriginalAlleleFromVcf = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156)
                                                    //orignally at index 0
                        },
                    }
                };

                neighborhoods.Add(neighborhood);
            }
            return(neighborhoods);
        }
Пример #26
0
        public void VarCallsBecomeRefsAndNulls()
        {
            var originalVcfVariant  = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var vs1 = new VariantSite(originalVcfVariant);
            var vs2 = new VariantSite(originalVcfVariant2);

            var vcParams = new VariantCallingParameters();

            vcParams.Validate();
            var caller = new VariantCaller(vcParams, new BamFilterParameters());

            //since there is an alt at position 124 ( a call of 156 alt / 1000 total, that means 844 original ref calls.
            //Of which we said, 100 will get sucked up. So that leaves 744 / 1000 calls for a reference.
            //So, we can still make a confident ref call.

            var nbhd = new VcfNeighborhood(vcParams, 0, "chr1", vs1, vs2, "");

            nbhd.SetRangeOfInterest();
            nbhd.AddAcceptedPhasedVariant(
                new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                AlternateAllele   = "T",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            });
            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
            };

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);


            var vcfVariant2asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }, { "DP", "1000" }, { "AD", "844" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asRef, acceptedRefs[124]);

            // If one has been sucked up and there are refs remaining, we should output it as a ref.
            var suckedUpRefRecord100 = new SuckedUpRefRecord()
            {
                Counts = 100, AlleleThatClaimedIt = new CalledAllele()
            };

            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 124, suckedUpRefRecord100 }
            };


            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            acceptedMNVs = nbhd.CalledVariants;
            acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);

            vcfVariant2asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }, { "DP", "1000" }, { "AD", "744" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asRef, acceptedRefs[124]);


            // If one has been sucked up all the way
            // we should output it as a null.
            var suckedUpRefRecord1000 = new SuckedUpRefRecord()
            {
                Counts = 1000, AlleleThatClaimedIt = new CalledAllele()
            };

            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 124, suckedUpRefRecord1000 }
            };

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            acceptedMNVs = nbhd.CalledVariants;
            acceptedRefs = nbhd.CalledRefs;

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[123].Count);

            Assert.Equal(2, acceptedRefs.Count);

            var vcfVariant2asNull = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "./." }, { "DP", "1000" }, { "AD", "0" }
                    }
                },
            };

            VcfMergerTests.CheckVariantsMatch(originalVcfVariant, acceptedMNVs[123][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asNull, acceptedRefs[124]);
        }
Пример #27
0
        public void SortSites()
        {
            var nbhd = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr1", new VariantSite(120)
            {
                VcfReferenceAllele = "A"
            }, new VariantSite(121), "T");
            var variantSite1 = new VariantSite(123);

            variantSite1.VcfReferencePosition  = 140453137;
            variantSite1.VcfReferenceAllele    = "C";
            variantSite1.VcfAlternateAllele    = "CGTA";
            variantSite1.OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele()
            {
                ReferencePosition = 7
            };
            nbhd.AddVariantSite(variantSite1, "ATCG");

            var variantSite2 = new VariantSite();

            variantSite2.VcfReferencePosition  = 140453137;
            variantSite2.VcfReferenceAllele    = "C";
            variantSite2.VcfAlternateAllele    = "T";
            variantSite2.OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele()
            {
                ReferencePosition = 8
            };
            nbhd.AddVariantSite(variantSite2, "");

            var variantSite3 = new VariantSite();

            variantSite3.VcfReferencePosition  = 140453130;
            variantSite3.VcfReferenceAllele    = "C";
            variantSite3.VcfAlternateAllele    = "T";
            variantSite3.OriginalAlleleFromVcf = new Pisces.Domain.Models.Alleles.CalledAllele()
            {
                ReferencePosition = 9
            };
            nbhd.AddVariantSite(variantSite3, "");

            Assert.Equal(5, nbhd.VcfVariantSites.Count);

            Assert.Equal(120, nbhd.VcfVariantSites[0].VcfReferencePosition);
            Assert.Equal(120, nbhd.VcfVariantSites[0].TrueFirstBaseOfDiff);
            Assert.Equal("A", nbhd.VcfVariantSites[0].VcfReferenceAllele);
            Assert.Equal("N", nbhd.VcfVariantSites[0].VcfAlternateAllele);

            Assert.Equal(121, nbhd.VcfVariantSites[1].VcfReferencePosition);
            Assert.Equal(121, nbhd.VcfVariantSites[1].TrueFirstBaseOfDiff);
            Assert.Equal("N", nbhd.VcfVariantSites[1].VcfReferenceAllele);
            Assert.Equal("N", nbhd.VcfVariantSites[1].VcfAlternateAllele);

            Assert.Equal(140453137, nbhd.VcfVariantSites[2].VcfReferencePosition);
            Assert.Equal(140453138, nbhd.VcfVariantSites[2].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[2].VcfReferenceAllele);
            Assert.Equal("CGTA", nbhd.VcfVariantSites[2].VcfAlternateAllele);

            Assert.Equal(140453137, nbhd.VcfVariantSites[3].VcfReferencePosition);
            Assert.Equal(140453137, nbhd.VcfVariantSites[3].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[3].VcfReferenceAllele);
            Assert.Equal("T", nbhd.VcfVariantSites[3].VcfAlternateAllele);

            Assert.Equal(140453130, nbhd.VcfVariantSites[4].VcfReferencePosition);
            Assert.Equal(140453130, nbhd.VcfVariantSites[4].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[4].VcfReferenceAllele);
            Assert.Equal("T", nbhd.VcfVariantSites[4].VcfAlternateAllele);

            nbhd.OrderVariantSitesByFirstTrueStartPosition();

            Assert.Equal(120, nbhd.VcfVariantSites[0].VcfReferencePosition);
            Assert.Equal(120, nbhd.VcfVariantSites[0].TrueFirstBaseOfDiff);
            Assert.Equal("A", nbhd.VcfVariantSites[0].VcfReferenceAllele);
            Assert.Equal("N", nbhd.VcfVariantSites[0].VcfAlternateAllele);

            Assert.Equal(121, nbhd.VcfVariantSites[1].VcfReferencePosition);
            Assert.Equal(121, nbhd.VcfVariantSites[1].TrueFirstBaseOfDiff);
            Assert.Equal("N", nbhd.VcfVariantSites[1].VcfReferenceAllele);
            Assert.Equal("N", nbhd.VcfVariantSites[1].VcfAlternateAllele);

            Assert.Equal(140453130, nbhd.VcfVariantSites[2].VcfReferencePosition);
            Assert.Equal(140453130, nbhd.VcfVariantSites[2].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[2].VcfReferenceAllele);
            Assert.Equal("T", nbhd.VcfVariantSites[2].VcfAlternateAllele);
            Assert.Equal(7, nbhd.VcfVariantSites[2].OriginalAlleleFromVcf.ReferencePosition);

            Assert.Equal(140453137, nbhd.VcfVariantSites[3].VcfReferencePosition);
            Assert.Equal(140453137, nbhd.VcfVariantSites[3].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[3].VcfReferenceAllele);
            Assert.Equal("T", nbhd.VcfVariantSites[3].VcfAlternateAllele);
            Assert.Equal(8, nbhd.VcfVariantSites[3].OriginalAlleleFromVcf.ReferencePosition);

            Assert.Equal(140453137, nbhd.VcfVariantSites[4].VcfReferencePosition);
            Assert.Equal(140453138, nbhd.VcfVariantSites[4].TrueFirstBaseOfDiff);
            Assert.Equal("C", nbhd.VcfVariantSites[4].VcfReferenceAllele);
            Assert.Equal("CGTA", nbhd.VcfVariantSites[4].VcfAlternateAllele);
            Assert.Equal(9, nbhd.VcfVariantSites[4].OriginalAlleleFromVcf.ReferencePosition);
        }
Пример #28
0
        public void GetVeads()
        {
            var vcfNeighborhood = new VcfNeighborhood(new VariantCallingParameters(), "chr1", new VariantSite(10000), new VariantSite(200000), "T")
            {
                VcfVariantSites = new List <VariantSite>
                {
                    new VariantSite(100)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(400)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(505)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                    new VariantSite(703)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "T"
                    },
                    new VariantSite(800)
                    {
                        VcfReferenceAllele = "A", VcfAlternateAllele = "C"
                    },
                }
            };

            var reads = new List <Read>();

            reads.Add(CreateRead("chr1", "ACGT", 10));                     // Before neighborhood
            reads.Add(CreateRead("chr1", "ACGT", 96));                     // Ends right before neighborhood's first variant site
            reads.Add(CreateRead("chr1", "ACGT", 100));                    // Match (100)
            reads.Add(CreateRead("chr1", "ACGT", 300));                    // Within neighborhood but no VariantSite
            reads.Add(CreateRead("chr1", "ACGT", 400, qualityForAll: 19)); // Within neighbhorhood but low quals
            reads.Add(CreateRead("chr1", "ACGT", 500));                    // Within neighborhood but no VariantSite (ends right before 505)
            reads.Add(CreateRead("chr1", "ACGT", 700));                    // Match (703)
            reads.Add(CreateRead("chr1", "ACGT", 800));                    // Match (800)
            reads.Add(CreateRead("chr1", "ACGT", 805));                    // Past neighborhood
            reads.Add(CreateRead("chr1", "ACGT", 900));                    // Past neighborhood
            reads.Add(CreateRead("chr2", "ACGT", 100));                    // Wrong chromosome


            vcfNeighborhood.SetRangeOfInterest();

            var alignmentExtractor = new MockAlignmentExtractor(reads);


            var veadSource = new VeadGroupSource(alignmentExtractor,
                                                 new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            var veadGroups = veadSource.GetVeadGroups(vcfNeighborhood);

            // Collect all reads that could relate to the neighborhood
            // - Skip anything that has quality less than MinimumMapQuality
            // - Skip anything that ends before neighborhood begins
            // - Stop collecting once we've passed the end of the neighborhood

            // We should have collected the reads at 100, 700, and 800.
            Assert.Equal(801, vcfNeighborhood.LastPositionOfInterestWithLookAhead);
            Assert.Equal(3, veadGroups.Count());
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));
            foreach (var veadGroup in veadGroups)
            {
                Assert.Equal(1, veadGroup.NumVeads);
            }

            vcfNeighborhood.VcfVariantSites.Add(
                new VariantSite(790)
            {
                VcfReferenceAllele = "ACAGTGAAAGACTTGTGAC", VcfAlternateAllele = "C"
            });

            vcfNeighborhood.SetRangeOfInterest();
            Assert.Equal(809, vcfNeighborhood.LastPositionOfInterestWithLookAhead);

            alignmentExtractor = new MockAlignmentExtractor(reads);


            veadSource = new VeadGroupSource(alignmentExtractor,
                                             new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            veadGroups = veadSource.GetVeadGroups(vcfNeighborhood);

            Assert.Equal(3, veadGroups.Count());
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("100")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("700")));
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("800")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("805")));
            Assert.Equal(0, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("900")));

            // Boundary case - read ends exactly at neighborhood's first variant site

            reads = new List <Read>();
            reads.Add(CreateRead("chr1", "ACGT", 10)); // Before neighborhood
            reads.Add(CreateRead("chr1", "ACGT", 96)); // Ends right before neighborhood's first variant site
            reads.Add(CreateRead("chr1", "ACGT", 97)); // Ends exactly at neighborhood's first variant site

            alignmentExtractor = new MockAlignmentExtractor(reads);

            veadSource = new VeadGroupSource(alignmentExtractor, new BamFilterParameters()
            {
                MinimumMapQuality = 20
            }, false, "");

            veadGroups = veadSource.GetVeadGroups(vcfNeighborhood);

            // The veadgroup for 97 should be the only one
            Assert.Equal(1, veadGroups.Count());
            Assert.Equal(1, veadGroups.Count(v => v.RepresentativeVead.Name.EndsWith("97")));
            foreach (var veadGroup in veadGroups)
            {
                Assert.Equal(1, veadGroup.NumVeads);
            }
        }
Пример #29
0
        public void CallAVariantInANewLocation()
        {
            //set up the original variants
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 123, "A", "T", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 124, "A", "T", 1000, 156);
            var originalVcfVariant3 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);
            var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr1", 234, "A", "T", 1000, 156);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);
            var vs3 = new VariantSite(originalVcfVariant3);
            var vs4 = new VariantSite(originalVcfVariant4);

            var vcParams = new VariantCallingParameters();

            vcParams.Validate();
            var caller = new VariantCaller(vcParams, new BamFilterParameters());
            var nbhd   = new VcfNeighborhood(vcParams, 0, "chr1", vs1, vs2, "");

            nbhd.AddVariantSite(vs3, "RRRRR"); //note, we do not add vs4, that is not going to get used for phasing. Sps it is a variant that failed filters.
            nbhd.SetRangeOfInterest();

            //now stage one candidate MNV:
            var newMNV = new CalledAllele(AlleleCategory.Snv)
            {
                Chromosome        = "chr1",
                ReferencePosition = 129,
                ReferenceAllele   = "A",
                AlternateAllele   = "TT",
                VariantQscore     = 100,
                TotalCoverage     = 1000,
                AlleleSupport     = 500
            };


            nbhd.AddAcceptedPhasedVariant(newMNV);
            var suckedUpRefRecord1000 = new SuckedUpRefRecord()
            {
                Counts = 1000, AlleleThatClaimedIt = new CalledAllele()
            };

            nbhd.UsedRefCountsLookup = new Dictionary <int, SuckedUpRefRecord>()
            {
                { 124, suckedUpRefRecord1000 }
            };

            caller.CallMNVs(nbhd);
            caller.CallRefs(nbhd);

            var acceptedMNVs = nbhd.CalledVariants;
            var acceptedRefs = nbhd.CalledRefs;


            var vcfVariant0asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 123,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }
                    }
                },
            };

            var vcfVariant3asRef = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 234,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "0/." }
                    }
                },
            };

            var vcfVariant2asNull = new VcfVariant()
            {
                ReferenceName     = "chr1",
                ReferencePosition = 124,
                ReferenceAllele   = "A",
                VariantAlleles    = new[] { "." },
                Genotypes         = new List <Dictionary <string, string> >()
                {
                    new Dictionary <string, string>()
                    {
                        { "GT", "./." }
                    }
                },
            };

            Assert.Equal(1, acceptedMNVs.Count);
            Assert.Equal(1, acceptedMNVs[129].Count);

            Assert.Equal(3, acceptedRefs.Count);

            VcfMergerTests.CheckVariantsMatch(vcfVariant0asRef, acceptedRefs[123]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant2asNull, acceptedRefs[124]);
            VcfMergerTests.CheckVariantsMatch(newMNV, acceptedMNVs[129][0]);
            VcfMergerTests.CheckVariantsMatch(vcfVariant3asRef, acceptedRefs[234]);
        }
Пример #30
0
        public void WriteANbhd()
        {
            var outputFilePath   = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List <string>()
            {
            }, null);
            var reader = new VcfReader(inputFilePath, true);


            //set up the original variants
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
            var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
            var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

            var vs1 = new VariantSite((originalVcfVariant1));
            var vs2 = new VariantSite((originalVcfVariant2));
            var vs4 = new VariantSite((originalVcfVariant4));
            var vs5 = new VariantSite((originalVcfVariant5));


            //have to replace variants at positon 116380048 (we call two new MNVS here)
            var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr2", vs1, vs2, "");

            nbhd1.SetRangeOfInterest();

            //have to replace variants at positon 116380051 and 52  (we call one new MNV at 51)
            var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr7", vs4, vs5, "");

            nbhd2.SetRangeOfInterest();


            VcfMerger           merger         = new VcfMerger(reader);
            List <CalledAllele> allelesPastNbh = new List <CalledAllele>();

            nbhd1.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                { originalVcfVariant1.ReferencePosition, new List <CalledAllele> {
                      originalVcfVariant1, originalVcfVariant2
                  } }
            };
            nbhd2.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                { originalVcfVariant4.ReferencePosition, new List <CalledAllele> {
                      originalVcfVariant4
                  } }
            };


            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);

            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);

            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

            merger.WriteRemainingVariants(writer, allelesPastNbh);

            writer.Dispose();

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count(), outputLines.Count());

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }