Ejemplo n.º 1
0
        /// <summary>
        /// Creates a realigner for one chromosome: stores the supplied collaborators and tuning options,
        /// picks an alignment comparer, and builds the <c>ReadRealigner</c> used for individual reads.
        /// </summary>
        /// <param name="knownIndels">Optional known indels. Each one is wrapped in a <c>CandidateIndel</c>; a null list is stored as null.</param>
        /// <param name="tryThree">Forwarded to the <c>ReadRealigner</c> constructor.</param>
        /// <param name="remaskSoftclips">Forwarded to the <c>ReadRealigner</c> constructor.</param>
        /// <param name="maskPartialInsertion">Forwarded to the <c>ReadRealigner</c> constructor.</param>
        /// <param name="minimumUnanchoredInsertionLength">Forwarded to the <c>ReadRealigner</c> constructor.</param>
        /// <param name="alignmentScorer">Optional scorer. When supplied, a <c>ScoredAlignmentComparer</c> built on it is used; otherwise a <c>BasicAlignmentComparer</c>.</param>
        public ChrRealigner(
            ChrReference chrReference,
            IAlignmentExtractor extractorForCandidates,
            IAlignmentExtractor extractorForRealign,
            IIndelCandidateFinder indelFinder,
            IIndelRanker indelRanker,
            ITargetCaller caller,
            RealignStateManager stateManager,
            IRealignmentWriter writer,
            List<CandidateAllele> knownIndels = null,
            int maxIndelSize = 25,
            bool tryThree = false,
            int anchorSizeThreshold = 10,
            bool skipDuplicates = false,
            bool skipAndRemoveDuplicates = false,
            bool remaskSoftclips = true,
            bool maskPartialInsertion = false,
            int minimumUnanchoredInsertionLength = 0,
            bool tryRealignCleanSoftclippedReads = true,
            bool allowRescoringOrig0 = true,
            int maxRealignShift = 250,
            AlignmentScorer alignmentScorer = null,
            bool debug = false)
        {
            // Collaborators supplied by the caller.
            _chrReference = chrReference;
            _extractorForCandidates = extractorForCandidates;
            _extractorForRealign = extractorForRealign;
            _indelFinder = indelFinder;
            _indelRanker = indelRanker;
            _caller = caller;
            _stateManager = stateManager;
            _writer = writer;

            // Known indels are converted up front; "no list" stays null rather than becoming an empty list.
            if (knownIndels == null)
            {
                _knownIndels = null;
            }
            else
            {
                _knownIndels = knownIndels.Select(known => new CandidateIndel(known)).ToList();
            }

            // Tuning options.
            _maxIndelSize = maxIndelSize;
            _anchorSizeThreshold = anchorSizeThreshold;
            _skipDuplicates = skipDuplicates;
            _skipAndRemoveDuplicates = skipAndRemoveDuplicates;
            _allowRescoringOrig0 = allowRescoringOrig0;
            _maxRealignShift = maxRealignShift;
            _tryRealignCleanSoftclippedReads = tryRealignCleanSoftclippedReads;
            _alignmentScorer = alignmentScorer;
            _debug = debug;

            // Without a scorer, fall back to the basic comparer.
            if (alignmentScorer == null)
            {
                _alignmentComparer = new BasicAlignmentComparer();
            }
            else
            {
                _alignmentComparer = new ScoredAlignmentComparer(alignmentScorer);
            }

            _readRealigner = new ReadRealigner(
                _alignmentComparer,
                tryThree,
                remaskSoftclips,
                maskPartialInsertion,
                minimumUnanchoredInsertionLength);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the allele caller for one chromosome, configured from the values held in <c>_options</c>.
        /// </summary>
        /// <param name="chrReference">Reference for the chromosome being called; its name is also passed to the variant collapser factory.</param>
        /// <param name="intervalSet">Intervals handed to the allele caller unchanged.</param>
        /// <returns>A new <c>AlleleCaller</c>.</returns>
        protected virtual IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet)
        {
            var coverageCalculator = CreateCoverageCalculator();

            var genotypeCalculator = GenotypeCreator.CreateGenotypeCalculator(
                _options.PloidyModel,
                _options.FilteredVariantFrequency,
                _options.MinimumDepth,
                _options.DiploidThresholdingParameters,
                _options.MinimumGenotypeQScore,
                _options.MaximumGenotypeQScore);

            // The configuration is built first so that it is evaluated before the collapser, as before.
            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls = _options.OutputgVCFFiles,
                MinVariantQscore = _options.MinimumVariantQScore,
                MaxVariantQscore = _options.MaximumVariantQScore,
                MinGenotypeQscore = _options.MinimumGenotypeQScore,
                MaxGenotypeQscore = _options.MaximumGenotypeQScore,
                VariantQscoreFilterThreshold = _options.FilteredVariantQScore,
                MinCoverage = _options.MinimumDepth,
                MinFrequency = _options.MinimumFrequency,
                EstimatedBaseCallQuality = GetEstimatedBaseCallQuality(),
                StrandBiasModel = _options.StrandBiasModel,
                StrandBiasFilterThreshold = _options.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants = _options.FilterOutVariantsPresentOnlyOneStrand,
                GenotypeCalculator = genotypeCalculator,
                VariantFreqFilter = _options.FilteredVariantFrequency,
                LowGTqFilter = _options.LowGenotypeQualityFilter,
                IndelRepeatFilter = _options.IndelRepeatFilter,
                LowDepthFilter = _options.LowDepthFilter,
                ChrReference = chrReference,
                RMxNFilterSettings = new RMxNFilterSettings
                {
                    RMxNFilterMaxLengthRepeat = _options.RMxNFilterMaxLengthRepeat,
                    RMxNFilterMinRepetitions = _options.RMxNFilterMinRepetitions,
                    RMxNFilterFrequencyLimit = _options.RMxNFilterFrequencyLimit
                },
                NoiseModel = _options.NoiseModel
            };

            var variantCollapser = CreateVariantCollapser(chrReference.Name, coverageCalculator);

            return new AlleleCaller(callerConfig, intervalSet, variantCollapser, coverageCalculator);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates the region mapper used for padding, or returns null when no padder is needed.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <paramref name="includeReference"/> is not read here; the decision uses
 /// <c>_options.VcfWritingParameters.OutputGvcfFile</c> instead. Confirm that this is intended.
 /// </remarks>
 /// <returns>
 /// A <c>RegionMapper</c> when <paramref name="intervalSet"/> is non-null and gVCF output is enabled; otherwise null.
 /// </returns>
 protected virtual IRegionMapper CreateRegionPadder(ChrReference chrReference, ChrIntervalSet intervalSet, bool includeReference)
 {
     // No intervals means there is nothing to pad.
     if (intervalSet == null)
     {
         return null;
     }

     // Padding only matters when gVCF output is being written.
     if (!_options.VcfWritingParameters.OutputGvcfFile)
     {
         return null;
     }

     return new RegionMapper(chrReference, intervalSet, _options.BamFilterParameters.MinimumBaseCallQuality);
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Given a list of raw (non-genome-contextualized) indels to realign around, returns a list of hashable,
 /// contextualized indels. Delegates to the three-argument overload, passing this instance's debug flag.
 /// </summary>
 /// <param name="chrom">Chromosome name. Not read by this overload.</param>
 /// <param name="indelsForChrom">Raw indels for the chromosome.</param>
 /// <param name="chrReference">Reference for the chromosome, forwarded to the overload.</param>
 /// <returns>The list produced by the three-argument overload.</returns>
 public List<HashableIndel> GetFinalIndelsForChromosome(string chrom, List<PreIndel> indelsForChrom, ChrReference chrReference)
 {
     var contextualizedIndels = GetFinalIndelsForChromosome(indelsForChrom, chrReference, _debug);

     return contextualizedIndels;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the allele caller for one chromosome from <c>_options.VariantCallingParameters</c> and
        /// registers any forced-genotype alleles on it.
        /// </summary>
        /// <param name="chrReference">Reference for the chromosome being called.</param>
        /// <param name="intervalSet">Intervals handed to the allele caller unchanged.</param>
        /// <param name="alignmentSource">Passed to the coverage calculator factory.</param>
        /// <param name="forceGtAlleles">Optional forced-genotype alleles; passed to <c>AddForcedGtAlleles</c> as given (including null).</param>
        /// <returns>The configured <c>AlleleCaller</c>.</returns>
        protected virtual IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet, IAlignmentSource alignmentSource, HashSet<Tuple<string, int, string, string>> forceGtAlleles = null)
        {
            var callingParameters = _options.VariantCallingParameters;
            var coverageCalculator = CreateCoverageCalculator(alignmentSource);

            var genotypeCalculator = GenotypeCreator.CreateGenotypeCalculator(
                callingParameters.PloidyModel,
                callingParameters.MinimumFrequencyFilter,
                callingParameters.MinimumCoverage,
                callingParameters.DiploidSNVThresholdingParameters,
                callingParameters.DiploidINDELThresholdingParameters,
                callingParameters.AdaptiveGenotypingParameters,
                callingParameters.MinimumGenotypeQScore,
                callingParameters.MaximumGenotypeQScore,
                callingParameters.TargetLODFrequency,
                callingParameters.MinimumFrequency,
                chrReference.Name,
                callingParameters.IsMale);

            genotypeCalculator.SetMinFreqFilter(callingParameters.MinimumFrequencyFilter);

            // Diploid-by-thresholding gets its own locus processor; every other ploidy model uses the somatic one.
            ILocusProcessor locusProcessor;
            if (callingParameters.PloidyModel == PloidyModel.DiploidByThresholding)
            {
                locusProcessor = new DiploidLocusProcessor();
            }
            else
            {
                locusProcessor = new SomaticLocusProcessor();
            }

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls = _options.VcfWritingParameters.OutputGvcfFile,
                MinVariantQscore = callingParameters.MinimumVariantQScore,
                MaxVariantQscore = callingParameters.MaximumVariantQScore,
                MinGenotypeQscore = callingParameters.MinimumGenotypeQScore,
                MaxGenotypeQscore = callingParameters.MaximumGenotypeQScore,
                VariantQscoreFilterThreshold = callingParameters.MinimumVariantQScoreFilter,
                NoCallFilterThreshold = callingParameters.NoCallFilterThreshold,
                AmpliconBiasFilterThreshold = callingParameters.AmpliconBiasFilterThreshold,
                MinCoverage = callingParameters.MinimumCoverage,
                // The frequency thresholds come from the genotype calculator rather than directly from the options.
                MinFrequency = genotypeCalculator.MinVarFrequency,
                NoiseLevelUsedForQScoring = callingParameters.NoiseLevelUsedForQScoring,
                StrandBiasModel = callingParameters.StrandBiasModel,
                StrandBiasFilterThreshold = callingParameters.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants = callingParameters.FilterOutVariantsPresentOnlyOneStrand,
                GenotypeCalculator = genotypeCalculator,
                VariantFreqFilter = genotypeCalculator.MinVarFrequencyFilter,
                LowGTqFilter = callingParameters.LowGenotypeQualityFilter,
                IndelRepeatFilter = callingParameters.IndelRepeatFilter,
                LowDepthFilter = callingParameters.LowDepthFilter,
                ChrReference = chrReference,
                RMxNFilterSettings = new RMxNFilterSettings
                {
                    RMxNFilterMaxLengthRepeat = callingParameters.RMxNFilterMaxLengthRepeat,
                    RMxNFilterMinRepetitions = callingParameters.RMxNFilterMinRepetitions,
                    RMxNFilterFrequencyLimit = callingParameters.RMxNFilterFrequencyLimit
                },
                NoiseModel = callingParameters.NoiseModel,
                LocusProcessor = locusProcessor
            };

            var variantCollapser = CreateVariantCollapser(chrReference.Name, coverageCalculator);
            var alleleCaller = new AlleleCaller(callerConfig, intervalSet, variantCollapser, coverageCalculator);

            alleleCaller.AddForcedGtAlleles(forceGtAlleles);

            return alleleCaller;
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Returns the mocked somatic variant caller when one has been set; otherwise defers to the base factory.
 /// </summary>
 public override ISomaticVariantCaller CreateSomaticVariantCaller(ChrReference chrReference, string bamFilePath, IVcfWriter vcfWriter, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
 {
     if (MockSomaticVariantCaller != null)
     {
         return MockSomaticVariantCaller.Object;
     }

     return base.CreateSomaticVariantCaller(chrReference, bamFilePath, vcfWriter, biasFileWriter, intervalFilePath);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Populates a 50-base region with two SNV candidates and a handful of allele counts, then checks that
        /// <c>GetAllCandidates</c> returns the expected alleles for the given reference/interval combination.
        /// </summary>
        /// <param name="withReference">Whether reference alleles are requested from the region.</param>
        /// <param name="withIntervals">Whether an interval set (3-6 and 16-16) is supplied.</param>
        public void ExecuteTest_GetCandidates(bool withReference, bool withIntervals)
        {
            const string chrName = "chr1";

            var region = new RegionState(1, 50);
            var reference = new ChrReference
            {
                Name = chrName,
                Sequence = new string('A', 50)
            };

            var firstSnv = new CandidateAllele(chrName, 5, "A", "T", AlleleCategory.Snv)
            {
                SupportByDirection = new[] { 10, 5, 0 }
            };
            var secondSnv = new CandidateAllele(chrName, 15, "A", "T", AlleleCategory.Snv)
            {
                SupportByDirection = new[] { 10, 5, 0 }
            };

            region.AddCandidate(firstSnv);
            region.AddCandidate(secondSnv);

            // Five rounds of counts, so every populated (position, allele, direction) cell ends up at 5.
            for (var round = 0; round < 5; round++)
            {
                region.AddAlleleCount(5, AlleleType.A, DirectionType.Stitched);  // ref @ variant position
                region.AddAlleleCount(6, AlleleType.A, DirectionType.Stitched);  // ref by itself
                region.AddAlleleCount(10, AlleleType.C, DirectionType.Stitched); // nonref by itself (no ref)
                region.AddAlleleCount(15, AlleleType.A, DirectionType.Reverse);  // ref (multiple directions) + nonref
                region.AddAlleleCount(15, AlleleType.A, DirectionType.Forward);
                region.AddAlleleCount(15, AlleleType.T, DirectionType.Reverse);
            }

            var intervals = withIntervals
                ? new ChrIntervalSet(new List<CallSomaticVariants.Logic.RegionState.Region>
                {
                    new CallSomaticVariants.Logic.RegionState.Region(3, 6),
                    new CallSomaticVariants.Logic.RegionState.Region(16, 16)
                }, chrName)
                : null;

            var expected = new List<CandidateAllele> { firstSnv, secondSnv };

            if (withReference)
            {
                // Reference alleles expected at positions that carry any coverage, with their support.
                var coveredPositions = new[] { 5, 6, 10, 15 };
                var coveredSupport = new[]
                {
                    new[] { 0, 0, 5 },
                    new[] { 0, 0, 5 },
                    new[] { 0, 0, 0 },
                    new[] { 5, 5, 0 }
                };

                for (var k = 0; k < coveredPositions.Length; k++)
                {
                    expected.Add(new CandidateAllele(chrName, coveredPositions[k], "A", "A", AlleleCategory.Reference)
                    {
                        SupportByDirection = coveredSupport[k]
                    });
                }
            }

            if (withIntervals)
            {
                // Only reference alleles at positions 5 and 6 survive; non-reference alleles are always kept.
                expected.RemoveAll(c => c.Coordinate != 5 && c.Coordinate != 6 && c.Type == AlleleCategory.Reference);

                if (withReference)
                {
                    // Interval positions without coverage still get zero-support reference alleles.
                    var uncoveredPositions = new[] { 3, 4, 16 };

                    foreach (var uncovered in uncoveredPositions)
                    {
                        expected.Add(new CandidateAllele(chrName, uncovered, "A", "A", AlleleCategory.Reference)
                        {
                            SupportByDirection = new[] { 0, 0, 0 }
                        });
                    }
                }
            }

            var actual = region.GetAllCandidates(withReference, reference, intervals);

            VerifyCandidates(expected, actual);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Exercises <c>HashableIndelSource.GetFinalIndelsForChromosome</c> over several scenarios: similar and
        /// near-duplicate insertions, homopolymer stutter, very long insertions, long deletions, and homopolymer
        /// deletions whose relative scores are varied. Each scenario builds its own pre-indel list and reference
        /// sequence and then checks how many indels are returned and, where relevant, which ones.
        /// </summary>
        public void GetFinalIndelsForChromosome()
        {
            // Scenario 1: a mix of strong (score 100) and weak (score 20) indels, some near-duplicates of others.
            var preIndels  = new List <PreIndel>();
            var insertion1 = new PreIndel(new CandidateAllele("chr1", 100, "N", "NGA", AlleleCategory.Insertion));

            insertion1.Score = 100;
            var deletion = new PreIndel(new CandidateAllele("chr1", 5, "NNNN", "N", AlleleCategory.Deletion));

            deletion.Score = 100;
            var insertionSimilarToIns1 = new PreIndel(new CandidateAllele("chr1", 100, "N", "NGC", AlleleCategory.Insertion));

            insertionSimilarToIns1.Score = 20;
            var insertion2 = new PreIndel(new CandidateAllele("chr1", 302, "N", "NTCATCA", AlleleCategory.Insertion));

            insertion2.Score = 100;
            var insertionSimilarConsequenceToIns2 = new PreIndel(new CandidateAllele("chr1", 305, "N", "NTCATGA", AlleleCategory.Insertion));

            insertionSimilarConsequenceToIns2.Score = 20;
            var insertionNotSimilarEnoughConsequenceToIns2 = new PreIndel(new CandidateAllele("chr1", 305, "N", "NTCAGTA", AlleleCategory.Insertion));

            insertionNotSimilarEnoughConsequenceToIns2.Score = 20;
            var insertionContainingInsertion2 = new PreIndel(new CandidateAllele("chr1", 302, "N", "NTCATCATCATCA", AlleleCategory.Insertion));

            insertionContainingInsertion2.Score = 20;
            // TODO add edge cases in terms of score, negative cases in terms of different variant types

            preIndels = new List <PreIndel>()
            {
                deletion, insertion1, insertionSimilarToIns1,
                insertion2, insertionSimilarConsequenceToIns2, insertionNotSimilarEnoughConsequenceToIns2,
                insertionContainingInsertion2
            };

            // insertionSimilarToIns1 is removed for being very similar to insertion 1 and much lower quality
            // insertionSimilarConsequenceToIns2 is removed for having almost the exact same consequence as insertion 2 and much lower quality
            // insertionNotSimilarEnoughConsequenceToIns2 is pretty close to insertion 2 in terms of consequence, and weaker, but not similar enough, so can stay
            // insertionContainingInsertion2 has exact same nearby consequence and position as insertion 2 but it is hard to call, being a long dup. so it gets to stay.

            var indelSource  = new HashableIndelSource();
            var chrReference = new ChrReference()
            {
                FastaPath = "abc", Name = "chr1",
                Sequence  = new string('A', 99) + new string('T', 5) + new string('C', 195) +
                            // 99 + 5 + 195 = 299 bases precede the TCA repeat, so the repeat starts at 1-based position 300
                            string.Join("", Enumerable.Repeat("TCA", 20)) + new string('G', 300)
            };

            var finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);

            // Rehydrate with reference sequence and keep the right ones
            // (the "N" placeholders above are expected to come back as the real reference bases).
            Assert.Equal(5, finalIndels.Count);
            EnsureIndelNotPresent(finalIndels, insertionSimilarToIns1.ReferencePosition, "A", "AGC");
            EnsureIndelNotPresent(finalIndels, insertionSimilarConsequenceToIns2.ReferencePosition, "A", "ATCATGA");
            var ins1 = CheckForIndel(finalIndels, 100, "T", "TGA", 100);

            Assert.False(ins1.IsDuplication);
            Assert.False(ins1.IsRepeat);
            var del = CheckForIndel(finalIndels, 5, "AAAA", "A", 100);

            Assert.False(del.IsDuplication);
            Assert.True(del.IsRepeat);
            var ins2 = CheckForIndel(finalIndels, 302, "A", "ATCATCA", 100);

            Assert.True(ins2.IsRepeat);
            Assert.True(ins2.IsDuplication);
            var ins2NotSimilarEnough = CheckForIndel(finalIndels, 305, "A", "ATCAGTA", 20);

            Assert.True(ins2NotSimilarEnough.IsRepeat);
            Assert.False(ins2NotSimilarEnough.IsDuplication);
            var longerInsertion = CheckForIndel(finalIndels, 302, "A", "ATCATCATCATCA", 20);

            Assert.True(longerInsertion.IsRepeat);
            Assert.True(longerInsertion.IsDuplication);
            Assert.True(longerInsertion.HardToCall);

            // Scenario 2: should handle scenario of stutter
            //         012345678901234567890
            // ...CCCCCCGGGGGTTTTTAAAAATATATA
            //              *ins TGG
            //          *ins GGG
            // ...CCCCCCGGGGGTGGTTTTTAAAAATATATA
            // ...CCCCCCGGGGGGGGTTTTTAAAAATATATA
            var homopolymerIns = new PreIndel(new CandidateAllele("chr1", 300, "N", "NGGG", AlleleCategory.Insertion));

            homopolymerIns.Score = 100;
            var homopolymerInsWithStutter = new PreIndel(new CandidateAllele("chr1", 305, "N", "NTGG", AlleleCategory.Insertion));

            homopolymerInsWithStutter.Score = 10;
            preIndels = new List <PreIndel>()
            {
                homopolymerIns, homopolymerInsWithStutter
            };

            indelSource  = new HashableIndelSource();
            chrReference = new ChrReference()
            {
                FastaPath = "abc",
                Name      = "chr1",
                Sequence  = new string('C', 300) + "GGGGGTTTTTAAAAATATATA" + new string('G', 300)
            };
            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            // Only one of the two insertions is expected to be returned.
            Assert.Equal(1, finalIndels.Count);

            // Scenario 3: two very long insertions, modelled on these observed variants:
            //chr1: 125080780 N > NTTTGATTCCATTCGATGATCACTACATTCAGTTCCATTCAATGATGATTCCAACAGATTCCATTTGGTGACTCCATTCGATTCTATTCATTGATGATTCCA
            //chr1: 125080854 N > NATTCGATTCTATTCATTGATGATTCCATTTGATTCCATTCGATGATGACTGCCTTCAGTTCCATTCGGTGATGATTCCAACAGATTCCATTTGGTGACTCA
            var realLongIns1 = new PreIndel(new CandidateAllele("chr1", 780, "N", "NTTTGATTCCATTCGATGATCACTACATTCAGTTCCATTCAATGATGATTCCAACAGATTCCATTTGGTGACTCCATTCGATTCTATTCATTGATGATTCCA", AlleleCategory.Insertion));

            realLongIns1.Score = 100;
            var realLongIns2 = new PreIndel(new CandidateAllele("chr1", 854, "N", "NATTCGATTCTATTCATTGATGATTCCATTTGATTCCATTCGATGATGACTGCCTTCAGTTCCATTCGGTGATGATTCCAACAGATTCCATTTGGTGACTCA", AlleleCategory.Insertion));

            realLongIns2.Score = 20;
            preIndels          = new List <PreIndel>()
            {
                realLongIns1, realLongIns2
            };

            indelSource  = new HashableIndelSource();
            chrReference = new ChrReference()
            {
                FastaPath = "abc",
                Name      = "chr1",
                Sequence  = new string('A', 3000)
            };

            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            // Both long insertions are expected to be kept.
            Assert.Equal(2, finalIndels.Count);

            // Scenario 4: long deletion - should adjust snippet width to accommodate
            var longDel1 = new PreIndel(new CandidateAllele("chr1", 100, new string('N', 200), "N", AlleleCategory.Deletion));

            longDel1.Score = 100;
            var longDel2 = new PreIndel(new CandidateAllele("chr1", 150, new string('N', 200), "N", AlleleCategory.Deletion));

            longDel2.Score = 20;
            preIndels      = new List <PreIndel>()
            {
                longDel1, longDel2
            };

            indelSource  = new HashableIndelSource();
            chrReference = new ChrReference()
            {
                FastaPath = "abc",
                Name      = "chr1",
                Sequence  = new string('A', 100) + new string('T', 100) + new string('C', 1000)
            };

            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            // With a 100-base T run, both deletions are expected to be kept.
            Assert.Equal(2, finalIndels.Count);

            // Same pre-indels and same indel source, but the T run is now 500 bases long.
            chrReference = new ChrReference()
            {
                FastaPath = "abc",
                Name      = "chr1",
                Sequence  = new string('A', 100) + new string('T', 500) + new string('C', 1000)
            };

            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            // Against the longer T run only one of the two deletions is expected to be returned.
            Assert.Equal(1, finalIndels.Count);


            // Scenario 5: three homopolymer-adjacent deletions with similar consequences.
            //         012345678901234567890
            // ...CCCCCCGGGGGGGGAGGTTTTTAAAAATATATA
            // ...CCCCCC---GGGGGAGGTTTTTAAAAATATATA // del 1
            // ...CCCCCCGGGGGGGG---TTTTTAAAAATATATA // del 2
            // ...CCCCCCGGGGGGGGA---TTTTAAAAATATATA // del 3
            // ...CCCCCCGGGGGAGGTTTTTAAAAATATATA // effective 1
            // ...CCCCCCGGGGGGGGTTTTTAAAAATATATA // effective 2
            // ...CCCCCCGGGGGGGGATTTTAAAAATATATA // effective 3 - edit distance of 2 from eff1, 1 from eff2

            var homopolymerDel = new PreIndel(new CandidateAllele("chr1", 300, "NNNN", "N", AlleleCategory.Deletion));

            homopolymerDel.Score = 100;
            var homopolymerDelMuchWeakerOneMismatch = new PreIndel(new CandidateAllele("chr1", 308, "NNNN", "N", AlleleCategory.Deletion));

            homopolymerDelMuchWeakerOneMismatch.Score = 10;
            var homopolymerDelMuchWeakerTwoMismatch = new PreIndel(new CandidateAllele("chr1", 309, "NNNN", "N", AlleleCategory.Deletion));

            homopolymerDelMuchWeakerTwoMismatch.Score = 10;
            preIndels = new List <PreIndel>()
            {
                homopolymerDel, homopolymerDelMuchWeakerOneMismatch, homopolymerDelMuchWeakerTwoMismatch
            };

            indelSource  = new HashableIndelSource();
            chrReference = new ChrReference()
            {
                FastaPath = "abc",
                Name      = "chr1",
                Sequence  = new string('C', 300) + "GGGGGGGGAGGTTTTTAAAAATATATA" + new string('G', 300)
            };
            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            // Scores 100 / 10 / 10: the deletion at 308 is expected to be dropped, the other two kept.
            Assert.Equal(2, finalIndels.Count);
            CheckForIndel(finalIndels, 300, "CGGG", "C", 100);
            EnsureIndelNotPresent(finalIndels, 308, "GAGG", "G");
            CheckForIndel(finalIndels, 309, "AGGT", "A", 10);

            // Same deletions but flip the scores -- The deletions have very similar consequences, but there is not a clear stronger deletion, which makes us less confident that these are mismatching versions of the same deletion. Keep all.
            // The scores are changed on the same PreIndel instances that are already in preIndels.
            homopolymerDelMuchWeakerTwoMismatch.Score = 60;
            homopolymerDelMuchWeakerOneMismatch.Score = 60;
            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            Assert.Equal(3, finalIndels.Count);
            CheckForIndel(finalIndels, 300, "CGGG", "C", 100);
            CheckForIndel(finalIndels, 308, "GAGG", "G", 60);
            CheckForIndel(finalIndels, 309, "AGGT", "A", 60);

            // Same deletions but flip the scores -- The strongest deletion is edit distance of 1 away from both of the others
            // Scores are now 40 / 100 / 10 for positions 300 / 308 / 309; only the deletion at 308 is expected to remain.
            homopolymerDel.Score = 40;
            homopolymerDelMuchWeakerTwoMismatch.Score = 10;
            homopolymerDelMuchWeakerOneMismatch.Score = 100;
            finalIndels = indelSource.GetFinalIndelsForChromosome("chr1", preIndels, chrReference);
            Assert.Equal(1, finalIndels.Count);
            EnsureIndelNotPresent(finalIndels, 300, "CGGG", "C");
            CheckForIndel(finalIndels, 308, "GAGG", "G", 100);
            EnsureIndelNotPresent(finalIndels, 309, "AGGT", "A");
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Returns the mocked variant caller when one has been set; otherwise defers to the base factory.
 /// </summary>
 protected override IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet)
 {
     if (MockVariantCaller != null)
     {
         return MockVariantCaller.Object;
     }

     return base.CreateVariantCaller(chrReference, intervalSet);
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns every candidate allele stored for this block and, when reference alleles are requested or
        /// forced-genotype alleles are supplied, reference alleles for the positions of interest.
        /// </summary>
        /// <param name="includeRefAlleles">
        /// When true, reference alleles are gathered over <paramref name="intervals"/> (or the whole block if
        /// that is null). When false, intervals are derived from <paramref name="forcesGtAlleles"/> instead.
        /// </param>
        /// <param name="chrReference">Supplies the reference bases and the chromosome name for the reference alleles.</param>
        /// <param name="intervals">Optional intervals; only consulted when <paramref name="includeRefAlleles"/> is true.</param>
        /// <param name="forcesGtAlleles">Optional forced-genotype alleles.</param>
        /// <returns>Stored candidates followed by any reference alleles that were added.</returns>
        public List<CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                      ChrIntervalSet intervals = null, HashSet<Tuple<string, int, string, string>> forcesGtAlleles = null)
        {
            var collected = new List<CandidateAllele>();

            // add all candidates - these are potentially collapsable targets
            foreach (var candidatesAtPosition in _candidateVariantsLookup)
            {
                if (candidatesAtPosition == null)
                {
                    continue;
                }

                collected.AddRange(candidatesAtPosition);
            }

            var activeIntervals = includeRefAlleles ? intervals : CreateIntervalsFromAllels(chrReference, forcesGtAlleles);

            // Reference alleles are only needed when asked for explicitly or when forced alleles exist.
            if (!includeRefAlleles && (forcesGtAlleles == null || forcesGtAlleles.Count == 0))
            {
                return collected;
            }

            var regionsToFetch = activeIntervals != null
                ? activeIntervals.GetClipped(this)  // clip intervals to block region
                : new List<Region> { this };        // fetch whole block region

            foreach (var region in regionsToFetch)
            {
                for (var position = region.StartPosition; position <= region.EndPosition; position++)
                {
                    // Stop walking this region once it runs past the end of the reference sequence.
                    if (position > chrReference.Sequence.Length)
                    {
                        break;
                    }

                    var offsetInBlock = position - StartPosition;
                    var refBase = chrReference.Sequence[position - 1].ToString();
                    var refBaseIndex = (int)AlleleHelper.GetAlleleType(refBase);
                    var refAllele = new CandidateAllele(chrReference.Name, position, refBase, refBase, AlleleCategory.Reference);

                    // Sum support over every allele type and direction; only the counts for the reference base
                    // are recorded on the reference allele itself.
                    var supportAtPosition = 0;

                    for (var alleleType = 0; alleleType < Constants.NumAlleleTypes; alleleType++)
                    {
                        for (var direction = 0; direction < Constants.NumDirectionTypes; direction++)
                        {
                            var directionalCount = 0;
                            for (int anchor = 0; anchor < NumAnchorIndexes; anchor++)
                            {
                                directionalCount += _alleleCounts[offsetInBlock, alleleType, direction, anchor];
                            }

                            if (alleleType == refBaseIndex)
                            {
                                refAllele.SupportByDirection[direction] = directionalCount;

                                // TODO this isn't really proven to be well-anchored, nor is it proven not to be,
                                // so WellAnchoredSupportByDirection is deliberately left untouched here.
                            }

                            supportAtPosition += directionalCount;
                        }
                    }

                    // note that zero coverage ref positions are only added if intervals are in use
                    if (activeIntervals != null || supportAtPosition > 0)
                    {
                        collected.Add(refAllele);
                    }
                }
            }

            return collected;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Clears the allele's existing filters and re-applies every filter whose condition is met.
        /// Depth and variant Q-score filters apply to all alleles; the remaining filters are skipped for
        /// reference alleles.
        /// </summary>
        /// <param name="allele">Allele whose filter list is rebuilt in place.</param>
        /// <param name="minCoverageFilter">Coverage below this value adds <c>LowDepth</c>; null disables the check.</param>
        /// <param name="variantQscoreThreshold">Q-score below this value adds <c>LowVariantQscore</c> (unless coverage is zero); null disables the check.</param>
        /// <param name="filterSingleStrandVariants">When true, variants not present on both strands get <c>StrandBias</c>.</param>
        /// <param name="variantFreqFilter">Frequency below this value adds <c>LowVariantFrequency</c>; null disables the check.</param>
        /// <param name="lowGenotypeqFilter">Not read by this method.</param>
        /// <param name="indelRepeatFilter">When set and positive, repeat lengths at or above it add <c>IndelRepeatLength</c>.</param>
        /// <param name="rMxNFilterSettings">Passed to <c>RMxNCalculator.ShouldFilter</c>.</param>
        /// <param name="noCallFilter">No-call fraction above this value adds <c>NoCall</c>; null disables the check.</param>
        /// <param name="ampliconBiasFilter">Only its presence is checked; the value itself is not read here.</param>
        /// <param name="hasStitchedSource">When true, alternate alleles containing "N" get <c>StrandBias</c>.</param>
        /// <param name="chrReference">Supplies the reference sequence for the repeat-based filters.</param>
        private static void ApplyFilters(CalledAllele allele, int? minCoverageFilter, int? variantQscoreThreshold, bool filterSingleStrandVariants, float? variantFreqFilter, float? lowGenotypeqFilter, int? indelRepeatFilter,
                                         RMxNFilterSettings rMxNFilterSettings, float? noCallFilter, float? ampliconBiasFilter, bool hasStitchedSource, ChrReference chrReference)
        {
            // Start from a clean slate so stale filters never survive a re-evaluation.
            allele.Filters.Clear();

            var depthTooLow = minCoverageFilter.HasValue && allele.TotalCoverage < minCoverageFilter;
            if (depthTooLow)
            {
                allele.AddFilter(FilterType.LowDepth);
            }

            // note we wont flag it for Qscore, if its got zero depth, because in that case, the Q score calc was not made anyway.
            var qscoreTooLow = variantQscoreThreshold.HasValue
                               && allele.VariantQscore < variantQscoreThreshold
                               && allele.TotalCoverage != 0;
            if (qscoreTooLow)
            {
                allele.AddFilter(FilterType.LowVariantQscore);
            }

            // Everything below only applies to non-reference alleles.
            if (allele.Type == AlleleCategory.Reference)
            {
                return;
            }

            //No call filter
            if (noCallFilter.HasValue && allele.FractionNoCalls > noCallFilter)
            {
                allele.AddFilter(FilterType.NoCall);
            }

            var strandBiased = !allele.StrandBiasResults.BiasAcceptable
                               || (filterSingleStrandVariants && !allele.StrandBiasResults.VarPresentOnBothStrands);
            if (strandBiased)
            {
                allele.AddFilter(FilterType.StrandBias);
            }

            var ampliconBiased = allele.AmpliconBiasResults != null
                                 && allele.AmpliconBiasResults.BiasDetected
                                 && ampliconBiasFilter.HasValue;
            if (ampliconBiased)
            {
                allele.AddFilter(FilterType.AmpliconBias);
            }

            if (indelRepeatFilter.HasValue && indelRepeatFilter > 0)
            {
                var repeatLength = ComputeIndelRepeatLength(allele, chrReference.Sequence);
                if (repeatLength >= indelRepeatFilter)
                {
                    allele.AddFilter(FilterType.IndelRepeatLength);
                }
            }

            if (RMxNCalculator.ShouldFilter(allele, rMxNFilterSettings, chrReference.Sequence))
            {
                allele.AddFilter(FilterType.RMxN);
            }

            if (variantFreqFilter.HasValue && allele.Frequency < variantFreqFilter)
            {
                allele.AddFilter(FilterType.LowVariantFrequency);
            }

            // can only happen for insertions and MNVs
            if (hasStitchedSource && allele.AlternateAllele.Contains("N"))
            {
                allele.AddFilter(FilterType.StrandBias);
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Documents how the indel-repeat (R8) filter behaves for an A-to-AA insertion placed at
        /// positions inside, at the end of, and beyond a homopolymer reference.
        /// </summary>
        public void IndelRepeat_ChromosomeEdgeCases()
        {
            // Reference made of 75 consecutive 'A' bases.
            var chrReference = new ChrReference()
            {
                Sequence = new string('A', 75)
            };
            var chromosomeLength = chrReference.Sequence.Length;

            // Anywhere inside the reference (apart from the exceptions exercised below)
            // the insertion is expected to receive the R8 filter.
            for (var position = 0; position < chromosomeLength - 1; position++)
            {
                var insertion = TestHelper.CreatePassingVariant(false);
                insertion.ReferencePosition = position;
                insertion.Type              = AlleleCategory.Insertion;
                insertion.ReferenceAllele   = "A";
                insertion.AlternateAllele   = "AA";

                AlleleProcessor.Process(insertion, 0.01f, 0, 0,
                                        true, 0, 0, 2, null, 0.6f, chrReference);

                Assert.True(insertion.Filters.Contains(FilterType.IndelRepeatLength));
            }

            // Quirk: a variant at the last or second-to-last position of the chromosome is not R8 filtered.
            // This is inherited from legacy code kept only for continuity with Isas, where it appears to be
            // intentional (the code comment there reads "this handles cases where a deletion is larger than
            // the number of downstream flanking bases"). The assertions below simply pin that behavior.

            // Last position of the chromosome (variant positions are 1-based).
            var insertionAtEnd = TestHelper.CreatePassingVariant(false);
            insertionAtEnd.ReferencePosition = chromosomeLength;
            insertionAtEnd.Type              = AlleleCategory.Insertion;
            insertionAtEnd.ReferenceAllele   = "A";
            insertionAtEnd.AlternateAllele   = "AA";
            AlleleProcessor.Process(insertionAtEnd, 0.01f, 0, 0,
                                    true, 0, 0, 2, null, 0.6f, chrReference);
            Assert.False(insertionAtEnd.Filters.Contains(FilterType.IndelRepeatLength));

            // Second-to-last position of the chromosome (variant positions are 1-based).
            var insertionBeforeEnd = TestHelper.CreatePassingVariant(false);
            insertionBeforeEnd.ReferencePosition = chromosomeLength - 1;
            insertionBeforeEnd.Type              = AlleleCategory.Insertion;
            insertionBeforeEnd.ReferenceAllele   = "A";
            insertionBeforeEnd.AlternateAllele   = "AA";
            AlleleProcessor.Process(insertionBeforeEnd, 0.01f, 0, 0,
                                    true, 0, 0, 2, null, 0.6f, chrReference);
            Assert.False(insertionBeforeEnd.Filters.Contains(FilterType.IndelRepeatLength));

            // A variant well outside the chromosome makes the substring lookup throw. The scenario is
            // nonsensical and is here only to document the exception. A variant just one base past the
            // end would not throw (the filter is simply not applied), because .NET's Substring returns an
            // empty string "if startIndex is equal to the length of this instance and length is zero."
            var insertionPastEnd = TestHelper.CreatePassingVariant(false);
            insertionPastEnd.ReferencePosition = chromosomeLength + 2;
            insertionPastEnd.Type              = AlleleCategory.Insertion;
            insertionPastEnd.ReferenceAllele   = "A";
            insertionPastEnd.AlternateAllele   = "AA";

            Assert.Throws<ArgumentOutOfRangeException>(
                () => AlleleProcessor.Process(insertionPastEnd, 0.01f, 0, 0,
                                              true, 0, 0, 2, null, 0.6f, chrReference));
        }
 /// <summary>
 /// Handles one work request together with a chromosome reference.
 /// Declared abstract: what the handling consists of is defined by each derived class.
 /// </summary>
 /// <param name="workRequest">The work request to handle.</param>
 /// <param name="chrReference">The chromosome reference supplied alongside the request.</param>
 protected abstract void Process(BamWorkRequest workRequest, ChrReference chrReference);
 /// <summary>
 /// Returns the mocked allele caller when <c>MockVariantCaller</c> is set;
 /// otherwise defers to the base implementation with the same arguments.
 /// </summary>
 protected override IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet, IAlignmentSource alignmentSource, HashSet<Tuple<string, int, string, string>> forcedGtAlleles = null)
 {
     if (MockVariantCaller != null)
     {
         return MockVariantCaller.Object;
     }

     return base.CreateVariantCaller(chrReference, intervalSet, alignmentSource, forcedGtAlleles);
 }
 /// <summary>
 /// Returns the mocked alignment source when <c>MockAlignmentSource</c> is set;
 /// otherwise defers to the base implementation with the same arguments.
 /// </summary>
 protected override IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath, bool expectStitchedDirections, List<string> chrsToProcess)
 {
     if (MockAlignmentSource != null)
     {
         return MockAlignmentSource.Object;
     }

     return base.CreateAlignmentSource(chrReference, bamFilePath, expectStitchedDirections, chrsToProcess);
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Creates an extractor with an empty read list, taking its chromosome name
 /// from <paramref name="chrInfo"/>.
 /// </summary>
 /// <param name="chrInfo">Chromosome reference whose <c>Name</c> is stored.</param>
 /// <param name="SourceIsStitched">Stored as the stitched-source flag; defaults to false.</param>
 public MockAlignmentExtractor(ChrReference chrInfo, bool SourceIsStitched = false)
 {
     _sourceIsStitched = SourceIsStitched;
     _chrName          = chrInfo.Name;
     _reads            = new List<Read>();
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Stores the supplied collaborators and verifies that the alignment source's chromosome
        /// filter matches the name of the chromosome reference.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown when the alignment source's chromosome filter differs from the reference name.
        /// </exception>
        public SomaticVariantCaller(IAlignmentSource alignmentSource, ICandidateVariantFinder variantFinder, IAlleleCaller alleleCaller,
                                    IVcfWriter vcfWriter, IStateManager stateManager, ChrReference chrReference, IRegionPadder regionMapper, IStrandBiasFileWriter biasFileWriter)
        {
            // Read pipeline: source -> candidate finder -> caller.
            _alignmentSource = alignmentSource;
            _variantFinder   = variantFinder;
            _alleleCaller    = alleleCaller;

            // Output and state tracking.
            _vcfWriter      = vcfWriter;
            _biasFileWriter = biasFileWriter;
            _stateManager   = stateManager;

            // Reference context.
            _chrReference = chrReference;
            _regionMapper = regionMapper;

            var sourceChromosome  = _alignmentSource.ChromosomeFilter;
            var currentChromosome = _chrReference.Name;
            if (sourceChromosome != currentChromosome)
            {
                var message = string.Format("Chromosome filter in alignment source '{0}' does not match to current chromosome '{1}'", sourceChromosome, currentChromosome);
                throw new ArgumentException(message);
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Builds a <c>SomaticVariantCaller</c> that reads from the given mock extractor,
        /// configured from <paramref name="options"/>. No region padder is supplied.
        /// </summary>
        /// <param name="vcfWriter">Writer passed through to the caller.</param>
        /// <param name="options">Application options that drive the configuration objects.</param>
        /// <param name="chrRef">Chromosome reference for the caller and the variant caller config.</param>
        /// <param name="mockAlignmentExtractor">Extractor supplying reads to the alignment source.</param>
        /// <param name="biasFileWriter">Optional strand bias writer passed through to the caller.</param>
        /// <param name="intervalFilePath">Not read by this method.</param>
        public static ISomaticVariantCaller CreateMockVariantCaller(VcfFileWriter vcfWriter, ApplicationOptions options, ChrReference chrRef, MockAlignmentExtractor mockAlignmentExtractor, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
        {
            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = options.MinimumMapQuality,
                OnlyUseProperPairs = options.OnlyUseProperPairs,
            };

            // The mate finder is always null here, whatever options.StitchReads says.
            AlignmentMateFinder mateFinder = null;
            var source          = new AlignmentSource(mockAlignmentExtractor, mateFinder, sourceConfig);
            var candidateFinder = new CandidateVariantFinder(options.MinimumBaseCallQuality, options.MaxSizeMNV, options.MaxGapBetweenMNV, options.CallMNVs);
            var coverage        = new CoverageCalculator();

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls        = options.OutputgVCFFiles,
                MinVariantQscore             = options.MinimumVariantQScore,
                MaxVariantQscore             = options.MaximumVariantQScore,
                // The filter threshold only applies when it is stricter than the minimum Q-score.
                VariantQscoreFilterThreshold = options.FilteredVariantQScore > options.MinimumVariantQScore ? options.FilteredVariantQScore : (int?)null,
                MinCoverage                  = options.MinimumDepth,
                MinFrequency                 = options.MinimumFrequency,
                // An applied noise level of -1 means "fall back to the minimum base call quality".
                EstimatedBaseCallQuality     = options.AppliedNoiseLevel == -1 ? options.MinimumBaseCallQuality : options.AppliedNoiseLevel,
                StrandBiasModel              = options.StrandBiasModel,
                StrandBiasFilterThreshold    = options.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants   = options.FilterOutVariantsPresentOnlyOneStrand,
                ChrReference                 = chrRef
            };
            var collapser = options.Collapse ? new VariantCollapser(null, coverage) : null;
            var caller    = new AlleleCaller(callerConfig,
                                             coverageCalculator: coverage,
                                             variantCollapser: collapser);

            var regionState = new RegionStateManager(
                expectStitchedReads: mockAlignmentExtractor.SourceIsStitched,
                trackOpenEnded: options.Collapse,
                trackReadSummaries: options.CoverageMethod == CoverageMethod.Approximate);

            // The state manager is an allele source; a freshly created one must report no alleles.
            Assert.Equal(0, regionState.GetAlleleCount(1, AlleleType.A, DirectionType.Forward));

            return new SomaticVariantCaller(
                source,
                candidateFinder,
                caller,
                vcfWriter,
                regionState,
                chrRef,
                null,
                biasFileWriter);
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Updates the allele's no-call fraction and then applies the filters via <c>ApplyFilters</c>.
 /// </summary>
 /// <param name="allele">Allele to process.</param>
 /// <param name="minFrequency">Not read by this method.</param>
 /// <param name="lowDepthFilter">Forwarded as the minimum coverage filter.</param>
 /// <param name="filterVariantQscore">Forwarded as the variant Q-score threshold.</param>
 /// <param name="filterSingleStrandVariants">Forwarded unchanged.</param>
 /// <param name="variantFreqFilter">Forwarded unchanged.</param>
 /// <param name="lowGqFilter">Forwarded as the low genotype quality filter.</param>
 /// <param name="indelRepeatFilter">Forwarded unchanged.</param>
 /// <param name="rMxNFilterSettings">Forwarded unchanged.</param>
 /// <param name="noCallFilter">Forwarded unchanged.</param>
 /// <param name="ampliconBiasFilter">Forwarded unchanged.</param>
 /// <param name="chrReference">Forwarded unchanged.</param>
 /// <param name="isStitchedSource">Forwarded as the stitched-source flag; defaults to false.</param>
 public static void Process(CalledAllele allele,
                            float minFrequency, int? lowDepthFilter, int? filterVariantQscore, bool filterSingleStrandVariants, float? variantFreqFilter, float? lowGqFilter, int? indelRepeatFilter,
                            RMxNFilterSettings rMxNFilterSettings, float? noCallFilter, float? ampliconBiasFilter, ChrReference chrReference, bool isStitchedSource = false)
 {
     // The no-call fraction is refreshed before filtering, which reads it.
     allele.SetFractionNoCalls();

     ApplyFilters(
         allele,
         lowDepthFilter,
         filterVariantQscore,
         filterSingleStrandVariants,
         variantFreqFilter,
         lowGqFilter,
         indelRepeatFilter,
         rMxNFilterSettings,
         noCallFilter,
         ampliconBiasFilter,
         isStitchedSource,
         chrReference);
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates a region padder, or returns null when none is needed: there are no intervals,
 /// or gVCF output (<c>_options.OutputgVCFFiles</c>) is off.
 /// </summary>
 /// <param name="chrReference">Chromosome reference handed to the padder.</param>
 /// <param name="intervalSet">Intervals handed to the padder; null means no padder.</param>
 /// <param name="includeReference">Not read by this method; the decision uses <c>_options.OutputgVCFFiles</c>.</param>
 protected virtual IRegionPadder CreateRegionPadder(ChrReference chrReference, ChrIntervalSet intervalSet, bool includeReference)
 {
     if (intervalSet == null || !_options.OutputgVCFFiles)
     {
         return null;
     }

     return new RegionPadder(chrReference, intervalSet);
 }
        /// <summary>
        /// Loads a single read and checks both its loading result and its candidate direction
        /// against the expectations in <paramref name="scenario"/>.
        /// </summary>
        /// <returns>
        /// A two-element array (loading check result, direction check result), or a one-element
        /// array holding a failure message when loading or either check returns null.
        /// </returns>
        public static string[] CheckReadLoading(BamAlignment read, PiscesApplicationOptions options, ChrReference chrInfo, bool isVariant, StitchingScenario scenario)
        {
            // Variant reads use the scenario's variant expectations; otherwise the reference
            // loading is expected with a candidate direction of "0".
            string expectedLoading;
            string expectedDirection;
            if (isVariant)
            {
                expectedLoading   = scenario.VarLoading;
                expectedDirection = scenario.CandidateDirection;
            }
            else
            {
                expectedLoading   = scenario.RefLoading;
                expectedDirection = "0";
            }

            var reads  = new List<BamAlignment> { read };
            var loaded = LoadReads(reads, options, chrInfo, isVariant, expectedLoading, expectedDirection);
            if (loaded == null)
            {
                return new[] { "total fail to parse variant reads" };
            }

            // Both checks run before either result is inspected.
            var loadingOutcome   = CheckLoading(scenario, 1, loaded.Item1, isVariant);
            var directionOutcome = CheckCandidateDirection(isVariant, loaded.Item2, expectedDirection);

            if (loadingOutcome == null)
            {
                return new[] { "total fail to check loading" };
            }

            if (directionOutcome == null)
            {
                return new[] { "total fail to check direction" };
            }

            return new string[] { loadingOutcome, directionOutcome };
        }
Ejemplo n.º 22
0
 /// <summary>
 /// The padder's only job is to pad empty reference calls when intervals are in use.
 /// It assumes the batch already contains reference calls (empty or not) for cleared regions.
 /// </summary>
 /// <param name="chrReference">Chromosome reference stored by the padder.</param>
 /// <param name="intervals">Interval set exposed through <c>IntervalSet</c>.</param>
 public RegionPadder(ChrReference chrReference, ChrIntervalSet intervals)
 {
     IntervalSet   = intervals;
     _chrReference = chrReference;
 }
Ejemplo n.º 23
0
 /// <summary>
 /// Returns the mocked alignment source when <c>MockAlignmentSource</c> is set;
 /// otherwise defers to the base implementation with the same arguments.
 /// </summary>
 protected override IAlignmentSource CreateAlignmentSource(ChrReference chrReference, string bamFilePath)
 {
     if (MockAlignmentSource != null)
     {
         return MockAlignmentSource.Object;
     }

     return base.CreateAlignmentSource(chrReference, bamFilePath);
 }
Ejemplo n.º 24
0
        /// <summary>
        /// Assembles a <c>ClassificationBlockProvider</c> for one chromosome from an action block
        /// factory provider, an aggregate region processor, a batch block factory and a bin
        /// evidence factory, all configured from this instance's options.
        /// </summary>
        public IClassificationBlockProvider GetBlockProvider(Dictionary<int, string> refIdMapping, string chrom,
                                                             IWriterSource writerSource, ConcurrentDictionary<string, int> progressTracker,
                                                             ConcurrentDictionary<PairClassification, int> categoryLookup, ConcurrentDictionary<string, IndelEvidence> masterIndelLookup,
                                                             ConcurrentDictionary<HashableIndel, int[]> masterOutcomesLookup,
                                                             ConcurrentDictionary<HashableIndel, int> masterFinalIndels, ChrReference chrReference)
        {
            var actionBlocks = new PairResultActionBlockFactoryProvider(
                writerSource,
                _geminiOptions.Debug,
                _geminiOptions.LightDebug,
                chrom,
                _geminiSampleOptions.RefId.Value,
                _maxDegreeOfParallelism,
                _stitcherOptions.FilterForProperPairs,
                _geminiOptions.MessySiteWidth,
                progressTracker,
                categoryLookup);

            var regionProcessor = new AggregateRegionProcessor(
                chrReference,
                refIdMapping,
                _bamRealignmentFactory,
                _geminiOptions,
                _geminiFactory,
                chrom,
                _dataSourceFactory,
                _realignmentOptions,
                masterIndelLookup,
                masterOutcomesLookup,
                masterFinalIndels,
                _realignmentOptions.CategoriesForRealignment,
                progressTracker);

            // The batch block factory is given one fifth of the configured read cache size.
            var batchBlocks = new PairResultBatchBlockFactory(_geminiOptions.ReadCacheSize / 5);

            return new ClassificationBlockProvider(
                _geminiOptions,
                chrom,
                progressTracker,
                categoryLookup,
                actionBlocks,
                regionProcessor,
                _geminiOptions.LightDebug,
                batchBlocks,
                new BinEvidenceFactory(_geminiOptions, _geminiSampleOptions),
                _realignmentOptions.CategoriesForRealignment,
                _maxDegreeOfParallelism);
        }
Ejemplo n.º 25
0
 /// <summary>
 /// Returns the mocked region padder when <c>MockRegionMapper</c> is set;
 /// otherwise defers to the base implementation with the same arguments.
 /// </summary>
 protected override IRegionPadder CreateRegionPadder(ChrReference chrReference, ChrIntervalSet intervalSet, bool includeReferences)
 {
     if (MockRegionMapper != null)
     {
         return MockRegionMapper.Object;
     }

     return base.CreateRegionPadder(chrReference, intervalSet, includeReferences);
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Builds a <c>SomaticVariantCaller</c> that reads from the given mock extractor,
        /// configured from <paramref name="options"/>. A stitcher and mate finder are created
        /// only when <c>options.StitchReads</c> is set.
        /// </summary>
        /// <param name="vcfWriter">Writer passed through to the caller.</param>
        /// <param name="options">Application options that drive the configuration objects.</param>
        /// <param name="chrRef">Chromosome reference for the caller and the region padder.</param>
        /// <param name="mae">Mock extractor supplying reads to the alignment source.</param>
        /// <param name="biasFileWriter">Optional strand bias writer passed through to the caller.</param>
        /// <param name="intervalFilePath">Not read by this method.</param>
        private ISomaticVariantCaller CreateMockVariantCaller(VcfFileWriter vcfWriter, ApplicationOptions options, ChrReference chrRef, MockAlignmentExtractor mae, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
        {
            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = options.MinimumMapQuality,
                OnlyUseProperPairs = options.OnlyUseProperPairs,
            };

            // Stitching support is optional; both helpers stay null when it is off.
            IAlignmentStitcher stitcher    = null;
            AlignmentMateFinder mateFinder = null;
            if (options.StitchReads)
            {
                stitcher = options.UseXCStitcher
                    ? (IAlignmentStitcher)new XCStitcher(options.MinimumBaseCallQuality)
                    : new BasicStitcher(options.MinimumBaseCallQuality);
                mateFinder = new AlignmentMateFinder(MAX_FRAGMENT_SIZE);
            }

            // The padder is created without an interval set.
            var regionPadder    = new RegionPadder(chrRef, null);
            var source          = new AlignmentSource(mae, mateFinder, stitcher, sourceConfig);
            var candidateFinder = new CandidateVariantFinder(options.MinimumBaseCallQuality, options.MaxSizeMNV, options.MaxGapBetweenMNV, options.CallMNVs);

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls        = options.OutputgVCFFiles,
                MinVariantQscore             = options.MinimumVariantQScore,
                MaxVariantQscore             = options.MaximumVariantQScore,
                // The filter threshold only applies when it is stricter than the minimum Q-score.
                VariantQscoreFilterThreshold = options.FilteredVariantQScore > options.MinimumVariantQScore ? options.FilteredVariantQScore : (int?)null,
                MinCoverage                  = options.MinimumCoverage,
                MinFrequency                 = options.MinimumFrequency,
                // An applied noise level of -1 means "fall back to the minimum base call quality".
                EstimatedBaseCallQuality     = options.AppliedNoiseLevel == -1 ? options.MinimumBaseCallQuality : options.AppliedNoiseLevel,
                StrandBiasModel              = options.StrandBiasModel,
                StrandBiasFilterThreshold    = options.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants   = options.FilterOutVariantsPresentOnlyOneStrand,
                GenotypeModel                = options.GTModel
            };
            var caller      = new AlleleCaller(callerConfig);
            var regionState = new RegionStateManager();

            return new SomaticVariantCaller(
                source,
                candidateFinder,
                caller,
                vcfWriter,
                regionState,
                chrRef,
                regionPadder,
                biasFileWriter);
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Stores the supplied collaborators; no validation or other work is performed.
 /// </summary>
 /// <param name="intervalSet">Optional interval set; defaults to null.</param>
 public SomaticVariantCaller(IAlignmentSource alignmentSource, ICandidateVariantFinder variantFinder, IAlleleCaller alleleCaller,
                             IVcfWriter<CalledAllele> vcfWriter, IStateManager stateManager, ChrReference chrReference, IRegionMapper regionMapper,
                             IStrandBiasFileWriter biasFileWriter, ChrIntervalSet intervalSet = null)
 {
     // Read pipeline: source -> candidate finder -> caller.
     _alignmentSource = alignmentSource;
     _variantFinder   = variantFinder;
     _alleleCaller    = alleleCaller;

     // Output and state tracking.
     _vcfWriter      = vcfWriter;
     _biasFileWriter = biasFileWriter;
     _stateManager   = stateManager;

     // Reference and region context.
     _chrReference = chrReference;
     _regionMapper = regionMapper;
     _intervalSet  = intervalSet;
 }
Ejemplo n.º 28
0
 /// <summary>
 /// No-op override: the body is empty, so nothing is done with the work request
 /// or the chromosome reference.
 /// </summary>
 /// <param name="workRequest">Ignored.</param>
 /// <param name="chrReference">Ignored.</param>
 protected override void Process(BamWorkRequest workRequest, ChrReference chrReference)
 {
     // do nothing
 }
 /// <summary>
 /// Stores the supplied collaborators, derives the forced-allele position lookup via
 /// <c>CreateForcedAllelePos</c>, and enables bias file writing only when both bias
 /// writers are provided.
 /// </summary>
 /// <param name="intervalSet">Optional interval set; defaults to null.</param>
 /// <param name="forcedGTAlleles">Optional forced genotype alleles; defaults to null.</param>
 public SmallVariantCaller(IAlignmentSource alignmentSource, ICandidateVariantFinder variantFinder, IAlleleCaller alleleCaller,
                           IVcfWriter<CalledAllele> vcfWriter, IStateManager stateManager, ChrReference chrReference, IRegionMapper regionMapper,
                           IStrandBiasFileWriter strandBiasFileWriter, IAmpliconBiasFileWriter ampBiasFileWriter, ChrIntervalSet intervalSet = null, HashSet<Tuple<string, int, string, string>> forcedGTAlleles = null)
 {
     // Read pipeline: source -> candidate finder -> caller.
     _alignmentSource = alignmentSource;
     _variantFinder   = variantFinder;
     _alleleCaller    = alleleCaller;

     // Output and state tracking.
     _vcfWriter    = vcfWriter;
     _stateManager = stateManager;

     // Reference and region context.
     _chrReference = chrReference;
     _regionMapper = regionMapper;
     _intervalSet  = intervalSet;

     // Bias files are written only when both writers are available.
     _strandBiasFileWriter   = strandBiasFileWriter;
     _ampliconBiasFileWriter = ampBiasFileWriter;
     var hasBothBiasWriters  = strandBiasFileWriter != null && ampBiasFileWriter != null;
     _writeBiasFiles         = hasBothBiasWriters;

     // Forced alleles are kept as given and also indexed through CreateForcedAllelePos.
     _forcedGtAlleles               = forcedGTAlleles;
     _unProcessedForcedAllelesByPos = CreateForcedAllelePos(_forcedGtAlleles);
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Checks the reference substring that <c>ConvertToCallableNeighborhoods</c> assigns to a
        /// neighborhood in three situations: no genome, a genome without the requested chromosome,
        /// and a genome containing the chromosome.
        /// </summary>
        public void CreateCallableNbhdsTests()
        {
            var vcfPath       = Path.Combine(TestPaths.LocalTestDataDirectory, "VeryMutated.genome.vcf");
            var alleleSource  = new AlleleReader(vcfPath);
            var chr1Nbhd      = new VcfNeighborhood(0, "chr1", new VariantSite(123), new VariantSite(125));
            var inputNbhds    = new List<VcfNeighborhood> { chr1Nbhd };

            // Test 1: the genome is null, so the expected reference substring is the "RRR" placeholder.
            var builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                                  alleleSource, null, 20);

            var callable = builder.ConvertToCallableNeighborhoods(inputNbhds);

            Assert.Equal(1, callable.Count());
            Assert.Equal(2, callable.First().VcfVariantSites.Count());
            Assert.Equal("chr1", callable[0].ReferenceName);
            Assert.Equal("RRR", callable[0].NbhdReferenceSequenceSubstring);

            // Test 2: the genome exists but does not contain the right chromosome; "RRR" is expected again.
            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
            var chromosomeName  = "chr_wrong";
            Genome genome       = new Genome(genomeDirectory, new List<string> { chromosomeName });
            ChrReference chrReference = genome.GetChrReference(chromosomeName);

            builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                              alleleSource, genome, 20);

            callable = builder.ConvertToCallableNeighborhoods(inputNbhds);

            Assert.Equal(1, callable.Count());
            Assert.Equal(2, callable.First().VcfVariantSites.Count());
            Assert.Equal("chr1", callable[0].ReferenceName);
            Assert.Equal("RRR", callable[0].NbhdReferenceSequenceSubstring);

            // Test 3: the genome exists and does contain the right chromosome; real bases are expected.
            chromosomeName = "chr";
            genome         = new Genome(genomeDirectory, new List<string> { chromosomeName });
            chrReference   = genome.GetChrReference(chromosomeName);

            builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                              alleleSource, genome, 20);

            chr1Nbhd   = new VcfNeighborhood(0, "chr", new VariantSite(123), new VariantSite(125));
            inputNbhds = new List<VcfNeighborhood> { chr1Nbhd };

            callable = builder.ConvertToCallableNeighborhoods(inputNbhds);

            Assert.Equal(1, callable.Count());
            Assert.Equal(2, callable.First().VcfVariantSites.Count());
            Assert.Equal("chr", callable[0].ReferenceName);
            Assert.Equal("TAT", callable[0].NbhdReferenceSequenceSubstring);
        }