예제 #1
0
        public void CallVariants_MnvReallocatesToSnvOutsideInterval()
        {
            // Scenario:
            //  * A passing MNV that spans the interval edge should be called if it begins within the intervals.
            //  * Failing MNVs that span the interval edge and are reallocated to SNVs should only have those SNVs
            //    called if they fall within the intervals; broken-out SNVs outside the intervals should not be
            //    called even if they gain enough support to be called.
            var callerConfig = new VariantCallerConfig
            {
                MaxVariantQscore = 100,
                EstimatedBaseCallQuality = 20,
                IncludeReferenceCalls = true,
                MinFrequency = 6f / 150
            };

            // Single target interval; every MNV below starts at 1949 or 1950, i.e. at the interval's end.
            var targetRegions = new List<Region> { new Region(1900, 1950) };
            var caller = new AlleleCaller(callerConfig, new ChrIntervalSet(targetRegions, "chr1"));

            var passingMnv = new CandidateAllele("chr1", 1950, "TTT", "CCC", AlleleCategory.Mnv);
            passingMnv.SupportByDirection = new[] { 10, 0, 0 };

            // only the first SNV should be called (1950 T>G)
            var failingMnv1 = new CandidateAllele("chr1", 1950, "TTT", "GGG", AlleleCategory.Mnv);
            failingMnv1.SupportByDirection = new[] { 5, 0, 0 };

            // only the second SNV should be called (1950 T>G)
            var failingMnv1Booster = new CandidateAllele("chr1", 1949, "TTTT", "GGGG", AlleleCategory.Mnv);
            failingMnv1Booster.SupportByDirection = new[] { 5, 0, 0 };

            // none of these should be called
            var failingMnv2 = new CandidateAllele("chr1", 1950, "TTT", "AAA", AlleleCategory.Mnv);
            failingMnv2.SupportByDirection = new[] { 5, 0, 0 };

            var stateManagerMock = MockStateManager(306, 0);

            // Candidate order is kept as in the original test: passing, failing1, failing2, booster.
            var candidates = new List<CandidateAllele>();
            candidates.Add(passingMnv);
            candidates.Add(failingMnv1);
            candidates.Add(failingMnv2);
            candidates.Add(failingMnv1Booster);

            var batch = new CandidateBatch(candidates);
            batch.MaxClearedPosition = 2000;

            var called = caller.Call(batch, stateManagerMock.Object);

            PrintResults(called.ToList());

            Assert.Equal(2, called.Count());
        }
예제 #2
0
        public void Constructor()
        {
            // Populated set: min/max positions and chromosome name are read back from the new instance.
            var regions = new List<BasicRegion>
            {
                new BasicRegion(4, 6),
                new BasicRegion(8, 8),
                new BasicRegion(9, 10)
            };
            var populatedSet = new ChrIntervalSet(regions, "chr1");

            Assert.Equal(4, populatedSet.MinPosition);
            Assert.Equal(10, populatedSet.MaxPosition);
            Assert.Equal("chr1", populatedSet.ChrName);

            // Empty set: both bounds are expected to be zero.
            var emptySet = new ChrIntervalSet(new List<BasicRegion>(), "chr1");

            Assert.Equal(0, emptySet.MinPosition);
            Assert.Equal(0, emptySet.MaxPosition);

            // Invalid arguments: null interval list, null name, empty name, start position after end position.
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(null, "chr1"));
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(new List<BasicRegion>(), null));
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(new List<BasicRegion>(), ""));
            Assert.Throws<ArgumentException>(
                () => new ChrIntervalSet(new List<BasicRegion> { new BasicRegion(7, 6) }, "chr1"));
        }
예제 #3
0
 /// <summary>
 /// Creates a region state manager and stores each argument in its corresponding private field.
 /// </summary>
 /// <param name="includeRefAlleles">Stored in <c>_includeRefAlleles</c>.</param>
 /// <param name="minBasecallQuality">Stored in <c>_minBasecallQuality</c>.</param>
 /// <param name="intervalSet">Stored in <c>_intervalSet</c>; may be null.</param>
 /// <param name="blockSize">Stored in <c>_regionSize</c>.</param>
 public RegionStateManager(
     bool includeRefAlleles = false,
     int minBasecallQuality = 20,
     ChrIntervalSet intervalSet = null,
     int blockSize = Constants.RegionSize)
 {
     _includeRefAlleles = includeRefAlleles;
     _minBasecallQuality = minBasecallQuality;
     _intervalSet = intervalSet;
     _regionSize = blockSize;
 }
예제 #4
0
 /// <summary>
 /// Creates a CollapsedRegionStateManager. Every argument is forwarded to the base constructor,
 /// with a literal <c>true</c> supplied as the third base-constructor argument, and
 /// <c>ExpectCollapsedReads</c> is then set to <c>true</c>.
 /// </summary>
 /// <param name="includeRefAlleles">Forwarded to the base constructor.</param>
 /// <param name="minBasecallQuality">Forwarded to the base constructor.</param>
 /// <param name="intervalSet">Forwarded to the base constructor; may be null.</param>
 /// <param name="blockSize">Forwarded to the base constructor.</param>
 /// <param name="trackOpenEnded">Forwarded to the base constructor.</param>
 /// <param name="trackReadSummaries">Forwarded to the base constructor.</param>
 /// <param name="trackedAnchorSize">Forwarded to the base constructor as its last argument.</param>
 /// <remarks>Create a CollapsedRegionStateManager (derived) if both ExpectCollapsedReads and ExpectStitchedReads are true;
 /// otherwise use RegionStateManager (base) instead.</remarks>
 public CollapsedRegionStateManager(
     bool includeRefAlleles = false,
     int minBasecallQuality = 20,
     ChrIntervalSet intervalSet = null,
     int blockSize = 1000,
     bool trackOpenEnded = false,
     bool trackReadSummaries = false,
     int trackedAnchorSize = 5)
     : base(
         includeRefAlleles,
         minBasecallQuality,
         true,
         intervalSet,
         blockSize,
         trackOpenEnded,
         trackReadSummaries,
         trackedAnchorSize)
 {
     ExpectCollapsedReads = true;
 }
예제 #5
0
 /// <summary>
 /// Creates an allele caller from its configuration and optional collaborators.
 /// </summary>
 /// <param name="config">Caller configuration; its <c>GenotypeCalculator</c> is also cached in a field.</param>
 /// <param name="intervalSet">Optional interval set; stored as given (may be null).</param>
 /// <param name="variantCollapser">Optional collapser; stored as given (may be null).</param>
 /// <param name="coverageCalculator">Optional coverage calculator; a new <c>CoverageCalculator</c> is used when null.</param>
 public AlleleCaller(
     VariantCallerConfig config,
     ChrIntervalSet intervalSet = null,
     IVariantCollapser variantCollapser = null,
     ICoverageCalculator coverageCalculator = null)
 {
     if (coverageCalculator == null)
     {
         coverageCalculator = new CoverageCalculator();
     }

     _config = config;
     _genotypeCalculator = config.GenotypeCalculator;
     _intervalSet = intervalSet;
     _collapser = variantCollapser;
     _coverageCalculator = coverageCalculator;
 }
예제 #6
0
        /// <summary>
        /// Filters a group of alleles against the regions registered for their chromosome.
        /// The chromosome of the first allele selects which interval set is used.
        /// </summary>
        /// <param name="alleles">Alleles to filter; a null or empty list yields an empty result.</param>
        /// <returns>
        /// The alleles accepted by the active inclusion model, or an empty list when there is nothing to
        /// filter or no intervals exist for the chromosome.
        /// </returns>
        /// <exception cref="ArgumentException">The inclusion model is <c>ByOverlap</c>, which is not supported.</exception>
        public List <CalledAllele> DoFiltering(List <CalledAllele> alleles)
        {
            var result = new List <CalledAllele>();

            if (alleles == null || alleles.Count == 0)
            {
                return(result);
            }

            var chrName = alleles[0].Chromosome;

            if (chrName != _currentChr)
            {
                // Record which chromosome the cached interval set belongs to. Without this assignment the
                // comparison above never becomes false, so the set would be rebuilt on every call and any
                // state accumulated on it (cleared positions, expanded intervals) would be thrown away.
                _currentChr = chrName;

                if (_regionsByChr.ContainsKey(chrName))
                {
                    _currentChrIntervalSet = new ChrIntervalSet(_regionsByChr[chrName], chrName);
                }
                else
                {
                    // No regions registered for this chromosome: cache an empty set and reject everything.
                    _currentChrIntervalSet = new ChrIntervalSet(new List <Region>(), chrName);
                    return(result);
                }
            }

            if (_currentChrIntervalSet.Intervals.Count == 0)
            {
                return(result);
            }

            switch (_mode)
            {
            case GeometricFilterParameters.InclusionModel.ByOverlap:
            {
                throw new ArgumentException("Option GeometricFilterParameters.InclusionModel.ByOverlap not currently supported.");
            }

            case GeometricFilterParameters.InclusionModel.Expanded:
            {
                result = DoFilteringByExpandingRegion(alleles, _currentChrIntervalSet);
                break;
            }

            default:
            {
                result = DoFilteringByStartPosition(alleles, _currentChrIntervalSet);
                break;
            }
            }

            return(result);
        }
예제 #7
0
        /// <summary>
        /// Calls <c>ChrIntervalSet.GetMinus</c> and asserts that the result matches the expected regions,
        /// first by count and then element by element in order.
        /// </summary>
        private void ExecuteTest_Minus(BasicRegion keepRegion, List <BasicRegion> excludeRegions, List <BasicRegion> expectedRegions)
        {
            var actualRegions = ChrIntervalSet.GetMinus(keepRegion, excludeRegions);

            Assert.Equal(expectedRegions.Count, actualRegions.Count);

            var index = 0;
            foreach (var expected in expectedRegions)
            {
                Assert.Equal(expected, actualRegions[index]);
                index++;
            }
        }
예제 #8
0
        /// <summary>
        /// Runs <c>SortAndCollapse</c> on the given set and asserts that its intervals match the expected
        /// regions, first by count and then element by element in order.
        /// </summary>
        private void ExecuteTest_SortAndCollapse(ChrIntervalSet set, List <BasicRegion> expectedRegions)
        {
            set.SortAndCollapse();

            Assert.Equal(expectedRegions.Count, set.Intervals.Count);

            var index = 0;
            foreach (var expected in expectedRegions)
            {
                Assert.Equal(expected, set.Intervals[index]);
                index++;
            }
        }
예제 #9
0
 /// <summary>
 /// Creates a region state manager and stores each argument in its corresponding private field.
 /// </summary>
 /// <param name="includeRefAlleles">Stored in <c>_includeRefAlleles</c>.</param>
 /// <param name="minBasecallQuality">Stored in <c>_minBasecallQuality</c>.</param>
 /// <param name="expectStitchedReads">Stored in <c>_expectStitchedReads</c>.</param>
 /// <param name="intervalSet">Stored in <c>_intervalSet</c>; may be null.</param>
 /// <param name="blockSize">Stored in <c>_regionSize</c>.</param>
 /// <param name="trackOpenEnded">Stored in <c>_trackOpenEnded</c>.</param>
 /// <param name="trackReadSummaries">Stored in <c>_trackReadSummaries</c>.</param>
 public RegionStateManager(
     bool includeRefAlleles = false,
     int minBasecallQuality = 20,
     bool expectStitchedReads = false,
     ChrIntervalSet intervalSet = null,
     int blockSize = 1000,
     bool trackOpenEnded = false,
     bool trackReadSummaries = false)
 {
     _includeRefAlleles = includeRefAlleles;
     _minBasecallQuality = minBasecallQuality;
     _expectStitchedReads = expectStitchedReads;
     _intervalSet = intervalSet;
     _regionSize = blockSize;
     _trackOpenEnded = trackOpenEnded;
     _trackReadSummaries = trackReadSummaries;
 }
예제 #10
0
 /// <summary>
 /// Creates a somatic variant caller from its collaborators; each argument is stored in the
 /// matching private field without validation.
 /// </summary>
 /// <param name="intervalSet">Optional interval set; stored as given (may be null).</param>
 public SomaticVariantCaller(
     IAlignmentSource alignmentSource,
     ICandidateVariantFinder variantFinder,
     IAlleleCaller alleleCaller,
     IVcfWriter<CalledAllele> vcfWriter,
     IStateManager stateManager,
     ChrReference chrReference,
     IRegionMapper regionMapper,
     IStrandBiasFileWriter biasFileWriter,
     ChrIntervalSet intervalSet = null)
 {
     // Reference and interval context.
     _chrReference = chrReference;
     _intervalSet = intervalSet;

     // Input and processing collaborators.
     _alignmentSource = alignmentSource;
     _variantFinder = variantFinder;
     _alleleCaller = alleleCaller;
     _stateManager = stateManager;
     _regionMapper = regionMapper;

     // Output writers.
     _vcfWriter = vcfWriter;
     _biasFileWriter = biasFileWriter;
 }
예제 #11
0
 /// <summary>
 /// Creates a somatic variant caller from its collaborators; each argument is stored in the
 /// matching private field without validation. The forced alleles are additionally passed to
 /// <c>CreateForcedAllelePos</c> and the result is stored in <c>_unProcessedForcedAllelesByPos</c>.
 /// </summary>
 /// <param name="intervalSet">Optional interval set; stored as given (may be null).</param>
 /// <param name="forcedGTAlleles">Optional forced alleles; stored as given (may be null).</param>
 public SomaticVariantCaller(
     IAlignmentSource alignmentSource,
     ICandidateVariantFinder variantFinder,
     IAlleleCaller alleleCaller,
     IVcfWriter<CalledAllele> vcfWriter,
     IStateManager stateManager,
     ChrReference chrReference,
     IRegionMapper regionMapper,
     IStrandBiasFileWriter biasFileWriter,
     ChrIntervalSet intervalSet = null,
     HashSet<Tuple<string, int, string, string>> forcedGTAlleles = null)
 {
     // Reference and interval context.
     _chrReference = chrReference;
     _intervalSet = intervalSet;

     // Input and processing collaborators.
     _alignmentSource = alignmentSource;
     _variantFinder = variantFinder;
     _alleleCaller = alleleCaller;
     _stateManager = stateManager;
     _regionMapper = regionMapper;

     // Output writers.
     _vcfWriter = vcfWriter;
     _biasFileWriter = biasFileWriter;

     // Forced alleles: the position lookup is built from the field, so the field is assigned first.
     _forcedGtAlleles = forcedGTAlleles;
     _unProcessedForcedAllelesByPos = CreateForcedAllelePos(_forcedGtAlleles);
 }
예제 #12
0
 /// <summary>
 /// Builds the state manager for a chromosome. A <c>CollapsedRegionStateManager</c> is returned only
 /// when both <paramref name="expectStitchedReads"/> and <paramref name="expectCollapsedReads"/> are
 /// true; in every other case a plain <c>RegionStateManager</c> is returned.
 /// </summary>
 /// <param name="intervalSet">Interval set handed to the state manager.</param>
 /// <param name="expectStitchedReads">Passed to <c>RegionStateManager</c>; also part of the type selection.</param>
 /// <param name="expectCollapsedReads">Part of the type selection only.</param>
 protected virtual IStateManager CreateStateManager(ChrIntervalSet intervalSet, bool expectStitchedReads = false, bool expectCollapsedReads = true)
 {
     // Settings shared by both state manager flavours.
     var includeRefAlleles = _options.VcfWritingParameters.OutputGvcfFile;
     var minBasecallQuality = _options.BamFilterParameters.MinimumBaseCallQuality;
     var trackOpenEnded = _options.Collapse;
     var trackReadSummaries = _options.CoverageMethod == CoverageMethod.Exact;
     var anchorSize = (int)_options.TrackedAnchorSize;

     if (!expectStitchedReads || !expectCollapsedReads)
     {
         // Not both stitched and collapsed: use the base state manager.
         return new RegionStateManager(
             includeRefAlleles,
             minBasecallQuality,
             expectStitchedReads,
             intervalSet,
             trackOpenEnded: trackOpenEnded,
             blockSize: GlobalConstants.RegionSize,
             trackReadSummaries: trackReadSummaries,
             numAnchorTypes: anchorSize);
     }

     // Input BAM is both collapsed and stitched.
     return new CollapsedRegionStateManager(
         includeRefAlleles,
         minBasecallQuality,
         intervalSet,
         trackOpenEnded: trackOpenEnded,
         blockSize: GlobalConstants.RegionSize,
         trackReadSummaries: trackReadSummaries,
         trackedAnchorSize: anchorSize);
 }
예제 #13
0
        /// <summary>
        /// Looks up the intervals registered for a BAM file and builds the sorted, collapsed interval
        /// set for one chromosome.
        /// </summary>
        /// <param name="chrName">Chromosome whose intervals are wanted.</param>
        /// <param name="bamFilePath">Key into the per-BAM interval lookup.</param>
        /// <returns>
        /// Null when the BAM file has no entry in the lookup; otherwise an interval set, which is
        /// empty when the BAM has intervals but none for this chromosome.
        /// </returns>
        private ChrIntervalSet GetIntervalSet(string chrName, string bamFilePath)
        {
            if (!_bamIntervalLookup.ContainsKey(bamFilePath))
            {
                return null;
            }

            var intervalsByChr = _bamIntervalLookup[bamFilePath];

            // empty means intervals applied, but none found for this chromosome
            var regionsForChr = intervalsByChr.ContainsKey(chrName)
                ? intervalsByChr[chrName]
                : new List<Region>();

            var intervalSet = new ChrIntervalSet(regionsForChr, chrName);

            // make sure intervals are valid
            intervalSet.SortAndCollapse();

            return intervalSet;
        }
 /// <summary>
 /// Creates a region state manager, storing each argument in its corresponding member.
 /// <c>ExpectCollapsedReads</c> is always initialised to <c>false</c> here.
 /// </summary>
 /// <param name="includeRefAlleles">Stored in <c>_includeRefAlleles</c>.</param>
 /// <param name="minBasecallQuality">Stored in <c>_minBasecallQuality</c>.</param>
 /// <param name="expectStitchedReads">Stored in <c>ExpectStitchedReads</c>.</param>
 /// <param name="intervalSet">Stored in <c>_intervalSet</c>; may be null.</param>
 /// <param name="blockSize">Stored in <c>_regionSize</c>.</param>
 /// <param name="trackOpenEnded">Stored in <c>_trackOpenEnded</c>.</param>
 /// <param name="trackReadSummaries">Stored in <c>_trackReadSummaries</c>.</param>
 /// <param name="trackAmpliconCounts">Stored in <c>_trackAmpliconCounts</c>.</param>
 /// <param name="numAnchorTypes">Stored in <c>_numAnchorTypes</c>.</param>
 public RegionStateManager(
     bool includeRefAlleles = false,
     int minBasecallQuality = 20,
     bool expectStitchedReads = false,
     ChrIntervalSet intervalSet = null,
     int blockSize = 1000,
     bool trackOpenEnded = false,
     bool trackReadSummaries = false,
     bool trackAmpliconCounts = false,
     int numAnchorTypes = 5)
 {
     _includeRefAlleles = includeRefAlleles;
     _minBasecallQuality = minBasecallQuality;
     _intervalSet = intervalSet;
     _regionSize = blockSize;

     _trackOpenEnded = trackOpenEnded;
     _trackReadSummaries = trackReadSummaries;
     _trackAmpliconCounts = trackAmpliconCounts;
     _numAnchorTypes = numAnchorTypes;

     ExpectStitchedReads = expectStitchedReads;
     ExpectCollapsedReads = false;
 }
예제 #15
0
        /// <summary>
        /// Clips a fixed three-interval set (5-10, 20-30, 40-50 on chr1) with <c>GetClipped</c> and
        /// asserts that the result matches the expected regions, first by count and then element by
        /// element in order.
        /// </summary>
        /// <param name="clipRegion">Region passed to <c>GetClipped</c>.</param>
        /// <param name="excludeRegions">Exclusion regions passed to <c>GetClipped</c>.</param>
        /// <param name="expectedRegions">
        /// Expected result. When omitted (null) the result is expected to be empty; previously the
        /// default value caused a NullReferenceException on the first assertion.
        /// </param>
        private void ExecuteTest_GetClipped(BasicRegion clipRegion, List <BasicRegion> excludeRegions, List <BasicRegion> expectedRegions = null)
        {
            // Treat the default null as "no regions expected" rather than dereferencing it.
            if (expectedRegions == null)
            {
                expectedRegions = new List <BasicRegion>();
            }

            var intervalSet = new ChrIntervalSet(
                new List <BasicRegion>()
            {
                new BasicRegion(5, 10),
                new BasicRegion(20, 30),
                new BasicRegion(40, 50),
            }, "chr1");

            var results = intervalSet.GetClipped(clipRegion, excludeRegions);

            Assert.Equal(expectedRegions.Count, results.Count);

            for (var i = 0; i < expectedRegions.Count; i++)
            {
                Assert.Equal(expectedRegions[i], results[i]);
            }
        }
예제 #16
0
        public void Constructor()
        {
            // Happy path: populated set exposes min/max positions and the chromosome name.
            var regions = new List<BasicRegion>
            {
                new BasicRegion(4, 6),
                new BasicRegion(8, 8),
                new BasicRegion(9, 10)
            };
            var populatedSet = new ChrIntervalSet(regions, "chr1");

            Assert.Equal(4, populatedSet.MinPosition);
            Assert.Equal(10, populatedSet.MaxPosition);
            Assert.Equal("chr1", populatedSet.ChrName);

            // Happy path: an empty set reports zero for both bounds.
            var emptySet = new ChrIntervalSet(new List<BasicRegion>(), "chr1");

            Assert.Equal(0, emptySet.MinPosition);
            Assert.Equal(0, emptySet.MaxPosition);

            // Null intervals
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(null, "chr1"));

            // Null chromosome name
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(new List<BasicRegion>(), null));

            // Empty chromosome name
            Assert.Throws<ArgumentException>(() => new ChrIntervalSet(new List<BasicRegion>(), ""));

            // Invalid interval - start position > end position
            Assert.Throws<ArgumentException>(
                () => new ChrIntervalSet(new List<BasicRegion> { new BasicRegion(7, 6) }, "chr1"));

            // Invalid interval - start position <= 0
            Assert.Throws<ArgumentException>(
                () => new ChrIntervalSet(new List<BasicRegion> { new BasicRegion(0, 6) }, "chr1"));

            // Invalid interval - end position <= 0
            Assert.Throws<ArgumentException>(
                () => new ChrIntervalSet(new List<BasicRegion> { new BasicRegion(7, 0) }, "chr1"));
        }
예제 #17
0
        public void ForceAlleleNotBeReportedWhenOutOfInterval()
        {
            // Region state covering positions 10-15 of a 20-base reference.
            var regionState = new MyRegionState(10, 15);
            var reference = new ChrReference();
            reference.Name = "chr1";
            reference.Sequence = "ATGGCCTACGATTAGTAGGT";

            // Both forced alleles sit at position 12, which is outside the 13-15 interval below.
            var forcedAlleles = new HashSet<Tuple<string, int, string, string>>();
            forcedAlleles.Add(Tuple.Create("chr1", 12, "T", "C"));
            forcedAlleles.Add(Tuple.Create("chr1", 12, "T", "A"));

            var regions = new List<Region> { new Region(13, 15) };
            var intervals = new ChrIntervalSet(regions, "chr1");

            var candidates = regionState.GetAllCandidates(true, reference, intervals, forcedAlleles);

            // Three candidates are expected, all past position 12.
            Assert.Equal(3, candidates.Count);
            Assert.True(candidates.All(candidate => candidate.ReferencePosition > 12));
        }
예제 #18
0
        /// <summary>
        /// Keeps or rejects a whole group of alleles based on the reference position of the first one.
        /// The position is marked as cleared on the interval set in either case.
        /// </summary>
        /// <param name="alleles">Alleles to filter; these should all be co-located. Must be non-empty.</param>
        /// <param name="chrIntervalSet">Interval set that is queried and then told the position is cleared.</param>
        /// <returns>The input list when the first allele's position is inside the intervals; otherwise a new empty list.</returns>
        public List <CalledAllele> DoFilteringByStartPosition(List <CalledAllele> alleles, ChrIntervalSet chrIntervalSet)
        {
            var position = alleles[0].ReferencePosition;

            // Query before clearing, matching the original order of the two interval-set calls.
            var isWithinIntervals = chrIntervalSet.ContainsPosition(position);

            chrIntervalSet.SetCleared(position);

            return isWithinIntervals ? alleles : new List<CalledAllele>();
        }
예제 #19
0
 /// <summary>
 /// Returns the mocked region padder when <c>MockRegionMapper</c> is set; otherwise defers to the base factory.
 /// </summary>
 protected override IRegionPadder CreateRegionPadder(ChrReference chrReference, ChrIntervalSet intervalSet, bool includeReferences)
 {
     if (MockRegionMapper != null)
     {
         return MockRegionMapper.Object;
     }

     return base.CreateRegionPadder(chrReference, intervalSet, includeReferences);
 }
예제 #20
0
 /// <summary>
 /// Returns the mocked state manager when <c>MockStateManager</c> is set; otherwise defers to the base factory.
 /// </summary>
 protected override IStateManager CreateStateManager(ChrIntervalSet intervalSet)
 {
     if (MockStateManager != null)
     {
         return MockStateManager.Object;
     }

     return base.CreateStateManager(intervalSet);
 }
예제 #21
0
 /// <summary>
 /// Returns the mocked variant caller when <c>MockVariantCaller</c> is set; otherwise defers to the base factory.
 /// </summary>
 protected override IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet)
 {
     if (MockVariantCaller != null)
     {
         return MockVariantCaller.Object;
     }

     return base.CreateVariantCaller(chrReference, intervalSet);
 }
예제 #22
0
        /// <summary>
        /// Selects the forced alleles for one reference, optionally restricted to an interval set.
        /// </summary>
        /// <param name="forcedAllelesByChrom">
        /// Forced alleles keyed by chromosome name. Previously this argument was ignored and the
        /// <c>_forcedAllelesByChrom</c> field was always used; the argument is now honoured, and the
        /// field is used only when the argument is null.
        /// </param>
        /// <param name="referenceName">Chromosome whose forced alleles are wanted.</param>
        /// <param name="intervalSet">Optional interval set; when null no position filtering is applied.</param>
        /// <returns>
        /// With no interval set: the stored set for the reference (or a new empty set when there is none).
        /// With an interval set: a new set holding only the alleles whose chromosome equals the set's
        /// <c>ChrName</c> and whose position the set contains.
        /// </returns>
        private HashSet <Tuple <string, int, string, string> > SelectForcedAllele(Dictionary <string, HashSet <Tuple <string, int, string, string> > > forcedAllelesByChrom, string referenceName, ChrIntervalSet intervalSet)
        {
            var allelesByChrom = forcedAllelesByChrom ?? _forcedAllelesByChrom;

            // Single lookup instead of ContainsKey followed by the indexer.
            HashSet <Tuple <string, int, string, string> > forcedGtAlleles;
            if (!allelesByChrom.TryGetValue(referenceName, out forcedGtAlleles))
            {
                forcedGtAlleles = new HashSet <Tuple <string, int, string, string> >();
            }

            if (intervalSet == null)
            {
                return(forcedGtAlleles);
            }

            var allelesInInterval = new HashSet <Tuple <string, int, string, string> >();

            foreach (var allele in forcedGtAlleles)
            {
                // Item1 is the chromosome name and Item2 the position, as used by the checks below.
                if (allele.Item1 == intervalSet.ChrName && intervalSet.ContainsPosition(allele.Item2))
                {
                    allelesInInterval.Add(allele);
                }
            }

            return(allelesInInterval);
        }
예제 #23
0
 /// <summary>
 /// Creates the region mapper used for padding, or returns null when none is needed.
 /// A padder is only required if there are intervals and reference calls are being written
 /// (decided here from <c>_options.VcfWritingParameters.OutputGvcfFile</c>).
 /// </summary>
 /// <param name="chrReference">Reference passed to the new <c>RegionMapper</c>.</param>
 /// <param name="intervalSet">Interval set passed to the new <c>RegionMapper</c>; null means no padder.</param>
 /// <param name="includeReference">Not read by this implementation.</param>
 protected virtual IRegionMapper CreateRegionPadder(ChrReference chrReference, ChrIntervalSet intervalSet, bool includeReference)
 {
     if (intervalSet == null)
     {
         return null;
     }

     if (!_options.VcfWritingParameters.OutputGvcfFile)
     {
         return null;
     }

     return new RegionMapper(chrReference, intervalSet, _options.BamFilterParameters.MinimumBaseCallQuality);
 }
예제 #24
0
        /// <summary>
        /// Builds the allele caller for one chromosome: creates the coverage and genotype calculators,
        /// picks the locus processor from the ploidy model, assembles a <c>VariantCallerConfig</c> from
        /// the options, and registers the forced alleles on the new caller.
        /// </summary>
        /// <param name="chrReference">Reference for the chromosome being called.</param>
        /// <param name="intervalSet">Interval set handed to the allele caller.</param>
        /// <param name="alignmentSource">Source used to create the coverage calculator.</param>
        /// <param name="forceGtAlleles">Optional forced alleles, passed to <c>AddForcedGtAlleles</c> as given.</param>
        /// <returns>The configured allele caller.</returns>
        protected virtual IAlleleCaller CreateVariantCaller(ChrReference chrReference, ChrIntervalSet intervalSet, IAlignmentSource alignmentSource, HashSet <Tuple <string, int, string, string> > forceGtAlleles = null)
        {
            var coverageCalculator = CreateCoverageCalculator(alignmentSource);

            // Shorthand for the variant-calling section of the options, which is read many times below.
            var callingParameters = _options.VariantCallingParameters;

            var genotypeCalculator = GenotypeCreator.CreateGenotypeCalculator(
                callingParameters.PloidyModel,
                callingParameters.MinimumFrequencyFilter,
                callingParameters.MinimumCoverage,
                callingParameters.DiploidSNVThresholdingParameters,
                callingParameters.DiploidINDELThresholdingParameters,
                callingParameters.AdaptiveGenotypingParameters,
                callingParameters.MinimumGenotypeQScore,
                callingParameters.MaximumGenotypeQScore,
                callingParameters.TargetLODFrequency,
                callingParameters.MinimumFrequency,
                chrReference.Name,
                callingParameters.IsMale);

            genotypeCalculator.SetMinFreqFilter(callingParameters.MinimumFrequencyFilter);

            ILocusProcessor locusProcessor;
            if (callingParameters.PloidyModel == PloidyModel.DiploidByThresholding)
            {
                locusProcessor = new DiploidLocusProcessor();
            }
            else
            {
                locusProcessor = new SomaticLocusProcessor();
            }

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls = _options.VcfWritingParameters.OutputGvcfFile,
                MinVariantQscore = callingParameters.MinimumVariantQScore,
                MaxVariantQscore = callingParameters.MaximumVariantQScore,
                MinGenotypeQscore = callingParameters.MinimumGenotypeQScore,
                MaxGenotypeQscore = callingParameters.MaximumGenotypeQScore,
                VariantQscoreFilterThreshold = callingParameters.MinimumVariantQScoreFilter,
                NoCallFilterThreshold = callingParameters.NoCallFilterThreshold,
                AmpliconBiasFilterThreshold = callingParameters.AmpliconBiasFilterThreshold,
                MinCoverage = callingParameters.MinimumCoverage,
                MinFrequency = genotypeCalculator.MinVarFrequency,
                NoiseLevelUsedForQScoring = callingParameters.NoiseLevelUsedForQScoring,
                StrandBiasModel = callingParameters.StrandBiasModel,
                StrandBiasFilterThreshold = callingParameters.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants = callingParameters.FilterOutVariantsPresentOnlyOneStrand,
                GenotypeCalculator = genotypeCalculator,
                VariantFreqFilter = genotypeCalculator.MinVarFrequencyFilter,
                LowGTqFilter = callingParameters.LowGenotypeQualityFilter,
                IndelRepeatFilter = callingParameters.IndelRepeatFilter,
                LowDepthFilter = callingParameters.LowDepthFilter,
                ChrReference = chrReference,
                RMxNFilterSettings = new RMxNFilterSettings
                {
                    RMxNFilterMaxLengthRepeat = callingParameters.RMxNFilterMaxLengthRepeat,
                    RMxNFilterMinRepetitions = callingParameters.RMxNFilterMinRepetitions,
                    RMxNFilterFrequencyLimit = callingParameters.RMxNFilterFrequencyLimit
                },
                NoiseModel = callingParameters.NoiseModel,
                LocusProcessor = locusProcessor
            };

            var variantCollapser = CreateVariantCollapser(chrReference.Name, coverageCalculator);
            var caller = new AlleleCaller(callerConfig, intervalSet, variantCollapser, coverageCalculator);

            caller.AddForcedGtAlleles(forceGtAlleles);

            return caller;
        }
 /// <summary>
 /// Returns the mocked state manager when <c>MockStateManager</c> is set; otherwise defers to the
 /// base factory.
 /// </summary>
 /// <param name="intervalSet">Interval set forwarded to the base factory.</param>
 /// <param name="expectStitchedReads">Forwarded to the base factory.</param>
 /// <param name="expectCollapsedReads">Forwarded to the base factory.</param>
 /// <remarks>
 /// The read-type flags were previously dropped when delegating, so the base factory always ran with
 /// its own default values regardless of what the caller requested.
 /// </remarks>
 protected override IStateManager CreateStateManager(ChrIntervalSet intervalSet, bool expectStitchedReads = false, bool expectCollapsedReads = false)
 {
     if (MockStateManager != null)
     {
         return MockStateManager.Object;
     }

     return base.CreateStateManager(intervalSet, expectStitchedReads, expectCollapsedReads);
 }
예제 #26
0
 /// <summary>
 /// Sole job is to pad empty reference calls when using intervals. Assumes the batch has already
 /// included reference calls (either empty or not) for cleared regions.
 /// </summary>
 /// <param name="chrReference">Chromosome reference; stored in <c>_chrReference</c>.</param>
 /// <param name="intervals">Interval set; stored in <c>IntervalSet</c>.</param>
 public RegionPadder(
     ChrReference chrReference,
     ChrIntervalSet intervals)
 {
     IntervalSet = intervals;
     _chrReference = chrReference;
 }
예제 #27
0
 /// <summary>
 /// Creates an allele caller from its configuration and an optional interval set.
 /// </summary>
 /// <param name="config">Caller configuration; stored in <c>_config</c>.</param>
 /// <param name="intervalSet">Optional interval set; stored in <c>_intervalSet</c> (may be null).</param>
 public AlleleCaller(
     VariantCallerConfig config,
     ChrIntervalSet intervalSet = null)
 {
     _intervalSet = intervalSet;
     _config = config;
 }
예제 #28
0
        /// <summary>
        /// Returns every candidate allele stored for this region and, when requested, one reference
        /// allele per fetched position with its support counted from <c>_alleleCounts</c>.
        /// </summary>
        /// <param name="includeRefAlleles">
        /// When true, reference alleles are generated and <paramref name="intervals"/> decides which
        /// positions are fetched. When false, reference alleles are only generated if
        /// <paramref name="forcesGtAlleles"/> is non-empty, over the intervals returned by
        /// <c>CreateIntervalsFromAllels</c>.
        /// </param>
        /// <param name="chrReference">Supplies the chromosome name and the reference sequence (indexed with position - 1).</param>
        /// <param name="intervals">Optional intervals; only consulted when <paramref name="includeRefAlleles"/> is true.</param>
        /// <param name="forcesGtAlleles">Optional forced alleles as tuples.</param>
        /// <returns>A new list holding the stored candidates followed by any generated reference alleles.</returns>
        public List <CandidateAllele> GetAllCandidates(bool includeRefAlleles, ChrReference chrReference,
                                                       ChrIntervalSet intervals = null, HashSet <Tuple <string, int, string, string> > forcesGtAlleles = null)
        {
            var alleles = new List <CandidateAllele>();

            // add all candidates - these are potentially collapsable targets
            foreach (var positionLookup in _candidateVariantsLookup)
            {
                if (positionLookup != null)
                {
                    alleles.AddRange(positionLookup);
                }
            }

            // Caller-supplied intervals are used only when reference alleles were requested; otherwise the
            // intervals come from CreateIntervalsFromAllels (called even when forcesGtAlleles is null).
            var IntervalsInUse = includeRefAlleles ? intervals : CreateIntervalsFromAllels(chrReference, forcesGtAlleles);

            if (includeRefAlleles || (forcesGtAlleles != null && forcesGtAlleles.Count != 0))
            {
                var regionsToFetch = IntervalsInUse == null
                    ? new List <Region> {
                    this
                }                                      // fetch whole block region
                    : IntervalsInUse.GetClipped(this); // clip intervals to block region

                for (var i = 0; i < regionsToFetch.Count; i++)
                {
                    var clippedInterval = regionsToFetch[i];
                    for (var position = clippedInterval.StartPosition;
                         position <= clippedInterval.EndPosition;
                         position++)
                    {
                        // Offset of this position from the start of the block; first index into _alleleCounts.
                        var positionIndex = position - StartPosition;

                        // add ref alleles within region to fetch - note that zero coverage ref positions are only added if input intervals provided
                        // Positions past the end of the reference sequence end this interval's scan
                        // (the break leaves the position loop only; later intervals are still visited).
                        if (position > chrReference.Sequence.Length)
                        {
                            break;
                        }

                        var refBase = chrReference.Sequence[position - 1].ToString();

                        var refBaseIndex = (int)AlleleHelper.GetAlleleType(refBase);
                        var refAllele    = new CandidateAllele(chrReference.Name, position,
                                                               refBase, refBase, AlleleCategory.Reference);

                        // gather support for allele
                        // totalSupport sums the counts of every allele type and direction at this position.
                        var totalSupport = 0;

                        for (var alleleTypeIndex = 0; alleleTypeIndex < Constants.NumAlleleTypes; alleleTypeIndex++)
                        {
                            for (var directionIndex = 0; directionIndex < Constants.NumDirectionTypes; directionIndex++)
                            {
                                // Sum the counts across all anchor indexes for this allele type and direction.
                                var count = 0;
                                for (int anchorIndex = 0; anchorIndex < NumAnchorIndexes; anchorIndex++)
                                {
                                    var countForAnchorType = _alleleCounts[positionIndex, alleleTypeIndex, directionIndex, anchorIndex];
                                    count += countForAnchorType;
                                }

                                // Only the counts of the reference base become the reference allele's support.
                                if (alleleTypeIndex == refBaseIndex)
                                {
                                    refAllele.SupportByDirection[directionIndex] = count;

                                    // TODO this isn't really proven to be well-anchored, nor is it proven not to be
                                    //refAllele.WellAnchoredSupportByDirection[directionIndex] = count;
                                }

                                totalSupport += count;
                            }
                        }

                        // With intervals in use every fetched position is emitted, even with zero support;
                        // without intervals only positions that have some support are emitted.
                        if (IntervalsInUse != null || totalSupport > 0)
                        {
                            alleles.Add(refAllele);
                        }
                    }
                }
            }

            return(alleles);
        }
예제 #29
0
        /// <summary>
        /// Populates a 50-base region with two SNV candidates and a fixed set of allele counts, then
        /// verifies the list returned by <c>GetAllCandidates</c> for the given combination of
        /// reference-call and interval settings.
        /// </summary>
        /// <param name="withReference">Passed to <c>GetAllCandidates</c> as the include-reference flag.</param>
        /// <param name="withIntervals">When true, intervals 3-6 and 16-16 on chr1 are supplied.</param>
        public void ExecuteTest_GetCandidates(bool withReference, bool withIntervals)
        {
            var regionState = new RegionState(1, 50);
            var reference = new ChrReference();
            reference.Name = "chr1";
            reference.Sequence = new string('A', 50);

            var snvAt5 = new CandidateAllele("chr1", 5, "A", "T", AlleleCategory.Snv);
            snvAt5.SupportByDirection = new[] { 10, 5, 0 };

            var snvAt15 = new CandidateAllele("chr1", 15, "A", "T", AlleleCategory.Snv);
            snvAt15.SupportByDirection = new[] { 10, 5, 0 };

            regionState.AddCandidate(snvAt5);
            regionState.AddCandidate(snvAt15);

            // Each count below is registered five times.
            var remaining = 5;
            while (remaining > 0)
            {
                regionState.AddAlleleCount(5, AlleleType.A, DirectionType.Stitched);  // ref @ variant position
                regionState.AddAlleleCount(6, AlleleType.A, DirectionType.Stitched);  // ref by itself
                regionState.AddAlleleCount(10, AlleleType.C, DirectionType.Stitched); // nonref by itself (no ref)
                regionState.AddAlleleCount(15, AlleleType.A, DirectionType.Reverse);  // ref (multiple directions) + nonref
                regionState.AddAlleleCount(15, AlleleType.A, DirectionType.Forward);
                regionState.AddAlleleCount(15, AlleleType.T, DirectionType.Reverse);
                remaining--;
            }

            ChrIntervalSet intervalSet = null;
            if (withIntervals)
            {
                var regions = new List<CallSomaticVariants.Logic.RegionState.Region>
                {
                    new CallSomaticVariants.Logic.RegionState.Region(3, 6),
                    new CallSomaticVariants.Logic.RegionState.Region(16, 16)
                };
                intervalSet = new ChrIntervalSet(regions, "chr1");
            }

            // The SNV candidates are always expected, in insertion order.
            var expected = new List<CandidateAllele> { snvAt5, snvAt15 };

            if (withReference)
            {
                // Reference alleles expected when no intervals restrict the positions.
                expected.AddRange(new[]
                {
                    new CandidateAllele("chr1", 5, "A", "A", AlleleCategory.Reference)
                    {
                        SupportByDirection = new[] { 0, 0, 5 }
                    },
                    new CandidateAllele("chr1", 6, "A", "A", AlleleCategory.Reference)
                    {
                        SupportByDirection = new[] { 0, 0, 5 }
                    },
                    new CandidateAllele("chr1", 10, "A", "A", AlleleCategory.Reference)
                    {
                        SupportByDirection = new[] { 0, 0, 0 }
                    },
                    new CandidateAllele("chr1", 15, "A", "A", AlleleCategory.Reference)
                    {
                        SupportByDirection = new[] { 5, 5, 0 }
                    }
                });
            }

            if (withIntervals)
            {
                // Drop reference expectations at coordinates other than 5 and 6; non-reference entries stay.
                expected = expected
                    .Where(c => !(c.Type == AlleleCategory.Reference && c.Coordinate != 5 && c.Coordinate != 6))
                    .ToList();

                if (withReference)
                {
                    // Zero-support reference alleles expected at the remaining interval positions.
                    expected.AddRange(new[]
                    {
                        new CandidateAllele("chr1", 3, "A", "A", AlleleCategory.Reference)
                        {
                            SupportByDirection = new[] { 0, 0, 0 }
                        },
                        new CandidateAllele("chr1", 4, "A", "A", AlleleCategory.Reference)
                        {
                            SupportByDirection = new[] { 0, 0, 0 }
                        },
                        new CandidateAllele("chr1", 16, "A", "A", AlleleCategory.Reference)
                        {
                            SupportByDirection = new[] { 0, 0, 0 }
                        }
                    });
                }
            }

            var actual = regionState.GetAllCandidates(withReference, reference, intervalSet);

            VerifyCandidates(expected, actual);
        }
예제 #30
0
        /// <summary>
        /// Keeps a group of alleles when the first allele starts inside the intervals, or when any
        /// allele's reference span beyond its start position reaches into the intervals. In the second
        /// case the interval set is asked to expand from the overlapping position back to the first
        /// allele's position. The first allele's position is marked as cleared in every case.
        /// </summary>
        /// <param name="alleles">Alleles to filter; these should all be co-located. Must be non-empty.</param>
        /// <param name="chrIntervalSet">Interval set that is queried, possibly expanded, and cleared.</param>
        /// <returns>The input list when the group is kept; otherwise a new empty list.</returns>
        public List <CalledAllele> DoFilteringByExpandingRegion(List <CalledAllele> alleles, ChrIntervalSet chrIntervalSet)
        {
            var rejected = new List<CalledAllele>();
            var firstAllele = alleles[0];

            // Guard: the start position itself is covered, so nothing needs expanding.
            if (chrIntervalSet.ContainsPosition(firstAllele.ReferencePosition))
            {
                chrIntervalSet.SetCleared(firstAllele.ReferencePosition);
                return alleles;
            }

            // The start positions are known NOT to be in the intervals. Now check the rest of the bases.
            var wasExpanded = false;

            foreach (var allele in alleles)
            {
                var lastPosition = allele.ReferencePosition + allele.ReferenceAllele.Length - 1;
                var candidatePosition = allele.ReferencePosition + 1;

                while (candidatePosition <= lastPosition)
                {
                    if (chrIntervalSet.ContainsPosition(candidatePosition))
                    {
                        // First overlapping base for this allele: expand once, then move to the next allele.
                        chrIntervalSet.ExpandInterval(candidatePosition, firstAllele.ReferencePosition);
                        wasExpanded = true;
                        break;
                    }

                    candidatePosition++;
                }
            }

            chrIntervalSet.SetCleared(firstAllele.ReferencePosition);

            return wasExpanded ? alleles : rejected;
        }