Пример #1
0
        /// <summary>
        /// Builds a <c>CandidateBatch</c> from four alleles, checks that all of them are reported by
        /// <c>GetCandidates</c>, then adds a fifth allele and checks that the collection obtained
        /// before the add now reports five entries including the new one.
        /// </summary>
        public void CreateAndAdd()
        {
            var seedAlleles = new List<CandidateAllele>
            {
                new CandidateAllele("chr1", 100, "A", "AT", AlleleCategory.Insertion),
                new CandidateAllele("chr1", 100, "A", "A", AlleleCategory.Reference),
                new CandidateAllele("chr1", 100, "A", "T", AlleleCategory.Snv),
                new CandidateAllele("chr1", 200, "A", "T", AlleleCategory.Mnv)
            };

            var candidateBatch = new CandidateBatch(seedAlleles);
            var heldByBatch    = candidateBatch.GetCandidates();

            // everything the batch was constructed with must be present
            Assert.Equal(4, heldByBatch.Count);
            foreach (var seed in seedAlleles)
            {
                var isPresent = heldByBatch.Contains(seed);
                Assert.True(isPresent);
            }

            // add one more candidate; the assertions below deliberately reuse the collection
            // fetched before the add rather than calling GetCandidates again
            var lateDeletion = new CandidateAllele("chr1", 200, "AT", "A", AlleleCategory.Deletion);
            var additions    = new List<CandidateAllele> { lateDeletion };

            candidateBatch.Add(additions);

            Assert.Equal(5, heldByBatch.Count);
            Assert.True(heldByBatch.Contains(lateDeletion));
        }
Пример #2
0
        /// <summary>
        /// Calls a batch of four MNV candidates positioned at the right edge of the single
        /// interval (1900-1950 on chr1) and expects exactly two variants to be called.
        /// </summary>
        public void CallVariants_MnvReallocatesToSnvOutsideInterval()
        {
            var callerConfig = new VariantCallerConfig
            {
                MaxVariantQscore         = 100,
                EstimatedBaseCallQuality = 20,
                IncludeReferenceCalls    = true,
                MinFrequency             = 6f / 150
            };

            var targetRegions = new List<Region> { new Region(1900, 1950) };
            var caller        = new AlleleCaller(callerConfig, new ChrIntervalSet(targetRegions, "chr1"));

            // -----------------------------------------------
            // Scenario under test:
            //  * a passing MNV that spans the interval edge should be called if it begins within the intervals
            //  * failing MNVs that span the interval edge and are reallocated to SNVs should only have
            //    those SNVs called when the SNVs fall within the intervals
            //    (broken-out SNVs outside the intervals should not be called, even if they gain enough
            //    support to be called).
            // -----------------------------------------------

            var strongMnv = new CandidateAllele("chr1", 1950, "TTT", "CCC", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 10, 0, 0 }
            };

            // only the first SNV should be called (1950 T>G)
            var weakMnvToG = new CandidateAllele("chr1", 1950, "TTT", "GGG", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };

            // starts one base earlier, so only its second SNV should be called (1950 T>G)
            var weakMnvToGBooster = new CandidateAllele("chr1", 1949, "TTTT", "GGGG", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };

            // none of these should be called
            var weakMnvToA = new CandidateAllele("chr1", 1950, "TTT", "AAA", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };

            var stateManager = MockStateManager(306, 0);

            var candidateBatch = new CandidateBatch(new List<CandidateAllele>
            {
                strongMnv,
                weakMnvToG,
                weakMnvToA,
                weakMnvToGBooster
            })
            {
                MaxClearedPosition = 2000
            };

            var called = caller.Call(candidateBatch, stateManager.Object);

            PrintResults(called.ToList());

            Assert.Equal(2, called.Count());
        }
        /// <summary>
        /// Collects candidates from the region blocks that are ready to be processed.
        /// Blocks are visited in ascending key order. When <paramref name="upToPosition"/> has a value,
        /// only keys with <c>key * _regionSize &lt;= upToPosition</c> are considered, and collection stops at
        /// the first block whose <c>MaxAlleleEndpoint</c> lies beyond <paramref name="upToPosition"/>.
        /// When it has no value, every block is collected.
        /// </summary>
        /// <param name="upToPosition">Position processing has reached, or null to collect from all blocks.</param>
        /// <param name="chrReference">Passed through to each block's <c>GetAllCandidates</c>.</param>
        /// <param name="forcesGtAlleles">Passed through to each block's <c>GetAllCandidates</c>.</param>
        /// <returns>
        /// null when <paramref name="upToPosition"/> maps to the same block key as the previous call;
        /// otherwise the batch, which may hold no candidates.
        /// </returns>
        public virtual ICandidateBatch GetCandidatesToProcess(int?upToPosition, ChrReference chrReference = null, HashSet <Tuple <string, int, string, string> > forcesGtAlleles = null)
        {
            try
            {
                var isBounded = upToPosition.HasValue;

                // nothing new to hand out until we have moved on to another block
                if (isBounded && GetBlockKey(upToPosition.Value) == _lastUpToBlockKey)
                {
                    return null;
                }

                var result = new CandidateBatch
                {
                    MaxClearedPosition = isBounded ? (int?)-1 : null
                };

                var orderedKeys = isBounded
                    ? _regionLookup.Keys.Where(key => key * _regionSize <= upToPosition).ToArray()
                    : _regionLookup.Keys.ToArray();

                // ascending order lets the loop below stop at the first block that must be held back
                Array.Sort(orderedKeys);

                var releasedBlocks = new List<MyRegionState>();

                foreach (var blockKey in orderedKeys)
                {
                    var current = _regionLookup[blockKey];

                    // a block with an allele reaching past upToPosition is held, along with everything after it
                    if (isBounded && current.MaxAlleleEndpoint > upToPosition)
                    {
                        break;
                    }

                    result.Add(current.GetAllCandidates(_includeRefAlleles, chrReference, _intervalSet, forcesGtAlleles));
                    result.BlockKeys.Add(blockKey);
                    releasedBlocks.Add(current);
                }

                if (releasedBlocks.Count > 0)
                {
                    result.ClearedRegions     = releasedBlocks.Select(released => released as Region).ToList();
                    result.MaxClearedPosition = releasedBlocks.Max(released => released.EndPosition);

                    if (isBounded)
                    {
                        var furthestAlleleEnd = releasedBlocks.Max(released => released.MaxAlleleEndpoint);

                        // some released allele extends beyond the cleared range
                        if (furthestAlleleEnd > result.MaxClearedPosition.Value && _trackOpenEnded)
                        {
                            AddCollapsableFromOtherBlocks(result,
                                                          result.MaxClearedPosition.Value,
                                                          upToPosition.Value);
                        }
                    }
                }

                return result;
            }
            finally
            {
                // runs on every exit path, including the early null return;
                // the value does not matter for the final (unbounded) round
                _lastUpToBlockKey = upToPosition.HasValue ? GetBlockKey(upToPosition.Value) : -1;
            }
        }
Пример #4
0
        /// <summary>
        /// Pads <paramref name="batch"/> with <paramref name="mapper"/> and asserts that the batch then
        /// holds as many candidates as <paramref name="expectedAlleles"/>, each of which is contained in
        /// <paramref name="expectedAlleles"/>.
        /// </summary>
        /// <param name="mapper">Padder under test.</param>
        /// <param name="batch">Batch handed to <c>Pad</c>.</param>
        /// <param name="expectedAlleles">Alleles the padded batch is expected to contain.</param>
        /// <param name="mapAll">Forwarded as the second argument of <c>Pad</c>.</param>
        private void ExecuteTest(RegionPadder mapper, CandidateBatch batch,
                                 List <CandidateAllele> expectedAlleles, bool mapAll = false)
        {
            mapper.Pad(batch, mapAll);

            var padded = batch.GetCandidates();
            Assert.Equal(expectedAlleles.Count, padded.Count());

            // equal counts plus membership of every padded allele
            foreach (var allele in padded)
            {
                var wasExpected = expectedAlleles.Contains(allele);
                Assert.True(wasExpected);
            }
        }
Пример #5
0
        /// <summary>
        /// Collects candidates from every block selected by <paramref name="upToPosition"/>, but marks as
        /// cleared only those blocks that start after <c>_lastMaxClearedPosition</c> and end more than
        /// <c>_regionSize</c> before <paramref name="upToPosition"/>.
        /// </summary>
        /// <param name="upToPosition">
        /// Position processing has reached. When null, all blocks contribute candidates and no block is
        /// marked as cleared.
        /// </param>
        /// <param name="chrReference">Passed through to each block's <c>GetAllCandidates</c>.</param>
        /// <param name="forcedGtAlleles">Not read by this override.</param>
        /// <returns>
        /// null when <paramref name="upToPosition"/> maps to the same block key as the previous call;
        /// otherwise the batch.
        /// </returns>
        public override ICandidateBatch GetCandidatesToProcess(int?upToPosition, ChrReference chrReference = null, HashSet <Tuple <string, int, string, string> > forcedGtAlleles = null)
        {
            try
            {
                var isBounded = upToPosition.HasValue;

                // nothing new to hand out until we have moved on to another block
                if (isBounded && GetBlockKey(upToPosition.Value) == _lastUpToBlockKey)
                {
                    return null;
                }

                var result = new CandidateBatch
                {
                    MaxClearedPosition = isBounded ? (int?)-1 : null
                };

                var orderedKeys = isBounded
                    ? _regionLookup.Keys.Where(key => key * _regionSize <= upToPosition).ToArray()
                    : _regionLookup.Keys.ToArray();

                // visit blocks in ascending key order
                Array.Sort(orderedKeys);

                var realignable = new List<MyRegionState>();

                foreach (var blockKey in orderedKeys)
                {
                    var current = _regionLookup[blockKey];

                    // every selected block contributes its candidates
                    // NOTE(review): forcedGtAlleles is not forwarded here - confirm this is intended
                    result.Add(current.GetAllCandidates(false, chrReference, null));

                    // realign only blocks that have not been cleared yet and sit a full window behind upToPosition
                    var shouldRealign = current.StartPosition > _lastMaxClearedPosition &&
                                        isBounded &&
                                        current.EndPosition + _regionSize < upToPosition;
                    if (!shouldRealign)
                    {
                        continue;
                    }

                    result.BlockKeys.Add(blockKey);
                    realignable.Add(current);
                }

                if (realignable.Count > 0)
                {
                    result.ClearedRegions     = realignable.Select(cleared => cleared as Region).ToList();
                    result.MaxClearedPosition = realignable.Max(cleared => cleared.EndPosition);
                }

                return result;
            }
            finally
            {
                // runs on every exit path, including the early null return;
                // the value does not matter for the final (unbounded) round
                _lastUpToBlockKey = upToPosition.HasValue ? GetBlockKey(upToPosition.Value) : -1;
            }
        }
Пример #6
0
        /// <summary>
        /// Creates a <c>MockFactoryWithDefaults</c> whose alignment source, state manager, variant finder,
        /// variant caller and region mapper are Moq mocks. Also assigns the <c>_candidateList</c> and
        /// <c>_batch</c> fields, which the mocked state manager and variant finder return.
        /// </summary>
        /// <param name="numIterations">
        /// Number of alignment sets the mocked source returns before it starts returning null.
        /// </param>
        /// <returns>The factory with all five mock properties assigned.</returns>
        private MockFactoryWithDefaults GetMockedFlowFactory(int numIterations)
        {
            // counts how many alignment sets have been handed out so far; shared by the two lambdas below
            var servedCount = 0;

            var flowFactory = new MockFactoryWithDefaults(new ApplicationOptions());

            // --- alignment source ---
            var alignmentSource = new Mock<IAlignmentSource>();

            alignmentSource.Setup(src => src.GetNextAlignmentSet()).Returns(() =>
            {
                if (servedCount >= numIterations)
                {
                    return null;
                }

                // read position is 1-based on the count served before this call; the counter advances afterwards
                return new AlignmentSet(TestHelper.CreateRead(_chrReference.Name, "AAA", 1 + servedCount++), null);
            });
            alignmentSource.Setup(src => src.LastClearedPosition).Returns(() => servedCount);
            alignmentSource.Setup(src => src.ChromosomeFilter).Returns(_chrReference.Name);
            flowFactory.MockAlignmentSource = alignmentSource;

            // --- state manager ---
            _candidateList = new List<CandidateAllele>
            {
                new CandidateAllele("chr1", 100, "A", "G", AlleleCategory.Snv)
            };
            _batch = new CandidateBatch(_candidateList);

            var stateManager = new Mock<IStateManager>();

            stateManager.Setup(mgr => mgr.GetCandidatesToProcess(It.IsAny<int?>(), _chrReference)).Returns(_batch);
            flowFactory.MockStateManager = stateManager;

            // --- variant finder ---
            var variantFinder = new Mock<ICandidateVariantFinder>();

            variantFinder.Setup(finder => finder.FindCandidates(It.IsAny<AlignmentSet>(), _chrReference.Sequence, _chrReference.Name)).Returns(_candidateList);
            flowFactory.MockVariantFinder = variantFinder;

            // --- variant caller ---
            var variantCaller = new Mock<IAlleleCaller>();

            variantCaller.Setup(caller => caller.Call(_batch, stateManager.Object)).Returns(_calledList);
            flowFactory.MockVariantCaller = variantCaller;

            // --- region mapper (no setups) ---
            flowFactory.MockRegionMapper = new Mock<IRegionPadder>();

            return flowFactory;
        }
Пример #7
0
        /// <summary>
        /// Collects candidates from the region blocks that are ready to be processed.
        /// When <paramref name="upToPosition"/> has a value, only keys with
        /// <c>(key + 1) * _regionSize &lt;= upToPosition</c> are considered, visited in ascending order, and
        /// collection stops at the first block whose <c>MaxAlleleEndpoint</c> lies beyond
        /// <paramref name="upToPosition"/>. When it has no value, every block is collected.
        /// </summary>
        /// <param name="upToPosition">Position processing has reached, or null to collect from all blocks.</param>
        /// <param name="chrReference">Passed through to each block's <c>GetAllCandidates</c>.</param>
        /// <returns>
        /// A batch, never null. It is left without candidates when <paramref name="upToPosition"/> maps to
        /// the same block key as the previous call.
        /// </returns>
        public ICandidateBatch GetCandidatesToProcess(int?upToPosition, ChrReference chrReference = null)
        {
            var isBounded = upToPosition.HasValue;

            var result = new CandidateBatch
            {
                MaxClearedPosition = isBounded ? (int?)-1 : null
            };

            // only fill the batch once we have moved on to another block
            var stillInSameBlock = isBounded && GetBlockKey(upToPosition.Value) == _lastUpToKey;

            if (!stillInSameBlock)
            {
                var orderedKeys = isBounded
                    ? _regionLookup.Keys.Where(key => (key + 1) * _regionSize <= upToPosition).ToArray()
                    : _regionLookup.Keys.ToArray();

                // ascending order lets the loop below stop at the first block that must be held back
                Array.Sort(orderedKeys);

                var releasedBlocks = new List<RegionState>();

                foreach (var blockKey in orderedKeys)
                {
                    var current = _regionLookup[blockKey];

                    // a block with an allele reaching past upToPosition is held, along with everything after it
                    if (isBounded && current.MaxAlleleEndpoint > upToPosition)
                    {
                        break;
                    }

                    result.Add(current.GetAllCandidates(_includeRefAlleles, chrReference, _intervalSet));
                    result.BlockKeys.Add(blockKey);
                    releasedBlocks.Add(current);
                }

                if (releasedBlocks.Count > 0)
                {
                    result.ClearedRegions     = releasedBlocks.Select(released => released as Region).ToList();
                    result.MaxClearedPosition = releasedBlocks.Max(released => released.EndPosition);
                }
            }

            // the value does not matter for the final (unbounded) round
            _lastUpToKey = upToPosition.HasValue ? GetBlockKey(upToPosition.Value) : -1;

            return result;
        }
Пример #8
0
        /// <summary>
        /// Calls a batch cleared up to position 2000 that contains MNVs reaching past that position, then
        /// verifies that the state manager received exactly one <c>AddCandidates</c> call carrying the three
        /// alleles expected for the next block, and that the passing MNV is among the called variants.
        /// </summary>
        public void CallVariants_MnvReallocatesToDifferentBlock()
        {
            var callerConfig = new VariantCallerConfig
            {
                MaxVariantQscore          = 100,
                NoiseLevelUsedForQScoring = 20,
                IncludeReferenceCalls     = true,
                MinCoverage      = 0,
                MinVariantQscore = 0,
                MinFrequency     = 6f / 150,
                ChrReference     = new ChrReference
                {
                    Sequence = "ACGTACGT",
                    Name     = "Boo"
                },
                GenotypeCalculator = new SomaticGenotyper(),
                LocusProcessor     = new SomaticLocusProcessor()
            };

            var caller = new AlleleCaller(callerConfig);

            var strongMnv = new CandidateAllele("chr1", 1999, "TTT", "CCC", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 10, 0, 0 }
            };
            var weakMnvToG = new CandidateAllele("chr1", 2000, "TTT", "GGG", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };
            var weakMnvToA = new CandidateAllele("chr1", 1999, "TTT", "AAA", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };
            var weakGappedMnv = new CandidateAllele("chr1", 2000, "TTT", "ATA", AlleleCategory.Mnv)
            {
                SupportByDirection = new[] { 5, 0, 0 }
            };

            var stateManager = MockStateManager(306, 0);

            // NOTE(review): the caller is constructed a second time with the same config - looks redundant, confirm
            caller = new AlleleCaller(callerConfig);

            var candidateBatch = new CandidateBatch(new List<CandidateAllele>
            {
                strongMnv,
                weakMnvToG,
                weakMnvToA,
                weakGappedMnv
            })
            {
                MaxClearedPosition = 2000
            };

            var callResults = caller.Call(candidateBatch, stateManager.Object);

            // NOTE(review): this callback is registered after Call has already run, so it presumably
            // never fires for the invocation verified below - confirm whether it is still needed
            stateManager.Setup(mgr => mgr.AddCandidates(It.IsAny<IEnumerable<CandidateAllele>>()))
            .Callback((IEnumerable<CandidateAllele> passedOn) => Console.WriteLine(passedOn.Count()));
            stateManager.Verify(mgr => mgr.AddCandidates(It.IsAny<IEnumerable<CandidateAllele>>()), Times.Once);

            // For regular MNVs that span blocks, the whole sub-MNV belonging to the next block should be
            // passed over together. If it begins with a ref, that ref is skipped and only the rest of the
            // MNV is delivered. So the next block should receive:
            //  - MNV at 2001 from weakMnvToG
            //  - SNV at 2001 from weakMnvToA
            //  - SNV at 2002 from weakGappedMnv

            // three alleles in total
            stateManager.Verify(mgr => mgr.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(passedOn => passedOn.Count() == 3)), Times.Once);

            // split by position: two at 2001, one at 2002
            stateManager.Verify(mgr => mgr.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(passedOn =>
                                                             passedOn.Count(allele => allele.ReferencePosition == 2001) == 2 &&
                                                             passedOn.Count(allele => allele.ReferencePosition == 2002) == 1)),
                                Times.Once);

            // split by position and category
            stateManager.Verify(mgr => mgr.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(passedOn =>
                                                             passedOn.Count(allele => allele.ReferencePosition == 2001 && allele.Type == AlleleCategory.Mnv) == 1 &&
                                                             passedOn.Count(allele => allele.ReferencePosition == 2001 && allele.Type == AlleleCategory.Snv) == 1 &&
                                                             passedOn.Count(allele => allele.ReferencePosition == 2001 && allele.Type == AlleleCategory.Reference) == 0 &&
                                                             passedOn.Count(allele => allele.ReferencePosition == 2002 && allele.Type == AlleleCategory.Snv) == 1
                                                             )),
                                Times.Once);

            var calledVariants = callResults.Values.SelectMany(group => group);

            PrintResults(calledVariants.ToList());

            // Passing MNV should have additional support from big failed MNV
            Assert.True(calledVariants.Any(called => MatchVariants(called, strongMnv, 10)));
        }
Пример #9
0
        /// <summary>
        /// Feeds one <c>RegionPadder</c> five successive batches and, after each, checks via
        /// <c>ExecuteTest</c> that the batch contains exactly the reference candidates expected for that
        /// step. The padder instance is shared, so the order of the steps matters.
        /// </summary>
        public void Map()
        {
            // local factories; the Region type is spelled out in full exactly as before
            Func<int, int, CallSomaticVariants.Logic.RegionState.Region> region =
                (start, end) => new CallSomaticVariants.Logic.RegionState.Region(start, end);
            Func<CallSomaticVariants.Logic.RegionState.Region[], CandidateBatch> batchClearing =
                cleared => new CandidateBatch
                {
                    ClearedRegions = new List<CallSomaticVariants.Logic.RegionState.Region>(cleared)
                };

            // set up test
            var targetIntervals = new List<CallSomaticVariants.Logic.RegionState.Region>
            {
                region(4, 10),
                region(15, 17),
                region(25, 39),
                region(50, 55),
                region(60, 70),
                region(80, 80)
            };

            var padder   = new RegionPadder(_chrReference, new ChrIntervalSet(targetIntervals, "chr1"));
            var expected = new List<CandidateAllele>();

            // ------------------------------------
            // step 1: first batch starts after the interval start - make sure beginning positions aren't skipped
            // ------------------------------------
            AddReferenceCandidatesByRange(expected, new List<Tuple<int, int>>
            {
                Tuple.Create(4, 4)
            });
            ExecuteTest(padder, batchClearing(new[] { region(5, 11) }), expected);

            // ------------------------------------
            // step 2: next batch starts after the second interval, fully covers the third interval
            // and partially covers the fourth
            // ------------------------------------
            expected.Clear();
            AddReferenceCandidatesByRange(expected, new List<Tuple<int, int>>
            {
                Tuple.Create(15, 17)
            });
            ExecuteTest(padder, batchClearing(new[] { region(20, 52) }), expected);

            // ------------------------------------
            // step 3: batch contains multiple cleared regions
            // ------------------------------------
            expected.Clear();
            AddReferenceCandidatesByRange(expected, new List<Tuple<int, int>>
            {
                Tuple.Create(53, 55),
                Tuple.Create(60, 61)
            });
            ExecuteTest(padder, batchClearing(new[] { region(58, 59), region(62, 68) }), expected);

            // ------------------------------------
            // step 4: empty batch (no cleared regions at all)
            // ------------------------------------
            expected.Clear();
            ExecuteTest(padder, new CandidateBatch { ClearedRegions = null }, expected);

            // ------------------------------------
            // step 5: all the rest (mapAll = true)
            // ------------------------------------
            expected.Clear();
            AddReferenceCandidatesByRange(expected, new List<Tuple<int, int>>
            {
                Tuple.Create(70, 70),
                Tuple.Create(80, 80)
            });
            ExecuteTest(padder, batchClearing(new[] { region(69, 69) }), expected, true);
        }