/// <summary>
/// Builds a batch from four seed alleles, checks that all of them are reported back,
/// then adds one more allele and checks that it is reported as well.
/// </summary>
public void CreateAndAdd()
{
    var seedAlleles = new List<CandidateAllele>
    {
        new CandidateAllele("chr1", 100, "A", "AT", AlleleCategory.Insertion),
        new CandidateAllele("chr1", 100, "A", "A", AlleleCategory.Reference),
        new CandidateAllele("chr1", 100, "A", "T", AlleleCategory.Snv),
        new CandidateAllele("chr1", 200, "A", "T", AlleleCategory.Mnv)
    };

    var batch = new CandidateBatch(seedAlleles);
    var reported = batch.GetCandidates();

    Assert.Equal(4, reported.Count);
    for (var i = 0; i < seedAlleles.Count; i++)
    {
        Assert.True(reported.Contains(seedAlleles[i]));
    }

    // Add a fifth candidate.
    // NOTE(review): the assertions below reuse the collection fetched before Add(),
    // so they only hold if GetCandidates() hands back something that reflects later additions.
    var deletion = new CandidateAllele("chr1", 200, "AT", "A", AlleleCategory.Deletion);
    var additions = new List<CandidateAllele> { deletion };
    batch.Add(additions);

    Assert.Equal(5, reported.Count);
    Assert.True(reported.Contains(deletion));
}
/// <summary>
/// Calls variants with a caller restricted to the interval chr1:1900-1950 and feeds it MNVs
/// that start on the last interval position, expecting exactly two calls to come back.
/// </summary>
public void CallVariants_MnvReallocatesToSnvOutsideInterval()
{
    var regions = new List<Region> { new Region(1900, 1950) };
    var intervals = new ChrIntervalSet(regions, "chr1");

    var callerConfig = new VariantCallerConfig
    {
        MaxVariantQscore = 100,
        EstimatedBaseCallQuality = 20,
        IncludeReferenceCalls = true,
        MinFrequency = 6f / 150
    };

    var caller = new AlleleCaller(callerConfig, intervals);

    // -----------------------------------------------
    // A passing MNV that spans the interval edge should be called if it begins within the intervals.
    // Failing MNVs that span the interval edge and are reallocated to SNVs should only have those
    // SNVs called if they fall within the intervals (broken-out SNVs outside the intervals should
    // not be called even if they gain enough support to be called).
    // -----------------------------------------------
    var passingMnv = new CandidateAllele("chr1", 1950, "TTT", "CCC", AlleleCategory.Mnv);
    passingMnv.SupportByDirection = new[] { 10, 0, 0 };

    // only the first SNV should be called (1950 T>G)
    var failingMnv1 = new CandidateAllele("chr1", 1950, "TTT", "GGG", AlleleCategory.Mnv);
    failingMnv1.SupportByDirection = new[] { 5, 0, 0 };

    // only the second SNV should be called (1950 T>G)
    var failingMnv1Booster = new CandidateAllele("chr1", 1949, "TTTT", "GGGG", AlleleCategory.Mnv);
    failingMnv1Booster.SupportByDirection = new[] { 5, 0, 0 };

    // none of these should be called
    var failingMnv2 = new CandidateAllele("chr1", 1950, "TTT", "AAA", AlleleCategory.Mnv);
    failingMnv2.SupportByDirection = new[] { 5, 0, 0 };

    var stateManagerMock = MockStateManager(306, 0);

    var candidates = new List<CandidateAllele>();
    candidates.Add(passingMnv);
    candidates.Add(failingMnv1);
    candidates.Add(failingMnv2);
    candidates.Add(failingMnv1Booster);

    var batch = new CandidateBatch(candidates);
    batch.MaxClearedPosition = 2000;

    var called = caller.Call(batch, stateManagerMock.Object);

    PrintResults(called.ToList());

    Assert.Equal(2, called.Count());
}
/// <summary>
/// Collects the candidates that are ready to be handed off, given how far input has been cleared.
/// Blocks are visited in ascending key order and the walk stops at the first block whose
/// MaxAlleleEndpoint lies beyond <paramref name="upToPosition"/>, so that variants spanning
/// blocks are only released once their information is complete.
/// </summary>
/// <param name="upToPosition">
/// Position limit. When set, only blocks whose key multiplied by the region size is at most this
/// value are considered. When null, every block is considered and nothing is held back.
/// </param>
/// <param name="chrReference">Forwarded unchanged to each block's GetAllCandidates call.</param>
/// <param name="forcesGtAlleles">Forwarded unchanged to each block's GetAllCandidates call.</param>
/// <returns>
/// null when <paramref name="upToPosition"/> maps to the same block key as on the previous call;
/// otherwise the batch that was built (which may contain no blocks).
/// </returns>
public virtual ICandidateBatch GetCandidatesToProcess(int? upToPosition, ChrReference chrReference = null, HashSet<Tuple<string, int, string, string>> forcesGtAlleles = null)
{
    var bounded = upToPosition.HasValue;

    try
    {
        // Only build a real batch once we have moved on to another block.
        if (bounded && GetBlockKey(upToPosition.Value) == _lastUpToBlockKey)
        {
            return null;
        }

        int? initialCleared = null;
        if (bounded)
        {
            initialCleared = -1;
        }

        var batch = new CandidateBatch { MaxClearedPosition = initialCleared };

        var sortedKeys = _regionLookup.Keys
            .Where(k => !bounded || k * _regionSize <= upToPosition.Value)
            .ToArray();

        // Keys must be ordered so we can bail out as soon as we reach a held block.
        Array.Sort(sortedKeys);

        var released = new List<MyRegionState>();
        foreach (var blockKey in sortedKeys)
        {
            var current = _regionLookup[blockKey];

            var mustHold = bounded && current.MaxAlleleEndpoint > upToPosition.Value;
            if (mustHold)
            {
                break;
            }

            var blockCandidates = current.GetAllCandidates(_includeRefAlleles, chrReference, _intervalSet, forcesGtAlleles);
            batch.Add(blockCandidates);
            batch.BlockKeys.Add(blockKey);
            released.Add(current);
        }

        if (!released.Any())
        {
            return batch;
        }

        batch.ClearedRegions = new List<Region>(released.Select(b => b as Region));
        batch.MaxClearedPosition = released.Max(b => b.EndPosition);

        // A released block holds an allele that ends past the cleared range.
        if (bounded
            && released.Max(b => b.MaxAlleleEndpoint) > batch.MaxClearedPosition.Value
            && _trackOpenEnded)
        {
            AddCollapsableFromOtherBlocks(batch, batch.MaxClearedPosition.Value, upToPosition.Value);
        }

        return batch;
    }
    finally
    {
        // Always remember which block this call landed in; the value is irrelevant for the last round.
        if (upToPosition.HasValue)
        {
            _lastUpToBlockKey = GetBlockKey(upToPosition.Value);
        }
        else
        {
            _lastUpToBlockKey = -1;
        }
    }
}
/// <summary>
/// Runs the padder over the batch and checks that the batch then holds as many
/// candidates as expected and that each of them appears in <paramref name="expectedAlleles"/>.
/// </summary>
/// <param name="mapper">Padder under test.</param>
/// <param name="batch">Batch passed to the padder's Pad call.</param>
/// <param name="expectedAlleles">Alleles the batch is expected to contain afterwards.</param>
/// <param name="mapAll">Forwarded as the second argument of the Pad call.</param>
private void ExecuteTest(RegionPadder mapper, CandidateBatch batch, List<CandidateAllele> expectedAlleles, bool mapAll = false)
{
    mapper.Pad(batch, mapAll);

    var padded = batch.GetCandidates();
    var expectedCount = expectedAlleles.Count;

    Assert.Equal(expectedCount, padded.Count());
    Assert.True(padded.All(allele => expectedAlleles.Contains(allele)));
}
/// <summary>
/// Builds a batch holding the candidates of every eligible block, while marking as cleared
/// only those blocks that start after the last cleared position and end more than one region
/// size before <paramref name="upToPosition"/>.
/// </summary>
/// <param name="upToPosition">
/// Position limit. When set, only blocks whose key multiplied by the region size is at most this
/// value contribute candidates. When null, all blocks contribute and none is marked as cleared.
/// </param>
/// <param name="chrReference">Forwarded to each block's GetAllCandidates call.</param>
/// <param name="forcedGtAlleles">Not read by this override.</param>
/// <returns>
/// null when <paramref name="upToPosition"/> maps to the same block key as on the previous call;
/// otherwise the batch that was built.
/// </returns>
public override ICandidateBatch GetCandidatesToProcess(int? upToPosition, ChrReference chrReference = null, HashSet<Tuple<string, int, string, string>> forcedGtAlleles = null)
{
    var bounded = upToPosition.HasValue;

    try
    {
        // Only build a real batch once we have moved on to another block.
        if (bounded && GetBlockKey(upToPosition.Value) == _lastUpToBlockKey)
        {
            return null;
        }

        int? initialCleared = null;
        if (bounded)
        {
            initialCleared = -1;
        }

        var batch = new CandidateBatch { MaxClearedPosition = initialCleared };

        var sortedKeys = _regionLookup.Keys
            .Where(k => !bounded || k * _regionSize <= upToPosition.Value)
            .ToArray();
        Array.Sort(sortedKeys);

        var realignTargets = new List<MyRegionState>();
        foreach (var blockKey in sortedKeys)
        {
            var current = _regionLookup[blockKey];

            // Every visited block contributes its candidates.
            // NOTE(review): the first argument is hard-coded to false and the interval set to null — confirm intent.
            batch.Add(current.GetAllCandidates(false, chrReference, null));

            // Only realign blocks that have not been cleared yet and sit a full window before upToPosition.
            var notYetCleared = current.StartPosition > _lastMaxClearedPosition;
            if (!notYetCleared)
            {
                continue;
            }

            var windowBehind = upToPosition.HasValue && current.EndPosition + _regionSize < upToPosition;
            if (!windowBehind)
            {
                continue;
            }

            batch.BlockKeys.Add(blockKey);
            realignTargets.Add(current);
        }

        if (realignTargets.Any())
        {
            batch.ClearedRegions = new List<Region>(realignTargets.Select(b => b as Region));
            batch.MaxClearedPosition = realignTargets.Max(b => b.EndPosition);
        }

        return batch;
    }
    finally
    {
        // Always remember which block this call landed in; the value is irrelevant for the last round.
        if (upToPosition.HasValue)
        {
            _lastUpToBlockKey = GetBlockKey(upToPosition.Value);
        }
        else
        {
            _lastUpToBlockKey = -1;
        }
    }
}
/// <summary>
/// Creates a factory whose alignment source, state manager, variant finder, variant caller and
/// region mapper are all mocks. The alignment source yields <paramref name="numIterations"/>
/// alignment sets and then null.
/// </summary>
/// <param name="numIterations">Number of alignment sets the mocked source hands out before returning null.</param>
/// <returns>The factory with all mocks attached.</returns>
private MockFactoryWithDefaults GetMockedFlowFactory(int numIterations)
{
    var servedSets = 0;
    var factory = new MockFactoryWithDefaults(new ApplicationOptions());

    // Alignment source: one "AAA" read per call, positioned at 1 + the number of sets served so far.
    var alignmentSource = new Mock<IAlignmentSource>();
    alignmentSource
        .Setup(s => s.GetNextAlignmentSet())
        .Returns(() =>
        {
            if (servedSets >= numIterations)
            {
                return null;
            }

            return new AlignmentSet(TestHelper.CreateRead(_chrReference.Name, "AAA", 1 + servedSets++), null);
        });
    alignmentSource
        .Setup(s => s.LastClearedPosition)
        .Returns(() => servedSets);
    alignmentSource
        .Setup(s => s.ChromosomeFilter)
        .Returns(_chrReference.Name);
    factory.MockAlignmentSource = alignmentSource;

    // State manager: always hands back the same single-SNV batch.
    _candidateList = new List<CandidateAllele>
    {
        new CandidateAllele("chr1", 100, "A", "G", AlleleCategory.Snv)
    };
    _batch = new CandidateBatch(_candidateList);

    var stateManager = new Mock<IStateManager>();
    stateManager
        .Setup(s => s.GetCandidatesToProcess(It.IsAny<int?>(), _chrReference))
        .Returns(_batch);
    factory.MockStateManager = stateManager;

    // Variant finder: returns the fixed candidate list for any alignment set on the test reference.
    var variantFinder = new Mock<ICandidateVariantFinder>();
    variantFinder
        .Setup(v => v.FindCandidates(It.IsAny<AlignmentSet>(), _chrReference.Sequence, _chrReference.Name))
        .Returns(_candidateList);
    factory.MockVariantFinder = variantFinder;

    // Variant caller: returns the prepared called list for the fixed batch.
    var variantCaller = new Mock<IAlleleCaller>();
    variantCaller
        .Setup(v => v.Call(_batch, stateManager.Object))
        .Returns(_calledList);
    factory.MockVariantCaller = variantCaller;

    // Region mapper: a bare mock with no setups.
    factory.MockRegionMapper = new Mock<IRegionPadder>();

    return factory;
}
/// <summary>
/// Collects candidates from blocks that lie entirely at or before <paramref name="upToPosition"/>.
/// Blocks are visited in ascending key order and the walk stops at the first block whose
/// MaxAlleleEndpoint lies beyond <paramref name="upToPosition"/>, so that variants spanning
/// blocks are only released once their information is complete.
/// </summary>
/// <param name="upToPosition">
/// Position limit. When set, only blocks for which (key + 1) multiplied by the region size is at
/// most this value are considered. When null, every block is considered and nothing is held back.
/// </param>
/// <param name="chrReference">Forwarded unchanged to each block's GetAllCandidates call.</param>
/// <returns>
/// The batch. It is left without blocks when <paramref name="upToPosition"/> maps to the same
/// block key as on the previous call.
/// </returns>
public ICandidateBatch GetCandidatesToProcess(int? upToPosition, ChrReference chrReference = null)
{
    var bounded = upToPosition.HasValue;

    int? initialCleared = null;
    if (bounded)
    {
        initialCleared = -1;
    }

    var batch = new CandidateBatch { MaxClearedPosition = initialCleared };

    // Only fill the batch once we have moved on to another block.
    var movedToNewBlock = !bounded || GetBlockKey(upToPosition.Value) != _lastUpToKey;
    if (movedToNewBlock)
    {
        var sortedKeys = _regionLookup.Keys
            .Where(k => !bounded || (k + 1) * _regionSize <= upToPosition.Value)
            .ToArray();

        // Keys must be ordered so we can bail out as soon as we reach a held block.
        Array.Sort(sortedKeys);

        var released = new List<RegionState>();
        foreach (var blockKey in sortedKeys)
        {
            var current = _regionLookup[blockKey];

            var mustHold = bounded && current.MaxAlleleEndpoint > upToPosition.Value;
            if (mustHold)
            {
                break;
            }

            batch.Add(current.GetAllCandidates(_includeRefAlleles, chrReference, _intervalSet));
            batch.BlockKeys.Add(blockKey);
            released.Add(current);
        }

        if (released.Any())
        {
            batch.ClearedRegions = new List<Region>(released.Select(b => b as Region));
            batch.MaxClearedPosition = released.Max(b => b.EndPosition);
        }
    }

    // Remember which block this call landed in; the value is irrelevant for the last round.
    if (bounded)
    {
        _lastUpToKey = GetBlockKey(upToPosition.Value);
    }
    else
    {
        _lastUpToKey = -1;
    }

    return batch;
}
/// <summary>
/// Calls variants on a batch cleared up to position 2000 that contains MNVs reaching past that
/// position, and verifies that the parts belonging to the next block are handed back to the
/// state manager in a single AddCandidates call.
/// </summary>
public void CallVariants_MnvReallocatesToDifferentBlock()
{
    var config = new VariantCallerConfig
    {
        MaxVariantQscore = 100,
        NoiseLevelUsedForQScoring = 20,
        IncludeReferenceCalls = true,
        MinCoverage = 0,
        MinVariantQscore = 0,
        MinFrequency = 6f / 150,
        ChrReference = new ChrReference
        {
            Sequence = "ACGTACGT",
            Name = "Boo"
        },
        GenotypeCalculator = new SomaticGenotyper(),
        LocusProcessor = new SomaticLocusProcessor()
    };

    // Constructed once; the original built a second, identical caller and discarded the first.
    var variantCaller = new AlleleCaller(config);

    var passingMnv = new CandidateAllele("chr1", 1999, "TTT", "CCC", AlleleCategory.Mnv)
    {
        SupportByDirection = new[] { 10, 0, 0 }
    };
    var failingMnv = new CandidateAllele("chr1", 2000, "TTT", "GGG", AlleleCategory.Mnv)
    {
        SupportByDirection = new[] { 5, 0, 0 }
    };
    var failingMnv2 = new CandidateAllele("chr1", 1999, "TTT", "AAA", AlleleCategory.Mnv)
    {
        SupportByDirection = new[] { 5, 0, 0 }
    };
    var failingGappedMnv = new CandidateAllele("chr1", 2000, "TTT", "ATA", AlleleCategory.Mnv)
    {
        SupportByDirection = new[] { 5, 0, 0 }
    };

    var mockStateManager = MockStateManager(306, 0);

    // Register the diagnostic callback BEFORE the act. It used to be set up after Call(),
    // which meant it could never fire for the invocation this test is about.
    mockStateManager.Setup(c => c.AddCandidates(It.IsAny<IEnumerable<CandidateAllele>>()))
        .Callback((IEnumerable<CandidateAllele> vars) => Console.WriteLine(vars.Count()));

    var candidateVariants = new List<CandidateAllele>
    {
        passingMnv,
        failingMnv,
        failingMnv2,
        failingGappedMnv
    };

    var batch = new CandidateBatch(candidateVariants) { MaxClearedPosition = 2000 };

    var calledAlleles = variantCaller.Call(batch, mockStateManager.Object);

    mockStateManager.Verify(c => c.AddCandidates(It.IsAny<IEnumerable<CandidateAllele>>()), Times.Once);

    // For regular MNVs that span blocks, the whole sub-MNV belonging to the next block should be passed over together.
    // If it begins with a ref, that ref should be skipped and just the rest of the MNV delivered.
    // Thus we should have the following added to the next block:
    //  - MNV at 2001 from failingMnv
    //  - SNV at 2001 from failingMnv2
    //  - SNV at 2002 from failingGappedMnv
    mockStateManager.Verify(c => c.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(x => x.Count() == 3)), Times.Once);

    mockStateManager.Verify(c => c.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(x =>
        x.Count(a => a.ReferencePosition == 2001) == 2 &&
        x.Count(a => a.ReferencePosition == 2002) == 1)), Times.Once);

    mockStateManager.Verify(c => c.AddCandidates(It.Is<IEnumerable<CandidateAllele>>(x =>
        x.Count(a => a.ReferencePosition == 2001 && a.Type == AlleleCategory.Mnv) == 1 &&
        x.Count(a => a.ReferencePosition == 2001 && a.Type == AlleleCategory.Snv) == 1 &&
        x.Count(a => a.ReferencePosition == 2001 && a.Type == AlleleCategory.Reference) == 0 &&
        x.Count(a => a.ReferencePosition == 2002 && a.Type == AlleleCategory.Snv) == 1
        )), Times.Once);

    var variants = calledAlleles.Values.SelectMany(v => v);

    PrintResults(variants.ToList());

    Assert.True(variants.Any(v => MatchVariants(v, passingMnv, 10))); // Passing MNV should have additional support from big failed MNV
}
/// <summary>
/// Feeds a sequence of batches with different cleared regions through a padder built over six
/// intervals on chr1 and checks, batch by batch, which reference candidates end up in the batch.
/// </summary>
public void Map()
{
    // Local factory to keep the fully qualified Region constructor in one place.
    Func<int, int, CallSomaticVariants.Logic.RegionState.Region> region =
        (start, end) => new CallSomaticVariants.Logic.RegionState.Region(start, end);

    // set up test
    var intervals = new List<CallSomaticVariants.Logic.RegionState.Region>
    {
        region(4, 10),
        region(15, 17),
        region(25, 39),
        region(50, 55),
        region(60, 70),
        region(80, 80)
    };

    var intervalSet = new ChrIntervalSet(intervals, "chr1");
    var mapper = new RegionPadder(_chrReference, intervalSet);
    var expectedAlleles = new List<CandidateAllele>();

    // ------------------------------------
    // first batch starts after interval start - make sure beginning positions aren't skipped
    // ------------------------------------
    var batch = new CandidateBatch();
    batch.ClearedRegions = new List<CallSomaticVariants.Logic.RegionState.Region> { region(5, 11) };

    AddReferenceCandidatesByRange(expectedAlleles, new List<Tuple<int, int>> { Tuple.Create(4, 4) });
    ExecuteTest(mapper, batch, expectedAlleles);

    // ------------------------------------
    // next batch starts after the second interval, fully covers the third interval and partially the fourth
    // ------------------------------------
    batch = new CandidateBatch();
    batch.ClearedRegions = new List<CallSomaticVariants.Logic.RegionState.Region> { region(20, 52) };

    expectedAlleles.Clear();
    AddReferenceCandidatesByRange(expectedAlleles, new List<Tuple<int, int>> { Tuple.Create(15, 17) });
    ExecuteTest(mapper, batch, expectedAlleles);

    // ------------------------------------
    // next batch contains multiple cleared regions
    // ------------------------------------
    batch = new CandidateBatch();
    batch.ClearedRegions = new List<CallSomaticVariants.Logic.RegionState.Region>
    {
        region(58, 59),
        region(62, 68)
    };

    expectedAlleles.Clear();
    AddReferenceCandidatesByRange(expectedAlleles, new List<Tuple<int, int>>
    {
        Tuple.Create(53, 55),
        Tuple.Create(60, 61)
    });
    ExecuteTest(mapper, batch, expectedAlleles);

    // ------------------------------------
    // empty batch
    // ------------------------------------
    batch = new CandidateBatch();
    batch.ClearedRegions = null;

    expectedAlleles.Clear();
    ExecuteTest(mapper, batch, expectedAlleles);

    // ------------------------------------
    // all the rest
    // ------------------------------------
    batch = new CandidateBatch();
    batch.ClearedRegions = new List<CallSomaticVariants.Logic.RegionState.Region> { region(69, 69) };

    expectedAlleles.Clear();
    AddReferenceCandidatesByRange(expectedAlleles, new List<Tuple<int, int>>
    {
        Tuple.Create(70, 70),
        Tuple.Create(80, 80)
    });
    ExecuteTest(mapper, batch, expectedAlleles, true);
}