コード例 #1
0
        /// <summary>
        /// Builds and links the dataflow blocks that process one region.
        /// For every classification, the per-classification blocks are linked either to an
        /// early-flush block or to an actionable-pairs block that collects results into a lookup.
        /// Once all of those blocks (and the previous region's edge task, if any) have finished,
        /// the collected region data is posted to the aggregation block, whose output flows through
        /// an intermediate block (which records the resulting edge state) into the writer block.
        /// </summary>
        /// <param name="pairClassifierBlock">Source block handed to the per-classification block builder.</param>
        /// <param name="startPosition">Region start; also the key under which this region's edge state and edge task are stored.</param>
        /// <param name="endPosition">Region end.</param>
        /// <param name="edgeStates">Edge states keyed by region start. The entry for <paramref name="prevBlockStart"/> is removed and consumed; this region's result is stored under <paramref name="startPosition"/>.</param>
        /// <param name="edgeToWaitOn">Tasks keyed by region start. The task for <paramref name="prevBlockStart"/> (if present) is awaited before aggregation; this region's intermediate block completion is registered unless <paramref name="isFinalTask"/> is true.</param>
        /// <param name="prevBlockStart">Start position key of the preceding region.</param>
        /// <param name="isFinalTask">Forwarded to the aggregation task; when true no edge task is registered for this region.</param>
        /// <returns>Two tasks: the continuation that prepares and posts the region data, and the writer block's completion.</returns>
        /// <exception cref="Exception">Thrown when a task cannot be added to the set of tasks to wait for.</exception>
        public Task[] GetAndLinkAllClassificationBlocksWithEcFinalization(
            ISourceBlock <PairResult> pairClassifierBlock,
            int startPosition, int endPosition, ConcurrentDictionary <int, EdgeState> edgeStates,
            ConcurrentDictionary <int, Task> edgeToWaitOn, int prevBlockStart,
            bool isFinalTask = false)
        {
            if (_lightDebug)
            {
                Logger.WriteToLog(
                    $"Creating tasks for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var allToWaitFor = new ConcurrentDictionary <Task, int>();

            var messySiteWidth = _geminiOptions.MessySiteWidth;
            var effectiveMax   = 0;

            var adjustedStartPosition = RoundedStartPosition(startPosition, messySiteWidth);

            var pairResultLookup = new ConcurrentDictionary <PairClassification, List <PairResult> >();
            var indelLookup      = new ConcurrentDictionary <string, IndelEvidence>();

            var regionLength = endPosition - adjustedStartPosition;
            var numBins      = (regionLength / messySiteWidth) + 1000;

            var totalBinCounts          = InitializeTotalBinCounts(numBins);
            var singleMismatchBinCounts = InitializeSingleMismatchBinCounts(numBins);

            var actBlockFactory = _actionBlockFactoryProvider.GetFactory(startPosition, endPosition,
                                                                         adjustedStartPosition, totalBinCounts, singleMismatchBinCounts, numBins, allToWaitFor);

            foreach (var classification in classifications)
            {
                var toWaitFor = GetAndLinkPerClassificationBlocksWithEcFinalization(pairClassifierBlock, classification, indelLookup);

                var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
                var categoryIsRealignable = _categoriesForRealignment.Contains(classification);

                // Even if we're not going to realign these reads, they may still be useful for bin evidence, so don't give them the immediate flush
                var shouldCollectBinEvidence = TypeClassifier.MessyTypes.Contains(classification) || TypeClassifier._indelTypes.Contains(classification);

                var isSingleMismatch = _geminiOptions.AvoidLikelySnvs && (classification == PairClassification.SingleMismatchStitched ||
                                                                          classification == PairClassification.UnstitchSingleMismatch);

                if (!(categoryIsRealignable || doStitch || shouldCollectBinEvidence))
                {
                    // Nothing downstream needs these reads, so they go to the early-flush block.
                    var actBlock = actBlockFactory.GetEarlyFlushBlock(classification, isSingleMismatch);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    // Drop tasks that have already completed before registering the new one.
                    var toRemove = allToWaitFor.Keys.Where(x => x.IsCompleted);
                    foreach (var task in toRemove)
                    {
                        allToWaitFor.TryRemove(task, out _);
                    }
                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
                else
                {
                    // These reads are collected into pairResultLookup for aggregation.
                    var actBlock = actBlockFactory.GetActionablePairsBlock(classification, pairResultLookup);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
            }

            var finalTask = AggregateTask(indelLookup, startPosition, endPosition, isFinalTask, _progressTracker);
            var intermediateWriterTask = new TransformBlock <AggregateRegionResults, List <BamAlignment> >(results =>
            {
                // Publish this region's edge state under its start position, then pass the alignments on.
                edgeStates.AddOrUpdate(startPosition, results.EdgeState, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state already exists: {s}.");
                    return(results.EdgeState);
                });
                return(results.AlignmentsReadyToBeFlushed);
            }, new ExecutionDataflowBlockOptions()
            {
                EnsureOrdered = false
            });

            var finalWriteTask = actBlockFactory.GetWriterBlock();

            finalTask.LinkTo(intermediateWriterTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });
            intermediateWriterTask.LinkTo(finalWriteTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });

            // Single atomic lookup instead of ContainsKey + indexer, which could race with a concurrent removal.
            if (edgeToWaitOn.TryGetValue(prevBlockStart, out var previousEdgeTask))
            {
                if (!allToWaitFor.TryAdd(previousEdgeTask, 1))
                {
                    throw new Exception("Failed to add task for previous edge.");
                }
            }
            else
            {
                Logger.WriteToLog($"At {startPosition}, prev block is {prevBlockStart}, nothing to wait on.");
            }

            if (!isFinalTask)
            {
                edgeToWaitOn.AddOrUpdate(startPosition, intermediateWriterTask.Completion, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state task already exists: {s}.");
                    return(intermediateWriterTask.Completion);
                });
            }

            var allTasks = allToWaitFor.Keys.ToList();
            var t        = Task.WhenAll(allTasks)
                           .ContinueWith(_ =>
            {
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing for aggregation for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (allTasks.Any(x => x.Status != TaskStatus.RanToCompletion))
                {
                    Logger.WriteToLog("ERROR: Task did not complete.");

                    foreach (var task in allTasks)
                    {
                        Logger.WriteToLog($"{task.Id}\t{task.Status}\t{task.Exception}");
                        if (task.Status == TaskStatus.Faulted)
                        {
                            // Pass the exception along to the final task so it can be forced to error out.
                            finalTask = ForceFailFinalTask(intermediateWriterTask, task.Exception);
                        }
                    }
                }

                var numStillToProcess = 0;
                foreach (var item in pairResultLookup)
                {
                    if (item.Value.Count == 0)
                    {
                        // Enumerable.Max throws on an empty sequence; an empty category contributes nothing.
                        continue;
                    }

                    effectiveMax       = Math.Max(effectiveMax, item.Value.Max(x => x.ReadPair.MaxPosition));
                    numStillToProcess += item.Value.Count;
                }

                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing edge state info for region {_chrom}:{startPosition}-{endPosition}.");
                }

                EdgeState edgeState = null;
                var extraBins       = 0;
                // TryRemove takes the previous region's edge state atomically; the null check guards
                // against a stored null value so the dereference below cannot throw.
                if (edgeStates.TryRemove(prevBlockStart, out edgeState) && edgeState != null)
                {
                    if (edgeState.EdgeIndels.Any() || edgeState.EdgeAlignments.Any())
                    {
                        // Extend this region's bins leftwards so they cover the carried-over edge data.
                        var newAdjustedStartPosition = RoundedStartPosition(Math.Min(adjustedStartPosition, edgeState.EffectiveMinPosition), messySiteWidth);
                        extraBins             = (adjustedStartPosition - newAdjustedStartPosition) / messySiteWidth;
                        adjustedStartPosition = newAdjustedStartPosition;
                    }
                }
                allToWaitFor.Clear();
                allTasks.Clear();


                if (_lightDebug)
                {
                    var totalReadsInRegion = _categoryLookup.Values.Sum();
                    Console.WriteLine($"STILL TO PROCESS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {numStillToProcess}");
                    Console.WriteLine(
                        $"READS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {totalReadsInRegion}");
                    foreach (var kvp in _categoryLookup)
                    {
                        Console.WriteLine(
                            $"CATEGORYCOUNT ({startPosition}-{endPosition} (eff:{effectiveMax})): {kvp.Key}: {kvp.Value} ({Math.Round(kvp.Value * 100 / (float)totalReadsInRegion)}%)");
                    }
                }

                var totalNumBins       = numBins + extraBins;
                var allHits            = new uint[totalNumBins];
                var singleMismatchHits = new uint[totalNumBins];

                // Copy the per-bin counts, shifted right by the number of bins prepended for the edge.
                for (var i = 0; i < totalNumBins; i++)
                {
                    var newBin = i + extraBins;
                    if (newBin >= totalNumBins)
                    {
                        break;
                    }

                    if (totalBinCounts[i] > 0)
                    {
                        allHits[newBin]            = totalBinCounts[i];
                        singleMismatchHits[newBin] = singleMismatchBinCounts[i];
                    }
                }


                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Creating bin evidence for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var binEvidence = _binEvidenceFactory.GetBinEvidence(totalNumBins, adjustedStartPosition);
                binEvidence.SetSingleMismatchHits(singleMismatchHits);
                binEvidence.AddAllHits(allHits);
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (edgeState != null)
                {
                    var edgeBinInNew = binEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    var edgeBinInOld = edgeState.BinEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    AddEdgeHits(edgeState, binEvidence, edgeBinInOld, edgeBinInOld - edgeBinInNew);
                }
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Done adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var finalState = new RegionDataForAggregation()
                {
                    BinEvidence          = binEvidence,
                    PairResultLookup     = pairResultLookup,
                    EdgeState            = edgeState,
                    EffectiveMaxPosition = effectiveMax,
                    EffectiveMinPosition = adjustedStartPosition
                };

                // Hand the region over to the aggregation block and signal that no more input follows.
                finalTask.Post(finalState);
                finalTask.Complete();
            });

            return(new[] { t, finalWriteTask.Completion });
        }
コード例 #2
0
        /// <summary>
        /// Aggregates the collected pair results for one region.
        /// Finalizes the indels, adds mess evidence for every pair to the bin evidence, sets aside pairs
        /// close to the region's effective end for the next block (unless this is the final task),
        /// processes each category (snowball categories first), updates the master outcome and indel
        /// lookups and the progress counters, and returns the alignments ready to be flushed together
        /// with the edge state for the next region.
        /// </summary>
        /// <param name="indelLookup">Indel evidence gathered for this region; synced with the master indel lookup.</param>
        /// <param name="startPosition">Region start (used for logging, category processing and the edge state name).</param>
        /// <param name="endPosition">Region end (used for logging, category processing and the edge state name).</param>
        /// <param name="isFinalTask">When true no pairs are deferred to a next block and the returned edge state is only named "Final".</param>
        /// <param name="regionData">Bin evidence, pair results, previous edge state and effective min/max positions for the region.</param>
        /// <returns>The alignments ready to be flushed and the edge state to hand to the next region.</returns>
        public AggregateRegionResults GetAggregateRegionResults(ConcurrentDictionary <string, IndelEvidence> indelLookup,
                                                                int startPosition,
                                                                int endPosition, bool isFinalTask, RegionDataForAggregation regionData)
        {
            if (_geminiOptions.LightDebug)
            {
                Logger.WriteToLog(
                    $"Started processing for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var adjustedStartPosition = regionData.EffectiveMinPosition;
            // Pairs reaching past this position are handed to the next block instead of being processed here.
            var edgeThresholdOrig     = Math.Max(1, regionData.EffectiveMaxPosition - 5000);
            var finalIndelLookup      = GetAndSyncFinalIndelLookup(indelLookup, _masterIndelLookup);
            var edgeState             = regionData.EdgeState;
            var nextEdgeMinPosition   = int.MaxValue;

            var finalizedIndels         = FinalizeIndels(finalIndelLookup, _chrReference, regionData.EffectiveMaxPosition);
            var finalizedIndelsForChrom = GetFinalizedIndelsForChrom(_chrom, finalizedIndels, edgeState);

            IChromosomeIndelSource indelSource = null;

            var       messySiteWidth = _geminiOptions.MessySiteWidth;
            const int binsToExtendTo = 2; // Treated as <, so 2 means we get to extend status to one on either side

            var binEvidence    = regionData.BinEvidence;
            var binConclusions = new BinConclusions(binEvidence, _geminiOptions.CollectDepth, trackDirectionalMess: _geminiOptions.SilenceDirectionalMessReads, trackMapqMess: _geminiOptions.SilenceMessyMapMessReads);
            var numBins        = binEvidence.NumBins;

            bool shouldRealignAtAll = finalizedIndelsForChrom.Any();

            var imperfectFreqThreshold   = _geminiOptions.ImperfectFreqThreshold;
            var indelRegionfreqThreshold = _geminiOptions.IndelRegionFreqThreshold;
            var messySiteThreshold       = _geminiOptions.MessySiteThreshold;

            var numRetrievedFromLastBlock = 0;
            var numPairsSentToNextBlock   = 0;
            var pairResultsForNextBlock   = new Dictionary <PairClassification, List <PairResult> >();

            // Work on a private copy so the lists can be modified freely below.
            var pairResultLookup = new Dictionary <PairClassification, List <PairResult> >();

            foreach (var key in regionData.PairResultLookup.Keys)
            {
                if (!pairResultLookup.TryGetValue(key, out var copiedPairs))
                {
                    copiedPairs = new List <PairResult>();
                    pairResultLookup.Add(key, copiedPairs);
                }

                copiedPairs.AddRange(regionData.PairResultLookup[key]);
            }

            foreach (var category in pairResultLookup)
            {
                var isMessy          = TypeClassifier.MessyTypes.Contains(category.Key);
                var isIndel          = TypeClassifier._indelTypes.Contains(category.Key);
                var isSingleMismatch = _geminiOptions.AvoidLikelySnvs &&
                                       (category.Key == PairClassification.SingleMismatchStitched ||
                                        category.Key == PairClassification.UnstitchSingleMismatch);
                var isForwardOnlyMessy = IsForwardMessy(category.Key);
                var isReverseOnlyMessy = IsReverseMessy(category.Key);
                var isMapMessy         = IsSuspiciousMapping(category.Key);
                foreach (var pairResult in category.Value)
                {
                    // If on the edge, kick it over to the edge lookup.
                    if (!isFinalTask && pairResult.ReadPair.MaxPosition > edgeThresholdOrig)
                    {
                        numPairsSentToNextBlock++;
                        if (!pairResultsForNextBlock.TryGetValue(category.Key, out var edgePairs))
                        {
                            edgePairs = new List <PairResult>();
                            pairResultsForNextBlock.Add(category.Key, edgePairs);
                        }

                        edgePairs.Add(pairResult);

                        nextEdgeMinPosition = Math.Min(nextEdgeMinPosition, pairResult.ReadPair.MinPosition);
                    }
                    // Still collect evidence even if it's edge, because that could impact this block as well as next block.

                    binEvidence.AddMessEvidence(isMessy, pairResult, isIndel, isSingleMismatch, isForwardOnlyMessy,
                                                isReverseOnlyMessy, isMapMessy);
                }
            }

            numRetrievedFromLastBlock = AddAlignmentsFromEdgeState(edgeState, pairResultLookup, numRetrievedFromLastBlock);

            var finalizedBins = new UsableBins(binConclusions);

            if (shouldRealignAtAll)
            {
                binConclusions.AddIndelEvidence(finalizedIndelsForChrom, binsToExtendTo);
                binConclusions.ProcessRegions(messySiteThreshold, imperfectFreqThreshold,
                                              _geminiOptions.RegionDepthThreshold, indelRegionfreqThreshold, binsToExtendTo, _geminiOptions.DirectionalMessThreshold);
                finalizedBins.FinalizeConclusions(binsToExtendTo);
            }

            // NOTE(review): indelSource is used after snippetSource has been disposed; presumably
            // GetChromosomeIndelSource consumes the snippet source eagerly - confirm.
            using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
            {
                indelSource =
                    _dataSourceFactory.GetChromosomeIndelSource(finalizedIndelsForChrom, snippetSource);
            }


            // Take the pairs deferred to the next block out of this block's lists. One RemoveAll pass
            // per category with a hash set avoids the quadratic cost of calling List.Remove per pair.
            // NOTE(review): equivalent to per-item removal as long as PairResult uses reference
            // equality (no Equals override is visible here) - confirm.
            foreach (var kvp in pairResultsForNextBlock)
            {
                var deferredPairs = new HashSet <PairResult>(kvp.Value);
                pairResultLookup[kvp.Key].RemoveAll(deferredPairs.Contains);
            }

            var allAlignments  = new List <BamAlignment>();
            var outcomesLookup = new Dictionary <HashableIndel, int[]>();

            var numSkippedDueToSites = 0;
            var numKept      = 0;
            var numRealigned = 0;
            var numSilenced  = 0;

            var snowballCategories = _realignmentOptions.CategoriesForSnowballing;
            var doSnowball         = snowballCategories.Any();


            // Snowball categories are processed first (and removed from the lookup) so their outcomes
            // can be used to filter the indels before the remaining categories are processed.
            foreach (var category in snowballCategories)
            {
                if (pairResultLookup.Remove(category, out var categoryReads))
                {
                    allAlignments.AddRange(ProcessCategory(_categoriesForRealignment,
                                                           indelSource, shouldRealignAtAll,
                                                           outcomesLookup, ref numSkippedDueToSites, ref numKept, ref numRealigned, ref numSilenced,
                                                           categoryReads, category, binEvidence, _progressTracker, binConclusions, finalizedBins, startPosition, endPosition));
                }
            }

            List <HashableIndel> superFinalizedIndels;

            if (doSnowball)
            {
                superFinalizedIndels = GetSuperFinalizedIndelsAfterSnowball(finalizedIndelsForChrom, outcomesLookup);

                if (_geminiOptions.Debug)
                {
                    Logger.WriteToLog(
                        $"After snowballing for region {_chrom}:{startPosition}-{endPosition}, filtered down to {superFinalizedIndels.Count} indels from {finalizedIndelsForChrom.Count} ({finalIndelLookup.Count} preliminary indels).");
                }

                using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
                {
                    indelSource =
                        _dataSourceFactory.GetChromosomeIndelSource(superFinalizedIndels, snippetSource);
                }

                if (_geminiOptions.RecalculateUsableSitesAfterSnowball)
                {
                    binConclusions.ResetIndelRegions();

                    foreach (var indel in superFinalizedIndels)
                    {
                        var bin = (indel.ReferencePosition - adjustedStartPosition) / messySiteWidth;
                        binConclusions.SetIndelRegionTrue(bin);

                        // Extend the indel-region flag to the bins on the left...
                        for (int j = 0; j < binsToExtendTo; j++)
                        {
                            var binIndex = bin - j;
                            if (binIndex >= 0)
                            {
                                binConclusions.SetIndelRegionTrue(binIndex);
                            }
                            else
                            {
                                break;
                            }
                        }

                        // ...and on the right, stopping as soon as a bin cannot be set.
                        for (int j = 0; j < binsToExtendTo; j++)
                        {
                            var binIndex = bin + j;
                            if (!binConclusions.SetIndelRegionTrue(binIndex))
                            {
                                break;
                            }
                        }
                    }

                    finalizedBins.FinalizeConclusions(binsToExtendTo);
                }
            }
            else
            {
                superFinalizedIndels = finalizedIndelsForChrom;
            }

            // TODO pull out the allocs below, or ideally actually remove them from realign pair handler or use something different altogether
            foreach (var category in pairResultLookup)
            {
                if (snowballCategories.Contains(category.Key))
                {
                    continue;
                }

                allAlignments.AddRange(ProcessCategory(_categoriesForRealignment, indelSource,
                                                       shouldRealignAtAll,
                                                       outcomesLookup, ref numSkippedDueToSites, ref numKept, ref numRealigned, ref numSilenced, category.Value,
                                                       category.Key, binEvidence, _progressTracker, binConclusions, finalizedBins, startPosition, endPosition));
            }

            var edgeHits = new Dictionary <int, int>();
            var edgeSingleMismatchHits = new Dictionary <int, int>();
            var edgeIndelHits          = new Dictionary <int, int>();
            var edgeMessyHits          = new Dictionary <int, int>();

            PopulateEdgeHitsAndLogBins(numBins, adjustedStartPosition, messySiteWidth, nextEdgeMinPosition, binEvidence,
                                       edgeHits, edgeSingleMismatchHits, edgeIndelHits, edgeMessyHits, startPosition, binConclusions, finalizedBins);

            UpdateMasterOutcomes(_masterOutcomesLookup, outcomesLookup);

            foreach (var hashableIndel in superFinalizedIndels)
            {
                _masterFinalIndels.AddOrUpdate(hashableIndel, 1, (h, n) => { return(n + 1); });
            }

            // List<T>.Count is a property; no need to go through LINQ's Count().
            var numFlushed = allAlignments.Count;

            _progressTracker.AddOrUpdate("Flushed", numFlushed,
                                         (x, currentCount) => { return(currentCount + numFlushed); });
            _progressTracker.AddOrUpdate("Sent To Next Block", numPairsSentToNextBlock,
                                         (x, currentCount) => { return(currentCount + numPairsSentToNextBlock); });
            _progressTracker.AddOrUpdate("Retrieved from Past Block", numRetrievedFromLastBlock,
                                         (x, currentCount) => { return(currentCount + numRetrievedFromLastBlock); });
            _progressTracker.AddOrUpdate("Realigned", numRealigned,
                                         (x, currentCount) => { return(currentCount + numRealigned); });
            _progressTracker.AddOrUpdate("Attempts", numKept,
                                         (x, currentCount) => { return(currentCount + numKept); });
            _progressTracker.AddOrUpdate("Skipped", numSkippedDueToSites,
                                         (x, currentCount) => { return(currentCount + numSkippedDueToSites); });
            _progressTracker.AddOrUpdate("Silenced", numSilenced,
                                         (x, currentCount) => { return(currentCount + numSilenced); });

            pairResultLookup.Clear();
            Logger.WriteToLog(
                $"Finished processing for region {_chrom}:{startPosition}-{endPosition}. {numFlushed} alignments flushed, " +
                $"{numPairsSentToNextBlock} sent to next block, {numRetrievedFromLastBlock} retrieved from {regionData.EdgeState?.Name}. " +
                $"Realigned {numRealigned}/{numKept} attempts ({numSkippedDueToSites} pairs skipped realignment), silenced {numSilenced} messy mates.");


            return(new AggregateRegionResults()
            {
                EdgeState = isFinalTask
                    ? new EdgeState()
                {
                    Name = "Final"
                }
                    : new EdgeState()
                {
                    EdgeAlignments = pairResultsForNextBlock,
                    EdgeIndels = finalizedIndelsForChrom.Where(y => y.ReferencePosition > nextEdgeMinPosition)
                                 .ToList(),
                    EffectiveMinPosition = nextEdgeMinPosition,
                    Name = $"{startPosition}-{endPosition}",
                    BinEvidence = binEvidence
                },
                AlignmentsReadyToBeFlushed = allAlignments
            });
        }