コード例 #1
0
        /// <summary>
        /// Builds and links the dataflow blocks for one region: the per-classification blocks, the
        /// blocks that consume their output, an aggregation block, and a writer block.
        /// </summary>
        /// <remarks>
        /// For each entry in <c>classifications</c>, the per-classification blocks are linked either to an
        /// early-flush block (when the category is not realignable, not stitchable and not needed for bin
        /// evidence) or to an actionable-pairs block that collects results into a shared lookup.
        /// When every collected completion task has finished (including the previous region's edge task, if
        /// one is registered under <paramref name="prevBlockStart"/>), a continuation combines this region's
        /// bin counts with the previous region's edge state and posts the result to the aggregation block.
        /// </remarks>
        /// <param name="pairClassifierBlock">Source block handed to the per-classification block builder.</param>
        /// <param name="startPosition">Region start; also the key under which this region's edge state and edge task are stored.</param>
        /// <param name="endPosition">Region end.</param>
        /// <param name="edgeStates">Shared map of region start to edge state. The entry for
        /// <paramref name="prevBlockStart"/> is removed and consumed here; the entry for
        /// <paramref name="startPosition"/> is written once aggregation results arrive.</param>
        /// <param name="edgeToWaitOn">Shared map of region start to the completion task of that region's
        /// intermediate block. The task for <paramref name="prevBlockStart"/>, if present, is awaited before aggregation.</param>
        /// <param name="prevBlockStart">Key of the preceding region in <paramref name="edgeStates"/> and <paramref name="edgeToWaitOn"/>.</param>
        /// <param name="isFinalTask">Forwarded to the aggregation task; when true, no edge task is registered for this region.</param>
        /// <returns>Two tasks: the continuation that feeds the aggregation block, and the writer block's completion.</returns>
        /// <exception cref="Exception">A completion task could not be added to the set of tasks to wait for.</exception>
        public Task[] GetAndLinkAllClassificationBlocksWithEcFinalization(
            ISourceBlock <PairResult> pairClassifierBlock,
            int startPosition, int endPosition, ConcurrentDictionary <int, EdgeState> edgeStates,
            ConcurrentDictionary <int, Task> edgeToWaitOn, int prevBlockStart,
            bool isFinalTask = false)
        {
            if (_lightDebug)
            {
                Logger.WriteToLog(
                    $"Creating tasks for region {_chrom}:{startPosition}-{endPosition}.");
            }

            // Used as a concurrent set of completion tasks; the int value is a placeholder.
            var allToWaitFor = new ConcurrentDictionary <Task, int>();

            var messySiteWidth = _geminiOptions.MessySiteWidth;
            var effectiveMax   = 0;

            var adjustedStartPosition = RoundedStartPosition(startPosition, messySiteWidth);

            var pairResultLookup = new ConcurrentDictionary <PairClassification, List <PairResult> >();
            var indelLookup      = new ConcurrentDictionary <string, IndelEvidence>();

            var regionLength = endPosition - adjustedStartPosition;
            // NOTE(review): the extra 1000 bins look like headroom past endPosition - confirm the intent.
            var numBins      = (regionLength / messySiteWidth) + 1000;

            var totalBinCounts          = InitializeTotalBinCounts(numBins);
            var singleMismatchBinCounts = InitializeSingleMismatchBinCounts(numBins);

            var actBlockFactory = _actionBlockFactoryProvider.GetFactory(startPosition, endPosition,
                                                                         adjustedStartPosition, totalBinCounts, singleMismatchBinCounts, numBins, allToWaitFor);

            foreach (var classification in classifications)
            {
                var toWaitFor = GetAndLinkPerClassificationBlocksWithEcFinalization(pairClassifierBlock, classification, indelLookup);

                var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
                var categoryIsRealignable = _categoriesForRealignment.Contains(classification);

                // Even if we're not going to realign these reads, they may still be useful for bin evidence, so don't give them the immediate flush
                var shouldCollectBinEvidence = TypeClassifier.MessyTypes.Contains(classification) || TypeClassifier._indelTypes.Contains(classification);

                var isSingleMismatch = _geminiOptions.AvoidLikelySnvs && (classification == PairClassification.SingleMismatchStitched ||
                                                                          classification == PairClassification.UnstitchSingleMismatch);

                if (!(categoryIsRealignable || doStitch || shouldCollectBinEvidence))
                {
                    var actBlock = actBlockFactory.GetEarlyFlushBlock(classification, isSingleMismatch);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    // Drop tasks that have already finished so the wait set does not keep growing.
                    // NOTE(review): a task that finished in a faulted state is dropped here too, so the
                    // continuation below never inspects it - confirm this is intended.
                    var toRemove = allToWaitFor.Keys.Where(x => x.IsCompleted);
                    foreach (var task in toRemove)
                    {
                        allToWaitFor.TryRemove(task, out _);
                    }
                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
                else
                {
                    var actBlock = actBlockFactory.GetActionablePairsBlock(classification, pairResultLookup);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
            }

            var finalTask = AggregateTask(indelLookup, startPosition, endPosition, isFinalTask, _progressTracker);

            // Records this region's edge state for the next region, then forwards the alignments to the writer.
            var intermediateWriterTask = new TransformBlock <AggregateRegionResults, List <BamAlignment> >(results =>
            {
                edgeStates.AddOrUpdate(startPosition, results.EdgeState, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state already exists: {s}.");
                    return(results.EdgeState);
                });
                return(results.AlignmentsReadyToBeFlushed);
            }, new ExecutionDataflowBlockOptions()
            {
                EnsureOrdered = false
            });

            var finalWriteTask = actBlockFactory.GetWriterBlock();

            finalTask.LinkTo(intermediateWriterTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });
            intermediateWriterTask.LinkTo(finalWriteTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });

            // Single atomic lookup: a separate ContainsKey + indexer pair could throw if the entry
            // were removed between the two calls.
            if (edgeToWaitOn.TryGetValue(prevBlockStart, out var previousEdgeTask))
            {
                if (!allToWaitFor.TryAdd(previousEdgeTask, 1))
                {
                    throw new Exception("Failed to add task for previous edge.");
                }
            }
            else
            {
                Logger.WriteToLog($"At {startPosition}, prev block is {prevBlockStart}, nothing to wait on.");
            }

            if (!isFinalTask)
            {
                edgeToWaitOn.AddOrUpdate(startPosition, intermediateWriterTask.Completion, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state task already exists: {s}.");
                    return(intermediateWriterTask.Completion);
                });
            }

            var allTasks = allToWaitFor.Keys.ToList();
            var t        = Task.WhenAll(allTasks)
                           .ContinueWith(_ =>
            {
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing for aggregation for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (allTasks.Any(x => x.Status != TaskStatus.RanToCompletion))
                {
                    Logger.WriteToLog("ERROR: Task did not complete.");

                    foreach (var task in allTasks)
                    {
                        Logger.WriteToLog($"{task.Id}\t{task.Status}\t{task.Exception}");
                        if (task.Status == TaskStatus.Faulted)
                        {
                            // Pass the exception along to the final task so it can be forced to error out.
                            finalTask = ForceFailFinalTask(intermediateWriterTask, task.Exception);
                        }
                    }
                }

                var numStillToProcess = 0;
                foreach (var item in pairResultLookup)
                {
                    // Max() throws on an empty sequence; an empty list contributes nothing to either value.
                    if (item.Value.Count == 0)
                    {
                        continue;
                    }

                    effectiveMax       = Math.Max(effectiveMax, item.Value.Max(x => x.ReadPair.MaxPosition));
                    numStillToProcess += item.Value.Count;
                }

                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing edge state info for region {_chrom}:{startPosition}-{endPosition}.");
                }

                EdgeState edgeState = null;
                var extraBins       = 0;

                // Atomically take ownership of the previous region's edge state. TryRemove leaves
                // edgeState null when there is no entry, which the code below already handles.
                if (edgeStates.TryRemove(prevBlockStart, out edgeState) && edgeState != null)
                {
                    if (edgeState.EdgeIndels.Any() || edgeState.EdgeAlignments.Any())
                    {
                        // Extend the binned range leftwards so it also covers the edge evidence.
                        var newAdjustedStartPosition = RoundedStartPosition(Math.Min(adjustedStartPosition, edgeState.EffectiveMinPosition), messySiteWidth);
                        extraBins             = (adjustedStartPosition - newAdjustedStartPosition) / messySiteWidth;
                        adjustedStartPosition = newAdjustedStartPosition;
                    }
                }
                allToWaitFor.Clear();
                allTasks.Clear();


                if (_lightDebug)
                {
                    var totalReadsInRegion = _categoryLookup.Values.Sum();
                    Console.WriteLine($"STILL TO PROCESS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {numStillToProcess}");
                    Console.WriteLine(
                        $"READS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {totalReadsInRegion}");
                    foreach (var kvp in _categoryLookup)
                    {
                        Console.WriteLine(
                            $"CATEGORYCOUNT ({startPosition}-{endPosition} (eff:{effectiveMax})): {kvp.Key}: {kvp.Value} ({Math.Round(kvp.Value * 100 / (float)totalReadsInRegion)}%)");
                    }
                }

                var totalNumBins       = numBins + extraBins;
                var allHits            = new uint[totalNumBins];
                var singleMismatchHits = new uint[totalNumBins];

                // Copy the per-bin counts into the widened arrays, shifted right by extraBins.
                for (var i = 0; i < totalNumBins; i++)
                {
                    var newBin = i + extraBins;
                    if (newBin >= totalNumBins)
                    {
                        // Equivalent to i >= numBins, so the source arrays are never indexed at or beyond numBins.
                        break;
                    }

                    if (totalBinCounts[i] > 0)
                    {
                        allHits[newBin]            = totalBinCounts[i];
                        singleMismatchHits[newBin] = singleMismatchBinCounts[i];
                    }
                }


                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Creating bin evidence for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var binEvidence = _binEvidenceFactory.GetBinEvidence(totalNumBins, adjustedStartPosition);
                binEvidence.SetSingleMismatchHits(singleMismatchHits);
                binEvidence.AddAllHits(allHits);
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (edgeState != null)
                {
                    var edgeBinInNew = binEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    var edgeBinInOld = edgeState.BinEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    AddEdgeHits(edgeState, binEvidence, edgeBinInOld, edgeBinInOld - edgeBinInNew);
                }
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Done adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var finalState = new RegionDataForAggregation()
                {
                    BinEvidence          = binEvidence,
                    PairResultLookup     = pairResultLookup,
                    EdgeState            = edgeState,
                    EffectiveMaxPosition = effectiveMax,
                    EffectiveMinPosition = adjustedStartPosition
                };

                finalTask.Post(finalState);
                finalTask.Complete();
            });

            return(new[] { t, finalWriteTask.Completion });
        }
コード例 #2
0
        /// <summary>
        /// Processes every pair result of one classification: optionally passes each pair through the
        /// stitch/realign handler, tags the resulting alignments, and returns them as a single list.
        /// </summary>
        /// <param name="categoriesForRealignment">Classifications for which realignment may be attempted.</param>
        /// <param name="indelSource">Passed to the realign pair handler factory.</param>
        /// <param name="shouldRealignAtAll">When false, no pair is realigned (pairs still go through the handler if one was created).</param>
        /// <param name="outcomesLookup">Passed to the realign pair handler factory.</param>
        /// <param name="numSkippedDueToSites">Incremented for each pair sent through the handler with realignment disabled.</param>
        /// <param name="numKept">Incremented for each pair sent through the handler with realignment enabled.</param>
        /// <param name="numRealigned">Incremented when the read pair reports Realigned, RealignedR1 or RealignedR2 after extraction.</param>
        /// <param name="numSilenced">Incremented when <c>ReadsToSilence</c> returns a value above zero for the pair.</param>
        /// <param name="pairResults">Pairs to process; the list is cleared before returning.</param>
        /// <param name="classification">Classification shared by all <paramref name="pairResults"/>; written to each alignment's XP tag.</param>
        /// <param name="binEvidence">Not used by this method body.</param>
        /// <param name="progressTracker">Counter per "classification:treatment" key, incremented once per pair.</param>
        /// <param name="binConclusions">Passed to <c>ReadsToSilence</c>.</param>
        /// <param name="usableBins">Decides, via the pair's min/max position, whether a realignable pair is actually realigned.</param>
        /// <param name="startPosition">Not used by this method body.</param>
        /// <param name="endPosition">Not used by this method body.</param>
        /// <returns>All alignments produced for the given pairs, in processing order.</returns>
        private List <BamAlignment> ProcessCategory(
            List <PairClassification> categoriesForRealignment, IChromosomeIndelSource indelSource,
            bool shouldRealignAtAll, Dictionary <HashableIndel, int[]> outcomesLookup, ref int numSkippedDueToSites,
            ref int numKept, ref int numRealigned, ref int numSilenced,
            List <PairResult> pairResults, PairClassification classification, IBinEvidence binEvidence,
            ConcurrentDictionary <string, int> progressTracker, BinConclusions binConclusions, UsableBins usableBins, int startPosition, int endPosition)
        {
            var allAlignments = new List <BamAlignment>();
            var isHighLikelihoodForRealign = false;

            if (_geminiOptions.ForceHighLikelihoodRealigners)
            {
                // Categories that are realigned regardless of whether their bins are usable.
                var highLikelihoodCategories = new List <PairClassification>()
                {
                    PairClassification.Disagree,
                    PairClassification.MessyStitched,
                    PairClassification.MessySplit,
                    PairClassification.UnstitchMessy,
                    PairClassification.UnstitchIndel
                };
                isHighLikelihoodForRealign = highLikelihoodCategories.Contains(classification);
            }

            var doRealign = false;
            ReadPairRealignerAndCombiner realignHandler = null;
            var alreadyStitched       = ClassificationIsStitched(classification);
            var doStitch              = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
            var categoryIsRealignable = categoriesForRealignment.Contains(classification);

            // A handler is only needed when the category will be stitched and/or realigned.
            if (categoryIsRealignable || doStitch)
            {
                doRealign = true;

                realignHandler = _bamRealignmentFactory.GetRealignPairHandler(doStitch,
                                                                              alreadyStitched,
                                                                              _realignmentOptions.PairAwareEverything ||
                                                                              ClassificationIsPairAwareRealignable(classification),
                                                                              _refIdMapping,
                                                                              new ReadStatusCounter(), false, indelSource, _chrom, new Dictionary <string, IndelEvidence>(),
                                                                              ClassificationHasIndels(classification), outcomesLookup
                                                                              , SkipRestitchIfUnchanged(classification));
            }

            using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
            using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
            {
                var nmCalculator = new NmCalculator(singleSnippetSource);

                var classificationString = classification.ToString();
                foreach (var pairResult in pairResults)
                {
                    // Stays 0 when the pair bypasses the handler. The flags computed below read
                    // 1 as R1, 2 as R2 and 3 as both reads.
                    int toSilence = 0;

                    IEnumerable <BamAlignment> alignments;
                    if (!doRealign)
                    {
                        alignments = pairResult.Alignments;
                    }
                    else
                    {
                        // Realign when globally enabled and either the category is forced, or it is
                        // realignable and at least one end of the pair falls in a usable bin.
                        bool doRealignPair =
                            shouldRealignAtAll && (isHighLikelihoodForRealign ||
                                                   (categoryIsRealignable &&
                                                    (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                                                     usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));

                        if (!doRealignPair)
                        {
                            numSkippedDueToSites++;
                        }
                        else
                        {
                            numKept++;
                        }

                        toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                        if (toSilence > 0)
                        {
                            numSilenced++;
                        }

                        alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                        if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                            pairResult.ReadPair.RealignedR2)
                        {
                            numRealigned++;
                        }
                    }

                    // A read only counts as silenced if it was flagged and was not realigned.
                    var silencedR1    = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
                    var silencedR2    = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
                    var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

                    progressTracker.AddOrUpdate(classificationString + ":" + readTreatment, 1,
                                                (x, currentCount) => { return(currentCount + 1); });

                    // Materialize once so the sequence is enumerated a single time.
                    var alignmentsList = alignments.ToList();
                    foreach (var bamAlignment in alignmentsList)
                    {
                        if (_geminiOptions.LightDebug)
                        {
                            AddMdTagCountsTags(bamAlignment, pairResult);
                        }

                        bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                        bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
                    }

                    allAlignments.AddRange(alignmentsList);
                }
            }

            realignHandler?.Finish();

            pairResults.Clear();
            return(allAlignments);
        }