/// <summary>
/// Creates and links the dataflow blocks that handle every pair classification for the region
/// <paramref name="startPosition"/>-<paramref name="endPosition"/>, and schedules a continuation that,
/// once all per-classification blocks (and the previous region's edge task, if any) are done,
/// assembles bin evidence and edge state and posts a single <c>RegionDataForAggregation</c> to the
/// aggregation block.
/// </summary>
/// <param name="pairClassifierBlock">Source of classified pair results; handed to the per-classification linker.</param>
/// <param name="startPosition">Start of the region. Also used as the key under which this region's edge state and edge task are stored.</param>
/// <param name="endPosition">End of the region.</param>
/// <param name="edgeStates">Shared lookup of edge states keyed by region start. The entry for <paramref name="prevBlockStart"/> is removed and consumed here; this region's entry is added when aggregation results arrive.</param>
/// <param name="edgeToWaitOn">Shared lookup of tasks keyed by region start. The entry for <paramref name="prevBlockStart"/> (if present) is awaited before aggregation; unless <paramref name="isFinalTask"/> is set, this region's entry is registered.</param>
/// <param name="prevBlockStart">Key of the preceding region in <paramref name="edgeStates"/> and <paramref name="edgeToWaitOn"/>.</param>
/// <param name="isFinalTask">Forwarded to the aggregation task; when true, no edge task is registered for this region.</param>
/// <returns>Two tasks: the pre-aggregation continuation, and the completion of the final writer block.</returns>
/// <exception cref="Exception">Thrown when a completion task cannot be added to the set of tasks to wait for.</exception>
public Task[] GetAndLinkAllClassificationBlocksWithEcFinalization(
    ISourceBlock<PairResult> pairClassifierBlock,
    int startPosition,
    int endPosition,
    ConcurrentDictionary<int, EdgeState> edgeStates,
    ConcurrentDictionary<int, Task> edgeToWaitOn,
    int prevBlockStart,
    bool isFinalTask = false)
{
    if (_lightDebug)
    {
        Logger.WriteToLog(
            $"Creating tasks for region {_chrom}:{startPosition}-{endPosition}.");
    }

    var allToWaitFor = new ConcurrentDictionary<Task, int>();
    var messySiteWidth = _geminiOptions.MessySiteWidth;
    var effectiveMax = 0;
    var adjustedStartPosition = RoundedStartPosition(startPosition, messySiteWidth);

    var pairResultLookup = new ConcurrentDictionary<PairClassification, List<PairResult>>();
    var indelLookup = new ConcurrentDictionary<string, IndelEvidence>();

    var regionLength = endPosition - adjustedStartPosition;
    // NOTE(review): the extra 1000 bins look like head-room beyond the nominal region length - confirm.
    var numBins = (regionLength / messySiteWidth) + 1000;
    var totalBinCounts = InitializeTotalBinCounts(numBins);
    var singleMismatchBinCounts = InitializeSingleMismatchBinCounts(numBins);

    var actBlockFactory = _actionBlockFactoryProvider.GetFactory(startPosition, endPosition,
        adjustedStartPosition, totalBinCounts, singleMismatchBinCounts, numBins, allToWaitFor);

    foreach (var classification in classifications)
    {
        var toWaitFor = GetAndLinkPerClassificationBlocksWithEcFinalization(pairClassifierBlock,
            classification, indelLookup);

        var doStitch = !_geminiOptions.SkipStitching &&
                       TypeClassifier.ClassificationIsStitchable(classification);
        var categoryIsRealignable = _categoriesForRealignment.Contains(classification);

        // Even if we're not going to realign these reads, they may still be useful for bin evidence, so don't give them the immediate flush
        var shouldCollectBinEvidence = TypeClassifier.MessyTypes.Contains(classification) ||
                                       TypeClassifier._indelTypes.Contains(classification);

        var isSingleMismatch = _geminiOptions.AvoidLikelySnvs &&
                               (classification == PairClassification.SingleMismatchStitched ||
                                classification == PairClassification.UnstitchSingleMismatch);

        if (!(categoryIsRealignable || doStitch || shouldCollectBinEvidence))
        {
            // Nothing downstream needs these pairs, so they go to the early-flush block.
            var actBlock = actBlockFactory.GetEarlyFlushBlock(classification, isSingleMismatch);
            foreach (var transformBlock in toWaitFor)
            {
                transformBlock.LinkTo(actBlock, new DataflowLinkOptions() { PropagateCompletion = true });
            }

            // Drop tasks that have already finished before registering the new one.
            var toRemove = allToWaitFor.Keys.Where(x => x.IsCompleted);
            foreach (var task in toRemove)
            {
                allToWaitFor.TryRemove(task, out _);
            }

            if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
            {
                throw new Exception("Failed to add task.");
            }
        }
        else
        {
            // These pairs are collected into pairResultLookup for the aggregation step.
            var actBlock = actBlockFactory.GetActionablePairsBlock(classification, pairResultLookup);
            foreach (var transformBlock in toWaitFor)
            {
                transformBlock.LinkTo(actBlock, new DataflowLinkOptions() { PropagateCompletion = true });
            }

            if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
            {
                throw new Exception("Failed to add task.");
            }
        }
    }

    var finalTask = AggregateTask(indelLookup, startPosition, endPosition, isFinalTask, _progressTracker);

    // Records this region's edge state (keyed by startPosition) and forwards the alignments that are ready to be flushed.
    var intermediateWriterTask = new TransformBlock<AggregateRegionResults, List<BamAlignment>>(results =>
    {
        edgeStates.AddOrUpdate(startPosition, results.EdgeState, (s, e) =>
        {
            Logger.WriteWarningToLog($"Edge state already exists: {s}.");
            return results.EdgeState;
        });
        return results.AlignmentsReadyToBeFlushed;
    }, new ExecutionDataflowBlockOptions() { EnsureOrdered = false });

    var finalWriteTask = actBlockFactory.GetWriterBlock();

    finalTask.LinkTo(intermediateWriterTask, new DataflowLinkOptions() { PropagateCompletion = true });
    intermediateWriterTask.LinkTo(finalWriteTask, new DataflowLinkOptions() { PropagateCompletion = true });

    // Single atomic lookup: the entry cannot vanish between the existence check and the read.
    if (edgeToWaitOn.TryGetValue(prevBlockStart, out var previousEdgeTask))
    {
        if (!allToWaitFor.TryAdd(previousEdgeTask, 1))
        {
            throw new Exception("Failed to add task for previous edge.");
        }
    }
    else
    {
        Logger.WriteToLog($"At {startPosition}, prev block is {prevBlockStart}, nothing to wait on.");
    }

    if (!isFinalTask)
    {
        edgeToWaitOn.AddOrUpdate(startPosition, intermediateWriterTask.Completion, (s, e) =>
        {
            Logger.WriteWarningToLog($"Edge state task already exists: {s}.");
            return intermediateWriterTask.Completion;
        });
    }

    var allTasks = allToWaitFor.Keys.ToList();
    var t = Task.WhenAll(allTasks)
        .ContinueWith(_ =>
        {
            if (_lightDebug)
            {
                Logger.WriteToLog($"Preparing for aggregation for region {_chrom}:{startPosition}-{endPosition}.");
            }

            if (allTasks.Any(x => x.Status != TaskStatus.RanToCompletion))
            {
                Logger.WriteToLog("ERROR: Task did not complete.");

                foreach (var task in allTasks)
                {
                    Logger.WriteToLog($"{task.Id}\t{task.Status}\t{task.Exception}");
                    if (task.Status == TaskStatus.Faulted)
                    {
                        // Pass the exception along to the final task so it can be forced to error out.
                        finalTask = ForceFailFinalTask(intermediateWriterTask, task.Exception);
                    }
                }
            }

            var numStillToProcess = 0;
            foreach (var item in pairResultLookup)
            {
                // Max() throws on an empty sequence; an exception here would skip the Post/Complete
                // at the end of this continuation, so empty lists are skipped instead.
                if (item.Value.Count > 0)
                {
                    effectiveMax = Math.Max(effectiveMax, item.Value.Max(x => x.ReadPair.MaxPosition));
                }

                numStillToProcess += item.Value.Count;
            }

            if (_lightDebug)
            {
                Logger.WriteToLog($"Preparing edge state info for region {_chrom}:{startPosition}-{endPosition}.");
            }

            EdgeState edgeState = null;
            var extraBins = 0;

            // Atomic take of the previous region's edge state; on failure edgeState stays null.
            if (edgeStates.TryRemove(prevBlockStart, out edgeState) && edgeState != null)
            {
                if (edgeState.EdgeIndels.Any() || edgeState.EdgeAlignments.Any())
                {
                    // Pull the start back so the bins also cover the earliest position carried over from the edge.
                    var newAdjustedStartPosition = RoundedStartPosition(
                        Math.Min(adjustedStartPosition, edgeState.EffectiveMinPosition), messySiteWidth);
                    extraBins = (adjustedStartPosition - newAdjustedStartPosition) / messySiteWidth;
                    adjustedStartPosition = newAdjustedStartPosition;
                }
            }

            allToWaitFor.Clear();
            allTasks.Clear();

            if (_lightDebug)
            {
                var totalReadsInRegion = _categoryLookup.Values.Sum();
                Console.WriteLine($"STILL TO PROCESS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {numStillToProcess}");
                Console.WriteLine(
                    $"READS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {totalReadsInRegion}");
                foreach (var kvp in _categoryLookup)
                {
                    Console.WriteLine(
                        $"CATEGORYCOUNT ({startPosition}-{endPosition} (eff:{effectiveMax})): {kvp.Key}: {kvp.Value} ({Math.Round(kvp.Value * 100 / (float)totalReadsInRegion)}%)");
                }
            }

            // Shift the collected counts right by extraBins so they line up with the (possibly earlier) adjusted start.
            var totalNumBins = numBins + extraBins;
            var allHits = new uint[totalNumBins];
            var singleMismatchHits = new uint[totalNumBins];

            for (var i = 0; i < totalNumBins; i++)
            {
                var newBin = i + extraBins;
                if (newBin >= totalNumBins)
                {
                    break;
                }

                if (totalBinCounts[i] > 0)
                {
                    allHits[newBin] = totalBinCounts[i];
                    singleMismatchHits[newBin] = singleMismatchBinCounts[i];
                }
            }

            if (_lightDebug)
            {
                Logger.WriteToLog(
                    $"Creating bin evidence for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var binEvidence = _binEvidenceFactory.GetBinEvidence(totalNumBins, adjustedStartPosition);
            binEvidence.SetSingleMismatchHits(singleMismatchHits);
            binEvidence.AddAllHits(allHits);

            if (_lightDebug)
            {
                Logger.WriteToLog($"Adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
            }

            if (edgeState != null)
            {
                var edgeBinInNew = binEvidence.GetBinId(edgeState.EffectiveMinPosition);
                var edgeBinInOld = edgeState.BinEvidence.GetBinId(edgeState.EffectiveMinPosition);
                AddEdgeHits(edgeState, binEvidence, edgeBinInOld, edgeBinInOld - edgeBinInNew);
            }

            if (_lightDebug)
            {
                Logger.WriteToLog($"Done adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var finalState = new RegionDataForAggregation()
            {
                BinEvidence = binEvidence,
                PairResultLookup = pairResultLookup,
                EdgeState = edgeState,
                EffectiveMaxPosition = effectiveMax,
                EffectiveMinPosition = adjustedStartPosition
            };

            finalTask.Post(finalState);
            finalTask.Complete();
        });

    return new[] { t, finalWriteTask.Completion };
}
/// <summary>
/// Turns the pair results of one classification into a flat list of alignments, optionally passing
/// each pair through the realign/stitch handler, and tags every resulting alignment with its read
/// treatment ("XT") and classification ("XP").
/// </summary>
/// <param name="categoriesForRealignment">Classifications for which realignment is enabled.</param>
/// <param name="indelSource">Indel source handed to the realign handler.</param>
/// <param name="shouldRealignAtAll">Master switch; when false no pair is realigned, though pairs still go through the handler.</param>
/// <param name="outcomesLookup">Per-indel outcome lookup handed to the realign handler.</param>
/// <param name="numSkippedDueToSites">Incremented for each pair that goes through the handler without realignment.</param>
/// <param name="numKept">Incremented for each pair selected for realignment.</param>
/// <param name="numRealigned">Incremented for each pair whose read pair reports being realigned (either mate or the pair).</param>
/// <param name="numSilenced">Incremented for each pair with a non-zero silence code.</param>
/// <param name="pairResults">Pair results to process. The list is cleared before returning.</param>
/// <param name="classification">Classification shared by all entries of <paramref name="pairResults"/>.</param>
/// <param name="binEvidence">Not used in this method body.</param>
/// <param name="progressTracker">Counter keyed by "classification:readTreatment", incremented once per pair.</param>
/// <param name="binConclusions">Passed to <c>ReadsToSilence</c> to decide which mates to silence.</param>
/// <param name="usableBins">Decides whether a pair's min/max position is usable for realignment.</param>
/// <param name="startPosition">Not used in this method body.</param>
/// <param name="endPosition">Not used in this method body.</param>
/// <returns>All alignments produced for the supplied pair results, in processing order.</returns>
private List<BamAlignment> ProcessCategory(
    List<PairClassification> categoriesForRealignment,
    IChromosomeIndelSource indelSource,
    bool shouldRealignAtAll,
    Dictionary<HashableIndel, int[]> outcomesLookup,
    ref int numSkippedDueToSites,
    ref int numKept,
    ref int numRealigned,
    ref int numSilenced,
    List<PairResult> pairResults,
    PairClassification classification,
    IBinEvidence binEvidence,
    ConcurrentDictionary<string, int> progressTracker,
    BinConclusions binConclusions,
    UsableBins usableBins,
    int startPosition,
    int endPosition)
{
    var allAlignments = new List<BamAlignment>();

    var isHighLikelihoodForRealign = false;
    if (_geminiOptions.ForceHighLikelihoodRealigners)
    {
        var highLikelihoodCategories = new List<PairClassification>()
        {
            PairClassification.Disagree,
            PairClassification.MessyStitched,
            PairClassification.MessySplit,
            PairClassification.UnstitchMessy,
            PairClassification.UnstitchIndel
        };
        isHighLikelihoodForRealign = highLikelihoodCategories.Contains(classification);
    }

    var doRealign = false;
    ReadPairRealignerAndCombiner realignHandler = null;
    var alreadyStitched = ClassificationIsStitched(classification);
    var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
    var categoryIsRealignable = categoriesForRealignment.Contains(classification);

    // A handler is only needed when the category will be realigned and/or stitched.
    if (categoryIsRealignable || doStitch)
    {
        doRealign = true;

        realignHandler = _bamRealignmentFactory.GetRealignPairHandler(doStitch,
            alreadyStitched,
            _realignmentOptions.PairAwareEverything ||
            ClassificationIsPairAwareRealignable(classification),
            _refIdMapping,
            new ReadStatusCounter(), false, indelSource, _chrom, new Dictionary<string, IndelEvidence>(),
            ClassificationHasIndels(classification), outcomesLookup,
            SkipRestitchIfUnchanged(classification));
    }

    using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
    using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
    {
        var nmCalculator = new NmCalculator(singleSnippetSource);
        var classificationString = classification.ToString();

        foreach (var pairResult in pairResults)
        {
            int toSilence = 0;
            IEnumerable<BamAlignment> alignments;

            if (!doRealign)
            {
                alignments = pairResult.Alignments;
            }
            else
            {
                // Realign only when enabled overall and the pair is either in a forced high-likelihood
                // category or in a realignable category with at least one end in a usable bin.
                bool doRealignPair =
                    shouldRealignAtAll && (isHighLikelihoodForRealign ||
                                           (categoryIsRealignable &&
                                            (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                                             usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));

                if (!doRealignPair)
                {
                    numSkippedDueToSites++;
                }
                else
                {
                    numKept++;
                }

                toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                if (toSilence > 0)
                {
                    numSilenced++;
                }

                alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                    pairResult.ReadPair.RealignedR2)
                {
                    numRealigned++;
                }
            }

            // Silence code as interpreted here: 1 = R1, 2 = R2, 3 = both.
            // A mate that was realigned is not reported as silenced.
            var silencedR1 = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
            var silencedR2 = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
            var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

            progressTracker.AddOrUpdate(classificationString + ":" + readTreatment, 1,
                (x, currentCount) => currentCount + 1);

            // Materialize once so the tagged alignments are the same instances that get returned.
            var alignmentsList = alignments.ToList();
            foreach (var bamAlignment in alignmentsList)
            {
                if (_geminiOptions.LightDebug)
                {
                    AddMdTagCountsTags(bamAlignment, pairResult);
                }

                bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
            }

            allAlignments.AddRange(alignmentsList);
        }
    }

    realignHandler?.Finish();

    // Empties the caller's list now that its contents have been processed.
    pairResults.Clear();
    return allAlignments;
}