/// <summary>
/// Appends every alignment list held in <paramref name="edgeState"/> to the bucket of the same
/// classification in <paramref name="pairResultLookup"/>, creating the bucket when it is missing.
/// </summary>
/// <param name="edgeState">Edge state whose <c>EdgeAlignments</c> are copied; when null nothing is copied.</param>
/// <param name="pairResultLookup">Per-classification lookup that receives the alignments (mutated in place).</param>
/// <param name="numRetrievedFromLastBlock">Running count that the number of appended alignments is added to.</param>
/// <returns><paramref name="numRetrievedFromLastBlock"/> plus the number of alignments appended.</returns>
private static int AddAlignmentsFromEdgeState(EdgeState edgeState,
    Dictionary<PairClassification, List<PairResult>> pairResultLookup, int numRetrievedFromLastBlock)
{
    // Nothing carried over: hand the count back untouched.
    if (edgeState == null)
    {
        return numRetrievedFromLastBlock;
    }

    var retrieved = numRetrievedFromLastBlock;
    foreach (var classification in edgeState.EdgeAlignments.Keys)
    {
        // Look the bucket up once, creating it on first sight of this classification.
        if (!pairResultLookup.TryGetValue(classification, out var bucket))
        {
            bucket = new List<PairResult>();
            pairResultLookup.Add(classification, bucket);
        }

        var edgeAlignments = edgeState.EdgeAlignments[classification];
        bucket.AddRange(edgeAlignments);
        retrieved += edgeAlignments.Count;
    }

    return retrieved;
}
/// <summary>
/// Folds the bin evidence stored on <paramref name="edgeState"/> into <paramref name="binEvidence2"/>
/// by delegating to <c>CombineBinEvidence</c>, passing the edge evidence's own bin count as the last argument.
/// </summary>
/// <param name="edgeState">Edge state whose <c>BinEvidence</c> is the source; must not be null.</param>
/// <param name="binEvidence2">Bin evidence that receives the combined hits.</param>
/// <param name="offset">Forwarded unchanged as the second argument of <c>CombineBinEvidence</c>.</param>
/// <param name="startInOld">Forwarded unchanged as the third argument of <c>CombineBinEvidence</c>.</param>
private static void AddEdgeHits(EdgeState edgeState, IBinEvidence binEvidence2, int offset, int startInOld)
{
    var previousEvidence = edgeState.BinEvidence;
    var previousBinCount = previousEvidence.NumBins;

    binEvidence2.CombineBinEvidence(previousEvidence, offset, startInOld, previousBinCount);
}
/// <summary>
/// Builds and links the dataflow blocks for one region and schedules the region's aggregation.
/// </summary>
/// <remarks>
/// For every classification the per-classification blocks are linked to either an early-flush block
/// or an actionable-pairs block, and each action block's completion is added to a wait set. The
/// aggregation block is chained to a block that records the region's edge state in
/// <paramref name="edgeStates"/> and then to the writer block. When every task in the wait set
/// (including the previous region's edge task, if one is registered) has finished, a continuation
/// assembles bin evidence and the collected pair results and posts them to the aggregation block.
/// </remarks>
/// <param name="pairClassifierBlock">Source block the per-classification blocks are linked from.</param>
/// <param name="startPosition">Region start; also the key under which this region's edge state and edge task are stored.</param>
/// <param name="endPosition">Region end.</param>
/// <param name="edgeStates">Shared edge states keyed by region start; the entry for <paramref name="prevBlockStart"/> is removed and consumed here.</param>
/// <param name="edgeToWaitOn">Shared edge tasks keyed by region start; the entry for <paramref name="prevBlockStart"/> is awaited, and this region's task is registered unless <paramref name="isFinalTask"/> is true.</param>
/// <param name="prevBlockStart">Start position of the previous region.</param>
/// <param name="isFinalTask">Forwarded to <c>AggregateTask</c>; when true no edge task is registered for this region.</param>
/// <returns>Two tasks: the aggregation-preparation continuation and the writer block's completion.</returns>
/// <exception cref="Exception">Thrown when a completion task cannot be added to the wait set.</exception>
public Task[] GetAndLinkAllClassificationBlocksWithEcFinalization(
    ISourceBlock<PairResult> pairClassifierBlock, int startPosition, int endPosition,
    ConcurrentDictionary<int, EdgeState> edgeStates, ConcurrentDictionary<int, Task> edgeToWaitOn,
    int prevBlockStart, bool isFinalTask = false)
{
    if (_lightDebug)
    {
        Logger.WriteToLog(
            $"Creating tasks for region {_chrom}:{startPosition}-{endPosition}.");
    }

    var allToWaitFor = new ConcurrentDictionary<Task, int>();
    var messySiteWidth = _geminiOptions.MessySiteWidth;
    var effectiveMax = 0;
    var adjustedStartPosition = RoundedStartPosition(startPosition, messySiteWidth);

    var pairResultLookup = new ConcurrentDictionary<PairClassification, List<PairResult>>();
    var indelLookup = new ConcurrentDictionary<string, IndelEvidence>();

    var regionLength = endPosition - adjustedStartPosition;
    var numBins = (regionLength / messySiteWidth) + 1000;
    var totalBinCounts = InitializeTotalBinCounts(numBins);
    var singleMismatchBinCounts = InitializeSingleMismatchBinCounts(numBins);

    var actBlockFactory = _actionBlockFactoryProvider.GetFactory(startPosition, endPosition,
        adjustedStartPosition, totalBinCounts, singleMismatchBinCounts, numBins, allToWaitFor);

    // Every link in this method propagates completion; the options object is only read by LinkTo.
    var propagateCompletion = new DataflowLinkOptions() { PropagateCompletion = true };

    foreach (var classification in classifications)
    {
        var toWaitFor = GetAndLinkPerClassificationBlocksWithEcFinalization(pairClassifierBlock,
            classification, indelLookup);

        var doStitch = !_geminiOptions.SkipStitching &&
                       TypeClassifier.ClassificationIsStitchable(classification);
        var categoryIsRealignable = _categoriesForRealignment.Contains(classification);

        // Even if we're not going to realign these reads, they may still be useful for bin evidence, so don't give them the immediate flush
        var shouldCollectBinEvidence = TypeClassifier.MessyTypes.Contains(classification) ||
                                       TypeClassifier._indelTypes.Contains(classification);
        var isSingleMismatch = _geminiOptions.AvoidLikelySnvs &&
                               (classification == PairClassification.SingleMismatchStitched ||
                                classification == PairClassification.UnstitchSingleMismatch);

        if (!(categoryIsRealignable || doStitch || shouldCollectBinEvidence))
        {
            var actBlock = actBlockFactory.GetEarlyFlushBlock(classification, isSingleMismatch);
            foreach (var transformBlock in toWaitFor)
            {
                transformBlock.LinkTo(actBlock, propagateCompletion);
            }

            // Drop tasks that have already completed from the wait set before adding the new one.
            var toRemove = allToWaitFor.Keys.Where(x => x.IsCompleted);
            foreach (var task in toRemove)
            {
                allToWaitFor.TryRemove(task, out _);
            }

            if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
            {
                throw new Exception("Failed to add task.");
            }
        }
        else
        {
            var actBlock = actBlockFactory.GetActionablePairsBlock(classification, pairResultLookup);
            foreach (var transformBlock in toWaitFor)
            {
                transformBlock.LinkTo(actBlock, propagateCompletion);
            }

            if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
            {
                throw new Exception("Failed to add task.");
            }
        }
    }

    var finalTask = AggregateTask(indelLookup, startPosition, endPosition, isFinalTask, _progressTracker);

    // Records this region's edge state for the next region, then forwards the flushable alignments.
    var intermediateWriterTask = new TransformBlock<AggregateRegionResults, List<BamAlignment>>(results =>
    {
        edgeStates.AddOrUpdate(startPosition, results.EdgeState, (s, e) =>
        {
            Logger.WriteWarningToLog($"Edge state already exists: {s}.");
            return results.EdgeState;
        });
        return results.AlignmentsReadyToBeFlushed;
    }, new ExecutionDataflowBlockOptions() { EnsureOrdered = false });

    var finalWriteTask = actBlockFactory.GetWriterBlock();

    finalTask.LinkTo(intermediateWriterTask, propagateCompletion);
    intermediateWriterTask.LinkTo(finalWriteTask, propagateCompletion);

    // Single atomic lookup instead of ContainsKey followed by the indexer, which could throw
    // if the entry disappeared between the two calls.
    if (edgeToWaitOn.TryGetValue(prevBlockStart, out var previousEdgeTask))
    {
        if (!allToWaitFor.TryAdd(previousEdgeTask, 1))
        {
            throw new Exception("Failed to add task for previous edge.");
        }
    }
    else
    {
        Logger.WriteToLog($"At {startPosition}, prev block is {prevBlockStart}, nothing to wait on.");
    }

    if (!isFinalTask)
    {
        edgeToWaitOn.AddOrUpdate(startPosition, intermediateWriterTask.Completion, (s, e) =>
        {
            Logger.WriteWarningToLog($"Edge state task already exists: {s}.");
            return intermediateWriterTask.Completion;
        });
    }

    var allTasks = allToWaitFor.Keys.ToList();

    var t = Task.WhenAll(allTasks)
        .ContinueWith(_ =>
        {
            if (_lightDebug)
            {
                Logger.WriteToLog($"Preparing for aggregation for region {_chrom}:{startPosition}-{endPosition}.");
            }

            if (allTasks.Any(x => x.Status != TaskStatus.RanToCompletion))
            {
                Logger.WriteToLog("ERROR: Task did not complete.");

                foreach (var task in allTasks)
                {
                    Logger.WriteToLog($"{task.Id}\t{task.Status}\t{task.Exception}");
                    if (task.Status == TaskStatus.Faulted)
                    {
                        // Pass the exception along to the final task so it can be forced to error out.
                        finalTask = ForceFailFinalTask(intermediateWriterTask, task.Exception);
                    }
                }
            }

            var numStillToProcess = 0;
            foreach (var item in pairResultLookup)
            {
                var pairs = item.Value;

                // Enumerable.Max throws on an empty sequence; an exception here would abort the
                // continuation before the aggregation block is posted to and completed.
                if (pairs.Count == 0)
                {
                    continue;
                }

                effectiveMax = Math.Max(effectiveMax, pairs.Max(x => x.ReadPair.MaxPosition));
                numStillToProcess += pairs.Count;
            }

            if (_lightDebug)
            {
                Logger.WriteToLog($"Preparing edge state info for region {_chrom}:{startPosition}-{endPosition}.");
            }

            EdgeState edgeState = null;
            var extraBins = 0;

            // TryRemove fetches and removes atomically; the null check guards against a stored null state.
            if (edgeStates.TryRemove(prevBlockStart, out edgeState) && edgeState != null)
            {
                if (edgeState.EdgeIndels.Any() || edgeState.EdgeAlignments.Any())
                {
                    // Extend the region start backwards so it also covers what the previous region carried over.
                    var newAdjustedStartPosition = RoundedStartPosition(
                        Math.Min(adjustedStartPosition, edgeState.EffectiveMinPosition), messySiteWidth);
                    extraBins = (adjustedStartPosition - newAdjustedStartPosition) / messySiteWidth;
                    adjustedStartPosition = newAdjustedStartPosition;
                }
            }

            allToWaitFor.Clear();
            allTasks.Clear();

            if (_lightDebug)
            {
                var totalReadsInRegion = _categoryLookup.Values.Sum();
                Console.WriteLine($"STILL TO PROCESS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {numStillToProcess}");
                Console.WriteLine(
                    $"READS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {totalReadsInRegion}");
                foreach (var kvp in _categoryLookup)
                {
                    Console.WriteLine(
                        $"CATEGORYCOUNT ({startPosition}-{endPosition} (eff:{effectiveMax})): {kvp.Key}: {kvp.Value} ({Math.Round(kvp.Value * 100 / (float)totalReadsInRegion)}%)");
                }
            }

            // Shift this region's counts right by extraBins so index 0 lines up with the new start position.
            var totalNumBins = numBins + extraBins;
            var allHits = new uint[totalNumBins];
            var singleMismatchHits = new uint[totalNumBins];

            for (var i = 0; i < totalNumBins; i++)
            {
                var newBin = i + extraBins;
                if (newBin >= totalNumBins)
                {
                    break;
                }

                if (totalBinCounts[i] > 0)
                {
                    allHits[newBin] = totalBinCounts[i];
                    singleMismatchHits[newBin] = singleMismatchBinCounts[i];
                }
            }

            if (_lightDebug)
            {
                Logger.WriteToLog(
                    $"Creating bin evidence for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var binEvidence = _binEvidenceFactory.GetBinEvidence(totalNumBins, adjustedStartPosition);
            binEvidence.SetSingleMismatchHits(singleMismatchHits);
            binEvidence.AddAllHits(allHits);

            if (_lightDebug)
            {
                Logger.WriteToLog($"Adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
            }

            if (edgeState != null)
            {
                var edgeBinInNew = binEvidence.GetBinId(edgeState.EffectiveMinPosition);
                var edgeBinInOld = edgeState.BinEvidence.GetBinId(edgeState.EffectiveMinPosition);

                // NOTE(review): AddEdgeHits names these parameters (offset, startInOld), yet the old bin id is
                // passed as offset and the old-new difference as startInOld - confirm this order is intended.
                AddEdgeHits(edgeState, binEvidence, edgeBinInOld, edgeBinInOld - edgeBinInNew);
            }

            if (_lightDebug)
            {
                Logger.WriteToLog($"Done adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
            }

            var finalState = new RegionDataForAggregation()
            {
                BinEvidence = binEvidence,
                PairResultLookup = pairResultLookup,
                EdgeState = edgeState,
                EffectiveMaxPosition = effectiveMax,
                EffectiveMinPosition = adjustedStartPosition
            };

            finalTask.Post(finalState);
            finalTask.Complete();
        });

    return new[] { t, finalWriteTask.Completion };
}
/// <summary>
/// Returns the finalized indels registered for <paramref name="chrom"/>, merged with the indels
/// carried in <paramref name="edgeState"/> when one is supplied.
/// </summary>
/// <param name="chrom">Chromosome name used as the key into <paramref name="finalizedIndels"/>.</param>
/// <param name="finalizedIndels">Finalized indels keyed by chromosome; its lists are not modified.</param>
/// <param name="edgeState">Optional edge state whose <c>EdgeIndels</c> are merged in; may be null.</param>
/// <returns>
/// When <paramref name="edgeState"/> is null: the list stored for the chromosome, or a new empty list if
/// there is none. Otherwise: a new list holding the chromosome's indels followed by the edge indels,
/// with duplicates removed.
/// </returns>
private static List<HashableIndel> GetFinalizedIndelsForChrom(string chrom,
    Dictionary<string, List<HashableIndel>> finalizedIndels, EdgeState edgeState)
{
    if (!finalizedIndels.TryGetValue(chrom, out var finalizedIndelsForChrom))
    {
        finalizedIndelsForChrom = new List<HashableIndel>();
    }

    if (edgeState == null)
    {
        return finalizedIndelsForChrom;
    }

    // Merge into a fresh list rather than AddRange-ing into the list owned by the dictionary,
    // which would leave the caller's data permanently extended with (possibly duplicate) edge indels.
    return finalizedIndelsForChrom.Concat(edgeState.EdgeIndels).Distinct().ToList();
}