コード例 #1
0
        /// <summary>
        /// Appends every alignment list held in <paramref name="edgeState"/> to the matching entry of
        /// <paramref name="pairResultLookup"/> and keeps a running count of how many alignments were appended.
        /// </summary>
        /// <param name="edgeState">Edge state whose <c>EdgeAlignments</c> are copied; when null nothing is copied.</param>
        /// <param name="pairResultLookup">
        /// Lookup that receives the alignments. It is mutated in place; a new empty list is registered
        /// for any key it does not contain yet.
        /// </param>
        /// <param name="numRetrievedFromLastBlock">Count to start from.</param>
        /// <returns><paramref name="numRetrievedFromLastBlock"/> plus the number of alignments appended.</returns>
        private static int AddAlignmentsFromEdgeState(EdgeState edgeState, Dictionary <PairClassification, List <PairResult> > pairResultLookup,
                                                      int numRetrievedFromLastBlock)
        {
            if (edgeState == null)
            {
                return(numRetrievedFromLastBlock);
            }

            foreach (var key in edgeState.EdgeAlignments.Keys)
            {
                // One TryGetValue instead of ContainsKey + Add + indexer: the destination list is
                // resolved with a single hash lookup when the key already exists.
                if (!pairResultLookup.TryGetValue(key, out var destination))
                {
                    destination = new List <PairResult>();
                    pairResultLookup.Add(key, destination);
                }

                var alns = edgeState.EdgeAlignments[key];
                destination.AddRange(alns);
                numRetrievedFromLastBlock += alns.Count;
            }

            return(numRetrievedFromLastBlock);
        }
コード例 #2
0
 /// <summary>
 /// Folds the bin evidence carried by <paramref name="edgeState"/> into <paramref name="binEvidence2"/>
 /// by delegating to <c>CombineBinEvidence</c>.
 /// </summary>
 /// <param name="edgeState">Edge state supplying the bin evidence to merge; must not be null.</param>
 /// <param name="binEvidence2">Bin evidence that receives the merged data.</param>
 /// <param name="offset">Forwarded unchanged as the second argument of <c>CombineBinEvidence</c>.</param>
 /// <param name="startInOld">Forwarded unchanged as the third argument of <c>CombineBinEvidence</c>.</param>
 private static void AddEdgeHits(EdgeState edgeState, IBinEvidence binEvidence2, int offset, int startInOld)
 {
     var carriedEvidence = edgeState.BinEvidence;

     // The final argument is the bin count of the carried-over evidence itself.
     binEvidence2.CombineBinEvidence(carriedEvidence, offset, startInOld, carriedEvidence.NumBins);
 }
コード例 #3
0
        /// <summary>
        /// Builds and links the dataflow blocks for one region: per-classification blocks fed by
        /// <paramref name="pairClassifierBlock"/>, an aggregation block, a block that records the region's
        /// edge state, and a writer block. Once every upstream block (and, if registered, the previous
        /// region's edge task) has finished, a continuation assembles the bin evidence and pair results
        /// for the region and posts them to the aggregation block.
        /// </summary>
        /// <param name="pairClassifierBlock">Source of classified pair results that the per-classification blocks are linked to.</param>
        /// <param name="startPosition">Start of the region; also the key under which this region's edge state and edge task are stored.</param>
        /// <param name="endPosition">End of the region.</param>
        /// <param name="edgeStates">
        /// Shared map of edge states keyed by region start. This region's state is written under
        /// <paramref name="startPosition"/>; the state stored under <paramref name="prevBlockStart"/> is removed and consumed.
        /// </param>
        /// <param name="edgeToWaitOn">
        /// Shared map of tasks keyed by region start. The task stored under <paramref name="prevBlockStart"/>
        /// (if any) is awaited before aggregation, and this region's task is registered unless
        /// <paramref name="isFinalTask"/> is true.
        /// </param>
        /// <param name="prevBlockStart">Key of the preceding region in <paramref name="edgeStates"/> and <paramref name="edgeToWaitOn"/>.</param>
        /// <param name="isFinalTask">Forwarded to <c>AggregateTask</c>; when true this region does not register a task in <paramref name="edgeToWaitOn"/>.</param>
        /// <returns>
        /// Two tasks: the continuation that prepares and posts the aggregation input, and the completion
        /// of the writer block.
        /// </returns>
        /// <exception cref="Exception">Thrown when a completion task cannot be added to the internal wait set.</exception>
        public Task[] GetAndLinkAllClassificationBlocksWithEcFinalization(
            ISourceBlock <PairResult> pairClassifierBlock,
            int startPosition, int endPosition, ConcurrentDictionary <int, EdgeState> edgeStates,
            ConcurrentDictionary <int, Task> edgeToWaitOn, int prevBlockStart,
            bool isFinalTask = false)
        {
            if (_lightDebug)
            {
                Logger.WriteToLog(
                    $"Creating tasks for region {_chrom}:{startPosition}-{endPosition}.");
            }

            // Tasks that must finish before the aggregation continuation below runs.
            var allToWaitFor = new ConcurrentDictionary <Task, int>();

            var messySiteWidth = _geminiOptions.MessySiteWidth;
            var effectiveMax   = 0;

            var adjustedStartPosition = startPosition;

            adjustedStartPosition = RoundedStartPosition(adjustedStartPosition, messySiteWidth);

            var pairResultLookup = new ConcurrentDictionary <PairClassification, List <PairResult> >();
            var indelLookup      = new ConcurrentDictionary <string, IndelEvidence>();

            var regionLength = endPosition - adjustedStartPosition;
            // NOTE(review): the extra 1000 bins look like headroom beyond the nominal region length - confirm.
            var numBins      = (regionLength / messySiteWidth) + 1000;

            var totalBinCounts          = InitializeTotalBinCounts(numBins);
            var singleMismatchBinCounts = InitializeSingleMismatchBinCounts(numBins);

            var actBlockFactory = _actionBlockFactoryProvider.GetFactory(startPosition, endPosition,
                                                                         adjustedStartPosition, totalBinCounts, singleMismatchBinCounts, numBins, allToWaitFor);

            foreach (var classification in classifications)
            {
                var toWaitFor = GetAndLinkPerClassificationBlocksWithEcFinalization(pairClassifierBlock, classification, indelLookup);

                var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
                var categoryIsRealignable = _categoriesForRealignment.Contains(classification);

                // Even if we're not going to realign these reads, they may still be useful for bin evidence, so don't give them the immediate flush
                var shouldCollectBinEvidence = TypeClassifier.MessyTypes.Contains(classification) || TypeClassifier._indelTypes.Contains(classification);

                var isSingleMismatch = _geminiOptions.AvoidLikelySnvs && (classification == PairClassification.SingleMismatchStitched ||
                                                                          classification == PairClassification.UnstitchSingleMismatch);

                if (!(categoryIsRealignable || doStitch || shouldCollectBinEvidence))
                {
                    // Nothing downstream needs these pairs, so route them to the early-flush block.
                    var actBlock = actBlockFactory.GetEarlyFlushBlock(classification, isSingleMismatch);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    // Drop tasks that have already completed so the wait set only tracks outstanding work.
                    var toRemove = allToWaitFor.Keys.Where(x => x.IsCompleted);
                    foreach (var task in toRemove)
                    {
                        allToWaitFor.TryRemove(task, out _);
                    }
                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
                else
                {
                    // These pairs are collected into pairResultLookup for the aggregation step.
                    var actBlock = actBlockFactory.GetActionablePairsBlock(classification, pairResultLookup);

                    foreach (var transformBlock in toWaitFor)
                    {
                        transformBlock.LinkTo(actBlock, new DataflowLinkOptions()
                        {
                            PropagateCompletion = true
                        });
                    }

                    if (!allToWaitFor.TryAdd(actBlock.Completion, 1))
                    {
                        throw new Exception("Failed to add task.");
                    }
                }
            }

            var finalTask = AggregateTask(indelLookup, startPosition, endPosition, isFinalTask, _progressTracker);

            // Records this region's edge state for the next region, then forwards the alignments to be written.
            var intermediateWriterTask = new TransformBlock <AggregateRegionResults, List <BamAlignment> >(results =>
            {
                edgeStates.AddOrUpdate(startPosition, results.EdgeState, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state already exists: {s}.");
                    return(results.EdgeState);
                });
                return(results.AlignmentsReadyToBeFlushed);
            }, new ExecutionDataflowBlockOptions()
            {
                EnsureOrdered = false
            });

            var finalWriteTask = actBlockFactory.GetWriterBlock();

            finalTask.LinkTo(intermediateWriterTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });
            intermediateWriterTask.LinkTo(finalWriteTask, new DataflowLinkOptions()
            {
                PropagateCompletion = true
            });

            // A single TryGetValue avoids the ContainsKey-then-indexer race: the entry cannot disappear
            // between the existence check and the read on this shared ConcurrentDictionary.
            if (edgeToWaitOn.TryGetValue(prevBlockStart, out var prevEdgeTask))
            {
                if (!allToWaitFor.TryAdd(prevEdgeTask, 1))
                {
                    throw new Exception("Failed to add task for previous edge.");
                }
            }
            else
            {
                Logger.WriteToLog($"At {startPosition}, prev block is {prevBlockStart}, nothing to wait on.");
            }

            if (!isFinalTask)
            {
                // Let the next region wait on this region's edge state being recorded.
                edgeToWaitOn.AddOrUpdate(startPosition, intermediateWriterTask.Completion, (s, e) =>
                {
                    Logger.WriteWarningToLog($"Edge state task already exists: {s}.");
                    return(intermediateWriterTask.Completion);
                });
            }

            // Snapshot of the wait set; the continuation inspects these tasks' final statuses.
            var allTasks = allToWaitFor.Keys.ToList();
            var t        = Task.WhenAll(allTasks)
                           .ContinueWith(_ =>
            {
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing for aggregation for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (allTasks.Any(x => x.Status != TaskStatus.RanToCompletion))
                {
                    Logger.WriteToLog("ERROR: Task did not complete.");

                    foreach (var task in allTasks)
                    {
                        Logger.WriteToLog($"{task.Id}\t{task.Status}\t{task.Exception}");
                        if (task.Status == TaskStatus.Faulted)
                        {
                            // Pass the exception along to the final task so it can be forced to error out.
                            finalTask = ForceFailFinalTask(intermediateWriterTask, task.Exception);
                        }
                    }
                }

                // Largest read-pair position seen and the number of pairs still awaiting processing.
                var numStillToProcess = 0;
                foreach (var item in pairResultLookup)
                {
                    effectiveMax       = Math.Max(effectiveMax, item.Value.Max(x => x.ReadPair.MaxPosition));
                    numStillToProcess += item.Value.Count;
                }

                if (_lightDebug)
                {
                    Logger.WriteToLog($"Preparing edge state info for region {_chrom}:{startPosition}-{endPosition}.");
                }

                EdgeState edgeState = null;
                var extraBins       = 0;
                // TryRemove checks and removes atomically. The previous ContainsKey + Remove pair could
                // leave edgeState null (and dereference it) if the entry vanished between the two calls.
                if (edgeStates.TryRemove(prevBlockStart, out edgeState))
                {
                    if (edgeState.EdgeIndels.Any() || edgeState.EdgeAlignments.Any())
                    {
                        // Pull the region start back so it also covers what the previous region carried over.
                        var newAdjustedStartPosition = RoundedStartPosition(Math.Min(adjustedStartPosition, edgeState.EffectiveMinPosition), messySiteWidth);
                        extraBins             = (adjustedStartPosition - newAdjustedStartPosition) / messySiteWidth;
                        adjustedStartPosition = newAdjustedStartPosition;
                    }
                }
                allToWaitFor.Clear();
                allTasks.Clear();


                if (_lightDebug)
                {
                    var totalReadsInRegion = _categoryLookup.Values.Sum();
                    Console.WriteLine($"STILL TO PROCESS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {numStillToProcess}");
                    Console.WriteLine(
                        $"READS IN REGION ({startPosition}-{endPosition} (eff:{effectiveMax})): {totalReadsInRegion}");
                    foreach (var kvp in _categoryLookup)
                    {
                        Console.WriteLine(
                            $"CATEGORYCOUNT ({startPosition}-{endPosition} (eff:{effectiveMax})): {kvp.Key}: {kvp.Value} ({Math.Round(kvp.Value * 100 / (float)totalReadsInRegion)}%)");
                    }
                }

                var totalNumBins       = numBins + extraBins;
                var allHits            = new uint[totalNumBins];
                var singleMismatchHits = new uint[totalNumBins];

                // Copy the per-bin counts, shifted right by extraBins to leave room for the prepended bins.
                for (var i = 0; i < totalNumBins; i++)
                {
                    var newBin = i + extraBins;
                    if (newBin >= totalNumBins)
                    {
                        break;
                    }

                    if (totalBinCounts[i] > 0)
                    {
                        allHits[newBin]            = totalBinCounts[i];
                        singleMismatchHits[newBin] = singleMismatchBinCounts[i];
                    }
                }


                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Creating bin evidence for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var binEvidence = _binEvidenceFactory.GetBinEvidence(totalNumBins, adjustedStartPosition);
                binEvidence.SetSingleMismatchHits(singleMismatchHits);
                binEvidence.AddAllHits(allHits);
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                if (edgeState != null)
                {
                    // Locate the carried-over minimum position in both the new and the old bin layouts.
                    var edgeBinInNew = binEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    var edgeBinInOld = edgeState.BinEvidence.GetBinId(edgeState.EffectiveMinPosition);
                    AddEdgeHits(edgeState, binEvidence, edgeBinInOld, edgeBinInOld - edgeBinInNew);
                }
                if (_lightDebug)
                {
                    Logger.WriteToLog($"Done adding edge hits for region {_chrom}:{startPosition}-{endPosition}.");
                }

                var finalState = new RegionDataForAggregation()
                {
                    BinEvidence          = binEvidence,
                    PairResultLookup     = pairResultLookup,
                    EdgeState            = edgeState,
                    EffectiveMaxPosition = effectiveMax,
                    EffectiveMinPosition = adjustedStartPosition
                };

                // Hand the assembled region data to the aggregation block and signal that no more input follows.
                finalTask.Post(finalState);
                finalTask.Complete();
            });

            return(new[] { t, finalWriteTask.Completion });
        }
コード例 #4
0
        /// <summary>
        /// Returns the finalized indels registered for <paramref name="chrom"/>, combined with the indels
        /// carried by <paramref name="edgeState"/> (duplicates removed) when an edge state is supplied.
        /// </summary>
        /// <param name="chrom">Chromosome key to look up in <paramref name="finalizedIndels"/>.</param>
        /// <param name="finalizedIndels">Finalized indels keyed by chromosome. This method does not modify it.</param>
        /// <param name="edgeState">Optional edge state whose <c>EdgeIndels</c> are merged in; may be null.</param>
        /// <returns>
        /// When <paramref name="edgeState"/> is null: the list stored for <paramref name="chrom"/>, or a new
        /// empty list if there is none. Otherwise: a new list holding the distinct union of the stored
        /// indels and the edge indels.
        /// </returns>
        private static List <HashableIndel> GetFinalizedIndelsForChrom(string chrom, Dictionary <string, List <HashableIndel> > finalizedIndels, EdgeState edgeState)
        {
            if (!finalizedIndels.TryGetValue(chrom, out var finalizedIndelsForChrom))
            {
                finalizedIndelsForChrom = new List <HashableIndel>();
            }

            if (edgeState != null)
            {
                // Build a fresh list instead of AddRange-ing into the list owned by the caller's
                // dictionary: that shared list would otherwise accumulate edge indels on every call.
                finalizedIndelsForChrom = finalizedIndelsForChrom.Concat(edgeState.EdgeIndels).Distinct().ToList();
            }

            return(finalizedIndelsForChrom);
        }