/// <summary>
/// Writes one <c>BINCOUNTS</c> diagnostic line to the log for every bin whose total hit count
/// exceeds 10. Does nothing unless <c>_geminiOptions.LogRegionsAndRealignments</c> is set.
/// </summary>
/// <remarks>
/// Despite the method name, this body does not populate any of the edge-hit dictionaries:
/// <paramref name="edgeThreshold"/>, <paramref name="edgeHits"/>,
/// <paramref name="edgeSingleMismatchHits"/>, <paramref name="edgeIndelHits"/>,
/// <paramref name="edgeMessyHits"/> and <paramref name="startPosition"/> are accepted but never read.
/// </remarks>
/// <param name="numBins">Number of bins to scan, starting at bin 0.</param>
/// <param name="adjustedStartPosition">Position of the start of bin 0.</param>
/// <param name="messySiteWidth">Width of a bin; bin <c>i</c> starts at <c>adjustedStartPosition + i * messySiteWidth</c>.</param>
/// <param name="edgeThreshold">Unused.</param>
/// <param name="binEvidence">Source of the per-bin hit counts that are logged.</param>
/// <param name="edgeHits">Unused.</param>
/// <param name="edgeSingleMismatchHits">Unused.</param>
/// <param name="edgeIndelHits">Unused.</param>
/// <param name="edgeMessyHits">Unused.</param>
/// <param name="startPosition">Unused.</param>
/// <param name="binConclusions">Source of the per-bin conclusions that are logged.</param>
/// <param name="usableBins">Queried for whether each logged bin's start position is usable.</param>
private void PopulateEdgeHitsAndLogBins(int numBins, int adjustedStartPosition, int messySiteWidth, int edgeThreshold,
    IBinEvidence binEvidence, Dictionary<int, int> edgeHits, Dictionary<int, int> edgeSingleMismatchHits,
    Dictionary<int, int> edgeIndelHits, Dictionary<int, int> edgeMessyHits, int startPosition,
    BinConclusions binConclusions, UsableBins usableBins)
{
    // Logging is the only effect of this method, so skip the whole scan when it is disabled.
    if (!_geminiOptions.LogRegionsAndRealignments)
    {
        return;
    }

    // No edge detection happens here, so this is always false. It is still emitted so that the
    // column layout of the BINCOUNTS line stays the same.
    var inEdge = false;

    for (var binId = 0; binId < numBins; binId++)
    {
        var allHits = binEvidence.GetAllHits(binId);
        if (allHits > 10)
        {
            var binStart = adjustedStartPosition + (binId * messySiteWidth);

            var binCounts =
                $"{binId},{inEdge},{binStart},{binStart + messySiteWidth},{allHits},{usableBins.IsPositionUsable(binStart)},{binEvidence.GetSingleMismatchHit(binId)}," +
                $"{binConclusions.GetProbableTrueSnvRegion(binId)},{binEvidence.GetIndelHit(binId)},{binConclusions.GetIndelRegionHit(binId)}," +
                $"{binEvidence.GetMessyHit(binId)},{binConclusions.GetIsMessyEnough(binId)},{binEvidence.GetForwardMessyRegionHit(binId)},{binConclusions.GetFwdMessyStatus(binId)},{binEvidence.GetReverseMessyRegionHit(binId)},{binConclusions.GetRevMessyStatus(binId)},{binEvidence.GetMapqMessyHit(binId)},{binConclusions.GetMapqMessyStatus(binId)}";

            // TODO consider writing this to a proper output file
            Logger.WriteToLog("BINCOUNTS\t" + binCounts);
        }
    }
}
/// <summary>
/// Creates the per-bin conclusion trackers, each sized to the number of bins in the supplied evidence.
/// </summary>
/// <param name="binEvidence">Evidence kept in <c>_binEvidence</c>; its <c>NumBins</c> sizes every tracker.</param>
/// <param name="collectDepth">Stored in <c>_collectDepth</c>; not otherwise used by this constructor.</param>
/// <param name="trackDirectionalMess">
/// When true, the forward and reverse messy statuses are backed by <c>SparseGroupedBoolBins</c>;
/// otherwise they are <c>DummyBins&lt;bool&gt;</c> instances.
/// </param>
/// <param name="trackMapqMess">
/// When true, the mapq messy status is backed by <c>SparseGroupedBoolBins</c>;
/// otherwise it is a <c>DummyBins&lt;bool&gt;</c> instance.
/// </param>
public BinConclusions(IBinEvidence binEvidence, bool collectDepth, bool trackDirectionalMess = false, bool trackMapqMess = false)
{
    _binEvidence = binEvidence;
    _collectDepth = collectDepth;

    var binCount = binEvidence.NumBins;

    // These two trackers are always real.
    _isMessyEnough = new SparseGroupedBoolBins(binCount);
    _indelRegions = new SparseGroupedBoolBins(binCount);

    // Directional (forward/reverse) tracking is opt-in.
    if (!trackDirectionalMess)
    {
        _fwdMessyStatus = new DummyBins<bool>();
        _revMessyStatus = new DummyBins<bool>();
    }
    else
    {
        _fwdMessyStatus = new SparseGroupedBoolBins(binCount);
        _revMessyStatus = new SparseGroupedBoolBins(binCount);
    }

    // Mapq tracking is opt-in as well.
    if (!trackMapqMess)
    {
        _mapqMessyStatus = new DummyBins<bool>();
    }
    else
    {
        _mapqMessyStatus = new SparseGroupedBoolBins(binCount);
    }

    // Allocated unconditionally: the former `_avoidLikelySnvs` gate is disabled.
    // NOTE(review): the meaning of the second argument (10) is defined by SparseGroupedBoolBins - confirm there.
    _probableTrueSnvRegions = new SparseGroupedBoolBins(binCount, 10);
}
/// <summary>
/// Merges every hit tracker of another <see cref="BinEvidence"/> into the matching tracker of this instance.
/// </summary>
/// <param name="evidence">Evidence to merge in; must be a non-null <see cref="BinEvidence"/>.</param>
/// <param name="binOffset">Forwarded unchanged to each <c>Merge</c> call.</param>
/// <param name="startBinInOther">Forwarded unchanged to each <c>Merge</c> call.</param>
/// <param name="endBinInOther">Forwarded unchanged to each <c>Merge</c> call.</param>
/// <exception cref="ArgumentNullException"><paramref name="evidence"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="evidence"/> is not a <see cref="BinEvidence"/>.</exception>
public void CombineBinEvidence(IBinEvidence evidence, int binOffset = 0, int startBinInOther = 0, int endBinInOther = int.MaxValue)
{
    // Report null separately so it is not mistaken for a type mismatch. ArgumentNullException
    // derives from ArgumentException, so callers catching ArgumentException are unaffected.
    if (evidence == null)
    {
        throw new ArgumentNullException(nameof(evidence));
    }

    // The private trackers are only reachable on the concrete type.
    var other = evidence as BinEvidence;
    if (other == null)
    {
        throw new ArgumentException("Not able to combine bin evidence between two different types.", nameof(evidence));
    }

    _indelHits.Merge(other._indelHits, binOffset, startBinInOther, endBinInOther);
    _messyHits.Merge(other._messyHits, binOffset, startBinInOther, endBinInOther);
    _singleMismatchHits.Merge(other._singleMismatchHits, binOffset, startBinInOther, endBinInOther);
    AllHits.Merge(other.AllHits, binOffset, startBinInOther, endBinInOther);
    _revOnlyMessyHits.Merge(other._revOnlyMessyHits, binOffset, startBinInOther, endBinInOther);
    _mapqMessyHits.Merge(other._mapqMessyHits, binOffset, startBinInOther, endBinInOther);
    _fwdOnlyMessyHits.Merge(other._fwdOnlyMessyHits, binOffset, startBinInOther, endBinInOther);
}
/// <summary>
/// Combines the bin evidence held by <paramref name="edgeState"/> into <paramref name="binEvidence2"/>.
/// </summary>
/// <param name="edgeState">State whose <c>BinEvidence</c> is the source of the merge.</param>
/// <param name="binEvidence2">Evidence that receives the merged hits.</param>
/// <param name="offset">Passed as the bin offset to <c>CombineBinEvidence</c>.</param>
/// <param name="startInOld">Passed as the first source bin to <c>CombineBinEvidence</c>.</param>
private static void AddEdgeHits(EdgeState edgeState, IBinEvidence binEvidence2, int offset, int startInOld)
{
    var carriedEvidence = edgeState.BinEvidence;

    // The end bin passed along is the source's own bin count.
    var endInOld = carriedEvidence.NumBins;

    binEvidence2.CombineBinEvidence(carriedEvidence, offset, startInOld, endInOld);
}
/// <summary>
/// Processes every read pair of one classification: optionally realigns and/or stitches it,
/// tags the resulting alignments, updates the running counters, and returns all alignments.
/// </summary>
/// <remarks>
/// <para>
/// Each returned alignment gets an <c>XT</c> tag (the read treatment) and an <c>XP</c> tag (the
/// classification name). <paramref name="progressTracker"/> is incremented under the key
/// <c>"{classification}:{readTreatment}"</c> once per pair.
/// </para>
/// <para>
/// <paramref name="pairResults"/> is cleared before returning. <paramref name="binEvidence"/>,
/// <paramref name="startPosition"/> and <paramref name="endPosition"/> are accepted but not read by this body.
/// </para>
/// </remarks>
/// <param name="categoriesForRealignment">Classifications that are eligible for realignment.</param>
/// <param name="indelSource">Indel source handed to the realign handler.</param>
/// <param name="shouldRealignAtAll">Master switch; when false no pair is realigned.</param>
/// <param name="outcomesLookup">Lookup handed to the realign handler.</param>
/// <param name="numSkippedDueToSites">Incremented for each pair that goes through the handler without realignment.</param>
/// <param name="numKept">Incremented for each pair that goes through the handler with realignment requested.</param>
/// <param name="numRealigned">Incremented for each pair whose read pair reports being realigned after extraction.</param>
/// <param name="numSilenced">Incremented for each pair for which <c>ReadsToSilence</c> returns a positive value.</param>
/// <param name="pairResults">Pairs to process; emptied on return.</param>
/// <param name="classification">Classification shared by all of <paramref name="pairResults"/>.</param>
/// <param name="binEvidence">Unused.</param>
/// <param name="progressTracker">Per-classification/treatment counters.</param>
/// <param name="binConclusions">Passed to <c>ReadsToSilence</c>.</param>
/// <param name="usableBins">Used to decide whether a pair's min or max position lies in a usable bin.</param>
/// <param name="startPosition">Unused.</param>
/// <param name="endPosition">Unused.</param>
/// <returns>All alignments produced for the given pairs, in processing order.</returns>
private List<BamAlignment> ProcessCategory(
    List<PairClassification> categoriesForRealignment,
    IChromosomeIndelSource indelSource, bool shouldRealignAtAll,
    Dictionary<HashableIndel, int[]> outcomesLookup, ref int numSkippedDueToSites,
    ref int numKept, ref int numRealigned, ref int numSilenced,
    List<PairResult> pairResults, PairClassification classification, IBinEvidence binEvidence,
    ConcurrentDictionary<string, int> progressTracker, BinConclusions binConclusions, UsableBins usableBins,
    int startPosition, int endPosition)
{
    var allAlignments = new List<BamAlignment>();

    // These categories are realigned regardless of bin usability when the option is on.
    var isHighLikelihoodForRealign = _geminiOptions.ForceHighLikelihoodRealigners &&
                                     (classification == PairClassification.Disagree ||
                                      classification == PairClassification.MessyStitched ||
                                      classification == PairClassification.MessySplit ||
                                      classification == PairClassification.UnstitchMessy ||
                                      classification == PairClassification.UnstitchIndel);

    var doRealign = false;
    ReadPairRealignerAndCombiner realignHandler = null;

    var alreadyStitched = ClassificationIsStitched(classification);
    var doStitch = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
    var categoryIsRealignable = categoriesForRealignment.Contains(classification);

    // A handler is needed both for realignment and for stitching-only categories.
    if (categoryIsRealignable || doStitch)
    {
        doRealign = true;

        realignHandler = _bamRealignmentFactory.GetRealignPairHandler(doStitch,
            alreadyStitched,
            _realignmentOptions.PairAwareEverything || ClassificationIsPairAwareRealignable(classification),
            _refIdMapping,
            new ReadStatusCounter(), false, indelSource, _chrom, new Dictionary<string, IndelEvidence>(),
            ClassificationHasIndels(classification), outcomesLookup,
            SkipRestitchIfUnchanged(classification));
    }

    using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
    using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
    {
        var nmCalculator = new NmCalculator(singleSnippetSource);
        var classificationString = classification.ToString();

        foreach (var pairResult in pairResults)
        {
            // Per the checks below: 1 or 3 silences R1, 2 or 3 silences R2, 0 silences nothing.
            var toSilence = 0;
            IEnumerable<BamAlignment> alignments;

            if (!doRealign)
            {
                alignments = pairResult.Alignments;
            }
            else
            {
                // Realign when forced by category, or when the category is realignable and
                // either end of the pair falls in a usable bin.
                var doRealignPair =
                    shouldRealignAtAll && (isHighLikelihoodForRealign ||
                                           (categoryIsRealignable &&
                                            (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                                             usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));

                if (!doRealignPair)
                {
                    numSkippedDueToSites++;
                }
                else
                {
                    numKept++;
                }

                toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                if (toSilence > 0)
                {
                    numSilenced++;
                }

                alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                    pairResult.ReadPair.RealignedR2)
                {
                    numRealigned++;
                }
            }

            // A read only counts as silenced if it was not realigned.
            var silencedR1 = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
            var silencedR2 = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
            var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

            progressTracker.AddOrUpdate(classificationString + ":" + readTreatment, 1,
                (x, currentCount) => currentCount + 1);

            // Materialize once so the sequence is not enumerated again when adding to the result.
            var alignmentsList = alignments.ToList();
            foreach (var bamAlignment in alignmentsList)
            {
                if (_geminiOptions.LightDebug)
                {
                    AddMdTagCountsTags(bamAlignment, pairResult);
                }

                bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
            }

            allAlignments.AddRange(alignmentsList);
        }
    }

    realignHandler?.Finish();

    pairResults.Clear();
    return allAlignments;
}