/// <summary>
/// Attempts to realign <paramref name="read"/> against the supplied indels while anchoring on one side of the read.
/// </summary>
/// <param name="indels">Indels to layer onto the read; callers are expected to supply them sorted by ascending position.</param>
/// <param name="indelContexts">Reference snippet per indel. The snippet of the first indel is used for insertion masking, softclip reapplication and mismatch scoring.</param>
/// <param name="read">The read being realigned.</param>
/// <param name="anchorOnLeft">True to anchor on the left side of the read, false to anchor on the right.</param>
/// <param name="details">Precomputed cigar, position map, sequence and N/softclip lengths for this anchor.</param>
/// <param name="pairSpecific">Forwarded unchanged to LayerOnIndels.</param>
/// <param name="indexes">For each slot of <paramref name="indels"/>, the index to record in the result's AcceptedIndels.</param>
/// <returns>
/// The realignment result, or null when LayerOnIndels returns true, when an accepted deletion fails the
/// right-anchor check below, or when any exception is thrown (logged only if _debug is set).
/// </returns>
private RealignmentResult RealignForAnchor(HashableIndel[] indels, Dictionary<HashableIndel, GenomeSnippet> indelContexts,
    Read read, bool anchorOnLeft, ReadToRealignDetails details, bool pairSpecific, int[] indexes)
{
    try
    {
        var cigarWithoutTerminalNs = new CigarAlignment(details.FreshCigarWithoutTerminalNs);

        // Copy the position map entry by entry so the shared details object is not written to from here.
        var positionMap = new PositionMap(details.PositionMapLength);
        for (var mapIndex = 0; mapIndex < details.PositionMapLength; mapIndex++)
        {
            var position = details.PositionMapWithoutTerminalNs.GetPositionAtIndex(mapIndex);
            positionMap.UpdatePositionAtIndex(mapIndex, position);
        }

        var realignment = new RealignmentResult();

        // Layer on the indels one by one (already sorted by ascending position).
        // A true return value is treated as "could not realign".
        var layeringFailed = LayerOnIndels(indels, indelContexts, anchorOnLeft, details.SequenceWithoutTerminalNs,
            positionMap, ref realignment, pairSpecific);
        if (layeringFailed)
        {
            return null;
        }

        var snippet = indelContexts[indels[0]];

        // Softclip partial insertions at read ends.
        var shouldMaskInsertions = _maskPartialInsertion || _minimumUnanchoredInsertionLength > 0;
        if (shouldMaskInsertions)
        {
            MaskPartialInsertion(indels, read, snippet.Sequence, realignment, snippet.StartPosition);
        }

        _softclipReapplier.ReapplySoftclips(read, details.NPrefixLength, details.NSuffixLength, positionMap,
            realignment, snippet, details.PrefixSoftclip, details.SuffixSoftclip, cigarWithoutTerminalNs);

        realignment.AcceptedIndels = new List<int>();
        realignment.AcceptedHashableIndels = new List<HashableIndel>();

        for (var accepted = 0; accepted < realignment.AcceptedIndelsInSubList.Count; accepted++)
        {
            var subIndex = realignment.AcceptedIndelsInSubList[accepted];
            realignment.AcceptedIndels.Add(indexes[subIndex]);

            var indel = indels[subIndex];
            realignment.AcceptedHashableIndels.Add(indel);

            if (indel.Type != AlleleCategory.Deletion)
            {
                continue;
            }

            // TODO do we need to be more nuanced about this and only do it in duplication areas?
            var anchorStart = realignment.IndelsAddedAt[accepted] + 1;
            var finalOp = realignment.Cigar[realignment.Cigar.Count - 1];

            var trailingSoftclip = 0;
            if (finalOp.Type == 'S')
            {
                trailingSoftclip = (int)finalOp.Length;
            }

            var rightAnchorLength = read.Sequence.Length - anchorStart - trailingSoftclip;

            // Reject the alignment when the bases right of the deletion are fewer than the deletion length
            // and are identical to the same number of reference-allele bases (skipping its first base).
            if (rightAnchorLength < indel.Length
                && anchorStart < read.Sequence.Length
                && read.Sequence.Substring(anchorStart, rightAnchorLength) ==
                   indel.ReferenceAllele.Substring(1, rightAnchorLength))
            {
                return null;
            }
        }

        if (realignment.SumOfMismatchingQualities == null)
        {
            realignment.SumOfMismatchingQualities = Helper.GetSumOfMismatchQualities(read.Qualities, read.Sequence,
                positionMap, snippet.Sequence, snippet.StartPosition);
        }

        var indelDescriptions = indels.Select(indel => StringifyIndel(indel));
        realignment.Indels = string.Join("|", indelDescriptions);

        return realignment;
    }
    catch (Exception e)
    {
        // Best effort: any failure while realigning means "no result" for this anchor.
        if (_debug)
        {
            var readName = read.Name;
            var indelSummary = string.Join("|", indels.Select(indel => StringifyIndel(indel)));
            var anchorSide = anchorOnLeft ? "left" : "right";

            Logger.WriteExceptionToLog(new Exception(
                $"Realign for anchor failed: read '{readName}' with indels {indelSummary}, anchoring on {anchorSide}.", e));
        }

        return null;
    }
}
/// <summary>
/// Tries to realign <paramref name="read"/> against every single ranked indel and against every pair of ranked
/// indels that can coexist, and returns the best result as judged by the comparer.
/// </summary>
/// <param name="rankedIndels">Candidate indels in ranked order.</param>
/// <param name="indelContexts">Reference snippet per indel, forwarded to the realignment calls.</param>
/// <param name="read">The read to realign.</param>
/// <param name="attemptedTargetSides">
/// Receives the number of counted attempts: 2 for each single-indel attempt, plus 1 for each side of a
/// two-indel attempt that was not skipped because the first indel had already failed on its own.
/// </param>
/// <param name="fromPairSpecificIndels">Forwarded to CanCoexist and to the realignment calls as pairSpecific.</param>
/// <returns>
/// The best realignment found, or null if none succeeded. Returns early as soon as the best result after a
/// single-indel attempt is reported unbeatable by IsUnbeatable.
/// </returns>
public RealignmentResult GetBestAlignment(List<HashableIndel> rankedIndels,
    Dictionary<HashableIndel, GenomeSnippet> indelContexts, Read read, out int attemptedTargetSides,
    bool fromPairSpecificIndels)
{
    RealignmentResult bestResultSoFar = null;
    attemptedTargetSides = 0;

    // Note this used to be in the loop... hopefully I'm not killing anything here...
    var nPrefixLength = read.GetNPrefix();

    if (_keepProbeSoftclips)
    {
        if ((_keepBothSideSoftclips || !read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) &&
            nPrefixLength == 0)
        {
            nPrefixLength = (int)read.CigarData.GetPrefixClip();
        }
    }

    var probePrefix = _keepProbeSoftclips ? nPrefixLength : 0;

    var details = new ReadToRealignDetails(read, read.GetAdjustedPosition(true, probePrefix: probePrefix),
        _keepProbeSoftclips, _keepBothSideSoftclips);

    // Right-anchored details are only built when the right-adjusted position is non-negative;
    // RealignToTargets skips the right anchor when they are null.
    var positionFromRight = read.GetAdjustedPosition(false, probePrefix: probePrefix);
    ReadToRealignDetails rightAnchoredDetails = null;
    if (positionFromRight >= 0)
    {
        rightAnchoredDetails = new ReadToRealignDetails(read, positionFromRight, _keepProbeSoftclips,
            _keepBothSideSoftclips);
    }

    // Align to each single indel and to each coexisting pair of indels;
    // try to skip alignment if we know it will fail.
    for (var i = 0; i < rankedIndels.Count; i++)
    {
        var indel1 = rankedIndels[i];
        var indexes = new int[] { i };

        // try aligning to one indel
        _oneIndelSimpleTargets[0] = indel1;
        var indel1Result = RealignToTargets(read, _oneIndelSimpleTargets, indelContexts, details,
            rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes);
        attemptedTargetSides += 2;

        // update best result so far for one indel
        bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel1Result);
        if (IsUnbeatable(bestResultSoFar))
        {
            return bestResultSoFar;
        }

        var indexes2 = new int[2];
        for (var j = i + 1; j < rankedIndels.Count; j++)
        {
            var indel2 = rankedIndels[j];
            if (!CanCoexist(indel1, indel2, fromPairSpecificIndels))
            {
                continue;
            }

            _twoIndelSimpleTargets[0] = indel1;
            _twoIndelSimpleTargets[1] = indel2;
            Array.Sort(_twoIndelSimpleTargets, CompareSimple); // need to sort by position

            // The sort may have swapped the two targets. The index array must stay parallel to the
            // sorted targets, because the realigner maps each accepted target slot through it to
            // report which ranked indel was accepted.
            var indel1SortedFirst = _twoIndelSimpleTargets[0].Equals(indel1);
            indexes2[0] = indel1SortedFirst ? i : j;
            indexes2[1] = indel1SortedFirst ? j : i;

            // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
            var alreadyFailedFromLeft = indel1Result == null && indel1SortedFirst;
            var alreadyFailedFromRight = indel1Result == null && _twoIndelSimpleTargets[1].Equals(indel1);

            if (!alreadyFailedFromLeft)
            {
                attemptedTargetSides++;
            }

            if (!alreadyFailedFromRight)
            {
                attemptedTargetSides++;
            }

            var indel2Result = RealignToTargets(read, _twoIndelSimpleTargets, indelContexts, details,
                rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes2,
                skipLeftAnchored: alreadyFailedFromLeft, skipRightAnchored: alreadyFailedFromRight);
            bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel2Result);
        }
    }

    return bestResultSoFar;
}
/// <summary>
/// Realigns <paramref name="read"/> against <paramref name="indels"/> from the left anchor and then from the
/// right anchor, and returns whichever result the comparer prefers.
/// </summary>
/// <param name="read">The read to realign.</param>
/// <param name="indels">Target indels, forwarded to RealignForAnchor.</param>
/// <param name="indelContexts">Reference snippet per indel, forwarded to RealignForAnchor.</param>
/// <param name="leftAnchoredDetails">Realignment details used for the left-anchored attempt.</param>
/// <param name="rightAnchoredDetails">Realignment details used for the right-anchored attempt; when null the right-anchored attempt is skipped.</param>
/// <param name="pairSpecific">Forwarded to RealignForAnchor.</param>
/// <param name="indexes">Forwarded to RealignForAnchor.</param>
/// <param name="skipLeftAnchored">When true, no left-anchored attempt is made.</param>
/// <param name="skipRightAnchored">When true, no right-anchored attempt is made.</param>
/// <returns>
/// The left-anchored result immediately if IsUnbeatable accepts it (its Failed* flags are not touched in that
/// case); otherwise the comparer's pick with FailedForLeftAnchor / FailedForRightAnchor set, or null.
/// </returns>
public RealignmentResult RealignToTargets(Read read, HashableIndel[] indels,
    Dictionary<HashableIndel, GenomeSnippet> indelContexts, ReadToRealignDetails leftAnchoredDetails,
    ReadToRealignDetails rightAnchoredDetails, bool pairSpecific, int[] indexes, bool skipLeftAnchored = false,
    bool skipRightAnchored = false)
{
    // Without right-anchored details there is nothing to attempt from the right.
    var attemptRightAnchor = !skipRightAnchored && rightAnchoredDetails != null;

    // when aligning with left anchor, if there's an insertion and a deletion at the same position
    // we need to process the insertion first. this is an artifact of how we adjust positions after an insertion
    // luckily this is already how they are sorted in the default sort function
    RealignmentResult fromLeft = null;
    if (!skipLeftAnchored)
    {
        fromLeft = RealignForAnchor(indels, indelContexts, read, true, leftAnchoredDetails, pairSpecific, indexes);
    }

    if (IsUnbeatable(fromLeft))
    {
        return fromLeft;
    }

    // when aligning with right anchor, if there's an insertion and a deletion at the same position
    // we need to process the deletion first.
    // this is because the position of indels are reported on the left side of the indel, and the deletion
    // could have adjusted the other side positions such that an insertion comes into view (which otherwise might not)
    RealignmentResult fromRight = null;
    if (attemptRightAnchor)
    {
        fromRight = RealignForAnchor(indels, indelContexts, read, false, rightAnchoredDetails, pairSpecific, indexes);
    }

    var winner = _comparer.GetBetterResult(fromLeft, fromRight);
    if (winner == null)
    {
        return null;
    }

    // Record which anchors produced no result (including anchors that were skipped).
    winner.FailedForLeftAnchor = fromLeft == null;
    winner.FailedForRightAnchor = fromRight == null;
    return winner;
}