/// <summary>
/// Determines whether a realignment result describes the same alignment as the original read.
/// </summary>
/// <param name="realignResult">Realignment outcome; its position is compared after subtracting one.</param>
/// <param name="originalAlignment">The alignment the read had before realignment.</param>
/// <returns>
/// True when the adjusted position matches and both CIGARs have the same number of operations,
/// each with identical type and length; otherwise false.
/// </returns>
public bool RealignmentIsUnchanged(RealignmentResult realignResult, BamAlignment originalAlignment)
{
    var positionMatches = realignResult.Position - 1 == originalAlignment.Position;
    if (!positionMatches)
    {
        return false;
    }

    var realignedCigar = realignResult.Cigar;
    var originalCigar = originalAlignment.CigarData;
    if (realignedCigar.Count != originalCigar.Count)
    {
        return false;
    }

    // Operation-by-operation comparison; the first difference settles it.
    for (var opIndex = 0; opIndex < realignedCigar.Count; opIndex++)
    {
        var realignedOp = realignedCigar[opIndex];
        var originalOp = originalCigar[opIndex];
        if (realignedOp.Type != originalOp.Type || realignedOp.Length != originalOp.Length)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Decides whether a realignment result should be taken in place of the original alignment,
/// recording the reason on the status counter / read tags when it is rejected.
/// </summary>
/// <param name="realignResult">The candidate realignment.</param>
/// <param name="origBamAlignment">The original alignment; receives the diagnostic tags on rejection.</param>
/// <param name="originalAlignmentSummary">Summary statistics of the original alignment.</param>
/// <param name="realignmentUnchanged">True when the realignment is identical to the original alignment.</param>
/// <param name="isPairAware">Forwarded to the judger's comparison.</param>
/// <returns>
/// True when the result should be accepted: either it is unchanged but has entries in
/// <c>NifiedAt</c>, or it changed and the judger rates it better than or equal to the original.
/// </returns>
private bool ResultIsGoodEnough(RealignmentResult realignResult, BamAlignment origBamAlignment, AlignmentSummary originalAlignmentSummary, bool realignmentUnchanged, bool isPairAware)
{
    if (realignmentUnchanged)
    {
        // An unchanged alignment is still worth taking when it carries nified positions.
        // NifiedAt is null-checked here because HandleAcceptedRealignment treats it as optional;
        // a null list previously caused a NullReferenceException on this path.
        if (realignResult.NifiedAt != null && realignResult.NifiedAt.Any())
        {
            return true;
        }

        _statusCounter.AppendStatusStringTag("RX", "Not taking realignment: unchanged", origBamAlignment);
        _statusCounter.AddStatusCount("Not taking realignment: unchanged");
        return false;
    }

    if (!_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, isPairAware))
    {
        _statusCounter.AppendStatusStringTag("RX",
            $"Realignment failed:not better ({originalAlignmentSummary.Cigar}->{realignResult.Cigar}): {realignResult.Conclusion}",
            origBamAlignment);

        // OS = statistics of the original alignment, RS = statistics of the realignment.
        _statusCounter.UpdateStatusStringTag("OS",
            $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q",
            origBamAlignment);
        _statusCounter.UpdateStatusStringTag("RS",
            $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q",
            origBamAlignment);
        _statusCounter.AddStatusCount("Not taking realignment: not better");
        return false;
    }

    return true;
}
/// <summary>
/// Records diagnostic counts and tags for a realignment that has been accepted.
/// </summary>
/// <param name="origBamAlignment">The alignment before realignment; its CIGAR is written to the "OC" tag.</param>
/// <param name="selectedIndels">Only tested for null; the outcome is reported as "ps" in the status count.</param>
/// <param name="existingIndels">Indels the read had before realignment; may be null or empty.</param>
/// <param name="realignResult">The accepted realignment.</param>
/// <param name="bamAlignment">The alignment that receives the "RX", "OC", "OS" and "RS" tags.</param>
/// <param name="hasExistingUnsanctionedIndels">Reported in the "Replaced existing indels" status count.</param>
/// <param name="originalAlignmentSummary">Statistics of the original alignment, written to the "OS" tag.</param>
private void AddStatusInfo(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, AlignmentSummary originalAlignmentSummary)
{
    _statusCounter.AddStatusCount($"INDEL STATUS\tAccepted\t{realignResult.Indels}");

    var hasSelectedIndels = selectedIndels != null;
    _statusCounter.AddStatusCount($"Successfully realigned (ps: {hasSelectedIndels})");

    _statusCounter.AppendStatusStringTag(
        "RX",
        $"Successfully realigned after {realignResult.Attempts} attempts, indel is {string.Join("|", realignResult.AcceptedIndels)}",
        bamAlignment);

    var replacedExistingIndels = existingIndels != null && existingIndels.Any();
    if (replacedExistingIndels)
    {
        _statusCounter.AppendStatusStringTag(
            "RX",
            $"Orig indels:{string.Join("|", existingIndels)}__New indels:{realignResult.Indels}",
            bamAlignment);
        _statusCounter.AddStatusCount($"Replaced existing indels (nonsanctioned: {hasExistingUnsanctionedIndels})");
    }

    // OC = original CIGAR, OS = original alignment statistics, RS = realigned statistics.
    bamAlignment.ReplaceOrAddStringTag("OC", $"{origBamAlignment.CigarData}");
    bamAlignment.ReplaceOrAddStringTag(
        "OS",
        $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q");
    bamAlignment.ReplaceOrAddStringTag(
        "RS",
        $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q");
}
/// <summary>
/// Applies an accepted realignment to <paramref name="bamAlignment"/>: new position and CIGAR,
/// optional debug tags, a possible map-quality bump, and zeroed qualities at nified positions.
/// </summary>
/// <param name="origBamAlignment">The alignment before realignment (used for debug tags only).</param>
/// <param name="selectedIndels">Only tested for null; reported as "ps" in status counts.</param>
/// <param name="existingIndels">Indels the read had before realignment (used for debug tags only).</param>
/// <param name="realignResult">The accepted realignment.</param>
/// <param name="bamAlignment">The alignment that is modified in place.</param>
/// <param name="hasExistingUnsanctionedIndels">Forwarded to the debug status info.</param>
/// <param name="originalAlignmentSummary">Forwarded to the debug status info.</param>
private void HandleAcceptedRealignment(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, AlignmentSummary originalAlignmentSummary)
{
    // The alignment position is 0-based, hence the shift by one.
    bamAlignment.Position = realignResult.Position - 1;
    bamAlignment.CigarData = realignResult.Cigar;

    if (_lightDebug)
    {
        AddStatusInfo(origBamAlignment, selectedIndels, existingIndels, realignResult, bamAlignment,
            hasExistingUnsanctionedIndels, originalAlignmentSummary);
    }

    _statusCounter.AppendStatusStringTag("RC", bamAlignment.GetStringTag("RC"), bamAlignment);

    // A mismatch-free realignment of a low-quality read gets its map quality raised.
    // Reads with map quality 0 only qualify when _allowRescoringOrig0 is set.
    if (bamAlignment.MapQuality <= 20 && realignResult.NumMismatches == 0)
    {
        if (_allowRescoringOrig0 || bamAlignment.MapQuality > 0)
        {
            bamAlignment.MapQuality = 40; // todo what to set this to?
        }
    }

    // Nify if using pair-specific indels
    var nifiedPositions = realignResult.NifiedAt;
    if (nifiedPositions == null || !nifiedPositions.Any())
    {
        return;
    }

    foreach (var readIndex in nifiedPositions)
    {
        bamAlignment.Qualities[readIndex] = 0;
    }

    _statusCounter.AddStatusCount(
        $"Successfully realigned with mismatch-insertion quality adjusted (ps: {selectedIndels != null})");
    _statusCounter.AppendStatusStringTag(
        "RX",
        $"Successfully realigned with mismatch-insertion quality adjusted ({string.Join(",", nifiedPositions)}",
        bamAlignment);
}
/// <summary>
/// Builds a mocked <c>IReadRealigner</c> whose <c>Realign</c> call returns a fixed result for any
/// arguments and records the indels it was asked to realign against.
/// </summary>
/// <param name="result">The result every <c>Realign</c> call returns.</param>
/// <param name="callbackIndelsList">Receives (via <c>AddRange</c>) the indel list passed to each call.</param>
/// <returns>The configured mock.</returns>
private Mock<IReadRealigner> GetMockReadRealigner(RealignmentResult result, List<HashableIndel> callbackIndelsList)
{
    var mockRealigner = new Mock<IReadRealigner>();

    // Match Realign regardless of the argument values.
    var realignSetup = mockRealigner.Setup(realigner => realigner.Realign(
        It.IsAny<Read>(),
        It.IsAny<List<HashableIndel>>(),
        It.IsAny<Dictionary<HashableIndel, GenomeSnippet>>(),
        It.IsAny<bool>(),
        It.IsAny<int>()));

    realignSetup
        .Returns<Read, List<HashableIndel>, Dictionary<HashableIndel, GenomeSnippet>, bool, int>(
            (read, indels, contexts, flag, number) => result)
        .Callback<Read, List<HashableIndel>, Dictionary<HashableIndel, GenomeSnippet>, bool, int>(
            (read, indels, contexts, flag, number) => callbackIndelsList.AddRange(indels));

    return mockRealigner;
}
/// <summary>
/// Applies an accepted realignment to <paramref name="bamAlignment"/> and reports it as changed.
/// </summary>
/// <param name="origBamAlignment">The alignment before realignment.</param>
/// <param name="changed">Always set to true.</param>
/// <param name="selectedIndels">Forwarded to <c>HandleAcceptedRealignment</c>.</param>
/// <param name="existingIndels">Forwarded to <c>HandleAcceptedRealignment</c>.</param>
/// <param name="realignResult">The accepted realignment.</param>
/// <param name="originalAlignmentSummary">Forwarded to <c>HandleAcceptedRealignment</c>.</param>
/// <param name="bamAlignment">The alignment that is updated in place and returned.</param>
/// <param name="hasExistingUnsanctionedIndels">Forwarded to <c>HandleAcceptedRealignment</c>.</param>
/// <param name="confirmed">Always set to false.</param>
/// <returns>The same <paramref name="bamAlignment"/> instance, after modification.</returns>
private BamAlignment AcceptRealignment(BamAlignment origBamAlignment, out bool changed, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, out bool confirmed)
{
    // The out flags are only written once the alignment has been updated successfully.
    HandleAcceptedRealignment(
        origBamAlignment,
        selectedIndels,
        existingIndels,
        realignResult,
        bamAlignment,
        hasExistingUnsanctionedIndels,
        originalAlignmentSummary);

    changed = true;
    confirmed = false;

    return bamAlignment;
}
/// <summary>
/// Verifies <c>BasicAlignmentComparer.GetBetterResult</c>: ties go to the first argument, a null
/// argument loses to a non-null one, two nulls give null, and a result with more mismatches loses.
/// </summary>
public void GetBetterResult()
{
    var comparer = new BasicAlignmentComparer();
    var preferred = new RealignmentResult();
    var other = new RealignmentResult();

    // Two default results tie, so whichever is passed first is returned.
    Assert.Equal(preferred, comparer.GetBetterResult(preferred, other));
    Assert.Equal(other, comparer.GetBetterResult(other, preferred));

    // A null on either side yields the non-null result.
    Assert.Equal(preferred, comparer.GetBetterResult(preferred, null));
    Assert.Equal(preferred, comparer.GetBetterResult(null, preferred));

    // Assert.Null is the idiomatic null assertion (instead of Assert.Equal(null, ...)).
    Assert.Null(comparer.GetBetterResult(null, null));

    // Once the first argument has mismatches, the other result wins despite the ordering.
    preferred.NumMismatches = 5;
    Assert.Equal(other, comparer.GetBetterResult(preferred, other));
}
/// <summary>
/// Picks the better of two realignment results.
/// </summary>
/// <param name="preferred">The result that wins ties; may be null.</param>
/// <param name="other">The competing result; may be null.</param>
/// <returns>
/// The non-null argument when only one is supplied, null when both are null, and otherwise
/// <paramref name="preferred"/> unless <c>CompareAlignments</c> rates it below <paramref name="other"/>.
/// </returns>
public RealignmentResult GetBetterResult(RealignmentResult preferred, RealignmentResult other)
{
    if (preferred == null)
    {
        // Covers both "only other is set" and "neither is set" (other is null then).
        return other;
    }

    if (other == null)
    {
        return preferred;
    }

    // prefer first if equal
    var comparison = CompareAlignments(preferred, other);
    if (comparison >= 0)
    {
        return preferred;
    }

    return other;
}
/// <summary>
/// Checks whether a realignment placed an indel inside a mono-repeat at the start or end of the read.
/// </summary>
/// <param name="read">The read; supplies its mono-repeat prefix/suffix lengths, length and sequence.</param>
/// <param name="result">The realignment; <c>IndelsAddedAt</c> and <c>AcceptedIndels</c> are read.</param>
/// <param name="indels">Indel list indexed by the entries of <c>result.AcceptedIndels</c>.</param>
/// <returns>
/// True when the prefix repeat is longer than 3 and the earliest indel was added at or before its end,
/// or when the suffix repeat is longer than 3, the last indel lies within it, the repeat is no longer
/// than the indel's <c>NumBasesInReferenceSuffixBeforeUnique</c>, and the read's repeat suffix equals
/// the start of the indel's <c>RefSuffix</c>. False otherwise.
/// </returns>
public static bool AttemptedAddingIndelInUnanchoredRepeat(Read read, RealignmentResult result, List<HashableIndel> indels)
{
    // TODO OBO should this be checking <= or < ?
    var prefixRepeatLength = read.GetMonoRepeatPrefix();
    if (prefixRepeatLength > 3 && result.IndelsAddedAt.Min() <= prefixRepeatLength)
    {
        return true;
    }

    var suffixRepeatLength = read.GetMonoRepeatSuffix();
    var lastIndel = indels[result.AcceptedIndels.Last()];
    if (suffixRepeatLength <= 3)
    {
        return false;
    }

    // For anything other than an insertion, one extra base is subtracted from the distance
    // between the last added indel and the end of the read.
    var nonInsertionAdjustment = lastIndel.Type == AlleleCategory.Insertion ? 0 : 1;
    var distanceToReadEnd = read.ReadLength - result.IndelsAddedAt.Max() - nonInsertionAdjustment;

    return distanceToReadEnd <= suffixRepeatLength
           && suffixRepeatLength <= lastIndel.NumBasesInReferenceSuffixBeforeUnique
           && read.Sequence.Substring(read.Sequence.Length - suffixRepeatLength, suffixRepeatLength)
           == lastIndel.RefSuffix.Substring(0, suffixRepeatLength);
}
/// <summary>
/// Records a rejected realignment and, when configured, softclips poorly anchored indels that the
/// read already carried but that are not among the sanctioned matches.
/// </summary>
/// <param name="origBamAlignment">The original alignment; tagged and possibly softclipped in place.</param>
/// <param name="forcedSoftclip">Set to true when at least one indel was softclipped out; otherwise untouched.</param>
/// <param name="existingIndels">Indels present in the original alignment.</param>
/// <param name="realignResult">The rejected realignment; may be null, in which case no indel text is reported.</param>
/// <param name="hasExistingUnsanctionedIndels">Whether the read has existing indels that are not sanctioned.</param>
/// <param name="existingMatches">Existing indels that are sanctioned; these are never softclipped.</param>
private void HandleFailedRealignment(BamAlignment origBamAlignment, ref bool forcedSoftclip, List<PreIndel> existingIndels, RealignmentResult realignResult, bool hasExistingUnsanctionedIndels, List<PreIndel> existingMatches)
{
    // The softclip message below already tolerated a null result; read the indel text once,
    // null-safely, so the two status lines here cannot throw where that one would not.
    var attemptedIndels = realignResult?.Indels;

    _statusCounter.AddStatusCount("INDEL STATUS\tRejected\t" + attemptedIndels);
    _statusCounter.AppendStatusStringTag("RX", "Did not accept: " + attemptedIndels, origBamAlignment);

    // TODO could this be happening because of a low-ranked indel? Maybe we should be allowing to realign against all indels...
    // TODO STILL should this actually be happening also to reads that had no indels to realign around (i.e. started with weak indel, and couldn't go anywhere), not just the ones that were changed?
    if (!_softclipUnknownIndels || !hasExistingUnsanctionedIndels)
    {
        return;
    }

    var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));
    foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
    {
        // Clip toward the side with the shorter anchor.
        var reverseClip = false;
        var clipLength = preIndel.RightAnchor;
        if (preIndel.LeftAnchor < preIndel.RightAnchor)
        {
            reverseClip = true;
            clipLength = preIndel.LeftAnchor;
        }

        // TODO arbitrary number here...
        // If it's pretty well-anchored, don't remove the indel
        if (clipLength > 20)
        {
            continue;
        }

        forcedSoftclip = true;
        _statusCounter.AddStatusCount("Softclipped out bad indel");
        _statusCounter.AppendStatusStringTag("RX",
            $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}...{attemptedIndels}",
            origBamAlignment);
        _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
        OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment, reverseClip, preIndel.ReferencePosition);
    }
}
/// <summary>
/// Test helper: creates a realignment result for the given CIGAR string with one indel added at
/// index 8, no nified positions, and accepted indel index 0 (in both the full list and the sub-list).
/// </summary>
/// <param name="cigar">CIGAR string used to construct the result's <c>CigarAlignment</c>.</param>
/// <returns>The populated result.</returns>
private RealignmentResult GetResult(string cigar)
{
    return new RealignmentResult
    {
        Cigar = new CigarAlignment(cigar),
        IndelsAddedAt = new List<int> { 8 },
        NifiedAt = new List<int>(),
        AcceptedIndels = new List<int> { 0 },
        AcceptedIndelsInSubList = new List<int> { 0 }
    };
}
/// <summary>
/// Tells whether a realignment is at least as good as the original alignment.
/// </summary>
/// <param name="realignResult">The candidate realignment.</param>
/// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
/// <returns>True when the alignment comparer's comparison with the original is zero or positive.</returns>
private bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary)
{
    var comparison = _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary);
    return comparison >= 0;
}
/// <summary>
/// Determines whether a realignment result leaves the read where it was.
/// </summary>
/// <param name="realignResult">Realignment outcome; its position is compared after subtracting one.</param>
/// <param name="originalAlignment">The alignment the read had before realignment.</param>
/// <returns>True when the adjusted position matches and both CIGARs render to the same string.</returns>
private bool RealignmentIsUnchanged(RealignmentResult realignResult, BamAlignment originalAlignment)
{
    if (realignResult.Position - 1 != originalAlignment.Position)
    {
        return false;
    }

    // The CIGARs are only rendered once the positions are known to agree.
    var realignedCigarText = realignResult.Cigar.ToString();
    var originalCigarText = originalAlignment.CigarData.ToString();
    return realignedCigarText == originalCigarText;
}
/// <summary>
/// Re-attaches the terminal N stretches that were stripped before realignment (as softclips) and,
/// when <c>_remaskSoftclips</c> is set, re-applies softclipping and refreshes the result's position,
/// CIGAR and mismatch/softclip statistics.
/// </summary>
/// <param name="read">The read being realigned.</param>
/// <param name="nPrefixLength">Number of leading positions to re-add (softclipped, position -1).</param>
/// <param name="nSuffixLength">Number of trailing positions to re-add (softclipped, position -1).</param>
/// <param name="positionMapWithoutTerminalNs">Position map of the read without the terminal stretches.</param>
/// <param name="result">The realignment result; updated in place.</param>
/// <param name="context">Reference snippet (sequence and start position) the read was aligned against.</param>
/// <param name="prefixSoftclip">Forwarded to <c>Helper.SoftclipCigar</c>.</param>
/// <param name="suffixSoftclip">Forwarded to <c>Helper.SoftclipCigar</c>.</param>
/// <param name="freshCigarWithoutTerminalNs">Only used in the "no alignable bases" error message.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the mismatch count cannot be computed or the read has no mappable base after softclipping.
/// </exception>
public void ReapplySoftclips(Read read, int nPrefixLength, int nSuffixLength, PositionMap positionMapWithoutTerminalNs, RealignmentResult result, GenomeSnippet context, uint prefixSoftclip, uint suffixSoftclip, CigarAlignment freshCigarWithoutTerminalNs)
{
    // Re-append the N-prefix
    var leadingPadding = Enumerable.Repeat(-1, nPrefixLength);
    var trailingPadding = Enumerable.Repeat(-1, nSuffixLength);

    // TODO maybe have a function for combining pos maps instead
    var paddedPositions = leadingPadding
        .Concat(positionMapWithoutTerminalNs.Map)
        .Concat(trailingPadding)
        .ToArray();
    var finalPositionMap = new PositionMap(paddedPositions);

    // Wrap the realigned CIGAR in softclips covering the re-added stretches.
    var paddedCigar = new CigarAlignment();
    paddedCigar.Add(new CigarOp('S', (uint)nPrefixLength));
    foreach (CigarOp realignedOp in result.Cigar)
    {
        paddedCigar.Add(realignedOp);
    }

    paddedCigar.Add(new CigarOp('S', (uint)nSuffixLength));
    paddedCigar.Compress();
    result.Cigar = paddedCigar;

    // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, optionally re-mask them.
    // NOTE(review): result has already been dereferenced above, so the null half of this guard cannot fire.
    if (result == null || !_remaskSoftclips)
    {
        return;
    }

    var mismatchMap = Helper.GetMismatchMap(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);
    var keepAnySoftclips = _keepProbeSoftclips || _keepBothSideSoftclips;
    var softclipAdjustedCigar = Helper.SoftclipCigar(
        result.Cigar,
        mismatchMap,
        prefixSoftclip,
        suffixSoftclip,
        maskNsOnly: _maskNsOnly,
        prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false),
        suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true),
        softclipEvenIfMatch: keepAnySoftclips,
        softclipRepresentsMess: !keepAnySoftclips);

    // Update position map to account for any softclipping added
    var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
    for (var offset = 0; offset < adjustedPrefixClip; offset++)
    {
        finalPositionMap.UpdatePositionAtIndex(offset, -2, true);
    }

    var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
    for (var offset = 0; offset < adjustedSuffixClip; offset++)
    {
        finalPositionMap.UpdatePositionAtIndex(finalPositionMap.Length - 1 - offset, -2, true);
    }

    var editDistance = Helper.GetNumMismatches(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);
    if (editDistance == null)
    {
        // This shouldn't happen at this point - we already have a successful result
        throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                       string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
    }

    // TODO PERF - See how much this really helps analytically. I'm thinking maybe kill this altogether and remove from eval
    var sumOfMismatching = Helper.GetSumOfMismatchQualities(mismatchMap, read.Qualities);

    if (!finalPositionMap.HasAnyMappableBases())
    {
        throw new InvalidDataException(string.Format(
            "Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})",
            freshCigarWithoutTerminalNs,
            string.Join(",", finalPositionMap),
            read.CigarData,
            softclipAdjustedCigar));
    }

    // TODO this used to be >= 0 but changed to > 0. Confirm correct.
    result.Position = finalPositionMap.FirstMappableBase();
    result.Cigar = softclipAdjustedCigar;
    result.NumMismatches = editDistance.Value;

    // Indexes recorded against the trimmed read shift right by the re-added prefix.
    var shiftedIndelIndexes = new List<int>();
    foreach (var trimmedIndex in result.IndelsAddedAt)
    {
        shiftedIndelIndexes.Add(trimmedIndex + nPrefixLength);
    }

    result.IndelsAddedAt = shiftedIndelIndexes;

    var shiftedNifiedIndexes = new List<int>();
    foreach (var trimmedIndex in result.NifiedAt)
    {
        shiftedNifiedIndexes.Add(trimmedIndex + nPrefixLength);
    }

    result.NifiedAt = shiftedNifiedIndexes;

    var refreshedSummary = Extensions.GetAlignmentSummary(
        result.Position - 1 - context.StartPosition,
        result.Cigar,
        context.Sequence,
        read.Sequence,
        _trackActualMismatches,
        _checkSoftclipsForMismatches);

    result.NumNonNMismatches = refreshedSummary.NumNonNMismatches;
    result.NumNonNSoftclips = refreshedSummary.NumNonNSoftclips;
    result.NumSoftclips = refreshedSummary.NumSoftclips;
    result.NumInsertedBases = refreshedSummary.NumInsertedBases;
    result.NumMismatchesIncludeSoftclip = refreshedSummary.NumMismatchesIncludeSoftclip;
    result.SumOfMismatchingQualities = sumOfMismatching;
    result.AnchorLength = refreshedSummary.AnchorLength;
}
/// <summary>
/// Tells whether a realignment is at least as good as the original alignment.
/// </summary>
/// <param name="realignResult">The candidate realignment.</param>
/// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
/// <param name="isPairAware">Forwarded to the alignment comparer.</param>
/// <returns>True when the alignment comparer's comparison with the original is zero or positive.</returns>
public bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary, bool isPairAware)
{
    var comparison = _alignmentComparer.CompareAlignmentsWithOriginal(
        realignResult,
        originalAlignmentSummary,
        isPairAware);

    return comparison >= 0;
}
/// <summary>
/// Tries to realign a read against every single candidate indel and every compatible pair of
/// candidate indels, and returns the best result found.
/// </summary>
/// <param name="rankedIndels">Candidate indels, in ranked order.</param>
/// <param name="indelContexts">Reference context for each candidate indel.</param>
/// <param name="read">The read to realign.</param>
/// <param name="attemptedTargetSides">
/// Incremented by two for every single-indel attempt and by one for each side of a two-indel
/// attempt that was not skipped.
/// </param>
/// <param name="fromPairSpecificIndels">Forwarded to the coexistence check and the realignment attempts.</param>
/// <returns>The best result according to <c>_comparer</c>, or null when nothing could be realigned.</returns>
public RealignmentResult GetBestAlignment(List<HashableIndel> rankedIndels, Dictionary<HashableIndel, GenomeSnippet> indelContexts, Read read, out int attemptedTargetSides, bool fromPairSpecificIndels)
{
    RealignmentResult bestResultSoFar = null;
    attemptedTargetSides = 0;

    // Note this used to be in the loop... hopefully I'm not killing anything here...
    var nPrefixLength = read.GetNPrefix();

    if (_keepProbeSoftclips)
    {
        if ((_keepBothSideSoftclips || !read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) && nPrefixLength == 0)
        {
            nPrefixLength = (int)read.CigarData.GetPrefixClip();
        }
    }

    var details = new ReadToRealignDetails(read,
        read.GetAdjustedPosition(true, probePrefix: _keepProbeSoftclips ? nPrefixLength : 0),
        _keepProbeSoftclips, _keepBothSideSoftclips);

    var positionFromRight = read.GetAdjustedPosition(false, probePrefix: _keepProbeSoftclips ? nPrefixLength : 0);
    ReadToRealignDetails rightAnchoredDetails = null;
    if (positionFromRight >= 0)
    {
        rightAnchoredDetails = new ReadToRealignDetails(read, positionFromRight, _keepProbeSoftclips, _keepBothSideSoftclips);
    }

    // align to all permutations of one indel and two indels
    // try to skip alignment if we know it will fail
    for (var i = 0; i < rankedIndels.Count; i++)
    {
        var indel1 = rankedIndels[i];
        var indexes = new int[] { i };

        // try aligning to one indel
        _oneIndelSimpleTargets[0] = indel1;
        var indel1Result = RealignToTargets(read, _oneIndelSimpleTargets, indelContexts, details, rightAnchoredDetails,
            pairSpecific: fromPairSpecificIndels, indexes: indexes);
        attemptedTargetSides += 2;

        // update best result so far for one indel
        bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel1Result);
        if (IsUnbeatable(bestResultSoFar))
        {
            return bestResultSoFar;
        }

        var indexes2 = new int[2];
        for (var j = i + 1; j < rankedIndels.Count; j++)
        {
            var indel2 = rankedIndels[j];
            if (!CanCoexist(indel1, indel2, fromPairSpecificIndels))
            {
                continue;
            }

            _twoIndelSimpleTargets[0] = indel1;
            _twoIndelSimpleTargets[1] = indel2;
            Array.Sort(_twoIndelSimpleTargets, CompareSimple); // need to sort by position

            // The index array is read in parallel with the sorted targets (indexes[k] must be the
            // rank of targets[k]), so it has to follow the sort. Filling it as [i, j] before sorting
            // mislabelled the accepted indels whenever the sort swapped the pair.
            var indel1SortedFirst = _twoIndelSimpleTargets[0].Equals(indel1);
            indexes2[0] = indel1SortedFirst ? i : j;
            indexes2[1] = indel1SortedFirst ? j : i;

            // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
            var alreadyFailedFromLeft = indel1Result == null && indel1SortedFirst;
            var alreadyFailedFromRight = indel1Result == null && _twoIndelSimpleTargets[1].Equals(indel1);
            if (!alreadyFailedFromLeft)
            {
                attemptedTargetSides++;
            }

            if (!alreadyFailedFromRight)
            {
                attemptedTargetSides++;
            }

            var indel2Result = RealignToTargets(read, _twoIndelSimpleTargets, indelContexts, details, rightAnchoredDetails,
                pairSpecific: fromPairSpecificIndels, indexes: indexes2,
                skipLeftAnchored: alreadyFailedFromLeft, skipRightAnchored: alreadyFailedFromRight);
            bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel2Result);
        }
    }

    return bestResultSoFar;
}
/// <summary>
/// Tells whether a result is good enough that no further candidates need to be tried.
/// </summary>
/// <param name="bestResultSoFar">The best result found so far; may be null.</param>
/// <returns>
/// True for a non-null result with exactly one indel and zero mismatches, both excluding and
/// including softclipped bases.
/// </returns>
private bool IsUnbeatable(RealignmentResult bestResultSoFar)
{
    if (bestResultSoFar == null)
    {
        return false;
    }

    if (bestResultSoFar.NumIndels != 1)
    {
        return false;
    }

    return bestResultSoFar.NumMismatches == 0 && bestResultSoFar.NumMismatchesIncludeSoftclip == 0;
}
/// <summary>
/// Softclips partial insertions at the read ends and then refreshes the result's alignment statistics.
/// </summary>
/// <remarks>
/// Assumption: there should be no softclips in the cigar by this time.
/// Assumption: there should be exactly as many/the same indels in <paramref name="indels"/> as are
/// represented in <c>result.Cigar</c>.
/// </remarks>
/// <param name="indels">The indels the read was realigned against; only the first and last are inspected.</param>
/// <param name="read">The read being realigned.</param>
/// <param name="refSequence">Reference sequence used to recompute the alignment summary.</param>
/// <param name="result">The realignment result; its CIGAR and statistics are updated in place.</param>
/// <param name="refSequenceStartIndex">Subtracted from the result position when computing the summary.</param>
/// <exception cref="InvalidDataException">Thrown when the CIGAR already contains a softclip.</exception>
public void MaskPartialInsertion(HashableIndel[] indels, Read read, string refSequence, RealignmentResult result, int refSequenceStartIndex = 0)
{
    var leftmostIndel = indels[0];
    var rightmostIndel = indels[indels.Length - 1];

    var terminalInsertionPossible = leftmostIndel.Type == AlleleCategory.Insertion ||
                                    rightmostIndel.Type == AlleleCategory.Insertion;
    var maskingEnabled = _minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion;

    if (terminalInsertionPossible && maskingEnabled)
    {
        var maskedCigar = new CigarAlignment();
        for (var opIndex = 0; opIndex < result.Cigar.Count; opIndex++)
        {
            var currentOp = result.Cigar[opIndex];
            if (currentOp.Type == 'S')
            {
                throw new InvalidDataException(string.Format(
                    "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping",
                    currentOp.Type, result.Cigar));
            }

            // Only the first and the last operation are candidates for masking; the last one is
            // evaluated only when the first-operation test did not already decide to mask.
            var maskAsFirstOp = opIndex == 0 &&
                                EvaluateInsertionAtReadEnds(currentOp, leftmostIndel,
                                    _minimumUnanchoredInsertionLength, _maskPartialInsertion);
            var maskAsLastOp = !maskAsFirstOp &&
                               opIndex == result.Cigar.Count - 1 &&
                               EvaluateInsertionAtReadEnds(currentOp, rightmostIndel,
                                   _minimumUnanchoredInsertionLength, _maskPartialInsertion);

            if (maskAsFirstOp || maskAsLastOp)
            {
                maskedCigar.Add(new CigarOp('S', currentOp.Length));
            }
            else
            {
                maskedCigar.Add(currentOp);
            }
        }

        maskedCigar.Compress();
        result.Cigar = maskedCigar;
    }

    // The statistics are refreshed whether or not anything was masked.
    var refreshedSummary = Extensions.GetAlignmentSummary(
        result.Position - 1 - refSequenceStartIndex,
        result.Cigar,
        refSequence,
        read.Sequence,
        _trackActualMismatches,
        _checkSoftclipsForMismatches);

    result.NumIndels = refreshedSummary.NumIndels;
    result.NumNonNMismatches = refreshedSummary.NumNonNMismatches;
    result.NumNonNSoftclips = refreshedSummary.NumNonNSoftclips;
    result.NumSoftclips = refreshedSummary.NumSoftclips;
    result.NumMismatchesIncludeSoftclip = refreshedSummary.NumMismatchesIncludeSoftclip;
    result.NumIndelBases = refreshedSummary.NumIndelBases;
    result.NumInsertedBases = refreshedSummary.NumInsertedBases;
}
/// <summary>
/// Tries to realign a read against single candidate indels, pairs of candidates and (when
/// <c>_tryThree</c> is set) triples of candidates, returning the best result found.
/// </summary>
/// <param name="rankedIndels">Candidate indels, in ranked order.</param>
/// <param name="read">The read to realign.</param>
/// <param name="refSequence">Reference sequence to align against.</param>
/// <param name="indelCandidateGroups">
/// Groups of indels (by their string form, ordered by reference position then reference allele) that
/// were seen together; pairs are stored with a null third item. May be null or empty, in which case
/// only single-indel realignment is attempted.
/// </param>
/// <param name="attemptedTargetSides">
/// Incremented by two for every single-indel attempt and by one for each side of a two-indel
/// attempt that was not skipped.
/// </param>
/// <returns>The best result according to <c>_comparer</c>, or null when nothing could be realigned.</returns>
private RealignmentResult GetBestAlignment(List<CandidateIndel> rankedIndels, Read read, string refSequence, HashSet<Tuple<string, string, string>> indelCandidateGroups, out int attemptedTargetSides)
{
    RealignmentResult best = null;
    attemptedTargetSides = 0;

    // align to all permutations of one indel, two indels, and three indels
    // try to skip alignment if we know it will fail
    for (var first = 0; first < rankedIndels.Count; first++)
    {
        var indel1 = rankedIndels[first];

        // try aligning to one indel
        _oneIndelTargets[0] = rankedIndels[first];
        var singleResult = RealignToTargets(read, _oneIndelTargets, refSequence);
        attemptedTargetSides += 2;

        // update best result so far for one indel
        best = _comparer.GetBetterResult(best, singleResult);
        if (best != null && best.NumIndels == 1 && best.NumMismatches == 0)
        {
            return best; // can't beat this
        }

        // Do not realign to >1 indels if we haven't seen any coexisting indels.
        if (indelCandidateGroups == null || indelCandidateGroups.Count == 0)
        {
            continue;
        }

        for (var second = first + 1; second < rankedIndels.Count; second++)
        {
            var indel2 = rankedIndels[second];
            var pairKeys = new List<CandidateIndel> { indel1, indel2 }
                .OrderBy(g => g.ReferencePosition)
                .ThenBy(t => t.ReferenceAllele)
                .Select(x => x.ToString())
                .ToList();
            var pairGroup = new Tuple<string, string, string>(pairKeys[0], pairKeys[1], null);

            if (indelCandidateGroups.Contains(pairGroup))
            {
                if (!CanCoexist(indel1, indel2))
                {
                    continue;
                }

                _twoIndelTargets[0] = indel1;
                _twoIndelTargets[1] = indel2;
                Array.Sort(_twoIndelTargets, Compare); // need to sort by position

                // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
                var skipLeft = singleResult == null && _twoIndelTargets[0] == indel1;
                var skipRight = singleResult == null && _twoIndelTargets[1] == indel1;
                if (!skipLeft)
                {
                    attemptedTargetSides++;
                }

                if (!skipRight)
                {
                    attemptedTargetSides++;
                }

                var pairResult = RealignToTargets(read, _twoIndelTargets, refSequence, skipLeft, skipRight);
                best = _comparer.GetBetterResult(best, pairResult);
            }

            // Triples are explored even when the pair itself was not a known group.
            if (!_tryThree)
            {
                continue;
            }

            for (var third = second + 1; third < rankedIndels.Count; third++)
            {
                var indel3 = rankedIndels[third];
                var tripleKeys = new List<CandidateIndel> { indel1, indel2, indel3 }
                    .OrderBy(g => g.ReferencePosition)
                    .ThenBy(t => t.ReferenceAllele)
                    .Select(x => x.ToString())
                    .ToList();
                var tripleGroup = new Tuple<string, string, string>(tripleKeys[0], tripleKeys[1], tripleKeys[2]);

                if (!indelCandidateGroups.Contains(tripleGroup))
                {
                    continue;
                }

                if (!CanCoexist(indel1, indel3) || !CanCoexist(indel2, indel3))
                {
                    continue;
                }

                // only try to realign to three indels if bestResultSoFar is not good enough
                if (!NeedBetter(best))
                {
                    continue;
                }

                _threeIndelTargets[0] = indel1;
                _threeIndelTargets[1] = indel2;
                _threeIndelTargets[2] = indel3;
                Array.Sort(_threeIndelTargets, Compare); // need to sort by position

                var tripleResult = RealignToTargets(read, _threeIndelTargets, refSequence);
                best = _comparer.GetBetterResult(best, tripleResult);
            }
        }
    }

    return best;
}
/// <summary>
/// Tells whether the realignment moved the read by less than the configured maximum shift.
/// </summary>
/// <param name="realignResult">Realignment outcome; its position is used after subtracting one.</param>
/// <param name="bamAlignment">The alignment supplying the position to compare against.</param>
/// <returns>True when the absolute position difference is strictly below <c>_maxRealignShift</c>.</returns>
private bool RealignmentIsWithinRange(RealignmentResult realignResult, BamAlignment bamAlignment)
{
    var shift = Math.Abs((realignResult.Position - 1) - bamAlignment.Position);
    return shift < _maxRealignShift;
}
/// <summary>
/// Verifies <c>ScoredAlignmentComparer.GetBetterResult</c> with equal and with uneven
/// mismatch/indel coefficients: the less negative score wins and ties go to the first argument.
/// </summary>
public void GetBetterResult()
{
    var flawless = new RealignmentResult();
    var singleIndel = new RealignmentResult { NumIndels = 1 };
    var doubleIndel = new RealignmentResult { NumIndels = 2 };
    var singleMismatch = new RealignmentResult { NumMismatches = 1 };
    var doubleMismatch = new RealignmentResult { NumMismatches = 2 };
    var indelPlusMismatch = new RealignmentResult { NumIndels = 1, NumMismatches = 1 };

    var evenScorer = new AlignmentScorer { MismatchCoefficient = -1, IndelCoefficient = -1 };
    var comparer = new ScoredAlignmentComparer(evenScorer);

    // A result without indels or mismatches beats every flawed one.
    var flawedResults = new[] { singleIndel, doubleIndel, singleMismatch, doubleMismatch, indelPlusMismatch };
    foreach (var flawed in flawedResults)
    {
        Assert.Equal(flawless, comparer.GetBetterResult(flawless, flawed));
    }

    // For ties, prefer the first one
    Assert.Equal(singleMismatch, comparer.GetBetterResult(singleMismatch, singleIndel));
    Assert.Equal(singleIndel, comparer.GetBetterResult(singleIndel, singleMismatch));
    Assert.Equal(doubleIndel, comparer.GetBetterResult(doubleIndel, doubleMismatch));
    Assert.Equal(doubleIndel, comparer.GetBetterResult(doubleIndel, indelPlusMismatch));
    Assert.Equal(indelPlusMismatch, comparer.GetBetterResult(indelPlusMismatch, doubleIndel));

    // Prefer the less negative score
    Assert.Equal(singleIndel, comparer.GetBetterResult(doubleIndel, singleIndel));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(doubleIndel, singleMismatch));
    Assert.Equal(singleIndel, comparer.GetBetterResult(doubleMismatch, singleIndel));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(doubleMismatch, singleMismatch));
    Assert.Equal(singleIndel, comparer.GetBetterResult(indelPlusMismatch, singleIndel));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(indelPlusMismatch, singleMismatch));

    // Weight unevenly
    var mismatchHeavyScorer = new AlignmentScorer { MismatchCoefficient = -2, IndelCoefficient = -1 };
    comparer = new ScoredAlignmentComparer(mismatchHeavyScorer);

    Assert.Equal(singleIndel, comparer.GetBetterResult(singleMismatch, singleIndel));
    Assert.Equal(doubleIndel, comparer.GetBetterResult(doubleIndel, singleMismatch));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(singleMismatch, doubleIndel)); // same score, take first
    Assert.Equal(singleIndel, comparer.GetBetterResult(doubleMismatch, singleIndel));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(doubleMismatch, singleMismatch));
    Assert.Equal(singleIndel, comparer.GetBetterResult(indelPlusMismatch, singleIndel));
    Assert.Equal(singleMismatch, comparer.GetBetterResult(indelPlusMismatch, singleMismatch));
}
/// <summary>
/// Realigns a read against the given indels, anchoring on one side of the read, and returns the
/// resulting alignment with refreshed statistics.
/// </summary>
/// <param name="indels">Indels to layer onto the read, already sorted by ascending position.</param>
/// <param name="read">The read to realign.</param>
/// <param name="refSequence">Reference sequence to align against.</param>
/// <param name="anchorOnLeft">
/// True to anchor on the left and add indels first-to-last; false to anchor on the right and add
/// them last-to-first.
/// </param>
/// <returns>The realignment result, or null as soon as one of the indels cannot be added.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the realigned CIGAR unexpectedly contains a softclip before re-softclipping, when the
/// edit distance cannot be computed, or when the read has no alignable base after re-masking.
/// </exception>
private RealignmentResult RealignForAnchor(CandidateIndel[] indels, Read read, string refSequence, bool anchorOnLeft)
{
    var position = read.GetAdjustedPosition(anchorOnLeft);
    var freshCigarWithoutTerminalNs = new CigarAlignment();

    var nPrefixLength = read.GetNPrefix();
    var nSuffixLength = read.GetNSuffix();

    // Only build up the cigar for the non-N middle. Add the N prefix back on after the realignment attempts.
    freshCigarWithoutTerminalNs.Add(new CigarOp('M', (uint)(read.Sequence.Length - nPrefixLength - nSuffixLength)));
    freshCigarWithoutTerminalNs.Compress();

    // start with fresh position map
    var positionMapWithoutTerminalNs = new int[read.ReadLength - nPrefixLength - nSuffixLength];
    Read.UpdatePositionMap(position, freshCigarWithoutTerminalNs, positionMapWithoutTerminalNs);

    var prefixSoftclip = read.CigarData.GetPrefixClip();
    var suffixSoftclip = read.CigarData.GetSuffixClip();

    RealignmentResult result = null;
    var sequenceWithoutTerminalNs = read.Sequence.Substring(nPrefixLength, read.Sequence.Length - nPrefixLength - nSuffixLength);

    // layer on indels one by one, indels already sorted by ascending position
    // (walking away from the anchored side: ascending when anchored left, descending otherwise)
    var step = anchorOnLeft ? 1 : -1;
    var startIndex = anchorOnLeft ? 0 : indels.Length - 1;
    for (var indelIndex = startIndex; indelIndex >= 0 && indelIndex < indels.Length; indelIndex += step)
    {
        result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indels[indelIndex], refSequence, anchorOnLeft,
            positionMapWithoutTerminalNs);
        if (result == null)
        {
            return null;
        }
    }

    // Softclip partial insertions at read ends
    // Assumption: there should be no softclips in the cigar by this time
    // Assumption: there should be exactly as many/the same indels in "indels" as are represented in the cigar in "result.Cigar".
    var leftmostIndel = indels[0];
    var rightmostIndel = indels[indels.Length - 1];
    var terminalInsertionPossible = leftmostIndel.Type == AlleleCategory.Insertion ||
                                    rightmostIndel.Type == AlleleCategory.Insertion;

    if (terminalInsertionPossible && (_minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion))
    {
        var maskedCigar = new CigarAlignment();
        for (var opIndex = 0; opIndex < result.Cigar.Count; opIndex++)
        {
            var currentOp = result.Cigar[opIndex];
            if (currentOp.Type == 'S')
            {
                throw new InvalidDataException(string.Format(
                    "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping",
                    currentOp.Type, result.Cigar));
            }

            // Only the first and the last operation can be masked; the last-operation test runs
            // only when the first-operation test did not already decide to mask.
            var maskAsFirstOp = opIndex == 0 &&
                                Helper.EvaluateInsertionAtReadEnds(currentOp, leftmostIndel,
                                    _minimumUnanchoredInsertionLength, _maskPartialInsertion);
            var maskAsLastOp = !maskAsFirstOp &&
                               opIndex == result.Cigar.Count - 1 &&
                               Helper.EvaluateInsertionAtReadEnds(currentOp, rightmostIndel,
                                   _minimumUnanchoredInsertionLength, _maskPartialInsertion);

            if (maskAsFirstOp || maskAsLastOp)
            {
                maskedCigar.Add(new CigarOp('S', currentOp.Length));
            }
            else
            {
                maskedCigar.Add(currentOp);
            }
        }

        maskedCigar.Compress();
        result.Cigar = maskedCigar;
    }

    // Re-append the N-prefix
    var leadingPadding = Enumerable.Repeat(-1, nPrefixLength);
    var trailingPadding = Enumerable.Repeat(-1, nSuffixLength);
    var finalPositionMap = leadingPadding.Concat(positionMapWithoutTerminalNs).Concat(trailingPadding).ToArray();

    var paddedCigar = new CigarAlignment();
    paddedCigar.Add(new CigarOp('S', (uint)nPrefixLength));
    foreach (CigarOp realignedOp in result.Cigar)
    {
        paddedCigar.Add(realignedOp);
    }

    paddedCigar.Add(new CigarOp('S', (uint)nSuffixLength));
    paddedCigar.Compress();
    result.Cigar = paddedCigar;

    var updatedSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence, read.Sequence);
    result.NumIndels = updatedSummary.NumIndels;
    result.NumNonNMismatches = updatedSummary.NumNonNMismatches;
    result.NumMismatchesIncludeSoftclip = updatedSummary.NumMismatchesIncludeSoftclip;
    result.NumNonNSoftclips = updatedSummary.NumNonNSoftclips;
    result.NumSoftclips = updatedSummary.NumSoftclips;
    result.NumIndelBases = updatedSummary.NumIndelBases;
    result.MismatchesIncludeSoftclip = updatedSummary.MismatchesIncludeSoftclip;
    result.HasHighFrequencyIndel = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);

    // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, optionally re-mask them.
    if (result == null || !_remaskSoftclips)
    {
        return result;
    }

    var mismatchMap = Helper.GetMismatchMap(read.Sequence, finalPositionMap, refSequence);
    var softclipAdjustedCigar = Helper.SoftclipCigar(
        result.Cigar,
        mismatchMap,
        prefixSoftclip,
        suffixSoftclip,
        maskNsOnly: true,
        prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false),
        suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true));

    // Update position map to account for any softclipping added
    var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
    for (var offset = 0; offset < adjustedPrefixClip; offset++)
    {
        finalPositionMap[offset] = -2;
    }

    var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
    for (var offset = 0; offset < adjustedSuffixClip; offset++)
    {
        finalPositionMap[finalPositionMap.Length - 1 - offset] = -2;
    }

    var editDistance = Helper.GetEditDistance(read.Sequence, finalPositionMap, refSequence);
    if (editDistance == null)
    {
        // This shouldn't happen at this point - we already have a successful result
        throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                       string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
    }

    if (!finalPositionMap.Any(p => p >= 0))
    {
        throw new InvalidDataException(string.Format(
            "Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})",
            freshCigarWithoutTerminalNs,
            string.Join(",", finalPositionMap),
            read.CigarData,
            softclipAdjustedCigar));
    }

    result.Position = finalPositionMap.First(p => p >= 0);
    result.Cigar = softclipAdjustedCigar;
    result.NumMismatches = editDistance.Value;

    var remaskedSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence, read.Sequence);
    result.NumNonNMismatches = remaskedSummary.NumNonNMismatches;
    result.NumMismatchesIncludeSoftclip = remaskedSummary.NumMismatchesIncludeSoftclip;
    result.NumNonNSoftclips = remaskedSummary.NumNonNSoftclips;
    result.NumSoftclips = remaskedSummary.NumSoftclips;
    result.NumIndelBases = remaskedSummary.NumIndelBases;
    result.MismatchesIncludeSoftclip = remaskedSummary.MismatchesIncludeSoftclip;
    result.HasHighFrequencyIndel = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);

    // NOTE(review): this overwrites the value just taken from the re-masked summary with the one
    // computed before re-masking. Kept as-is; confirm which of the two is intended.
    result.NumIndelBases = updatedSummary.NumIndelBases;

    return result;
}
/// <summary>
/// Tries to realign <paramref name="read"/> against the supplied indels, anchoring on the
/// requested side. The indels are layered onto a fresh copy of the read's position map,
/// partial insertions are optionally masked, softclips are reapplied, and the accepted
/// indels are recorded on the result.
/// </summary>
/// <param name="indels">Candidate indels to layer on (already sorted by ascending position).</param>
/// <param name="indelContexts">Lookup from indel to its genome snippet; the snippet of the first indel is used for masking, softclip reapplication and mismatch-quality scoring.</param>
/// <param name="read">The read being realigned.</param>
/// <param name="anchorOnLeft">Anchor side, forwarded to <c>LayerOnIndels</c>.</param>
/// <param name="details">Pre-computed read details (cigar, position map, sequence, N/softclip lengths).</param>
/// <param name="pairSpecific">Forwarded to <c>LayerOnIndels</c>.</param>
/// <param name="indexes">Maps an index within <paramref name="indels"/> to the index stored in <c>AcceptedIndels</c>.</param>
/// <returns>
/// The realignment result, or <c>null</c> when layering fails, when a deletion fails the
/// right-anchor check below, or when any exception is thrown (logged only if <c>_debug</c> is set).
/// </returns>
private RealignmentResult RealignForAnchor(HashableIndel[] indels, Dictionary<HashableIndel, GenomeSnippet> indelContexts, Read read, bool anchorOnLeft, ReadToRealignDetails details, bool pairSpecific, int[] indexes)
{
    try
    {
        var cigarCopy = new CigarAlignment(details.FreshCigarWithoutTerminalNs);

        // Copy the position map from details into a fresh PositionMap that the calls below operate on.
        var workingMap = new PositionMap(details.PositionMapLength);
        for (int mapIndex = 0; mapIndex < details.PositionMapLength; mapIndex++)
        {
            var position = details.PositionMapWithoutTerminalNs.GetPositionAtIndex(mapIndex);
            workingMap.UpdatePositionAtIndex(mapIndex, position);
        }

        var realignment = new RealignmentResult();

        // Layer on indels one by one, indels already sorted by ascending position.
        // LayerOnIndels returns true on failure.
        var layeringFailed = LayerOnIndels(indels, indelContexts, anchorOnLeft, details.SequenceWithoutTerminalNs, workingMap, ref realignment, pairSpecific);
        if (layeringFailed)
        {
            return null;
        }

        var firstContext = indelContexts[indels[0]];

        // Softclip partial insertions at read ends
        var shouldMaskInsertions = _maskPartialInsertion || _minimumUnanchoredInsertionLength > 0;
        if (shouldMaskInsertions)
        {
            MaskPartialInsertion(indels, read, firstContext.Sequence, realignment, firstContext.StartPosition);
        }

        _softclipReapplier.ReapplySoftclips(read, details.NPrefixLength, details.NSuffixLength, workingMap, realignment, firstContext, details.PrefixSoftclip, details.SuffixSoftclip, cigarCopy);

        realignment.AcceptedIndels = new List<int>();
        realignment.AcceptedHashableIndels = new List<HashableIndel>();

        for (int accepted = 0; accepted < realignment.AcceptedIndelsInSubList.Count; accepted++)
        {
            // TODO do we need to be more nuanced about this and only do it in duplication areas?
            var subListIndex = realignment.AcceptedIndelsInSubList[accepted];
            realignment.AcceptedIndels.Add(indexes[subListIndex]);

            var acceptedIndel = indels[subListIndex];
            realignment.AcceptedHashableIndels.Add(acceptedIndel);

            if (acceptedIndel.Type != AlleleCategory.Deletion)
            {
                continue;
            }

            // Read bases to the right of the deletion, excluding any trailing softclip.
            var insertionPoint = realignment.IndelsAddedAt[accepted];
            var anchorStart = insertionPoint + 1;

            var finalOp = realignment.Cigar[realignment.Cigar.Count - 1];
            var trailingSoftclip = 0;
            if (finalOp.Type == 'S')
            {
                trailingSoftclip = (int)finalOp.Length;
            }

            var rightAnchorLength = read.Sequence.Length - anchorStart - trailingSoftclip;

            // Reject when the right anchor is shorter than the deletion and is identical to the
            // reference allele bases following its first base.
            // NOTE(review): presumably such a read cannot support the deletion - confirm.
            if (rightAnchorLength < acceptedIndel.Length
                && anchorStart < read.Sequence.Length
                && read.Sequence.Substring(anchorStart, rightAnchorLength) == acceptedIndel.ReferenceAllele.Substring(1, rightAnchorLength))
            {
                return null;
            }
        }

        // Only compute the mismatch-quality sum if nothing upstream has set it already.
        if (realignment.SumOfMismatchingQualities == null)
        {
            realignment.SumOfMismatchingQualities = Helper.GetSumOfMismatchQualities(read.Qualities, read.Sequence, workingMap, firstContext.Sequence, firstContext.StartPosition);
        }

        realignment.Indels = string.Join("|", indels.Select(indel => StringifyIndel(indel)));
        return realignment;
    }
    catch (Exception ex)
    {
        // Best effort: any failure means "no realignment for this anchor".
        if (_debug)
        {
            Logger.WriteExceptionToLog(new Exception($"Realign for anchor failed: read '{read.Name}' with indels {(string.Join("|", indels.Select(x => StringifyIndel(x))))}, anchoring on {(anchorOnLeft ? "left" : "right")}.", ex));
        }

        return null;
    }
}
/// <summary>
/// Reports whether the best result found so far still leaves room for improvement.
/// </summary>
/// <param name="bestResultSoFar">The best realignment result so far; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when there is no result yet or the result has more than zero mismatches;
/// otherwise <c>false</c>.
/// </returns>
private bool NeedBetter(RealignmentResult bestResultSoFar)
{
    // Nothing found yet: anything would be an improvement.
    if (bestResultSoFar == null)
    {
        return true;
    }

    return bestResultSoFar.NumMismatches > 0;
}
/// <summary>
/// Layers the supplied indels one at a time by calling <c>AddIndelAndGetResult</c> for each,
/// walking the array front-to-back when <paramref name="anchorOnLeft"/> is true and
/// back-to-front otherwise. On success the per-indel bookkeeping (indel strings, indexes,
/// added-at positions, nified positions) accumulated across all calls is written onto
/// <paramref name="result"/>.
/// </summary>
/// <param name="indels">Indels to apply.</param>
/// <param name="indelContexts">Passed to <c>GetContext</c> to obtain the snippet for each indel.</param>
/// <param name="anchorOnLeft">Selects iteration direction and is forwarded to <c>AddIndelAndGetResult</c>.</param>
/// <param name="sequenceWithoutTerminalNs">Read sequence forwarded to <c>AddIndelAndGetResult</c>.</param>
/// <param name="positionMapWithoutTerminalNs">Position map forwarded to <c>AddIndelAndGetResult</c>.</param>
/// <param name="result">Replaced by the result of each call; <c>null</c> on failure.</param>
/// <param name="pairSpecific">Forwarded to <c>AddIndelAndGetResult</c>.</param>
/// <returns>
/// <c>true</c> if layering FAILED (a call returned <c>null</c>); <c>false</c> if every indel was applied.
/// </returns>
private bool LayerOnIndels(HashableIndel[] indels, Dictionary<HashableIndel, GenomeSnippet> indelContexts, bool anchorOnLeft, string sequenceWithoutTerminalNs, PositionMap positionMapWithoutTerminalNs, ref RealignmentResult result, bool pairSpecific)
{
    var joinedIndels = "";
    var appliedIndexes = new List<int>();
    var addedAtPositions = new List<int>();
    var nifiedPositions = new List<int>();

    var indelCount = indels.Length;
    for (var step = 0; step < indelCount; step++)
    {
        // Ascending order when anchoring on the left, descending when anchoring on the right.
        var indelIndex = anchorOnLeft ? step : indelCount - 1 - step;
        var indel = indels[indelIndex];

        var snippet = GetContext(indel, indelContexts);
        result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indel, snippet.Sequence, anchorOnLeft, positionMapWithoutTerminalNs, snippet.StartPosition, pairSpecific);

        if (result == null)
        {
            // Signal failure to the caller; result stays null.
            return true;
        }

        joinedIndels = string.Concat(joinedIndels, result.Indels, "|");
        appliedIndexes.Add(indelIndex);
        addedAtPositions.AddRange(result.IndelsAddedAt);
        nifiedPositions.AddRange(result.NifiedAt);
    }

    // The last per-indel result carries the accumulated bookkeeping for the whole run.
    result.Indels = joinedIndels; // TODO can we remove this? Think it gets overwritten later...
    result.AcceptedIndelsInSubList = appliedIndexes;
    result.NifiedAt = nifiedPositions;
    result.IndelsAddedAt = addedAtPositions;

    return false;
}