/// <summary>
/// Records debug bookkeeping for an accepted realignment: bumps the status counters and
/// writes the RX, OC, OS and RS tags onto <paramref name="bamAlignment"/>.
/// </summary>
/// <param name="origBamAlignment">Read as it was before realignment; its CIGAR goes into the OC tag.</param>
/// <param name="selectedIndels">Only tested for null, to report the "ps" flag in the status count.</param>
/// <param name="existingIndels">Indels the read already carried; may be null or empty.</param>
/// <param name="realignResult">The accepted realignment whose statistics are reported.</param>
/// <param name="bamAlignment">The realigned read that receives the tags.</param>
/// <param name="hasExistingUnsanctionedIndels">Reported verbatim in the "Replaced existing indels" count.</param>
/// <param name="originalAlignmentSummary">Statistics of the original alignment, written to the OS tag.</param>
private void AddStatusInfo(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, AlignmentSummary originalAlignmentSummary)
{
    var hasSelectedIndels = selectedIndels != null;

    _statusCounter.AddStatusCount("INDEL STATUS\tAccepted\t" + realignResult.Indels);
    _statusCounter.AddStatusCount($"Successfully realigned (ps: {hasSelectedIndels})");

    var successMessage = $"Successfully realigned after {realignResult.Attempts} attempts, indel is {string.Join("|", realignResult.AcceptedIndels)}";
    _statusCounter.AppendStatusStringTag("RX", successMessage, bamAlignment);

    var hadIndelsBefore = existingIndels != null && existingIndels.Any();
    if (hadIndelsBefore)
    {
        var replacementMessage = $"Orig indels:{string.Join("|", existingIndels)}__New indels:{realignResult.Indels}";
        _statusCounter.AppendStatusStringTag("RX", replacementMessage, bamAlignment);
        _statusCounter.AddStatusCount($"Replaced existing indels (nonsanctioned: {hasExistingUnsanctionedIndels})");
    }

    // OC: original CIGAR. OS / RS: the same statistics line for the original and the realigned read.
    var originalCigarTag = $"{origBamAlignment.CigarData}";
    var originalStatsTag = $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q";
    var realignedStatsTag = $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q";

    bamAlignment.ReplaceOrAddStringTag("OC", originalCigarTag);
    bamAlignment.ReplaceOrAddStringTag("OS", originalStatsTag);
    bamAlignment.ReplaceOrAddStringTag("RS", realignedStatsTag);
}
/// <summary>
/// Compares two alignments by the score that <c>_alignmentScorer</c> assigns to each.
/// </summary>
/// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
/// <param name="realignResult">Summary of the realigned read.</param>
/// <returns>
/// 1 when the original scores higher, -1 when the realignment scores higher, 0 on a tie.
/// </returns>
public override int CompareAlignments(AlignmentSummary originalAlignmentSummary, AlignmentSummary realignResult)
{
    var originalScore = _alignmentScorer.GetAlignmentScore(originalAlignmentSummary);
    var realignedScore = _alignmentScorer.GetAlignmentScore(realignResult);

    if (_debug)
    {
        // One CSV row: original cigar, mismatches, score, then the same for the realignment,
        // then whether the realignment scored strictly higher.
        var realignedIsHigher = realignedScore > originalScore;
        Console.WriteLine($"{originalAlignmentSummary.Cigar},{originalAlignmentSummary.NumMismatches},{originalScore},{realignResult.Cigar},{realignResult.NumMismatches},{realignedScore},{realignedIsHigher}");
    }

    if (originalScore == realignedScore)
    {
        return 0;
    }

    return originalScore > realignedScore ? 1 : -1;
}
/// <summary>
/// Applies an accepted realignment to <paramref name="bamAlignment"/>: position, CIGAR,
/// optional debug tags, a possible map-quality bump, and zeroed base qualities at the
/// positions listed in <c>realignResult.NifiedAt</c>.
/// </summary>
private void HandleAcceptedRealignment(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, AlignmentSummary originalAlignmentSummary)
{
    // The stored BAM position is 0-based, hence the -1.
    bamAlignment.Position = realignResult.Position - 1;
    bamAlignment.CigarData = realignResult.Cigar;

    if (_lightDebug)
    {
        AddStatusInfo(origBamAlignment, selectedIndels, existingIndels, realignResult, bamAlignment,
            hasExistingUnsanctionedIndels, originalAlignmentSummary);
    }

    _statusCounter.AppendStatusStringTag("RC", bamAlignment.GetStringTag("RC"), bamAlignment);

    // Mismatch-free realignments of low-mapq reads get their map quality raised.
    // Reads that started at mapq 0 are only rescored when _allowRescoringOrig0 is set.
    var mapQuality = bamAlignment.MapQuality;
    var mayRescore = mapQuality <= 20 && (_allowRescoringOrig0 || mapQuality > 0);
    if (mayRescore && realignResult.NumMismatches == 0)
    {
        bamAlignment.MapQuality = 40; // todo what to set this to?
    }

    // Nify if using pair-specific indels
    var nifiedAt = realignResult.NifiedAt;
    if (nifiedAt == null || !nifiedAt.Any())
    {
        return;
    }

    foreach (var readIndex in nifiedAt)
    {
        bamAlignment.Qualities[readIndex] = 0;
    }

    _statusCounter.AddStatusCount(
        $"Successfully realigned with mismatch-insertion quality adjusted (ps: {selectedIndels != null})");

    // NOTE(review): the opening "(" in this tag text is never closed; kept as-is because the
    // text is emitted at runtime.
    _statusCounter.AppendStatusStringTag("RX",
        $"Successfully realigned with mismatch-insertion quality adjusted ({string.Join(",", nifiedAt)}",
        bamAlignment);
}
/// <summary>
/// Decides whether a realignment should be accepted, recording the reason on the original
/// read and in the status counter when it is rejected.
/// </summary>
/// <param name="realignResult">The candidate realignment.</param>
/// <param name="origBamAlignment">Original read; receives the RX / OS / RS status tags on rejection.</param>
/// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
/// <param name="realignmentUnchanged">True when the realignment is the same as the original alignment.</param>
/// <param name="isPairAware">Forwarded to <c>_judger.RealignmentBetterOrEqual</c>.</param>
/// <returns>
/// True when the realignment is unchanged but has nified positions, or when the judger deems it
/// better than or equal to the original; false otherwise.
/// </returns>
private bool ResultIsGoodEnough(RealignmentResult realignResult, BamAlignment origBamAlignment, AlignmentSummary originalAlignmentSummary, bool realignmentUnchanged, bool isPairAware)
{
    if (realignmentUnchanged)
    {
        // NifiedAt is treated as nullable elsewhere (HandleAcceptedRealignment null-checks it),
        // so guard here too instead of throwing on an unchanged realignment without one.
        var hasNifiedPositions = realignResult.NifiedAt != null && realignResult.NifiedAt.Any();
        if (hasNifiedPositions)
        {
            return true;
        }

        _statusCounter.AppendStatusStringTag("RX", "Not taking realignment: unchanged", origBamAlignment);
        _statusCounter.AddStatusCount("Not taking realignment: unchanged");
        return false;
    }

    if (_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, isPairAware))
    {
        return true;
    }

    // Rejected: leave a trail on the original read describing both alignments.
    _statusCounter.AppendStatusStringTag("RX",
        $"Realignment failed:not better ({originalAlignmentSummary.Cigar}->{realignResult.Cigar}): {realignResult.Conclusion}",
        origBamAlignment);
    _statusCounter.UpdateStatusStringTag("OS",
        $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q",
        origBamAlignment);
    _statusCounter.UpdateStatusStringTag("RS",
        $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q",
        origBamAlignment);
    _statusCounter.AddStatusCount("Not taking realignment: not better");
    return false;
}
/// <summary>
/// Scores an alignment as a weighted sum of its statistics, each multiplied by the
/// matching coefficient configured on this scorer.
/// </summary>
/// <param name="summary">Alignment statistics to score.</param>
/// <returns>
/// The sum of coefficient-times-count over mismatches, indels, indel bases, non-N softclips
/// and anchor length.
/// </returns>
public int GetAlignmentScore(AlignmentSummary summary)
{
    var score = MismatchCoefficient * summary.NumMismatches;
    score += IndelCoefficient * summary.NumIndels;
    score += IndelLengthCoefficient * summary.NumIndelBases;
    score += NonNSoftclipCoefficient * summary.NumNonNSoftclips;
    score += AnchorLengthCoefficient * summary.AnchorLength;
    return score;
}
/// <summary>
/// Compares a realignment against the original alignment, with an optional lenient path.
/// </summary>
/// <param name="other">Summary of the realigned read.</param>
/// <param name="original">Summary of the original alignment.</param>
/// <param name="treatKindly">
/// When true, a realignment with at most one mismatch and no more softclip-inclusive
/// mismatches than the original is accepted outright.
/// </param>
/// <returns>
/// 1 when the lenient rule applies; otherwise whatever <c>CompareAlignmentsWithOriginal2</c> returns.
/// </returns>
public override int CompareAlignmentsWithOriginal(AlignmentSummary other, AlignmentSummary original, bool treatKindly = false)
{
    var acceptedLeniently = treatKindly
                            && other.NumMismatches <= 1
                            && other.NumMismatchesIncludeSoftclip <= original.NumMismatchesIncludeSoftclip;

    return acceptedLeniently ? 1 : CompareAlignmentsWithOriginal2(other, original);
}
/// <summary>
/// Compares a realignment against the original alignment using the scores from
/// <c>_alignmentScorer</c>.
/// </summary>
/// <param name="realignResult">Summary of the realigned read.</param>
/// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
/// <returns>
/// 1 when the realignment scores higher, -1 when the original scores higher, 0 on a tie.
/// </returns>
public override int CompareAlignmentsWithOriginal(AlignmentSummary realignResult, AlignmentSummary originalAlignmentSummary)
{
    var originalScore = _alignmentScorer.GetAlignmentScore(originalAlignmentSummary);
    var realignedScore = _alignmentScorer.GetAlignmentScore(realignResult);

    if (realignedScore == originalScore)
    {
        return 0;
    }

    return realignedScore > originalScore ? 1 : -1;
}
/// <summary>
/// Updates the per-indel outcome counters in <c>_indelOutcomes</c> for every candidate indel
/// that was offered to the realigner for one read.
/// </summary>
/// <param name="numIndels">Number of leading entries of <paramref name="orderedIndels"/> to process.</param>
/// <param name="orderedIndels">Candidate indels, in the order they were offered.</param>
/// <param name="hasAnyIndels">Whether the realignment accepted any indel at all.</param>
/// <param name="acceptedIndels">Indexes into <paramref name="orderedIndels"/> of the accepted indels; may be null.</param>
/// <param name="confirmedAcceptedIndels">Optional list that receives accepted indels from very confident realignments.</param>
/// <param name="acceptedRealignment">Whether the realignment as a whole was accepted.</param>
/// <param name="realignResult">The realignment, passed to <c>_judger.IsVeryConfident</c>.</param>
private void UpdateIndelOutcomes(int numIndels, List<HashableIndel> orderedIndels, bool hasAnyIndels, List<int> acceptedIndels, List<HashableIndel> confirmedAcceptedIndels, bool acceptedRealignment, AlignmentSummary realignResult)
{
    // Slot layout: success, failure, Rank, numIndels, multis, confirmed, acceptRealn, otherAccepted.
    // The "confirmed" slot (index 5) is not written by this method.
    const int slotCount = 8;
    const int successSlot = 0;
    const int failureSlot = 1;
    const int rankSlot = 2;
    const int numIndelsSlot = 3;
    const int multisSlot = 4;
    const int acceptedRealignmentSlot = 6;
    const int otherAcceptedSlot = 7;

    var multiIncrement = acceptedIndels?.Count > 1 ? 1 : 0;

    for (var index = 0; index < numIndels; index++)
    {
        var candidate = orderedIndels[index];

        int[] counters;
        if (!_indelOutcomes.TryGetValue(candidate, out counters))
        {
            counters = new int[slotCount];
            _indelOutcomes.Add(candidate, counters);
        }

        var wasAccepted = hasAnyIndels && acceptedIndels.Contains(index);
        if (wasAccepted)
        {
            counters[successSlot]++;
            counters[rankSlot] += index + 1;
            if (acceptedRealignment)
            {
                counters[acceptedRealignmentSlot]++;
            }

            if (_judger.IsVeryConfident(realignResult))
            {
                confirmedAcceptedIndels?.Add(candidate);
            }
        }
        else
        {
            counters[failureSlot]++;
            if (acceptedRealignment)
            {
                counters[otherAcceptedSlot]++;
            }
        }

        counters[numIndelsSlot] += numIndels;
        counters[multisSlot] += multiIncrement;
    }
}
/// <summary>
/// Compares two alignments. Rules are applied in this order; the first that decides wins:
/// <list type="number">
/// <item><description>A null <paramref name="other"/> always loses.</description></item>
/// <item><description>An alignment with exactly one mismatch and no indels beats one with more than one indel.</description></item>
/// <item><description>Fewer mismatches wins.</description></item>
/// <item><description>Fewer non-N softclips wins.</description></item>
/// <item><description>Fewer indels wins.</description></item>
/// </list>
/// </summary>
/// <param name="original">First alignment; a positive result means this one is preferred.</param>
/// <param name="other">Second alignment; a negative result means this one is preferred. May be null.</param>
/// <returns>1 when <paramref name="original"/> is preferred, -1 when <paramref name="other"/> is preferred, 0 on a tie.</returns>
public override int CompareAlignments(AlignmentSummary original, AlignmentSummary other)
{
    if (other == null)
    {
        return 1;
    }

    // Special rule: a lone mismatch is preferred over a multi-indel explanation.
    var originalIsLoneMismatch = original.NumMismatches == 1 && original.NumIndels == 0;
    if (originalIsLoneMismatch && other.NumIndels > 1)
    {
        return 1;
    }

    var otherIsLoneMismatch = other.NumMismatches == 1 && other.NumIndels == 0;
    if (otherIsLoneMismatch && original.NumIndels > 1)
    {
        return -1;
    }

    // Tie-break cascade: mismatches, then non-N softclips, then indels. Lower is better.
    if (original.NumMismatches != other.NumMismatches)
    {
        return original.NumMismatches < other.NumMismatches ? 1 : -1;
    }

    if (original.NumNonNSoftclips != other.NumNonNSoftclips)
    {
        return original.NumNonNSoftclips < other.NumNonNSoftclips ? 1 : -1;
    }

    if (original.NumIndels != other.NumIndels)
    {
        return original.NumIndels < other.NumIndels ? 1 : -1;
    }

    return 0;
}
/// <summary>
/// Tells whether the original alignment looks clean enough that it need not be treated as suspect.
/// </summary>
/// <param name="originalResult">Summary of the original alignment.</param>
/// <returns>
/// False when softclip realignment is enabled and the read has non-N softclips; otherwise true
/// only when the read has no mismatches (softclips included) and no indels.
/// </returns>
private bool PassesSuspicion(AlignmentSummary originalResult)
{
    var hasRealignableSoftclip = _tryRealignCleanSoftclippedReads && originalResult.NumNonNSoftclips > 0;
    if (hasRealignableSoftclip)
    {
        return false;
    }

    // Anything with a mismatch or an indel still has to be tried against the priors.
    // jg todo: an anchor-size check (MinNumAnchorMatches against _anchorSizeThreshold, with a
    // configurable threshold) was sketched here but is disabled.
    return originalResult.NumMismatchesIncludeSoftclip == 0 && originalResult.NumIndels == 0;
}
/// <summary>
/// Verifies that each coefficient on the scorer contributes coefficient-times-count to the
/// score, alone and in combination.
/// </summary>
public void AlignmentScorer()
{
    var noIssues = new AlignmentSummary();
    var singleIndel = new AlignmentSummary { NumIndels = 1 };
    var doubleIndel = new AlignmentSummary { NumIndels = 2 };
    var singleMismatch = new AlignmentSummary { NumMismatches = 1 };
    var doubleMismatch = new AlignmentSummary { NumMismatches = 2 };
    var indelPlusMismatch = new AlignmentSummary { NumIndels = 1, NumMismatches = 1 };
    var oneOfEach = new AlignmentSummary
    {
        NumIndels = 1,
        NumMismatches = 1,
        NumIndelBases = 1,
        NumNonNSoftclips = 1,
        AnchorLength = 1
    };

    // By default, everything is 0
    var scorer = new AlignmentScorer();
    Assert.Equal(0, scorer.GetAlignmentScore(noIssues));
    Assert.Equal(0, scorer.GetAlignmentScore(singleIndel));
    Assert.Equal(0, scorer.GetAlignmentScore(doubleIndel));
    Assert.Equal(0, scorer.GetAlignmentScore(indelPlusMismatch));

    // Count against mismatches: -1 score for each
    scorer = new AlignmentScorer { MismatchCoefficient = -1 };
    Assert.Equal(-1, scorer.GetAlignmentScore(singleMismatch));
    Assert.Equal(-1, scorer.GetAlignmentScore(indelPlusMismatch));
    Assert.Equal(-2, scorer.GetAlignmentScore(doubleMismatch));
    Assert.Equal(0, scorer.GetAlignmentScore(singleIndel));
    Assert.Equal(0, scorer.GetAlignmentScore(doubleIndel));

    // Count against indels: -1 score for each
    scorer = new AlignmentScorer { IndelCoefficient = -1 };
    Assert.Equal(0, scorer.GetAlignmentScore(singleMismatch));
    Assert.Equal(-1, scorer.GetAlignmentScore(indelPlusMismatch));
    Assert.Equal(0, scorer.GetAlignmentScore(doubleMismatch));
    Assert.Equal(-1, scorer.GetAlignmentScore(singleIndel));
    Assert.Equal(-2, scorer.GetAlignmentScore(doubleIndel));

    // Count against indels and mismatches
    scorer = new AlignmentScorer { IndelCoefficient = -3, MismatchCoefficient = -1 };
    Assert.Equal(-1, scorer.GetAlignmentScore(singleMismatch));
    Assert.Equal(-4, scorer.GetAlignmentScore(indelPlusMismatch));
    Assert.Equal(-2, scorer.GetAlignmentScore(doubleMismatch));
    Assert.Equal(-3, scorer.GetAlignmentScore(singleIndel));
    Assert.Equal(-6, scorer.GetAlignmentScore(doubleIndel));

    // Make sure the other stuff is working: switch the coefficients on one at a time and
    // watch the score of the one-of-each summary climb by one each step.
    scorer = new AlignmentScorer { IndelLengthCoefficient = 1 };
    Assert.Equal(1, scorer.GetAlignmentScore(oneOfEach));

    scorer.IndelCoefficient = 1;
    Assert.Equal(2, scorer.GetAlignmentScore(oneOfEach));

    scorer.MismatchCoefficient = 1;
    Assert.Equal(3, scorer.GetAlignmentScore(oneOfEach));

    scorer.NonNSoftclipCoefficient = 1;
    Assert.Equal(4, scorer.GetAlignmentScore(oneOfEach));

    scorer.AnchorLengthCoefficient = 1;
    Assert.Equal(5, scorer.GetAlignmentScore(oneOfEach));
}
/// <summary>
/// Exercises <c>BasicAlignmentComparer.CompareAlignments</c>: the nine indel/mismatch
/// scenarios, the lone-mismatch versus multi-indel special rule, and the softclip tie-breaks.
/// In every call the first argument is "first" and a result of 1 means the first is favored.
/// </summary>
public void CompareAlignments()
{
    var comparer = new BasicAlignmentComparer();

    var perfect = new AlignmentSummary();
    var otherPerfect = new AlignmentSummary();
    var oneMismatch = new AlignmentSummary { NumMismatches = 1 };
    var oneIndel = new AlignmentSummary { NumIndels = 1 };
    var oneIndelOneMismatch = new AlignmentSummary { NumMismatches = 1, NumIndels = 1 };

    Assert.Equal(1, comparer.CompareAlignments(perfect, null));

    // indels =, mismatch =, 0
    Assert.Equal(0, comparer.CompareAlignments(perfect, otherPerfect));
    // indels =, mismatch <, 1
    Assert.Equal(1, comparer.CompareAlignments(perfect, oneMismatch));
    // indels = (both 1), mismatch <, 1
    Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneIndelOneMismatch));
    // indels =, mismatch >, -1
    Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, perfect));
    // indels <, mismatch =, 1
    Assert.Equal(1, comparer.CompareAlignments(perfect, oneIndel));
    // indels <, mismatch <, 1
    // (previously called with (oneIndel, oneIndelOneMismatch), which has equal indel counts)
    Assert.Equal(1, comparer.CompareAlignments(perfect, oneIndelOneMismatch));
    // indels <, mismatch >, -1
    Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneIndel));
    // indels >, mismatch =, -1
    Assert.Equal(-1, comparer.CompareAlignments(oneIndel, perfect));
    // indels >, mismatch <, 1
    // (previously called with (oneIndel, oneIndelOneMismatch), which has equal indel counts)
    Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneMismatch));
    // indels >, mismatch >, -1
    Assert.Equal(-1, comparer.CompareAlignments(oneIndelOneMismatch, perfect));

    var twoIndels = new AlignmentSummary { NumIndels = 2, NumMismatches = 0 };
    var twoMismatches = new AlignmentSummary { NumIndels = 0, NumMismatches = 2 };

    // 1 mismatch and 0 indels in first, 2 indels and 0 mismatches in second, favor the first
    Assert.Equal(1, comparer.CompareAlignments(oneMismatch, twoIndels));
    // 1 mismatch and 0 indels in first, 1 indel in second, favor the second
    Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneIndel));
    // 2 mismatches and 0 indels in first, 2 indels in second, favor the second --
    // special rule only applies to single-mismatch reads (with no indels)
    Assert.Equal(-1, comparer.CompareAlignments(twoMismatches, twoIndels));
    // 1 mismatch and 1 indel in first, 2 indels and 0 mismatches in second, favor the second --
    // special rule only applies to single-mismatch reads (with no indels)
    Assert.Equal(-1, comparer.CompareAlignments(oneIndelOneMismatch, twoIndels));

    var oneMismatchOneSoftclip = new AlignmentSummary { NumNonNSoftclips = 1, NumMismatches = 1 };
    var oneSoftclip = new AlignmentSummary { NumNonNSoftclips = 1 };
    var oneIndelOneSoftclip = new AlignmentSummary { NumIndels = 1, NumNonNSoftclips = 1 };

    // 1 mismatch and 1 softclip, 0 mismatch and 1 softclip, favor the second
    Assert.Equal(-1, comparer.CompareAlignments(oneMismatchOneSoftclip, oneSoftclip));
    // 1 mismatch and 0 softclip, 0 mismatch and 1 softclip, favor the second
    Assert.Equal(-1, comparer.CompareAlignments(oneMismatch, oneSoftclip));
    // 0 mismatch and 0 softclip, 0 mismatch and 1 softclip, favor the first
    Assert.Equal(1, comparer.CompareAlignments(perfect, oneSoftclip));
    // 1 indel and 0 mismatch/sc, 1 indel and 1 softclip, favor the first
    Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneIndelOneSoftclip));
    // 2 indel and 0 mismatch/sc, 1 indel and 1 softclip, favor the first
    Assert.Equal(1, comparer.CompareAlignments(twoIndels, oneIndelOneSoftclip));
    // 1 indel and 0 mismatch/sc, 0 indel and 1 softclip, favor the first
    Assert.Equal(1, comparer.CompareAlignments(oneIndel, oneSoftclip));
    // 2 indel and 0 mismatch/sc, 0 indel and 1 softclip, favor the first
    Assert.Equal(1, comparer.CompareAlignments(twoIndels, oneSoftclip));
}
/// <summary>
/// Exercises <c>BasicAlignmentComparer.CompareAlignmentsWithOriginal(realignment, original)</c>.
/// A result of 1 means the realignment ("new") is picked, -1 means the original is kept.
/// Mismatch lists use "position_ref_alt" strings; a mismatch is "shared" when the same string
/// appears in both the realignment and the original.
/// </summary>
public void CompareAlignmentsWithOriginal()
{
    var comparer = new BasicAlignmentComparer();

    var perfect = new AlignmentSummary();
    var otherPerfect = new AlignmentSummary();
    var oneMismatch = new AlignmentSummary { NumMismatchesIncludeSoftclip = 1 };
    var oneIndel = new AlignmentSummary { NumIndels = 1 };
    var oneIndel2 = new AlignmentSummary { NumIndels = 1 };
    var oneIndelOneMismatch = new AlignmentSummary { NumMismatchesIncludeSoftclip = 1, NumIndels = 1 };
    var twoIndels = new AlignmentSummary { NumIndels = 2, NumMismatchesIncludeSoftclip = 0 };
    var twoIndels2 = new AlignmentSummary { NumIndels = 2, NumMismatchesIncludeSoftclip = 0 };
    var twoMismatches = new AlignmentSummary { NumIndels = 0, NumMismatchesIncludeSoftclip = 2 };

    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(perfect, null));

    // --------------
    // realignment has zero mismatch
    // --------------

    // both perfect, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(perfect, otherPerfect));
    // indels both 0, mismatch smaller by 1, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(perfect, oneMismatch));
    // special rule for one indel vs. one mismatch, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneMismatch));
    // gain one indel, mismatch both 0, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(oneIndel, perfect));
    // gain one indel, mismatch smaller by 2, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, twoMismatches));
    // indels both 1, mismatch both 0, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneIndel2));
    // indels both 1, mismatch smaller by 1, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(oneIndel, oneIndelOneMismatch));
    // special rule doesn't apply to two indels, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneMismatch));
    // gain two indels, mismatch both 0, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(twoIndels, perfect));
    // gain two indels, mismatch smaller by 2, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, twoMismatches));
    // indels both 2, mismatch both 0, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, twoIndels2));
    // gain one indel, mismatch smaller by 1, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneIndelOneMismatch));
    // gain one indel, mismatch both 0, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(twoIndels, oneIndel));

    // --------------
    // realignment has >=1 mismatch
    // --------------

    // --------------
    // short indels <= 3bp
    // --------------
    var shortIndelOneMismatchNew = new AlignmentSummary
    {
        NumIndelBases = 3,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "5_A_C" }
    };
    var shortIndelOneMismatchShared = new AlignmentSummary
    {
        NumIndelBases = 3,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "3_A_C" }
    };
    var zeroIndelWithFourMismatch = new AlignmentSummary
    {
        NumMismatchesIncludeSoftclip = 4,
        MismatchesIncludeSoftclip = new List<string> { "0_A_C", "1_A_C", "2_A_C", "3_A_C" }
    };
    var zeroIndelWithThreeMismatch = new AlignmentSummary
    {
        NumMismatchesIncludeSoftclip = 3,
        MismatchesIncludeSoftclip = new List<string> { "1_A_C", "2_A_C", "3_A_C" }
    };

    // realignment introduced a new mismatch, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchNew, zeroIndelWithFourMismatch));
    // the one mismatch exists in both original and realignment, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchShared, zeroIndelWithFourMismatch));
    // reduction of mismatch < 3, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(shortIndelOneMismatchShared, zeroIndelWithThreeMismatch));

    // --------------
    // indels > 3bp
    // --------------
    var mediumIndelOneMismatchNew = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "5_A_C" }
    };
    var mediumIndelOneMismatchShared = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "3_A_C" }
    };
    var mediumIndelTwoMismatchBothNew = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "5_A_C", "6_A_C" }
    };
    var mediumIndelTwoMismatchOneShared = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "2_A_C", "5_A_C" }
    };
    var zeroIndelWithFiveMismatch = new AlignmentSummary
    {
        NumMismatchesIncludeSoftclip = 5,
        MismatchesIncludeSoftclip = new List<string> { "0_A_C", "1_A_C", "2_A_C", "3_A_C", "4_A_C" }
    };

    // mismatch smaller by 2, realignment has one mismatch, shared with original, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(mediumIndelOneMismatchShared, zeroIndelWithThreeMismatch));
    // mismatch smaller by 3, realignment has one mismatch, shared with original, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(mediumIndelOneMismatchShared, zeroIndelWithFourMismatch));
    // mismatch smaller by 2, realignment created 1 new mismatch, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(mediumIndelOneMismatchNew, zeroIndelWithThreeMismatch));
    // mismatch smaller by 3, realignment created 1 new mismatch, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(mediumIndelOneMismatchNew, zeroIndelWithFourMismatch));
    // mismatch smaller by 3, but realignment created 2 new mismatches, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(mediumIndelTwoMismatchBothNew, zeroIndelWithFiveMismatch));
    // mismatch smaller by 3, realignment has two mismatches, one introduced by indel, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(mediumIndelTwoMismatchOneShared, zeroIndelWithFiveMismatch));
    // mismatch smaller by 2, realignment has two mismatches, one introduced by indel, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(mediumIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));

    // --------------
    // long (>= 9bp) and high-frequency indels
    // --------------
    // The "shared" single-mismatch fixtures use "2_A_C" so that the mismatch really is present
    // in every original they are compared against (it used to be "5_A_C", which none contain).
    var longIndelOneMismatchShared = new AlignmentSummary
    {
        NumIndelBases = 9,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "2_A_C" }
    };
    var longIndelTwoMismatchOneShared = new AlignmentSummary
    {
        NumIndelBases = 9,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "2_A_C", "5_A_C" }
    };
    var longIndelTwoMismatchBothNew = new AlignmentSummary
    {
        NumIndelBases = 9,
        NumIndels = 1,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "5_A_C", "6_A_C" }
    };
    var highFrequencyIndelOneMismatchShared = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        HasHighFrequencyIndel = true,
        NumMismatchesIncludeSoftclip = 1,
        MismatchesIncludeSoftclip = new List<string> { "2_A_C" }
    };
    var highFrequencyIndelTwoMismatchOneShared = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        HasHighFrequencyIndel = true,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "2_A_C", "5_A_C" }
    };
    // This fixture used to be a copy of the long-indel one (9 indel bases, flag unset), so the
    // high-frequency path was never exercised with two new mismatches.
    var highFrequencyIndelTwoMismatchBothNew = new AlignmentSummary
    {
        NumIndelBases = 4,
        NumIndels = 1,
        HasHighFrequencyIndel = true,
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "5_A_C", "6_A_C" }
    };
    var zeroIndelWithTwoMismatch = new AlignmentSummary
    {
        NumMismatchesIncludeSoftclip = 2,
        MismatchesIncludeSoftclip = new List<string> { "1_A_C", "2_A_C" }
    };

    // mismatch smaller by 2 (3->1), realignment has one mismatch, shared with original, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(longIndelOneMismatchShared, zeroIndelWithThreeMismatch));
    // mismatch smaller by 1 (2->1), realignment has one mismatch, shared with original, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(longIndelOneMismatchShared, zeroIndelWithTwoMismatch));
    // mismatch smaller by 2 (4->2), realignment has two mismatches, one introduced by indel, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(longIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));
    // mismatch smaller by 1 (3->2), realignment has two mismatches, one introduced by indel, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(longIndelTwoMismatchOneShared, zeroIndelWithThreeMismatch));
    // mismatch smaller by 2 (4->2), but realignment created 2 new mismatches, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(longIndelTwoMismatchBothNew, zeroIndelWithFourMismatch));

    // mismatch smaller by 2 (3->1), realignment has one mismatch, shared with original, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(highFrequencyIndelOneMismatchShared, zeroIndelWithThreeMismatch));
    // mismatch smaller by 1 (2->1), realignment has one mismatch, shared with original, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(highFrequencyIndelOneMismatchShared, zeroIndelWithTwoMismatch));
    // mismatch smaller by 2 (4->2), realignment has two mismatches, one introduced by indel, pick new
    Assert.Equal(1, comparer.CompareAlignmentsWithOriginal(highFrequencyIndelTwoMismatchOneShared, zeroIndelWithFourMismatch));
    // mismatch smaller by 1 (3->2), realignment has two mismatches, one introduced by indel, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(highFrequencyIndelTwoMismatchOneShared, zeroIndelWithThreeMismatch));
    // mismatch smaller by 2 (4->2), but realignment created 2 new mismatches, pick original
    Assert.Equal(-1, comparer.CompareAlignmentsWithOriginal(highFrequencyIndelTwoMismatchBothNew, zeroIndelWithFourMismatch));
}
/// <summary>
/// Compares two alignment summaries and returns a sign indicating which one wins.
/// </summary>
/// <param name="preferred">First summary to compare.</param>
/// <param name="other">Second summary to compare.</param>
/// <returns>
/// An integer sign; the override visible in this file returns 1 when the first argument scores
/// higher, -1 when the second scores higher and 0 on a tie.
/// </returns>
/// <remarks>
/// NOTE(review): the override visible in this file names these parameters
/// (realignResult, originalAlignmentSummary), i.e. realignment first and original second;
/// confirm that "preferred"/"other" reflect the intended contract.
/// </remarks>
public abstract int CompareAlignmentsWithOriginal(AlignmentSummary preferred, AlignmentSummary other);
/// <summary>
/// Finalizes an accepted realignment: delegates the read update to
/// <c>HandleAcceptedRealignment</c>, then reports the read as changed and not confirmed.
/// </summary>
/// <param name="changed">Always set to true.</param>
/// <param name="confirmed">Always set to false.</param>
/// <returns>The same <paramref name="bamAlignment"/> instance that was passed in.</returns>
private BamAlignment AcceptRealignment(BamAlignment origBamAlignment, out bool changed, List<PreIndel> selectedIndels, List<PreIndel> existingIndels, RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, out bool confirmed)
{
    HandleAcceptedRealignment(
        origBamAlignment,
        selectedIndels,
        existingIndels,
        realignResult,
        bamAlignment,
        hasExistingUnsanctionedIndels,
        originalAlignmentSummary);

    // The out parameters are assigned only after the update succeeds.
    confirmed = false;
    changed = true;

    return bamAlignment;
}
/// <summary>
/// Compares two alignment summaries and returns a sign indicating which one wins.
/// </summary>
/// <param name="preferred">First summary to compare.</param>
/// <param name="other">Second summary to compare.</param>
/// <param name="treatKindly">
/// Optional leniency switch, false by default. One override visible in this file uses it to
/// accept the first argument outright under a relaxed mismatch rule; another ignores it.
/// </param>
/// <returns>
/// An integer sign; the overrides visible in this file return 1 when the first argument
/// (the realignment) is picked and -1 when the second (the original) is kept.
/// </returns>
/// <remarks>
/// NOTE(review): the overrides visible in this file name the parameters (other, original),
/// i.e. realignment first and original second; confirm that "preferred"/"other" reflect the
/// intended contract.
/// </remarks>
public abstract int CompareAlignmentsWithOriginal(AlignmentSummary preferred, AlignmentSummary other, bool treatKindly = false);
/// <summary>
/// Decides whether a realignment should replace the original alignment, based on how many
/// mismatches (softclips included) it removes and how many new ones it introduces.
/// </summary>
/// <param name="other">Summary of the realigned read.</param>
/// <param name="original">Summary of the original alignment; null means there is nothing to compare against.</param>
/// <param name="treatKindly">Not used by this implementation.</param>
/// <returns>1 to take the realignment, -1 to keep the original. This method never returns 0.</returns>
public override int CompareAlignmentsWithOriginal(AlignmentSummary other, AlignmentSummary original, bool treatKindly = false)
{
    if (original == null)
    {
        return 1;
    }

    var realignedMismatches = other.NumMismatchesIncludeSoftclip;
    var mismatchReduction = original.NumMismatchesIncludeSoftclip - realignedMismatches;

    // when realignment has zero mismatch
    if (realignedMismatches == 0)
    {
        // special rule for one indel vs. one mismatch: keep the original
        if (other.NumIndels == 1 && original.NumMismatchesIncludeSoftclip == 1 && original.NumIndels == 0)
        {
            return -1;
        }

        if (original.NumIndels > 0)
        {
            return 1;
        }

        return mismatchReduction >= 1 ? 1 : -1;
    }

    // Mismatches not changed by indel realignment. Either list may be absent; Intersect throws
    // on a null argument, so with a missing list on either side nothing counts as shared.
    var originalMismatches = original.MismatchesIncludeSoftclip;
    var otherMismatches = other.MismatchesIncludeSoftclip;
    var numSharedMismatch = originalMismatches != null && otherMismatches != null
        ? originalMismatches.Intersect(otherMismatches).Count()
        : 0;
    var numNewMismatch = realignedMismatches - numSharedMismatch;

    // More strict with short indels
    if (other.NumIndelBases <= 3)
    {
        var shortIndelIsAcceptable = numNewMismatch <= MaxMismatchCreatedByIndelShortIndel
                                     && realignedMismatches <= MaxTotalMismatchShortIndel
                                     && mismatchReduction >= MinReductionInMismatch;
        return shortIndelIsAcceptable ? 1 : -1;
    }

    // all other cases
    if (numNewMismatch <= MaxMismatchCreatedByIndel && realignedMismatches <= MaxTotalMismatch)
    {
        if (mismatchReduction >= MinReductionInMismatch)
        {
            return 1;
        }

        var meetsPreferredReduction = mismatchReduction >= MinReductionInMismatchPreferred;

        // give preference to long indels
        if (meetsPreferredReduction && other.NumIndelBases - original.NumIndelBases >= 9)
        {
            return 1;
        }

        // give preference to indels that have high frequency to start with
        if (meetsPreferredReduction && other.HasHighFrequencyIndel)
        {
            return 1;
        }
    }

    return -1;
}
public BamAlignment GetFinalAlignment(BamAlignment origBamAlignment, out bool changed, out bool forcedSoftclip, out bool confirmed, out bool sketchy, List <PreIndel> selectedIndels = null, List <PreIndel> existingIndels = null, bool assumeImperfect = true, List <HashableIndel> confirmedAccepteds = null, List <PreIndel> mateIndels = null) { sketchy = false; forcedSoftclip = false; bool forcedAlignment = false; var presumeStartPositionForForcedAlignment = 0; if (origBamAlignment.CigarData.Count == 0) { // This was something weird that came up in the halo dataset... mapq is 0 but is still mapped, no cigar if (origBamAlignment.Position <= 0 && origBamAlignment.FragmentLength != 0) // No sense trying to fiddle with the position otherwise { // TODO does this really even move the needle? Is it helping enough to outweigh its weirdness? var presumedEndPosition = origBamAlignment.MatePosition < origBamAlignment.Position ? origBamAlignment.MatePosition - origBamAlignment.FragmentLength : origBamAlignment.MatePosition + origBamAlignment.FragmentLength; presumeStartPositionForForcedAlignment = presumedEndPosition - origBamAlignment.Bases.Length; forcedAlignment = true; } else { presumeStartPositionForForcedAlignment = origBamAlignment.Position; forcedAlignment = true; } } var anyIndelsAtAll = _regionFilterer.AnyIndelsNearby(origBamAlignment.Position); bool isRealignable = true; if (anyIndelsAtAll) { var isImperfectRead = false || ((origBamAlignment.ContainsDisallowedCigarOps(_suspectCigarOps) || origBamAlignment.GetIntTag("NM") > 0 || forcedAlignment)); var isReadWorthCaringAbout = !origBamAlignment.IsDuplicate() && !origBamAlignment.IsSecondary(); isRealignable = isImperfectRead && isReadWorthCaringAbout && origBamAlignment.Bases.Distinct().Count() > 1; } else { _statusCounter.AddStatusCount("No indels nearby at all"); isRealignable = false; } if (!isRealignable) { confirmed = false; changed = false; sketchy = false; return(origBamAlignment); } // TODO maybe flag (or return 
all) if there's a lot or high quality stuff that we're missing! Esp with pair specific var indels = _indelSource.GetRelevantIndels(forcedAlignment ? presumeStartPositionForForcedAlignment : origBamAlignment.Position, mateIndels, confirmedAccepteds); // Don't realign around single indels if we already have them bool hasExistingUnsanctionedIndels = false; bool existingSanctionedIndelIsBest = false; bool hasVeryGoodIndel = false; bool hasHardToCallIndel = false; var existingMatches = new List <PreIndel>(); HashableIndel existingConfirmedIndel = new HashableIndel(); var existingMatchHashables = new List <HashableIndel>(); if (indels.Any() && existingIndels != null && existingIndels.Any()) { var topScore = (float)(indels.Max(x => x.Key.Score)); var matchesFound = 0; var nonPreExistingIndels = new List <KeyValuePair <HashableIndel, GenomeSnippet> >(); var index = 0; foreach (var kvp in indels) { var indel = kvp.Key; var matches = existingIndels.Where(e => Helper.IsMatch(e, indel)); var isMatch = matches.Any(); if (isMatch) { matchesFound++; if (!indel.InMulti && index == 0) { existingSanctionedIndelIsBest = true; existingConfirmedIndel = indel; } var proportionOfTopScore = indel.Score / (float)topScore; if (proportionOfTopScore >= 0.75) { hasVeryGoodIndel = true; } if (indel.HardToCall) { hasHardToCallIndel = true; } existingMatches.AddRange(matches); // TODO do we need special handling of multis? existingMatchHashables.Add(indel); } if (!isMatch || indel.InMulti) { nonPreExistingIndels.Add(kvp); } index++; } // TODO do we actually want to replace indels with non-pre-existing only? 
indels = nonPreExistingIndels; if (matchesFound == 0) { hasExistingUnsanctionedIndels = true; } } // TODO this precludes us from having good multis if (existingSanctionedIndelIsBest) { // If it already had the top ranked indel, there's not really any point in trying to realign around others (here we assume that it's also the best fitting indel for the read, hence why it was originally called by the regular aligner). _statusCounter.AddStatusCount("Existing indel is already the best available"); changed = false; confirmed = true; UpdateOutcomeForConfirmed(existingConfirmedIndel); if (confirmedAccepteds == null) { confirmedAccepteds = new List <HashableIndel>(); } confirmedAccepteds.Add(existingConfirmedIndel); return(origBamAlignment); } if (!indels.Any() || origBamAlignment.EndPosition - origBamAlignment.Position > 500) { if (!indels.Any()) { // TODO maybe do the forced softclip here if the read did have indels? _statusCounter.AddStatusCount("No indels to realign to"); _statusCounter.AppendStatusStringTag("RX", $"{origBamAlignment.GetStringTag("RX")},No indels to realign to", origBamAlignment); } else { _statusCounter.AddStatusCount("Alignment reference span longer than we can realign to"); } changed = false; confirmed = false; return(origBamAlignment); } // TODO this should relate to cap on indel size... introducing too large of an indel will make us go beyond this context. 
var context = indels.First().Value; var orderedIndels = indels.Select(x => x.Key).ToList(); var numIndels = orderedIndels.Count; _statusCounter.AddStatusCount("Realigning to " + numIndels); var bamAlignment = new BamAlignment(origBamAlignment); if (forcedAlignment) { bamAlignment.CigarData = new CigarAlignment(origBamAlignment.Bases.Length + "M"); bamAlignment.Position = presumeStartPositionForForcedAlignment; } var realignResult = _readRealigner.Realign(new Read(_chromosome, bamAlignment), orderedIndels, indels.ToDictionary(x => x.Key, x => x.Value), confirmedAccepteds != null && confirmedAccepteds.Any()); var acceptedIndels = realignResult?.AcceptedIndels; var hasAnyIndels = acceptedIndels != null && acceptedIndels.Any(); if (realignResult != null) { _statusCounter.AddStatusCount("Able to realign at all (may still be worse than original)"); _statusCounter.AppendStatusStringTag("RX", "Able to realign at all(may still be worse than original)", bamAlignment); } else { _statusCounter.AddStatusCount("Not able to realign at all"); _statusCounter.AppendStatusStringTag("RX", "Not able to realign at all", origBamAlignment); } AlignmentSummary originalAlignmentSummary = null; var realignmentUnchanged = true; if (realignResult != null) { originalAlignmentSummary = Extensions.GetAlignmentSummary((new Read(_chromosome, origBamAlignment)), context.Sequence, _trackActualMismatches, _checkSoftclipsForMismatches, context.StartPosition); realignmentUnchanged = _judger.RealignmentIsUnchanged(realignResult, origBamAlignment); if (originalAlignmentSummary.NumMismatches > 0) { // TODO PERF do we still want to use this ever? var sumMismatch = Helper.GetSumOfMismatchQualities(origBamAlignment.Qualities, origBamAlignment.Bases, new Read(_chromosome, origBamAlignment).PositionMap, context.Sequence, context.StartPosition); originalAlignmentSummary.SumOfMismatchingQualities = sumMismatch; } // Within this logic also checking the same as "!realignmentUnchanged" above.. consolidate this. 
if (selectedIndels != null && (_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, confirmedAccepteds != null && confirmedAccepteds.Any())) || ResultIsGoodEnough(realignResult, origBamAlignment, originalAlignmentSummary, realignmentUnchanged, confirmedAccepteds != null && confirmedAccepteds.Any())) { UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, true, realignResult); if (realignResult.IsSketchy) { sketchy = true; } return(AcceptRealignment(origBamAlignment, out changed, selectedIndels, existingIndels, realignResult, originalAlignmentSummary, bamAlignment, hasExistingUnsanctionedIndels, out confirmed)); } } // At this point, any good realignment would have been returned. If it's realigned and changed now, it's an unaccepted (not good enough) realignment. // If it had an indel to begin with, it's basically a vote that we don't trust that indel. Optionally softclip it out. if (!realignmentUnchanged) { changed = false; confirmed = false; HandleFailedRealignment(origBamAlignment, ref forcedSoftclip, existingIndels, realignResult, hasExistingUnsanctionedIndels, existingMatches); if ((hasVeryGoodIndel || (hasHardToCallIndel && _judger.IsVeryConfident(originalAlignmentSummary))) && !hasExistingUnsanctionedIndels && existingMatchHashables.Any()) { // It didn't have the tip-top indel, but it had one that was very close, and we tried realigning around the top guys and failed - this one looks better. Give it credit. 
confirmed = true; foreach (var indel in existingMatchHashables) { UpdateOutcomeForConfirmed(indel); if (confirmedAccepteds != null) { confirmedAccepteds.Add(indel); } } } UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, false, realignResult); } else { if (acceptedIndels != null) { foreach (var indelNum in acceptedIndels) { var indel = orderedIndels[indelNum]; UpdateOutcomeForConfirmed(indel); } } _statusCounter.AddStatusCount("INDEL STATUS\tUnchanged\t" + realignResult?.Indels); _statusCounter.AppendStatusStringTag("RX", "Unchanged: " + realignResult?.Indels, origBamAlignment); confirmed = true; changed = false; return(origBamAlignment); } if (realignResult == null) { if (_softclipUnknownIndels && hasExistingUnsanctionedIndels) { var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x)); foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition)) { var reverseClip = false; var clipLength = preIndel.RightAnchor; if (preIndel.LeftAnchor < preIndel.RightAnchor) { reverseClip = true; clipLength = preIndel.LeftAnchor; } // TODO arbitrary number here... // If it's pretty well-anchored, don't remove the indel if (clipLength > 20) { continue; } forcedSoftclip = true; _statusCounter.AddStatusCount("Softclipped out bad indel"); _statusCounter.AppendStatusStringTag("RX", $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}... No realignment", origBamAlignment); _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels)); OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment, reverseClip, preIndel.ReferencePosition); } } } _statusCounter.AppendStatusStringTag("RX", "Realignment failed", origBamAlignment); _statusCounter.AddStatusCount("Realignment failed"); return(origBamAlignment); }
/// <summary>
/// Reports whether an alignment summary meets the "very confident" bar:
/// an anchor length greater than 10 together with at most one mismatch.
/// </summary>
/// <param name="realignResult">Alignment summary to evaluate.</param>
/// <returns><c>true</c> when both criteria hold; otherwise <c>false</c>.</returns>
public bool IsVeryConfident(AlignmentSummary realignResult) =>
    realignResult.AnchorLength > 10 && realignResult.NumMismatches <= 1;
/// <summary>
/// Asks the configured alignment comparer whether the realignment is at least as good
/// as the original alignment.
/// </summary>
/// <param name="realignResult">Summary of the realigned read.</param>
/// <param name="originalAlignmentSummary">Summary of the read as originally aligned.</param>
/// <param name="isPairAware">Forwarded unchanged to the comparer.</param>
/// <returns><c>true</c> when the comparer's result is zero or positive.</returns>
public bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary, bool isPairAware)
{
    var comparison = _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary, isPairAware);
    return comparison >= 0;
}
/// <summary>
/// Compares two alignment summaries and reports which one should be favoured.
/// </summary>
/// <param name="preferred">First alignment summary of the comparison.</param>
/// <param name="other">Second alignment summary of the comparison.</param>
/// <returns>
/// NOTE(review): the overrides visible in this code return 1 when the first argument wins,
/// -1 when the second argument wins and 0 when they tie; confirm every implementation
/// follows that convention.
/// </returns>
public abstract int CompareAlignments(AlignmentSummary preferred, AlignmentSummary other);
/// <summary>
/// Applies an ordered list of heuristics to decide whether a candidate alignment
/// (<paramref name="other"/>) should be preferred over the original alignment.
/// The rules are evaluated top to bottom and the first one that fires decides the result.
/// </summary>
/// <param name="other">Summary of the candidate alignment.</param>
/// <param name="original">Summary of the original alignment; may be <c>null</c>.</param>
/// <returns>
/// 1 when the candidate is preferred (always the case when <paramref name="original"/> is
/// <c>null</c>), -1 when it is rejected. When no heuristic fires, the negated result of
/// <c>CompareAlignments(original, other)</c> is returned.
/// </returns>
public int CompareAlignmentsWithOriginal2(AlignmentSummary other, AlignmentSummary original)
{
    const int otherIsBetter = 1;
    const int otherIsWorse = -1;

    if (original == null)
    {
        return otherIsBetter;
    }

    // The candidate looks a lot worse.
    if (other.NumMismatches > original.NumMismatches + 3)
    {
        return otherIsWorse;
    }

    // Same total of mismatches + softclips + indel bases: the needle has not moved much, and a
    // short deletion-only change would probably have been called by the original aligner.
    var otherEditLoad = other.NumMismatches + other.NumSoftclips + other.NumIndelBases;
    var originalEditLoad = original.NumMismatches + original.NumSoftclips + original.NumIndelBases;
    if (otherEditLoad == originalEditLoad && other.NumDeletedBases < 3 && other.NumInsertedBases == 0)
    {
        return otherIsWorse;
    }

    // TODO consider re-instating a rule that rejects a short (<= 2 base) single insertion with
    // zero anchor length when the original had fewer mismatches (softclips included) than the
    // number of inserted bases. It is disabled pending more thought.
    // TODO maybe tighter restrictions if stuff is not anchored.

    if (other.NumMismatchesIncludeSoftclip == 0)
    {
        // Special rule for one single-base indel vs. one mismatch.
        if (other.NumIndels == 1 && other.NumIndelBases == 1 &&
            original.NumMismatchesIncludeSoftclip == 1 && original.NumIndels == 0)
        {
            return otherIsWorse;
        }

        if (original.NumIndels > 0)
        {
            return otherIsBetter;
        }

        if (original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip >= 1)
        {
            return otherIsBetter;
        }

        return otherIsWorse;
    }

    // Be nice to large indels if they fit well in the new read and the old read was messy to
    // begin with. It has to actually look at least a little better, though.
    if (original.NumMismatches > 2 &&
        (other.NumMismatches - original.NumMismatches <= 2) &&
        other.NumIndels - original.NumIndels <= 2 &&
        other.NumIndelBases > 10 &&
        (other.NumMismatches < original.NumMismatches ||
         other.NumMismatchesIncludeSoftclip < (original.NumMismatchesIncludeSoftclip * 0.9) ||
         other.NumSoftclips < original.NumSoftclips))
    {
        return otherIsBetter;
    }

    // Short indel introduced where there were a lot of softclips, without improving a lot.
    if (other.NumIndelBases <= 2 &&
        other.NumIndelBases > original.NumIndelBases &&
        other.NumMismatches >= original.NumMismatches - 1 &&
        original.NumMismatchesIncludeSoftclip > 10 &&
        ((!_trustSoftclips && original.NumSoftclips * 0.8 <= other.NumSoftclips) ||
         original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip <= original.NumMismatchesIncludeSoftclip / 5))
    {
        return otherIsWorse;
    }

    // If the original had tons of mismatches/softclips and the candidate is only a little
    // better, this may just be chance (ex: polyT) -> don't accept the realignment.
    if (original.NumMismatchesIncludeSoftclip > 10 &&
        original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip <= original.NumMismatchesIncludeSoftclip / 10)
    {
        return otherIsWorse;
    }

    // Super long original softclip: the candidate had better have a much shorter softclip
    // without adding mismatches, or gain at least one match per two softclips removed.
    const int superLongSoftclipCount = 20; // TODO un-magic these numbers
    if (original.NumSoftclips > superLongSoftclipCount &&
        ((other.NumSoftclips / (float)original.NumSoftclips >= 0.75 && other.NumMismatches >= original.NumMismatches) ||
         (other.NumMatches - original.NumMatches) < (original.NumSoftclips - other.NumSoftclips) / 2f))
    {
        return otherIsWorse;
    }

    // Really doesn't look better.
    if (original.NumMismatches - other.NumMismatches <= 0 &&
        other.NumMatches - original.NumMatches <= 2 &&
        other.NumIndels >= original.NumIndels &&
        original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip <= 2)
    {
        return otherIsWorse;
    }

    const double softclipMismatchBenefitOfDoubt = 0.75;
    if (other.NumMismatches > original.NumMismatches &&
        (other.NumMismatchesIncludeSoftclip > (original.NumMismatchesIncludeSoftclip * softclipMismatchBenefitOfDoubt)) &&
        other.AnchorLength < 3)
    {
        return otherIsWorse;
    }

    const int maxUnsharedMismatches = 2;
    const int minMismatchReduction = 1;
    const int minMismatchReductionForSmallIndel = 2;

    int sharedMismatchCount;
    if (!_trackActualMismatches)
    {
        // Use an approximation if we don't want to do the whole thing.
        sharedMismatchCount = Math.Min(original.NumMismatchesIncludeSoftclip, other.NumMismatchesIncludeSoftclip);
    }
    else if (original.MismatchesIncludeSoftclip == null || other.MismatchesIncludeSoftclip == null)
    {
        sharedMismatchCount = 0;
    }
    else
    {
        sharedMismatchCount = original.MismatchesIncludeSoftclip.Intersect(other.MismatchesIncludeSoftclip).Count();
    }

    // Be more wary of shorter indels.
    if (other.NumIndelBases <= 3 && (original.NumIndelBases == 0 || original.NumIndelBases > 3))
    {
        // The only mismatches in the candidate are shared, and it has fewer mismatches overall.
        if (other.NumMismatchesIncludeSoftclip - sharedMismatchCount == 0 &&
            original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip >= minMismatchReductionForSmallIndel)
        {
            return otherIsBetter;
        }

        if (other.NumMismatchesIncludeSoftclip - original.NumMismatchesIncludeSoftclip <= 1)
        {
            return otherIsBetter;
        }

        return otherIsWorse;
    }

    // Most of the mismatches are shared, few are new, and there are fewer than in the original.
    if (other.NumMismatchesIncludeSoftclip - sharedMismatchCount <= maxUnsharedMismatches &&
        original.NumMismatchesIncludeSoftclip - other.NumMismatchesIncludeSoftclip >= minMismatchReduction)
    {
        return otherIsBetter;
    }

    // CompareAlignments is called with the original first, so flip its sign to express the
    // result from the candidate's point of view.
    return -CompareAlignments(original, other);
}
/// <summary>
/// Asks the configured alignment comparer whether the realignment is at least as good
/// as the original alignment.
/// </summary>
/// <param name="realignResult">Summary of the realigned read.</param>
/// <param name="originalAlignmentSummary">Summary of the read as originally aligned.</param>
/// <returns><c>true</c> when the comparer's result is zero or positive.</returns>
private bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary) =>
    _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary) >= 0;
/// <summary>
/// Rule-based comparison of two alignment summaries. Rules are checked in order and the
/// first one that fires decides the outcome.
/// </summary>
/// <param name="original">Summary of the original alignment.</param>
/// <param name="other">Summary of the competing alignment; may be <c>null</c>.</param>
/// <returns>
/// 1 when <paramref name="original"/> is favoured (including when <paramref name="other"/>
/// is <c>null</c>), -1 when <paramref name="other"/> is favoured, 0 when no rule separates them.
/// </returns>
public override int CompareAlignments(AlignmentSummary original, AlignmentSummary other)
{
    const int originalWins = 1;
    const int otherWins = -1;
    const int tie = 0;

    if (other == null)
    {
        return originalWins;
    }

    // Original was much better.
    if (other.NumMismatches > original.NumMismatches + 3)
    {
        return originalWins;
    }

    // A single mismatch with no indels beats an alignment that needs several indels.
    if (original.NumMismatches == 1 && original.NumIndels == 0 && other.NumIndels > 1)
    {
        return originalWins;
    }

    if (other.NumMismatches == 1 && other.NumIndels == 0 && original.NumIndels > 1)
    {
        return otherWins;
    }

    // Original wasn't that bad, and it's better than the new one.
    if (original.NumMismatchesIncludeSoftclip < 5 &&
        original.NumMismatchesIncludeSoftclip < other.NumMismatchesIncludeSoftclip)
    {
        return originalWins;
    }

    // Original was bad, but is reasonably better than the new one.
    if (original.NumMismatchesIncludeSoftclip >= 5 &&
        original.NumMismatchesIncludeSoftclip < other.NumMismatchesIncludeSoftclip * 0.8)
    {
        return originalWins;
    }

    // New is reasonably better than original.
    if (original.NumMismatchesIncludeSoftclip > other.NumMismatchesIncludeSoftclip + 1)
    {
        return otherWins;
    }

    if (original.NumIndelBases == other.NumIndelBases)
    {
        // Same number of indel bases: favour the alignment that explains them with one indel.
        if (original.NumIndels == 1 && other.NumIndels > 1 && original.NumMismatches <= 2)
        {
            return originalWins;
        }

        if (other.NumIndels == 1 && original.NumIndels > 1 && other.NumMismatches <= 2)
        {
            return otherWins;
        }

        var bothHaveAFewMismatches =
            original.NumMismatches > 0 && other.NumMismatches > 0 &&
            original.NumMismatches <= 5 && other.NumMismatches <= 5;

        if (bothHaveAFewMismatches)
        {
            // Rather have extra mismatches be low quality, as it's more likely they are
            // illegitimate and this alignment is in the right place.
            if (original.SumOfMismatchingQualities <= other.SumOfMismatchingQualities)
            {
                return originalWins;
            }

            if (original.SumOfMismatchingQualities > other.SumOfMismatchingQualities)
            {
                return otherWins;
            }
        }
    }

    // The new alignment is completely clean while the original was not.
    if (original.NumMismatchesIncludeSoftclip > 0 && other.NumMismatchesIncludeSoftclip == 0)
    {
        return otherWins;
    }

    // Finally, fewer indels wins.
    if (original.NumIndels < other.NumIndels)
    {
        return originalWins;
    }

    if (original.NumIndels > other.NumIndels)
    {
        return otherWins;
    }

    return tie;
}
/// <summary>
/// Walks a CIGAR against a reference sequence and tallies matches, mismatches, soft-clips,
/// indels and the anchor length of the read.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> of the first aligned base. When
/// <paramref name="checkSoftclipsForMismatches"/> is set, it is shifted back by the CIGAR's
/// prefix clip so that soft-clipped bases can be compared against the reference too.
/// </param>
/// <param name="cigarData">CIGAR of the alignment; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases the read is compared against.</param>
/// <param name="readSequence">Bases of the read.</param>
/// <param name="trackActualMismatches">
/// When true, each mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, non-N soft-clipped bases are compared against the reference (positions outside
/// the reference count as mismatches) and counted in <c>NumMismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="probeSoftclipPrefix">Not used by this method.</param>
/// <param name="probeSoftclipSuffix">Not used by this method.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation extends past the end of
/// <paramref name="refSequence"/>.
/// </returns>
/// <exception cref="InvalidDataException">
/// An 'M' operation covers a position before the start of <paramref name="refSequence"/>.
/// </exception>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // matching 'M' bases seen before the first mismatch or indel
    var endAnchorLength = 0;   // matching 'M' bases seen since the last mismatch or indel
    var hasHitNonMatch = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)operation.Length;

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var readBase = readSequence[startIndexInRead + i];
                    if (readBase == 'N')
                    {
                        continue;
                    }

                    summary.NumNonNSoftclips++;
                    if (!checkSoftclipsForMismatches)
                    {
                        continue;
                    }

                    var refIndex = startIndexInReference + i;
                    if (refIndex < 0 || refIndex >= refSequence.Length)
                    {
                        // A soft-clipped base hanging off either end of the reference counts as a mismatch.
                        summary.NumMismatchesIncludeSoftclip++;
                    }
                    else if (readBase != refSequence[refIndex])
                    {
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], readBase);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }
                    }
                }

                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;

                    if (refIndex > refSequence.Length - 1)
                    {
                        // The read runs off the end of the reference: signal "no summary" to the caller.
                        return null;
                    }

                    if (refIndex < 0)
                    {
                        // Fail with a descriptive error instead of an IndexOutOfRangeException from the indexer below.
                        throw new InvalidDataException(
                            "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                            cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;

                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // N bases extend the anchors but are not counted as matches.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }

                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        // Soft-clips are walked along the reference as well when they are being checked.
        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            startIndexInReference += opLength;
        }
    }

    // The anchor is the shorter of the leading and trailing runs of matching bases.
    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}
/// <summary>
/// Walks a CIGAR against a reference sequence and tallies matches, mismatches, soft-clips,
/// indels and the anchor length of the read.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> of the first aligned base. When
/// <paramref name="checkSoftclipsForMismatches"/> is set, it is shifted back by the CIGAR's
/// prefix clip so that soft-clipped bases can be compared against the reference too.
/// </param>
/// <param name="cigarData">CIGAR of the alignment; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases the read is compared against.</param>
/// <param name="readSequence">Bases of the read.</param>
/// <param name="trackActualMismatches">
/// When true, each mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, counted soft-clipped bases are compared against the reference (positions
/// outside the reference count as mismatches) and added to <c>NumMismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="probeSoftclipPrefix">Not used by this method.</param>
/// <param name="probeSoftclipSuffix">Not used by this method.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation extends past the end of
/// <paramref name="refSequence"/>.
/// </returns>
/// <exception cref="InvalidDataException">
/// An 'M' operation covers a position before the start of <paramref name="refSequence"/>.
/// </exception>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // matching 'M' bases seen before the first mismatch or indel
    var endAnchorLength = 0;   // matching 'M' bases seen since the last mismatch or indel
    var hasHitNonMatch = false;
    var hasHitNonNSoftclip = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)(operation.Length);

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var readBase = readSequence[startIndexInRead + i];

                    // Only N bases seen before the first non-N soft-clipped base are skipped. Once a
                    // real base has been seen, later Ns get no special treatment (the N-softclip
                    // distinction was presumably meant for padding-type softclips).
                    if (readBase == 'N' && !hasHitNonNSoftclip)
                    {
                        continue;
                    }

                    hasHitNonNSoftclip = true;
                    summary.NumNonNSoftclips++;

                    if (!checkSoftclipsForMismatches)
                    {
                        continue;
                    }

                    var refIndex = startIndexInReference + i;
                    if (refIndex < 0 || refIndex >= refSequence.Length)
                    {
                        // A soft-clipped base hanging off either end of the reference counts as a mismatch.
                        summary.NumMismatchesIncludeSoftclip++;
                    }
                    else if (readBase != refSequence[refIndex] && readBase != 'N')
                    {
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], readBase);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }
                    }
                }

                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;

                    if (refIndex > refSequence.Length - 1)
                    {
                        // The read runs off the end of the reference: signal "no summary" to the caller.
                        return null;
                    }

                    if (refIndex < 0)
                    {
                        throw new InvalidDataException(
                            "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                            cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;

                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // N bases extend the anchors but are not counted as matches.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }

                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        // Soft-clips are walked along the reference as well when they are being checked.
        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            startIndexInReference += opLength;
        }
    }

    // The anchor is the shorter of the leading and trailing runs of matching bases.
    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}