/// <summary>
/// Builds a reversed copy of a CIGAR alignment.
/// </summary>
/// <param name="cigar">Alignment whose string form is used to construct the copy.</param>
/// <returns>
/// A new <see cref="CigarAlignment"/> constructed from <c>cigar.ToString()</c> on which
/// <c>Reverse()</c> has been called; <c>Reverse()</c> is never invoked on <paramref name="cigar"/> itself.
/// </returns>
public static CigarAlignment GetReverse(this CigarAlignment cigar)
{
    // Round-trip through the string form so the reversal happens on a separate instance.
    string cigarText = cigar.ToString();
    CigarAlignment reversed = new CigarAlignment(cigarText);
    reversed.Reverse();
    return reversed;
}
/// <summary>
/// Checks Count/ToString of a <see cref="CigarAlignment"/> after construction, Add, Reverse and Clear.
/// </summary>
public void CigarString_Manipulation_Tests()
{
    const string initialCigar = "7M3I2D1S11M2S";
    const string afterAdd = "7M3I2D1S11M2S6M";
    const string afterReverse = "6M2S11M1S2D3I7M";

    // Freshly parsed: six operations, string form round-trips.
    var alignment = new CigarAlignment(initialCigar);
    Assert.Equal(6, alignment.Count);
    Assert.Equal(initialCigar, alignment.ToString());

    // Appending a 6M operation grows the alignment by one op.
    var extraMatch = new CigarOp('M', 6);
    alignment.Add(extraMatch);
    Assert.Equal(afterAdd, alignment.ToString());
    Assert.Equal(7, alignment.Count);

    // Reversal flips operation order but keeps the op count.
    alignment.Reverse();
    Assert.Equal(afterReverse, alignment.ToString());
    Assert.Equal(7, alignment.Count);

    // Clearing leaves an empty alignment.
    alignment.Clear();
    Assert.Equal("", alignment.ToString());
    Assert.Equal(0, alignment.Count);
}
/// <summary>
/// Checks that <c>CigarAlignment.Compress()</c> reports success and produces the expected
/// string for adjacent same-type ops, a zero-length op, and interleaved I/D ops.
/// </summary>
public void Compress_Tests()
{
    // Adjacent operations of the same type are merged.
    var cigarAlignment1 = new CigarAlignment("5M2M");
    Assert.True(cigarAlignment1.Compress());
    Assert.Equal("7M", cigarAlignment1.ToString());

    // A zero-length operation disappears.
    var cigarAlignment2 = new CigarAlignment("5M0M");
    Assert.True(cigarAlignment2.Compress());
    Assert.Equal("5M", cigarAlignment2.ToString());

    // Interleaved insertions and deletions are collapsed into one I and one D.
    var cigarAlignment3 = new CigarAlignment("5I2D1I3D");
    Assert.True(cigarAlignment3.Compress());
    Assert.Equal("6I5D", cigarAlignment3.ToString());
}
/// <summary>
/// Walks the CIGAR operations of an alignment and tallies matches, mismatches, indels,
/// soft-clipped bases and the anchor length into an <see cref="AlignmentSummary"/>.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> at which the aligned part of the read starts. When
/// <paramref name="checkSoftclipsForMismatches"/> is true it is first shifted left by the prefix clip so
/// that soft-clipped bases line up against the reference as well.
/// </param>
/// <param name="cigarData">CIGAR operations to walk; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases, indexed by reference position.</param>
/// <param name="readSequence">Read bases, indexed by read position.</param>
/// <param name="trackActualMismatches">
/// When true, each mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, non-N soft-clipped bases are compared with the reference; positions outside the reference count as mismatches.
/// </param>
/// <param name="probeSoftclipPrefix">Not used by this implementation.</param>
/// <param name="probeSoftclipSuffix">Not used by this implementation.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation extends past the end of
/// <paramref name="refSequence"/>.
/// </returns>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence,
    string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true,
    int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        // Back up so the prefix soft-clip is compared against the reference bases preceding the alignment.
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // run of matching M bases before the first mismatch/indel
    var endAnchorLength = 0;   // run of matching M bases since the last mismatch/indel
    var hasHitNonMatch = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)operation.Length;

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var clippedBase = readSequence[startIndexInRead + i];
                    if (clippedBase == 'N')
                    {
                        continue;
                    }

                    summary.NumNonNSoftclips++;
                    if (!checkSoftclipsForMismatches)
                    {
                        continue;
                    }

                    var refIndex = startIndexInReference + i;
                    if (refIndex < 0 || refIndex >= refSequence.Length)
                    {
                        // A clipped base hanging off either end of the reference counts as a mismatch.
                        summary.NumMismatchesIncludeSoftclip++;
                    }
                    else if (clippedBase != refSequence[refIndex])
                    {
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], clippedBase);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }
                    }
                }
                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;
                    if (refIndex > refSequence.Length - 1)
                    {
                        // Read goes off the end of the reference: signalled to callers with null.
                        // (An InvalidDataException used to follow this return but was unreachable.)
                        return null;
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // N bases extend the anchors but are not counted as matches.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }
                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            // Soft-clips were laid against the reference above, so they advance the reference cursor too.
            startIndexInReference += opLength;
        }
    }

    // The reported anchor is the shorter of the leading and trailing runs.
    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}
/// <summary>
/// Exercises <c>Helper.SoftclipCigar</c> for prefix and suffix soft-clips, both in the default mode
/// and with <c>maskNsOnly: true</c> for various <c>prefixNs</c>/<c>suffixNs</c> values.
/// Each scenario feeds a raw realigned cigar, a per-base match map and the original soft-clip
/// lengths, then asserts on the string form of the returned cigar.
/// </summary>
public void SoftclipCigar()
{
    // ---- Softclip Prefix ---- //

    // Original cigar = 2S3M, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: TTAAG
    var rawRealignedCigar = new CigarAlignment("4M1I");
    var mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
    var softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0);
    Assert.Equal("2S2M1I", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal("4M1I", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
    Assert.Equal("2S2M1I", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns for only part of original softclip
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 1, suffixNs: 0);
    Assert.Equal("1S3M1I", softclippedCigar.ToString());

    // Original cigar = 2S3M, With terminal Ns, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: NNAAG
    mismatchMap = new[] { MatchType.NMismatch, MatchType.NMismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0);
    Assert.Equal("2S2M1I", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
    Assert.Equal("2S2M1I", softclippedCigar.ToString());

    // Original cigar = 5M, Realignment adds I, realigned cigar should be unchanged by softclipping
    // Ref: AAAAA
    // Alt: TTAAG
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0);
    Assert.Equal("4M1I", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal("4M1I", softclippedCigar.ToString());

    // Realigned cigar is 5M (no indel introduced); it should be unchanged by softclipping
    // Ref: AAAAA
    // Alt: TTAAG
    var rawCigarAllMatches = new CigarAlignment("5M");
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawCigarAllMatches, mismatchMap, 2, 0);
    Assert.Equal("5M", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawCigarAllMatches, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal("5M", softclippedCigar.ToString());

    // Original cigar = 3S2M, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: TATAG
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0);
    Assert.Equal("3S1M1I", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal("4M1I", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("3S1M1I", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns for only part of original softclip
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 1, suffixNs: 0);
    Assert.Equal("1S3M1I", softclippedCigar.ToString());
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
    Assert.Equal("2S2M1I", softclippedCigar.ToString());

    // Original cigar = 3S2M, Realignment adds I overlapping S region -> Shortening of softclip due to I
    // Ref: AAAAA
    // Alt: TTTAG
    var rawRealignedCigar_StoI = new CigarAlignment("2M1I2M");
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0);
    Assert.Equal("2S1I2M", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal("2M1I2M", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("2S1I2M", softclippedCigar.ToString());

    // Original cigar = 3S2M, Realignment adds I overlapping S region -> Shortening of softclip due to I
    // Ref: AAAAA
    // Alt: TATAG
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0);
    // Expected when shortening of the softclip is allowed if bases match. Under the alternative policy
    // (mask the whole original S that became M, regardless of matchiness) this would be "2S1I2M".
    Assert.Equal("1S1M1I2M", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoI.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("2S1I2M", softclippedCigar.ToString());

    // Original cigar = 3S2M, Realignment adds D then I overlapping S region -> Shortening of softclip
    // Ref: AAAAA
    // Alt: TTTAG
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
    var rawRealignedCigar_StoID = new CigarAlignment("2M1D1I2M");
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0);
    Assert.Equal("2S1D1I2M", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoID.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("2S1D1I2M", softclippedCigar.ToString());

    // Original cigar = 3S2M, Realignment adds I then D overlapping S region -> Shortening of softclip
    // Ref: AAAAA
    // Alt: TTTAG
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
    rawRealignedCigar_StoID = new CigarAlignment("2M1I1D2M");
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0);
    Assert.Equal("2S1I1D2M", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoID.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("2S1I1D2M", softclippedCigar.ToString());

    // Original softclip prefix of 4, Realignment adds D overlapping S region -> Shortening of softclip due to D
    // Ref: AAAAA
    // Alt: TTTAG
    var rawRealignedCigar_StoD = new CigarAlignment("1M2D4M");
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0);
    Assert.Equal("1S2D4M", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoD.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
    Assert.Equal("1S2D4M", softclippedCigar.ToString());

    // Original cigar = 4S1M, Realignment adds I overlapping S region -> Shortening of softclip due to I
    // Ref: AAAAA
    // Alt: TTTAG
    var rawRealignedCigar_noM = new CigarAlignment("4M1I");
    mismatchMap = new[] { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Unmapped };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0);
    Assert.Equal("3S1M1I", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_noM.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 4, suffixNs: 0);
    Assert.Equal("3S1M1I", softclippedCigar.ToString());

    // ---- Softclip Suffix ---- //

    // Original cigar = 3M2S, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: GAATT
    rawRealignedCigar = new CigarAlignment("1I4M");
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2);
    Assert.Equal("1I2M2S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
    Assert.Equal("1I2M2S", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns for only part of original softclip
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 1);
    Assert.Equal("1I3M1S", softclippedCigar.ToString());

    // Original cigar = 3M2S, With terminal Ns, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: GAANN
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.NMismatch, MatchType.NMismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2);
    Assert.Equal("1I2M2S", softclippedCigar.ToString());
    // Remask Ns Only - has Ns, so should be same
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
    Assert.Equal("1I2M2S", softclippedCigar.ToString());

    // Original cigar = 5M, Realignment adds I, realigned cigar should be unchanged by softclipping
    // Ref: AAAAA
    // Alt: GAATT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0);
    Assert.Equal("1I4M", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I outside of S region
    // Ref: AAAAA
    // Alt: GATTT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3);
    Assert.Equal("1I1M3S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("1I1M3S", softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns for only part of original softclip
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
    Assert.Equal("1I2M2S", softclippedCigar.ToString());
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 1);
    Assert.Equal("1I3M1S", softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I outside of S region; MXM-type-softclip. No shortening of softclip.
    // Ref: AAAAA
    // Alt: GATAT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Mismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3);
    Assert.Equal("1I1M3S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("1I1M3S", softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I overlapping S region -> Shortening of softclip due to I
    // Ref: AAAAA
    // Alt: GATTT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
    var rawRealignedCigar_StoI_suffix = new CigarAlignment("2M1I2M");
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3);
    Assert.Equal("2M1I2S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoI_suffix.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("2M1I2S", softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I overlapping S region -> Shortening of softclip due to I
    // Ref: AAAAA
    // Alt: GATAT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Mismatch };
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3);
    // Expected when shortening of the softclip is allowed if bases match. Under the alternative policy
    // (mask the whole original S that became M, regardless of matchiness) this would be "2M1I2S".
    Assert.Equal("2M1I1M1S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoI_suffix.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("2M1I2S", softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I then D overlapping S region -> Shortening of softclip
    // Ref: AAAAA
    // Alt: GATAT
    var rawRealignedCigar_StoID_suffix = new CigarAlignment("2M1I1D2M");
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3);
    // Expected when shortening of the softclip is allowed if bases match. Under the alternative policy
    // (mask the whole original S that became M, regardless of matchiness) this would be "2M1I1D2S".
    Assert.Equal("2M1I1D1M1S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoID_suffix.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("2M1I1D2S", softclippedCigar.ToString());

    // Original cigar = 2M3S, Realignment adds I then D overlapping S region -> Shortening of softclip
    // Ref: AAAAA
    // Alt: GATTT
    mismatchMap = new[] { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
    rawRealignedCigar_StoID_suffix = new CigarAlignment("2M1I1D2M");
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3);
    Assert.Equal("2M1I1D2S", softclippedCigar.ToString());
    // Remask Ns Only
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
    Assert.Equal(rawRealignedCigar_StoID_suffix.ToString(), softclippedCigar.ToString());
    // Remask Ns Only - pretend we had Ns where orig softclip was
    softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
    Assert.Equal("2M1I1D2S", softclippedCigar.ToString());

    // Real example
    mismatchMap = CreateMatchTypeArray(new List<Tuple<int, MatchType>>()
    {
        new Tuple<int, MatchType>(1, MatchType.NMismatch),
        new Tuple<int, MatchType>(100, MatchType.Match),
    });
    softclippedCigar = Helper.SoftclipCigar(new CigarAlignment("90M18D11M"), mismatchMap, 5, 14, maskNsOnly: true, prefixNs: 1, suffixNs: 0);
    Assert.Equal("1S89M18D11M", softclippedCigar.ToString());

    mismatchMap = CreateMatchTypeArray(new List<Tuple<int, MatchType>>()
    {
        new Tuple<int, MatchType>(100, MatchType.Match),
        new Tuple<int, MatchType>(1, MatchType.NMismatch)
    });
    softclippedCigar = Helper.SoftclipCigar(new CigarAlignment("96M18D5M"), mismatchMap, 0, 8, maskNsOnly: true, prefixNs: 0, suffixNs: 1);
    Assert.Equal("96M18D4M1S", softclippedCigar.ToString());
}
/// <summary>
/// Walks the CIGAR operations of an alignment and tallies matches, mismatches, indels,
/// soft-clipped bases and the anchor length into an <see cref="AlignmentSummary"/>.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> at which the aligned part of the read starts. When
/// <paramref name="checkSoftclipsForMismatches"/> is true it is first shifted left by the prefix clip so
/// that soft-clipped bases line up against the reference as well.
/// </param>
/// <param name="cigarData">CIGAR operations to walk; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases, indexed by reference position.</param>
/// <param name="readSequence">Read bases, indexed by read position.</param>
/// <param name="trackActualMismatches">
/// When true, each mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, soft-clipped bases counted as non-N are compared with the reference; positions outside the
/// reference count as mismatches.
/// </param>
/// <param name="probeSoftclipPrefix">Not used by this implementation.</param>
/// <param name="probeSoftclipSuffix">Not used by this implementation.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation extends past the end of
/// <paramref name="refSequence"/>.
/// </returns>
/// <exception cref="InvalidDataException">
/// An 'M' operation would compare a base at a negative reference index.
/// </exception>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence,
    string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true,
    int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        // Back up so the prefix soft-clip is compared against the reference bases preceding the alignment.
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // run of matching M bases before the first mismatch/indel
    var endAnchorLength = 0;   // run of matching M bases since the last mismatch/indel
    var hasHitNonMatch = false;
    var hasHitNonNSoftclip = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)(operation.Length);

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var clippedBase = readSequence[startIndexInRead + i];

                    // No special treatment for Ns that follow a non-N soft-clipped base: the N-softclip
                    // distinction appears to be meant for padding-type softclips only. Note that the flag
                    // is never reset, so it carries over into later soft-clip operations.
                    if (clippedBase == 'N' && !hasHitNonNSoftclip)
                    {
                        continue;
                    }

                    hasHitNonNSoftclip = true;
                    summary.NumNonNSoftclips++;
                    if (!checkSoftclipsForMismatches)
                    {
                        continue;
                    }

                    var refIndex = startIndexInReference + i;
                    if (refIndex < 0 || refIndex >= refSequence.Length)
                    {
                        // A clipped base hanging off either end of the reference counts as a mismatch.
                        summary.NumMismatchesIncludeSoftclip++;
                    }
                    else if (clippedBase != refSequence[refIndex] && clippedBase != 'N')
                    {
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            // TODO: when Hygea is retired, remove this tracking if it is no longer used, to save time.
                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], clippedBase);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }
                    }
                }
                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;
                    if (refIndex > refSequence.Length - 1)
                    {
                        // Read goes off the end of the reference: signalled to callers with null.
                        // (An InvalidDataException used to follow this return but was unreachable.)
                        return null;
                    }

                    if (refIndex < 0)
                    {
                        throw new InvalidDataException(
                            "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                            cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;
                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string>();
                            }

                            // TODO: when Hygea is retired, remove this tracking if it is no longer used, to save time.
                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // N bases extend the anchors but are not counted as matches.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }
                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            // Soft-clips were laid against the reference above, so they advance the reference cursor too.
            startIndexInReference += opLength;
        }
    }

    // The reported anchor is the shorter of the leading and trailing runs.
    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}