Example #1
0
        /// <summary>
        /// Returns a reversed copy of the given alignment.
        /// </summary>
        /// <param name="cigar">Alignment whose string form seeds the copy.</param>
        /// <returns>A new <c>CigarAlignment</c> built from <paramref name="cigar"/>'s string form, after <c>Reverse()</c> has been called on it.</returns>
        public static CigarAlignment GetReverse(this CigarAlignment cigar)
        {
            // Build a separate instance from the textual form so that Reverse()
            // is invoked on the new object rather than on the argument.
            var cigarText    = cigar.ToString();
            var reversedCopy = new CigarAlignment(cigarText);

            reversedCopy.Reverse();
            return reversedCopy;
        }
Example #2
0
        // Exercises construction, Add, Reverse and Clear on a CigarAlignment,
        // checking both the string form and the operation count after each step.
        public void CigarString_Manipulation_Tests()
        {
            const string initialCigar = "7M3I2D1S11M2S";
            var alignment = new CigarAlignment(initialCigar);

            // Freshly parsed: six operations, round-trips to the same text.
            Assert.Equal(6, alignment.Count);
            Assert.Equal(initialCigar, alignment.ToString());

            // Appending an operation extends both the text and the count.
            var appended = new CigarOp('M', 6);
            alignment.Add(appended);
            Assert.Equal("7M3I2D1S11M2S6M", alignment.ToString());
            Assert.Equal(7, alignment.Count);

            // Reversal flips operation order but keeps the count.
            alignment.Reverse();
            Assert.Equal("6M2S11M1S2D3I7M", alignment.ToString());
            Assert.Equal(7, alignment.Count);

            // Clearing leaves an empty alignment.
            alignment.Clear();
            Assert.Equal("", alignment.ToString());
            Assert.Equal(0, alignment.Count);
        }
Example #3
0
        // Verifies that Compress() reports a change and rewrites the cigar for three inputs:
        // adjacent same-type operations, a zero-length operation, and interleaved I/D operations.
        public void Compress_Tests()
        {
            // Adjacent operations of the same type are merged.
            var cigarAlignment1 = new CigarAlignment("5M2M");

            Assert.True(cigarAlignment1.Compress());
            Assert.Equal("7M", cigarAlignment1.ToString());

            // A zero-length operation is dropped.
            var cigarAlignment2 = new CigarAlignment("5M0M");

            Assert.True(cigarAlignment2.Compress());
            Assert.Equal("5M", cigarAlignment2.ToString());

            // Interleaved insertions and deletions are grouped by type.
            var cigarAlignment3 = new CigarAlignment("5I2D1I3D");

            Assert.True(cigarAlignment3.Compress());
            Assert.Equal("6I5D", cigarAlignment3.ToString());
        }
Example #4
0
        /// <summary>
        /// Walks a read's cigar operations against the reference and tallies soft-clips,
        /// matches, mismatches, indels and the anchor length.
        /// </summary>
        /// <param name="startIndexInReference">
        /// Index into <paramref name="refSequence"/> where the aligned portion starts. When
        /// <paramref name="checkSoftclipsForMismatches"/> is true it is first moved left by the
        /// cigar's prefix clip so that soft-clipped bases can be compared to the reference too.
        /// </param>
        /// <param name="cigarData">Cigar operations to walk; also stored on the returned summary.</param>
        /// <param name="refSequence">Reference bases.</param>
        /// <param name="readSequence">Read bases; indexed for every 'S' and 'M' position.</param>
        /// <param name="trackActualMismatches">
        /// When true, each mismatch is also recorded as "refIndex_refBase_readBase" in
        /// <c>MismatchesIncludeSoftclip</c> (the list is created lazily on the first mismatch).
        /// </param>
        /// <param name="checkSoftclipsForMismatches">
        /// When true, non-N soft-clipped bases are compared to the reference and the reference
        /// index is advanced across soft-clips.
        /// </param>
        /// <param name="probeSoftclipPrefix">Not read by this implementation.</param>
        /// <param name="probeSoftclipSuffix">Not read by this implementation.</param>
        /// <returns>
        /// The populated summary, or <c>null</c> when an 'M' operation extends past the end of
        /// <paramref name="refSequence"/>.
        /// </returns>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead = 0;
            var anchorLength     = 0;     // run of matching 'M' bases before the first mismatch/indel
            var endAnchorLength  = 0;     // run of matching 'M' bases since the last mismatch/indel
            var hasHitNonMatch   = false;

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)operation.Length;

                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        summary.NumSoftclips++;

                        var clippedBase = readSequence[startIndexInRead + i];
                        if (clippedBase == 'N')
                        {
                            continue;
                        }

                        summary.NumNonNSoftclips++;

                        if (!checkSoftclipsForMismatches)
                        {
                            continue;
                        }

                        var refIndex = startIndexInReference + i;
                        if (refIndex < 0 || refIndex >= refSequence.Length)
                        {
                            // A clipped base hanging off either end of the reference counts as a mismatch.
                            summary.NumMismatchesIncludeSoftclip++;
                        }
                        else if (clippedBase != refSequence[refIndex])
                        {
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], clippedBase);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        var refIndex = startIndexInReference + i;
                        if (refIndex > refSequence.Length - 1)
                        {
                            // Read runs off the end of the reference: signal with null.
                            // (An exception used to follow this return but could never execute.)
                            return null;
                        }

                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // 'N' bases extend the anchors but are not counted as matches.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }

                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }

                // The start index was shifted left by the prefix clip above, so soft-clips
                // must also advance the reference position when they are being checked.
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    startIndexInReference += opLength;
                }
            }

            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return summary;
        }
Example #5
0
        // Exercises Helper.SoftclipCigar for prefix and suffix soft-clips. Each scenario supplies a
        // realigned cigar, a per-base match map and the original prefix/suffix clip lengths, and checks
        // the re-softclipped cigar in default mode and in maskNsOnly mode with various prefixNs/suffixNs.
        public void SoftclipCigar()
        {
            // ---- Softclip Prefix ---- //
            // Original cigar = 2S3M, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    TTAAG
            var rawRealignedCigar = new CigarAlignment("4M1I");
            var mismatchMap       = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
            var softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0);

            Assert.Equal("2S2M1I", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal("4M1I", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
            Assert.Equal("2S2M1I", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns for only part of original softclip
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 1, suffixNs: 0);
            Assert.Equal("1S3M1I", softclippedCigar.ToString());

            // Original cigar = 2S3M, With terminal Ns, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    NNAAG
            mismatchMap = new[]
            { MatchType.NMismatch, MatchType.NMismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0);
            Assert.Equal("2S2M1I", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
            Assert.Equal("2S2M1I", softclippedCigar.ToString());

            // Original cigar = 5M, Realignment adds I, realigned cigar should be unchanged by softclipping
            //  Ref:    AAAAA
            //  Alt:    TTAAG
            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0);
            Assert.Equal("4M1I", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal("4M1I", softclippedCigar.ToString());

            // Original cigar = 5M, still 5M, realigned cigar should be unchanged by softclipping
            //  Ref:    AAAAA
            //  Alt:    TTAAG
            var rawCigarAllMatches = new CigarAlignment("5M");

            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawCigarAllMatches, mismatchMap, 2, 0);
            Assert.Equal("5M", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawCigarAllMatches, mismatchMap, 2, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal("5M", softclippedCigar.ToString());

            // Original cigar = 3S2M, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    TATAG
            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0);
            Assert.Equal("3S1M1I", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal("4M1I", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("3S1M1I", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns for only part of original softclip
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 1, suffixNs: 0);
            Assert.Equal("1S3M1I", softclippedCigar.ToString());
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 2, suffixNs: 0);
            Assert.Equal("2S2M1I", softclippedCigar.ToString());

            // Original cigar = 3S2M, Realignment adds I overlapping S region -> Shortening of softclip due to I
            //  Ref:    AAAAA
            //  Alt:    TTTAG
            var rawRealignedCigar_StoI = new CigarAlignment("2M1I2M");

            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0);
            Assert.Equal("2S1I2M", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal("2M1I2M", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("2S1I2M", softclippedCigar.ToString());

            // Original cigar = 3S2M, Realignment adds I overlapping S region -> Shortening of softclip due to I
            //  Ref:    AAAAA
            //  Alt:    TATAG
            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0);
            Assert.Equal("1S1M1I2M", softclippedCigar.ToString()); // If allow shortening of softclip if bases match
            //Assert.Equal("2S1I2M", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoI.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("2S1I2M", softclippedCigar.ToString());

            // Original cigar = 3S2M, Realignment adds D and I overlapping S region -> Shortening of softclip
            //  Ref:    AAAAA
            //  Alt:    TTTAG
            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
            var rawRealignedCigar_StoID = new CigarAlignment("2M1D1I2M");

            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0);
            Assert.Equal("2S1D1I2M", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoID.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("2S1D1I2M", softclippedCigar.ToString());

            // Original cigar = 3S2M, Realignment adds I and D overlapping S region -> Shortening of softclip
            //  Ref:    AAAAA
            //  Alt:    TTTAG
            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Match, MatchType.Unmapped };
            rawRealignedCigar_StoID = new CigarAlignment("2M1I1D2M");
            softclippedCigar        = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0);
            Assert.Equal("2S1I1D2M", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoID.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID, mismatchMap, 3, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("2S1I1D2M", softclippedCigar.ToString());

            // Original prefix softclip of 4 (as passed below), Realignment adds D overlapping S region -> Shortening of softclip due to D
            //  Ref:    AAAAA
            //  Alt:    TTTAG
            var rawRealignedCigar_StoD = new CigarAlignment("1M2D4M");

            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0);
            Assert.Equal("1S2D4M", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoD.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoD, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 3, suffixNs: 0);
            Assert.Equal("1S2D4M", softclippedCigar.ToString());

            // Original cigar = 4S1M, Realignment adds I overlapping S region -> Shortening of softclip due to I
            //  Ref:    AAAAA
            //  Alt:    TTTAG
            var rawRealignedCigar_noM = new CigarAlignment("4M1I");

            mismatchMap = new[]
            { MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch, MatchType.Unmapped };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0);
            Assert.Equal("3S1M1I", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_noM.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_noM, mismatchMap, 4, 0, maskNsOnly: true, prefixNs: 4, suffixNs: 0);
            Assert.Equal("3S1M1I", softclippedCigar.ToString());

            // ---- Softclip Suffix ---- //
            // Original cigar = 3M2S, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    GAATT
            rawRealignedCigar = new CigarAlignment("1I4M");
            mismatchMap       = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2);
            Assert.Equal("1I2M2S", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
            Assert.Equal("1I2M2S", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns for only part of original softclip
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 1);
            Assert.Equal("1I3M1S", softclippedCigar.ToString());

            // Original cigar = 3M2S, With terminal Ns, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    GAANN
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.NMismatch, MatchType.NMismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2);
            Assert.Equal("1I2M2S", softclippedCigar.ToString());
            // Remask Ns Only - has Ns, so should be same
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 2, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
            Assert.Equal("1I2M2S", softclippedCigar.ToString());

            // Original cigar = 5M, Realignment adds I, realigned cigar should be unchanged by softclipping
            //  Ref:    AAAAA
            //  Alt:    GAATT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0);
            Assert.Equal("1I4M", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 0, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I outside of S region
            //  Ref:    AAAAA
            //  Alt:    GATTT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3);
            Assert.Equal("1I1M3S", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("1I1M3S", softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns for only part of original softclip
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 2);
            Assert.Equal("1I2M2S", softclippedCigar.ToString());
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 1);
            Assert.Equal("1I3M1S", softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I outside of S region; MXM-type-softclip. No shortening of softclip.
            //  Ref:    AAAAA
            //  Alt:    GATAT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Mismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3);
            Assert.Equal("1I1M3S", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("1I1M3S", softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I overlapping S region -> Shortening of softclip due to I
            //  Ref:    AAAAA
            //  Alt:    GATTT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
            var rawRealignedCigar_StoI_suffix = new CigarAlignment("2M1I2M");

            // The suffix scenarios below use the suffix fixture throughout (previously they
            // fell back to the prefix-section fixture, which happens to hold the same cigar).
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3);
            Assert.Equal("2M1I2S", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoI_suffix.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("2M1I2S", softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I overlapping S region -> Shortening of softclip due to I
            //  Ref:    AAAAA
            //  Alt:    GATAT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Match, MatchType.Mismatch };
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3);
            Assert.Equal("2M1I1M1S", softclippedCigar.ToString()); // If allow shortening of softclip if bases match
            //Assert.Equal("2M1I2S", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoI_suffix.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoI_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("2M1I2S", softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I and D overlapping S region -> Shortening of softclip
            //  Ref:    AAAAA
            //  Alt:    GATAT
            var rawRealignedCigar_StoID_suffix = new CigarAlignment("2M1I1D2M");

            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3);
            Assert.Equal("2M1I1D1M1S", softclippedCigar.ToString()); // If allow shortening of softclip if bases match
            //Assert.Equal("2M1I1D2S", softclippedCigar.ToString()); // If mask whole original S that became M, regardless of matchiness
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoID_suffix.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("2M1I1D2S", softclippedCigar.ToString());

            // Original cigar = 2M3S, Realignment adds I and D overlapping S region -> Shortening of softclip
            //  Ref:    AAAAA
            //  Alt:    GATTT
            mismatchMap = new[]
            { MatchType.Unmapped, MatchType.Match, MatchType.Mismatch, MatchType.Mismatch, MatchType.Mismatch };
            rawRealignedCigar_StoID_suffix = new CigarAlignment("2M1I1D2M");
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3);
            Assert.Equal("2M1I1D2S", softclippedCigar.ToString());
            // Remask Ns Only
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 0);
            Assert.Equal(rawRealignedCigar_StoID_suffix.ToString(), softclippedCigar.ToString());
            // Remask Ns Only - pretend we had Ns where orig softclip was
            softclippedCigar = Helper.SoftclipCigar(rawRealignedCigar_StoID_suffix, mismatchMap, 0, 3, maskNsOnly: true, prefixNs: 0, suffixNs: 3);
            Assert.Equal("2M1I1D2S", softclippedCigar.ToString());

            // Real example
            mismatchMap =
                CreateMatchTypeArray(new List <Tuple <int, MatchType> >()
            {
                new Tuple <int, MatchType>(1, MatchType.NMismatch),
                new Tuple <int, MatchType>(100, MatchType.Match),
            });
            softclippedCigar = Helper.SoftclipCigar(new CigarAlignment("90M18D11M"), mismatchMap, 5, 14, maskNsOnly: true,
                                                    prefixNs: 1, suffixNs: 0);
            Assert.Equal("1S89M18D11M", softclippedCigar.ToString());

            mismatchMap =
                CreateMatchTypeArray(new List <Tuple <int, MatchType> >()
            {
                new Tuple <int, MatchType>(100, MatchType.Match),
                new Tuple <int, MatchType>(1, MatchType.NMismatch)
            });
            softclippedCigar = Helper.SoftclipCigar(new CigarAlignment("96M18D5M"), mismatchMap, 0, 8, maskNsOnly: true,
                                                    prefixNs: 0, suffixNs: 1);
            Assert.Equal("96M18D4M1S", softclippedCigar.ToString());
        }
Example #6
0
        /// <summary>
        /// Walks the CIGAR operations of a read against the reference and tallies soft-clips,
        /// matches, mismatches, indels and the anchor length into an <see cref="AlignmentSummary"/>.
        /// </summary>
        /// <param name="startIndexInReference">
        /// Index into <paramref name="refSequence"/> at which the alignment starts. When
        /// <paramref name="checkSoftclipsForMismatches"/> is true this is shifted back by the CIGAR's
        /// prefix clip length so that soft-clipped bases can be compared against the reference too.
        /// </param>
        /// <param name="cigarData">CIGAR to walk; also stored on the returned summary.</param>
        /// <param name="refSequence">Reference bases that the read is compared against.</param>
        /// <param name="readSequence">Read bases, indexed in CIGAR order.</param>
        /// <param name="trackActualMismatches">
        /// When true, each counted mismatch is also recorded as a "refIndex_refBase_readBase" string
        /// in <c>MismatchesIncludeSoftclip</c>.
        /// </param>
        /// <param name="checkSoftclipsForMismatches">
        /// When true, soft-clipped bases are compared against the reference and contribute to
        /// <c>NumMismatchesIncludeSoftclip</c>.
        /// </param>
        /// <param name="probeSoftclipPrefix">Not read by this method; kept for signature compatibility.</param>
        /// <param name="probeSoftclipSuffix">Not read by this method; kept for signature compatibility.</param>
        /// <returns>
        /// The populated summary, or <c>null</c> when an 'M' base would fall past the end of
        /// <paramref name="refSequence"/>.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// An 'M' base would fall before index 0 of <paramref name="refSequence"/>.
        /// </exception>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                // Rewind so that the leading soft-clip lines up with the reference bases it would cover.
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead   = 0;
            var anchorLength       = 0;     // 'M' positions seen before the first mismatch/indel
            var endAnchorLength    = 0;     // 'M' positions seen since the most recent mismatch/indel
            var hasHitNonMatch     = false;
            var hasHitNonNSoftclip = false;

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)(operation.Length);
                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        var clippedBase    = readSequence[startIndexInRead + i];
                        var referenceIndex = startIndexInReference + i;

                        summary.NumSoftclips++;

                        // No special treatment for Ns that come after a non-N soft-clipped base: once a
                        // non-N base has been seen, every later soft-clipped base counts as non-N.
                        // The N/non-N distinction appears to target padding-type soft-clips.
                        if (clippedBase != 'N' || hasHitNonNSoftclip)
                        {
                            hasHitNonNSoftclip = true;

                            summary.NumNonNSoftclips++;

                            if (checkSoftclipsForMismatches)
                            {
                                if (referenceIndex < 0 || referenceIndex >= refSequence.Length)
                                {
                                    // Soft-clipped base hangs off either end of the reference: counted
                                    // as a mismatch but never recorded, since there is no reference base.
                                    summary.NumMismatchesIncludeSoftclip++;
                                }
                                else if (clippedBase != refSequence[referenceIndex] && clippedBase != 'N')
                                {
                                    summary.NumMismatchesIncludeSoftclip++;

                                    if (trackActualMismatches)
                                    {
                                        RecordMismatchIncludeSoftclip(summary, referenceIndex, refSequence[referenceIndex], clippedBase);
                                    }
                                }
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        if (startIndexInReference + i > refSequence.Length - 1)
                        {
                            // Read runs off the end of the reference: callers get null rather than an exception.
                            return null;
                        }

                        if (startIndexInReference + i < 0)
                        {
                            throw new InvalidDataException(
                                      "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                                      cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                        }

                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex !=
                            refSequence[startIndexInReference + i])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                RecordMismatchIncludeSoftclip(summary, startIndexInReference + i,
                                                              refSequence[startIndexInReference + i], baseAtIndex);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // An N in the read is neither a match nor a mismatch, but it still extends the anchors.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }


                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    // Soft-clips were lined up against the reference above, so step past them as well.
                    startIndexInReference += opLength;
                }
            }

            // The anchor is the shorter of the leading and trailing clean 'M' runs.
            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return summary;
        }

        /// <summary>
        /// Appends one "refIndex_refBase_readBase" entry to the summary's mismatch list, creating the list on first use.
        /// </summary>
        private static void RecordMismatchIncludeSoftclip(AlignmentSummary summary, int referenceIndex, char referenceBase, char readBase)
        {
            if (summary.MismatchesIncludeSoftclip == null)
            {
                summary.MismatchesIncludeSoftclip = new List<string>();
            }

            // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
            summary.MismatchesIncludeSoftclip.Add(string.Format("{0}_{1}_{2}", referenceIndex, referenceBase, readBase));
        }