Example #1
0
        /// <summary>
        /// Converts partial insertions at the first and/or last cigar operation into softclips and then
        /// refreshes the alignment summary counters stored on <paramref name="result"/>.
        /// </summary>
        /// <remarks>
        /// Assumptions carried by this method:
        /// the cigar contains no softclips yet (an <see cref="InvalidDataException"/> is thrown if one is found while masking),
        /// and <paramref name="indels"/> holds exactly the indels represented in the cigar of <paramref name="result"/>
        /// (it is indexed at 0 unconditionally, so it must not be empty).
        /// </remarks>
        /// <param name="indels">Indels applied to the read; only the first and last entries are inspected.</param>
        /// <param name="read">Read whose sequence is used when recomputing the summary.</param>
        /// <param name="refSequence">Reference sequence used when recomputing the summary.</param>
        /// <param name="result">Realignment result; its cigar and counters are updated in place.</param>
        /// <param name="refSequenceStartIndex">Offset subtracted from the zero-based position passed to the summary computation.</param>
        public void MaskPartialInsertion(HashableIndel[] indels, Read read, string refSequence, RealignmentResult result, int refSequenceStartIndex = 0)
        {
            var leadingIndel  = indels[0];
            var trailingIndel = indels[indels.Length - 1];

            var insertionAtEitherEnd = leadingIndel.Type == AlleleCategory.Insertion ||
                                       trailingIndel.Type == AlleleCategory.Insertion;

            if (insertionAtEitherEnd && (_minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion))
            {
                var maskedCigar = new CigarAlignment();
                var lastOpIndex = result.Cigar.Count - 1;

                for (var opIndex = 0; opIndex <= lastOpIndex; opIndex++)
                {
                    var op = result.Cigar[opIndex];

                    // Softclips are only introduced here; finding one already present violates the precondition.
                    if (op.Type == 'S')
                    {
                        throw new InvalidDataException(
                                  string.Format(
                                      "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping", op.Type, result.Cigar));
                    }

                    // Only the terminal operations are candidates; the leading check takes precedence over the trailing one.
                    var maskThisOp =
                        (opIndex == 0 && EvaluateInsertionAtReadEnds(op, leadingIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion)) ||
                        (opIndex == lastOpIndex && EvaluateInsertionAtReadEnds(op, trailingIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion));

                    maskedCigar.Add(maskThisOp ? new CigarOp('S', op.Length) : op);
                }

                maskedCigar.Compress();
                result.Cigar = maskedCigar;
            }

            // Recompute the counters against the (possibly re-softclipped) cigar.
            var refreshed = Extensions.GetAlignmentSummary(result.Position - 1 - refSequenceStartIndex, result.Cigar, refSequence,
                                                           read.Sequence, _trackActualMismatches, _checkSoftclipsForMismatches);

            result.NumIndels                    = refreshed.NumIndels;
            result.NumNonNMismatches            = refreshed.NumNonNMismatches;
            result.NumNonNSoftclips             = refreshed.NumNonNSoftclips;
            result.NumSoftclips                 = refreshed.NumSoftclips;
            result.NumMismatchesIncludeSoftclip = refreshed.NumMismatchesIncludeSoftclip;
            result.NumIndelBases                = refreshed.NumIndelBases;
            result.NumInsertedBases             = refreshed.NumInsertedBases;
        }
        /// <summary>
        /// Checks that IsSupported accepts a cigar built from known operation types and rejects one
        /// containing the operation type 'U'.
        /// </summary>
        public void ValidateCigarAignment()
        {
            // A cigar parsed from a string of recognised operations is supported.
            var supported = new CigarAlignment("5M3D4M7I2S8M");
            Assert.True(supported.IsSupported());

            // A cigar assembled op-by-op with a 'U' operation in the middle is not.
            var unsupported = new CigarAlignment
            {
                new CigarOp('M', 5),
                new CigarOp('U', 7),
                new CigarOp('I', 3),
                new CigarOp('M', 7)
            };
            Assert.False(unsupported.IsSupported());
        }
Example #3
0
        /// <summary>
        /// Derives a single read's cigar from a stitched cigar string by keeping operations until
        /// <paramref name="readLength"/> read-spanning bases have been consumed.
        /// </summary>
        /// <param name="stitchedCigar">Cigar string of the stitched alignment.</param>
        /// <param name="readLength">Number of read-spanning bases to keep.</param>
        /// <param name="reverse">When true the stitched cigar is walked from its end, and the result is flipped back before returning.</param>
        /// <returns>The cigar covering the requested portion of the stitched alignment.</returns>
        public static CigarAlignment GetReadCigarFromStitched(string stitchedCigar, int readLength, bool reverse)
        {
            var stitched = new CigarAlignment(stitchedCigar);

            if (reverse)
            {
                stitched.Reverse();
            }

            var readCigar      = new CigarAlignment();
            var basesRemaining = readLength;

            for (var opIndex = 0; opIndex < stitched.Count; opIndex++)
            {
                var op = stitched[opIndex];

                // Operations that consume no read bases are carried over untouched.
                if (!op.IsReadSpan())
                {
                    readCigar.Add(op);
                    continue;
                }

                // The operation overshoots the read: keep only the part that still fits and stop.
                if (op.Length > basesRemaining)
                {
                    readCigar.Add(new CigarOp(op.Type, (uint)basesRemaining));
                    break;
                }

                readCigar.Add(op);
                basesRemaining -= (int)op.Length;

                if (basesRemaining == 0)
                {
                    break;
                }
            }

            if (reverse)
            {
                readCigar.Reverse();
            }

            return readCigar;
        }
Example #4
0
        /// <summary>
        /// Trims a cigar to at most <paramref name="readCycles"/> read-spanning bases. Operations that do not
        /// span the read (e.g. deletions) are copied as they are.
        /// </summary>
        /// <param name="cigar">Cigar to trim.</param>
        /// <param name="readCycles">Number of read bases to keep; a value of zero or less yields an empty cigar.</param>
        /// <param name="fromEnd">When true, trimming counts from the end of the cigar instead of the start.</param>
        /// <param name="includeEndDels">When true, non-read-spanning operations are kept even after all read cycles are used up.</param>
        /// <returns>A new, trimmed cigar.</returns>
        public static CigarAlignment GetTrimmed(this CigarAlignment cigar, int readCycles, bool fromEnd = false, bool includeEndDels = true)
        {
            var source  = fromEnd ? cigar.GetReverse() : cigar;
            var trimmed = new CigarAlignment();

            if (readCycles > 0)
            {
                var cyclesLeft = readCycles;

                foreach (CigarOp op in source)
                {
                    if (!op.IsReadSpan())
                    {
                        // Contributes no read cycles (e.g. deletion), so it is just appended when allowed.
                        if (cyclesLeft > 0 || includeEndDels)
                        {
                            trimmed.Add(op);
                        }
                        continue;
                    }

                    if (op.Length > cyclesLeft)
                    {
                        // Partial fit: keep whatever part of the operation still fits, then stop.
                        if (cyclesLeft > 0)
                        {
                            trimmed.Add(new CigarOp(op.Type, (uint)cyclesLeft));
                        }
                        break;
                    }

                    trimmed.Add(op);
                    cyclesLeft -= (int)op.Length;
                }
            }

            if (fromEnd)
            {
                trimmed.Reverse();
            }

            return trimmed;
        }
Example #5
0
        /// <summary>
        /// Verifies that HasSoftclips reflects the presence of 'S' operations for parsed cigars and
        /// stays in sync as operations are added and cleared.
        /// </summary>
        public void HasSoftclips()
        {
            // Softclip at the start or at the end of a parsed cigar.
            Assert.True(new CigarAlignment("1S5M").HasSoftclips);
            Assert.True(new CigarAlignment("5M1S").HasSoftclips);

            // No softclip until one is appended.
            var alignment = new CigarAlignment("5M");
            Assert.False(alignment.HasSoftclips);

            alignment.Add(new CigarOp('S', 1));
            Assert.True(alignment.HasSoftclips);

            // Clearing resets the flag; adding a softclip afterwards sets it again.
            alignment.Clear();
            Assert.False(alignment.HasSoftclips);

            alignment.Add(new CigarOp('S', 1));
            Assert.True(alignment.HasSoftclips);
        }
Example #6
0
        /// <summary>
        /// Verifies that HasIndels reflects the presence of 'I' / 'D' operations for parsed cigars and
        /// stays in sync as operations are added and cleared.
        /// </summary>
        public void HasIndels()
        {
            // Insertion or deletion inside a parsed cigar.
            Assert.True(new CigarAlignment("1M1I5M").HasIndels);
            Assert.True(new CigarAlignment("1M1D5M").HasIndels);

            // No indel until one is appended.
            var alignment = new CigarAlignment("5M");
            Assert.False(alignment.HasIndels);

            alignment.Add(new CigarOp('I', 1));
            Assert.True(alignment.HasIndels);

            // Clearing resets the flag; adding an insertion afterwards sets it again.
            alignment.Clear();
            Assert.False(alignment.HasIndels);

            alignment.Add(new CigarOp('I', 1));
            Assert.True(alignment.HasIndels);
        }
Example #7
0
        /// <summary>
        /// Exercises parsing, Add, Reverse and Clear on a cigar and checks both the operation count
        /// and the string form after each step.
        /// </summary>
        public void CigarString_Manipulation_Tests()
        {
            var alignment = new CigarAlignment("7M3I2D1S11M2S");

            // Parsing round-trips.
            Assert.Equal(6, alignment.Count);
            Assert.Equal("7M3I2D1S11M2S", alignment.ToString());

            // Appending an operation.
            var appended = new CigarOp('M', 6);
            alignment.Add(appended);
            Assert.Equal("7M3I2D1S11M2S6M", alignment.ToString());
            Assert.Equal(7, alignment.Count);

            // Reversing flips the operation order but not the count.
            alignment.Reverse();
            Assert.Equal("6M2S11M1S2D3I7M", alignment.ToString());
            Assert.Equal(7, alignment.Count);

            // Clearing leaves an empty cigar.
            alignment.Clear();
            Assert.Equal("", alignment.ToString());
            Assert.Equal(0, alignment.Count);
        }
            /// <summary>
            /// Captures what is needed to realign <paramref name="read"/> at <paramref name="position"/>:
            /// the terminal lengths to exclude, a fresh all-match cigar and position map for the remaining middle,
            /// the original softclip lengths, and the trimmed sequence.
            /// </summary>
            /// <param name="read">Read to be realigned.</param>
            /// <param name="position">Position used to seed the fresh position map.</param>
            /// <param name="keepProbeSoftclips">When true, existing softclips may be used as the excluded prefix/suffix (see conditions below).</param>
            /// <param name="keepBothSideSoftclips">When true (and <paramref name="keepProbeSoftclips"/> is set), both softclips are always used.</param>
            public ReadToRealignDetails(Read read, int position, bool keepProbeSoftclips = false, bool keepBothSideSoftclips = false)
            {
                NPrefixLength = read.GetNPrefix();
                NSuffixLength = read.GetNSuffix();

                if (keepProbeSoftclips)
                {
                    // && binds tighter than ||, so the "no N prefix" requirement applies only to the strand/pairing test.
                    var usePrefixClip = keepBothSideSoftclips ||
                                        ((!read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) && NPrefixLength == 0);
                    if (usePrefixClip)
                    {
                        NPrefixLength = (int)read.CigarData.GetPrefixClip();
                    }

                    var useSuffixClip = keepBothSideSoftclips ||
                                        ((read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) && NSuffixLength == 0);
                    if (useSuffixClip)
                    {
                        NSuffixLength = (int)read.CigarData.GetSuffixClip();
                    }
                }

                // Only the non-terminal middle gets a cigar here; the prefix is added back after the realignment attempts.
                var middleLength = read.Sequence.Length - NPrefixLength - NSuffixLength;
                var freshCigar   = new CigarAlignment { new CigarOp('M', (uint)middleLength) };
                freshCigar.Compress();

                // Start from a fresh position map (sized from ReadLength rather than Sequence.Length).
                var freshPositionMap = new PositionMap(read.ReadLength - NPrefixLength - NSuffixLength);
                Read.UpdatePositionMap(position, freshCigar, freshPositionMap);

                PrefixSoftclip = read.CigarData.GetPrefixClip();
                SuffixSoftclip = read.CigarData.GetSuffixClip();

                SequenceWithoutTerminalNs = read.Sequence.Substring(NPrefixLength, middleLength);

                PositionMapWithoutTerminalNs = freshPositionMap;
                PositionMapLength            = freshPositionMap.Length;
                FreshCigarWithoutTerminalNs  = freshCigar;
                Position                     = position;
            }
Example #9
0
        /// <summary>
        /// Returns the operations of <paramref name="cigar"/> whose zero-based operation index lies in
        /// [<paramref name="start"/>, <paramref name="end"/>), i.e. <paramref name="end"/> is exclusive.
        /// </summary>
        /// <param name="cigar">Source cigar.</param>
        /// <param name="start">Index of the first operation to include.</param>
        /// <param name="end">Index one past the last operation to include.</param>
        /// <returns>A new cigar containing the selected operations (empty when the range selects none).</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="start"/> is greater than <paramref name="end"/>.</exception>
        public static CigarAlignment GetSubCigar(this CigarAlignment cigar, int start, int end)
        {
            if (end < start)
            {
                throw new ArgumentException("To get a sub-cigar between operation numbers, start must be less than or equal to end. Values supplied: " + start + ", " + end + ".");
            }

            var subCigar = new CigarAlignment();

            // Clamp the requested window to the operations that actually exist.
            var firstIndex = Math.Max(start, 0);
            var stopIndex  = Math.Min(end, cigar.Count);

            for (var opIndex = firstIndex; opIndex < stopIndex; opIndex++)
            {
                subCigar.Add(cigar[opIndex]);
            }

            return subCigar;
        }
Example #10
0
        /// <summary>
        /// Attempts to realign <paramref name="read"/> against <paramref name="refSequence"/> by layering the given
        /// indels onto a fresh all-match alignment, anchored on the left or the right end of the read.
        /// </summary>
        /// <param name="indels">Candidate indels to apply; expected to be sorted by ascending position and non-empty (index 0 is read unconditionally).</param>
        /// <param name="read">Read being realigned.</param>
        /// <param name="refSequence">Reference sequence the read is compared against.</param>
        /// <param name="anchorOnLeft">True to apply indels first-to-last anchored on the left; false to apply them last-to-first anchored on the right.</param>
        /// <returns>The realignment result, or null as soon as any indel cannot be applied.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when a softclip is found in the cigar before re-softclipping, when the edit distance cannot be
        /// computed after re-masking, or when no base of the read has a position after re-masking.
        /// </exception>
        private RealignmentResult RealignForAnchor(CandidateIndel[] indels, Read read, string refSequence, bool anchorOnLeft)
        {
            var position = read.GetAdjustedPosition(anchorOnLeft);
            var freshCigarWithoutTerminalNs = new CigarAlignment();

            var nPrefixLength = read.GetNPrefix();
            var nSuffixLength = read.GetNSuffix();

            // Only build up the cigar for the non-N middle. Add the N prefix back on after the realignment attempts.
            freshCigarWithoutTerminalNs.Add(new CigarOp('M', (uint)(read.Sequence.Length - nPrefixLength - nSuffixLength)));
            freshCigarWithoutTerminalNs.Compress();

            // start with fresh position map
            // NOTE(review): sized from read.ReadLength while the cigar above uses read.Sequence.Length - presumably equal; confirm.
            var positionMapWithoutTerminalNs = new int[read.ReadLength - nPrefixLength - nSuffixLength];

            Read.UpdatePositionMap(position, freshCigarWithoutTerminalNs, positionMapWithoutTerminalNs);
            // Original softclip lengths are remembered for the optional re-masking step at the end.
            var prefixSoftclip = read.CigarData.GetPrefixClip();
            var suffixSoftclip = read.CigarData.GetSuffixClip();

            RealignmentResult result      = null;
            var sequenceWithoutTerminalNs = read.Sequence.Substring(nPrefixLength, read.Sequence.Length - nPrefixLength - nSuffixLength);

            // layer on indels one by one, indels already sorted by ascending position
            // The same position map instance is passed to every call; only the result of the last call is kept.
            if (anchorOnLeft)
            {
                for (var i = 0; i < indels.Length; i++)
                {
                    result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indels[i], refSequence, true, positionMapWithoutTerminalNs);

                    if (result == null)
                    {
                        return(null);
                    }
                }
            }
            else
            {
                for (var i = indels.Length - 1; i >= 0; i--)
                {
                    result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indels[i], refSequence, false, positionMapWithoutTerminalNs);

                    if (result == null)
                    {
                        return(null);
                    }
                }
            }


            // Softclip partial insertions at read ends
            // Assumption: there should be no softclips in the cigar by this time
            // Assumption: there should be exactly as many/the same indels in "indels" as are represented in the cigar in "result.Cigar".
            // NOTE(review): an empty "indels" array would throw here on indels[0] (and leave result null).
            var  firstIndel   = indels[0];
            var  lastIndel    = indels[indels.Length - 1];
            bool hasInsertion = (firstIndel.Type == AlleleCategory.Insertion || lastIndel.Type == AlleleCategory.Insertion);

            if (hasInsertion)
            {
                if (_minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion)
                {
                    var newCigar = new CigarAlignment {
                    };
                    for (int i = 0; i < result.Cigar.Count; i++)
                    {
                        if (result.Cigar[i].Type == 'S')
                        {
                            throw new InvalidDataException(
                                      string.Format(
                                          "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping", result.Cigar[i].Type, result.Cigar));
                        }
                        // Only the first and last operations are candidates for conversion to a softclip.
                        else if (i == 0 && Helper.EvaluateInsertionAtReadEnds(result.Cigar[i], firstIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion))
                        {
                            newCigar.Add(new CigarOp('S', result.Cigar[i].Length));
                        }
                        else if (i == result.Cigar.Count - 1 && Helper.EvaluateInsertionAtReadEnds(result.Cigar[i], lastIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion))
                        {
                            newCigar.Add(new CigarOp('S', result.Cigar[i].Length));
                        }
                        else
                        {
                            newCigar.Add(result.Cigar[i]);
                        }
                    }

                    newCigar.Compress();
                    result.Cigar = newCigar;
                }
            }


            // Re-append the N-prefix
            // Terminal N positions are marked with -1 in the full-length position map.
            var nPrefixPositionMap = Enumerable.Repeat(-1, nPrefixLength);
            var nSuffixPositionMap = Enumerable.Repeat(-1, nSuffixLength);
            var finalPositionMap   = nPrefixPositionMap.Concat(positionMapWithoutTerminalNs).Concat(nSuffixPositionMap).ToArray();

            // Wrap the realigned cigar in softclips covering the terminal Ns; Compress() is called afterwards
            // (presumably merging adjacent same-type ops and dropping zero-length ones - confirm).
            var finalCigar = new CigarAlignment {
                new CigarOp('S', (uint)nPrefixLength)
            };

            foreach (CigarOp op in result.Cigar)
            {
                finalCigar.Add(op);
            }
            finalCigar.Add(new CigarOp('S', (uint)nSuffixLength));
            finalCigar.Compress();
            result.Cigar = finalCigar;

            // Recompute the summary counters against the full-length cigar and the full read sequence.
            var UpdatedSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence, read.Sequence);

            result.NumIndels                    = UpdatedSummary.NumIndels;
            result.NumNonNMismatches            = UpdatedSummary.NumNonNMismatches;
            result.NumMismatchesIncludeSoftclip = UpdatedSummary.NumMismatchesIncludeSoftclip;
            result.NumNonNSoftclips             = UpdatedSummary.NumNonNSoftclips;
            result.NumSoftclips                 = UpdatedSummary.NumSoftclips;
            result.NumIndelBases                = UpdatedSummary.NumIndelBases;
            result.MismatchesIncludeSoftclip    = UpdatedSummary.MismatchesIncludeSoftclip;
            result.HasHighFrequencyIndel        = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);


            // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, optionally re-mask them.
            // NOTE(review): result has already been dereferenced above, so the null check below cannot fail at this point.
            if (result != null && _remaskSoftclips)
            {
                var mismatchMap = Helper.GetMismatchMap(read.Sequence, finalPositionMap, refSequence);

                // maskNsOnly is set, so only M positions inside the terminal N runs are candidates for re-softclipping.
                var softclipAdjustedCigar = Helper.SoftclipCigar(result.Cigar, mismatchMap, prefixSoftclip, suffixSoftclip, maskNsOnly: true, prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false), suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true));

                // Update position map to account for any softclipping added
                // Softclipped positions are overwritten with -2 (the N-padding added above uses -1).
                var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
                for (var i = 0; i < adjustedPrefixClip; i++)
                {
                    finalPositionMap[i] = -2;
                }
                var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
                for (var i = 0; i < adjustedSuffixClip; i++)
                {
                    finalPositionMap[finalPositionMap.Length - 1 - i] = -2;
                }

                var editDistance = Helper.GetEditDistance(read.Sequence, finalPositionMap, refSequence);
                if (editDistance == null)
                {
                    // This shouldn't happen at this point - we already have a successful result
                    throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                                   string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
                }

                var readHasPosition = finalPositionMap.Any(p => p >= 0);
                if (!readHasPosition)
                {
                    throw new InvalidDataException(string.Format("Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})", freshCigarWithoutTerminalNs, string.Join(",", finalPositionMap), read.CigarData, softclipAdjustedCigar));
                }

                // The new position is the first non-negative entry in the position map.
                result.Position      = finalPositionMap.First(p => p >= 0);
                result.Cigar         = softclipAdjustedCigar;
                result.NumMismatches = editDistance.Value;


                // Recompute the summary once more for the re-masked cigar and position.
                var newSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence,
                                                                read.Sequence);

                result.NumNonNMismatches            = newSummary.NumNonNMismatches;
                result.NumMismatchesIncludeSoftclip = newSummary.NumMismatchesIncludeSoftclip;
                result.NumNonNSoftclips             = newSummary.NumNonNSoftclips;
                result.NumSoftclips              = newSummary.NumSoftclips;
                result.NumIndelBases             = newSummary.NumIndelBases;
                result.MismatchesIncludeSoftclip = newSummary.MismatchesIncludeSoftclip;
                result.HasHighFrequencyIndel     = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);
                // NOTE(review): this overwrites the NumIndelBases value taken from newSummary three lines above with the
                // pre-remask value from UpdatedSummary - confirm which of the two assignments is intended.
                result.NumIndelBases             = UpdatedSummary.NumIndelBases;
            }

            return(result);
        }
Example #11
0
        /// <summary>
        /// Re-applies terminal softclipping to a realigned cigar: 'M' positions that fall inside the region that was
        /// softclipped in the original alignment may be turned back into 'S', subject to the flags below.
        /// </summary>
        /// <param name="rawCigar">Cigar produced by realignment.</param>
        /// <param name="mismatchMap">Per-read-position match classification; indexed by read position.</param>
        /// <param name="originalSoftclipPrefix">Length of the leading softclip in the original alignment.</param>
        /// <param name="originalSoftclipSuffix">Length of the trailing softclip in the original alignment.</param>
        /// <param name="rescueEdgeMatches">When true, matching bases may be kept as 'M' until softclipping has started on that side.</param>
        /// <param name="maskNsOnly">When true, only positions inside the terminal N runs given by <paramref name="prefixNs"/> / <paramref name="suffixNs"/> are softclipped.</param>
        /// <param name="prefixNs">Number of leading N bases; used only when <paramref name="maskNsOnly"/> is true.</param>
        /// <param name="suffixNs">Number of trailing N bases; used only when <paramref name="maskNsOnly"/> is true.</param>
        /// <param name="softclipEvenIfMatch">Forces softclipping on the prefix side; it is not consulted on the suffix side.</param>
        /// <param name="softclipRepresentsMess">Extends the prefix-side anchor protection to the op seen two steps earlier; it is not consulted on the suffix side.</param>
        /// <param name="allowOneSoftclipMismatchPer">Divisor applied to each original softclip length to obtain the mismatch threshold for that side.</param>
        /// <returns>
        /// <paramref name="rawCigar"/> itself when there was no original softclipping or nothing was softclipped here;
        /// otherwise a new, compressed cigar.
        /// </returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs       = 0, int suffixNs = 0,
                                                   bool softclipEvenIfMatch = false, bool softclipRepresentsMess = true, float allowOneSoftclipMismatchPer = 12)
        {
            // If realignment creates a bunch of mismatches at beginning where it was once softclipped,
            // can we softclip them?
            // Which bases should be softclipped?
            // - Things that were softclipped before and are mismatches? Or are Ms?
            // - Things that were softclipped before and are Ns
            // Softclips in new alignment can be shorter than before, but not longer
            // Softclips should be terminal
            // This is rooted in an assumption that the original softclips are terminal

            // Nothing was softclipped originally, so there is nothing to re-mask.
            if (originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0)
            {
                return(rawCigar);
            }

            // Presumably one entry per base/position (entries are replaced below with length-1 ops) - confirm against Expand().
            var expandedCigar = rawCigar.Expand();
            var changed       = false;

            // Start at end of potential prefix softclip region and work backwards. This way we can rescue things that were matches previously sandwiched in softclips and now freed up by realignment.
            var mismatchMapIndex = (int)originalSoftclipPrefix;
            var startedSoftclip  = false;

            // Candidate region per side: bounded by the first / last op that is neither 'M' nor 'S', and by the original softclip length.
            // NOTE(review): if FindIndex/FindLastIndex return -1 when nothing matches (List<T> semantics - confirm the type
            // returned by Expand()), the prefix bound becomes 0 while the suffix bound becomes Count + 1; the two sides are
            // not symmetric in that case.
            var maxSoftclipPrefixLength = Math.Min(expandedCigar.FindIndex(x => x.Type != 'M' && x.Type != 'S') + 1, originalSoftclipPrefix);
            var maxSoftclipSuffixLength = Math.Min(expandedCigar.Count - expandedCigar.FindLastIndex(x => x.Type != 'M' && x.Type != 'S'), originalSoftclipSuffix);

            // Despite the "min" in the names, these are used as upper thresholds: a side is "too messy" when its
            // mismatch count exceeds the value.
            var minMismatchesToSoftclipPrefix = originalSoftclipPrefix / allowOneSoftclipMismatchPer;

            var minMismatchesToSoftclipSuffix = originalSoftclipSuffix / allowOneSoftclipMismatchPer;

            var numMismatchesInOrigPrefixClip = 0;
            var tmpMismatchMapIndex           = mismatchMapIndex;

            // Count mismatches over maxSoftclipPrefixLength read positions, walking backwards from originalSoftclipPrefix - 1.
            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                tmpMismatchMapIndex--;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);

                if (foundMismatch)
                {
                    numMismatchesInOrigPrefixClip++;
                }
            }

            var prefixTooMessyToRescue = numMismatchesInOrigPrefixClip > minMismatchesToSoftclipPrefix;

            // 'N' is used here as a "no previous op yet" placeholder.
            var previousOp         = 'N';
            var previousPreviousOp = 'N';

            // Prefix pass: visit expanded-cigar indices maxSoftclipPrefixLength - 1 down to 0.
            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                var index = (int)maxSoftclipPrefixLength - 1 - i;

                mismatchMapIndex--;

                var opAtIndex = expandedCigar[index].Type;
                // Only 'M' positions can be converted; other ops are just remembered as the previous op.
                // (previousPreviousOp is not updated on this path.)
                if (opAtIndex != 'M')
                {
                    previousOp = opAtIndex;
                    continue;
                }

                bool shouldSoftclip;

                if (maskNsOnly)
                {
                    // Only positions inside the leading N run are softclipped.
                    shouldSoftclip = index < prefixNs;
                }
                else
                {
                    shouldSoftclip = softclipEvenIfMatch || !rescueEdgeMatches || startedSoftclip || prefixTooMessyToRescue;
                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }

                    // Don't resoftclip if we are <1 base from the end.
                    if (previousOp == 'D' || previousOp == 'I' || (softclipRepresentsMess && (previousPreviousOp == 'D' || previousPreviousOp == 'I')))
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }

                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                previousPreviousOp = previousOp;
                previousOp         = opAtIndex;
            }

            // Start at beginning of potential suffix softclip region and work forwards
            startedSoftclip  = false;
            // The index is incremented before each use, so the first position examined is mismatchMap.Length - maxSoftclipSuffixLength.
            mismatchMapIndex = mismatchMap.Length - (int)maxSoftclipSuffixLength - 1;

            var numMismatchesInOrigSuffixClip = 0;

            tmpMismatchMapIndex = mismatchMapIndex;
            // Count mismatches over the last maxSoftclipSuffixLength read positions.
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                tmpMismatchMapIndex++;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);
                if (foundMismatch)
                {
                    numMismatchesInOrigSuffixClip++;
                }
            }

            var suffixTooMessyToRescue = numMismatchesInOrigSuffixClip > minMismatchesToSoftclipSuffix;

            // Suffix pass: visit the last maxSoftclipSuffixLength expanded-cigar entries in forward order.
            // NOTE(review): unlike the prefix pass, this pass does not consult softclipEvenIfMatch or
            // softclipRepresentsMess and tracks only one previous op - confirm the asymmetry is intended.
            previousOp = 'N';
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                var index = expandedCigar.Count() - ((int)maxSoftclipSuffixLength - i);
                mismatchMapIndex++;

                var opAtIndex = expandedCigar[index].Type;

                if (opAtIndex != 'M')
                {
                    previousOp = opAtIndex;
                    continue;
                }
                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    // Only read positions inside the trailing N run are softclipped.
                    shouldSoftclip = suffixNs > 0 && mismatchMapIndex >= rawCigar.GetReadSpan() - suffixNs;
                }
                else
                {
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || suffixTooMessyToRescue;

                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }
                    if (previousOp == 'D' || previousOp == 'I')
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }
                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                previousOp = opAtIndex;
            }

            // We can only anchor a read on an M, so if we've softclipped everything away we're in trouble! Add back one.
            if (!expandedCigar.Any(o => o.Type == 'M'))
            {
                var hasAnyNonSoftclipPos = expandedCigar.Any(o => o.Type != 'S');
                var firstNonSoftclipPos  = hasAnyNonSoftclipPos
                    ? expandedCigar.FindIndex(o => o.Type != 'S')
                    : (expandedCigar.Count);
                // Set the last position of softclip to M.
                // NOTE(review): if the very first entry is not a softclip, firstNonSoftclipPos is 0 and the index below is -1 - confirm this cannot occur.
                expandedCigar[firstNonSoftclipPos - 1] = new CigarOp('M', expandedCigar[firstNonSoftclipPos - 1].Length);
            }

            // Nothing was softclipped by the two passes above: hand back the caller's cigar instance.
            if (!changed)
            {
                return(rawCigar);
            }

            // Re-compile back into a revised cigar.
            var revisedCigar = new CigarAlignment();

            foreach (var cigarOp in expandedCigar)
            {
                revisedCigar.Add(cigarOp);
            }
            revisedCigar.Compress();

            return(revisedCigar);
        }
Example #12
0
        /// <summary>
        /// Extracts the part of <paramref name="cigar"/> covering read bases from <paramref name="start"/> up to
        /// (but not including) <paramref name="end"/>, and compresses the result.
        /// </summary>
        /// <param name="cigar">Source cigar.</param>
        /// <param name="start">Read offset at which the clipped region begins.</param>
        /// <param name="end">Read offset at which the clipped region stops; nothing is produced unless it is greater than <paramref name="start"/>.</param>
        /// <param name="includeEndDels">
        /// When true, non-read-spanning operations met before anything has been kept are buffered and emitted ahead of the
        /// first kept read-spanning operation, and such operations are still appended once the read-base budget is used up.
        /// </param>
        /// <param name="includeWholeEndIns">When true, an insertion that straddles <paramref name="end"/> is kept at its full length instead of being cut.</param>
        /// <returns>A new, compressed cigar for the requested region.</returns>
        public static CigarAlignment GetClippedCigar(this CigarAlignment cigar, int start, int end, bool includeEndDels = true, bool includeWholeEndIns = false)
        {
            var basesSeen     = 0;
            var regionLength  = end - start;
            var clipped       = new CigarAlignment();
            var pendingDels   = new CigarAlignment();
            var previousWasDeletion = false;

            if (regionLength > 0)
            {
                for (var opIndex = 0; opIndex < cigar.Count; opIndex++)
                {
                    var op        = cigar[opIndex];
                    var spansRead = op.IsReadSpan();

                    // Read-spanning operation that ends before the region starts: just account for its bases.
                    if (spansRead && basesSeen + op.Length - 1 < start)
                    {
                        previousWasDeletion = false;
                        basesSeen          += (int)op.Length;
                        continue;
                    }

                    if (spansRead)
                    {
                        // Emit any buffered leading non-read-spanning operations that directly precede this one.
                        if (previousWasDeletion && pendingDels.Count > 0)
                        {
                            foreach (CigarOp pending in pendingDels)
                            {
                                clipped.Add(pending);
                            }
                        }

                        if (op.Length + basesSeen > end)
                        {
                            // The operation runs past the end of the region: keep the fitting part (or the whole insertion) and stop.
                            if (end - basesSeen > 0)
                            {
                                var keepWholeInsertion = includeWholeEndIns && op.Type == 'I';
                                var keptLength         = keepWholeInsertion ? op.Length : (uint)(end - basesSeen);
                                clipped.Add(new CigarOp(op.Type, keptLength));
                            }
                            break;
                        }

                        clipped.Add(op);
                        basesSeen += (int)op.Length;
                    }
                    else
                    {
                        // Buffered operations are only valid while they form an unbroken run of deletions.
                        if (pendingDels.Count > 0 && !previousWasDeletion)
                        {
                            pendingDels.Clear();
                        }

                        if (clipped.Count == 0 && includeEndDels)
                        {
                            pendingDels.Add(op);
                        }

                        if (clipped.Count > 0 && (basesSeen < regionLength || includeEndDels))
                        {
                            // Doesn't contribute any read cycles (e.g. deletion), just add.
                            clipped.Add(op);
                        }
                    }

                    previousWasDeletion = op.Type == 'D';
                }
            }

            clipped.Compress();

            return clipped;
        }
Example #13
0
        /// <summary>
        /// Re-appends the terminal N prefix/suffix to a realigned read: the position map is padded with
        /// -1 entries and the CIGAR on <paramref name="result"/> is wrapped in softclips of the same
        /// lengths. When <c>_remaskSoftclips</c> is set, the CIGAR is additionally run through
        /// <c>Helper.SoftclipCigar</c> and the position, mismatch counts, indel/N offsets and alignment
        /// summary fields on <paramref name="result"/> are recomputed from the final CIGAR.
        /// </summary>
        /// <param name="read">Read being realigned; its sequence, qualities, name and original CIGAR are consulted.</param>
        /// <param name="nPrefixLength">Number of positions to re-add at the start of the read.</param>
        /// <param name="nSuffixLength">Number of positions to re-add at the end of the read.</param>
        /// <param name="positionMapWithoutTerminalNs">Position map covering the read without its terminal Ns.</param>
        /// <param name="result">Realignment result to update in place. Must not be null: it is dereferenced unconditionally.</param>
        /// <param name="context">Reference snippet (sequence and start position) used for mismatch evaluation.</param>
        /// <param name="prefixSoftclip">Prefix softclip length handed to <c>Helper.SoftclipCigar</c>.</param>
        /// <param name="suffixSoftclip">Suffix softclip length handed to <c>Helper.SoftclipCigar</c>.</param>
        /// <param name="freshCigarWithoutTerminalNs">Only used for diagnostics in the "no alignable bases" error message.</param>
        /// <exception cref="InvalidDataException">
        /// Thrown when the edit distance cannot be computed for the final position map, or when the final
        /// position map has no mappable bases.
        /// </exception>
        public void ReapplySoftclips(Read read, int nPrefixLength, int nSuffixLength, PositionMap positionMapWithoutTerminalNs,
                                     RealignmentResult result, GenomeSnippet context, uint prefixSoftclip, uint suffixSoftclip,
                                     CigarAlignment freshCigarWithoutTerminalNs)
        {
            // Re-append the N-prefix and N-suffix to the position map; each re-added position is recorded as -1.
            var nPrefixPositionMap = Enumerable.Repeat(-1, nPrefixLength);
            var nSuffixPositionMap = Enumerable.Repeat(-1, nSuffixLength);
            // TODO maybe have a function for combining pos maps instead
            var finalPositionMap = new PositionMap(nPrefixPositionMap.Concat(positionMapWithoutTerminalNs.Map).Concat(nSuffixPositionMap).ToArray());

            // Wrap the realigned CIGAR in softclips covering the re-added prefix/suffix.
            // NOTE(review): zero-length 'S' ops are added when a length is 0 - presumably Compress() drops them; confirm.
            var finalCigar = new CigarAlignment {
                new CigarOp('S', (uint)nPrefixLength)
            };

            foreach (CigarOp op in result.Cigar)
            {
                finalCigar.Add(op);
            }

            finalCigar.Add(new CigarOp('S', (uint)nSuffixLength));
            finalCigar.Compress();
            result.Cigar = finalCigar;

            // result has already been dereferenced above, so a null check here would be dead code;
            // the only thing left to decide is whether re-masking is enabled.
            if (!_remaskSoftclips)
            {
                return;
            }

            // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, re-mask them.
            var mismatchMap =
                Helper.GetMismatchMap(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);

            var softclipAdjustedCigar = Helper.SoftclipCigar(result.Cigar, mismatchMap, prefixSoftclip, suffixSoftclip,
                                                             maskNsOnly: _maskNsOnly, prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false),
                                                             suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true), softclipEvenIfMatch: _keepProbeSoftclips || _keepBothSideSoftclips, softclipRepresentsMess: (!(_keepBothSideSoftclips || _keepProbeSoftclips)));

            // Update position map to account for any softclipping added: clipped positions are overwritten with -2.
            var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
            for (var i = 0; i < adjustedPrefixClip; i++)
            {
                finalPositionMap.UpdatePositionAtIndex(i, -2, true);
            }

            var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
            for (var i = 0; i < adjustedSuffixClip; i++)
            {
                finalPositionMap.UpdatePositionAtIndex(finalPositionMap.Length - 1 - i, -2, true);
            }

            var editDistance =
                Helper.GetNumMismatches(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);
            if (editDistance == null)
            {
                // This shouldn't happen at this point - we already have a successful result
                // NOTE(review): string.Join is handed the PositionMap object itself rather than its Map;
                // confirm the message actually prints the individual positions.
                throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                               string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
            }

            // TODO PERF - See how much this really helps analytically. I'm thinking maybe kill this altogether and remove from eval
            // Note: computed from the mismatch map taken before the softclip adjustment above.
            var sumOfMismatching = Helper.GetSumOfMismatchQualities(mismatchMap, read.Qualities);

            var readHasPosition = finalPositionMap.HasAnyMappableBases();
            if (!readHasPosition)
            {
                throw new InvalidDataException(string.Format(
                                                   "Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})", freshCigarWithoutTerminalNs,
                                                   string.Join(",", finalPositionMap), read.CigarData, softclipAdjustedCigar));
            }

            result.Position      = finalPositionMap.FirstMappableBase(); // TODO this used to be >= 0 but changed to > 0. Confirm correct.
            result.Cigar         = softclipAdjustedCigar;
            result.NumMismatches = editDistance.Value;

            // Indel and N-ified offsets were recorded relative to the read without its prefix;
            // shift them by the prefix length that has just been re-added.
            var addedAtFinal = new List <int>();
            foreach (var i in result.IndelsAddedAt)
            {
                addedAtFinal.Add(i + nPrefixLength);
            }
            result.IndelsAddedAt = addedAtFinal;

            var nifiedAtFinal = new List <int>();
            foreach (var i in result.NifiedAt)
            {
                nifiedAtFinal.Add(i + nPrefixLength);
            }
            result.NifiedAt = nifiedAtFinal;

            // Recompute the summary statistics against the final (re-softclipped) CIGAR.
            var newSummary = Extensions.GetAlignmentSummary(result.Position - 1 - context.StartPosition, result.Cigar,
                                                            context.Sequence,
                                                            read.Sequence, _trackActualMismatches, _checkSoftclipsForMismatches);

            result.NumNonNMismatches            = newSummary.NumNonNMismatches;
            result.NumNonNSoftclips             = newSummary.NumNonNSoftclips;
            result.NumSoftclips                 = newSummary.NumSoftclips;
            result.NumInsertedBases             = newSummary.NumInsertedBases;
            result.NumMismatchesIncludeSoftclip = newSummary.NumMismatchesIncludeSoftclip;
            //result.MismatchesIncludeSoftclip = newSummary.MismatchesIncludeSoftclip;
            result.SumOfMismatchingQualities = sumOfMismatching;
            result.AnchorLength = newSummary.AnchorLength;
        }
Example #14
0
        /// <summary>
        /// Re-softclips terminal M bases of a realigned CIGAR, restricted to the stretches that were
        /// softclipped in the original alignment.
        /// </summary>
        /// <remarks>
        /// Softclips in the new alignment can be shorter than before, but not longer, and they stay
        /// terminal: the candidate window on each side is capped both by the original softclip length and
        /// by the nearest non-M operation. Only M operations are ever converted to S.
        /// If the conversion would leave the CIGAR without any M, the softclip position directly before
        /// the first non-softclip operation (or the last position if everything is softclipped) is turned
        /// back into an M so the read can still be anchored.
        /// </remarks>
        /// <param name="rawCigar">CIGAR produced by realignment. Returned as-is when both original softclip lengths are 0.</param>
        /// <param name="mismatchMap">Per-read-base match classification, indexed by read position.</param>
        /// <param name="originalSoftclipPrefix">Length of the prefix softclip in the original alignment.</param>
        /// <param name="originalSoftclipSuffix">Length of the suffix softclip in the original alignment.</param>
        /// <param name="rescueEdgeMatches">
        /// When true, matching bases on the inner edge of a window stay M until the first non-match is met
        /// (walking outwards); everything further out is softclipped. When false, every M in the window is softclipped.
        /// </param>
        /// <param name="maskNsOnly">When true, the mismatch map is ignored and only the terminal N stretches are softclipped.</param>
        /// <param name="prefixNs">Length of the leading N stretch (used only with <paramref name="maskNsOnly"/>).</param>
        /// <param name="suffixNs">Length of the trailing N stretch (used only with <paramref name="maskNsOnly"/>).</param>
        /// <returns>The compressed, re-softclipped CIGAR (or <paramref name="rawCigar"/> itself when there is nothing to do).</returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs = 0, int suffixNs = 0)
        {
            // This is rooted in an assumption that the original softclips are terminal.
            if (originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0)
            {
                return(rawCigar);
            }

            // Assumes Expand() yields one length-1 op per base, as the single-base replacements below imply.
            var expandedCigar = rawCigar.Expand();
            var opCount       = expandedCigar.Count;

            var firstNonMatchIndex = expandedCigar.FindIndex(x => x.Type != 'M');
            var lastNonMatchIndex  = expandedCigar.FindLastIndex(x => x.Type != 'M');

            // FindIndex/FindLastIndex return -1 when every op is an M. In that case nothing limits the
            // window except the original softclip length, so treat the whole CIGAR as available instead
            // of letting the -1 sentinel collapse the prefix window to 0 (or push the suffix window to Count + 1).
            var prefixWindowLimit = firstNonMatchIndex < 0 ? opCount : firstNonMatchIndex + 1;
            var suffixWindowLimit = lastNonMatchIndex < 0 ? opCount : opCount - lastNonMatchIndex;

            var maxSoftclipPrefixLength = (int)Math.Min(prefixWindowLimit, originalSoftclipPrefix);
            var maxSoftclipSuffixLength = (int)Math.Min(suffixWindowLimit, originalSoftclipSuffix);

            // Start at end of potential prefix softclip region and work backwards. This way we can rescue things
            // that were matches previously sandwiched in softclips and now freed up by realignment.
            var startedSoftclip = false;
            for (var index = maxSoftclipPrefixLength - 1; index >= 0; index--)
            {
                if (expandedCigar[index].Type != 'M')
                {
                    continue;
                }

                // Every op in front of an M inside the prefix window is itself an M, so the expanded-CIGAR
                // index is also the read position and can be used directly to look up the mismatch map.
                // (Deriving the lookup from originalSoftclipPrefix instead would be shifted whenever the
                // window was shortened by a non-M op.)
                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    shouldSoftclip = index < prefixNs;
                }
                else
                {
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || mismatchMap[index] != MatchType.Match;
                }

                if (shouldSoftclip)
                {
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }
            }

            // Start at beginning of potential suffix softclip region and work forwards.
            // Both the CIGAR and the mismatch map are addressed relative to their ends here.
            startedSoftclip = false;
            var suffixCigarStart    = opCount - maxSoftclipSuffixLength;
            var suffixMismatchStart = mismatchMap.Length - maxSoftclipSuffixLength;
            var readSpan            = rawCigar.GetReadSpan(); // loop-invariant, only consulted when maskNsOnly is set
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                var index            = suffixCigarStart + i;
                var mismatchMapIndex = suffixMismatchStart + i;

                if (expandedCigar[index].Type != 'M')
                {
                    continue;
                }

                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    shouldSoftclip = suffixNs > 0 && mismatchMapIndex >= readSpan - suffixNs;
                }
                else
                {
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || mismatchMap[mismatchMapIndex] != MatchType.Match;
                }

                if (shouldSoftclip)
                {
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }
            }

            // We can only anchor a read on an M, so if we've softclipped everything away we're in trouble! Add back one.
            if (!expandedCigar.Any(o => o.Type == 'M'))
            {
                // -1 means the CIGAR is softclips only; in that case fall back to the very last position.
                var firstNonSoftclipPos = expandedCigar.FindIndex(o => o.Type != 'S');
                if (firstNonSoftclipPos < 0)
                {
                    firstNonSoftclipPos = expandedCigar.Count;
                }

                // Set the last position of the leading softclip to M.
                // NOTE(review): this indexes -1 if the CIGAR is empty or starts with a non-S, non-M op - confirm callers never pass that.
                expandedCigar[firstNonSoftclipPos - 1] = new CigarOp('M', expandedCigar[firstNonSoftclipPos - 1].Length);
            }

            // Re-compile back into a revised cigar.
            var revisedCigar = new CigarAlignment();

            foreach (var cigarOp in expandedCigar)
            {
                revisedCigar.Add(cigarOp);
            }
            revisedCigar.Compress();

            return(revisedCigar);
        }