Пример #1
0
        /// <summary>
        /// Resolves two candidate CIGAR operations into a single one.
        /// </summary>
        /// <param name="r1Op">First candidate operation; may be null.</param>
        /// <param name="r2Op">Second candidate operation; may be null.</param>
        /// <returns>
        /// The other operand when one of them is null (null when both are); <paramref name="r1Op"/> when both
        /// have the same type or when only <paramref name="r2Op"/> is of type 'S'; <paramref name="r2Op"/> when
        /// only <paramref name="r1Op"/> is of type 'S'; null for any other type conflict.
        /// </returns>
        private CigarOp GetCombinedOp(CigarOp r1Op, CigarOp r2Op)
        {
            // A missing operand simply yields the other one (which is itself null when both are missing).
            if (r1Op == null)
            {
                return r2Op;
            }
            if (r2Op == null)
            {
                return r1Op;
            }

            var firstType  = r1Op.Type;
            var secondType = r2Op.Type;

            // Agreement, or a type-'S' second operand, keeps the first operand.
            if (firstType == secondType || secondType == 'S')
            {
                return r1Op;
            }

            // TODO - more nuanced resolution
            // Only a type-'S' first operand gives way to the second; every other conflict is unresolved.
            return firstType == 'S' ? r2Op : null;
        }
Пример #2
0
 /// <summary>
 /// Evaluates an operation at a read end and reports whether it is an insertion that is partial or unanchored.
 /// </summary>
 /// <param name="cigar">The read-end operation to evaluate; only type 'I' can produce a true result.</param>
 /// <param name="indel">The indel target being realigned against.</param>
 /// <param name="minimumUnanchoredInsertionLength">Threshold applied to the indel target's length (not to the observed operation).</param>
 /// <param name="maskPartialInsertion">When true, an observed insertion shorter than the target counts as partial.</param>
 /// <returns>True when <paramref name="cigar"/> is an insertion that is partial or unanchored; otherwise false.</returns>
 public static bool EvaluateInsertionAtReadEnds(CigarOp cigar, HashableIndel indel, int minimumUnanchoredInsertionLength, bool maskPartialInsertion)
 {
     if (cigar.Type != 'I')
     {
         return false;
     }

     // Partial: the observed insertion is shorter than the target, and partial masking is enabled.
     if (maskPartialInsertion && cigar.Length < indel.Length)
     {
         return true;
     }

     // Unanchored: the target itself is shorter than the configured minimum.
     // TODO is this really the right move? Why not count this against the observation rather than the expected?
     return indel.Length < minimumUnanchoredInsertionLength;
 }
Пример #3
0
 /// <summary>
 /// Evaluates an operation at a read end and reports whether it is an insertion that is partial or unanchored.
 /// </summary>
 /// <param name="cigar">The read-end operation to evaluate; only type 'I' can produce a true result.</param>
 /// <param name="indel">The indel target being realigned against.</param>
 /// <param name="minimumUnanchoredInsertionLength">Threshold applied to the indel target's length (not to the observed operation).</param>
 /// <param name="maskPartialInsertion">When true, an observed insertion shorter than the target counts as partial.</param>
 /// <returns>True when <paramref name="cigar"/> is an insertion that is partial or unanchored; otherwise false.</returns>
 public static bool EvaluateInsertionAtReadEnds(CigarOp cigar, CandidateIndel indel, int minimumUnanchoredInsertionLength, bool maskPartialInsertion)
 {
     if (cigar.Type != 'I')
     {
         return false;
     }

     // Partial: the observed insertion is shorter than the target, and partial masking is enabled.
     if (maskPartialInsertion && cigar.Length < indel.Length)
     {
         return true;
     }

     // Unanchored: the target itself is shorter than the configured minimum.
     return indel.Length < minimumUnanchoredInsertionLength;
 }
Пример #4
0
        /// <summary>
        /// Checks <c>CigarOp.Equals</c>: equal for identical type and length (including two default-constructed
        /// instances), unequal for null or when the type, the length, or both differ.
        /// </summary>
        public void CigarOp_Equality_Tests()
        {
            // Default-constructed operations compare equal to each other.
            var defaultOp = new CigarOp();
            Assert.True(defaultOp.Equals(new CigarOp()));

            // Same type and length compare equal.
            var match21 = new CigarOp('M', 21);
            Assert.True(match21.Equals(new CigarOp('M', 21)));

            // Anything else compares unequal.
            Assert.False(match21.Equals(null));                 // null check
            Assert.False(match21.Equals(new CigarOp('I', 21))); // type mismatch
            Assert.False(match21.Equals(new CigarOp('M', 22))); // length mismatch
            Assert.False(match21.Equals(new CigarOp('D', 22))); // both mismatch
        }
Пример #5
0
        /// <summary>
        /// Replaces CIGAR operations of <paramref name="alignment"/> with soft clips ('S') from the point where
        /// an indel ('D' or 'I') that satisfies the <paramref name="firstCollision"/> test is reached, provided
        /// at least one 'M' operation was seen before any such indel. Deletions are replaced by zero-length
        /// soft clips; existing 'S' and 'H' operations are left as they are. The CIGAR is compressed at the end.
        /// </summary>
        /// <param name="alignment">
        /// Alignment whose <c>CigarData</c> is edited; in reverse mode its <c>Position</c> is also advanced by
        /// the reference length of the operations that were clipped.
        /// </param>
        /// <param name="reverse">
        /// When true the operations are walked from last to first, tracking a running end position that starts
        /// at <c>alignment.EndPosition</c>; when false they are walked first to last, tracking a running start
        /// position that starts at <c>alignment.Position</c>.
        /// </param>
        /// <param name="firstCollision">
        /// Reference coordinate compared against the running position to decide whether an indel triggers
        /// clipping (presumably where the conflict with another read begins - confirm with callers).
        /// </param>
        public static void SoftclipAfterIndel(BamAlignment alignment, bool reverse, int firstCollision)
        {
            // TODO Test this intensely
            var cigarOps    = alignment.CigarData;
            // hitIndel: a qualifying indel has been reached. hitAnyMatch: an 'M' was seen before that happened.
            // Clipping is applied only while both are true; neither flag is ever reset.
            var hitIndel    = false;
            var hitAnyMatch = false;

            if (reverse)
            {
                // Running reference coordinate, walked backwards as reference-spanning ops are passed.
                var endPosition        = alignment.EndPosition;
                // Total reference length of the ops converted to soft clips.
                var positionAdjustment = 0;
                for (int i = 0; i < cigarOps.Count; i++)
                {
                    // Visit operations from the last one to the first one.
                    var currentIndex = cigarOps.Count - 1 - i;
                    var op           = cigarOps[currentIndex];
                    var type         = op.Type;
                    var length       = op.Length;
                    // An indel qualifies when the running end position minus its extent (its length if it
                    // spans reference, otherwise 1) is at or before firstCollision.
                    if ((type == 'D' || type == 'I') &&
                        endPosition - (op.IsReferenceSpan() ? op.Length : 1) <= firstCollision)
                    {
                        hitIndel = true;
                    }
                    else if (type == 'M' && !hitIndel)
                    {
                        hitAnyMatch = true;
                    }

                    if (hitIndel && hitAnyMatch)
                    {
                        // Existing clips stay untouched; skipping here also skips the endPosition update below.
                        if (type == 'S' || type == 'H')
                        {
                            continue;
                        }

                        // Clipped reference-spanning ops no longer contribute to the aligned span, so their
                        // length is accumulated to move the alignment start afterwards.
                        if (cigarOps[currentIndex].IsReferenceSpan())
                        {
                            positionAdjustment += (int)length;
                        }

                        // A deletion has no read bases, so it becomes a zero-length soft clip.
                        cigarOps[currentIndex] = new CigarOp('S', type == 'D' ? 0 : length);
                    }

                    // Uses the original op captured above, not the replacement written into the list.
                    if (op.IsReferenceSpan())
                    {
                        endPosition -= (int)op.Length;
                    }
                }

                alignment.Position = alignment.Position + positionAdjustment;
            }
            else
            {
                // Running reference coordinate of the start of the current operation.
                int startIndexInReference = alignment.Position;

                for (int i = 0; i < cigarOps.Count; i++)
                {
                    var operation = cigarOps[i];
                    var type      = cigarOps[i].Type;
                    var length    = cigarOps[i].Length;
                    // An indel qualifies when it starts at or after firstCollision.
                    if ((type == 'D' || type == 'I') && startIndexInReference >= firstCollision)
                    {
                        hitIndel = true;
                    }
                    else if (type == 'M' && !hitIndel)
                    {
                        hitAnyMatch = true;
                    }


                    if (hitIndel && hitAnyMatch)
                    {
                        if (type != 'S' && type != 'H')
                        {
                            // A deletion has no read bases, so it becomes a zero-length soft clip.
                            cigarOps[i] = new CigarOp('S', type == 'D' ? 0 : length);
                        }
                    }


                    // Uses the original op captured above, not the replacement written into the list.
                    if (operation.IsReferenceSpan())
                    {
                        startIndexInReference += (int)operation.Length;
                    }
                }
            }

            // TODO could just do this in place
            // Presumably merges adjacent same-type ops and drops zero-length ones - confirm in CigarAlignment.Compress.
            cigarOps.Compress();
        }
Пример #6
0
        /// <summary>
        /// Builds a CIGAR from a per-base position map.
        /// </summary>
        /// <param name="positionMap">
        /// One entry per read base: -1 marks an inserted base ("I"); any other value is the reference position
        /// the base is matched to ("M"). A jump of more than one between consecutive matched positions is
        /// emitted as a deletion ("D") of the skipped length.
        /// </param>
        /// <param name="softClip">
        /// When true, a first and/or last operation that is not 'M' is replaced by a soft clip ('S') of the
        /// same length.
        /// </param>
        /// <returns>The <see cref="CigarAlignment"/> parsed from the assembled CIGAR text.</returns>
        public static CigarAlignment ConstructCigar(int[] positionMap, bool softClip = false)
        {
            var text = new StringBuilder();

            // Most recent matched reference position; -1 until the first match has been seen.
            var previousRefPosition = -1;

            // The run currently being accumulated (operation letter and how many bases it covers so far).
            var runOp     = String.Empty;
            var runLength = 0;

            foreach (var mappedPosition in positionMap)
            {
                var currentOp = mappedPosition == -1 ? "I" : "M";

                if (currentOp == "M")
                {
                    // A gap in matched reference positions means a deletion sits between them.
                    if (previousRefPosition != -1 && mappedPosition > previousRefPosition + 1)
                    {
                        text.Append(runLength).Append(runOp);  // flush the run that precedes the deletion
                        text.Append(mappedPosition - previousRefPosition - 1).Append("D");

                        // The deletion is already written; mark it so it is not flushed a second time below.
                        runOp     = "D";
                        runLength = 0;
                    }

                    previousRefPosition = mappedPosition;
                }

                // Same operation as the open run: just extend it.
                if (currentOp == runOp)
                {
                    runLength++;
                    continue;
                }

                // Operation changed: flush the finished run (unless there is none yet or it was a deletion
                // that has already been written) and open a new one.
                if (!string.IsNullOrEmpty(runOp) && runOp != "D")
                {
                    text.Append(runLength).Append(runOp);
                }
                runOp     = currentOp;
                runLength = 1;
            }

            // Flush whatever run is still open.
            text.Append(runLength).Append(runOp);

            var cigar = new CigarAlignment(text.ToString());
            if (!softClip)
            {
                return cigar;
            }

            // Terminal operations other than a match are turned into soft clips of equal length.
            var firstOp = cigar[0];
            if (firstOp.Type != 'M')
            {
                cigar[0] = new CigarOp('S', firstOp.Length);
            }

            var lastIndex = cigar.Count - 1;
            var lastOp    = cigar[lastIndex];
            if (lastOp.Type != 'M')
            {
                cigar[lastIndex] = new CigarOp('S', lastOp.Length);
            }

            return cigar;
        }
Пример #7
0
        /// <summary>
        /// Re-applies terminal soft clipping to a realigned CIGAR: 'M' positions inside the regions that were
        /// soft clipped in the original alignment may be turned back into 'S', one position at a time on the
        /// expanded CIGAR, depending on the mismatch map and the option flags.
        /// </summary>
        /// <param name="rawCigar">The realigned CIGAR. Returned as-is when there was no original soft clip or when no position was changed.</param>
        /// <param name="mismatchMap">Match classification indexed by <c>mismatchMapIndex</c> below; presumably one entry per read base - confirm with callers.</param>
        /// <param name="originalSoftclipPrefix">Length of the soft clip at the start of the original alignment; upper bound for the new prefix clip.</param>
        /// <param name="originalSoftclipSuffix">Length of the soft clip at the end of the original alignment; upper bound for the new suffix clip.</param>
        /// <param name="rescueEdgeMatches">When false, every eligible 'M' is soft clipped; when true, clipping starts at the first mismatch encountered (subject to the other flags).</param>
        /// <param name="maskNsOnly">When true, only the positions designated by <paramref name="prefixNs"/> / <paramref name="suffixNs"/> are soft clipped and the mismatch-based logic is skipped.</param>
        /// <param name="prefixNs">In <paramref name="maskNsOnly"/> mode, expanded-CIGAR indices below this value are clipped in the prefix.</param>
        /// <param name="suffixNs">In <paramref name="maskNsOnly"/> mode, mismatch-map indices at or beyond read span minus this value are clipped in the suffix (only when greater than 0).</param>
        /// <param name="softclipEvenIfMatch">When true, eligible prefix 'M' positions are clipped regardless of the mismatch map. Only read in the prefix pass.</param>
        /// <param name="softclipRepresentsMess">When true, the prefix pass also refuses to clip a position whose second-previous visited op was 'D' or 'I'. Only read in the prefix pass.</param>
        /// <param name="allowOneSoftclipMismatchPer">Divisor applied to the original clip length to get the mismatch count above which a clip region is considered too messy to rescue.</param>
        /// <returns><paramref name="rawCigar"/> itself when nothing changed, otherwise a new compressed <see cref="CigarAlignment"/>.</returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs       = 0, int suffixNs = 0,
                                                   bool softclipEvenIfMatch = false, bool softclipRepresentsMess = true, float allowOneSoftclipMismatchPer = 12)
        {
            // If realignment creates a bunch of mismatches at beginning where it was once softclipped,
            // can we softclip them?
            // Which bases should be softclipped?
            // - Things that were softclipped before and are mismatches? Or are Ms?
            // - Things that were softclipped before and are Ns
            // Softclips in new alignment can be shorter than before, but not longer
            // Softclips should be terminal
            // This is rooted in an assumption that the original softclips are terminal

            // Nothing was soft clipped originally, so there is nothing to restore.
            if (originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0)
            {
                return(rawCigar);
            }

            // Presumably one length-1 op per position (positions are overwritten with length-1 ops below) - confirm in CigarAlignment.Expand.
            var expandedCigar = rawCigar.Expand();
            // Set only when a position is converted to 'S'; decides whether rawCigar is returned untouched.
            var changed       = false;

            // Start at end of potential prefix softclip region and work backwards. This way we can rescue things that were matches previously sandwiched in softclips and now freed up by realignment.
            var mismatchMapIndex = (int)originalSoftclipPrefix;
            var startedSoftclip  = false;

            // The clip regions are capped by the original clip lengths and by the first / last op that is neither 'M' nor 'S'.
            // NOTE(review): if expandedCigar is a List<T>, FindIndex yields -1 when every op is 'M' or 'S', which makes the
            // prefix limit 0, while the suffix limit becomes Count + 1 before the Math.Min - confirm this asymmetry is intended.
            var maxSoftclipPrefixLength = Math.Min(expandedCigar.FindIndex(x => x.Type != 'M' && x.Type != 'S') + 1, originalSoftclipPrefix);
            var maxSoftclipSuffixLength = Math.Min(expandedCigar.Count - expandedCigar.FindLastIndex(x => x.Type != 'M' && x.Type != 'S'), originalSoftclipSuffix);

            // Despite the "min" in the name, these are used as the mismatch counts above which a region is "too messy".
            var minMismatchesToSoftclipPrefix = originalSoftclipPrefix / allowOneSoftclipMismatchPer;

            var minMismatchesToSoftclipSuffix = originalSoftclipSuffix / allowOneSoftclipMismatchPer;

            // First prefix pass: count mismatches (including N mismatches) walking the mismatch map backwards
            // from originalSoftclipPrefix - 1, for maxSoftclipPrefixLength entries.
            var numMismatchesInOrigPrefixClip = 0;
            var tmpMismatchMapIndex           = mismatchMapIndex;

            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                tmpMismatchMapIndex--;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);

                if (foundMismatch)
                {
                    numMismatchesInOrigPrefixClip++;
                }
            }

            var prefixTooMessyToRescue = numMismatchesInOrigPrefixClip > minMismatchesToSoftclipPrefix;

            // Types of the last two ops visited; 'N' is the "nothing seen yet" placeholder.
            var previousOp         = 'N';
            var previousPreviousOp = 'N';

            // Second prefix pass: decide position by position, from the inner edge of the region towards the read start.
            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                var index = (int)maxSoftclipPrefixLength - 1 - i;

                // NOTE(review): the map index counts down from originalSoftclipPrefix while the CIGAR index counts down
                // from maxSoftclipPrefixLength; they differ whenever the region was capped - confirm this is intended.
                mismatchMapIndex--;

                var opAtIndex = expandedCigar[index].Type;
                // Only 'M' positions are candidates. previousPreviousOp is not updated on this path.
                if (opAtIndex != 'M')
                {
                    previousOp = opAtIndex;
                    continue;
                }

                bool shouldSoftclip;

                if (maskNsOnly)
                {
                    shouldSoftclip = index < prefixNs;
                }
                else
                {
                    // Once clipping has started it continues to the end of the read (soft clips must be terminal).
                    shouldSoftclip = softclipEvenIfMatch || !rescueEdgeMatches || startedSoftclip || prefixTooMessyToRescue;
                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }

                    // Don't resoftclip if we are <1 base from the end.
                    // This overrides every reason above, including startedSoftclip.
                    if (previousOp == 'D' || previousOp == 'I' || (softclipRepresentsMess && (previousPreviousOp == 'D' || previousPreviousOp == 'I')))
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }

                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                // Records the original type ('M'), not the 'S' that may just have been written.
                previousPreviousOp = previousOp;
                previousOp         = opAtIndex;
            }

            // Start at beginning of potential suffix softclip region and work forwards
            startedSoftclip  = false;
            // Positioned one before the region; the loops below pre-increment.
            mismatchMapIndex = mismatchMap.Length - (int)maxSoftclipSuffixLength - 1;

            // First suffix pass: count mismatches (including N mismatches) in the last maxSoftclipSuffixLength map entries.
            var numMismatchesInOrigSuffixClip = 0;

            tmpMismatchMapIndex = mismatchMapIndex;
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                tmpMismatchMapIndex++;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);
                if (foundMismatch)
                {
                    numMismatchesInOrigSuffixClip++;
                }
            }

            var suffixTooMessyToRescue = numMismatchesInOrigSuffixClip > minMismatchesToSoftclipSuffix;

            // Second suffix pass: decide position by position, from the inner edge of the region towards the read end.
            // NOTE(review): unlike the prefix pass, this pass does not consult softclipEvenIfMatch, softclipRepresentsMess
            // or previousPreviousOp - confirm the asymmetry is intended.
            previousOp = 'N';
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                var index = expandedCigar.Count() - ((int)maxSoftclipSuffixLength - i);
                mismatchMapIndex++;

                var opAtIndex = expandedCigar[index].Type;

                // Only 'M' positions are candidates.
                if (opAtIndex != 'M')
                {
                    previousOp = opAtIndex;
                    continue;
                }
                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    shouldSoftclip = suffixNs > 0 && mismatchMapIndex >= rawCigar.GetReadSpan() - suffixNs;
                }
                else
                {
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || suffixTooMessyToRescue;

                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }
                    if (previousOp == 'D' || previousOp == 'I')
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }
                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                previousOp = opAtIndex;
            }

            // We can only anchor a read on an M, so if we've softclipped everything away we're in trouble! Add back one.
            // NOTE(review): this does not set `changed`, so when nothing was clipped above the restored M is discarded by
            // the early return below; and if the very first op is not 'S' the index computed here is -1 - confirm both
            // cases are unreachable.
            if (!expandedCigar.Any(o => o.Type == 'M'))
            {
                var hasAnyNonSoftclipPos = expandedCigar.Any(o => o.Type != 'S');
                var firstNonSoftclipPos  = hasAnyNonSoftclipPos
                    ? expandedCigar.FindIndex(o => o.Type != 'S')
                    : (expandedCigar.Count);
                // Set the last position of softclip to M.
                expandedCigar[firstNonSoftclipPos - 1] = new CigarOp('M', expandedCigar[firstNonSoftclipPos - 1].Length);
            }

            // Hand back the caller's own instance when no position was converted.
            if (!changed)
            {
                return(rawCigar);
            }

            // Re-compile back into a revised cigar.
            var revisedCigar = new CigarAlignment();

            foreach (var cigarOp in expandedCigar)
            {
                revisedCigar.Add(cigarOp);
            }
            // Presumably merges the per-position ops back into runs - confirm in CigarAlignment.Compress.
            revisedCigar.Compress();

            return(revisedCigar);
        }
Пример #8
0
        /// <summary>
        /// Walks the read's CIGAR operations while tracking the current reference position and the offset into
        /// the read's bases. For every base of an 'M' operation, each variant of interest located at that
        /// position gets its reference count and/or variant count incremented when the base equals the first
        /// character of the reference allele or of the first variant allele. Bases whose quality is below
        /// <c>MinimumBaseQScore</c> are not counted. Processing stops at the first CIGAR operation other than
        /// 'M', 'S', 'I' or 'D'.
        /// </summary>
        /// <param name="read">The alignment whose bases are examined.</param>
        /// <param name="nextVariantIndex">
        /// Index into <c>Variants</c> of the first variant to consider; variants found to lie before the
        /// current position are skipped for the rest of the read.
        /// </param>
        private void ProcessReadBases(BamAlignment read, int nextVariantIndex)
        {
            int refPosition = read.Position;
            int readOffset  = 0;
            int opCount     = read.CigarData.Count;

            for (int opIndex = 0; opIndex < opCount; opIndex++)
            {
                CigarOp op     = read.CigarData[opIndex];
                var     opType = op.Type;

                // Soft clips and insertions consume read bases but no reference positions.
                if (opType == 'S' || opType == 'I')
                {
                    readOffset += (int)op.Length;
                    continue;
                }

                // Deletions consume reference positions but no read bases.
                if (opType == 'D')
                {
                    refPosition += (int)op.Length;
                    continue;
                }

                // We don't know how to cope with any other CIGAR operation; bail out!
                if (opType != 'M')
                {
                    return;
                }

                // Matches/mismatches: reference position and read offset advance together, one base at a time.
                for (int step = 0; step < op.Length; step++, refPosition++, readOffset++)
                {
                    for (int varIndex = nextVariantIndex; varIndex < this.Variants.Count; varIndex++)
                    {
                        VcfVariant variant = this.Variants[varIndex];

                        // Vcf positions are 1-based, bam file positions are 0-based.
                        var variantPosition = variant.ReferencePosition - 1;

                        // Variant lies ahead of this base: nothing more to check at this position.
                        if (variantPosition > refPosition)
                        {
                            break;
                        }

                        // Variant lies behind this base: never look at it again for this read.
                        if (variantPosition < refPosition)
                        {
                            nextVariantIndex++;
                            continue;
                        }

                        // Skip low-quality base calls.
                        if (read.Qualities[readOffset] < MinimumBaseQScore)
                        {
                            continue;
                        }

                        char calledBase = read.Bases[readOffset];
                        if (calledBase == variant.ReferenceAllele[0])
                        {
                            this.ReferenceCounts[varIndex]++;
                        }
                        if (calledBase == variant.VariantAlleles[0][0])
                        {
                            this.VariantCounts[varIndex]++;
                        }
                    }
                }
            }
        }
Пример #9
0
 /// <summary>
 /// Creates an item from a CIGAR operation together with an optional base and an optional quality value.
 /// </summary>
 /// <param name="op">Stored in <c>CigarOp</c>.</param>
 /// <param name="seqBase">Stored in <c>Base</c>; may be null.</param>
 /// <param name="quality">Stored in <c>Quality</c>; may be null.</param>
 public StitchableItem(CigarOp op, char? seqBase, byte? quality)
 {
     this.CigarOp = op;
     this.Base    = seqBase;
     this.Quality = quality;
 }
Пример #10
0
        /// <summary>
        /// Re-applies terminal soft clipping to a realigned CIGAR: 'M' positions inside the regions that were
        /// soft clipped in the original alignment may be turned back into 'S', one position at a time on the
        /// expanded CIGAR, depending on the mismatch map and the option flags.
        /// </summary>
        /// <param name="rawCigar">The realigned CIGAR. Returned as-is only when there was no original soft clip at either end.</param>
        /// <param name="mismatchMap">Match classification indexed by <c>mismatchMapIndex</c> below; presumably one entry per read base - confirm with callers.</param>
        /// <param name="originalSoftclipPrefix">Length of the soft clip at the start of the original alignment; upper bound for the new prefix clip.</param>
        /// <param name="originalSoftclipSuffix">Length of the soft clip at the end of the original alignment; upper bound for the new suffix clip.</param>
        /// <param name="rescueEdgeMatches">When false, every eligible 'M' is soft clipped; when true, clipping starts at the first map entry that is not <c>MatchType.Match</c>.</param>
        /// <param name="maskNsOnly">When true, only the positions designated by <paramref name="prefixNs"/> / <paramref name="suffixNs"/> are soft clipped and the mismatch map is not consulted.</param>
        /// <param name="prefixNs">In <paramref name="maskNsOnly"/> mode, expanded-CIGAR indices below this value are clipped in the prefix.</param>
        /// <param name="suffixNs">In <paramref name="maskNsOnly"/> mode, mismatch-map indices at or beyond read span minus this value are clipped in the suffix (only when greater than 0).</param>
        /// <returns><paramref name="rawCigar"/> itself when both original clip lengths are 0, otherwise a new compressed <see cref="CigarAlignment"/>.</returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs = 0, int suffixNs = 0)
        {
            // If realignment creates a bunch of mismatches at beginning where it was once softclipped,
            // can we softclip them?
            // Which bases should be softclipped?
            // - Things that were softclipped before and are mismatches? Or are Ms?
            // - Things that were softclipped before and are Ns
            // Softclips in new alignment can be shorter than before, but not longer
            // Softclips should be terminal
            // This is rooted in an assumption that the original softclips are terminal

            // Nothing was soft clipped originally, so there is nothing to restore.
            if (originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0)
            {
                return(rawCigar);
            }

            // Presumably one length-1 op per position (positions are overwritten with length-1 ops below) - confirm in CigarAlignment.Expand.
            var expandedCigar = rawCigar.Expand();

            // Start at end of potential prefix softclip region and work backwards. This way we can rescue things that were matches previously sandwiched in softclips and now freed up by realignment.
            var mismatchMapIndex = (int)originalSoftclipPrefix;
            var startedSoftclip  = false;

            // The clip regions are capped by the original clip lengths and by the first / last op that is not 'M'.
            // NOTE(review): if expandedCigar is a List<T>, FindIndex yields -1 when every op is 'M', which makes the
            // prefix limit 0, while the suffix limit becomes Count + 1 before the Math.Min - confirm this asymmetry is intended.
            var maxSoftclipPrefixLength = Math.Min(expandedCigar.FindIndex(x => x.Type != 'M') + 1, originalSoftclipPrefix);
            var maxSoftclipSuffixLength = Math.Min(expandedCigar.Count - expandedCigar.FindLastIndex(x => x.Type != 'M'), originalSoftclipSuffix);

            // Prefix pass: from the inner edge of the region towards the read start.
            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                var index = (int)maxSoftclipPrefixLength - 1 - i;

                // NOTE(review): the map index counts down from originalSoftclipPrefix while the CIGAR index counts down
                // from maxSoftclipPrefixLength; they differ whenever the region was capped - confirm this is intended.
                mismatchMapIndex--;

                // Only 'M' positions are candidates.
                if (expandedCigar[index].Type != 'M')
                {
                    continue;
                }

                bool shouldSoftclip;

                if (maskNsOnly)
                {
                    shouldSoftclip = index < prefixNs;
                }
                else
                {
                    // Once clipping has started it continues to the end of the read (soft clips must be terminal).
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || mismatchMap[mismatchMapIndex] != MatchType.Match;
                }

                if (shouldSoftclip)
                {
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }
            }

            // Start at beginning of potential suffix softclip region and work forwards
            startedSoftclip  = false;
            // Positioned one before the region; the loop below pre-increments.
            mismatchMapIndex = mismatchMap.Length - (int)maxSoftclipSuffixLength - 1;
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                var index = expandedCigar.Count() - ((int)maxSoftclipSuffixLength - i);
                mismatchMapIndex++;

                // Only 'M' positions are candidates.
                if (expandedCigar[index].Type != 'M')
                {
                    continue;
                }
                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    shouldSoftclip = suffixNs > 0 && mismatchMapIndex >= rawCigar.GetReadSpan() - suffixNs;
                }
                else
                {
                    // Once clipping has started it continues to the end of the read (soft clips must be terminal).
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || mismatchMap[mismatchMapIndex] != MatchType.Match;
                }
                if (shouldSoftclip)
                {
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }
            }

            // We can only anchor a read on an M, so if we've softclipped everything away we're in trouble! Add back one.
            // NOTE(review): if the very first op is not 'S' the index computed here is -1 - confirm that case is unreachable.
            if (!expandedCigar.Any(o => o.Type == 'M'))
            {
                var hasAnyNonSoftclipPos = expandedCigar.Any(o => o.Type != 'S');
                var firstNonSoftclipPos  = hasAnyNonSoftclipPos
                    ? expandedCigar.FindIndex(o => o.Type != 'S')
                    : (expandedCigar.Count);
                // Set the last position of softclip to M.
                expandedCigar[firstNonSoftclipPos - 1] = new CigarOp('M', expandedCigar[firstNonSoftclipPos - 1].Length);
            }

            // Re-compile back into a revised cigar.
            var revisedCigar = new CigarAlignment();

            foreach (var cigarOp in expandedCigar)
            {
                revisedCigar.Add(cigarOp);
            }
            // Presumably merges the per-position ops back into runs - confirm in CigarAlignment.Compress.
            revisedCigar.Compress();

            return(revisedCigar);
        }