/// <summary>
/// Builds an approximate direction string for <paramref name="newCigarData"/> by walking the
/// expanded old cigar and the expanded new cigar in lock-step and carrying the old per-op
/// directions over to the new ops.
/// </summary>
/// <param name="cigarDirections">Directions for the old cigar; expanded to one direction per op.</param>
/// <param name="cigarData">The old cigar that <paramref name="cigarDirections"/> describes.</param>
/// <param name="newCigarData">The new cigar that needs a direction string.</param>
/// <returns>The string form of the compressed directions assigned to the new cigar's ops.</returns>
/// <remarks>
/// Old ops for which <c>IsReadSpan()</c> is false are skipped. New ops for which <c>IsReadSpan()</c>
/// is false take the direction at the current position in the old cigar. When the old cigar is
/// exhausted first, the remaining new ops take the direction of the last old op.
/// </remarks>
private static string RecalculateApproximateStitchedDirections(CigarDirection cigarDirections, CigarAlignment cigarData, CigarAlignment newCigarData)
        {
            var cigarBaseDirectionMap = cigarDirections.Expand().ToArray();

            var cigarBaseAlleleMap    = cigarData.Expand();
            var newCigarBaseAlleleMap = newCigarData.Expand();

            var directions = new List<DirectionOp>();

            var cigarBaseIndex    = 0;
            var newCigarBaseIndex = 0;

            while (newCigarBaseIndex < newCigarBaseAlleleMap.Count)
            {
                // Skip old ops that do not span the read. The bound check matters: a cigar that ends
                // in such ops would otherwise walk past the end of the expanded list.
                while (cigarBaseIndex < cigarBaseAlleleMap.Count && !cigarBaseAlleleMap[cigarBaseIndex].IsReadSpan())
                {
                    cigarBaseIndex++;
                }

                if (cigarBaseIndex >= cigarBaseAlleleMap.Count)
                {
                    // New is longer than old: fill out the rest with the last direction of the old cigar.
                    directions.Add(new DirectionOp(cigarBaseDirectionMap[cigarBaseIndex - 1], newCigarBaseAlleleMap.Count - newCigarBaseIndex));
                    break;
                }

                // New ops that do not span the read have no counterpart in the old cigar. For
                // consistency they take the direction we are currently on in the old cigar.
                while (newCigarBaseIndex < newCigarBaseAlleleMap.Count && !newCigarBaseAlleleMap[newCigarBaseIndex].IsReadSpan())
                {
                    directions.Add(new DirectionOp(cigarBaseDirectionMap[cigarBaseIndex], 1));
                    newCigarBaseIndex++;
                }

                if (newCigarBaseIndex >= newCigarBaseAlleleMap.Count)
                {
                    // The new cigar ended on ops that do not span the read; nothing left to assign.
                    break;
                }

                // Both cursors are on a read-spanning op: carry the direction across one-to-one.
                directions.Add(new DirectionOp(cigarBaseDirectionMap[cigarBaseIndex], 1));

                cigarBaseIndex++;
                newCigarBaseIndex++;
            }

            var compressedDirections = DirectionHelper.CompressDirections(directions);

            return new CigarDirection(compressedDirections).ToString();
        }
Example #2
0
        /// <summary>
        /// Lays the expanded ops of two cigars onto a shared set of stitched positions, keyed by
        /// offset from the start of read 1, and returns those positions in ascending offset order.
        /// </summary>
        /// <param name="cigar1">Cigar of read 1; its first op sits at offset 0.</param>
        /// <param name="cigar2">Cigar of read 2; its first op sits at offset <c>firstPos2 - firstPos1</c>.</param>
        /// <param name="firstPos2">Start position of read 2.</param>
        /// <param name="firstPos1">Start position of read 1.</param>
        /// <returns>The stitched positions ordered by offset.</returns>
        private List <StitchedPosition> GetStitchedSites(CigarAlignment cigar1, CigarAlignment cigar2, long firstPos2, long firstPos1)
        {
            var read1Ops = cigar1.Expand();
            var read2Ops = cigar2.Expand();

            var sitesByOffset = new Dictionary<int, StitchedPosition>();

            _positionsUsed = 0;

            // Read 1 defines the coordinate system, so it starts at offset 0.
            var offset = 0;
            foreach (var op in read1Ops)
            {
                StitchedPosition site;
                if (!sitesByOffset.TryGetValue(offset, out site))
                {
                    site = GetFreshStitchedPosition();
                    sitesByOffset[offset] = site;
                }

                if (!op.IsReferenceSpan())
                {
                    // Ops that do not advance along the reference are parked in front of the next mapped site.
                    site.UnmappedPrefix.R1Ops.Add(op);
                    continue;
                }

                site.MappedSite.R1Ops.Add(op);
                offset++;
            }

            // Read 2 starts wherever it sits relative to read 1.
            offset = (int)(firstPos2 - firstPos1);
            foreach (var op in read2Ops)
            {
                StitchedPosition site;
                if (!sitesByOffset.TryGetValue(offset, out site))
                {
                    site = GetFreshStitchedPosition();
                    sitesByOffset[offset] = site;
                }

                if (!op.IsReferenceSpan())
                {
                    site.UnmappedPrefix.R2Ops.Add(op);
                    continue;
                }

                site.MappedSite.R2Ops.Add(op);
                offset++;
            }

            var orderedSites = from entry in sitesByOffset
                               orderby entry.Key
                               select entry.Value;

            return orderedSites.ToList();
        }
        /// <summary>
        /// Checks that expanding "2S3M1D1M" yields one op per unit of length, in cigar order.
        /// </summary>
        public void Expand()
        {
            // One character per expanded op: 2S, 3M, 1D, 1M.
            const string expectedOpTypes = "SSMMMDM";

            var expandedOps = new CigarAlignment("2S3M1D1M").Expand();

            Assert.Equal(expectedOpTypes.Length, expandedOps.Count);

            var position = 0;
            foreach (var expectedType in expectedOpTypes)
            {
                Assert.Equal(expectedType, expandedOps[position].Type);
                position++;
            }
        }
Example #4
0
        /// <summary>
        /// Projects a per-cigar-op direction map onto read bases by keeping only the directions
        /// whose cigar op spans the read.
        /// </summary>
        /// <param name="cigarBaseDirectionMap">One direction per expanded cigar op.</param>
        /// <param name="cigarData">The cigar the direction map was built from.</param>
        /// <returns>An array of length <c>cigarData.GetReadSpan()</c> holding the kept directions in order.</returns>
        public static DirectionType[] CreateSequencedBaseDirectionMap(DirectionType[] cigarBaseDirectionMap, CigarAlignment cigarData)
        {
            var expandedOps          = cigarData.Expand();
            var directionPerReadBase = new DirectionType[cigarData.GetReadSpan()];

            var nextReadBase = 0;
            var opCursor     = 0;

            foreach (var direction in cigarBaseDirectionMap)
            {
                var spansRead = expandedOps[opCursor].IsReadSpan();
                opCursor++;

                if (!spansRead)
                {
                    // This op consumes no read base, so it has no slot in the output.
                    continue;
                }

                directionPerReadBase[nextReadBase] = direction;
                nextReadBase++;
            }

            return directionPerReadBase;
        }
Example #5
0
        /// <summary>
        /// Re-applies terminal softclips to a cigar: 'M' ops inside the prefix/suffix windows
        /// derived from the original softclip lengths may be turned back into 'S' ops, depending
        /// on the mismatch map and the flags below. Only 'M' ops are ever converted.
        /// </summary>
        /// <param name="rawCigar">Cigar to revise. Returned as-is when both original softclip lengths are 0 or when no op was converted.</param>
        /// <param name="mismatchMap">Match/mismatch classification indexed alongside the softclip windows; Mismatch and NMismatch entries count as mismatches.</param>
        /// <param name="originalSoftclipPrefix">Original prefix softclip length; upper bound for the prefix window.</param>
        /// <param name="originalSoftclipSuffix">Original suffix softclip length; upper bound for the suffix window.</param>
        /// <param name="rescueEdgeMatches">When false, every eligible 'M' in a window is softclipped (unless anchored next to an indel).</param>
        /// <param name="maskNsOnly">When true, the mismatch logic is bypassed and only <paramref name="prefixNs"/>/<paramref name="suffixNs"/> decide.</param>
        /// <param name="prefixNs">With <paramref name="maskNsOnly"/>: prefix 'M' ops at expanded index below this value are softclipped.</param>
        /// <param name="suffixNs">With <paramref name="maskNsOnly"/>: suffix 'M' ops are softclipped when this is positive and the mismatch-map index is at or beyond read span minus this value.</param>
        /// <param name="softclipEvenIfMatch">Forces softclipping in the prefix window. NOTE(review): not consulted in the suffix window — confirm that is intended.</param>
        /// <param name="softclipRepresentsMess">In the prefix window, extends the indel anchor check to the op two steps back. NOTE(review): not consulted in the suffix window — confirm.</param>
        /// <param name="allowOneSoftclipMismatchPer">Divisor applied to each original softclip length to get the mismatch count above which that window is treated as too messy to rescue.</param>
        /// <returns><paramref name="rawCigar"/> itself when nothing was converted, otherwise a new compressed cigar.</returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs       = 0, int suffixNs = 0,
                                                   bool softclipEvenIfMatch = false, bool softclipRepresentsMess = true, float allowOneSoftclipMismatchPer = 12)
        {
            // If realignment creates a bunch of mismatches at beginning where it was once softclipped,
            // can we softclip them?
            // Which bases should be softclipped?
            // - Things that were softclipped before and are mismatches? Or are Ms?
            // - Things that were softclipped before and are Ns
            // Softclips in new alignment can be shorter than before, but not longer
            // Softclips should be terminal
            // This is rooted in an assumption that the original softclips are terminal

            // Nothing was softclipped originally, so there is nothing to re-apply.
            if (originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0)
            {
                return(rawCigar);
            }

            var expandedCigar = rawCigar.Expand();
            var changed       = false; // set only when an 'M' is converted to 'S' in one of the two window loops

            // Start at end of potential prefix softclip region and work backwards. This way we can rescue things that were matches previously sandwiched in softclips and now freed up by realignment.
            var mismatchMapIndex = (int)originalSoftclipPrefix;
            var startedSoftclip  = false;

            // Window lengths: bounded by the original softclip length and by the position of the
            // first/last op that is neither 'M' nor 'S'.
            // NOTE(review): presumably FindIndex returns -1 when no such op exists (List<T> semantics),
            // which would make the prefix window 0 for an all-M/S cigar — confirm against Expand()'s return type.
            var maxSoftclipPrefixLength = Math.Min(expandedCigar.FindIndex(x => x.Type != 'M' && x.Type != 'S') + 1, originalSoftclipPrefix);
            var maxSoftclipSuffixLength = Math.Min(expandedCigar.Count - expandedCigar.FindLastIndex(x => x.Type != 'M' && x.Type != 'S'), originalSoftclipSuffix);

            // Mismatch-count thresholds (floating point: uint divided by float).
            var minMismatchesToSoftclipPrefix = originalSoftclipPrefix / allowOneSoftclipMismatchPer;

            var minMismatchesToSoftclipSuffix = originalSoftclipSuffix / allowOneSoftclipMismatchPer;

            // First pass over the prefix window: count mismatches, walking the mismatch map
            // backwards from originalSoftclipPrefix - 1.
            var numMismatchesInOrigPrefixClip = 0;
            var tmpMismatchMapIndex           = mismatchMapIndex;

            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                tmpMismatchMapIndex--;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);

                if (foundMismatch)
                {
                    numMismatchesInOrigPrefixClip++;
                }
            }

            var prefixTooMessyToRescue = numMismatchesInOrigPrefixClip > minMismatchesToSoftclipPrefix;

            // 'N' is the initial "nothing seen yet" value for the op history below.
            var previousOp         = 'N';
            var previousPreviousOp = 'N';

            // Second pass over the prefix window: walk the expanded cigar from the inner edge of
            // the window out towards index 0, deciding per 'M' op whether to softclip it.
            for (var i = 0; i < maxSoftclipPrefixLength; i++)
            {
                var index = (int)maxSoftclipPrefixLength - 1 - i;

                mismatchMapIndex--;

                var opAtIndex = expandedCigar[index].Type;
                if (opAtIndex != 'M')
                {
                    // Non-M ops are never converted; only previousOp is updated here (previousPreviousOp is left as-is).
                    previousOp = opAtIndex;
                    continue;
                }

                bool shouldSoftclip;

                if (maskNsOnly)
                {
                    shouldSoftclip = index < prefixNs;
                }
                else
                {
                    // Once softclipping has started it continues outwards (startedSoftclip).
                    shouldSoftclip = softclipEvenIfMatch || !rescueEdgeMatches || startedSoftclip || prefixTooMessyToRescue;
                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }

                    // Don't resoftclip if we are <1 base from the end.
                    // This check runs last, so it overrides every softclip decision made above.
                    if (previousOp == 'D' || previousOp == 'I' || (softclipRepresentsMess && (previousPreviousOp == 'D' || previousPreviousOp == 'I')))
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }

                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                // Record the op as it was before any conversion to 'S'.
                previousPreviousOp = previousOp;
                previousOp         = opAtIndex;
            }

            // Start at beginning of potential suffix softclip region and work forwards
            startedSoftclip  = false;
            mismatchMapIndex = mismatchMap.Length - (int)maxSoftclipSuffixLength - 1;

            // First pass over the suffix window: count mismatches in the last
            // maxSoftclipSuffixLength entries of the mismatch map.
            var numMismatchesInOrigSuffixClip = 0;

            tmpMismatchMapIndex = mismatchMapIndex;
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                tmpMismatchMapIndex++;
                var foundMismatch = (mismatchMap[tmpMismatchMapIndex] == MatchType.Mismatch || mismatchMap[tmpMismatchMapIndex] == MatchType.NMismatch);
                if (foundMismatch)
                {
                    numMismatchesInOrigSuffixClip++;
                }
            }

            var suffixTooMessyToRescue = numMismatchesInOrigSuffixClip > minMismatchesToSoftclipSuffix;

            // Second pass over the suffix window: walk forwards to the last expanded op.
            // previousPreviousOp is not reset or used on this side.
            previousOp = 'N';
            for (var i = 0; i < maxSoftclipSuffixLength; i++)
            {
                var index = expandedCigar.Count() - ((int)maxSoftclipSuffixLength - i);
                mismatchMapIndex++;

                var opAtIndex = expandedCigar[index].Type;

                if (opAtIndex != 'M')
                {
                    previousOp = opAtIndex;
                    continue;
                }
                bool shouldSoftclip;
                if (maskNsOnly)
                {
                    shouldSoftclip = suffixNs > 0 && mismatchMapIndex >= rawCigar.GetReadSpan() - suffixNs;
                }
                else
                {
                    // NOTE(review): unlike the prefix side, softclipEvenIfMatch is not part of this expression — confirm intended.
                    shouldSoftclip = !rescueEdgeMatches || startedSoftclip || suffixTooMessyToRescue;

                    // Rescue edge matches if we haven't seen any mismatches yet
                    if (!shouldSoftclip)
                    {
                        var foundMismatch = (mismatchMap[mismatchMapIndex] == MatchType.Mismatch || mismatchMap[mismatchMapIndex] == MatchType.NMismatch);
                        if (foundMismatch)
                        {
                            shouldSoftclip = true;
                        }
                    }
                    // Only the immediately preceding op is checked here (no two-back check as on the prefix side).
                    if (previousOp == 'D' || previousOp == 'I')
                    {
                        // Always provide an anchor
                        shouldSoftclip = false;
                    }
                }
                if (shouldSoftclip)
                {
                    changed              = true;
                    startedSoftclip      = true;
                    expandedCigar[index] = new CigarOp('S', 1);
                }

                previousOp = opAtIndex;
            }

            // We can only anchor a read on an M, so if we've softclipped everything away we're in trouble! Add back one.
            // NOTE(review): this branch does not set `changed`, so if no op was converted above the
            // restored 'M' is discarded by the early return below — confirm that is intended.
            if (!expandedCigar.Any(o => o.Type == 'M'))
            {
                var hasAnyNonSoftclipPos = expandedCigar.Any(o => o.Type != 'S');
                var firstNonSoftclipPos  = hasAnyNonSoftclipPos
                    ? expandedCigar.FindIndex(o => o.Type != 'S')
                    : (expandedCigar.Count);
                // Set the last position of softclip to M.
                // NOTE(review): if the first op is not 'S', firstNonSoftclipPos is 0 and this indexes position -1.
                expandedCigar[firstNonSoftclipPos - 1] = new CigarOp('M', expandedCigar[firstNonSoftclipPos - 1].Length);
            }

            // Nothing was converted: hand back the caller's cigar instead of building a copy.
            if (!changed)
            {
                return(rawCigar);
            }

            // Re-compile back into a revised cigar.
            var revisedCigar = new CigarAlignment();

            foreach (var cigarOp in expandedCigar)
            {
                revisedCigar.Add(cigarOp);
            }
            revisedCigar.Compress();

            return(revisedCigar);
        }
Example #6
0
        /// <summary>
        /// Re-applies terminal softclips to a cigar. 'M' ops inside the prefix and suffix windows
        /// (bounded by the original softclip lengths and by the first/last non-'M' op) are turned
        /// into 'S' ops according to the mismatch map and the flags below.
        /// </summary>
        /// <param name="rawCigar">Cigar to revise; returned as-is when both original softclip lengths are 0.</param>
        /// <param name="mismatchMap">Match classification consulted for each candidate 'M' op.</param>
        /// <param name="originalSoftclipPrefix">Original prefix softclip length.</param>
        /// <param name="originalSoftclipSuffix">Original suffix softclip length.</param>
        /// <param name="rescueEdgeMatches">When false, every 'M' op in a window is softclipped.</param>
        /// <param name="maskNsOnly">When true, only <paramref name="prefixNs"/>/<paramref name="suffixNs"/> decide what is softclipped.</param>
        /// <param name="prefixNs">With <paramref name="maskNsOnly"/>: prefix 'M' ops at expanded index below this value are softclipped.</param>
        /// <param name="suffixNs">With <paramref name="maskNsOnly"/>: suffix 'M' ops are softclipped when this is positive and the mismatch-map index is at or beyond read span minus this value.</param>
        /// <returns>A new compressed cigar, or <paramref name="rawCigar"/> when there was no original softclip.</returns>
        public static CigarAlignment SoftclipCigar(CigarAlignment rawCigar, MatchType[] mismatchMap, uint originalSoftclipPrefix,
                                                   uint originalSoftclipSuffix, bool rescueEdgeMatches = true, bool maskNsOnly = false, int prefixNs = 0, int suffixNs = 0)
        {
            // Softclips in the new alignment may be shorter than the original ones but never longer,
            // and they stay terminal (this assumes the original softclips were terminal too).
            var hadNoSoftclips = originalSoftclipPrefix == 0 && originalSoftclipSuffix == 0;
            if (hadNoSoftclips)
            {
                return rawCigar;
            }

            var ops     = rawCigar.Expand();
            var opCount = ops.Count;

            // The mismatch cursor starts just past the original prefix and is stepped back before each use.
            var mapCursor = (int)originalSoftclipPrefix;

            var firstNonMatch = ops.FindIndex(op => op.Type != 'M');
            var lastNonMatch  = ops.FindLastIndex(op => op.Type != 'M');

            var prefixWindow = (int)Math.Min(firstNonMatch + 1, originalSoftclipPrefix);
            var suffixWindow = (int)Math.Min(opCount - lastNonMatch, originalSoftclipSuffix);

            // Prefix: walk from the inner edge of the window out towards index 0, so matches that
            // were sandwiched in softclips can be rescued until the first base that gets clipped.
            var clipping = false;
            for (var index = prefixWindow - 1; index >= 0; index--)
            {
                mapCursor--;

                if (ops[index].Type == 'M')
                {
                    var clipThisOp = maskNsOnly
                        ? index < prefixNs
                        : (!rescueEdgeMatches || clipping || mismatchMap[mapCursor] != MatchType.Match);

                    if (clipThisOp)
                    {
                        clipping   = true;
                        ops[index] = new CigarOp('S', 1);
                    }
                }
            }

            // Suffix: walk from the inner edge of the window forwards to the last op.
            clipping  = false;
            mapCursor = mismatchMap.Length - suffixWindow - 1;
            for (var index = opCount - suffixWindow; index < opCount; index++)
            {
                mapCursor++;

                if (ops[index].Type == 'M')
                {
                    var clipThisOp = maskNsOnly
                        ? (suffixNs > 0 && mapCursor >= rawCigar.GetReadSpan() - suffixNs)
                        : (!rescueEdgeMatches || clipping || mismatchMap[mapCursor] != MatchType.Match);

                    if (clipThisOp)
                    {
                        clipping   = true;
                        ops[index] = new CigarOp('S', 1);
                    }
                }
            }

            // A read can only be anchored on an M. If none is left, turn the op just before the
            // first non-softclip op (or the very last op when everything is softclip) back into M.
            if (ops.All(op => op.Type != 'M'))
            {
                var firstNonSoftclip = ops.FindIndex(op => op.Type != 'S');
                var anchorIndex      = (firstNonSoftclip >= 0 ? firstNonSoftclip : opCount) - 1;

                ops[anchorIndex] = new CigarOp('M', ops[anchorIndex].Length);
            }

            // Rebuild a compact cigar from the per-base ops.
            var revised = new CigarAlignment();
            foreach (var op in ops)
            {
                revised.Add(op);
            }

            revised.Compress();

            return revised;
        }