Exemplo n.º 1
0
        public List <BamAlignment> GetRestitchedReads(ReadPair pair, BamAlignment origRead1, BamAlignment origRead2, int?r1Nm, int?r2Nm, bool realignedAroundPairSpecific, INmCalculator nmCalculator, bool recalculateNm, bool realignmentIsSketchy = false)
        {
            var reads = new List <BamAlignment>();
            var badRestitchAfterPairSpecificRealign = false;

            var disagreeingReads = realignedAroundPairSpecific ? ShouldRestitch(pair) : null;

            if (disagreeingReads == null)
            {
                // TODO in some cases, we already tried stitching and then are realigning.
                // ... If nothing realigned, we don't need to try stitching again as we know it already failed.
                // ... However, in some cases, we haven't tried stitching yet as we were deferring until realignment.
                // ... For now, it's ok to leave this here, but know that we are wasting time in some cases trying to stitch the same pair again.
                int nmStitched    = 0;
                var stitchedReads = _stitchedPairHandler.ExtractReads(pair);
                if (stitchedReads.Count == 1)
                {
                    if (recalculateNm)
                    {
                        nmStitched      = nmCalculator.GetNm(stitchedReads[0]);
                        pair.StitchedNm = nmStitched;
                    }

                    if (realignedAroundPairSpecific)
                    {
                        // Check that the stitched read represents an overall improvement over the original reads
                        var scStitched = stitchedReads[0].CigarData.GetPrefixClip() +
                                         stitchedReads[0].CigarData.GetSuffixClip();
                        var r1Sc = origRead1.CigarData.GetPrefixClip() +
                                   origRead1.CigarData.GetSuffixClip();
                        var r2Sc = origRead2.CigarData.GetPrefixClip() +
                                   origRead2.CigarData.GetSuffixClip();

                        var origMess1    = r1Nm + r1Sc;
                        var origMess2    = r2Nm + r2Sc;
                        var stitchedMess = nmStitched + scStitched;

                        if (stitchedMess > (origMess1 + origMess2))
                        {
                            badRestitchAfterPairSpecificRealign = true;
                        }
                    }

                    if (!badRestitchAfterPairSpecificRealign)
                    {
                        _statusHandler.AddStatusCount($"Successfully stitched after realign (ps: {realignedAroundPairSpecific})");
                        _statusHandler.AddCombinedStatusStringTags("OC", pair.Read1, pair.Read2, stitchedReads[0]);
                        _statusHandler.AddCombinedStatusStringTags("OS", pair.Read1, pair.Read2, stitchedReads[0]);
                        _statusHandler.AddCombinedStatusStringTags("RS", pair.Read1, pair.Read2, stitchedReads[0]);
                        _statusHandler.AddCombinedStatusStringTags("RC", pair.Read1, pair.Read2, stitchedReads[0]);
                        _statusHandler.AddCombinedStatusStringTags("RX", pair.Read1, pair.Read2, stitchedReads[0]);

                        stitchedReads[0].ReplaceOrAddStringTag("SC",
                                                               pair.Read1.GetStringTag("SC") + pair.Read1.CigarData + "," + pair.Read2.CigarData);
                        stitchedReads[0].ReplaceOrAddIntTag("NM", nmStitched, true);

                        foreach (var tag in _tagsToKeepFromR1)
                        {
                            var r1Tag = pair.Read1.GetStringTag(tag);

                            if (r1Tag != null)
                            {
                                stitchedReads[0].ReplaceOrAddStringTag(tag, r1Tag);
                            }
                        }
                    }
                }
                else
                {
                    pair.FailForOtherReason = true;
                    if (realignmentIsSketchy)
                    {
                        badRestitchAfterPairSpecificRealign = true;
                    }
                    _statusHandler.AddStatusCount($"Failed stitching after realign (ps: {realignedAroundPairSpecific})");
                    _statusHandler.AppendStatusStringTag("RC", $"Bad after attempted pair-specific realign (ps: {realignedAroundPairSpecific})", origRead1);
                    _statusHandler.AppendStatusStringTag("RC", $"Bad after attempted pair-specific realign (ps: {realignedAroundPairSpecific})", origRead2);
                }

                if (badRestitchAfterPairSpecificRealign)
                {
                    _statusHandler.AddStatusCount($"Restitching was too messy after realign (ps: {realignedAroundPairSpecific})");
                    _statusHandler.AppendStatusStringTag("RC", "BadRestitchAfterPairSpecificRealign", origRead1);
                    _statusHandler.AppendStatusStringTag("RC", "BadRestitchAfterPairSpecificRealign", origRead2);

                    pair.BadRestitch = true;
                    // Go back to the originals
                    reads.Add(origRead1);
                    reads.Add(origRead2);
                }
                else
                {
                    reads = stitchedReads;
                }
            }
            else
            {
                pair.Disagree = true;
                return(disagreeingReads);
            }

            return(reads);
        }
Exemplo n.º 2
0
        public BamAlignment GetFinalAlignment(BamAlignment origBamAlignment, out bool changed, out bool forcedSoftclip, out bool confirmed, out bool sketchy,
                                              List <PreIndel> selectedIndels = null, List <PreIndel> existingIndels          = null,
                                              bool assumeImperfect           = true, List <HashableIndel> confirmedAccepteds = null, List <PreIndel> mateIndels = null)
        {
            sketchy        = false;
            forcedSoftclip = false;
            bool forcedAlignment = false;
            var  presumeStartPositionForForcedAlignment = 0;

            if (origBamAlignment.CigarData.Count == 0)
            {
                // This was something weird that came up in the halo dataset... mapq is 0 but is still mapped, no cigar

                if (origBamAlignment.Position <= 0 && origBamAlignment.FragmentLength != 0) // No sense trying to fiddle with the position otherwise
                {
                    // TODO does this really even move the needle? Is it helping enough to outweigh its weirdness?
                    var presumedEndPosition = origBamAlignment.MatePosition < origBamAlignment.Position
                        ? origBamAlignment.MatePosition - origBamAlignment.FragmentLength
                        : origBamAlignment.MatePosition + origBamAlignment.FragmentLength;
                    presumeStartPositionForForcedAlignment = presumedEndPosition - origBamAlignment.Bases.Length;
                    forcedAlignment = true;
                }
                else
                {
                    presumeStartPositionForForcedAlignment = origBamAlignment.Position;
                    forcedAlignment = true;
                }
            }

            var  anyIndelsAtAll = _regionFilterer.AnyIndelsNearby(origBamAlignment.Position);
            bool isRealignable  = true;

            if (anyIndelsAtAll)
            {
                var isImperfectRead = false || ((origBamAlignment.ContainsDisallowedCigarOps(_suspectCigarOps) ||
                                                 origBamAlignment.GetIntTag("NM") > 0 || forcedAlignment));
                var isReadWorthCaringAbout = !origBamAlignment.IsDuplicate() && !origBamAlignment.IsSecondary();
                isRealignable = isImperfectRead && isReadWorthCaringAbout && origBamAlignment.Bases.Distinct().Count() > 1;
            }
            else
            {
                _statusCounter.AddStatusCount("No indels nearby at all");
                isRealignable = false;
            }

            if (!isRealignable)
            {
                confirmed = false;
                changed   = false;
                sketchy   = false;
                return(origBamAlignment);
            }

            // TODO maybe flag (or return all) if there's a lot or high quality stuff that we're missing! Esp with pair specific
            var indels = _indelSource.GetRelevantIndels(forcedAlignment ? presumeStartPositionForForcedAlignment : origBamAlignment.Position,
                                                        mateIndels, confirmedAccepteds);

            // Don't realign around single indels if we already have them
            bool          hasExistingUnsanctionedIndels = false;
            bool          existingSanctionedIndelIsBest = false;
            bool          hasVeryGoodIndel       = false;
            bool          hasHardToCallIndel     = false;
            var           existingMatches        = new List <PreIndel>();
            HashableIndel existingConfirmedIndel = new HashableIndel();
            var           existingMatchHashables = new List <HashableIndel>();

            if (indels.Any() && existingIndels != null && existingIndels.Any())
            {
                var topScore             = (float)(indels.Max(x => x.Key.Score));
                var matchesFound         = 0;
                var nonPreExistingIndels = new List <KeyValuePair <HashableIndel, GenomeSnippet> >();

                var index = 0;
                foreach (var kvp in indels)
                {
                    var indel   = kvp.Key;
                    var matches = existingIndels.Where(e => Helper.IsMatch(e, indel));
                    var isMatch = matches.Any();
                    if (isMatch)
                    {
                        matchesFound++;

                        if (!indel.InMulti && index == 0)
                        {
                            existingSanctionedIndelIsBest = true;
                            existingConfirmedIndel        = indel;
                        }

                        var proportionOfTopScore = indel.Score / (float)topScore;
                        if (proportionOfTopScore >= 0.75)
                        {
                            hasVeryGoodIndel = true;
                        }

                        if (indel.HardToCall)
                        {
                            hasHardToCallIndel = true;
                        }

                        existingMatches.AddRange(matches);

                        // TODO do we need special handling of multis?
                        existingMatchHashables.Add(indel);
                    }

                    if (!isMatch || indel.InMulti)
                    {
                        nonPreExistingIndels.Add(kvp);
                    }


                    index++;
                }

                // TODO do we actually want to replace indels with non-pre-existing only?
                indels = nonPreExistingIndels;

                if (matchesFound == 0)
                {
                    hasExistingUnsanctionedIndels = true;
                }
            }

            // TODO this precludes us from having good multis
            if (existingSanctionedIndelIsBest)
            {
                // If it already had the top ranked indel, there's not really any point in trying to realign around others (here we assume that it's also the best fitting indel for the read, hence why it was originally called by the regular aligner).
                _statusCounter.AddStatusCount("Existing indel is already the best available");
                changed   = false;
                confirmed = true;

                UpdateOutcomeForConfirmed(existingConfirmedIndel);

                if (confirmedAccepteds == null)
                {
                    confirmedAccepteds = new List <HashableIndel>();
                }

                confirmedAccepteds.Add(existingConfirmedIndel);

                return(origBamAlignment);
            }


            if (!indels.Any() || origBamAlignment.EndPosition - origBamAlignment.Position > 500)
            {
                if (!indels.Any())
                {
                    // TODO maybe do the forced softclip here if the read did have indels?
                    _statusCounter.AddStatusCount("No indels to realign to");
                    _statusCounter.AppendStatusStringTag("RX", $"{origBamAlignment.GetStringTag("RX")},No indels to realign to", origBamAlignment);
                }
                else
                {
                    _statusCounter.AddStatusCount("Alignment reference span longer than we can realign to");
                }
                changed   = false;
                confirmed = false;
                return(origBamAlignment);
            }



            // TODO this should relate to cap on indel size... introducing too large of an indel will make us go beyond this context.
            var context       = indels.First().Value;
            var orderedIndels = indels.Select(x => x.Key).ToList();
            var numIndels     = orderedIndels.Count;

            _statusCounter.AddStatusCount("Realigning to " + numIndels);

            var bamAlignment = new BamAlignment(origBamAlignment);

            if (forcedAlignment)
            {
                bamAlignment.CigarData = new CigarAlignment(origBamAlignment.Bases.Length + "M");
                bamAlignment.Position  = presumeStartPositionForForcedAlignment;
            }

            var realignResult = _readRealigner.Realign(new Read(_chromosome, bamAlignment),
                                                       orderedIndels, indels.ToDictionary(x => x.Key, x => x.Value), confirmedAccepteds != null && confirmedAccepteds.Any());

            var acceptedIndels = realignResult?.AcceptedIndels;
            var hasAnyIndels   = acceptedIndels != null && acceptedIndels.Any();

            if (realignResult != null)
            {
                _statusCounter.AddStatusCount("Able to realign at all (may still be worse than original)");
                _statusCounter.AppendStatusStringTag("RX", "Able to realign at all(may still be worse than original)", bamAlignment);
            }
            else
            {
                _statusCounter.AddStatusCount("Not able to realign at all");
                _statusCounter.AppendStatusStringTag("RX", "Not able to realign at all", origBamAlignment);
            }

            AlignmentSummary originalAlignmentSummary = null;
            var realignmentUnchanged = true;

            if (realignResult != null)
            {
                originalAlignmentSummary =
                    Extensions.GetAlignmentSummary((new Read(_chromosome, origBamAlignment)), context.Sequence,
                                                   _trackActualMismatches, _checkSoftclipsForMismatches, context.StartPosition);

                realignmentUnchanged = _judger.RealignmentIsUnchanged(realignResult, origBamAlignment);

                if (originalAlignmentSummary.NumMismatches > 0)
                {
                    // TODO PERF do we still want to use this ever?
                    var sumMismatch = Helper.GetSumOfMismatchQualities(origBamAlignment.Qualities,
                                                                       origBamAlignment.Bases, new Read(_chromosome, origBamAlignment).PositionMap, context.Sequence,
                                                                       context.StartPosition);
                    originalAlignmentSummary.SumOfMismatchingQualities = sumMismatch;
                }

                // Within this logic also checking the same as "!realignmentUnchanged" above.. consolidate this.
                if (selectedIndels != null &&
                    (_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, confirmedAccepteds != null && confirmedAccepteds.Any())) ||
                    ResultIsGoodEnough(realignResult, origBamAlignment, originalAlignmentSummary,
                                       realignmentUnchanged, confirmedAccepteds != null && confirmedAccepteds.Any()))
                {
                    UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, true, realignResult);

                    if (realignResult.IsSketchy)
                    {
                        sketchy = true;
                    }
                    return(AcceptRealignment(origBamAlignment, out changed, selectedIndels, existingIndels, realignResult, originalAlignmentSummary, bamAlignment, hasExistingUnsanctionedIndels, out confirmed));
                }
            }


            // At this point, any good realignment would have been returned. If it's realigned and changed now, it's an unaccepted (not good enough) realignment.
            // If it had an indel to begin with, it's basically a vote that we don't trust that indel. Optionally softclip it out.

            if (!realignmentUnchanged)
            {
                changed   = false;
                confirmed = false;

                HandleFailedRealignment(origBamAlignment, ref forcedSoftclip, existingIndels, realignResult, hasExistingUnsanctionedIndels, existingMatches);

                if ((hasVeryGoodIndel || (hasHardToCallIndel && _judger.IsVeryConfident(originalAlignmentSummary))) && !hasExistingUnsanctionedIndels && existingMatchHashables.Any())
                {
                    // It didn't have the tip-top indel, but it had one that was very close, and we tried realigning around the top guys and failed - this one looks better. Give it credit.
                    confirmed = true;
                    foreach (var indel in existingMatchHashables)
                    {
                        UpdateOutcomeForConfirmed(indel);

                        if (confirmedAccepteds != null)
                        {
                            confirmedAccepteds.Add(indel);
                        }
                    }
                }
                UpdateIndelOutcomes(numIndels, orderedIndels, hasAnyIndels, acceptedIndels, confirmedAccepteds, false, realignResult);
            }
            else
            {
                if (acceptedIndels != null)
                {
                    foreach (var indelNum in acceptedIndels)
                    {
                        var indel = orderedIndels[indelNum];

                        UpdateOutcomeForConfirmed(indel);
                    }
                }

                _statusCounter.AddStatusCount("INDEL STATUS\tUnchanged\t" + realignResult?.Indels);
                _statusCounter.AppendStatusStringTag("RX", "Unchanged: " + realignResult?.Indels, origBamAlignment);

                confirmed = true;
                changed   = false;
                return(origBamAlignment);
            }

            if (realignResult == null)
            {
                if (_softclipUnknownIndels && hasExistingUnsanctionedIndels)
                {
                    var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));

                    foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
                    {
                        var reverseClip = false;
                        var clipLength  = preIndel.RightAnchor;
                        if (preIndel.LeftAnchor < preIndel.RightAnchor)
                        {
                            reverseClip = true;
                            clipLength  = preIndel.LeftAnchor;
                        }

                        // TODO arbitrary number here...
                        // If it's pretty well-anchored, don't remove the indel
                        if (clipLength > 20)
                        {
                            continue;
                        }

                        forcedSoftclip = true;
                        _statusCounter.AddStatusCount("Softclipped out bad indel");
                        _statusCounter.AppendStatusStringTag("RX",
                                                             $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}... No realignment",
                                                             origBamAlignment);
                        _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
                        OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment,
                                                                   reverseClip, preIndel.ReferencePosition);
                    }
                }
            }

            _statusCounter.AppendStatusStringTag("RX", "Realignment failed", origBamAlignment);
            _statusCounter.AddStatusCount("Realignment failed");

            return(origBamAlignment);
        }