/// <summary>
/// Reports whether a realignment result places the read exactly where the original alignment did:
/// same start position and the same sequence of CIGAR operations.
/// </summary>
/// <param name="realignResult">The realignment outcome; its position is compared after subtracting one.</param>
/// <param name="originalAlignment">The alignment the read had before realignment.</param>
/// <returns><c>true</c> when position and every CIGAR op (type and length) match; otherwise <c>false</c>.</returns>
public bool RealignmentIsUnchanged(RealignmentResult realignResult,
                                           BamAlignment originalAlignment)
        {
            // The result position is offset by one relative to the BamAlignment position.
            var shiftedPosition = realignResult.Position - 1;
            if (shiftedPosition != originalAlignment.Position)
            {
                return false;
            }

            var realignedOps = realignResult.Cigar;
            var originalOps  = originalAlignment.CigarData;
            var opCount      = realignedOps.Count;

            if (opCount != originalOps.Count)
            {
                return false;
            }

            // Walk both CIGARs in lockstep; any differing op means the alignment moved.
            for (var index = 0; index < opCount; index++)
            {
                var realignedOp = realignedOps[index];
                var originalOp  = originalOps[index];

                if (realignedOp.Type != originalOp.Type || realignedOp.Length != originalOp.Length)
                {
                    return false;
                }
            }

            return true;
        }
Пример #2
0
        /// <summary>
        /// Decides whether a realignment result should be taken in place of the original alignment.
        /// When the result is rejected, the reason is recorded on the status counter.
        /// </summary>
        /// <param name="realignResult">The candidate realignment.</param>
        /// <param name="origBamAlignment">The original alignment; status tags are attached to it on rejection.</param>
        /// <param name="originalAlignmentSummary">Summary statistics of the original alignment, used for comparison and reporting.</param>
        /// <param name="realignmentUnchanged">Whether the realignment is identical to the original placement.</param>
        /// <param name="isPairAware">Forwarded to the judger's comparison.</param>
        /// <returns><c>true</c> if the realignment should be accepted; otherwise <c>false</c>.</returns>
        private bool ResultIsGoodEnough(RealignmentResult realignResult, BamAlignment origBamAlignment,
                                        AlignmentSummary originalAlignmentSummary, bool realignmentUnchanged, bool isPairAware)
        {
            if (realignmentUnchanged)
            {
                // An unchanged alignment is still worth taking if it carries positions whose
                // qualities need adjusting. NifiedAt can be null, so guard before calling Any()
                // rather than throwing a NullReferenceException on an otherwise valid result.
                if (realignResult.NifiedAt != null && realignResult.NifiedAt.Any())
                {
                    return(true);
                }
                _statusCounter.AppendStatusStringTag("RX", "Not taking realignment: unchanged", origBamAlignment);
                _statusCounter.AddStatusCount("Not taking realignment: unchanged");
                return(false);
            }

            if (!_judger.RealignmentBetterOrEqual(realignResult, originalAlignmentSummary, isPairAware))
            {
                // Record why the realignment lost, plus the original (OS) and realigned (RS) statistics.
                _statusCounter.AppendStatusStringTag("RX", $"Realignment failed:not better ({originalAlignmentSummary.Cigar}->{realignResult.Cigar}): {realignResult.Conclusion}", origBamAlignment);
                _statusCounter.UpdateStatusStringTag("OS", $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q", origBamAlignment);
                _statusCounter.UpdateStatusStringTag("RS", $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q", origBamAlignment);

                _statusCounter.AddStatusCount("Not taking realignment: not better");
                return(false);
            }

            return(true);
        }
Пример #3
0
        /// <summary>
        /// Records debug status for an accepted realignment: status counts, an "RX" narrative tag,
        /// and the "OC" / "OS" / "RS" tags on <paramref name="bamAlignment"/>.
        /// </summary>
        /// <param name="origBamAlignment">The alignment before realignment; its CIGAR is written to the "OC" tag.</param>
        /// <param name="selectedIndels">Only tested for null, to report the "ps" flag in the status count.</param>
        /// <param name="existingIndels">Indels already present in the read; may be null or empty.</param>
        /// <param name="realignResult">The accepted realignment.</param>
        /// <param name="bamAlignment">The alignment receiving the tags.</param>
        /// <param name="hasExistingUnsanctionedIndels">Reported in the "Replaced existing indels" status count.</param>
        /// <param name="originalAlignmentSummary">Statistics of the original alignment, written to the "OS" tag.</param>
        private void AddStatusInfo(BamAlignment origBamAlignment, List<PreIndel> selectedIndels, List<PreIndel> existingIndels,
                                   RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels,
                                   AlignmentSummary originalAlignmentSummary)
        {
            _statusCounter.AddStatusCount("INDEL STATUS\tAccepted\t" + realignResult.Indels);

            var hadSelectedIndels = selectedIndels != null;
            _statusCounter.AddStatusCount($"Successfully realigned (ps: {hadSelectedIndels})");

            var successMessage =
                $"Successfully realigned after {realignResult.Attempts} attempts, indel is {string.Join("|", realignResult.AcceptedIndels)}";
            _statusCounter.AppendStatusStringTag("RX", successMessage, bamAlignment);

            // Only report a replacement when the read actually carried indels beforehand.
            if (existingIndels?.Any() == true)
            {
                var replacementMessage =
                    $"Orig indels:{string.Join("|", existingIndels)}__New indels:{realignResult.Indels}";
                _statusCounter.AppendStatusStringTag("RX", replacementMessage, bamAlignment);
                _statusCounter.AddStatusCount(
                    $"Replaced existing indels (nonsanctioned: {hasExistingUnsanctionedIndels})");
            }

            // OC = original CIGAR, OS = original statistics, RS = realigned statistics.
            bamAlignment.ReplaceOrAddStringTag("OC", $"{origBamAlignment.CigarData}");

            var originalStats =
                $"{originalAlignmentSummary.NumMatches}M-{originalAlignmentSummary.NumNonNSoftclips}S-{originalAlignmentSummary.NumMismatches}X-{originalAlignmentSummary.NumMismatchesIncludeSoftclip}x-{originalAlignmentSummary.NumInsertedBases}i-{originalAlignmentSummary.NumIndels}Z-{originalAlignmentSummary.SumOfMismatchingQualities}Q";
            bamAlignment.ReplaceOrAddStringTag("OS", originalStats);

            var realignedStats =
                $"{realignResult.NumMatches}M-{realignResult.NumNonNSoftclips}S-{realignResult.NumMismatches}X-{realignResult.NumMismatchesIncludeSoftclip}x-{realignResult.NumInsertedBases}i-{realignResult.NumIndels}Z-{realignResult.SumOfMismatchingQualities}Q";
            bamAlignment.ReplaceOrAddStringTag("RS", realignedStats);
        }
Пример #4
0
        /// <summary>
        /// Applies an accepted realignment to <paramref name="bamAlignment"/>: copies the new position
        /// and CIGAR, optionally records debug status, possibly raises the mapping quality, and zeroes
        /// the base qualities at the positions listed in <c>realignResult.NifiedAt</c>.
        /// </summary>
        /// <param name="origBamAlignment">The alignment before realignment (used for debug status only).</param>
        /// <param name="selectedIndels">Only tested for null here, to report the "ps" flag.</param>
        /// <param name="existingIndels">Indels already present in the read (used for debug status only).</param>
        /// <param name="realignResult">The accepted realignment.</param>
        /// <param name="bamAlignment">The alignment that is modified in place.</param>
        /// <param name="hasExistingUnsanctionedIndels">Forwarded to the debug status.</param>
        /// <param name="originalAlignmentSummary">Forwarded to the debug status.</param>
        private void HandleAcceptedRealignment(BamAlignment origBamAlignment, List<PreIndel> selectedIndels,
                                               List<PreIndel> existingIndels,
                                               RealignmentResult realignResult, BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels,
                                               AlignmentSummary originalAlignmentSummary)
        {
            // The result position is one higher than the 0-based BAM position.
            bamAlignment.Position  = realignResult.Position - 1;
            bamAlignment.CigarData = realignResult.Cigar;

            if (_lightDebug)
            {
                AddStatusInfo(origBamAlignment, selectedIndels, existingIndels, realignResult, bamAlignment,
                              hasExistingUnsanctionedIndels, originalAlignmentSummary);
            }

            _statusCounter.AppendStatusStringTag("RC", bamAlignment.GetStringTag("RC"), bamAlignment);

            // A low-MAPQ read that now aligns without mismatches gets its mapping quality raised;
            // reads that started at MAPQ 0 are only rescored when explicitly allowed.
            if (bamAlignment.MapQuality <= 20 && realignResult.NumMismatches == 0)
            {
                if (_allowRescoringOrig0 || bamAlignment.MapQuality > 0)
                {
                    bamAlignment.MapQuality = 40; // todo what to set this to?
                }
            }

            // Nify if using pair-specific indels
            var nifiedPositions = realignResult.NifiedAt;
            if (nifiedPositions == null || !nifiedPositions.Any())
            {
                return;
            }

            foreach (var baseIndex in nifiedPositions)
            {
                bamAlignment.Qualities[baseIndex] = 0;
            }

            var hadSelectedIndels = selectedIndels != null;
            _statusCounter.AddStatusCount(
                $"Successfully realigned with mismatch-insertion quality adjusted (ps: {hadSelectedIndels})");
            _statusCounter.AppendStatusStringTag("RX",
                                                 $"Successfully realigned with mismatch-insertion quality adjusted ({string.Join(",", nifiedPositions)}",
                                                 bamAlignment);
        }
        /// <summary>
        /// Builds a mock <c>IReadRealigner</c> whose <c>Realign</c> call, for any arguments, returns
        /// <paramref name="result"/> and appends the indel list it was given to
        /// <paramref name="callbackIndelsList"/>.
        /// </summary>
        /// <param name="result">The canned realignment result to return.</param>
        /// <param name="callbackIndelsList">Accumulates the indels passed to every <c>Realign</c> call.</param>
        /// <returns>The configured mock.</returns>
        private Mock<IReadRealigner> GetMockReadRealigner(RealignmentResult result, List<HashableIndel> callbackIndelsList)
        {
            var mockRealigner = new Mock<IReadRealigner>();

            mockRealigner
                .Setup(realigner => realigner.Realign(
                           It.IsAny<Read>(),
                           It.IsAny<List<HashableIndel>>(),
                           It.IsAny<Dictionary<HashableIndel, GenomeSnippet>>(),
                           It.IsAny<bool>(),
                           It.IsAny<int>()))
                .Returns<Read, List<HashableIndel>, Dictionary<HashableIndel, GenomeSnippet>, bool, int>(
                    (readArg, indelsArg, contextsArg, boolArg, intArg) => result)
                .Callback<Read, List<HashableIndel>, Dictionary<HashableIndel, GenomeSnippet>, bool, int>(
                    (readArg, indelsArg, contextsArg, boolArg, intArg) => callbackIndelsList.AddRange(indelsArg));

            return mockRealigner;
        }
Пример #6
0
        /// <summary>
        /// Accepts a realignment: applies it to <paramref name="bamAlignment"/> via
        /// <c>HandleAcceptedRealignment</c> and reports the outcome through the out-parameters.
        /// </summary>
        /// <param name="changed">Always set to <c>true</c>.</param>
        /// <param name="confirmed">Always set to <c>false</c>.</param>
        /// <returns>The same <paramref name="bamAlignment"/> instance, after modification.</returns>
        private BamAlignment AcceptRealignment(BamAlignment origBamAlignment, out bool changed, List<PreIndel> selectedIndels,
                                               List<PreIndel> existingIndels, RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary,
                                               BamAlignment bamAlignment, bool hasExistingUnsanctionedIndels, out bool confirmed)
        {
            HandleAcceptedRealignment(
                origBamAlignment,
                selectedIndels,
                existingIndels,
                realignResult,
                bamAlignment,
                hasExistingUnsanctionedIndels,
                originalAlignmentSummary);

            // An accepted realignment counts as a change to the read, not a confirmation of it.
            changed   = true;
            confirmed = false;

            return bamAlignment;
        }
Пример #7
0
        /// <summary>
        /// Verifies that <c>BasicAlignmentComparer.GetBetterResult</c> prefers its first argument on a
        /// tie, tolerates null on either side, and switches to the other result once the first one has
        /// more mismatches.
        /// </summary>
        public void GetBetterResult()
        {
            var comparer  = new BasicAlignmentComparer();
            var preferred = new RealignmentResult();
            var other     = new RealignmentResult();

            // Identity, not equality: both results start out with identical contents, so the
            // assertions must check which instance came back.
            Assert.Same(preferred, comparer.GetBetterResult(preferred, other));
            Assert.Same(other, comparer.GetBetterResult(other, preferred));

            // A null on either side yields the non-null result.
            Assert.Same(preferred, comparer.GetBetterResult(preferred, null));
            Assert.Same(preferred, comparer.GetBetterResult(null, preferred));

            Assert.Null(comparer.GetBetterResult(null, null));

            // Once the first argument has mismatches, the other result wins.
            preferred.NumMismatches = 5;
            Assert.Same(other, comparer.GetBetterResult(preferred, other));
        }
Пример #8
0
        /// <summary>
        /// Picks the better of two realignment results, favouring <paramref name="preferred"/> on a tie.
        /// </summary>
        /// <param name="preferred">The result that wins ties; may be null.</param>
        /// <param name="other">The competing result; may be null.</param>
        /// <returns>
        /// The non-null result when only one is supplied, null when both are null, otherwise whichever
        /// <c>CompareAlignments</c> ranks higher.
        /// </returns>
        public RealignmentResult GetBetterResult(RealignmentResult preferred, RealignmentResult other)
        {
            if (preferred == null)
            {
                // Covers both "only other present" and "neither present" (other is null then).
                return other;
            }

            if (other == null)
            {
                return preferred;
            }

            var comparison = CompareAlignments(preferred, other);

            // prefer first if equal
            return comparison >= 0 ? preferred : other;
        }
Пример #9
0
        /// <summary>
        /// Checks whether the realignment added an indel inside a mono-repeat at either end of the read.
        /// </summary>
        /// <param name="read">The read being realigned.</param>
        /// <param name="result">The realignment result; <c>IndelsAddedAt</c> and <c>AcceptedIndels</c> are read.</param>
        /// <param name="indels">The indel list that <c>result.AcceptedIndels</c> indexes into.</param>
        /// <returns>
        /// <c>true</c> if the first added indel falls within a prefix repeat longer than 3, or if the
        /// last added indel sits within a suffix repeat longer than 3 that also matches the start of
        /// the indel's reference suffix; otherwise <c>false</c>.
        /// </returns>
        public static bool AttemptedAddingIndelInUnanchoredRepeat(Read read, RealignmentResult result, List<HashableIndel> indels)
        {
            // TODO OBO should this be checking <= or < ?
            var prefixRepeatLength = read.GetMonoRepeatPrefix();
            if (prefixRepeatLength > 3 && result.IndelsAddedAt.Min() <= prefixRepeatLength)
            {
                return true;
            }

            var suffixRepeatLength = read.GetMonoRepeatSuffix();
            var finalIndel         = indels[result.AcceptedIndels.Last()];

            if (suffixRepeatLength <= 3)
            {
                return false;
            }

            // Non-insertions use a distance-from-read-end that is one smaller than insertions.
            var nonInsertionAdjustment = finalIndel.Type == AlleleCategory.Insertion ? 0 : 1;
            var basesAfterLastIndel    = read.ReadLength - result.IndelsAddedAt.Max() - nonInsertionAdjustment;

            if (basesAfterLastIndel > suffixRepeatLength)
            {
                return false;
            }

            if (suffixRepeatLength > finalIndel.NumBasesInReferenceSuffixBeforeUnique)
            {
                return false;
            }

            // The tail of the read must be indistinguishable from the reference just after the indel.
            var readTail        = read.Sequence.Substring(read.Sequence.Length - suffixRepeatLength, suffixRepeatLength);
            var referenceSuffix = finalIndel.RefSuffix.Substring(0, suffixRepeatLength);

            return readTail == referenceSuffix;
        }
Пример #10
0
        /// <summary>
        /// Records a rejected realignment and, when configured, softclips out poorly anchored existing
        /// indels that were not matched to a known indel.
        /// </summary>
        /// <param name="origBamAlignment">The original alignment; receives status tags and may be softclipped in place.</param>
        /// <param name="forcedSoftclip">Set to <c>true</c> when at least one indel was softclipped out.</param>
        /// <param name="existingIndels">Indels present in the original alignment.</param>
        /// <param name="realignResult">The rejected realignment result; may be null.</param>
        /// <param name="hasExistingUnsanctionedIndels">Whether the read carries indels without a match in <paramref name="existingMatches"/>.</param>
        /// <param name="existingMatches">The existing indels that are considered sanctioned.</param>
        private void HandleFailedRealignment(BamAlignment origBamAlignment, ref bool forcedSoftclip, List<PreIndel> existingIndels,
                                             RealignmentResult realignResult, bool hasExistingUnsanctionedIndels,
                                             List<PreIndel> existingMatches)
        {
            // The result may be null for a failed realignment (the softclip message below already
            // allows for that), so read Indels null-safely here as well instead of throwing before
            // any of the failure handling runs.
            var rejectedIndels = realignResult?.Indels;
            _statusCounter.AddStatusCount("INDEL STATUS\tRejected\t" + rejectedIndels);
            _statusCounter.AppendStatusStringTag("RX", "Did not accept: " + rejectedIndels, origBamAlignment);

            // TODO could this be happening because of a low-ranked indel? Maybe we should be allowing to realign against all indels...
            // TODO STILL should this actually be happening also to reads that had no indels to realign around (i.e. started with weak indel, and couldn't go anywhere), not just the ones that were changed?
            if (_softclipUnknownIndels && hasExistingUnsanctionedIndels)
            {
                var unsanctioned = existingIndels.Where(x => !existingMatches.Contains(x));

                foreach (var preIndel in unsanctioned.OrderBy(x => x.ReferencePosition))
                {
                    // Clip towards whichever side of the indel has the shorter anchor.
                    var reverseClip = false;
                    var clipLength  = preIndel.RightAnchor;
                    if (preIndel.LeftAnchor < preIndel.RightAnchor)
                    {
                        reverseClip = true;
                        clipLength  = preIndel.LeftAnchor;
                    }

                    // TODO arbitrary number here...
                    // If it's pretty well-anchored, don't remove the indel
                    if (clipLength > 20)
                    {
                        continue;
                    }

                    forcedSoftclip = true;
                    _statusCounter.AddStatusCount("Softclipped out bad indel");
                    _statusCounter.AppendStatusStringTag("RX",
                                                         $"Softclipped out bad indel({origBamAlignment.CigarData},{string.Join(",", existingIndels)}...{realignResult?.Indels}",
                                                         origBamAlignment);
                    _statusCounter.AddStatusCount("INDEL STATUS\tRemoved\t" + string.Join("|", existingIndels));
                    OverlappingIndelHelpers.SoftclipAfterIndel(origBamAlignment,
                                                               reverseClip, preIndel.ReferencePosition);
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Creates a test <c>RealignmentResult</c> with the given CIGAR, one indel added at index 8,
        /// no nified positions, and accepted-indel lists each containing index 0.
        /// </summary>
        /// <param name="cigar">CIGAR string used to construct the result's <c>CigarAlignment</c>.</param>
        /// <returns>The populated result.</returns>
        private RealignmentResult GetResult(string cigar)
        {
            var result = new RealignmentResult();

            result.Cigar                   = new CigarAlignment(cigar);
            result.IndelsAddedAt           = new List<int> { 8 };
            result.NifiedAt                = new List<int>();
            result.AcceptedIndels          = new List<int> { 0 };
            result.AcceptedIndelsInSubList = new List<int> { 0 };

            return result;
        }
Пример #12
0
 /// <summary>
 /// Reports whether the realignment compares at least as well as the original alignment,
 /// according to the configured alignment comparer.
 /// </summary>
 /// <param name="realignResult">The candidate realignment.</param>
 /// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
 /// <returns><c>true</c> when the comparer returns zero or a positive value.</returns>
 private bool RealignmentBetterOrEqual(RealignmentResult realignResult, AlignmentSummary originalAlignmentSummary)
 {
     var comparison = _alignmentComparer.CompareAlignmentsWithOriginal(realignResult, originalAlignmentSummary);

     return comparison >= 0;
 }
Пример #13
0
 /// <summary>
 /// Reports whether the realignment has the same position and the same CIGAR text as the
 /// original alignment.
 /// </summary>
 /// <param name="realignResult">The realignment outcome; its position is compared after subtracting one.</param>
 /// <param name="originalAlignment">The alignment before realignment.</param>
 /// <returns><c>true</c> when both position and CIGAR string match.</returns>
 private bool RealignmentIsUnchanged(RealignmentResult realignResult,
                                     BamAlignment originalAlignment)
 {
     if (realignResult.Position - 1 != originalAlignment.Position)
     {
         return false;
     }

     // Positions agree, so the CIGAR strings decide.
     var realignedCigar = realignResult.Cigar.ToString();
     var originalCigar  = originalAlignment.CigarData.ToString();

     return realignedCigar == originalCigar;
 }
Пример #14
0
        /// <summary>
        /// Re-attaches the stripped N-prefix and N-suffix to a realignment result as softclips and,
        /// when softclip re-masking is enabled, re-softclips the CIGAR and refreshes the result's
        /// position, mismatch counts and summary statistics.
        /// </summary>
        /// <param name="read">The read being realigned.</param>
        /// <param name="nPrefixLength">Number of leading bases to re-add as softclip.</param>
        /// <param name="nSuffixLength">Number of trailing bases to re-add as softclip.</param>
        /// <param name="positionMapWithoutTerminalNs">Position map of the read without the stripped terminal bases.</param>
        /// <param name="result">The realignment result, updated in place. A null result is a no-op.</param>
        /// <param name="context">Reference snippet the read was aligned against.</param>
        /// <param name="prefixSoftclip">Prefix softclip length passed to the re-softclipping helper.</param>
        /// <param name="suffixSoftclip">Suffix softclip length passed to the re-softclipping helper.</param>
        /// <param name="freshCigarWithoutTerminalNs">Only used in the error message when no bases are alignable.</param>
        /// <exception cref="InvalidDataException">
        /// Thrown when the mismatch count cannot be computed, or when the read has no mappable bases
        /// after re-softclipping.
        /// </exception>
        public void ReapplySoftclips(Read read, int nPrefixLength, int nSuffixLength, PositionMap positionMapWithoutTerminalNs,
                                     RealignmentResult result, GenomeSnippet context, uint prefixSoftclip, uint suffixSoftclip,
                                     CigarAlignment freshCigarWithoutTerminalNs)
        {
            // Guard up front: the CIGAR rebuild below dereferences result, so a null check placed
            // after it could never take effect.
            if (result == null)
            {
                return;
            }

            // Re-append the N-prefix
            var nPrefixPositionMap = Enumerable.Repeat(-1, nPrefixLength);
            var nSuffixPositionMap = Enumerable.Repeat(-1, nSuffixLength);
            // TODO maybe have a function for combining pos maps instead
            var finalPositionMap = new PositionMap(nPrefixPositionMap.Concat(positionMapWithoutTerminalNs.Map).Concat(nSuffixPositionMap).ToArray());

            // Wrap the realigned CIGAR in softclips for the stripped prefix/suffix, then compress.
            var finalCigar = new CigarAlignment {
                new CigarOp('S', (uint)nPrefixLength)
            };

            foreach (CigarOp op in result.Cigar)
            {
                finalCigar.Add(op);
            }

            finalCigar.Add(new CigarOp('S', (uint)nSuffixLength));
            finalCigar.Compress();
            result.Cigar = finalCigar;

            // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, optionally re-mask them.
            // NOTE(review): the IndelsAddedAt/NifiedAt shift by nPrefixLength below only happens in
            // this branch - confirm that is intended when re-masking is disabled.
            if (_remaskSoftclips)
            {
                var mismatchMap =
                    Helper.GetMismatchMap(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);

                var softclipAdjustedCigar = Helper.SoftclipCigar(result.Cigar, mismatchMap, prefixSoftclip, suffixSoftclip,
                                                                 maskNsOnly: _maskNsOnly, prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false),
                                                                 suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true), softclipEvenIfMatch: _keepProbeSoftclips || _keepBothSideSoftclips, softclipRepresentsMess: (!(_keepBothSideSoftclips || _keepProbeSoftclips)));

                // Update position map to account for any softclipping added
                var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
                for (var i = 0; i < adjustedPrefixClip; i++)
                {
                    finalPositionMap.UpdatePositionAtIndex(i, -2, true);
                }

                var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
                for (var i = 0; i < adjustedSuffixClip; i++)
                {
                    finalPositionMap.UpdatePositionAtIndex(finalPositionMap.Length - 1 - i, -2, true);
                }

                var editDistance =
                    Helper.GetNumMismatches(read.Sequence, finalPositionMap, context.Sequence, context.StartPosition);
                if (editDistance == null)
                {
                    // This shouldn't happen at this point - we already have a successful result
                    throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                                   string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
                }

                // TODO PERF - See how much this really helps analytically. I'm thinking maybe kill this altogether and remove from eval
                var sumOfMismatching = Helper.GetSumOfMismatchQualities(mismatchMap, read.Qualities);

                var readHasPosition = finalPositionMap.HasAnyMappableBases();
                if (!readHasPosition)
                {
                    throw new InvalidDataException(string.Format(
                                                       "Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})", freshCigarWithoutTerminalNs,
                                                       string.Join(",", finalPositionMap), read.CigarData, softclipAdjustedCigar));
                }

                result.Position      = finalPositionMap.FirstMappableBase(); // TODO this used to be >= 0 but changed to > 0. Confirm correct.
                result.Cigar         = softclipAdjustedCigar;
                result.NumMismatches = editDistance.Value;

                // Shift read-relative indexes to account for the re-added prefix.
                var addedAtFinal = new List<int>();
                foreach (var i in result.IndelsAddedAt)
                {
                    addedAtFinal.Add(i + nPrefixLength);
                }
                result.IndelsAddedAt = addedAtFinal;
                var nifiedAtFinal = new List<int>();
                foreach (var i in result.NifiedAt)
                {
                    nifiedAtFinal.Add(i + nPrefixLength);
                }
                result.NifiedAt = nifiedAtFinal;

                var newSummary = Extensions.GetAlignmentSummary(result.Position - 1 - context.StartPosition, result.Cigar,
                                                                context.Sequence,
                                                                read.Sequence, _trackActualMismatches, _checkSoftclipsForMismatches);

                result.NumNonNMismatches            = newSummary.NumNonNMismatches;
                result.NumNonNSoftclips             = newSummary.NumNonNSoftclips;
                result.NumSoftclips                 = newSummary.NumSoftclips;
                result.NumInsertedBases             = newSummary.NumInsertedBases;
                result.NumMismatchesIncludeSoftclip = newSummary.NumMismatchesIncludeSoftclip;
                //result.MismatchesIncludeSoftclip = newSummary.MismatchesIncludeSoftclip;
                result.SumOfMismatchingQualities = sumOfMismatching;
                result.AnchorLength = newSummary.AnchorLength;
            }
        }
 /// <summary>
 /// Reports whether the realignment compares at least as well as the original alignment,
 /// according to the configured alignment comparer.
 /// </summary>
 /// <param name="realignResult">The candidate realignment.</param>
 /// <param name="originalAlignmentSummary">Summary of the original alignment.</param>
 /// <param name="isPairAware">Forwarded unchanged to the comparer.</param>
 /// <returns><c>true</c> when the comparer returns zero or a positive value.</returns>
 public bool RealignmentBetterOrEqual(RealignmentResult realignResult,
                                      AlignmentSummary originalAlignmentSummary, bool isPairAware)
 {
     var comparison = _alignmentComparer.CompareAlignmentsWithOriginal(
         realignResult, originalAlignmentSummary, isPairAware);

     return comparison >= 0;
 }
Пример #16
0
        /// <summary>
        /// Tries realigning the read against each ranked indel on its own, and against each pair of
        /// indels that can coexist, and returns the best result according to the comparer.
        /// </summary>
        /// <param name="rankedIndels">Candidate indels, iterated in list order.</param>
        /// <param name="indelContexts">Per-indel reference snippets, forwarded to RealignToTargets.</param>
        /// <param name="read">The read to realign.</param>
        /// <param name="attemptedTargetSides">
        /// Running count maintained here: +2 for every single-indel attempt, and +1 per side that is
        /// not skipped for every two-indel attempt.
        /// </param>
        /// <param name="fromPairSpecificIndels">Forwarded to CanCoexist and RealignToTargets.</param>
        /// <returns>The best result found, or null if no attempt produced one.</returns>
        public RealignmentResult GetBestAlignment(List <HashableIndel> rankedIndels,
                                                  Dictionary <HashableIndel, GenomeSnippet> indelContexts, Read read, out int attemptedTargetSides, bool fromPairSpecificIndels)
        {
            // Always true in this method; it only gates the two-indel loop below.
            bool realign2 = true;
            RealignmentResult bestResultSoFar = null;

            attemptedTargetSides = 0;

            // Note this used to be in the loop... hopefully I'm not killing anything here...
            var nPrefixLength = read.GetNPrefix();

            // When probe softclips are kept and the read has no N-prefix, the existing prefix softclip
            // length is used instead (subject to the strand/pairing condition below).
            if (_keepProbeSoftclips)
            {
                if ((_keepBothSideSoftclips || !read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) && nPrefixLength == 0)
                {
                    nPrefixLength = (int)read.CigarData.GetPrefixClip();
                }
            }

            var details = new ReadToRealignDetails(read, read.GetAdjustedPosition(true, probePrefix: _keepProbeSoftclips ?
                                                                                  nPrefixLength : 0), _keepProbeSoftclips, _keepBothSideSoftclips);

            var positionFromRight =
                read.GetAdjustedPosition(false, probePrefix: _keepProbeSoftclips ? nPrefixLength : 0);
            ReadToRealignDetails rightAnchoredDetails = null;

            // Right-anchored details are only built for a non-negative adjusted position; otherwise
            // null is passed on to RealignToTargets.
            if (positionFromRight >= 0)
            {
                rightAnchoredDetails = new ReadToRealignDetails(read, positionFromRight, _keepProbeSoftclips, _keepBothSideSoftclips);
            }

            // align to all permutations of one indel, two indels, and three indels
            // try to skip alignment if we know it will fail
            // NOTE(review): only one- and two-indel combinations are attempted in this method.
            for (var i = 0; i < rankedIndels.Count; i++)
            {
                var indel1  = rankedIndels[i];
                var indexes = new int[] { i };

                // try aligning to one indel
                // _oneIndelSimpleTargets is a reused scratch array, overwritten on every iteration.
                _oneIndelSimpleTargets[0] = indel1;
                var indel1Result = RealignToTargets(read, _oneIndelSimpleTargets, indelContexts, details, rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes);
                attemptedTargetSides += 2;

                // update best result so far for one indel
                bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel1Result);
                if (IsUnbeatable(bestResultSoFar))
                {
                    return(bestResultSoFar);
                }
                //if (bestResultSoFar != null && bestResultSoFar.NumIndels == 1 && bestResultSoFar.NumMismatches == 0)
                //{
                //    return bestResultSoFar; // can't beat this
                //}

                if (realign2)
                {
                    // NOTE(review): indexes2 is allocated once per outer iteration and rewritten for
                    // every j; if RealignToTargets keeps a reference to it, earlier results would see
                    // later values - confirm it copies.
                    var indexes2 = new int[2];
                    for (var j = i + 1; j < rankedIndels.Count; j++)
                    {
                        var indel2 = rankedIndels[j];
                        if (!CanCoexist(indel1, indel2, fromPairSpecificIndels))
                        {
                            continue;
                        }

                        _twoIndelSimpleTargets[0] = indel1;
                        _twoIndelSimpleTargets[1] = indel2;

                        indexes2[0] = i;
                        indexes2[1] = j;

                        // NOTE(review): the targets are sorted here but indexes2 stays in (i, j) order,
                        // so the two arrays may disagree after the sort - verify against
                        // RealignToTargets.
                        Array.Sort(_twoIndelSimpleTargets, CompareSimple); // need to sort by position

                        // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
                        var alreadyFailedFromLeft  = indel1Result == null && _twoIndelSimpleTargets[0].Equals(indel1);
                        var alreadyFailedFromRight = indel1Result == null && _twoIndelSimpleTargets[1].Equals(indel1);
                        if (!alreadyFailedFromLeft)
                        {
                            attemptedTargetSides++;
                        }
                        if (!alreadyFailedFromRight)
                        {
                            attemptedTargetSides++;
                        }

                        var indel2Result = RealignToTargets(read, _twoIndelSimpleTargets, indelContexts, details, rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes2,
                                                            skipLeftAnchored: alreadyFailedFromLeft, skipRightAnchored: alreadyFailedFromRight);
                        bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel2Result);
                    }
                }
            }

            return(bestResultSoFar);
        }
Пример #17
0
 /// <summary>
 /// Reports whether a result is good enough to stop searching: it exists, uses exactly one
 /// indel, and has zero mismatches both with and without softclipped bases counted.
 /// </summary>
 /// <param name="bestResultSoFar">The current best result; may be null.</param>
 /// <returns><c>true</c> when the search can stop early.</returns>
 private bool IsUnbeatable(RealignmentResult bestResultSoFar)
 {
     if (bestResultSoFar == null)
     {
         return false;
     }

     if (bestResultSoFar.NumIndels != 1)
     {
         return false;
     }

     return bestResultSoFar.NumMismatches == 0 &&
            bestResultSoFar.NumMismatchesIncludeSoftclip == 0;
 }
Пример #18
0
        /// <summary>
        /// Softclips qualifying insertions at the read ends, then refreshes the result's summary
        /// statistics from the (possibly rewritten) CIGAR.
        /// </summary>
        /// <remarks>
        /// Assumption: there should be no softclips in the cigar by this time.
        /// Assumption: there should be exactly as many/the same indels in <paramref name="indels"/> as
        /// are represented in <c>result.Cigar</c>.
        /// </remarks>
        /// <param name="indels">The indels realigned against; the first and last entries are inspected.</param>
        /// <param name="read">The read being realigned.</param>
        /// <param name="refSequence">Reference sequence used to recompute the alignment summary.</param>
        /// <param name="result">The realignment result, updated in place.</param>
        /// <param name="refSequenceStartIndex">Offset subtracted from the result position when summarising.</param>
        /// <exception cref="InvalidDataException">Thrown when the CIGAR already contains a softclip.</exception>
        public void MaskPartialInsertion(HashableIndel[] indels, Read read, string refSequence, RealignmentResult result, int refSequenceStartIndex = 0)
        {
            var leadingIndel  = indels[0];
            var trailingIndel = indels[indels.Length - 1];
            var touchesInsertion = leadingIndel.Type == AlleleCategory.Insertion ||
                                   trailingIndel.Type == AlleleCategory.Insertion;

            if (touchesInsertion && (_minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion))
            {
                var maskedCigar = new CigarAlignment();

                for (var opIndex = 0; opIndex < result.Cigar.Count; opIndex++)
                {
                    var op = result.Cigar[opIndex];

                    if (op.Type == 'S')
                    {
                        throw new InvalidDataException(
                                  string.Format(
                                      "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping", op.Type, result.Cigar));
                    }

                    // Only the first and last ops are candidates; the first is judged against the
                    // first indel, the last against the last indel.
                    var maskThisOp =
                        (opIndex == 0 &&
                         EvaluateInsertionAtReadEnds(op, leadingIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion)) ||
                        (opIndex == result.Cigar.Count - 1 &&
                         EvaluateInsertionAtReadEnds(op, trailingIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion));

                    maskedCigar.Add(maskThisOp ? new CigarOp('S', op.Length) : op);
                }

                maskedCigar.Compress();
                result.Cigar = maskedCigar;
            }

            // The statistics are refreshed whether or not the CIGAR was rewritten.
            var refreshedSummary = Extensions.GetAlignmentSummary(result.Position - 1 - refSequenceStartIndex, result.Cigar, refSequence,
                                                                  read.Sequence, _trackActualMismatches, _checkSoftclipsForMismatches);

            result.NumIndels                    = refreshedSummary.NumIndels;
            result.NumNonNMismatches            = refreshedSummary.NumNonNMismatches;
            result.NumNonNSoftclips             = refreshedSummary.NumNonNSoftclips;
            result.NumSoftclips                 = refreshedSummary.NumSoftclips;
            result.NumMismatchesIncludeSoftclip = refreshedSummary.NumMismatchesIncludeSoftclip;
            result.NumIndelBases                = refreshedSummary.NumIndelBases;
            result.NumInsertedBases             = refreshedSummary.NumInsertedBases;
        }
Пример #19
0
        private RealignmentResult GetBestAlignment(List <CandidateIndel> rankedIndels, Read read, string refSequence, HashSet <Tuple <string, string, string> > indelCandidateGroups, out int attemptedTargetSides)
        {
            RealignmentResult bestResultSoFar = null;

            attemptedTargetSides = 0;

            // align to all permutations of one indel, two indels, and three indels
            // try to skip alignment if we know it will fail
            for (var i = 0; i < rankedIndels.Count; i++)
            {
                var indel1 = rankedIndels[i];

                // try aligning to one indel
                _oneIndelTargets[0] = rankedIndels[i];
                var indel1Result = RealignToTargets(read, _oneIndelTargets, refSequence);
                attemptedTargetSides += 2;

                // update best result so far for one indel
                bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel1Result);
                if (bestResultSoFar != null && bestResultSoFar.NumIndels == 1 && bestResultSoFar.NumMismatches == 0)
                {
                    return(bestResultSoFar); // can't beat this
                }

                // Do not realign to >1 indels if we haven't seen any coexisting indels.
                if (indelCandidateGroups == null)
                {
                    continue;
                }
                if (indelCandidateGroups.Count == 0)
                {
                    continue;
                }

                for (var j = i + 1; j < rankedIndels.Count; j++)
                {
                    var indel2 = rankedIndels[j];
                    var indelPair = new List <CandidateIndel> {
                        indel1, indel2
                    }.OrderBy(g => g.ReferencePosition).ThenBy(t => t.ReferenceAllele).Select(x => x.ToString()).ToList();
                    if (indelCandidateGroups.Contains(new Tuple <string, string, string>(indelPair[0], indelPair[1], null)))
                    {
                        if (!CanCoexist(indel1, indel2))
                        {
                            continue;
                        }

                        _twoIndelTargets[0] = indel1;
                        _twoIndelTargets[1] = indel2;
                        Array.Sort(_twoIndelTargets, Compare);  // need to sort by position

                        // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
                        var alreadyFailedFromLeft  = indel1Result == null && _twoIndelTargets[0] == indel1;
                        var alreadyFailedFromRight = indel1Result == null && _twoIndelTargets[1] == indel1;
                        if (!alreadyFailedFromLeft)
                        {
                            attemptedTargetSides++;
                        }
                        if (!alreadyFailedFromRight)
                        {
                            attemptedTargetSides++;
                        }

                        var indel2Result = RealignToTargets(read, _twoIndelTargets, refSequence, alreadyFailedFromLeft, alreadyFailedFromRight);
                        bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel2Result);
                    }

                    if (_tryThree)
                    {
                        for (var k = j + 1; k < rankedIndels.Count; k++)
                        {
                            var indel3 = rankedIndels[k];
                            var indelList = new List <CandidateIndel> {
                                indel1, indel2, indel3
                            }.OrderBy(g => g.ReferencePosition).ThenBy(t => t.ReferenceAllele).Select(x => x.ToString()).ToList();
                            bool groupCoexist = indelCandidateGroups.Contains(new Tuple <string, string, string>(indelList[0], indelList[1], indelList[2]));
                            if (!groupCoexist)
                            {
                                continue;
                            }
                            if (!(CanCoexist(indel1, indel3) && CanCoexist(indel2, indel3)))
                            {
                                continue;
                            }

                            // only try to realign to three indels if bestResultSoFar is not good enough
                            if (NeedBetter(bestResultSoFar))
                            {
                                _threeIndelTargets[0] = indel1;
                                _threeIndelTargets[1] = indel2;
                                _threeIndelTargets[2] = indel3;
                                Array.Sort(_threeIndelTargets, Compare); // need to sort by position

                                var indel3Result = RealignToTargets(read, _threeIndelTargets, refSequence);
                                bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel3Result);
                            }
                        }
                    }
                }
            }

            return(bestResultSoFar);
        }
Пример #20
0
 /// <summary>
 /// Tells whether the realigned start is close enough to the original alignment start.
 /// </summary>
 /// <param name="realignResult">Realignment whose <c>Position</c> is decremented by one before comparison.</param>
 /// <param name="bamAlignment">Original alignment whose <c>Position</c> is the reference point.</param>
 /// <returns>
 /// True when the absolute distance between the two starts is strictly less than <c>_maxRealignShift</c>.
 /// </returns>
 private bool RealignmentIsWithinRange(RealignmentResult realignResult, BamAlignment bamAlignment)
 {
     var shift       = (realignResult.Position - 1) - bamAlignment.Position;
     var withinRange = Math.Abs(shift) < _maxRealignShift;

     return(withinRange);
 }
Пример #21
0
        /// <summary>
        /// Exercises <c>ScoredAlignmentComparer.GetBetterResult</c> with evenly and unevenly weighted
        /// mismatch/indel coefficients, including the tie-breaking rule (first argument wins).
        /// </summary>
        public void GetBetterResult()
        {
            // Fixtures: each result differs only in its indel / mismatch counts.
            var flawless          = new RealignmentResult();
            var singleMismatch    = new RealignmentResult { NumMismatches = 1 };
            var doubleMismatch    = new RealignmentResult { NumMismatches = 2 };
            var singleIndel       = new RealignmentResult { NumIndels = 1 };
            var doubleIndel       = new RealignmentResult { NumIndels = 2 };
            var indelPlusMismatch = new RealignmentResult { NumIndels = 1, NumMismatches = 1 };

            // Mismatches and indels are penalised equally.
            var evenScorer = new AlignmentScorer()
            {
                MismatchCoefficient = -1,
                IndelCoefficient    = -1
            };
            var evenComparer = new ScoredAlignmentComparer(evenScorer);

            // A result with no events beats every other fixture.
            var imperfectResults = new[] { singleIndel, doubleIndel, singleMismatch, doubleMismatch, indelPlusMismatch };
            foreach (var imperfect in imperfectResults)
            {
                Assert.Equal(flawless, evenComparer.GetBetterResult(flawless, imperfect));
            }

            // Equal scores: the first argument is kept.
            Assert.Equal(singleMismatch, evenComparer.GetBetterResult(singleMismatch, singleIndel));
            Assert.Equal(singleIndel, evenComparer.GetBetterResult(singleIndel, singleMismatch));
            Assert.Equal(doubleIndel, evenComparer.GetBetterResult(doubleIndel, doubleMismatch));
            Assert.Equal(doubleIndel, evenComparer.GetBetterResult(doubleIndel, indelPlusMismatch));
            Assert.Equal(indelPlusMismatch, evenComparer.GetBetterResult(indelPlusMismatch, doubleIndel));

            // Different scores: the less negative one is kept, regardless of argument order.
            Assert.Equal(singleIndel, evenComparer.GetBetterResult(doubleIndel, singleIndel));
            Assert.Equal(singleMismatch, evenComparer.GetBetterResult(doubleIndel, singleMismatch));
            Assert.Equal(singleIndel, evenComparer.GetBetterResult(doubleMismatch, singleIndel));
            Assert.Equal(singleMismatch, evenComparer.GetBetterResult(doubleMismatch, singleMismatch));
            Assert.Equal(singleIndel, evenComparer.GetBetterResult(indelPlusMismatch, singleIndel));
            Assert.Equal(singleMismatch, evenComparer.GetBetterResult(indelPlusMismatch, singleMismatch));

            // A mismatch now costs twice as much as an indel.
            var skewedScorer = new AlignmentScorer()
            {
                MismatchCoefficient = -2,
                IndelCoefficient    = -1
            };
            var skewedComparer = new ScoredAlignmentComparer(skewedScorer);

            Assert.Equal(singleIndel, skewedComparer.GetBetterResult(singleMismatch, singleIndel));
            Assert.Equal(doubleIndel, skewedComparer.GetBetterResult(doubleIndel, singleMismatch));
            Assert.Equal(singleMismatch, skewedComparer.GetBetterResult(singleMismatch, doubleIndel)); // tie under these weights, first wins
            Assert.Equal(singleIndel, skewedComparer.GetBetterResult(doubleMismatch, singleIndel));
            Assert.Equal(singleMismatch, skewedComparer.GetBetterResult(doubleMismatch, singleMismatch));
            Assert.Equal(singleIndel, skewedComparer.GetBetterResult(indelPlusMismatch, singleIndel));
            Assert.Equal(singleMismatch, skewedComparer.GetBetterResult(indelPlusMismatch, singleMismatch));
        }
Пример #22
0
        /// <summary>
        /// Realigns <paramref name="read"/> against <paramref name="indels"/>, layering the indels onto a fresh
        /// all-match alignment of the read's non-N middle, anchored on the left or the right.
        /// </summary>
        /// <param name="indels">
        /// Indels to layer on. Iterated in ascending index order when anchoring left and descending when anchoring
        /// right. NOTE(review): the code indexes <c>indels[0]</c> unconditionally, so this presumably must be
        /// non-empty - confirm against callers.
        /// </param>
        /// <param name="read">Read being realigned; its sequence, CIGAR clips and terminal N runs are consulted.</param>
        /// <param name="refSequence">Reference sequence handed to the indel-layering, summary and mismatch helpers.</param>
        /// <param name="anchorOnLeft">True to anchor on the left side of the read, false to anchor on the right.</param>
        /// <returns>
        /// The realignment result with CIGAR, position and summary counters filled in, or null as soon as any indel
        /// fails to be added by <c>AddIndelAndGetResult</c>.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when a softclip is found in the CIGAR before re-softclipping, when the edit distance of the
        /// remasked alignment is null, or when no base of the remasked read has a position.
        /// </exception>
        private RealignmentResult RealignForAnchor(CandidateIndel[] indels, Read read, string refSequence, bool anchorOnLeft)
        {
            var position = read.GetAdjustedPosition(anchorOnLeft);
            var freshCigarWithoutTerminalNs = new CigarAlignment();

            var nPrefixLength = read.GetNPrefix();
            var nSuffixLength = read.GetNSuffix();

            // Only build up the cigar for the non-N middle. Add the N prefix back on after the realignment attempts.
            freshCigarWithoutTerminalNs.Add(new CigarOp('M', (uint)(read.Sequence.Length - nPrefixLength - nSuffixLength)));
            freshCigarWithoutTerminalNs.Compress();

            // start with fresh position map
            var positionMapWithoutTerminalNs = new int[read.ReadLength - nPrefixLength - nSuffixLength];

            Read.UpdatePositionMap(position, freshCigarWithoutTerminalNs, positionMapWithoutTerminalNs);
            // Remember the read's original clip lengths; they are only used by the optional remasking step below.
            var prefixSoftclip = read.CigarData.GetPrefixClip();
            var suffixSoftclip = read.CigarData.GetSuffixClip();

            RealignmentResult result      = null;
            var sequenceWithoutTerminalNs = read.Sequence.Substring(nPrefixLength, read.Sequence.Length - nPrefixLength - nSuffixLength);

            // layer on indels one by one, indels already sorted by ascending position
            // The same position map instance is passed to every call; only the result of the last call is kept.
            if (anchorOnLeft)
            {
                for (var i = 0; i < indels.Length; i++)
                {
                    result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indels[i], refSequence, true, positionMapWithoutTerminalNs);

                    // A single indel that cannot be added fails the whole attempt.
                    if (result == null)
                    {
                        return(null);
                    }
                }
            }
            else
            {
                for (var i = indels.Length - 1; i >= 0; i--)
                {
                    result = AddIndelAndGetResult(sequenceWithoutTerminalNs, indels[i], refSequence, false, positionMapWithoutTerminalNs);

                    // A single indel that cannot be added fails the whole attempt.
                    if (result == null)
                    {
                        return(null);
                    }
                }
            }


            // Softclip partial insertions at read ends
            // Assumption: there should be no softclips in the cigar by this time
            // Assumption: there should be exactly as many/the same indels in "indels" as are represented in the cigar in "result.Cigar".
            var  firstIndel   = indels[0];
            var  lastIndel    = indels[indels.Length - 1];
            bool hasInsertion = (firstIndel.Type == AlleleCategory.Insertion || lastIndel.Type == AlleleCategory.Insertion);

            if (hasInsertion)
            {
                if (_minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion)
                {
                    // Rebuild the cigar op by op; only the first and the last op are candidates for conversion to 'S'.
                    var newCigar = new CigarAlignment {
                    };
                    for (int i = 0; i < result.Cigar.Count; i++)
                    {
                        if (result.Cigar[i].Type == 'S')
                        {
                            throw new InvalidDataException(
                                      string.Format(
                                          "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping", result.Cigar[i].Type, result.Cigar));
                        }
                        else if (i == 0 && Helper.EvaluateInsertionAtReadEnds(result.Cigar[i], firstIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion))
                        {
                            newCigar.Add(new CigarOp('S', result.Cigar[i].Length));
                        }
                        else if (i == result.Cigar.Count - 1 && Helper.EvaluateInsertionAtReadEnds(result.Cigar[i], lastIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion))
                        {
                            newCigar.Add(new CigarOp('S', result.Cigar[i].Length));
                        }
                        else
                        {
                            newCigar.Add(result.Cigar[i]);
                        }
                    }

                    newCigar.Compress();
                    result.Cigar = newCigar;
                }
            }


            // Re-append the N-prefix and N-suffix: their bases get -1 in the position map and 'S' ops in the cigar.
            var nPrefixPositionMap = Enumerable.Repeat(-1, nPrefixLength);
            var nSuffixPositionMap = Enumerable.Repeat(-1, nSuffixLength);
            var finalPositionMap   = nPrefixPositionMap.Concat(positionMapWithoutTerminalNs).Concat(nSuffixPositionMap).ToArray();

            var finalCigar = new CigarAlignment {
                new CigarOp('S', (uint)nPrefixLength)
            };

            foreach (CigarOp op in result.Cigar)
            {
                finalCigar.Add(op);
            }
            finalCigar.Add(new CigarOp('S', (uint)nSuffixLength));
            finalCigar.Compress();
            result.Cigar = finalCigar;

            // Recompute the summary counters against the full read sequence now that the cigar covers the whole read.
            var UpdatedSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence, read.Sequence);

            result.NumIndels                    = UpdatedSummary.NumIndels;
            result.NumNonNMismatches            = UpdatedSummary.NumNonNMismatches;
            result.NumMismatchesIncludeSoftclip = UpdatedSummary.NumMismatchesIncludeSoftclip;
            result.NumNonNSoftclips             = UpdatedSummary.NumNonNSoftclips;
            result.NumSoftclips                 = UpdatedSummary.NumSoftclips;
            result.NumIndelBases                = UpdatedSummary.NumIndelBases;
            result.MismatchesIncludeSoftclip    = UpdatedSummary.MismatchesIncludeSoftclip;
            result.HasHighFrequencyIndel        = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);


            // In case realignment introduced a bunch of mismatch-Ms where there was previously softclipping, optionally re-mask them.
            // NOTE(review): result has already been dereferenced above, so the null check here can never be false - confirm and simplify.
            if (result != null && _remaskSoftclips)
            {
                var mismatchMap = Helper.GetMismatchMap(read.Sequence, finalPositionMap, refSequence);

                var softclipAdjustedCigar = Helper.SoftclipCigar(result.Cigar, mismatchMap, prefixSoftclip, suffixSoftclip, maskNsOnly: true, prefixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', false), suffixNs: Helper.GetCharacterBookendLength(read.Sequence, 'N', true));

                // Update position map to account for any softclipping added
                // Softclipped entries are overwritten with -2 (the N prefix/suffix entries were -1 until now).
                var adjustedPrefixClip = softclipAdjustedCigar.GetPrefixClip();
                for (var i = 0; i < adjustedPrefixClip; i++)
                {
                    finalPositionMap[i] = -2;
                }
                var adjustedSuffixClip = softclipAdjustedCigar.GetSuffixClip();
                for (var i = 0; i < adjustedSuffixClip; i++)
                {
                    finalPositionMap[finalPositionMap.Length - 1 - i] = -2;
                }

                var editDistance = Helper.GetEditDistance(read.Sequence, finalPositionMap, refSequence);
                if (editDistance == null)
                {
                    // This shouldn't happen at this point - we already have a successful result
                    throw new InvalidDataException("Edit distance is null for :" + read.Name + " with position map " +
                                                   string.Join(",", finalPositionMap) + " and CIGAR " + softclipAdjustedCigar);
                }

                var readHasPosition = finalPositionMap.Any(p => p >= 0);
                if (!readHasPosition)
                {
                    throw new InvalidDataException(string.Format("Read does not have any alignable bases. ({2} --> {0} --> {3}, {1})", freshCigarWithoutTerminalNs, string.Join(",", finalPositionMap), read.CigarData, softclipAdjustedCigar));
                }

                // The new start is the first non-negative entry of the position map.
                result.Position      = finalPositionMap.First(p => p >= 0);
                result.Cigar         = softclipAdjustedCigar;
                result.NumMismatches = editDistance.Value;


                var newSummary = Extensions.GetAlignmentSummary(result.Position - 1, result.Cigar, refSequence,
                                                                read.Sequence);

                // NOTE(review): NumIndels is not refreshed from newSummary here, unlike the other counters - confirm this is intended.
                result.NumNonNMismatches            = newSummary.NumNonNMismatches;
                result.NumMismatchesIncludeSoftclip = newSummary.NumMismatchesIncludeSoftclip;
                result.NumNonNSoftclips             = newSummary.NumNonNSoftclips;
                result.NumSoftclips              = newSummary.NumSoftclips;
                result.NumIndelBases             = newSummary.NumIndelBases;
                result.MismatchesIncludeSoftclip = newSummary.MismatchesIncludeSoftclip;
                result.HasHighFrequencyIndel     = indels.Any(t => t.Frequency > HighFrequencyIndelCutoff);
                // NOTE(review): this overwrites the NumIndelBases taken from newSummary a few lines above with the
                // value from the pre-remask summary - confirm which of the two assignments is the intended one.
                result.NumIndelBases             = UpdatedSummary.NumIndelBases;
            }

            return(result);
        }
Пример #23
0
        /// <summary>
        /// Realigns <paramref name="read"/> against <paramref name="indels"/> from one anchor side, then applies
        /// partial-insertion masking and softclip re-application before filling in the accepted-indel bookkeeping.
        /// </summary>
        /// <param name="indels">Indels to layer on; the context of <c>indels[0]</c> is used for masking and scoring.</param>
        /// <param name="indelContexts">Genome snippet lookup keyed by indel.</param>
        /// <param name="read">Read being realigned.</param>
        /// <param name="anchorOnLeft">True to anchor on the left, false to anchor on the right.</param>
        /// <param name="details">Precomputed per-read details (fresh cigar, position map, N and softclip lengths).</param>
        /// <param name="pairSpecific">Forwarded unchanged to <c>LayerOnIndels</c>.</param>
        /// <param name="indexes">Maps a position in <paramref name="indels"/> to the index recorded in <c>AcceptedIndels</c>.</param>
        /// <returns>
        /// The realignment result, or null when layering fails, when a deletion fails the right-anchor check below,
        /// or when any exception is raised (logged only if <c>_debug</c> is set).
        /// </returns>
        private RealignmentResult RealignForAnchor(HashableIndel[] indels, Dictionary <HashableIndel, GenomeSnippet> indelContexts,
                                                   Read read, bool anchorOnLeft, ReadToRealignDetails details, bool pairSpecific, int[] indexes)
        {
            try
            {
                var cigarTemplate = new CigarAlignment(details.FreshCigarWithoutTerminalNs);

                // Work on a private copy of the position map so the one held by "details" is not written to here.
                var workingMap = new PositionMap(details.PositionMapLength);
                for (int mapIndex = 0; mapIndex < details.PositionMapLength; mapIndex++)
                {
                    var mappedPosition = details.PositionMapWithoutTerminalNs.GetPositionAtIndex(mapIndex);
                    workingMap.UpdatePositionAtIndex(mapIndex, mappedPosition);
                }

                var result = new RealignmentResult();

                // Indels are layered on one at a time; LayerOnIndels reports true when that fails.
                var layeringFailed = LayerOnIndels(indels, indelContexts, anchorOnLeft, details.SequenceWithoutTerminalNs,
                                                   workingMap, ref result, pairSpecific);
                if (layeringFailed)
                {
                    return(null);
                }

                var context = indelContexts[indels[0]];

                // Optionally softclip partial insertions at the read ends.
                var shouldMaskPartialInsertions = _maskPartialInsertion || _minimumUnanchoredInsertionLength > 0;
                if (shouldMaskPartialInsertions)
                {
                    MaskPartialInsertion(indels, read, context.Sequence, result, context.StartPosition);
                }

                _softclipReapplier.ReapplySoftclips(read, details.NPrefixLength, details.NSuffixLength, workingMap, result, context,
                                                    details.PrefixSoftclip, details.SuffixSoftclip, cigarTemplate);

                result.AcceptedIndels         = new List<int>();
                result.AcceptedHashableIndels = new List<HashableIndel>();

                for (int acceptedIdx = 0; acceptedIdx < result.AcceptedIndelsInSubList.Count; acceptedIdx++)
                {
                    // TODO do we need to be more nuanced about this and only do it in duplication areas?
                    var subListIndex = result.AcceptedIndelsInSubList[acceptedIdx];
                    result.AcceptedIndels.Add(indexes[subListIndex]);

                    var acceptedIndel = indels[subListIndex];
                    result.AcceptedHashableIndels.Add(acceptedIndel);

                    if (acceptedIndel.Type != AlleleCategory.Deletion)
                    {
                        continue;
                    }

                    // Count the read bases right of the deletion, excluding any trailing softclip.
                    var basesStartAt    = result.IndelsAddedAt[acceptedIdx] + 1;
                    var finalOp         = result.Cigar[result.Cigar.Count - 1];
                    var trailingClip    = finalOp.Type == 'S' ? (int)finalOp.Length : 0;
                    var basesAfterIndel = read.Sequence.Length - basesStartAt - trailingClip;

                    // Reject when those bases are fewer than the indel's Length and are identical to the reference
                    // allele's bases following its first one.
                    var anchorRepeatsDeletedBases =
                        basesAfterIndel < acceptedIndel.Length &&
                        basesStartAt < read.Sequence.Length &&
                        read.Sequence.Substring(basesStartAt, basesAfterIndel) ==
                        acceptedIndel.ReferenceAllele.Substring(1, basesAfterIndel);

                    if (anchorRepeatsDeletedBases)
                    {
                        return(null);
                    }
                }

                // Only compute the mismatch-quality sum if nothing upstream has set it.
                if (result.SumOfMismatchingQualities == null)
                {
                    result.SumOfMismatchingQualities = Helper.GetSumOfMismatchQualities(read.Qualities, read.Sequence,
                                                                                        workingMap, context.Sequence,
                                                                                        context.StartPosition);
                }

                result.Indels = string.Join("|", indels.Select(indel => StringifyIndel(indel)));

                return(result);
            }
            catch (Exception e)
            {
                // Any failure is treated as "no realignment"; it is only logged in debug mode.
                if (_debug)
                {
                    var wrapped = new Exception($"Realign for anchor failed: read '{read.Name}' with indels {(string.Join("|", indels.Select(indel => StringifyIndel(indel))))}, anchoring on {(anchorOnLeft ? "left" : "right")}.", e);
                    Logger.WriteExceptionToLog(wrapped);
                }

                return(null);
            }
        }
Пример #24
0
 /// <summary>
 /// Decides whether it is worth looking for a better realignment than the current best.
 /// </summary>
 /// <param name="bestResultSoFar">Best result found so far; may be null.</param>
 /// <returns>True when there is no result yet or the current one has at least one mismatch.</returns>
 private bool NeedBetter(RealignmentResult bestResultSoFar)
 {
     if (bestResultSoFar == null)
     {
         return(true);
     }

     return(bestResultSoFar.NumMismatches > 0);
 }
Пример #25
0
        /// <summary>
        /// Adds each indel in turn to the alignment via <c>AddIndelAndGetResult</c>, walking the array forwards when
        /// anchoring on the left and backwards when anchoring on the right, and accumulates per-indel bookkeeping
        /// onto the final result.
        /// </summary>
        /// <param name="indels">Indels to layer on.</param>
        /// <param name="indelContexts">Lookup passed to <c>GetContext</c> to obtain each indel's snippet.</param>
        /// <param name="anchorOnLeft">Selects both the iteration direction and the anchor side passed downstream.</param>
        /// <param name="sequenceWithoutTerminalNs">Read sequence forwarded to <c>AddIndelAndGetResult</c>.</param>
        /// <param name="positionMapWithoutTerminalNs">Position map forwarded to <c>AddIndelAndGetResult</c> on every call.</param>
        /// <param name="result">
        /// Replaced by the result of each call. Null on failure; on success it carries the accumulated
        /// <c>Indels</c>, <c>AcceptedIndelsInSubList</c>, <c>NifiedAt</c> and <c>IndelsAddedAt</c>.
        /// </param>
        /// <param name="pairSpecific">Forwarded unchanged to <c>AddIndelAndGetResult</c>.</param>
        /// <returns>True when layering FAILED (an indel produced a null result); false on success.</returns>
        private bool LayerOnIndels(HashableIndel[] indels, Dictionary <HashableIndel, GenomeSnippet> indelContexts, bool anchorOnLeft,
                                   string sequenceWithoutTerminalNs, PositionMap positionMapWithoutTerminalNs, ref RealignmentResult result, bool pairSpecific)
        {
            var indelDescriptions  = "";
            var acceptedSubIndexes = new List<int>();
            var addedAtPositions   = new List<int>();
            var nifiedPositions    = new List<int>();

            // Left anchor: 0 .. Length-1. Right anchor: Length-1 .. 0.
            var firstIndex = anchorOnLeft ? 0 : indels.Length - 1;
            var step       = anchorOnLeft ? 1 : -1;

            for (var idx = firstIndex; idx >= 0 && idx < indels.Length; idx += step)
            {
                var candidate = indels[idx];
                var snippet   = GetContext(candidate, indelContexts);

                result = AddIndelAndGetResult(sequenceWithoutTerminalNs, candidate,
                                              snippet.Sequence, anchorOnLeft, positionMapWithoutTerminalNs,
                                              snippet.StartPosition, pairSpecific);

                // One indel that cannot be placed fails the whole layering.
                if (result == null)
                {
                    return(true);
                }

                indelDescriptions += result.Indels + "|";
                acceptedSubIndexes.Add(idx);
                addedAtPositions.AddRange(result.IndelsAddedAt);
                nifiedPositions.AddRange(result.NifiedAt);
            }

            result.Indels = indelDescriptions; // TODO can we remove this? Think it gets overwritten later...
            result.AcceptedIndelsInSubList = acceptedSubIndexes;
            result.NifiedAt      = nifiedPositions;
            result.IndelsAddedAt = addedAtPositions;

            return(false);
        }