Ejemplo n.º 1
0
        /// <summary>
        /// Attempts to realign <paramref name="read"/> against <paramref name="indels"/> from a single anchor side.
        /// </summary>
        /// <remarks>
        /// Builds a new <c>CigarAlignment</c> and a new <c>PositionMap</c> populated from <paramref name="details"/>,
        /// layers the indels on, optionally masks partial insertions, reapplies softclips, and then records which
        /// indels were accepted. Any exception thrown along the way is swallowed (logged only when <c>_debug</c> is
        /// set) and reported as a null result.
        /// </remarks>
        /// <param name="indels">Indels to layer on; per the inline note they are expected to be sorted by ascending position.</param>
        /// <param name="indelContexts">Genome snippet lookup; the snippet of <c>indels[0]</c> is used as the context.</param>
        /// <param name="read">The read being realigned.</param>
        /// <param name="anchorOnLeft">Forwarded to <c>LayerOnIndels</c>; also used in the debug log message.</param>
        /// <param name="details">Source of the cigar, position map, sequence and softclip/N-prefix information.</param>
        /// <param name="pairSpecific">Forwarded to <c>LayerOnIndels</c>.</param>
        /// <param name="indexes">Maps a position within <paramref name="indels"/> to the value stored in <c>AcceptedIndels</c>.</param>
        /// <returns>
        /// The populated result, or null when <c>LayerOnIndels</c> returns true, when an accepted deletion is
        /// rejected by the right-anchor check, or when an exception occurs.
        /// </returns>
        private RealignmentResult RealignForAnchor(HashableIndel[] indels, Dictionary<HashableIndel, GenomeSnippet> indelContexts,
                                                   Read read, bool anchorOnLeft, ReadToRealignDetails details, bool pairSpecific, int[] indexes)
        {
            try
            {
                var cigarCopy       = new CigarAlignment(details.FreshCigarWithoutTerminalNs);
                var positionMapCopy = new PositionMap(details.PositionMapLength);

                for (var mapIndex = 0; mapIndex < details.PositionMapLength; mapIndex++)
                {
                    var position = details.PositionMapWithoutTerminalNs.GetPositionAtIndex(mapIndex);
                    positionMapCopy.UpdatePositionAtIndex(mapIndex, position);
                }

                var realignment = new RealignmentResult();

                // Layer on indels one by one (already sorted by ascending position); a true return means give up.
                var shouldAbandon = LayerOnIndels(indels, indelContexts, anchorOnLeft, details.SequenceWithoutTerminalNs,
                                                  positionMapCopy, ref realignment, pairSpecific);
                if (shouldAbandon)
                {
                    return null;
                }

                var firstIndelContext = indelContexts[indels[0]];

                // Softclip partial insertions at read ends
                if (_maskPartialInsertion || _minimumUnanchoredInsertionLength > 0)
                {
                    MaskPartialInsertion(indels, read, firstIndelContext.Sequence, realignment, firstIndelContext.StartPosition);
                }

                _softclipReapplier.ReapplySoftclips(read, details.NPrefixLength, details.NSuffixLength, positionMapCopy,
                                                    realignment, firstIndelContext, details.PrefixSoftclip,
                                                    details.SuffixSoftclip, cigarCopy);

                realignment.AcceptedIndels         = new List<int>();
                realignment.AcceptedHashableIndels = new List<HashableIndel>();

                for (var acceptedIdx = 0; acceptedIdx < realignment.AcceptedIndelsInSubList.Count; acceptedIdx++)
                {
                    // TODO do we need to be more nuanced about this and only do it in duplication areas?
                    var subIndex = realignment.AcceptedIndelsInSubList[acceptedIdx];
                    realignment.AcceptedIndels.Add(indexes[subIndex]);

                    var acceptedIndel = indels[subIndex];
                    realignment.AcceptedHashableIndels.Add(acceptedIndel);

                    if (acceptedIndel.Type != AlleleCategory.Deletion)
                    {
                        continue;
                    }

                    // Number of read bases between the deletion and the trailing softclip (if any).
                    var firstBaseAfterIndel = realignment.IndelsAddedAt[acceptedIdx] + 1;
                    var finalOp             = realignment.Cigar[realignment.Cigar.Count - 1];
                    var trailingSoftclip    = 0;
                    if (finalOp.Type == 'S')
                    {
                        trailingSoftclip = (int)finalOp.Length;
                    }

                    var basesToRight = read.Sequence.Length - firstBaseAfterIndel - trailingSoftclip;

                    // Reject when the right anchor is shorter than the deletion and merely repeats the
                    // leading deleted reference bases (ReferenceAllele from index 1 onwards).
                    if (basesToRight < acceptedIndel.Length
                        && firstBaseAfterIndel < read.Sequence.Length
                        && read.Sequence.Substring(firstBaseAfterIndel, basesToRight) ==
                           acceptedIndel.ReferenceAllele.Substring(1, basesToRight))
                    {
                        return null;
                    }
                }

                if (realignment.SumOfMismatchingQualities == null)
                {
                    realignment.SumOfMismatchingQualities = Helper.GetSumOfMismatchQualities(
                        read.Qualities, read.Sequence, positionMapCopy,
                        firstIndelContext.Sequence, firstIndelContext.StartPosition);
                }

                realignment.Indels = string.Join("|", indels.Select(indel => StringifyIndel(indel)));

                return realignment;
            }
            catch (Exception e)
            {
                if (_debug)
                {
                    var readName   = read.Name;
                    var indelList  = string.Join("|", indels.Select(indel => StringifyIndel(indel)));
                    var anchorSide = anchorOnLeft ? "left" : "right";
                    Logger.WriteExceptionToLog(new Exception($"Realign for anchor failed: read '{readName}' with indels {indelList}, anchoring on {anchorSide}.", e));
                }

                return null;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tries realigning <paramref name="read"/> against every ranked indel on its own and against every
        /// pair of ranked indels that can coexist, keeping the best result as judged by <c>_comparer</c>.
        /// </summary>
        /// <param name="rankedIndels">Candidate indels; each single indel and each pair (i, j) with j &gt; i is considered.</param>
        /// <param name="indelContexts">Genome snippet lookup, passed through to <c>RealignToTargets</c>.</param>
        /// <param name="read">The read to realign.</param>
        /// <param name="attemptedTargetSides">
        /// Receives the number of anchor-side attempts counted: two per single indel, plus one per side not
        /// skipped for each pair.
        /// </param>
        /// <param name="fromPairSpecificIndels">Forwarded to <c>CanCoexist</c> and, as <c>pairSpecific</c>, to <c>RealignToTargets</c>.</param>
        /// <returns>
        /// The best result seen when the search ends (returned early once <c>IsUnbeatable</c> accepts it);
        /// may be null if the comparer never yielded a result.
        /// </returns>
        public RealignmentResult GetBestAlignment(List<HashableIndel> rankedIndels,
                                                  Dictionary<HashableIndel, GenomeSnippet> indelContexts, Read read, out int attemptedTargetSides, bool fromPairSpecificIndels)
        {
            RealignmentResult bestResultSoFar = null;

            attemptedTargetSides = 0;

            // Computed once up front (this used to be recomputed inside the loop).
            var nPrefixLength = read.GetNPrefix();

            if (_keepProbeSoftclips)
            {
                if ((_keepBothSideSoftclips || !read.BamAlignment.IsReverseStrand() || !read.BamAlignment.IsPaired()) && nPrefixLength == 0)
                {
                    nPrefixLength = (int)read.CigarData.GetPrefixClip();
                }
            }

            var details = new ReadToRealignDetails(read, read.GetAdjustedPosition(true, probePrefix: _keepProbeSoftclips ?
                                                                                  nPrefixLength : 0), _keepProbeSoftclips, _keepBothSideSoftclips);

            var positionFromRight =
                read.GetAdjustedPosition(false, probePrefix: _keepProbeSoftclips ? nPrefixLength : 0);
            ReadToRealignDetails rightAnchoredDetails = null;

            // Right-anchored details are only built for a non-negative adjusted position; when they stay
            // null, RealignToTargets skips the right-anchored attempt.
            if (positionFromRight >= 0)
            {
                rightAnchoredDetails = new ReadToRealignDetails(read, positionFromRight, _keepProbeSoftclips, _keepBothSideSoftclips);
            }

            // align to all single indels and all compatible pairs of indels
            // try to skip alignment if we know it will fail
            for (var i = 0; i < rankedIndels.Count; i++)
            {
                var indel1  = rankedIndels[i];
                var indexes = new int[] { i };

                // try aligning to one indel
                _oneIndelSimpleTargets[0] = indel1;
                var indel1Result = RealignToTargets(read, _oneIndelSimpleTargets, indelContexts, details, rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes);
                attemptedTargetSides += 2;

                // update best result so far for one indel
                bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel1Result);
                if (IsUnbeatable(bestResultSoFar))
                {
                    return bestResultSoFar;
                }

                var indexes2 = new int[2];
                for (var j = i + 1; j < rankedIndels.Count; j++)
                {
                    var indel2 = rankedIndels[j];
                    if (!CanCoexist(indel1, indel2, fromPairSpecificIndels))
                    {
                        continue;
                    }

                    _twoIndelSimpleTargets[0] = indel1;
                    _twoIndelSimpleTargets[1] = indel2;

                    Array.Sort(_twoIndelSimpleTargets, CompareSimple); // need to sort by position

                    // The sort may have swapped the pair. indexes2 must stay parallel to the sorted targets,
                    // because RealignForAnchor translates a position in the targets array into a rankedIndels
                    // index via this array; filling it before sorting mislabels accepted indels after a swap.
                    var indel1IsFirst = _twoIndelSimpleTargets[0].Equals(indel1);
                    indexes2[0] = indel1IsFirst ? i : j;
                    indexes2[1] = indel1IsFirst ? j : i;

                    // for optimization, don't try to align from a given side if we already failed aligning the indel on that side
                    var alreadyFailedFromLeft  = indel1Result == null && indel1IsFirst;
                    var alreadyFailedFromRight = indel1Result == null && _twoIndelSimpleTargets[1].Equals(indel1);
                    if (!alreadyFailedFromLeft)
                    {
                        attemptedTargetSides++;
                    }
                    if (!alreadyFailedFromRight)
                    {
                        attemptedTargetSides++;
                    }

                    var indel2Result = RealignToTargets(read, _twoIndelSimpleTargets, indelContexts, details, rightAnchoredDetails, pairSpecific: fromPairSpecificIndels, indexes: indexes2,
                                                        skipLeftAnchored: alreadyFailedFromLeft, skipRightAnchored: alreadyFailedFromRight);
                    bestResultSoFar = _comparer.GetBetterResult(bestResultSoFar, indel2Result);
                }
            }

            return bestResultSoFar;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Realigns <paramref name="read"/> against <paramref name="indels"/> from the left anchor and then the
        /// right anchor, and returns whichever result <c>_comparer</c> prefers.
        /// </summary>
        /// <param name="read">The read to realign.</param>
        /// <param name="indels">Target indels, forwarded to <c>RealignForAnchor</c>.</param>
        /// <param name="indelContexts">Genome snippet lookup, forwarded to <c>RealignForAnchor</c>.</param>
        /// <param name="leftAnchoredDetails">Details used for the left-anchored attempt.</param>
        /// <param name="rightAnchoredDetails">Details used for the right-anchored attempt; null disables that attempt.</param>
        /// <param name="pairSpecific">Forwarded to <c>RealignForAnchor</c>.</param>
        /// <param name="indexes">Forwarded to <c>RealignForAnchor</c>.</param>
        /// <param name="skipLeftAnchored">When true, the left-anchored attempt is not made.</param>
        /// <param name="skipRightAnchored">When true, the right-anchored attempt is not made.</param>
        /// <returns>
        /// The left-anchored result if <c>IsUnbeatable</c> accepts it; otherwise the comparer's pick of the two
        /// attempts, with its <c>FailedForLeftAnchor</c>/<c>FailedForRightAnchor</c> flags set for attempts
        /// that produced no result (including skipped ones).
        /// </returns>
        public RealignmentResult RealignToTargets(Read read, HashableIndel[] indels,
                                                  Dictionary<HashableIndel, GenomeSnippet> indelContexts, ReadToRealignDetails leftAnchoredDetails, ReadToRealignDetails rightAnchoredDetails, bool pairSpecific, int[] indexes,
                                                  bool skipLeftAnchored = false, bool skipRightAnchored = false)
        {
            // when aligning with left anchor, if there's an insertion and a deletion at the same position
            // we need to process the insertion first.  this is an artifact of how we adjust positions after an insertion
            // luckily this is already how they are sorted in the default sort function
            RealignmentResult leftAttempt = null;
            if (!skipLeftAnchored)
            {
                leftAttempt = RealignForAnchor(indels, indelContexts, read, true, leftAnchoredDetails, pairSpecific, indexes);
            }

            if (IsUnbeatable(leftAttempt))
            {
                return leftAttempt;
            }

            // when aligning with right anchor, if there's an insertion and a deletion at the same position
            // we need to process the deletion first.
            // this is because the position of indels are reported on the left side of the indel, and the deletion
            // could have adjusted the other side positions such that an insertion comes into view (which otherwise might not)
            RealignmentResult rightAttempt = null;
            if (!skipRightAnchored && rightAnchoredDetails != null)
            {
                rightAttempt = RealignForAnchor(indels, indelContexts, read, false, rightAnchoredDetails, pairSpecific, indexes);
            }

            var preferred = _comparer.GetBetterResult(leftAttempt, rightAttempt);

            if (preferred != null)
            {
                preferred.FailedForLeftAnchor  = leftAttempt == null;
                preferred.FailedForRightAnchor = rightAttempt == null;
            }

            return preferred;
        }