Esempio n. 1
0
        /// <summary>
        /// Builds a rewriter from its collaborators and options. Every argument is stored
        /// as-is in the matching private field; the alignment buffer starts out empty and
        /// log messages go to the console by default.
        /// </summary>
        /// <param name="bamReader">Reader stored in <c>_bamReader</c>.</param>
        /// <param name="bamWriter">Writer stored in <c>_bamWriter</c>.</param>
        /// <param name="filter">Pair filter stored in <c>_filter</c>.</param>
        /// <param name="pairHandler">Read pair handler stored in <c>_pairHandler</c>.</param>
        /// <param name="bufferSize">Optional buffer size stored in <c>_bufferSize</c> (defaults to 100000).</param>
        /// <param name="getUnpaired">Flag stored in <c>_getUnpaired</c> (defaults to false).</param>
        /// <param name="chrFilter">Optional chromosome filter stored in <c>_chrFilter</c> (defaults to null).</param>
        public BamRewriter(IBamReader bamReader, IBamWriter bamWriter, IAlignmentPairFilter filter,
                           IReadPairHandler pairHandler, long? bufferSize = 100000, bool getUnpaired = false, string chrFilter = null)
        {
            // Default sink for log messages: standard output.
            OnLog = text => Console.WriteLine(text);

            // Working state: nothing buffered yet.
            this._alignmentBuffer = new List<BamAlignment>();

            // Options.
            this._chrFilter = chrFilter;
            this._getUnpaired = getUnpaired;
            this._bufferSize = bufferSize;

            // Collaborators.
            this._pairHandler = pairHandler;
            this._filter = filter;
            this._bamWriter = bamWriter;
            this._bamReader = bamReader;
        }
        /// <summary>
        /// Asks <paramref name="pairHandler"/> to extract the reads for <paramref name="readPair"/>
        /// and classifies the outcome. Exactly one extracted alignment counts as a successful
        /// stitch; any other count is classified as <c>FailStitch</c>.
        /// </summary>
        /// <param name="readPair">The pair to stitch; its <c>Stitched</c> flag is set to true on success.</param>
        /// <param name="pairHandler">Handler whose <c>ExtractReads</c> produces the candidate alignments.</param>
        /// <param name="classification">Receives the stitch classification for the pair.</param>
        /// <returns>
        /// The alignments produced by the handler, materialized once so that the objects
        /// returned are the same ones that were counted and tagged here.
        /// </returns>
        private IEnumerable <BamAlignment> TryStitch(ReadPair readPair, IReadPairHandler pairHandler, out PairClassification classification)
        {
            // TODO if we end up allowing NM calculation in here, this will become true.
            const bool allowStitchingOnImperfectReads = false;

            // Enumerate the handler's result exactly once. The handler only promises an
            // enumerable; if it is lazy, re-enumerating it would re-run the extraction and
            // hand back fresh objects that never received the R1 tags copied below.
            var extractedAlignments = pairHandler.ExtractReads(readPair).ToList();

            if (extractedAlignments.Count != 1)
            {
                classification = PairClassification.FailStitch;
                return extractedAlignments;
            }

            readPair.Stitched = true;
            classification    = PairClassification.PerfectStitched;

            // Currently disabled at compile time (see the TODO above); kept so the
            // imperfect-stitch classification can be re-enabled without being rewritten.
            if (allowStitchingOnImperfectReads)
            {
                var stitchedResult = extractedAlignments[0];
                int? nm            = 0;
                //TODO handle this if it is a hit on performance. Making it simple for now because the previous logic where we were lazy evaluating was a bit skewed
                var containsImperfections = ReadContainsImperfections(stitchedResult, _trustSoftclips);
                //nm = stitchedResult.GetIntTag("NM"); // TODO reinstate this if stitched read has proper NM

                var numMismatchesInR1 = readPair.Read1.GetIntTag("NM");
                var numMismatchesInR2 = readPair.Read2.GetIntTag("NM");
                if (containsImperfections ||
                    (nm > 0 || numMismatchesInR1 > 0 || numMismatchesInR2 > 0))
                {
                    classification = PairClassification.ImperfectStitched;

                    if (numMismatchesInR1 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                        numMismatchesInR2 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                        !containsImperfections)
                    {
                        classification = PairClassification.SingleMismatchStitched;
                    }
                    else if (nm >= _numMismatchesToBeConsideredMessy ||
                             numMismatchesInR1 >= _numMismatchesToBeConsideredMessy ||
                             numMismatchesInR2 >= _numMismatchesToBeConsideredMessy)
                    {
                        classification = PairClassification.MessyStitched;
                    }
                }
            }

            // Carry the configured string tags over from read 1 onto the stitched alignment(s).
            foreach (var alignment in extractedAlignments)
            {
                foreach (var tag in _tagsToKeepFromR1)
                {
                    var r1Tag = readPair.Read1.GetStringTag(tag);
                    if (r1Tag != null)
                    {
                        alignment.ReplaceOrAddStringTag(tag, r1Tag);
                    }
                }
            }

            return extractedAlignments;
        }
        /// <summary>
        /// Classifies a read pair and returns the alignments to carry forward, wrapped in a
        /// <c>PairResult</c>. Duplicates, indel-containing pairs, split pairs and incomplete
        /// pairs are handed off to dedicated handlers and returned early; the remaining pairs
        /// are classified by their NM mismatch counts and, when eligible, passed to
        /// <c>TryStitch</c>.
        /// </summary>
        /// <param name="readPair">The pair to classify.</param>
        /// <param name="pairHandler">Handler forwarded to <c>TryStitch</c> when stitching is attempted.</param>
        /// <returns>The classification result for the pair.</returns>
        public PairResult GetBamAlignmentsAndClassification(ReadPair readPair, IReadPairHandler pairHandler)
        {
            // Duplicates short-circuit: returned as-is with the Duplicate classification.
            if (readPair.PairStatus == PairStatus.Duplicate)
            {
                // TODO hasIndels and numMismatches and split don't have meaning in an unusable dup, but it's a bit misleading to set them..
                // Also, it's kind of silly to extract those alignments if we're going to set it to unusable anyway.
                var alignments = readPair.GetAlignments().ToList();

                return(new PairResult(alignments: alignments, readPair: readPair, classification: PairClassification.Duplicate, hasIndels: false,
                                      isSplit: false, numMismatchesInSingleton: 0, softclipLengthForIndelRead: 0)
                {
                    IsReputableIndelContaining = false
                });
            }


            var classification = PairClassification.Unknown;
            IEnumerable <BamAlignment> bamAlignmentList = null;

            // NM values stay null until read from the tags; comparisons against a null
            // int? below evaluate to false (lifted operators).
            int?numMismatchesInR1 = null;
            int?numMismatchesInR2 = null;

            var r1HasIndels = OverlappingIndelHelpers.ReadContainsIndels(readPair.Read1);
            var r2HasIndels = OverlappingIndelHelpers.ReadContainsIndels(readPair.Read2);
            var hasIndels   = r1HasIndels || r2HasIndels;


            if (IsCompletedPairedPair(readPair))
            {
                if (BothReadsHighQuality(readPair))
                {
                    numMismatchesInR1 = readPair.Read1.GetIntTag("NM");
                    numMismatchesInR2 = readPair.Read2.GetIntTag("NM");

                    var tryStitch = true;

                    if (hasIndels)
                    {
                        // Indel-containing pairs are delegated and returned immediately;
                        // a missing NM tag is treated as 0 mismatches for that call.
                        if (numMismatchesInR1 == null && numMismatchesInR2 == null)
                        {
                            Logger.WriteWarningToLog(
                                $"Found indel-containing read without NM: '{readPair.Name}', likely indicating that NM is not set on any read. Consider preprocessing the BAM to calculate NM tags for best results.");
                        }

                        return(HandleIndelPairIfStitchUnallowed(readPair, numMismatchesInR1 ?? 0,
                                                                numMismatchesInR2 ?? 0, r1HasIndels, r2HasIndels));
                    }
                    else
                    {
                        // TODO if not realigning anything (or not realigning imperfects), go ahead and stitch immediately
                        // TODO why are we using this bool multiple times
                        // ^ Because there's no point checking for imperfections if we don't care about softclips -- we already know there are no indels because this is in the else of if(hasIndels)

                        if (!_trustSoftclips && (ReadContainsImperfections(readPair.Read1, _trustSoftclips) ||
                                                 ReadContainsImperfections(readPair.Read2, _trustSoftclips)))
                        {
                            // Softclips are not trusted and at least one read has imperfections:
                            // do not stitch, and keep the original alignments.
                            tryStitch      = false;
                            classification =
                                ClassifySoftclipContainingPairGivenSoftclipDistrust(readPair, numMismatchesInR1,
                                                                                    numMismatchesInR2);
                            bamAlignmentList = readPair.GetAlignments();
                        }
                        else
                        {
                            // NM was already read at the top of this branch; these re-reads
                            // only run when that first read returned null.
                            if (numMismatchesInR1 == null)
                            {
                                numMismatchesInR1 = readPair.Read1.GetIntTag("NM");
                            }

                            if (numMismatchesInR2 == null)
                            {
                                numMismatchesInR2 = readPair.Read2.GetIntTag("NM");
                            }

                            // At least one read is at or above the messy threshold.
                            if (numMismatchesInR1 >= _numMismatchesToBeConsideredMessy ||
                                numMismatchesInR2 >= _numMismatchesToBeConsideredMessy)
                            {
                                classification = PairClassification.UnstitchMessy;
                                tryStitch      = false;

                                if (numMismatchesInR1 <= 1 || numMismatchesInR2 <= 1)
                                {
                                    // One of the reads is clean

                                    tryStitch = false;

                                    // The classification direction is taken from the strand
                                    // of the read that is NOT the clean one.
                                    if (numMismatchesInR1 <= 1)
                                    {
                                        // R1 is the clean one.
                                        if (readPair.Read2.IsReverseStrand())
                                        {
                                            classification = PairClassification.UnstitchReverseMessy;
                                        }
                                        else
                                        {
                                            classification = PairClassification.UnstitchForwardMessy;
                                        }
                                    }
                                    else
                                    {
                                        // R2 is the clean one.
                                        if (readPair.Read1.IsReverseStrand())
                                        {
                                            classification = PairClassification.UnstitchReverseMessy;
                                        }
                                        else
                                        {
                                            classification = PairClassification.UnstitchForwardMessy;
                                        }
                                    }
                                }

                                bamAlignmentList = readPair.GetAlignments();
                            }
                            // If either NM is null the sum is null and this comparison is false.
                            else if (numMismatchesInR1 + numMismatchesInR2 == 0)
                            {
                                classification   = PairClassification.UnstitchPerfect;
                                bamAlignmentList = readPair.GetAlignments();
                            }
                            else if (numMismatchesInR1 <= 1 && numMismatchesInR2 <= 1)
                            {
                                classification   = PairClassification.UnstitchSingleMismatch;
                                bamAlignmentList = readPair.GetAlignments();
                            }
                            else
                            {
                                classification   = PairClassification.UnstitchImperfect;
                                bamAlignmentList = readPair.GetAlignments();
                            }
                        }

                        classification = AdjustClassificationForMultimapper(readPair, classification);
                    }

                    // The three checks below can only switch stitching off, never on.
                    if (classification == PairClassification.UnstitchMessySuspiciousRead)
                    {
                        tryStitch = false;
                    }

                    if (_skipStitch)
                    {
                        tryStitch = false;
                    }

                    if (classification != PairClassification.UnstitchPerfect)
                    {
                        //For now we can't stitch anything else because we can't properly calculate NM!!
                        tryStitch = false;
                    }

                    if (!tryStitch)
                    {
                        // NOTE(review): every visible path that reaches here has already
                        // assigned bamAlignmentList, so this looks like a defensive fallback.
                        if (bamAlignmentList == null)
                        {
                            bamAlignmentList = readPair.GetAlignments().ToList();
                            classification   = PairClassification.Unstitchable;
                        }
                    }
                    else
                    {
                        // TryStitch overwrites both the alignment list and the classification.
                        bamAlignmentList = TryStitch(readPair, pairHandler, out classification);
                    }
                }
                else if (OneReadIsHighQuality(readPair))
                {
                    classification = PairClassification.Split;
                    if (hasIndels)
                    {
                        // NM defaults to 0 when the read or its tag is missing.
                        numMismatchesInR1 = numMismatchesInR1 ?? readPair.Read1?.GetIntTag("NM") ?? 0;
                        numMismatchesInR2 = numMismatchesInR2 ?? readPair.Read2?.GetIntTag("NM") ?? 0;

                        return(HandlePairContainingIndels(readPair, r1HasIndels, r2HasIndels, numMismatchesInR1.Value,
                                                          numMismatchesInR2.Value, true, PairClassification.Split, true));
                    }
                    // Without indels this falls through with bamAlignmentList still null and
                    // is routed to HandleSplitNonIndelPair below because classification is Split.
                    // NOTE(review): confirm HandleSplitNonIndelPair tolerates a null alignment list.
                }
                else
                {
                    // Neither read is high quality.
                    classification   = PairClassification.Unusable;
                    bamAlignmentList = readPair.GetAlignments().ToList();
                }
            }
            else
            {
                // Incomplete pair: delegate, with NM defaulting to 0 when the read or its tag is missing.
                numMismatchesInR1 = numMismatchesInR1 ?? readPair.Read1?.GetIntTag("NM") ?? 0;
                numMismatchesInR2 = numMismatchesInR2 ?? readPair.Read2?.GetIntTag("NM") ?? 0;
                return(ClassifyIncompletePair(readPair, r1HasIndels, r2HasIndels, numMismatchesInR1.Value, numMismatchesInR2.Value));
            }

            // TODO - not sure why I originally had this double-check on whether pairs were split? shouldn't this already be evident from the pair status?
            //var isSplit = bamAlignmentList?.Count() > 0 && bamAlignmentList?.Select(x => x.RefID).Distinct().Count() > 1;
            // isSplit is always false now; the split decision below rests on the
            // classification and the pair status alone.
            var isSplit = false;

            if (isSplit || classification == PairClassification.Split || readPair.PairStatus == PairStatus.SplitChromosomes ||
                readPair.PairStatus == PairStatus.MateNotFound || readPair.PairStatus == PairStatus.MateUnmapped)
            {
                return(HandleSplitNonIndelPair(readPair, bamAlignmentList, hasIndels, isSplit));
            }

            var pr = new PairResult(bamAlignmentList.ToList(), readPair, classification, hasIndels, isSplit);

            // Messy pairs may be reclassified as UnstitchMessySuspiciousMd when MD checking is enabled.
            if (classification == PairClassification.UnstitchMessy || classification == PairClassification.UnstitchMessySuspiciousRead)
            {
                if (_checkMd && HasSuspiciousMd(readPair, numMismatchesInR1, numMismatchesInR2, pr))
                {
                    classification    = PairClassification.UnstitchMessySuspiciousMd;
                    pr.Classification = classification;
                }
            }

            return(pr);
        }
Esempio n. 4
0
 /// <summary>
 /// Creates a stitcher from the pair handler and status handler it will use.
 /// </summary>
 /// <param name="stitchedPairHandler">Handler stored in <c>_stitchedPairHandler</c>.</param>
 /// <param name="statusHandler">Handler stored in <c>_statusHandler</c>.</param>
 /// <param name="tagsToKeepFromR1">
 /// Tag names stored in <c>_tagsToKeepFromR1</c>; when null, an empty list is stored instead.
 /// </param>
 public PostRealignmentStitcher(IReadPairHandler stitchedPairHandler, IStatusHandler statusHandler, List<string> tagsToKeepFromR1 = null)
 {
     // Substitute an empty list so the field is never null.
     if (tagsToKeepFromR1 == null)
     {
         tagsToKeepFromR1 = new List<string>();
     }

     this._tagsToKeepFromR1    = tagsToKeepFromR1;
     this._statusHandler       = statusHandler;
     this._stitchedPairHandler = stitchedPairHandler;
 }