/// <summary>
/// Classifies <paramref name="readpair"/> with a <c>ReadPairClassifierAndExtractor</c> backed by a mocked
/// pair handler, then asserts on the classification, on how many times stitching was attempted, and on
/// the number of alignments returned.
/// </summary>
/// <param name="readpair">Pair under test.</param>
/// <param name="expectedClassification">Classification the extractor is expected to report.</param>
/// <param name="expectedNumReads">Expected number of alignments in the result.</param>
/// <param name="trustSoftclips">Forwarded to the extractor constructor.</param>
/// <param name="deferStitchIndelReads">Not read by this helper.</param>
/// <param name="shouldTryStitch">Whether the mocked handler is expected to be called exactly once (true) or never (false).</param>
/// <param name="stageStitchSucceed">When true the mock returns only read 1; otherwise it returns both reads.</param>
/// <param name="treatAbnormalOrientationAsImproper">Forwarded to the extractor constructor.</param>
/// <param name="messyMapq">Forwarded to the extractor constructor.</param>
/// <param name="checkMd">Forwarded to the extractor constructor.</param>
private void VerifyClassificationAndExtraction(ReadPair readpair, PairClassification expectedClassification,
                                                       int expectedNumReads, bool trustSoftclips, bool deferStitchIndelReads = true, bool shouldTryStitch = true, bool stageStitchSucceed = true, bool treatAbnormalOrientationAsImproper = false,
                                                       int messyMapq = 30, bool checkMd = false)
        {
            // A single returned read stands in for a successful stitch; two reads for a failed one.
            var stagedReads = new List<BamAlignment> { readpair.Read1 };
            if (!stageStitchSucceed)
            {
                stagedReads.Add(readpair.Read2);
            }

            var handlerMock = new Mock<IReadPairHandler>();
            handlerMock.Setup(h => h.ExtractReads(It.IsAny<ReadPair>())).Returns(stagedReads);

            var classifier = new ReadPairClassifierAndExtractor(
                trustSoftclips,
                messyMapq: messyMapq,
                treatAbnormalOrientationAsImproper: treatAbnormalOrientationAsImproper,
                checkMd: checkMd);

            var outcome = classifier.GetBamAlignmentsAndClassification(readpair, handlerMock.Object);

            Assert.Equal(expectedClassification, outcome.Classification);

            var expectedStitchCalls = shouldTryStitch ? 1 : 0;
            handlerMock.Verify(h => h.ExtractReads(It.IsAny<ReadPair>()), Times.Exactly(expectedStitchCalls));

            var returnedReads = outcome.Alignments.ToList();
            Assert.Equal(expectedNumReads, returnedReads.Count);
        }
コード例 #2
0
        /// <summary>
        /// Builds an unordered <see cref="ActionBlock{TInput}"/> that accumulates each incoming batch of pair
        /// results into <paramref name="pairResultLookup"/> under <paramref name="classification"/>, adds the
        /// batch's primary-read count to the per-category tally, and prunes completed tasks from the wait set.
        /// </summary>
        /// <param name="classification">Category key under which batches are stored and counted.</param>
        /// <param name="pairResultLookup">Per-category accumulator that receives the batches.</param>
        /// <returns>The configured action block.</returns>
        public ActionBlock <PairResult[]> GetActionablePairsBlock(PairClassification classification, ConcurrentDictionary <PairClassification, List <PairResult> > pairResultLookup)
        {
            var blockOptions = new ExecutionDataflowBlockOptions { EnsureOrdered = false };

            return new ActionBlock<PairResult[]>(batch =>
            {
                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Started handling {classification} block for region {_chrom}:{_startPosition}-{_endPosition}.");
                }

                // First batch seeds the list; later batches are appended into a fresh list.
                pairResultLookup.AddOrUpdate(
                    classification,
                    batch.ToList(),
                    (key, existing) => existing.Concat(batch).ToList());

                var primaryReadsInBatch = batch.Sum(result => result.ReadPair.NumPrimaryReads);
                _categoryLookup.AddOrUpdate(
                    classification,
                    primaryReadsInBatch,
                    (key, runningTotal) => runningTotal + primaryReadsInBatch);

                // Drop bookkeeping entries for tasks that have already finished.
                foreach (var pending in _allToWaitFor.Keys)
                {
                    if (pending.IsCompleted)
                    {
                        _allToWaitFor.TryRemove(pending, out _);
                    }
                }

                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Done handling {classification} block for region {_chrom}:{_startPosition}-{_endPosition}.");
                }
            }, blockOptions);
        }
コード例 #3
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is one of: Disagree, IndelSingleton,
 /// IndelUnstitchable or UnstitchIndel.
 /// </summary>
 private static bool ClassificationContainsQualityIndels(PairClassification classification)
 {
     // TODO should this have the other indel read types?
     switch (classification)
     {
         case PairClassification.Disagree:
         case PairClassification.IndelSingleton:
         case PairClassification.IndelUnstitchable:
         case PairClassification.UnstitchIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #4
0
 /// <summary>
 /// Records usage statistics for <paramref name="handle"/> and puts it back on the handle queue.
 /// Does nothing when <paramref name="handle"/> is null.
 /// </summary>
 /// <param name="chrom">Not read by this method.</param>
 /// <param name="classification">Not read by this method.</param>
 /// <param name="idNum">Not read by this method.</param>
 /// <param name="numWritten">Number of alignments to add to the handle's written tally.</param>
 /// <param name="handle">Writer handle being released; may be null.</param>
 public void DoneWithWriter(string chrom, PairClassification classification, int idNum, int numWritten = 0, IBamWriterHandle handle = null)
 {
     if (handle == null)
     {
         return;
     }

     var fileName = _bamFileHandleNames[handle];

     _bamFileAlignmentsWritten.AddOrUpdate(fileName, numWritten, (key, soFar) => soFar + numWritten);
     _bamFileTimesUsed.AddOrUpdate(fileName, 1, (key, uses) => uses + 1);

     // Make the handle available to the next caller.
     _handles.Enqueue(handle);
 }
コード例 #5
0
 /// <summary>
 /// Creates a pair result, storing each argument in the property of the corresponding name.
 /// </summary>
 /// <param name="alignments">Stored in <c>Alignments</c>.</param>
 /// <param name="readPair">Stored in <c>ReadPair</c>.</param>
 /// <param name="classification">Stored in <c>Classification</c>; defaults to Unknown.</param>
 /// <param name="hasIndels">Stored in <c>HasIndels</c>.</param>
 /// <param name="isSplit">Stored in <c>IsSplit</c>.</param>
 /// <param name="numMismatchesInSingleton">Stored in <c>NumMismatchesInSingleton</c>.</param>
 /// <param name="softclipLengthForIndelRead">Stored in <c>SoftclipLengthForIndelRead</c>.</param>
 public PairResult(IEnumerable <BamAlignment> alignments, ReadPair readPair,
                   PairClassification classification = PairClassification.Unknown, bool hasIndels = false, bool isSplit = false, int numMismatchesInSingleton = 0, int softclipLengthForIndelRead = 0)
 {
     // Assigned in parameter order.
     Alignments = alignments;
     ReadPair = readPair;
     Classification = classification;
     HasIndels = hasIndels;
     IsSplit = isSplit;
     NumMismatchesInSingleton = numMismatchesInSingleton;
     SoftclipLengthForIndelRead = softclipLengthForIndelRead;
 }
コード例 #6
0
        /// <summary>
        /// Builds an action block that passes each incoming alignment list to <c>WriteAlignments</c> for
        /// <paramref name="classification"/>. The block is configured with a degree of parallelism of 1
        /// and ordering disabled.
        /// </summary>
        /// <param name="classification">Classification passed through to <c>WriteAlignments</c>.</param>
        /// <returns>The configured writer block.</returns>
        public ActionBlock <List <BamAlignment> > GetWriterBlock(PairClassification classification = PairClassification.Unknown)
        {
            var writerOptions = new ExecutionDataflowBlockOptions
            {
                EnsureOrdered          = false,
                MaxDegreeOfParallelism = 1
            };

            return new ActionBlock<List<BamAlignment>>(
                batch => WriteAlignments(_chrom, _writerSource, classification, batch),
                writerOptions);
        }
コード例 #7
0
        /// <summary>
        /// Hands out a BAM writer handle: a previously released handle from the queue when one is available,
        /// otherwise a newly created writer that is registered in all bookkeeping collections.
        /// </summary>
        /// <param name="chrom">Chromosome name, used in the output path of a newly created writer.</param>
        /// <param name="classification">Classification, used in the output path of a newly created writer.</param>
        /// <param name="idNum">Caller-supplied id, used in the output path of a newly created writer.</param>
        /// <returns>A writer handle for the caller to use.</returns>
        public IBamWriterHandle BamWriterHandle(string chrom,
                                                PairClassification classification,
                                                int idNum)
        {
            IBamWriterHandle writerHandle;

            // An empty queue, a lost race with another consumer, or a null entry all fall through to creation.
            if (_handles.TryDequeue(out writerHandle) && writerHandle != null)
            {
                return writerHandle;
            }

            return CreateAndRegisterWriterHandle(chrom, classification, idNum);
        }

        /// <summary>
        /// Creates a new single-BAM writer at a unique path and records it in the handle list, the
        /// handle-to-path map, the per-file counters and the list of BAM files.
        /// </summary>
        private IBamWriterHandle CreateAndRegisterWriterHandle(string chrom, PairClassification classification, int idNum)
        {
            // NOTE(review): the stub suffix is applied twice (GetPathStub on the result of GetPathStub).
            // Kept as-is so generated file names do not change - confirm whether this is intentional.
            var outStub = GetPathStub(_outputBam, chrom, classification);
            var path    =
                $"{GetPathStub(outStub, chrom, classification)}_{idNum}_{Thread.CurrentThread.ManagedThreadId}_{_allHandles.Count}_{Guid.NewGuid()}";

            var writerHandle = _writerFactory.CreateSingleBamWriter(path);
            _allHandles.Add(writerHandle);
            _bamFileHandleNames.AddOrUpdate(writerHandle, path, (h, p) =>
            {
                Console.WriteLine($"Path already existed for handle: {p} vs {path}.");
                return(p);
            });

            _bamFileAlignmentsWritten.AddOrUpdate(path, 0, (n, i) => i + 0);
            _bamFileTimesUsed.AddOrUpdate(path, 1, (n, i) => i + 1);

            _bamFiles.Add(path);

            return(writerHandle);
        }
コード例 #8
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is one of the indel-bearing categories listed
 /// in the switch below.
 /// </summary>
 private static bool ClassificationHasIndels(PairClassification classification)
 {
     // TODO any others? What if we did already try to stitch?
     switch (classification)
     {
         case PairClassification.Disagree:
         case PairClassification.UnstitchIndel:
         case PairClassification.IndelImproper:
         case PairClassification.IndelSingleton:
         case PairClassification.IndelUnstitchable:
         case PairClassification.UnstitchForwardMessyIndel:
         case PairClassification.UnstitchReverseMessyIndel:
         case PairClassification.UnstitchMessyIndelSuspiciousRead:
         case PairClassification.UnstitchMessyIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #9
0
        /// <summary>
        /// Test helper: builds a read pair via <c>TestHelpers.GetPair</c> and wraps it in a
        /// <c>PairResult</c> carrying the requested classification and indel-related settings.
        /// </summary>
        /// <param name="position">Passed to <c>TestHelpers.GetPair</c> as <c>read1Position</c>.</param>
        /// <param name="offset">Passed to <c>TestHelpers.GetPair</c> as <c>read2Offset</c>.</param>
        /// <param name="r1Cigar">CIGAR string for read 1.</param>
        /// <param name="r2Cigar">CIGAR string for read 2.</param>
        /// <param name="classification">Classification stored on the result.</param>
        /// <param name="numMismatchesInSingleton">Passed to the <c>PairResult</c> constructor.</param>
        /// <param name="softclipLength">Passed to the constructor as <c>softclipLengthForIndelRead</c>.</param>
        /// <param name="hasIndels">Passed to the <c>PairResult</c> constructor.</param>
        /// <param name="isReputableIndelContaining">Assigned to <c>IsReputableIndelContaining</c> on the result.</param>
        /// <returns>The constructed pair result.</returns>
        public static PairResult GetPairResult(int position, int offset          = 0,
                                               string r1Cigar                    = "5M1I5M", string r2Cigar = "5M1I5M",
                                               PairClassification classification = PairClassification.Unknown,
                                               int numMismatchesInSingleton      = 0, int softclipLength = 0, bool hasIndels = false, bool isReputableIndelContaining = false)
        {
            var pair = TestHelpers.GetPair(r1Cigar, r2Cigar, read2Offset: offset, read1Position: position);

            return new PairResult(
                pair.GetAlignments(),
                pair,
                classification: classification,
                numMismatchesInSingleton: numMismatchesInSingleton,
                softclipLengthForIndelRead: softclipLength,
                hasIndels: hasIndels)
            {
                IsReputableIndelContaining = isReputableIndelContaining
            };
        }
コード例 #10
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is one of the categories listed in the switch
 /// below, i.e. those this class treats as candidates for stitching.
 /// </summary>
 public static bool ClassificationIsStitchable(PairClassification classification)
 {
     switch (classification)
     {
         case PairClassification.Disagree:
         case PairClassification.FailStitch:
         case PairClassification.UnstitchIndel:
         case PairClassification.UnstitchImperfect:
         case PairClassification.UnstitchPerfect:
         case PairClassification.LongFragment:
         case PairClassification.UnstitchMessy:
         case PairClassification.UnstitchMessyIndel:
         case PairClassification.UnstitchMessySuspiciousRead:
         case PairClassification.UnstitchMessyIndelSuspiciousRead:
         case PairClassification.Unstitchable:
         case PairClassification.UnstitchSingleMismatch:
         case PairClassification.UnstitchReverseMessy:
         case PairClassification.UnstitchForwardMessy:
         case PairClassification.UnstitchForwardMessyIndel:
         case PairClassification.UnstitchReverseMessyIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #11
0
        /// <summary>
        /// Downgrades an UnstitchMessy / UnstitchMessyIndel pair to the matching "SuspiciousRead"
        /// classification when either read's mapping quality is below <c>_messyMapq</c>.
        /// Any other classification is returned unchanged without inspecting the reads.
        /// </summary>
        /// <param name="readPair">Pair whose reads' mapping qualities are inspected.</param>
        /// <param name="classification">Current classification of the pair.</param>
        /// <returns>The possibly adjusted classification.</returns>
        /// <exception cref="Exception">Either read is null for a messy classification.</exception>
        private PairClassification AdjustClassificationForMultimapper(ReadPair readPair,
                                                                      PairClassification classification)
        {
            var hasIndels = classification == PairClassification.UnstitchMessyIndel;
            if (classification != PairClassification.UnstitchMessy && !hasIndels)
            {
                return(classification);
            }

            if (readPair.Read1 == null)
            {
                throw new Exception("Null read 1");
            }
            if (readPair.Read2 == null)
            {
                // Previously reported "Null read 1", which pointed at the wrong read.
                throw new Exception("Null read 2");
            }

            if (readPair.Read1.MapQuality < _messyMapq || readPair.Read2.MapQuality < _messyMapq)
            {
                classification = hasIndels ? PairClassification.UnstitchMessyIndelSuspiciousRead : PairClassification.UnstitchMessySuspiciousRead;
            }

            return(classification);
        }
コード例 #12
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is FailStitch, LongFragment or Unstitchable.
 /// </summary>
 private static bool SkipRestitchIfUnchanged(PairClassification classification)
 {
     switch (classification)
     {
         case PairClassification.FailStitch:
         case PairClassification.LongFragment:
         case PairClassification.Unstitchable:
             return true;
         default:
             return false;
     }
 }
コード例 #13
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is one of the four "Stitched" categories:
 /// ImperfectStitched, PerfectStitched, MessyStitched or SingleMismatchStitched.
 /// </summary>
 private static bool ClassificationIsStitched(PairClassification classification)
 {
     switch (classification)
     {
         case PairClassification.ImperfectStitched:
         case PairClassification.PerfectStitched:
         case PairClassification.MessyStitched:
         case PairClassification.SingleMismatchStitched:
             return true;
         default:
             return false;
     }
 }
コード例 #14
0
        /// <summary>
        /// Builds the <c>PairResult</c> for a pair in which at least one read may contain indels: derives
        /// per-read mismatch counts with indel bases removed, an "effective" mismatch count and softclip
        /// length, optionally reclassifies the pair as a messy-indel category, and flags whether the
        /// indel evidence is considered reputable.
        /// </summary>
        /// <param name="readPair">The pair being evaluated.</param>
        /// <param name="r1HasIndels">Whether read 1 contains indels.</param>
        /// <param name="r2HasIndels">Whether read 2 contains indels.</param>
        /// <param name="r1Nm">Mismatch count for read 1 (presumably the NM tag value - TODO confirm against caller).</param>
        /// <param name="r2Nm">Mismatch count for read 2 (presumably the NM tag value - TODO confirm against caller).</param>
        /// <param name="hasIndels">Passed through unchanged to the result.</param>
        /// <param name="classification">Incoming classification; may be replaced by a messy-indel category.</param>
        /// <param name="isSplit">Passed through unchanged to the result.</param>
        /// <param name="bamAlignmentList">Alignments to store on the result; when null, <c>readPair.GetAlignments()</c> is used.</param>
        /// <returns>A new <c>PairResult</c> with R1Nm, R2Nm and IsReputableIndelContaining populated.</returns>
        private PairResult HandlePairContainingIndels(ReadPair readPair, bool r1HasIndels, bool r2HasIndels, int r1Nm,
                                                      int r2Nm,
                                                      bool hasIndels, PairClassification classification, bool isSplit, IEnumerable <BamAlignment> bamAlignmentList = null)
        {
            // All four stay at 0 when neither read has indels.
            var effectiveSc         = 0;
            var effectiveNm         = 0;
            var r1TotMismatchEvents = 0;
            var r2TotMismatchEvents = 0;

            if (r1HasIndels)
            {
                // Softclipped bases are only counted when softclips are not trusted.
                var r1Sc = _trustSoftclips ? 0 :
                           readPair.Read1.CigarData.HasSoftclips ? readPair.Read1.CountBasesWithOperationType('S') : 0;
                var r1NumIndels = (int)readPair.Read1.CountBasesWithOperationType('I') +
                                  (int)readPair.Read1.CountBasesWithOperationType('D');
                var r1NumIndelEvents =
                    readPair.Read1.NumOperationsOfType('D') + readPair.Read1.NumOperationsOfType('I');
                // Remove the indel bases from the mismatch count (floored at 0), then count each indel
                // operation as a single event.
                r1Nm = Math.Max(0, r1Nm - r1NumIndels);
                r1TotMismatchEvents = r1Nm + r1NumIndelEvents;

                if (r2HasIndels)
                {
                    var r2Sc = _trustSoftclips ? 0 :
                               readPair.Read2.CigarData.HasSoftclips ? readPair.Read2.CountBasesWithOperationType('S') : 0;

                    var r2NumIndels = (int)readPair.Read2.CountBasesWithOperationType('I') +
                                      (int)readPair.Read2.CountBasesWithOperationType('D');

                    var r2NumIndelEvents =
                        readPair.Read2.NumOperationsOfType('D') + readPair.Read2.NumOperationsOfType('I');

                    r2Nm = Math.Max(0, r2Nm - r2NumIndels);

                    r2TotMismatchEvents = r2Nm + r2NumIndelEvents;
                    // Both reads carry indels: the effective values are the smaller of the two reads'.
                    effectiveNm         = Math.Min(r1Nm, r2Nm);
                    effectiveSc         = (int)Math.Min(r1Sc, r2Sc);
                }
                else
                {
                    // Only read 1 has indels: read 2's count is used as given, effective values come from read 1.
                    r2TotMismatchEvents = r2Nm;
                    effectiveNm         = r1Nm;
                    effectiveSc         = (int)r1Sc;
                }
            }
            else if (r2HasIndels)
            {
                // Only read 2 has indels: mirror of the read-1-only case above.
                var r2Sc = _trustSoftclips ? 0 :
                           readPair.Read2.CigarData.HasSoftclips ? readPair.Read2.CountBasesWithOperationType('S') : 0;
                var r2NumIndelBases = (int)readPair.Read2.CountBasesWithOperationType('I') +
                                      (int)readPair.Read2.CountBasesWithOperationType('D');

                var r2NumIndelEvents =
                    readPair.Read2.NumOperationsOfType('D') + readPair.Read2.NumOperationsOfType('I');
                r2Nm                = Math.Max(0, r2Nm - r2NumIndelBases);
                effectiveNm         = r2Nm;
                effectiveSc         = (int)r2Sc;
                r2TotMismatchEvents = r2Nm + r2NumIndelEvents;


                r1TotMismatchEvents = r1Nm;
            }

            // Only UnstitchIndel and Disagree pairs can be promoted to a messy-indel category, and only
            // when the messier read exceeds the configured threshold.
            if ((Math.Max(r1TotMismatchEvents, r2TotMismatchEvents)) > _numMismatchesToBeConsideredMessy &&
                (classification == PairClassification.UnstitchIndel || classification == PairClassification.Disagree))
            {
                classification = PairClassification.UnstitchMessyIndel;
                {
                    // NOTE(review): the per-read cutoff below is a hard-coded 2 rather than
                    // _numMismatchesToBeConsideredMessy - confirm this is intentional.
                    if (r1TotMismatchEvents <= 2)
                    {
                        // Only r2 is messy
                        classification = readPair.Read2.IsReverseStrand()
                            ? PairClassification.UnstitchReverseMessyIndel
                            : PairClassification.UnstitchForwardMessyIndel;
                    }
                    else if (r2TotMismatchEvents <= 2)
                    {
                        // Only r1 is messy
                        classification = readPair.Read1.IsReverseStrand()
                            ? PairClassification.UnstitchReverseMessyIndel
                            : PairClassification.UnstitchForwardMessyIndel;
                    }
                }

                // May further change the classification based on the reads' mapping quality.
                classification = AdjustClassificationForMultimapper(readPair, classification);
            }

            // NOTE(review): thresholds 3 (mismatches) and 10 (softclipped bases) are hard-coded here.
            var isReputable = effectiveNm < 3 && (_trustSoftclips || effectiveSc < 10);

            return(new PairResult(alignments: bamAlignmentList ?? readPair.GetAlignments(), readPair: readPair,
                                  classification: classification, hasIndels: hasIndels,
                                  isSplit: isSplit, numMismatchesInSingleton: effectiveNm, softclipLengthForIndelRead: effectiveSc)
            {
                R1Nm = r1Nm,
                R2Nm = r2Nm,
                IsReputableIndelContaining = isReputable
            });
        }
コード例 #15
0
        /// <summary>
        /// Obtains a writer handle from <paramref name="writerSource"/>, writes every non-null alignment to
        /// it, empties <paramref name="alignments"/>, and releases the handle along with the written count.
        /// </summary>
        /// <param name="chrom">Chromosome passed to the writer source.</param>
        /// <param name="writerSource">Supplies the writer handle and receives it back afterwards.</param>
        /// <param name="classification">Classification passed to the writer source.</param>
        /// <param name="alignments">Alignments to write; cleared by this method.</param>
        private static void WriteAlignments(string chrom, IWriterSource writerSource, PairClassification classification,
                                            List <BamAlignment> alignments)
        {
            var threadId = Thread.CurrentThread.ManagedThreadId;
            var handle   = writerSource.BamWriterHandle(chrom, classification, threadId);

            var written = 0;
            for (var i = 0; i < alignments.Count; i++)
            {
                var current = alignments[i];
                if (current != null)
                {
                    written++;
                    handle.WriteAlignment(current);
                }
            }

            alignments.Clear();
            writerSource.DoneWithWriter(chrom, classification, threadId, written, handle);
        }
コード例 #16
0
        /// <summary>
        /// Appends <c>_{classification as int}_{chrom}_All_All</c> to <paramref name="outStub"/>.
        /// </summary>
        /// <param name="outStub">Path prefix to extend.</param>
        /// <param name="chrom">Chromosome name embedded in the stub.</param>
        /// <param name="classification">Classification whose integer value is embedded in the stub.</param>
        /// <returns>The extended path stub.</returns>
        private static string GetPathStub(string outStub, string chrom, PairClassification classification)
        {
            var classificationCode = (int)classification;
            var stub = $"{outStub}_{classificationCode}_{chrom}_All_All";

            return Path.Combine(stub);
        }
コード例 #17
0
 /// <summary>
 /// Returns true when <paramref name="category"/> is UnstitchForwardMessy or UnstitchForwardMessyIndel.
 /// </summary>
 private static bool IsForwardMessy(PairClassification category)
 {
     switch (category)
     {
         case PairClassification.UnstitchForwardMessy:
         case PairClassification.UnstitchForwardMessyIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #18
0
 /// <summary>
 /// Returns true when <paramref name="category"/> is UnstitchReverseMessy or UnstitchReverseMessyIndel.
 /// </summary>
 private static bool IsReverseMessy(PairClassification category)
 {
     switch (category)
     {
         case PairClassification.UnstitchReverseMessy:
         case PairClassification.UnstitchReverseMessyIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #19
0
 /// <summary>
 /// Returns true when <paramref name="category"/> is UnstitchMessySuspiciousRead or
 /// UnstitchMessyIndelSuspiciousRead.
 /// </summary>
 private static bool IsSuspiciousMapping(PairClassification category)
 {
     switch (category)
     {
         case PairClassification.UnstitchMessySuspiciousRead:
         case PairClassification.UnstitchMessyIndelSuspiciousRead:
             return true;
         default:
             return false;
     }
 }
コード例 #20
0
        /// <summary>
        /// Builds an ordered action block that writes each incoming batch of pair results straight to a BAM
        /// writer. Every alignment is tagged with "XP" set to the classification name, and the category and
        /// progress counters are updated. When proper-pair filtering is on and the classification is an
        /// improper type, the batch is counted as "Skipped" and nothing is written.
        /// </summary>
        /// <param name="classification">Category handled by this block.</param>
        /// <param name="isSingleMismatch">Forwarded to <c>BinEvidenceHelpers.AddEvidence</c>.</param>
        /// <returns>The configured action block.</returns>
        /// <remarks>
        /// The writer handle is released through <c>DoneWithWriter</c> on both the written and the skipped
        /// path. Previously the skipped path never released it, so the handle was not returned to the
        /// writer source.
        /// </remarks>
        public ActionBlock <PairResult[]> GetEarlyFlushBlock(PairClassification classification, bool isSingleMismatch)
        {
            var actBlock = new ActionBlock <PairResult[]>((p) =>
            {
                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Started handling {classification} block for region {_chrom}:{_startPosition}-{_endPosition}.");
                }

                // Drop bookkeeping entries for tasks that have already finished.
                var toRemove2 = _allToWaitFor.Keys.Where(x => x.IsCompleted);
                foreach (var task in toRemove2)
                {
                    _allToWaitFor.TryRemove(task, out _);
                }

                if (_debug)
                {
                    Console.WriteLine($"{p.Length} pairs in category {classification} in {_startPosition}-{_endPosition}");
                }

                var idNum        = Thread.CurrentThread.ManagedThreadId;
                var writerHandle = _writerSource.BamWriterHandle(_chrom, classification, _startPosition);

                // Computed once; the batch is not modified until Array.Clear below.
                var numPrimaryReads      = p.Sum(x => x.ReadPair.NumPrimaryReads);
                var numAlignmentsWritten = 0;

                _categoryLookup.AddOrUpdate(classification, numPrimaryReads,
                                            (c, n) => { return(n + numPrimaryReads); });

                if (_filterForProperPairs && _improperTypes.Contains(classification))
                {
                    _progressTracker.AddOrUpdate("Skipped", numPrimaryReads,
                                                 (c, n) => { return(n + numPrimaryReads); });
                }
                else
                {
                    var classificationString = classification.ToString();
                    foreach (var pair in p)
                    {
                        if (classification != PairClassification.Duplicate)
                        {
                            // Don't add bin evidence for duplicates, may wash the signal out
                            BinEvidenceHelpers.AddEvidence(pair, _messySiteWidth, _adjustedStartPosition,
                                                           _totalBinCounts,
                                                           _singleMismatchBinCounts, isSingleMismatch, _numBins, _refId);
                        }

                        foreach (var alignment in pair.Alignments)
                        {
                            if (alignment == null)
                            {
                                continue;
                            }

                            alignment.ReplaceOrAddStringTag("XP", classificationString);
                            numAlignmentsWritten++;

                            if (writerHandle == null)
                            {
                                throw new Exception("This is odd, why is the handle null");
                            }

                            writerHandle.WriteAlignment(alignment);
                        }
                    }

                    _progressTracker.AddOrUpdate("Early Flushed", numPrimaryReads,
                                                 (s, n) => { return(n + numPrimaryReads); });
                    _progressTracker.AddOrUpdate("Simple Alignments Written", numAlignmentsWritten,
                                                 (s, n) => { return(n + numAlignmentsWritten); });


                    // Release references to the flushed pairs.
                    Array.Clear(p, 0, p.Length);
                }

                // Always release the handle, including when the batch was skipped (zero alignments written).
                _writerSource.DoneWithWriter(_chrom, classification, idNum, numAlignmentsWritten, writerHandle);

                if (_lightDebug)
                {
                    Logger.WriteToLog(
                        $"Done handling {classification} block for region {_chrom}:{_startPosition}-{_endPosition}.");
                }
            }, new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = _maxDegreeOfParallelism, EnsureOrdered = true
            });

            return(actBlock);
        }
コード例 #21
0
 /// <summary>
 /// Returns true when <paramref name="classification"/> is Disagree, FailStitch or UnstitchIndel.
 /// </summary>
 private static bool ClassificationIsPairAwareRealignable(PairClassification classification)
 {
     switch (classification)
     {
         case PairClassification.Disagree:
         case PairClassification.FailStitch:
         case PairClassification.UnstitchIndel:
             return true;
         default:
             return false;
     }
 }
コード例 #22
0
        /// <summary>
        /// Decides which reads of a pair should be silenced, based on the pair's classification and the
        /// messy-status flags of the bin that the relevant read falls in.
        /// </summary>
        /// <param name="classification">Classification of the pair.</param>
        /// <param name="binEvidence">Per-bin conclusions used to look up messy status.</param>
        /// <param name="pairResult">Pair whose reads are being considered.</param>
        /// <returns>0 for neither read, 1 for read 1, 2 for read 2, 3 for both reads.</returns>
        private int ReadsToSilence(PairClassification classification, BinConclusions binEvidence, PairResult pairResult)
        {
            if (_geminiOptions.SilenceSuspiciousMdReads && classification == PairClassification.UnstitchMessySuspiciousMd)
            {
                return 3;
            }

            var forwardMessy = IsForwardMessy(classification);
            var reverseMessy = IsReverseMessy(classification);

            if (!forwardMessy && !reverseMessy && !IsSuspiciousMapping(classification))
            {
                return 0;
            }

            var read1IsReverse = pairResult.ReadPair.Read1.IsReverseStrand();

            // This assumes that there is exactly one forward and one reverse read.
            var forwardRead = read1IsReverse ? pairResult.ReadPair.Read2 : pairResult.ReadPair.Read1;
            var reverseRead = read1IsReverse ? pairResult.ReadPair.Read1 : pairResult.ReadPair.Read2;

            if (forwardMessy)
            {
                var forwardBin     = binEvidence.GetBinId(forwardRead.Position);
                var silenceForward = binEvidence.GetFwdMessyStatus(forwardBin) || binEvidence.GetMapqMessyStatus(forwardBin);
                if (!silenceForward)
                {
                    return 0;
                }

                // The forward read is read 2 when read 1 is on the reverse strand.
                return read1IsReverse ? 2 : 1;
            }

            // Both remaining cases look up the bin of the reverse read.
            var reverseBin = binEvidence.GetBinId(reverseRead.Position);

            if (reverseMessy)
            {
                var silenceReverse = binEvidence.GetRevMessyStatus(reverseBin) || binEvidence.GetMapqMessyStatus(reverseBin);
                if (!silenceReverse)
                {
                    return 0;
                }

                return read1IsReverse ? 1 : 2;
            }

            // Suspicious mapping: either both reads are silenced or neither is.
            return binEvidence.GetMapqMessyStatus(reverseBin) ? 3 : 0;
        }
コード例 #23
0
        /// <summary>
        /// Asks <paramref name="pairHandler"/> to extract (stitch) the reads of <paramref name="readPair"/>.
        /// Exactly one returned alignment is treated as a successful stitch: the pair is marked stitched,
        /// classified as PerfectStitched, and the tags listed in <c>_tagsToKeepFromR1</c> are copied from
        /// read 1 onto the stitched alignment. Any other count is classified as FailStitch.
        /// </summary>
        /// <param name="readPair">Pair to stitch; its <c>Stitched</c> flag is set on success.</param>
        /// <param name="pairHandler">Handler that performs the extraction.</param>
        /// <param name="classification">Receives PerfectStitched or FailStitch.</param>
        /// <returns>The alignments produced by the handler, materialized into a list.</returns>
        /// <remarks>
        /// The handler's result is enumerated exactly once. Previously the original enumerable was
        /// enumerated a second time for tag copying and then returned, so a lazily evaluated source could
        /// re-run extraction and hand back alignments without the copied tags.
        /// </remarks>
        private IEnumerable <BamAlignment> TryStitch(ReadPair readPair, IReadPairHandler pairHandler, out PairClassification classification)
        {
            // TODO if we end up allowing NM calculation in here, this will become true.
            const bool allowStitchingOnImperfectReads = false;

            var stitchedAlignments = pairHandler.ExtractReads(readPair).ToList();

            if (stitchedAlignments.Count != 1)
            {
                classification = PairClassification.FailStitch;
                return(stitchedAlignments);
            }

            readPair.Stitched = true;
            classification    = PairClassification.PerfectStitched;

            if (allowStitchingOnImperfectReads)
            {
                var stitchedResult = stitchedAlignments[0];
                int? nm            = 0;
                //TODO handle this if it is a hit on performance. Making it simple for now because the previous logic where we were lazy evaluating was a bit skewed
                var containsImperfections = ReadContainsImperfections(stitchedResult, _trustSoftclips);
                //nm = stitchedResult.GetIntTag("NM"); // TODO reinstate this if stitched read has proper NM

                var numMismatchesInR1 = readPair.Read1.GetIntTag("NM");
                var numMismatchesInR2 = readPair.Read2.GetIntTag("NM");
                if (containsImperfections ||
                    (nm > 0 || numMismatchesInR1 > 0 || numMismatchesInR2 > 0))
                {
                    classification = PairClassification.ImperfectStitched;

                    if (numMismatchesInR1 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                        numMismatchesInR2 <= NumMismatchesToBeConsideredLikelySnvInStitched &&
                        !containsImperfections)
                    {
                        classification = PairClassification.SingleMismatchStitched;
                    }
                    else if (nm >= _numMismatchesToBeConsideredMessy ||
                             numMismatchesInR1 >= _numMismatchesToBeConsideredMessy ||
                             numMismatchesInR2 >= _numMismatchesToBeConsideredMessy)
                    {
                        classification = PairClassification.MessyStitched;
                    }
                }
            }

            // Carry the configured tags over from read 1; tags absent on read 1 are left untouched.
            foreach (var alignment in stitchedAlignments)
            {
                foreach (var tag in _tagsToKeepFromR1)
                {
                    var r1Tag = readPair.Read1.GetStringTag(tag);
                    if (r1Tag != null)
                    {
                        alignment.ReplaceOrAddStringTag(tag, r1Tag);
                    }
                }
            }

            return(stitchedAlignments);
        }
コード例 #24
0
        /// <summary>
        /// Wires the classifier output for a single <paramref name="classification"/> into a block obtained
        /// from <c>_batchBlockFactory</c>, links that block into an EC pass-through block, and returns the
        /// pass-through block in a one-element list. Both links propagate completion.
        /// </summary>
        /// <param name="pairClassifierBlock">Source of classified pair results.</param>
        /// <param name="classification">Only results with this classification are routed through this chain.</param>
        /// <param name="indelLookup">Indel evidence lookup handed to the pass-through block.</param>
        /// <returns>A list containing the pass-through block.</returns>
        public List <TransformBlock <PairResult[], PairResult[]> > GetAndLinkPerClassificationBlocksWithEcFinalization(ISourceBlock <PairResult> pairClassifierBlock,
                                                                                                                       PairClassification classification,
                                                                                                                       ConcurrentDictionary <string, IndelEvidence> indelLookup)
        {
            var batcher = _batchBlockFactory.GetBlock();

            var classifierLink = new DataflowLinkOptions { PropagateCompletion = true, Append = true };
            pairClassifierBlock.LinkTo(batcher, classifierLink, result => result.Classification == classification);

            var ecPassThru = EcPassThruBlock(_targetFinder, _chrom, indelLookup);

            var batcherLink = new DataflowLinkOptions { PropagateCompletion = true, Append = true };
            batcher.LinkTo(ecPassThru, batcherLink);

            var linkedBlocks = new List<TransformBlock<PairResult[], PairResult[]>>();
            linkedBlocks.Add(ecPassThru);
            return linkedBlocks;
        }
コード例 #25
0
        private List <BamAlignment> ProcessCategory(
            List <PairClassification> categoriesForRealignment, IChromosomeIndelSource indelSource,
            bool shouldRealignAtAll, Dictionary <HashableIndel, int[]> outcomesLookup, ref int numSkippedDueToSites,
            ref int numKept, ref int numRealigned, ref int numSilenced,
            List <PairResult> pairResults, PairClassification classification, IBinEvidence binEvidence,
            ConcurrentDictionary <string, int> progressTracker, BinConclusions binConclusions, UsableBins usableBins, int startPosition, int endPosition)
        {
            var allAlignments = new List <BamAlignment>();
            var isHighLikelihoodForRealign = false;

            if (_geminiOptions.ForceHighLikelihoodRealigners)
            {
                var highLikelihoodCategories = new List <PairClassification>()
                {
                    PairClassification.Disagree,
                    PairClassification.MessyStitched,
                    PairClassification.MessySplit,
                    PairClassification.UnstitchMessy,
                    PairClassification.UnstitchIndel
                };
                isHighLikelihoodForRealign = highLikelihoodCategories.Contains(classification);
            }

            int alignmentsCount = 0;

            var doRealign = false;
            ReadPairRealignerAndCombiner realignHandler = null;
            var alreadyStitched       = ClassificationIsStitched(classification);
            var doStitch              = !_geminiOptions.SkipStitching && TypeClassifier.ClassificationIsStitchable(classification);
            var categoryIsRealignable = categoriesForRealignment.Contains(classification);

            if (categoryIsRealignable || doStitch)
            {
                doRealign = true;

                realignHandler = _bamRealignmentFactory.GetRealignPairHandler(doStitch,
                                                                              alreadyStitched,
                                                                              _realignmentOptions.PairAwareEverything ||
                                                                              ClassificationIsPairAwareRealignable(classification),
                                                                              _refIdMapping,
                                                                              new ReadStatusCounter(), false, indelSource, _chrom, new Dictionary <string, IndelEvidence>(),
                                                                              ClassificationHasIndels(classification), outcomesLookup
                                                                              , SkipRestitchIfUnchanged(classification));
            }

            using (var snippetSource = _dataSourceFactory.CreateGenomeSnippetSource(_chrom, _chrReference))
                using (var singleSnippetSource = new ReusableSnippetSource(snippetSource))
                {
                    var nmCalculator = new NmCalculator(singleSnippetSource);

                    var classificationString = classification.ToString();
                    foreach (var pairResult in pairResults)
                    {
                        int toSilence = 0;

                        IEnumerable <BamAlignment> alignments;
                        if (!doRealign)
                        {
                            alignments = pairResult.Alignments;
                        }
                        else
                        {
                            bool doRealignPair =
                                shouldRealignAtAll && (isHighLikelihoodForRealign ||
                                                       (categoryIsRealignable &&
                                                        (usableBins.IsPositionUsable(pairResult.ReadPair.MinPosition) ||
                                                         usableBins.IsPositionUsable(pairResult.ReadPair.MaxPosition))));


                            if (!doRealignPair)
                            {
                                numSkippedDueToSites++;
                            }
                            else
                            {
                                numKept++;
                            }

                            toSilence = ReadsToSilence(classification, binConclusions, pairResult);
                            if (toSilence > 0)
                            {
                                numSilenced++;
                            }

                            alignments = realignHandler.ExtractReads(pairResult, nmCalculator, doRealignPair, toSilence);

                            if (pairResult.ReadPair.Realigned || pairResult.ReadPair.RealignedR1 ||
                                pairResult.ReadPair.RealignedR2)
                            {
                                numRealigned++;
                            }
                        }

                        var silencedR1    = (toSilence == 1 || toSilence == 3) && !pairResult.ReadPair.RealignedR1;
                        var silencedR2    = (toSilence == 2 || toSilence == 3) && !pairResult.ReadPair.RealignedR2;
                        var readTreatment = ReadTreatment(silencedR1, silencedR2, pairResult);

                        progressTracker.AddOrUpdate(classificationString + ":" + readTreatment, 1,
                                                    (x, currentCount) => { return(currentCount + 1); });

                        var alignmentsList = alignments.ToList();
                        foreach (var bamAlignment in alignmentsList)
                        {
                            if (_geminiOptions.LightDebug)
                            {
                                AddMdTagCountsTags(bamAlignment, pairResult);
                            }

                            bamAlignment.ReplaceOrAddStringTag("XT", readTreatment);
                            bamAlignment.ReplaceOrAddStringTag("XP", classificationString);
                        }

                        alignmentsCount += alignmentsList.Count();
                        allAlignments.AddRange(alignmentsList);
                    }
                }

            if (realignHandler != null)
            {
                realignHandler.Finish();
            }

            pairResults.Clear();
            return(allAlignments);
        }