示例#1
0
        /// <summary>
        /// Runs one stitching pass: builds the pair handler and pair filter, streams the input BAM
        /// through a <c>BamRewriter</c> into the output BAM, and then logs the collected status counts.
        /// </summary>
        public void Execute()
        {
            var statusCounter = new ReadStatusCounter();
            var handler       = CreatePairHandler(statusCounter);
            var pairFilter    = new StitcherPairFilter(
                _options.FilterDuplicates,
                _options.FilterForProperPairs,
                CreateDuplicateIdentifier(),
                statusCounter,
                minMapQuality: _options.FilterMinMapQuality);

            // Input description for the log: the BAM path, plus ":<chr>" when a chromosome filter is set.
            var inputDescription = _inBam + (_chrFilter != null ? ":" + _chrFilter : "");
            Logger.WriteToLog(string.Format("Beginning execution of {0}.", inputDescription));

            // The writer is opened first and therefore disposed last, after the reader.
            using (var bamWriter = CreateBamWriter())
            using (var bamReader = CreateBamReader())
            {
                new BamRewriter(bamReader, bamWriter, pairFilter, handler,
                                bufferSize: 100000,
                                getUnpaired: _options.KeepUnpairedReads,
                                chrFilter: _chrFilter).Execute();
            }

            // Every status line starts with the chromosome filter (or nothing) and the STATUSCOUNT marker.
            var linePrefix = (_chrFilter ?? "") + " STATUSCOUNT ";

            foreach (var entry in statusCounter.GetReadStatuses())
            {
                Logger.WriteToLog(linePrefix + entry.Key + ": " + entry.Value);
            }

            // Debug statuses are only reported when one of the debug options is switched on.
            var reportDebugStatuses = _options.Debug || _options.DebugSummary;
            if (reportDebugStatuses)
            {
                foreach (var entry in statusCounter.GetDebugReadStatuses())
                {
                    Logger.WriteToLog(linePrefix + entry.Key + ": " + entry.Value);
                }
            }

            Logger.WriteToLog(string.Format("Done writing filtered bam at '{0}'.", _outBam));
        }
示例#2
0
 /// <summary>
 /// Creates a pair handler around the given stitcher and registers the supplied status counter
 /// with that stitcher.
 /// </summary>
 /// <param name="refIdMapping">Lookup from reference index to reference name.</param>
 /// <param name="stitcher">Stitcher used by this handler; receives <paramref name="statusCounter"/>.</param>
 /// <param name="filterUnstitchablePairs">Stored flag controlling handling of unstitchable pairs.</param>
 /// <param name="statusCounter">Counter shared between this handler and the stitcher.</param>
 public PairHandler(Dictionary <int, string> refIdMapping, IAlignmentStitcher stitcher, bool filterUnstitchablePairs, ReadStatusCounter statusCounter)
 {
     _statusCounter           = statusCounter;
     _filterUnstitchablePairs = filterUnstitchablePairs;
     _refIdMapping            = refIdMapping;
     _stitcher                = stitcher;

     // The stitcher reports into the very same counter instance this handler keeps.
     stitcher.SetStatusCounter(statusCounter);
 }
示例#3
0
 /// <summary>
 /// Creates a pair handler that keeps the caller's counter as the master counter and gives the
 /// stitcher a freshly created counter of its own.
 /// </summary>
 /// <param name="refIdMapping">Lookup from reference index to reference name.</param>
 /// <param name="stitcher">Stitcher used by this handler; receives the new local counter.</param>
 /// <param name="statusCounter">Counter stored as the master status counter.</param>
 /// <param name="filterUnstitchablePairs">Stored flag controlling handling of unstitchable pairs.</param>
 /// <param name="tryStitch">Stored flag indicating whether stitching should be attempted.</param>
 public PairHandler(Dictionary <int, string> refIdMapping, IAlignmentStitcher stitcher, ReadStatusCounter statusCounter, bool filterUnstitchablePairs = false, bool tryStitch = true)
 {
     // A handler-local counter is created here; the caller's counter is only kept as the master.
     var localCounter = new ReadStatusCounter();

     _masterStatusCounter     = statusCounter;
     _statusCounter           = localCounter;
     _refIdMapping            = refIdMapping;
     _filterUnstitchablePairs = filterUnstitchablePairs;
     _tryStitch               = tryStitch;
     _stitcher                = stitcher;

     stitcher.SetStatusCounter(localCounter);
 }
示例#4
0
        /// <summary>
        /// Runs the stitching pass with one pair handler per configured thread: streams the input BAM
        /// through a <c>BamRewriter</c>, waits for the worker pool (if any), finishes every handler,
        /// flushes the writer and finally logs the collected status counts.
        /// </summary>
        public void Execute()
        {
            var statusCounter = new ReadStatusCounter();
            var handlers      = CreatePairHandlers(statusCounter, _options.NumThreads);
            var pairFilter    = new StitcherPairFilter(
                _options.FilterDuplicates,
                _options.FilterForProperPairs,
                CreateDuplicateIdentifier(),
                statusCounter,
                minMapQuality: _options.FilterMinMapQuality,
                filterPairUnmapped: _options.FilterPairUnmapped,
                filterPairLowMapQ: _options.FilterPairLowMapQ);

            // A task queue and worker pool exist only when more than one thread is requested;
            // otherwise both stay null and the rewriter receives a null queue.
            var useWorkers = _options.NumThreads > 1;
            BlockingCollection <Task> taskQueue = useWorkers
                ? new BlockingCollection <Task>(4 * _options.NumThreads)
                : null;
            ThreadPool threadPool = useWorkers
                ? new ThreadPool(taskQueue, _options.NumThreads)
                : null;

            var inputDescription = _inBam + (_chrFilter != null ? ":" + _chrFilter : "");
            Logger.WriteToLog(string.Format("Beginning execution of {0}.", inputDescription));

            using (var bamWriter = CreateBamWriter())
            {
                // The reader is released as soon as the rewriter has consumed it.
                using (var bamReader = CreateBamReader())
                {
                    new BamRewriter(bamReader, bamWriter, pairFilter, handlers, taskQueue,
                                    getUnpaired: _options.KeepUnpairedReads,
                                    chrFilter: _chrFilter).Execute();
                }

                if (threadPool != null)
                {
                    threadPool.RunToCompletion();
                }

                foreach (var handler in handlers)
                {
                    handler.Finish();
                }

                Logger.WriteToLog("Finished stitching. Starting sort and write.");
                bamWriter.Flush();
            }

            // Every status line starts with the chromosome filter (or nothing) and the STATUSCOUNT marker.
            var linePrefix = (_chrFilter ?? "") + " STATUSCOUNT ";

            foreach (var entry in statusCounter.GetReadStatuses())
            {
                Logger.WriteToLog(linePrefix + entry.Key + ": " + entry.Value);
            }

            // Debug statuses are only reported when one of the debug options is switched on.
            var reportDebugStatuses = _options.Debug || _options.DebugSummary;
            if (reportDebugStatuses)
            {
                foreach (var entry in statusCounter.GetDebugReadStatuses())
                {
                    Logger.WriteToLog(linePrefix + entry.Key + ": " + entry.Value);
                }
            }

            Logger.WriteToLog(string.Format("Done writing filtered bam at '{0}'.", _outBam));
        }
示例#5
0
        /// <summary>
        /// Checks that <c>DebugSummaryStatusHandler.AppendStatusStringTag</c> leaves an existing
        /// string tag on the read untouched.
        /// </summary>
        public void AppendStatusStringTag()
        {
            const string tagName       = "HI";
            const string originalValue = "nothing";

            var summaryHandler = new DebugSummaryStatusHandler(new ReadStatusCounter());
            var readPair       = TestHelpers.GetPair("10M", "10M");
            var read           = readPair.Read1;

            read.ReplaceOrAddStringTag(tagName, originalValue);

            // The test expects the tag value to be unchanged after the append call.
            summaryHandler.AppendStatusStringTag(tagName, "newvalue", read);

            Assert.Equal(originalValue, read.GetStringTag(tagName));
        }
示例#6
0
 /// <summary>
 /// Builds the stitcher's pair filter, storing the duplicate, proper-pair, fusion,
 /// mapping-quality and pair-gap settings together with the status counter, and routes
 /// <c>OnLog</c> to <c>WriteToLog</c>.
 /// </summary>
 public StitcherPairFilter(bool skipDuplicates, bool filterForProperPairs, IDuplicateIdentifier dupIdentifier, ReadStatusCounter statusCounter,
                           bool shouldSkipFusions = true, int minMapQuality = 0, int maxPairGap = 500)
     : base(true, new StitchingReadPairEvaluator(true, true, false))
 {
     // Collaborators.
     _dupIdentifier = dupIdentifier;
     _statusCounter = statusCounter;

     // Filtering switches.
     _skipDuplicates       = skipDuplicates;
     _filterForProperPairs = filterForProperPairs;
     _shouldSkipFusions    = shouldSkipFusions;

     // Numeric thresholds.
     _minMapQuality = minMapQuality;
     _maxPairGap    = maxPairGap;

     OnLog = WriteToLog;
 }
示例#7
0
        /// <summary>
        /// Creates the read-pair source for the given BAM reader, wired to a
        /// <c>StitcherPairFilter</c> that uses flag-based duplicate identification.
        /// </summary>
        /// <param name="bamReader">Reader the pair source pulls alignments from.</param>
        /// <param name="statusCounter">Counter handed to both the filter and the pair source.</param>
        /// <returns>A <c>PairFilterReadPairSource</c> restricted to <c>_refId</c>.</returns>
        public IDataSource <ReadPair> CreateReadPairSource(IBamReader bamReader, ReadStatusCounter statusCounter)
        {
            // Proper-pair filtering is deliberately disabled at the pair-source level (it is taken
            // care of at the Gemini level), so _stitcherOptions.FilterForProperPairs is not consulted.
            const bool filterProperPairsAtSource = false;

            // Fixed value passed to the pair source as its expected fragment length.
            const int fragmentLength = 150;

            var pairFilter = new StitcherPairFilter(
                _stitcherOptions.FilterDuplicates,
                filterProperPairsAtSource,
                new AlignmentFlagDuplicateIdentifier(),
                statusCounter,
                minMapQuality: 0,
                treatImproperAsIncomplete: false);

            return new PairFilterReadPairSource(
                bamReader,
                statusCounter,
                _skipAndRemoveDuplicates,
                pairFilter,
                refId: _refId,
                expectedFragmentLength: fragmentLength,
                filterForProperPairs: filterProperPairsAtSource);
        }
示例#8
0
        /// <summary>
        /// Builds the stitcher's pair filter, storing the duplicate, proper-pair, fusion,
        /// mapping-quality, pair-gap, unmapped-pair and low-MapQ-pair settings together with the
        /// status counter.
        /// </summary>
        public StitcherPairFilter(bool skipDuplicates, bool filterForProperPairs, IDuplicateIdentifier dupIdentifier, ReadStatusCounter statusCounter,
                                  bool shouldSkipFusions = true, uint minMapQuality = 0, int maxPairGap = 500, bool filterPairUnmapped = false, bool filterPairLowMapQ = true)
            : base(true, new StitchingReadPairEvaluator(true, true, false), false)
        {
            // Collaborators.
            _dupIdentifier = dupIdentifier;
            _statusCounter = statusCounter;

            // Filtering switches.
            _skipDuplicates       = skipDuplicates;
            _filterForProperPairs = filterForProperPairs;
            _shouldSkipFusions    = shouldSkipFusions;
            _filterPairUnmapped   = filterPairUnmapped;
            _filterPairLowMapQ    = filterPairLowMapQ;

            // Numeric thresholds.
            _minMapQuality = minMapQuality;
            _maxPairGap    = maxPairGap;
        }
        /// <summary>
        /// Checks that <c>DebugStatusHandler.AddCombinedStatusStringTags</c> writes the two reads'
        /// tag values, comma-separated, onto the output alignment.
        /// </summary>
        public void AddCombinedStatusStringTags()
        {
            const string tagName = "HI";

            var debugHandler = new DebugStatusHandler(new ReadStatusCounter());
            var readPair     = TestHelpers.GetPair("10M", "10M");
            var first        = readPair.Read1;
            var second       = readPair.Read2;

            first.ReplaceOrAddStringTag(tagName, "read1_hi");
            second.ReplaceOrAddStringTag(tagName, "read2_hi");

            // The output alignment is copied from read 1 after read 1 has been tagged.
            var combined = new BamAlignment(first);

            debugHandler.AddCombinedStatusStringTags(tagName, first, second, combined);

            Assert.Equal("read1_hi,read2_hi", combined.GetStringTag(tagName));
        }
示例#10
0
        /// <summary>
        /// Builds the single pair handler used for stitching: a <c>BasicStitcher</c> configured from
        /// the options, plus a reference-index-to-name map read from the input BAM.
        /// </summary>
        /// <param name="readStatuses">Counter passed on to the created <c>PairHandler</c>.</param>
        private IReadPairHandler CreatePairHandler(ReadStatusCounter readStatuses)
        {
            var basicStitcher = new BasicStitcher(
                _options.MinBaseCallQuality,
                useSoftclippedBases: _options.UseSoftClippedBases,
                nifyDisagreements: _options.NifyDisagreements,
                debug: _options.Debug,
                nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
                ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
                maxReadLength: _options.MaxReadLength);

            // The input BAM is opened briefly just to map each reference index to its name.
            var indexToName = new Dictionary <int, string>();
            using (var headerReader = new BamReader(_inBam))
            {
                foreach (var name in headerReader.GetReferenceNames())
                {
                    var index = headerReader.GetReferenceIndex(name);
                    indexToName.Add(index, name);
                }
            }

            return new PairHandler(indexToName, basicStitcher, _options.FilterUnstitchablePairs, readStatuses);
        }
        /// <summary>
        /// Checks that <c>DebugStatusHandler.UpdateStatusStringTag</c> replaces the existing value of
        /// a string tag on the given alignment.
        /// </summary>
        public void UpdateStatusStringTag()
        {
            const string tagName = "HI";

            var debugHandler = new DebugStatusHandler(new ReadStatusCounter());
            var readPair     = TestHelpers.GetPair("10M", "10M");

            readPair.Read1.ReplaceOrAddStringTag(tagName, "read1_hi");
            readPair.Read2.ReplaceOrAddStringTag(tagName, "read2_hi");

            // The alignment under test is copied from read 1 and then given a placeholder value.
            var target = new BamAlignment(readPair.Read1);
            target.ReplaceOrAddStringTag(tagName, "nothing");

            // The test expects the placeholder to be overwritten with the new value.
            debugHandler.UpdateStatusStringTag(tagName, "newvalue", target);

            Assert.Equal("newvalue", target.GetStringTag(tagName));
        }
        /// <summary>
        /// Creates a read-pair source over the given BAM reader.
        /// </summary>
        /// <param name="bamReader">Reader to pull alignments from.</param>
        /// <param name="readStatuses">Counter stored for read statuses.</param>
        /// <param name="skipAndRemoveDuplicates">Stored duplicate-handling flag.</param>
        /// <param name="filter">Pair filter stored for use by this source.</param>
        /// <param name="refId">When given, enables the chromosome filter and jumps the reader to position 0 of that reference.</param>
        /// <param name="expectedFragmentLength">When given, enables insert-size consideration with this length.</param>
        /// <param name="filterForProperPairs">Stored proper-pair filtering flag.</param>
        public PairFilterReadPairSource(IBamReader bamReader, ReadStatusCounter readStatuses, bool skipAndRemoveDuplicates, IAlignmentPairFilter filter, int?refId = null,
                                        int?expectedFragmentLength = null, bool filterForProperPairs = false)
        {
            // Unconditional state.
            _bamReader               = bamReader;
            _readStatuses            = readStatuses;
            _skipAndRemoveDuplicates = skipAndRemoveDuplicates;
            _filter                  = filter;
            _filterForProperPairs    = filterForProperPairs;

            // Optional restriction to one reference: remember it and move the reader to its start.
            if (refId.HasValue)
            {
                _refId          = refId.Value;
                _applyChrFilter = true;
                _bamReader.Jump(_refId, 0);
            }

            // Optional fragment length: supplying one switches on insert-size consideration.
            if (expectedFragmentLength.HasValue)
            {
                _expectedFragmentLength = expectedFragmentLength.Value;
                _considerInsertSize     = true;
            }
        }
示例#13
0
        /// <summary>
        /// Builds one pair handler per thread. Each handler gets its own <c>BasicStitcher</c>; all
        /// handlers share the same reference-index-to-name map and the same status counter.
        /// </summary>
        /// <param name="readStatuses">Counter passed to every created <c>PairHandler</c>.</param>
        /// <param name="numThreads">Number of handlers to create.</param>
        private List <IReadPairHandler> CreatePairHandlers(ReadStatusCounter readStatuses, int numThreads)
        {
            var result = new List <IReadPairHandler>(numThreads);

            // The input BAM is opened briefly just to map each reference index to its name.
            var indexToName = new Dictionary <int, string>();
            using (var headerReader = new BamReader(_inBam))
            {
                foreach (var name in headerReader.GetReferenceNames())
                {
                    var index = headerReader.GetReferenceIndex(name);
                    indexToName.Add(index, name);
                }
            }

            // Keep adding handlers until there is one per requested thread.
            while (result.Count < numThreads)
            {
                var basicStitcher = new BasicStitcher(
                    _options.MinBaseCallQuality,
                    useSoftclippedBases: _options.UseSoftClippedBases,
                    nifyDisagreements: _options.NifyDisagreements,
                    debug: _options.Debug,
                    nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
                    ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
                    maxReadLength: _options.MaxReadLength,
                    ignoreReadsAboveMaxLength: _options.IgnoreReadsAboveMaxLength,
                    minMapQuality: _options.FilterMinMapQuality,
                    dontStitchHomopolymerBridge: _options.DontStitchHomopolymerBridge);

                result.Add(new PairHandler(indexToName, basicStitcher, readStatuses, _options.FilterUnstitchablePairs, true));
            }

            return result;
        }
示例#14
0
        /// <summary>
        /// Checks that statuses added through a <c>DebugSummaryStatusHandler</c> are recorded in the
        /// underlying <c>ReadStatusCounter</c>: one entry per distinct status, incremented on repeats.
        /// </summary>
        public void AddStatusCount()
        {
            var counter = new ReadStatusCounter();
            var handler = new DebugSummaryStatusHandler(counter);

            // First status: a single entry with a count of one.
            handler.AddStatusCount("x");

            var statuses = counter.GetReadStatuses();

            // Compare the count as an integer, consistent with the later count assertions.
            Assert.Equal(1, statuses.Count);
            Assert.Equal(1, statuses["x"]);

            // A second, different status adds a new entry and leaves the first untouched.
            handler.AddStatusCount("y");
            statuses = counter.GetReadStatuses();
            Assert.Equal(2, statuses.Count);
            Assert.Equal(1, statuses["x"]);
            Assert.Equal(1, statuses["y"]);

            // Repeating a status increments its count without adding an entry.
            handler.AddStatusCount("x");
            statuses = counter.GetReadStatuses();
            Assert.Equal(2, statuses.Count);
            Assert.Equal(2, statuses["x"]);
            Assert.Equal(1, statuses["y"]);
        }
示例#15
0
 /// <summary>
 /// Creates a debug status handler that keeps a reference to the given counter.
 /// </summary>
 /// <param name="statusCounter">Counter stored by this handler.</param>
 public DebugStatusHandler(ReadStatusCounter statusCounter)
 {
     this._statusCounter = statusCounter;
 }
示例#16
0
        /// <summary>
        /// Checks that a pre-existing XR tag on both input reads is replaced on the single alignment
        /// returned by <c>PairHandler.ExtractReads</c>, which is expected to carry XR = "FR".
        /// </summary>
        private void ExtractReadsStrandXRValidation()
        {
            var refIdMapping = new Dictionary <int, string>()
            {
                { 1, "chr1" }
            };

            var stitcher = StitcherTestHelpers.GetStitcher(10, false);

            var readStatusCounter = new ReadStatusCounter();

            var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

            var alignment1 = new BamAlignment()
            {
                AlignmentFlag  = 99,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };
            var tagUtils = new TagUtils();

            tagUtils.AddStringTag("XR", "BLABLA"); //start with random XR tag and confirm whether it is overwritten by read direction
            alignment1.AppendTagData(tagUtils.ToBytes());

            var alignment2 = new BamAlignment()
            {
                AlignmentFlag  = 144,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };

            // Read 2 gets its own tag builder so it carries exactly one XR tag; reusing the
            // builder from read 1 would append the XR tag a second time.
            var tagUtils2 = new TagUtils();

            tagUtils2.AddStringTag("XR", "BLABLA");  //start with random XR tag and confirm whether it is overwritten by read direction
            alignment2.AppendTagData(tagUtils2.ToBytes());

            var readPair = new ReadPair(alignment1);

            readPair.AddAlignment(alignment2);

            var alignmentResults = pairHandler.ExtractReads(readPair);

            Assert.Equal(1, alignmentResults.Count);
            var alignment = alignmentResults[0];

            Assert.Equal("FR", alignment.GetStringTag("XR"));
        }
示例#17
0
        /// <summary>
        /// Builds a read pair with the requested strand orientations, runs it through
        /// <c>PairHandler.ExtractReads</c>, and checks the XR tag on the single returned alignment.
        /// </summary>
        /// <param name="r1Reverse">Whether read 1 is flagged as reverse strand.</param>
        /// <param name="r2Reverse">Whether read 2 is flagged as reverse strand.</param>
        /// <param name="expectedXRTag">XR tag value expected on the returned alignment.</param>
        private void ExtractReadsStrand(bool r1Reverse, bool r2Reverse, string expectedXRTag)
        {
            var refIdMapping = new Dictionary <int, string>()
            {
                { 1, "chr1" }
            };

            var stitcher = StitcherTestHelpers.GetStitcher(10, false);

            var readStatusCounter = new ReadStatusCounter();

            var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

            var alignment1 = new BamAlignment()
            {
                AlignmentFlag  = 0,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };

            alignment1.SetIsFirstMate(true);
            alignment1.SetIsReverseStrand(r1Reverse);
            var alignment2 = new BamAlignment()
            {
                AlignmentFlag  = 0,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };

            alignment2.SetIsSecondMate(true);
            alignment2.SetIsReverseStrand(r2Reverse);
            var readPair = new ReadPair(alignment1);

            readPair.AddAlignment(alignment2);

            var alignmentResults = pairHandler.ExtractReads(readPair);

            Assert.Equal(1, alignmentResults.Count);
            var alignment = alignmentResults[0];

            // Fetch the XR tag once and assert on it (replaces the unused local named "XD").
            var xrTag = alignment.GetStringTag("XR");

            Assert.Equal(expectedXRTag, xrTag);
        }
示例#18
0
        /// <summary>
        /// Runs a tagged read pair through <c>PairHandler.ExtractReads</c> and checks which tags are
        /// present on the single returned alignment: XD must exist, NM/BC/SM must be absent, and the
        /// UMI tags (XU/XV/XW) must be carried over when they were supplied.
        /// </summary>
        /// <param name="addUmiTags">Whether the XV, XW and XU tags are added to both input reads.</param>
        private void ExtractReads(bool addUmiTags)
        {
            var chrNamesByIndex = new Dictionary <int, string>();
            chrNamesByIndex.Add(1, "chr1");

            var testStitcher = StitcherTestHelpers.GetStitcher(10, false);
            var statusCounter = new ReadStatusCounter();
            var handler = new PairHandler(chrNamesByIndex, testStitcher, statusCounter, filterUnstitchablePairs: true);

            // Read 1 carries BC and SM tags.
            var firstRead = new BamAlignment()
            {
                AlignmentFlag  = 0,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };

            var firstTags = new TagUtils();
            firstTags.AddStringTag("BC", "14");
            firstTags.AddIntTag("SM", 40);

            // Read 2 carries NM, BC and SM tags.
            var secondRead = new BamAlignment()
            {
                AlignmentFlag  = 0,
                Bases          = "ABCF",
                Bin            = 4,
                CigarData      = new CigarAlignment("2S2M"),
                FragmentLength = 42,
                MapQuality     = 30,
                MatePosition   = 2,
                MateRefID      = 43,
                Name           = "Read1",
                Position       = 1,
                Qualities      = new byte[4],
                RefID          = 1,
                TagData        = new byte[0]
            };

            var secondTags = new TagUtils();
            secondTags.AddIntTag("NM", 5);
            secondTags.AddStringTag("BC", "14");
            secondTags.AddIntTag("SM", 20);

            // Optional UMI tags are appended to both reads, after their regular tags.
            if (addUmiTags)
            {
                foreach (var tags in new[] { firstTags, secondTags })
                {
                    tags.AddIntTag("XV", 1);
                    tags.AddIntTag("XW", 2);
                    tags.AddStringTag("XU", "ABBA-ZABBA");
                }
            }

            firstRead.AppendTagData(firstTags.ToBytes());
            secondRead.AppendTagData(secondTags.ToBytes());

            var pair = new ReadPair(firstRead);
            pair.AddAlignment(secondRead);

            var results = handler.ExtractReads(pair);

            Assert.Equal(1, results.Count);
            var output = results[0];

            Assert.NotNull(output.GetStringTag("XD"));
            Assert.Null(output.GetIntTag("NM"));
            Assert.Null(output.GetStringTag("BC"));
            Assert.Null(output.GetIntTag("SM"));

            if (!addUmiTags)
            {
                return;
            }

            Assert.Equal("ABBA-ZABBA", output.GetStringTag("XU"));
            Assert.Equal(1, output.GetIntTag("XV"));
            Assert.Equal(2, output.GetIntTag("XW"));
        }
        /// <summary>
        /// Assembles a <c>ReadPairRealignerAndCombiner</c> for one chromosome: a pair handler for
        /// (re)stitching, a read realigner and judger configured from the Gemini and realignment
        /// options, a status handler (debug or debug-summary), and a realignment evaluator working
        /// on a deep copy of the indel source.
        /// </summary>
        /// <param name="tryRestitch">Passed to the pair handler as its <c>tryStitch</c> flag.</param>
        /// <param name="alreadyStitched">Whether the input is already stitched; affects softclip handling.</param>
        /// <param name="pairAwareRealign">Passed to the indel finder and the combiner.</param>
        /// <param name="refIdMapping">Lookup from reference index to reference name.</param>
        /// <param name="statusCounter">Counter wrapped by the status handler.</param>
        /// <param name="isSnowball">Passed to <c>GetCollector</c>.</param>
        /// <param name="indelSource">Indel source; deep-copied for the evaluator and passed to the indel finder.</param>
        /// <param name="chromosome">Chromosome name passed to the evaluator, indel finder and combiner.</param>
        /// <param name="masterLookup">Passed through to the combiner as <c>masterLookup</c>.</param>
        /// <param name="hasIndels">Passed through to the combiner as <c>hasExistingIndels</c>.</param>
        /// <param name="outcomesLookup">Passed through to the combiner as <c>masterOutcomesLookup</c>.</param>
        /// <param name="skipRestitchIfNothingChanged">Passed through to the combiner.</param>
        public ReadPairRealignerAndCombiner GetRealignPairHandler(bool tryRestitch, bool alreadyStitched,
                                                                  bool pairAwareRealign,
                                                                  Dictionary <int, string> refIdMapping, ReadStatusCounter statusCounter, bool isSnowball,
                                                                  IChromosomeIndelSource indelSource, string chromosome, Dictionary <string, IndelEvidence> masterLookup,
                                                                  bool hasIndels, Dictionary <HashableIndel, int[]> outcomesLookup, bool skipRestitchIfNothingChanged)
        {
            var restitchPairHandler = new PairHandler(refIdMapping, GetStitcher(), tryStitch: tryRestitch);

            var realignmentJudger = new RealignmentJudger(GetAlignmentComparer());

            // TODO (carried over from the original author): something about this configuration was
            // flagged as needing a fix, but what exactly was never recorded.
            var realigner = new GeminiReadRealigner(
                GetAlignmentComparer(),
                remaskSoftclips: _geminiOptions.RemaskMessySoftclips,
                keepProbeSoftclips: _geminiOptions.KeepProbeSoftclip,
                keepBothSideSoftclips: _geminiOptions.KeepBothSideSoftclips || (_geminiOptions.KeepProbeSoftclip && alreadyStitched),
                trackActualMismatches: _realignmentAssessmentOptions.TrackActualMismatches,
                checkSoftclipsForMismatches: _realignmentAssessmentOptions.CheckSoftclipsForMismatches,
                debug: _geminiOptions.Debug,
                maskNsOnly: !(_geminiOptions.RemaskMessySoftclips || _geminiOptions.KeepProbeSoftclip || _geminiOptions.KeepBothSideSoftclips),
                maskPartialInsertion: _realignmentOptions.MaskPartialInsertion,
                minimumUnanchoredInsertionLength: _realignmentOptions.MinimumUnanchoredInsertionLength,
                minInsertionSizeToAllowMismatchingBases: 4,
                maxProportionInsertSequenceMismatch: 0.2);

            // The summary handler is the default; full debug mode swaps in the debug handler.
            IStatusHandler handlerForStatuses = new DebugSummaryStatusHandler(statusCounter);
            if (_geminiOptions.Debug)
            {
                handlerForStatuses = new DebugStatusHandler(statusCounter);
            }

            // Unknown indels are only softclipped for reads that were not stitched to begin with:
            // already-stitched reads are trusted more, and this keeps stitched directions simpler.
            var softclipUnknown = _geminiOptions.SoftclipUnknownIndels && !alreadyStitched;

            // Region filtering is currently a no-op; a RegionFilterer over the indel source was used
            // here previously.
            var noOpRegionFilterer = new DummyRegionFilterer();
            var evidenceCollector  = GetCollector(isSnowball);

            var evaluator = new RealignmentEvaluator(
                indelSource.DeepCopy(),
                handlerForStatuses,
                realigner,
                realignmentJudger,
                chromosome,
                _realignmentAssessmentOptions.TrackActualMismatches,
                _realignmentAssessmentOptions.CheckSoftclipsForMismatches,
                _geminiOptions.AllowRescoringOrigZero,
                softclipUnknown,
                noOpRegionFilterer,
                _geminiOptions.LightDebug);

            var restitcher  = GetRestitcher(restitchPairHandler, handlerForStatuses);
            var indelFinder = GetIndelFinder(pairAwareRealign, chromosome, indelSource);

            return new ReadPairRealignerAndCombiner(
                evidenceCollector,
                restitcher,
                evaluator,
                indelFinder,
                chromosome,
                alreadyStitched,
                pairAwareRealign,
                masterLookup: masterLookup,
                hasExistingIndels: hasIndels,
                masterOutcomesLookup: outcomesLookup,
                skipRestitchIfNothingChanged: skipRestitchIfNothingChanged,
                allowedToStitch: !_geminiOptions.SkipStitching);
        }