/// <summary>
/// Runs a single stitching pass: builds the pair handler and pair filter, rewrites the
/// input BAM into the output BAM, and then logs the read status counts that were collected.
/// </summary>
public void Execute()
{
    var statusCounter = new ReadStatusCounter();
    var handler = CreatePairHandler(statusCounter);
    var pairFilter = new StitcherPairFilter(
        _options.FilterDuplicates,
        _options.FilterForProperPairs,
        CreateDuplicateIdentifier(),
        statusCounter,
        minMapQuality: _options.FilterMinMapQuality);

    var chrSuffix = _chrFilter != null ? ":" + _chrFilter : "";
    Logger.WriteToLog(string.Format("Beginning execution of {0}.", _inBam + chrSuffix));

    // The writer is opened first and therefore disposed last, after the reader.
    using (var bamWriter = CreateBamWriter())
    using (var bamReader = CreateBamReader())
    {
        var bamRewriter = new BamRewriter(
            bamReader,
            bamWriter,
            pairFilter,
            handler,
            bufferSize: 100000,
            getUnpaired: _options.KeepUnpairedReads,
            chrFilter: _chrFilter);
        bamRewriter.Execute();
    }

    // Every status line is prefixed with the chromosome filter (empty when none is set).
    var statusPrefix = (_chrFilter ?? "") + " STATUSCOUNT ";

    foreach (var status in statusCounter.GetReadStatuses())
    {
        Logger.WriteToLog(statusPrefix + status.Key + ": " + status.Value);
    }

    var logDebugStatuses = _options.Debug || _options.DebugSummary;
    if (logDebugStatuses)
    {
        foreach (var debugStatus in statusCounter.GetDebugReadStatuses())
        {
            Logger.WriteToLog(statusPrefix + debugStatus.Key + ": " + debugStatus.Value);
        }
    }

    Logger.WriteToLog(string.Format("Done writing filtered bam at '{0}'.", _outBam));
}
/// <summary>
/// Creates a pair handler that stitches read pairs with the supplied stitcher.
/// </summary>
/// <param name="refIdMapping">Lookup from reference index to reference name.</param>
/// <param name="stitcher">Stitcher used for the pairs; it is handed <paramref name="statusCounter"/>.</param>
/// <param name="filterUnstitchablePairs">Stored as the handler's filter-unstitchable-pairs setting.</param>
/// <param name="statusCounter">Counter shared by this handler and its stitcher.</param>
public PairHandler(
    Dictionary<int, string> refIdMapping,
    IAlignmentStitcher stitcher,
    bool filterUnstitchablePairs,
    ReadStatusCounter statusCounter)
{
    _statusCounter = statusCounter;
    _filterUnstitchablePairs = filterUnstitchablePairs;
    _refIdMapping = refIdMapping;

    // The stitcher reports into the same counter instance as the handler.
    _stitcher = stitcher;
    _stitcher.SetStatusCounter(_statusCounter);
}
/// <summary>
/// Creates a pair handler that keeps the supplied counter as its master counter and
/// gives its stitcher a freshly created, handler-private counter.
/// </summary>
/// <param name="refIdMapping">Lookup from reference index to reference name.</param>
/// <param name="stitcher">Stitcher used for the pairs; it is handed the private counter.</param>
/// <param name="statusCounter">Stored as the master status counter.</param>
/// <param name="filterUnstitchablePairs">Stored as the handler's filter-unstitchable-pairs setting.</param>
/// <param name="tryStitch">Stored as the handler's try-stitch setting.</param>
public PairHandler(
    Dictionary<int, string> refIdMapping,
    IAlignmentStitcher stitcher,
    ReadStatusCounter statusCounter,
    bool filterUnstitchablePairs = false,
    bool tryStitch = true)
{
    _tryStitch = tryStitch;
    _filterUnstitchablePairs = filterUnstitchablePairs;
    _refIdMapping = refIdMapping;

    // The caller's counter is only stored; the stitcher reports into a new local counter.
    _masterStatusCounter = statusCounter;
    _statusCounter = new ReadStatusCounter();

    _stitcher = stitcher;
    _stitcher.SetStatusCounter(_statusCounter);
}
/// <summary>
/// Runs the stitching pass with one pair handler per configured thread: rewrites the input
/// BAM, waits for queued work, finishes every handler, flushes the writer and finally logs
/// the collected read status counts.
/// </summary>
public void Execute()
{
    var statusCounter = new ReadStatusCounter();
    var handlers = CreatePairHandlers(statusCounter, _options.NumThreads);
    var pairFilter = new StitcherPairFilter(
        _options.FilterDuplicates,
        _options.FilterForProperPairs,
        CreateDuplicateIdentifier(),
        statusCounter,
        minMapQuality: _options.FilterMinMapQuality,
        filterPairUnmapped: _options.FilterPairUnmapped,
        filterPairLowMapQ: _options.FilterPairLowMapQ);

    // The task queue and thread pool only exist when more than one thread is requested;
    // otherwise both stay null and the rewriter receives a null queue.
    BlockingCollection<Task> taskQueue = null;
    ThreadPool threadPool = null;
    if (_options.NumThreads > 1)
    {
        taskQueue = new BlockingCollection<Task>(4 * _options.NumThreads);
        threadPool = new ThreadPool(taskQueue, _options.NumThreads);
    }

    var chrSuffix = _chrFilter != null ? ":" + _chrFilter : "";
    Logger.WriteToLog(string.Format("Beginning execution of {0}.", _inBam + chrSuffix));

    using (var bamWriter = CreateBamWriter())
    {
        using (var bamReader = CreateBamReader())
        {
            var bamRewriter = new BamRewriter(
                bamReader,
                bamWriter,
                pairFilter,
                handlers,
                taskQueue,
                getUnpaired: _options.KeepUnpairedReads,
                chrFilter: _chrFilter);
            bamRewriter.Execute();
        }

        // The reader is closed at this point; the writer stays open until the flush below.
        if (threadPool != null)
        {
            threadPool.RunToCompletion();
        }

        foreach (var handler in handlers)
        {
            handler.Finish();
        }

        Logger.WriteToLog("Finished stitching. Starting sort and write.");
        bamWriter.Flush();
    }

    // Every status line is prefixed with the chromosome filter (empty when none is set).
    var statusPrefix = (_chrFilter ?? "") + " STATUSCOUNT ";

    foreach (var status in statusCounter.GetReadStatuses())
    {
        Logger.WriteToLog(statusPrefix + status.Key + ": " + status.Value);
    }

    var logDebugStatuses = _options.Debug || _options.DebugSummary;
    if (logDebugStatuses)
    {
        foreach (var debugStatus in statusCounter.GetDebugReadStatuses())
        {
            Logger.WriteToLog(statusPrefix + debugStatus.Key + ": " + debugStatus.Value);
        }
    }

    Logger.WriteToLog(string.Format("Done writing filtered bam at '{0}'.", _outBam));
}
/// <summary>
/// Checks that <c>DebugSummaryStatusHandler.AppendStatusStringTag</c> leaves an existing
/// string tag on the read untouched.
/// </summary>
public void AppendStatusStringTag()
{
    // Arrange: a read that already carries an "HI" tag.
    var statusCounter = new ReadStatusCounter();
    var summaryHandler = new DebugSummaryStatusHandler(statusCounter);
    var readPair = TestHelpers.GetPair("10M", "10M");
    readPair.Read1.ReplaceOrAddStringTag("HI", "nothing");

    // Act: the summary handler should not update the tag.
    summaryHandler.AppendStatusStringTag("HI", "newvalue", readPair.Read1);

    // Assert: the original value is still in place.
    Assert.Equal("nothing", readPair.Read1.GetStringTag("HI"));
}
/// <summary>
/// Creates a pair filter for stitching. The base filter is constructed with <c>true</c>
/// and a <c>StitchingReadPairEvaluator(true, true, false)</c>; log output is routed to
/// <c>WriteToLog</c>.
/// </summary>
/// <param name="skipDuplicates">Stored as the skip-duplicates setting.</param>
/// <param name="filterForProperPairs">Stored as the proper-pair filter setting.</param>
/// <param name="dupIdentifier">Duplicate identifier kept by the filter.</param>
/// <param name="statusCounter">Status counter kept by the filter.</param>
/// <param name="shouldSkipFusions">Stored as the skip-fusions setting.</param>
/// <param name="minMapQuality">Stored as the minimum map quality.</param>
/// <param name="maxPairGap">Stored as the maximum pair gap.</param>
public StitcherPairFilter(
    bool skipDuplicates,
    bool filterForProperPairs,
    IDuplicateIdentifier dupIdentifier,
    ReadStatusCounter statusCounter,
    bool shouldSkipFusions = true,
    int minMapQuality = 0,
    int maxPairGap = 500)
    : base(true, new StitchingReadPairEvaluator(true, true, false))
{
    // Collaborators.
    _dupIdentifier = dupIdentifier;
    _statusCounter = statusCounter;

    // Filter switches and thresholds.
    _skipDuplicates = skipDuplicates;
    _filterForProperPairs = filterForProperPairs;
    _shouldSkipFusions = shouldSkipFusions;
    _minMapQuality = minMapQuality;
    _maxPairGap = maxPairGap;

    OnLog = WriteToLog;
}
/// <summary>
/// Builds the read pair source for the given BAM reader: a <c>StitcherPairFilter</c>
/// wrapped in a <c>PairFilterReadPairSource</c> restricted to this factory's reference id.
/// </summary>
/// <param name="bamReader">Reader the pair source pulls alignments from.</param>
/// <param name="statusCounter">Counter passed to both the filter and the pair source.</param>
/// <returns>The configured read pair source.</returns>
public IDataSource<ReadPair> CreateReadPairSource(IBamReader bamReader, ReadStatusCounter statusCounter)
{
    // Proper-pair filtering gets taken care of at the Gemini level now, so it is switched
    // off here (this used to come from _stitcherOptions.FilterForProperPairs).
    const bool pairSourceLevelFilterProperPairs = false;

    // Passed to the pair source as its expected fragment length.
    const int readLength = 150;

    var duplicateIdentifier = new AlignmentFlagDuplicateIdentifier();
    var pairFilter = new StitcherPairFilter(
        _stitcherOptions.FilterDuplicates,
        pairSourceLevelFilterProperPairs,
        duplicateIdentifier,
        statusCounter,
        minMapQuality: 0,
        treatImproperAsIncomplete: false);

    var pairSource = new PairFilterReadPairSource(
        bamReader,
        statusCounter,
        _skipAndRemoveDuplicates,
        pairFilter,
        refId: _refId,
        expectedFragmentLength: readLength,
        filterForProperPairs: pairSourceLevelFilterProperPairs);

    return pairSource;
}
/// <summary>
/// Creates a pair filter for stitching. The base filter is constructed with <c>true</c>,
/// a <c>StitchingReadPairEvaluator(true, true, false)</c> and <c>false</c>.
/// </summary>
/// <param name="skipDuplicates">Stored as the skip-duplicates setting.</param>
/// <param name="filterForProperPairs">Stored as the proper-pair filter setting.</param>
/// <param name="dupIdentifier">Duplicate identifier kept by the filter.</param>
/// <param name="statusCounter">Status counter kept by the filter.</param>
/// <param name="shouldSkipFusions">Stored as the skip-fusions setting.</param>
/// <param name="minMapQuality">Stored as the minimum map quality.</param>
/// <param name="maxPairGap">Stored as the maximum pair gap.</param>
/// <param name="filterPairUnmapped">Stored as the filter-pair-unmapped setting.</param>
/// <param name="filterPairLowMapQ">Stored as the filter-pair-low-map-quality setting.</param>
public StitcherPairFilter(
    bool skipDuplicates,
    bool filterForProperPairs,
    IDuplicateIdentifier dupIdentifier,
    ReadStatusCounter statusCounter,
    bool shouldSkipFusions = true,
    uint minMapQuality = 0,
    int maxPairGap = 500,
    bool filterPairUnmapped = false,
    bool filterPairLowMapQ = true)
    : base(true, new StitchingReadPairEvaluator(true, true, false), false)
{
    // Collaborators.
    _dupIdentifier = dupIdentifier;
    _statusCounter = statusCounter;

    // Filter switches.
    _skipDuplicates = skipDuplicates;
    _filterForProperPairs = filterForProperPairs;
    _shouldSkipFusions = shouldSkipFusions;
    _filterPairUnmapped = filterPairUnmapped;
    _filterPairLowMapQ = filterPairLowMapQ;

    // Thresholds.
    _minMapQuality = minMapQuality;
    _maxPairGap = maxPairGap;
}
/// <summary>
/// Checks that <c>DebugStatusHandler.AddCombinedStatusStringTags</c> writes the tag values
/// of both reads, comma-separated, onto the output alignment.
/// </summary>
public void AddCombinedStatusStringTags()
{
    // Arrange: each mate carries its own "HI" tag; the output starts as a copy of read 1.
    var statusCounter = new ReadStatusCounter();
    var debugHandler = new DebugStatusHandler(statusCounter);
    var readPair = TestHelpers.GetPair("10M", "10M");
    readPair.Read1.ReplaceOrAddStringTag("HI", "read1_hi");
    readPair.Read2.ReplaceOrAddStringTag("HI", "read2_hi");
    var combined = new BamAlignment(readPair.Read1);

    // Act
    debugHandler.AddCombinedStatusStringTags("HI", readPair.Read1, readPair.Read2, combined);

    // Assert
    Assert.Equal("read1_hi,read2_hi", combined.GetStringTag("HI"));
}
/// <summary>
/// Builds the pair handler for this run: a <c>BasicStitcher</c> configured from the options,
/// plus a reference-index-to-name mapping read from the input BAM.
/// </summary>
/// <param name="readStatuses">Status counter handed to the new pair handler.</param>
/// <returns>The configured pair handler.</returns>
private IReadPairHandler CreatePairHandler(ReadStatusCounter readStatuses)
{
    var basicStitcher = new BasicStitcher(
        _options.MinBaseCallQuality,
        useSoftclippedBases: _options.UseSoftClippedBases,
        nifyDisagreements: _options.NifyDisagreements,
        debug: _options.Debug,
        nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
        ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
        maxReadLength: _options.MaxReadLength);

    // A short-lived reader on the input BAM is opened only to enumerate reference names.
    var indexToName = new Dictionary<int, string>();
    using (var headerReader = new BamReader(_inBam))
    {
        foreach (var name in headerReader.GetReferenceNames())
        {
            var index = headerReader.GetReferenceIndex(name);
            indexToName.Add(index, name);
        }
    }

    var handler = new PairHandler(indexToName, basicStitcher, _options.FilterUnstitchablePairs, readStatuses);
    return handler;
}
/// <summary>
/// Checks that <c>DebugStatusHandler.UpdateStatusStringTag</c> replaces an existing string
/// tag on the alignment with the new value.
/// </summary>
public void UpdateStatusStringTag()
{
    // Arrange: the output alignment starts as a copy of read 1 with a placeholder "HI" tag.
    var counter = new ReadStatusCounter();
    var handler = new DebugStatusHandler(counter);
    var pair = TestHelpers.GetPair("10M", "10M");
    pair.Read1.ReplaceOrAddStringTag("HI", "read1_hi");
    pair.Read2.ReplaceOrAddStringTag("HI", "read2_hi");
    var outAlignment = new BamAlignment(pair.Read1);
    outAlignment.ReplaceOrAddStringTag("HI", "nothing");

    // Act: the debug handler is expected to overwrite the tag (the previous comment here
    // said "Should not update", which contradicted the assertion below).
    handler.UpdateStatusStringTag("HI", "newvalue", outAlignment);

    // Assert
    Assert.Equal("newvalue", outAlignment.GetStringTag("HI"));
}
/// <summary>
/// Creates a read pair source over <paramref name="bamReader"/> that applies
/// <paramref name="filter"/> to the pairs it reads.
/// </summary>
/// <param name="bamReader">Reader to pull alignments from.</param>
/// <param name="readStatuses">Status counter kept by the source.</param>
/// <param name="skipAndRemoveDuplicates">Stored as the skip-and-remove-duplicates setting.</param>
/// <param name="filter">Pair filter kept by the source.</param>
/// <param name="refId">
/// Optional reference id. When supplied, the chromosome filter flag is switched on and the
/// reader is asked to jump to position 0 of that reference.
/// </param>
/// <param name="expectedFragmentLength">
/// Optional expected fragment length. When supplied, insert-size consideration is switched on.
/// </param>
/// <param name="filterForProperPairs">Stored as the proper-pair filter setting.</param>
public PairFilterReadPairSource(
    IBamReader bamReader,
    ReadStatusCounter readStatuses,
    bool skipAndRemoveDuplicates,
    IAlignmentPairFilter filter,
    int? refId = null,
    int? expectedFragmentLength = null,
    bool filterForProperPairs = false)
{
    _bamReader = bamReader;
    _readStatuses = readStatuses;
    _skipAndRemoveDuplicates = skipAndRemoveDuplicates;

    if (refId.HasValue)
    {
        _applyChrFilter = true;
        _refId = refId.Value;
        _bamReader.Jump(_refId, 0);
    }

    _filter = filter;
    _filterForProperPairs = filterForProperPairs;

    if (expectedFragmentLength.HasValue)
    {
        _considerInsertSize = true;
        _expectedFragmentLength = expectedFragmentLength.Value;
    }
}
/// <summary>
/// Builds <paramref name="numThreads"/> pair handlers. Each handler gets its own
/// <c>BasicStitcher</c>; all of them share one reference-index-to-name mapping read from
/// the input BAM and the same <paramref name="readStatuses"/> counter.
/// </summary>
/// <param name="readStatuses">Status counter passed to every handler.</param>
/// <param name="numThreads">Number of handlers to create.</param>
/// <returns>The list of created handlers.</returns>
private List<IReadPairHandler> CreatePairHandlers(ReadStatusCounter readStatuses, int numThreads)
{
    var result = new List<IReadPairHandler>(numThreads);

    // A short-lived reader on the input BAM is opened only to enumerate reference names.
    var indexToName = new Dictionary<int, string>();
    using (var headerReader = new BamReader(_inBam))
    {
        foreach (var name in headerReader.GetReferenceNames())
        {
            var index = headerReader.GetReferenceIndex(name);
            indexToName.Add(index, name);
        }
    }

    while (result.Count < numThreads)
    {
        var basicStitcher = new BasicStitcher(
            _options.MinBaseCallQuality,
            useSoftclippedBases: _options.UseSoftClippedBases,
            nifyDisagreements: _options.NifyDisagreements,
            debug: _options.Debug,
            nifyUnstitchablePairs: _options.NifyUnstitchablePairs,
            ignoreProbeSoftclips: !_options.StitchProbeSoftclips,
            maxReadLength: _options.MaxReadLength,
            ignoreReadsAboveMaxLength: _options.IgnoreReadsAboveMaxLength,
            minMapQuality: _options.FilterMinMapQuality,
            dontStitchHomopolymerBridge: _options.DontStitchHomopolymerBridge);

        var handler = new PairHandler(indexToName, basicStitcher, readStatuses, _options.FilterUnstitchablePairs, true);
        result.Add(handler);
    }

    return result;
}
/// <summary>
/// Checks that <c>DebugSummaryStatusHandler.AddStatusCount</c> increments the per-status
/// tallies exposed by the underlying <c>ReadStatusCounter</c>.
/// </summary>
public void AddStatusCount()
{
    var counter = new ReadStatusCounter();
    var handler = new DebugSummaryStatusHandler(counter);

    // First status: one entry with a count of one.
    handler.AddStatusCount("x");
    var statuses = counter.GetReadStatuses();
    // Compare the entry count as an integer, consistent with the assertions below
    // (this was previously compared against the double literal 1.0).
    Assert.Equal(1, statuses.Count);
    Assert.Equal(1, statuses["x"]);

    // A second, different status adds a new entry and leaves the first one alone.
    handler.AddStatusCount("y");
    statuses = counter.GetReadStatuses();
    Assert.Equal(2, statuses.Count);
    Assert.Equal(1, statuses["x"]);
    Assert.Equal(1, statuses["y"]);

    // Repeating a status bumps its count without adding an entry.
    handler.AddStatusCount("x");
    statuses = counter.GetReadStatuses();
    Assert.Equal(2, statuses.Count);
    Assert.Equal(2, statuses["x"]);
    Assert.Equal(1, statuses["y"]);
}
/// <summary>
/// Creates a debug status handler backed by the supplied counter.
/// </summary>
/// <param name="statusCounter">Counter stored by the handler.</param>
public DebugStatusHandler(ReadStatusCounter statusCounter)
{
    this._statusCounter = statusCounter;
}
/// <summary>
/// Checks that a pre-existing, arbitrary "XR" tag on both mates is replaced by the
/// read-direction value ("FR") on the alignment returned by <c>PairHandler.ExtractReads</c>.
/// </summary>
private void ExtractReadsStrandXRValidation()
{
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var readStatusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

    var alignment1 = new BamAlignment()
    {
        AlignmentFlag = 99,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };

    // Start with a random XR tag and confirm whether it is overwritten by read direction.
    var tagUtils = new TagUtils();
    tagUtils.AddStringTag("XR", "BLABLA");
    alignment1.AppendTagData(tagUtils.ToBytes());

    var alignment2 = new BamAlignment()
    {
        AlignmentFlag = 144,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };

    // Same random XR tag for the mate. The tag is built with this mate's own TagUtils:
    // previously the first TagUtils was reused, so tagUtils2 was never used and the mate
    // received a tag buffer with "XR" added twice.
    var tagUtils2 = new TagUtils();
    tagUtils2.AddStringTag("XR", "BLABLA");
    alignment2.AppendTagData(tagUtils2.ToBytes());

    var readPair = new ReadPair(alignment1);
    readPair.AddAlignment(alignment2);

    var alignmentResults = pairHandler.ExtractReads(readPair);

    Assert.Equal(1, alignmentResults.Count);
    var alignment = alignmentResults[0];
    Assert.Equal("FR", alignment.GetStringTag("XR"));
}
/// <summary>
/// Checks that the single alignment returned by <c>PairHandler.ExtractReads</c> carries the
/// expected "XR" tag for the given strand orientation of the two mates.
/// </summary>
/// <param name="r1Reverse">Whether the first mate is flagged as reverse strand.</param>
/// <param name="r2Reverse">Whether the second mate is flagged as reverse strand.</param>
/// <param name="expectedXRTag">Expected value of the "XR" tag on the result.</param>
private void ExtractReadsStrand(bool r1Reverse, bool r2Reverse, string expectedXRTag)
{
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var readStatusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

    var alignment1 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    alignment1.SetIsFirstMate(true);
    alignment1.SetIsReverseStrand(r1Reverse);

    var alignment2 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    alignment2.SetIsSecondMate(true);
    alignment2.SetIsReverseStrand(r2Reverse);

    var readPair = new ReadPair(alignment1);
    readPair.AddAlignment(alignment2);

    var alignmentResults = pairHandler.ExtractReads(readPair);

    Assert.Equal(1, alignmentResults.Count);
    var alignment = alignmentResults[0];

    // The tag is read once, directly in the assertion; an unused local that duplicated
    // this lookup (and was misleadingly named "XD") has been removed.
    Assert.Equal(expectedXRTag, alignment.GetStringTag("XR"));
}
/// <summary>
/// Checks which tags survive on the single alignment returned by
/// <c>PairHandler.ExtractReads</c>: "XD" must be present, "NM", "BC" and "SM" must be
/// absent, and the UMI tags ("XU", "XV", "XW") must be carried over when they were added.
/// </summary>
/// <param name="addUmiTags">Whether both mates are given the UMI tags before extraction.</param>
private void ExtractReads(bool addUmiTags)
{
    var referenceNames = new Dictionary<int, string>() { { 1, "chr1" } };
    var testStitcher = StitcherTestHelpers.GetStitcher(10, false);
    var statusCounter = new ReadStatusCounter();
    var handler = new PairHandler(referenceNames, testStitcher, statusCounter, filterUnstitchablePairs: true);

    // First mate: BC and SM tags, plus the UMI tags on request.
    var mate1 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    var mate1Tags = new TagUtils();
    mate1Tags.AddStringTag("BC", "14");
    mate1Tags.AddIntTag("SM", 40);
    if (addUmiTags)
    {
        mate1Tags.AddIntTag("XV", 1);
        mate1Tags.AddIntTag("XW", 2);
        mate1Tags.AddStringTag("XU", "ABBA-ZABBA");
    }
    mate1.AppendTagData(mate1Tags.ToBytes());

    // Second mate: NM, BC and SM tags, plus the UMI tags on request.
    var mate2 = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    var mate2Tags = new TagUtils();
    mate2Tags.AddIntTag("NM", 5);
    mate2Tags.AddStringTag("BC", "14");
    mate2Tags.AddIntTag("SM", 20);
    if (addUmiTags)
    {
        mate2Tags.AddIntTag("XV", 1);
        mate2Tags.AddIntTag("XW", 2);
        mate2Tags.AddStringTag("XU", "ABBA-ZABBA");
    }
    mate2.AppendTagData(mate2Tags.ToBytes());

    var pair = new ReadPair(mate1);
    pair.AddAlignment(mate2);

    var extracted = handler.ExtractReads(pair);

    Assert.Equal(1, extracted.Count);
    var result = extracted[0];

    Assert.NotNull(result.GetStringTag("XD"));
    Assert.Null(result.GetIntTag("NM"));
    Assert.Null(result.GetStringTag("BC"));
    Assert.Null(result.GetIntTag("SM"));

    if (addUmiTags)
    {
        Assert.Equal("ABBA-ZABBA", result.GetStringTag("XU"));
        Assert.Equal(1, result.GetIntTag("XV"));
        Assert.Equal(2, result.GetIntTag("XW"));
    }
}
/// <summary>
/// Wires up a <c>ReadPairRealignerAndCombiner</c> for one chromosome: a stitching pair
/// handler, a realignment judger, a read realigner configured from the Gemini and
/// realignment options, a status handler, and a realignment evaluator working on a deep
/// copy of the indel source.
/// </summary>
/// <param name="tryRestitch">Passed to the pair handler as its <c>tryStitch</c> setting.</param>
/// <param name="alreadyStitched">Whether the input is already stitched; affects softclip handling.</param>
/// <param name="pairAwareRealign">Passed to the indel finder and the combiner.</param>
/// <param name="refIdMapping">Lookup from reference index to reference name.</param>
/// <param name="statusCounter">Counter backing the status handler.</param>
/// <param name="isSnowball">Passed to <c>GetCollector</c> to select the collector.</param>
/// <param name="indelSource">Indel source; the evaluator receives a deep copy of it.</param>
/// <param name="chromosome">Chromosome the returned object works on.</param>
/// <param name="masterLookup">Passed through to the combiner.</param>
/// <param name="hasIndels">Passed through to the combiner as <c>hasExistingIndels</c>.</param>
/// <param name="outcomesLookup">Passed through to the combiner as <c>masterOutcomesLookup</c>.</param>
/// <param name="skipRestitchIfNothingChanged">Passed through to the combiner.</param>
/// <returns>The configured realigner-and-combiner.</returns>
public ReadPairRealignerAndCombiner GetRealignPairHandler(
    bool tryRestitch,
    bool alreadyStitched,
    bool pairAwareRealign,
    Dictionary<int, string> refIdMapping,
    ReadStatusCounter statusCounter,
    bool isSnowball,
    IChromosomeIndelSource indelSource,
    string chromosome,
    Dictionary<string, IndelEvidence> masterLookup,
    bool hasIndels,
    Dictionary<HashableIndel, int[]> outcomesLookup,
    bool skipRestitchIfNothingChanged)
{
    var stitcher = GetStitcher();
    var stitchedPairHandler = new PairHandler(refIdMapping, stitcher, tryStitch: tryRestitch);

    var judger = new RealignmentJudger(GetAlignmentComparer());

    // The realigner gets its own comparer instance from a second GetAlignmentComparer() call.
    var realignerComparer = GetAlignmentComparer();

    // Both-side softclips are kept when asked for explicitly, or when probe softclips are
    // kept and the reads were already stitched.
    var keepBothSideSoftclips = _geminiOptions.KeepBothSideSoftclips
                                || (_geminiOptions.KeepProbeSoftclip && alreadyStitched);

    // Only Ns are masked when none of the softclip-related options is switched on.
    var maskNsOnly = !_geminiOptions.RemaskMessySoftclips
                     && !_geminiOptions.KeepProbeSoftclip
                     && !_geminiOptions.KeepBothSideSoftclips;

    var readRealigner = new GeminiReadRealigner(
        realignerComparer,
        remaskSoftclips: _geminiOptions.RemaskMessySoftclips,
        keepProbeSoftclips: _geminiOptions.KeepProbeSoftclip,
        keepBothSideSoftclips: keepBothSideSoftclips,
        trackActualMismatches: _realignmentAssessmentOptions.TrackActualMismatches,
        checkSoftclipsForMismatches: _realignmentAssessmentOptions.CheckSoftclipsForMismatches,
        debug: _geminiOptions.Debug,
        maskNsOnly: maskNsOnly,
        maskPartialInsertion: _realignmentOptions.MaskPartialInsertion,
        minimumUnanchoredInsertionLength: _realignmentOptions.MinimumUnanchoredInsertionLength,
        minInsertionSizeToAllowMismatchingBases: 4,
        maxProportionInsertSequenceMismatch: 0.2);
    // TODO fix
    // TODO figure out what I was saying to fix here...

    // The summary handler is the default; debug mode swaps in the full debug handler.
    IStatusHandler statusHandler = new DebugSummaryStatusHandler(statusCounter);
    if (_geminiOptions.Debug)
    {
        statusHandler = new DebugStatusHandler(statusCounter);
    }

    // Only softclip unknowns if it is not stitched to begin with (we believe in these more,
    // plus it makes our lives simpler for dealing with stitched directions).
    var softclipUnknownIndels = !alreadyStitched && _geminiOptions.SoftclipUnknownIndels;

    // A real RegionFilterer(chromosome, indelSource.Indels) used to be created here;
    // a dummy filterer is used instead.
    var regionFilterer = new DummyRegionFilterer();

    var collector = GetCollector(isSnowball);

    var realignmentEvaluator = new RealignmentEvaluator(
        indelSource.DeepCopy(),
        statusHandler,
        readRealigner,
        judger,
        chromosome,
        _realignmentAssessmentOptions.TrackActualMismatches,
        _realignmentAssessmentOptions.CheckSoftclipsForMismatches,
        _geminiOptions.AllowRescoringOrigZero,
        softclipUnknownIndels,
        regionFilterer,
        _geminiOptions.LightDebug);

    var restitcher = GetRestitcher(stitchedPairHandler, statusHandler);
    var indelFinder = GetIndelFinder(pairAwareRealign, chromosome, indelSource);

    return new ReadPairRealignerAndCombiner(
        collector,
        restitcher,
        realignmentEvaluator,
        indelFinder,
        chromosome,
        alreadyStitched,
        pairAwareRealign,
        masterLookup: masterLookup,
        hasExistingIndels: hasIndels,
        masterOutcomesLookup: outcomesLookup,
        skipRestitchIfNothingChanged: skipRestitchIfNothingChanged,
        allowedToStitch: !_geminiOptions.SkipStitching);
}