/// <summary>
/// Passes every alignment the extractor yields for <paramref name="chrName"/> in
/// <paramref name="inputBamFile"/> to <paramref name="writer"/>, then flushes the
/// writer's buffered records.
/// </summary>
/// <param name="inputBamFile">Path handed to the factory when creating the alignment extractor.</param>
/// <param name="chrName">Chromosome name handed to the factory when creating the alignment extractor.</param>
/// <param name="writer">Destination for the reads; flushed once after the extractor has been disposed.</param>
private void WriteChromosomeReads(string inputBamFile, string chrName, IRealignmentWriter writer)
{
    using (var chromosomeExtractor = _factory.CreateAlignmentExtractor(inputBamFile, chrName))
    {
        // One Read instance is handed to the extractor on every call.
        for (var currentRead = new Read(); chromosomeExtractor.GetNextAlignment(currentRead);)
        {
            // WriteRead takes its alignment by ref, so it needs a local.
            var alignment = currentRead.BamAlignment;

            // NOTE(review): the 'false' flag presumably means "not realigned" - Realign passes
            // 'true' only for reads whose alignment it rewrote; confirm against IRealignmentWriter.
            writer.WriteRead(ref alignment, false);
        }
    }

    writer.FlushAllBufferedRecords();
}
/// <summary>
/// Processes one batch of indel candidates from the state manager: ranks the candidates,
/// then walks reads from the realignment extractor, attempting to realign each eligible
/// read against the ranked targets and writing every non-skipped read to the writer.
/// </summary>
/// <param name="upToPosition">
/// Forwarded to the state manager when fetching the batch and the candidate groups.
/// When it has a value and the batch has no cleared regions, the method returns without
/// doing anything. TODO: document what a null value means - presumably "process everything
/// that remains"; confirm against the state manager.
/// </param>
private void Realign(int? upToPosition)
{
    var batch = _stateManager.GetCandidatesToProcess(upToPosition);
    var candidateIndelGroups = _stateManager.GetCandidateGroups(upToPosition);

    // nothing to do
    if (batch == null)
    {
        return;
    }

    if (upToPosition.HasValue && batch.ClearedRegions == null)
    {
        return;
    }

    // grab candidates (dropping those longer than the max indel size) and rank them
    var sizedCandidates = batch.GetCandidates().Where(candidate => candidate.Length <= _maxIndelSize);
    var allTargets = sizedCandidates.Select(candidate => new CandidateIndel(candidate)).ToList();
    var calledTargets = _caller.Call(allTargets, _stateManager);
    var rankedTargets = calledTargets.OrderBy(target => target.ReferencePosition).ToList();

    // longest ranked target plus one, or zero when there are no targets
    int maxTargetSize = rankedTargets.Count == 0
        ? 0
        : rankedTargets.Max(target => target.Length) + 1;

    // realign reads
    // A non-null _lastExtractedRead is processed first without advancing the extractor;
    // otherwise (first time around) the first alignment is pulled here.
    var hasRead = true;
    if (_lastExtractedRead == null)
    {
        _lastExtractedRead = new Read();
        hasRead = _extractorForRealign.GetNextAlignment(_lastExtractedRead);
    }

    // The iterator section also runs after every 'continue', so each pass advances the extractor.
    for (; hasRead; hasRead = _extractorForRealign.GetNextAlignment(_lastExtractedRead))
    {
        // Stop at the batch's max cleared position; the current read stays in _lastExtractedRead.
        if (batch.MaxClearedPosition.HasValue && _lastExtractedRead.Position >= batch.MaxClearedPosition)
        {
            break;
        }

        var alignment = _lastExtractedRead.BamAlignment;

        // get original alignment summary
        var originalSummary = _lastExtractedRead.GetAlignmentSummary(_chrReference.Sequence);

        // Duplicates are dropped entirely (never written) when skip-and-remove is enabled.
        if (_skipAndRemoveDuplicates && alignment.IsDuplicate())
        {
            continue;
        }

        // Reads written through unchanged: no targets to realign against, non-primary reads,
        // duplicates (when skipping them), reads that are or have supplementary alignments,
        // and reads for which PassesSuspicion returns true.
        var writeUnchanged = rankedTargets.Count == 0
                             || !alignment.IsPrimaryAlignment()
                             || (_skipDuplicates && alignment.IsDuplicate())
                             || alignment.IsSupplementaryAlignment()
                             || alignment.HasSupplementaryAlignment()
                             || PassesSuspicion(originalSummary);
        if (writeUnchanged)
        {
            _writer.WriteRead(ref alignment, false);
            continue;
        }

#if false
        Console.WriteLine("Original read has {0} mismatches {1} indels", originalAlignmentSummary.NumMismatches, originalAlignmentSummary.NumIndels);
#endif

        // try realigning
        // take realignment even if it's equal to original, this gives opportunity to known variants
        var realigned = _readRealigner.Realign(
            _lastExtractedRead,
            rankedTargets,
            _chrReference.Sequence,
            _indelRanker,
            candidateIndelGroups,
            maxTargetSize);

        var accepted = realigned != null
                       && RealignmentIsWithinRange(realigned, alignment)
                       && !RealignmentIsUnchanged(realigned, alignment)
                       && RealignmentBetterOrEqual(realigned, originalSummary);

        if (!accepted)
        {
            // Only an out-of-range shift is worth logging; other rejections are silent.
            if (realigned != null && !RealignmentIsWithinRange(realigned, alignment))
            {
                Logger.WriteToLog(
                    string.Format(
                        "Realignment attempt resulted in an attempted shift of read '{6}' from {0}:{1}:{2} to {0}:{3}:{4}, which is larger than the max realign shift of {5}. Original read will be outputted.",
                        alignment.RefID,
                        alignment.Position,
                        alignment.CigarData,
                        realigned.Position - 1,
                        realigned.Cigar,
                        _maxRealignShift,
                        alignment.Name));
            }

            // doesn't look any better
            _writer.WriteRead(ref alignment, false);
            continue;
        }

        alignment.Position = realigned.Position - 1; // 0 base
        alignment.CigarData = realigned.Cigar;

        // update NM tag (edit distance)
        alignment.UpdateIntTagData("NM", realigned.NumMismatches + realigned.NumIndelBases);

        // Raise the mapping quality of a low-quality read whose realignment has no mismatches;
        // reads with an original quality of 0 are only rescored when _allowRescoringOrig0 is set.
        if (alignment.MapQuality <= 20
            && realigned.NumMismatches == 0
            && (_allowRescoringOrig0 || alignment.MapQuality > 0))
        {
            alignment.MapQuality = 40; // todo what to set this to?
        }

        _writer.WriteRead(ref alignment, true);
        TotalRealignedReads++;
    }

    _stateManager.DoneProcessing(batch);
}