// Verifies that GeminiDataSourceFactory.CreateBamReader returns a reader that can read
// back an alignment from a BAM file written just before.
public void CreateBamReader()
{
    // Should return a fully functional bam reader
    var tempPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";

    // File.Delete does not throw when the file is absent, so no Exists check is needed.
    File.Delete(tempPath);

    try
    {
        using (var bamWriter = new BamWriter(tempPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
        {
            var bamWriterHandle = new BamWriterHandle(bamWriter);
            bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 8, 10, 30, true));

            // NOTE(review): a null alignment presumably tells the handle that input is finished - confirm.
            bamWriterHandle.WriteAlignment(null);
        }

        Assert.True(File.Exists(tempPath));

        var stitcherOptions = new StitcherOptions();
        var factory = new GeminiDataSourceFactory(stitcherOptions, "fakeGenomePath", false);

        using (var bamReader = factory.CreateBamReader(tempPath))
        {
            BamAlignment alignment = new BamAlignment();
            var getNext = bamReader.GetNextAlignment(ref alignment, true);

            Assert.True(getNext);

            // The alignment was created with 8 and reads back as 7.
            // NOTE(review): presumably the helper takes a 1-based position - confirm.
            Assert.Equal(7, alignment.Position);
        }
    }
    finally
    {
        // Both the writer and the reader are disposed by now, so the temporary BAM can be
        // removed even when an assertion or I/O call above has thrown.
        File.Delete(tempPath);
    }
}
/// <summary>
/// Closes and disposes the held writer, if there is one, then clears the reference so
/// that a later call finds nothing left to release.
/// </summary>
public void Dispose()
{
    if (_writer == null)
    {
        return;
    }

    _writer.Close();
    _writer.Dispose();
    _writer = null;
}
/// <summary>
/// Called when processing of a read is complete. Records the read's new span when it was
/// remapped, flushes buffered positions that are far enough behind the read, and inserts
/// the serialized read into the position-ordered read buffer.
/// </summary>
/// <param name="read">The finished read; it is serialized before being buffered.</param>
/// <param name="remapped">True when the read was remapped and its new span must be recorded.</param>
public void WriteRead(ref BamAlignment read, bool remapped)
{
    if (remapped)
    {
        // Remember where this read now starts and ends, keyed by read name and mate number.
        var remapInfo = new RemapInfo(read.Position, read.Position + (int)read.CigarData.GetReferenceSpan() - 1);
        string remapKey = string.Format("{0}-{1}", read.Name, read.IsFirstMate() ? 1 : 2);
        _remappings[remapKey] = remapInfo;
    }

    byte[] serializedRead = BamWriter.SerializeAlignment(ref read);

    LinkedListNode<ReadsForPosition> current = _readBuffer.First;
    while (current != null)
    {
        var bucket = current.Value;

        // Flush every buffered position lying more than twice the maximum realign shift
        // to the left of the current read.
        //
        // Why 2x and not 1x: realignment can move a read in either direction. With a max
        // shift of 10, reads that originally started at 110 can arrive anywhere between
        // 100 and 120. Seeing a read at 115 (originally 110, shifted right) therefore does
        // not rule out a later read at 100 (originally 110, shifted left), so flushing
        // everything below 115 - 10 would throw away a position that is still in use.
        // Once a read at 121 arrives, it can have originated no earlier than 111, and any
        // other read from 111 can land no earlier than 101, so every position below
        // 121 - 20 = 101 is safe to flush.
        if (bucket.Position < read.Position - (_maxRealignShift * 2))
        {
            MinimumRealignmentStartPosition = Math.Max(MinimumRealignmentStartPosition, bucket.Position);

            // Capture the successor before this node is flushed and unlinked.
            LinkedListNode<ReadsForPosition> following = current.Next;
            FlushBufferedRecords(bucket);
            _readBuffer.Remove(current);
            current = following;
        }
        else if (bucket.Position == read.Position)
        {
            // A bucket for this position already exists: append to it.
            bucket.Reads.Add(serializedRead);
            return;
        }
        else if (bucket.Position > read.Position)
        {
            // Walked past the read's position: insert a new bucket to keep the list ordered.
            var inserted = new ReadsForPosition { Position = read.Position };
            inserted.Reads.Add(serializedRead);
            _readBuffer.AddBefore(current, inserted);
            return;
        }
        else
        {
            current = current.Next;
        }
    }

    // Every remaining bucket is to the left of this read (or the buffer is empty).
    var appended = new ReadsForPosition { Position = read.Position };
    appended.Reads.Add(serializedRead);
    _readBuffer.AddLast(appended);
}
/// <summary>
/// Second pass over the temporary BAM: copies every read to <paramref name="writer"/>,
/// first updating the mate-related fields of reads whose mate was remapped.
/// </summary>
/// <param name="tmpFile">Path of the BAM file to read from.</param>
/// <param name="writer">Writer that receives every read, adjusted or not.</param>
private void AdjustMates(string tmpFile, BamWriter writer)
{
    // Second pass: Adjust flags on mates
    Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);

    var read = new BamAlignment();
    using (var reader = new BamReader(tmpFile))
    {
        while (reader.GetNextAlignment(ref read, false))
        {
            // Look up the *other* mate: a first mate looks for "-2", a second mate for "-1".
            var mateKey = string.Format("{0}-{1}", read.Name, read.IsFirstMate() ? 2 : 1);

            RemapInfo mateInfo;
            if (_remappings.TryGetValue(mateKey, out mateInfo))
            {
                if (mateInfo.Start == -1)
                {
                    // A start of -1 is handled as "mate is unmapped".
                    read.SetIsMateUnmapped(true);
                    read.SetIsProperPair(false);
                    read.FragmentLength = 0;
                }
                else
                {
                    read.MatePosition = mateInfo.Start;
                }

                if (read.IsMateMapped() && read.IsProperPair())
                {
                    int readEnd = read.Position + (int)read.CigarData.GetReferenceSpan() - 1;

                    // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                    if (read.Position < mateInfo.Start)
                    {
                        // This read starts first: length runs from its start to the mate's end.
                        read.FragmentLength = mateInfo.End - read.Position + 1;
                    }
                    else
                    {
                        // The mate starts first (or at the same position): negative length.
                        read.FragmentLength = mateInfo.Start - readEnd - 1;
                    }
                }
            }

            // Reads whose mate was not remapped are written unchanged.
            writer.WriteAlignment(read);
        }
    }
}
/// <summary>
/// Stitches the input BAM one chromosome at a time, each as its own job, then
/// concatenates the per-chromosome results into a single final BAM and deletes the
/// intermediate files.
/// </summary>
/// <param name="inputBam">Path of the BAM file to stitch.</param>
/// <param name="outFolder">Folder that receives the intermediate and final BAM files.</param>
/// <param name="stitcherOptions">Options passed to every per-chromosome stitcher.</param>
public void Process(string inputBam, string outFolder, StitcherOptions stitcherOptions)
{
    var baseName = Path.GetFileNameWithoutExtension(inputBam);
    var jobManager = new JobManager(10);
    var jobs = new List<IJob>();
    var perChromBams = new List<string>();

    // Process each of the chromosomes separately
    foreach (var chrom in _chroms)
    {
        var chromOutput = Path.Combine(outFolder, baseName + "." + chrom + ".stitched.bam");
        perChromBams.Add(chromOutput);

        var stitcher = new BamStitcher(inputBam, chromOutput, stitcherOptions, chrFilter: chrom);
        jobs.Add(new GenericJob(() => stitcher.Execute(), "Stitcher_" + chrom));
    }

    jobManager.Process(jobs);

    // Combine the per-chromosome bams
    Logger.WriteToLog("Writing final bam.");
    var outputBam = Path.Combine(outFolder, baseName + ".final.stitched.bam");

    using (var finalOutput = new BamWriter(outputBam, _header, _references))
    {
        foreach (var chromBam in perChromBams)
        {
            Logger.WriteToLog("Adding " + chromBam + " to final bam.");

            var alignment = new BamAlignment();
            using (var chromReader = new BamReader(chromBam))
            {
                while (chromReader.GetNextAlignment(ref alignment, false))
                {
                    finalOutput.WriteAlignment(alignment);
                }
            }

            // The intermediate file has been copied in full and is no longer needed.
            File.Delete(chromBam);
        }
    }

    Logger.WriteToLog("Finished combining per-chromosome bams into final bam at " + outputBam);
}
/// <summary>
/// Makes sure the output file's directory exists, then creates the writer for the first
/// temporary file using the header and reference sequences read from the input BAM.
/// </summary>
public void Initialize()
{
    var outputDirectory = Path.GetDirectoryName(_outputFile);

    // Path.GetDirectoryName yields an empty string for a bare file name and null for a
    // root path. Neither has a directory to create, and Directory.CreateDirectory throws
    // on both, so skip creation in those cases.
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(outputDirectory);
    }

    using (var reader = new BamReader(_inputFile))
    {
        var genome = reader.GetReferences();
        _bamWriter = new BamWriter(_temp1File, reader.GetHeader(), genome);
    }
}
// Writes a small BAM, builds a BamWriterFactory from it, and checks that a BAM produced
// through the factory carries the Gemini program entries in its header.
public void HappyPath()
{
    // TODO figure out how to access the shared bams (for example "Chr17Chr19.bam" under
    // TestPaths.SharedBamDirectory) instead of generating an input file here.
    var sourceBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(sourceBamPath))
    {
        File.Delete(sourceBamPath);
    }

    using (var sourceWriter = new BamWriter(sourceBamPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
    {
        sourceWriter.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true));
    }

    var factory = new BamWriterFactory(1, sourceBamPath);

    var outputBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
    if (File.Exists(outputBamPath))
    {
        File.Delete(outputBamPath);
    }

    var outputHandle = factory.CreateSingleBamWriter(outputBamPath);
    outputHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCAG", 1, 10, 30, true));

    // NOTE(review): a null alignment presumably tells the handle that input is finished - confirm.
    outputHandle.WriteAlignment(null);

    using (var outputReader = new BamReader(outputBamPath))
    {
        // TODO more specific?
        var headerText = outputReader.GetHeader();
        Assert.Contains("ID:Gemini", headerText);
        Assert.Contains("PN:Gemini", headerText);
    }

    File.Delete(sourceBamPath);
    File.Delete(outputBamPath);
}
// Verifies that alignments written through a BamWriterHandle end up in a BAM file on disk.
public void HappyPath()
{
    var tempPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";

    // File.Delete does not throw when the file is absent, so no Exists check is needed.
    File.Delete(tempPath);

    try
    {
        using (var bamWriter = new BamWriter(tempPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
        {
            var bamWriterHandle = new BamWriterHandle(bamWriter);
            bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true));

            // NOTE(review): a null alignment presumably tells the handle that input is finished - confirm.
            bamWriterHandle.WriteAlignment(null);
        }

        Assert.True(File.Exists(tempPath));
    }
    finally
    {
        // The writer is disposed by now, so the temporary BAM can be removed even when a
        // write or the assertion above has thrown.
        File.Delete(tempPath);
    }
}
/// <summary>
/// Jumps to the unaligned reads of the input file and copies every read that has no
/// reference (RefID of -1) to <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Writer that receives the unaligned reads.</param>
private void WriteUnalignedReads(BamWriter writer)
{
    Logger.WriteToLog("Writing unaligned reads");

    using (var reader = new BamReader(_inputFile))
    {
        reader.JumpToUnaligned();

        var read = new BamAlignment();
        while (reader.GetNextAlignment(ref read, false))
        {
            // Reads that still carry a reference id are skipped; only reads without a
            // reference are copied.
            if (read.RefID == -1)
            {
                writer.WriteAlignment(read);
            }
        }
    }
}
/// <summary>
/// Creates a handle that keeps a reference to the given writer.
/// </summary>
/// <param name="writer">The writer stored by this handle.</param>
public BamWriterHandle(BamWriter writer)
{
    this._writer = writer;
}
/// <summary>
/// Creates a wrapper that keeps a reference to the given writer.
/// </summary>
/// <param name="writer">The writer stored by this wrapper.</param>
public BamWriterWrapper(BamWriter writer)
{
    this._writer = writer;
}