Example #1
0
        /// <summary>
        /// Writes a small temporary BAM file, opens it through
        /// <c>GeminiDataSourceFactory.CreateBamReader</c>, and checks that the reader
        /// returns an alignment whose <c>Position</c> is 7.
        /// </summary>
        public void CreateBamReader()
        {
            // Should return a fully functional bam reader.
            // The GUID makes the file name unique, so there is no stale file to remove up front.
            var tempPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";

            try
            {
                using (var bamWriter = new BamWriter(tempPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
                {
                    var bamWriterHandle = new BamWriterHandle(bamWriter);
                    bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 8, 10, 30, true));
                    // NOTE(review): a null alignment presumably acts as an end-of-input/flush signal - confirm.
                    bamWriterHandle.WriteAlignment(null);
                }

                Assert.True(File.Exists(tempPath));

                var stitcherOptions = new StitcherOptions();
                var factory = new GeminiDataSourceFactory(stitcherOptions, "fakeGenomePath", false);

                using (var bamReader = factory.CreateBamReader(tempPath))
                {
                    var alignment = new BamAlignment();
                    var getNext = bamReader.GetNextAlignment(ref alignment, true);
                    Assert.True(getNext);
                    Assert.Equal(7, alignment.Position);
                }
            }
            finally
            {
                // Runs even when an assertion above fails, so the temporary file is never left behind.
                // File.Delete does nothing if the file was never created.
                File.Delete(tempPath);
            }
        }
        /// <summary>
        /// Closes and disposes the held writer, then clears the reference.
        /// Does nothing when no writer is held, so repeated calls are harmless.
        /// </summary>
        public void Dispose()
        {
            if (_writer == null)
            {
                // Nothing is held, so there is nothing to release.
                return;
            }

            _writer.Close();
            _writer.Dispose();
            _writer = null;
        }
Example #3
0
        /// <summary>
        /// Called once a read has been fully processed. Serializes the read, flushes buffered
        /// positions that are far enough behind it, and files the read into the position-ordered buffer.
        /// </summary>
        /// <param name="read">The finished read; it is serialized and stored under its <c>Position</c>.</param>
        /// <param name="remapped">
        /// When true, the read's start/end span is recorded in the remapping table under the
        /// key "name-mateNumber" (1 for the first mate, 2 otherwise).
        /// </param>
        public void WriteRead(ref BamAlignment read, bool remapped)
        {
            if (remapped)
            {
                var span = new RemapInfo(read.Position,
                                         read.Position + (int)read.CigarData.GetReferenceSpan() - 1);
                var remapKey = string.Format("{0}-{1}", read.Name, read.IsFirstMate() ? 1 : 2);
                _remappings[remapKey] = span;
            }

            byte[] serialized = BamWriter.SerializeAlignment(ref read);

            LinkedListNode<ReadsForPosition> current = _readBuffer.First;
            while (current != null)
            {
                // A bucket is flushed once it lies more than TWICE the max realign shift behind the
                // current read. Twice, because realignment can move a read in either direction: with a
                // max shift of 10, a read seen at 115 may have been moved right from 110, while other
                // reads that started at 110 can still arrive moved left to 100. Using a single max shift
                // as the threshold would flush the 100s on seeing 115 (100 < 115 - 10) even though more
                // 100s may follow. Only from 121 onwards is every future read guaranteed to be > 100
                // (121 could be a max-right-shift from 111, whose siblings can shift left to 101 at most).
                if (current.Value.Position < read.Position - (_maxRealignShift * 2))
                {
                    MinimumRealignmentStartPosition = Math.Max(MinimumRealignmentStartPosition, current.Value.Position);

                    // Capture the successor before the node is unlinked from the list.
                    LinkedListNode<ReadsForPosition> successor = current.Next;
                    FlushBufferedRecords(current.Value);
                    _readBuffer.Remove(current);
                    current = successor;
                }
                else if (current.Value.Position == read.Position)
                {
                    // A bucket for this position already exists: append to it and stop.
                    current.Value.Reads.Add(serialized);
                    return;
                }
                else if (current.Value.Position > read.Position)
                {
                    // First bucket beyond the read's position: insert a new bucket just before it.
                    var inserted = new ReadsForPosition { Position = read.Position };
                    inserted.Reads.Add(serialized);
                    _readBuffer.AddBefore(current, inserted);
                    return;
                }
                else
                {
                    current = current.Next;
                }
            }

            // No bucket at or after this position remains, so the read starts a new tail bucket.
            var tail = new ReadsForPosition { Position = read.Position };
            tail.Reads.Add(serialized);
            _readBuffer.AddLast(tail);
        }
Example #4
0
        /// <summary>
        /// Second pass: re-reads every alignment from <paramref name="tmpFile"/>, corrects its
        /// mate-related fields when its mate has an entry in the remapping table, and writes the
        /// alignment to <paramref name="writer"/>.
        /// </summary>
        /// <param name="tmpFile">Path of the BAM file to read the alignments from.</param>
        /// <param name="writer">Writer that receives every alignment, adjusted or not.</param>
        private void AdjustMates(string tmpFile, BamWriter writer)
        {
            Logger.WriteToLog("Writing reads with corrected mate flags, {0} total remapped reads", _remappings.Count);
            var current = new BamAlignment();

            using (var reader = new BamReader(tmpFile))
            {
                while (reader.GetNextAlignment(ref current, false))
                {
                    // The mate is stored under the same read name with the opposite mate number.
                    var mateKey = string.Format("{0}-{1}", current.Name, current.IsFirstMate() ? 2 : 1);

                    RemapInfo mateInfo;
                    if (_remappings.TryGetValue(mateKey, out mateInfo))
                    {
                        if (mateInfo.Start != -1)
                        {
                            current.MatePosition = mateInfo.Start;
                        }
                        else
                        {
                            // A start of -1 is treated as "mate is unmapped".
                            current.SetIsMateUnmapped(true);
                            current.SetIsProperPair(false);
                            current.FragmentLength = 0;
                        }

                        if (current.IsMateMapped() && current.IsProperPair())
                        {
                            int currentEnd = current.Position + (int)current.CigarData.GetReferenceSpan() - 1;

                            // todo jg - should FragmentLength be 0 if the reads are mapped to diff chrs
                            if (current.Position < mateInfo.Start)
                            {
                                current.FragmentLength = mateInfo.End - current.Position + 1;
                            }
                            else
                            {
                                current.FragmentLength = mateInfo.Start - currentEnd - 1;
                            }
                        }
                    }

                    // Reads whose mate was not remapped fall straight through to here unchanged.
                    writer.WriteAlignment(current);
                }
            }
        }
Example #5
0
        /// <summary>
        /// Stitches <paramref name="inputBam"/> one chromosome at a time into intermediate BAM files,
        /// then concatenates those files into "&lt;input name&gt;.final.stitched.bam" in
        /// <paramref name="outFolder"/> and deletes each intermediate file once it has been copied.
        /// </summary>
        /// <param name="inputBam">Path of the BAM file to stitch.</param>
        /// <param name="outFolder">Folder that receives the intermediate and final BAM files.</param>
        /// <param name="stitcherOptions">Options handed to every per-chromosome stitcher.</param>
        public void Process(string inputBam, string outFolder, StitcherOptions stitcherOptions)
        {
            var jobManager = new JobManager(10);
            var stitchJobs = new List<IJob>();
            var chromBamPaths = new List<string>();

            // One stitching job per chromosome.
            foreach (var chrom in _chroms)
            {
                var chromFileName = Path.GetFileNameWithoutExtension(inputBam) + "." + chrom + ".stitched.bam";
                var chromBamPath = Path.Combine(outFolder, chromFileName);
                chromBamPaths.Add(chromBamPath);

                var stitcher = new BamStitcher(inputBam, chromBamPath, stitcherOptions, chrFilter: chrom);
                stitchJobs.Add(new GenericJob(() => stitcher.Execute(), "Stitcher_" + chrom));
            }

            jobManager.Process(stitchJobs);

            // Merge the per-chromosome results, in chromosome order, into a single file.
            Logger.WriteToLog("Writing final bam.");

            var finalFileName = Path.GetFileNameWithoutExtension(inputBam) + ".final.stitched.bam";
            var outputBam = Path.Combine(outFolder, finalFileName);

            using (var finalOutput = new BamWriter(outputBam, _header, _references))
            {
                foreach (var chromBamPath in chromBamPaths)
                {
                    Logger.WriteToLog("Adding " + chromBamPath + " to final bam.");
                    var alignment = new BamAlignment();

                    using (var chromReader = new BamReader(chromBamPath))
                    {
                        while (chromReader.GetNextAlignment(ref alignment, false))
                        {
                            finalOutput.WriteAlignment(alignment);
                        }
                    }

                    // The intermediate file has been fully copied, so it is no longer needed.
                    File.Delete(chromBamPath);
                }
            }

            Logger.WriteToLog("Finished combining per-chromosome bams into final bam at " + outputBam);
        }
Example #6
0
        /// <summary>
        /// Ensures the directory of the output file exists, then creates the writer for the first
        /// temporary file using the header and references read from the input BAM.
        /// </summary>
        public void Initialize()
        {
            var outputDirectory = Path.GetDirectoryName(_outputFile);

            // GetDirectoryName returns "" for a bare file name and null for a root path.
            // CreateDirectory throws on both, so only create a directory when there is one to create.
            // CreateDirectory is already a no-op for an existing directory, so no Exists check is needed.
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            // The reader is only needed to copy the header and reference list; it is closed right after.
            using (var reader = new BamReader(_inputFile))
            {
                var genome = reader.GetReferences();

                _bamWriter = new BamWriter(_temp1File, reader.GetHeader(), genome);
            }
        }
Example #7
0
        /// <summary>
        /// Writes a first BAM file, builds a <c>BamWriterFactory</c> from it, writes a second BAM
        /// through the factory's single writer, and checks that the second file's header contains
        /// the Gemini program entries.
        /// </summary>
        public void HappyPath()
        {
            // TODO figure out how to access the shared bams
            // (e.g. "Chr17Chr19.bam" under TestPaths.SharedBamDirectory) instead of writing one here.
            var firstBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
            if (File.Exists(firstBamPath))
            {
                File.Delete(firstBamPath);
            }

            using (var firstWriter = new BamWriter(firstBamPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
            {
                var firstAlignment = TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true);
                firstWriter.WriteAlignment(firstAlignment);
            }

            var factory = new BamWriterFactory(1, firstBamPath);

            var secondBamPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";
            if (File.Exists(secondBamPath))
            {
                File.Delete(secondBamPath);
            }

            var handle = factory.CreateSingleBamWriter(secondBamPath);

            var secondAlignment = TestHelpers.CreateBamAlignment("ATCAG", 1, 10, 30, true);
            handle.WriteAlignment(secondAlignment);
            // NOTE(review): a null alignment presumably tells the handle to finish/flush - confirm.
            handle.WriteAlignment(null);

            using (var secondReader = new BamReader(secondBamPath))
            {
                // TODO more specific?
                var headerText = secondReader.GetHeader();
                Assert.Contains("ID:Gemini", headerText);
                Assert.Contains("PN:Gemini", headerText);
            }

            File.Delete(firstBamPath);
            File.Delete(secondBamPath);
        }
Example #8
0
        /// <summary>
        /// Writes one alignment followed by a null alignment through a <c>BamWriterHandle</c> and
        /// checks that the BAM file exists afterwards.
        /// </summary>
        public void HappyPath()
        {
            // The GUID makes the file name unique, so there is no stale file to remove up front.
            var tempPath = $"TemporaryBamFile_{Guid.NewGuid()}.bam";

            try
            {
                using (var bamWriter = new BamWriter(tempPath, "header", new List<GenomeMetadata.SequenceMetadata>()))
                {
                    var bamWriterHandle = new BamWriterHandle(bamWriter);
                    bamWriterHandle.WriteAlignment(TestHelpers.CreateBamAlignment("ATCG", 1, 10, 30, true));
                    // NOTE(review): a null alignment presumably acts as an end-of-input/flush signal - confirm.
                    bamWriterHandle.WriteAlignment(null);
                }

                Assert.True(File.Exists(tempPath));
            }
            finally
            {
                // Runs even when the write or the assertion fails, so the temporary file is never left behind.
                // File.Delete does nothing if the file was never created.
                File.Delete(tempPath);
            }
        }
Example #9
0
 /// <summary>
 /// Jumps to the unaligned (and mate-unaligned) reads at the tail of the input file and copies
 /// every read whose <c>RefID</c> is -1 to <paramref name="writer"/>.
 /// </summary>
 /// <param name="writer">Writer that receives the unaligned reads.</param>
 private void WriteUnalignedReads(BamWriter writer)
 {
     Logger.WriteToLog("Writing unaligned reads");

     using (var reader = new BamReader(_inputFile))
     {
         reader.JumpToUnaligned();

         var current = new BamAlignment();
         while (reader.GetNextAlignment(ref current, false))
         {
             // Reads that still carry a reference id (the last aligned reads) are skipped.
             if (current.RefID == -1)
             {
                 writer.WriteAlignment(current);
             }
         }
     }
 }
 /// <summary>
 /// Creates a handle that stores the given writer for later use.
 /// </summary>
 /// <param name="writer">The BAM writer this handle keeps a reference to.</param>
 public BamWriterHandle(BamWriter writer)
 {
     this._writer = writer;
 }
 /// <summary>
 /// Creates a wrapper that stores the given writer for later use.
 /// </summary>
 /// <param name="writer">The BAM writer this wrapper keeps a reference to.</param>
 public BamWriterWrapper(BamWriter writer)
 {
     this._writer = writer;
 }