/// <summary>
/// Serializes the supplied BAM index to the underlying index stream.
/// </summary>
/// <param name="bamIndex">BAMIndex instance to serialize.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bamIndex"/> is null.</exception>
/// <exception cref="InvalidOperationException">Thrown when the underlying stream field is null.</exception>
public void Write(BAMIndex bamIndex)
{
    if (bamIndex == null)
    {
        throw new ArgumentNullException("bamIndex");
    }

    if (sourceStream == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    // Signature bytes: ASCII 'B', 'A', 'I' followed by 1.
    byte[] signature = new byte[] { 66, 65, 73, 1 };
    Write(signature, 0, 4);

    // Number of per-reference index entries that follow.
    byte[] buffer = Helper.GetLittleEndianByteArray(bamIndex.RefIndexes.Count);
    Write(buffer, 0, 4);

    for (int r = 0; r < bamIndex.RefIndexes.Count; r++)
    {
        BAMReferenceIndexes reference = bamIndex.RefIndexes[r];

        // Bin section: bin count, then for every bin its number, chunk count and chunks.
        buffer = Helper.GetLittleEndianByteArray(reference.Bins.Count);
        Write(buffer, 0, 4);

        foreach (Bin currentBin in reference.Bins)
        {
            buffer = Helper.GetLittleEndianByteArray(currentBin.BinNumber);
            Write(buffer, 0, 4);

            buffer = Helper.GetLittleEndianByteArray(currentBin.Chunks.Count);
            Write(buffer, 0, 4);

            // Each chunk is stored as a start offset followed by an end offset, 8 bytes each.
            foreach (Chunk currentChunk in currentBin.Chunks)
            {
                Write(GetBAMOffsetArray(currentChunk.ChunkStart), 0, 8);
                Write(GetBAMOffsetArray(currentChunk.ChunkEnd), 0, 8);
            }
        }

        // Linear offsets section: count, then one 8-byte offset per entry.
        buffer = Helper.GetLittleEndianByteArray(reference.LinearOffsets.Count);
        Write(buffer, 0, 4);

        for (int o = 0; o < reference.LinearOffsets.Count; o++)
        {
            Write(GetBAMOffsetArray(reference.LinearOffsets[o]), 0, 8);
        }

        sourceStream.Flush();
    }

    sourceStream.Flush();
}
/// <summary>
/// Lazily parses the alignments of a BAM file that belong to the given reference
/// sequence and range, using the BAM index file to locate the relevant chunks.
/// </summary>
/// <param name="fileName">Path of the BAM file to read.</param>
/// <param name="refSeqName">Name of the reference sequence to look up.</param>
/// <param name="start">Start of the requested range.</param>
/// <param name="end">End of the requested range.</param>
/// <returns>
/// A deferred sequence of alignments. The files are opened when enumeration starts and
/// released when the enumerator is disposed.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown immediately when <paramref name="fileName"/> or <paramref name="refSeqName"/> is null.
/// </exception>
public IEnumerable<CompactSAMSequence> ParseRangeAsEnumerableSequences(string fileName, string refSeqName, int start = 0, int end = Int32.MaxValue)
{
    // Validate here rather than in the iterator: an iterator body does not run until
    // the first MoveNext, which would defer these exceptions to the point of enumeration.
    if (fileName == null)
    {
        throw new ArgumentNullException("fileName");
    }

    if (refSeqName == null)
    {
        throw new ArgumentNullException("refSeqName");
    }

    return ParseRangeAsEnumerableSequencesIterator(fileName, refSeqName, start, end);
}

// Iterator half of ParseRangeAsEnumerableSequences: opens the BAM and index files and yields the matching alignments.
private IEnumerable<CompactSAMSequence> ParseRangeAsEnumerableSequencesIterator(string fileName, string refSeqName, int start, int end)
{
    using (FileStream bamStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        string bamIndexFileName = getBAMIndexFileName(fileName);
        using (BAMIndexFile bamIndexFile = new BAMIndexFile(bamIndexFileName, FileMode.Open, FileAccess.Read))
        {
            readStream = bamStream;
            try
            {
                if (readStream == null || readStream.Length == 0)
                {
                    throw new FileFormatException(Properties.Resource.BAM_InvalidBAMFile);
                }

                ValidateReader();

                // The returned header is not needed here; the call is kept for its effect on
                // reader state (presumably it fills refSeqNames - TODO confirm).
                GetHeader();

                // Verify that the requested reference sequence is known.
                int refSeqIndex = refSeqNames.IndexOf(refSeqName);
                if (refSeqIndex < 0)
                {
                    string message = string.Format(CultureInfo.InvariantCulture, Properties.Resource.BAM_RefSeqNotFound, refSeqName);
                    throw new ArgumentException(message, "refSeqName");
                }

                BAMIndex bamIndexInfo = bamIndexFile.Read();
                BAMReferenceIndexes refIndex = bamIndexInfo.RefIndexes[refSeqIndex];
                IList<Chunk> chunks = GetChunks(refIndex, start, end);

                foreach (var s in EnumerateAlignedSequences(chunks))
                {
                    // Keep reads whose RName is "*" and reads lying inside the requested range.
                    if (s != null && (s.RName == "*" || (s.Pos >= (start - 1) && s.RefEndPos < end)))
                    {
                        yield return s;
                    }
                }
            }
            finally
            {
                // Runs on completion, on failure and when the caller stops enumerating early,
                // so the field never keeps pointing at the stream disposed below.
                readStream = null;
            }
        }
    }
}
/// <summary>
/// Writes specified BAMIndex data, including the per-reference meta-data bin when present.
/// </summary>
/// <param name="bamIndex">BAMIndex instance to write.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bamIndex"/> is null.</exception>
/// <exception cref="InvalidOperationException">Thrown when the underlying stream field is null.</exception>
public void Write(BAMIndex bamIndex)
{
    if (bamIndex == null)
    {
        throw new ArgumentNullException("bamIndex");
    }

    if (sourceStream == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    // Signature bytes: ASCII 'B', 'A', 'I' followed by 1.
    byte[] magic = new byte[] { 66, 65, 73, 1 };
    Write(magic, 0, 4);

    byte[] arrays = Helper.GetLittleEndianByteArray(bamIndex.RefIndexes.Count);
    Write(arrays, 0, 4);

    for (Int32 refindex = 0; refindex < bamIndex.RefIndexes.Count; refindex++)
    {
        BAMReferenceIndexes bamindices = bamIndex.RefIndexes[refindex];

        // The meta-data bin is written whenever it is available; the byte order of the
        // counts is normalised below, so this no longer depends on the host architecture.
        bool addingMetaData = bamindices.HasMetaData;
        int binCount = bamindices.Bins.Count;
        if (addingMetaData)
        {
            binCount++;
        }

        arrays = Helper.GetLittleEndianByteArray(binCount);
        Write(arrays, 0, 4);

        // Write each bin: number, chunk count, then start/end offset of every chunk.
        foreach (Bin bin in bamindices.Bins)
        {
            arrays = Helper.GetLittleEndianByteArray(bin.BinNumber);
            Write(arrays, 0, 4);

            arrays = Helper.GetLittleEndianByteArray(bin.Chunks.Count);
            Write(arrays, 0, 4);

            foreach (Chunk chunk in bin.Chunks)
            {
                arrays = GetBAMOffsetArray(chunk.ChunkStart);
                Write(arrays, 0, 8);
                arrays = GetBAMOffsetArray(chunk.ChunkEnd);
                Write(arrays, 0, 8);
            }
        }

        // Add meta data - this varies by implementation; here it is the first and last
        // offsets seen, followed by the mapped and unmapped read counts.
        if (addingMetaData)
        {
            // Dummy bin to indicate meta-data.
            arrays = Helper.GetLittleEndianByteArray(BAMIndexFile.MAX_BINS);
            Write(arrays, 0, 4);

            // Two chunks worth of meta data.
            arrays = Helper.GetLittleEndianByteArray((int)2);
            Write(arrays, 0, 4);

            // First chunk: the file offsets.
            arrays = GetBAMOffsetArray(bamindices.FirstOffSetSeen);
            Write(arrays, 0, 8);
            arrays = GetBAMOffsetArray(bamindices.LastOffSetSeen);
            Write(arrays, 0, 8);

            // Second chunk: the read counts. BitConverter yields host byte order, so the
            // bytes are reversed on big-endian hosts to keep the file little-endian.
            arrays = BitConverter.GetBytes(bamindices.MappedReadsCount);
            if (!BitConverter.IsLittleEndian)
            {
                Array.Reverse(arrays);
            }
            Write(arrays, 0, 8);

            arrays = BitConverter.GetBytes(bamindices.UnMappedReadsCount);
            if (!BitConverter.IsLittleEndian)
            {
                Array.Reverse(arrays);
            }
            Write(arrays, 0, 8);
        }

        // Linear index: count, then one 8-byte offset per entry.
        arrays = Helper.GetLittleEndianByteArray(bamindices.LinearIndex.Count);
        Write(arrays, 0, 4);

        for (Int32 offsetIndex = 0; offsetIndex < bamindices.LinearIndex.Count; offsetIndex++)
        {
            FileOffset value = bamindices.LinearIndex[offsetIndex];
            arrays = GetBAMOffsetArray(value);
            Write(arrays, 0, 8);
        }

        sourceStream.Flush();
    }

    sourceStream.Flush();
}
/// <summary>
/// Returns BAMIndex instance by parsing BAM index source.
/// </summary>
/// <returns>The index read from the underlying stream.</returns>
/// <exception cref="InvalidOperationException">Thrown when the underlying stream field is null.</exception>
/// <exception cref="FormatException">Thrown when the leading signature bytes do not match.</exception>
/// <exception cref="FileFormatException">Thrown when a bin number is greater than MAX_BINS.</exception>
public BAMIndex Read()
{
    if (sourceStream == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    BAMIndex bamIndex = new BAMIndex();
    byte[] arrays = new byte[20];

    // Signature bytes: ASCII 'B', 'A', 'I' followed by 1.
    Read(arrays, 0, 4);
    if (arrays[0] != 66 || arrays[1] != 65 || arrays[2] != 73 || arrays[3] != 1)
    {
        throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
    }

    Read(arrays, 0, 4);
    int n_ref = Helper.GetInt32(arrays, 0);

    for (Int32 refindex = 0; refindex < n_ref; refindex++)
    {
        BAMReferenceIndexes bamindices = new BAMReferenceIndexes();
        bamIndex.RefIndexes.Add(bamindices);

        Read(arrays, 0, 4);
        int n_bin = Helper.GetInt32(arrays, 0);

        for (Int32 binIndex = 0; binIndex < n_bin; binIndex++)
        {
            Bin bin = new Bin();
            Read(arrays, 0, 4);
            bin.BinNumber = Helper.GetUInt32(arrays, 0);
            Read(arrays, 0, 4);
            int n_chunk = Helper.GetInt32(arrays, 0);

            if (bin.BinNumber == MAX_BINS)
            {
                // Some groups use this bin to place meta-data, such as the picard toolkit and
                // now SAMTools; it was later added to the SAMTools specification.
                // The Write method of this class emits two chunks here: the first/last file
                // offsets, then the mapped/unmapped read counts.
                for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    bamindices.HasMetaData = true;

                    if (chunkIndex == 0 && n_chunk > 1)
                    {
                        // Leading chunk of a multi-chunk meta-data bin: the file offsets.
                        // These were previously decoded as counts and then overwritten, so
                        // they were lost when an index was read and written back.
                        Read(arrays, 0, 8);
                        bamindices.FirstOffSetSeen = GetBAMOffset(arrays, 0);
                        Read(arrays, 0, 8);
                        bamindices.LastOffSetSeen = GetBAMOffset(arrays, 0);
                    }
                    else
                    {
                        // Read counts; with more than one such chunk the last one wins.
                        Read(arrays, 0, 8);
                        bamindices.MappedReadsCount = Helper.GetUInt64(arrays, 0);
                        Read(arrays, 0, 8);
                        bamindices.UnMappedReadsCount = Helper.GetUInt64(arrays, 0);
                    }
                }
            }
            else if (bin.BinNumber > MAX_BINS)
            {
                throw new FileFormatException("BAM Index is incorrectly formatted. Bin number specified is higher than the maximum allowed.");
            }
            else
            {
                bamindices.Bins.Add(bin);
                for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    Chunk chunk = new Chunk();
                    bin.Chunks.Add(chunk);
                    Read(arrays, 0, 8);
                    chunk.ChunkStart = GetBAMOffset(arrays, 0);
                    Read(arrays, 0, 8);
                    chunk.ChunkEnd = GetBAMOffset(arrays, 0);
                }
            }
        }

        // Get number of linear bins, then one 8-byte offset per entry.
        Read(arrays, 0, 4);
        int n_intv = Helper.GetInt32(arrays, 0);

        for (Int32 offsetIndex = 0; offsetIndex < n_intv; offsetIndex++)
        {
            FileOffset value;
            Read(arrays, 0, 8);
            value = GetBAMOffset(arrays, 0);
            bamindices.LinearIndex.Add(value);
        }
    }

    return bamIndex;
}
/// <summary>
/// Parses the BAM index source and builds the corresponding BAMIndex instance.
/// </summary>
/// <returns>The index read from the underlying stream.</returns>
/// <exception cref="InvalidOperationException">Thrown when the underlying stream field is null.</exception>
/// <exception cref="FormatException">Thrown when the leading signature bytes do not match.</exception>
public BAMIndex Read()
{
    if (sourceStream == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    BAMIndex result = new BAMIndex();
    byte[] buffer = new byte[20];

    // Signature bytes: ASCII 'B', 'A', 'I' followed by 1.
    Read(buffer, 0, 4);
    bool signatureMatches = buffer[0] == 66 && buffer[1] == 65 && buffer[2] == 73 && buffer[3] == 1;
    if (!signatureMatches)
    {
        throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
    }

    Read(buffer, 0, 4);
    for (int referencesLeft = Helper.GetInt32(buffer, 0); referencesLeft > 0; referencesLeft--)
    {
        BAMReferenceIndexes reference = new BAMReferenceIndexes();
        result.RefIndexes.Add(reference);

        // Bins: count, then for each bin its number, chunk count and chunk offsets.
        Read(buffer, 0, 4);
        for (int binsLeft = Helper.GetInt32(buffer, 0); binsLeft > 0; binsLeft--)
        {
            Bin currentBin = new Bin();
            reference.Bins.Add(currentBin);

            Read(buffer, 0, 4);
            currentBin.BinNumber = Helper.GetUInt32(buffer, 0);

            Read(buffer, 0, 4);
            for (int chunksLeft = Helper.GetInt32(buffer, 0); chunksLeft > 0; chunksLeft--)
            {
                Chunk currentChunk = new Chunk();
                currentBin.Chunks.Add(currentChunk);

                Read(buffer, 0, 8);
                currentChunk.ChunkStart = GetBAMOffset(buffer, 0);

                Read(buffer, 0, 8);
                currentChunk.ChunkEnd = GetBAMOffset(buffer, 0);
            }
        }

        // Linear offsets: count, then one 8-byte offset per entry.
        Read(buffer, 0, 4);
        for (int offsetsLeft = Helper.GetInt32(buffer, 0); offsetsLeft > 0; offsetsLeft--)
        {
            Read(buffer, 0, 8);
            reference.LinearOffsets.Add(GetBAMOffset(buffer, 0));
        }
    }

    return result;
}
/// <summary>
/// Returns BAMIndex instance by parsing BAM index source.
/// </summary>
/// <returns>The index read from the underlying source.</returns>
/// <exception cref="InvalidOperationException">Thrown when Source is null.</exception>
/// <exception cref="FormatException">
/// Thrown when the leading signature bytes do not match, or when a bin number is greater than MaxBins.
/// </exception>
public BAMIndex Read()
{
    if (Source == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    BAMIndex bamIndex = new BAMIndex();
    byte[] arrays = new byte[20];

    // Signature bytes: ASCII 'B', 'A', 'I' followed by 1.
    Read(arrays, 0, 4);
    if (arrays[0] != 66 || arrays[1] != 65 || arrays[2] != 73 || arrays[3] != 1)
    {
        throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
    }

    Read(arrays, 0, 4);
    int n_ref = Helper.GetInt32(arrays, 0);

    for (Int32 refindex = 0; refindex < n_ref; refindex++)
    {
        BAMReferenceIndexes bamindices = new BAMReferenceIndexes();
        bamIndex.RefIndexes.Add(bamindices);

        Read(arrays, 0, 4);
        int n_bin = Helper.GetInt32(arrays, 0);

        for (Int32 binIndex = 0; binIndex < n_bin; binIndex++)
        {
            Bin bin = new Bin();
            Read(arrays, 0, 4);
            bin.BinNumber = Helper.GetUInt32(arrays, 0);
            Read(arrays, 0, 4);
            int n_chunk = Helper.GetInt32(arrays, 0);

            if (bin.BinNumber == MaxBins)
            {
                // Some groups use this bin to place meta-data, such as the picard toolkit and
                // now SAMTools; it was later added to the SAMTools specification.
                // NOTE(review): every chunk is decoded as a (mapped, unmapped) pair, so with
                // several chunks the values of the last one win - confirm this matches the
                // layout the writer emits.
                for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    bamindices.HasMetaData = true;
                    Read(arrays, 0, 8);
                    bamindices.MappedReadsCount = Helper.GetUInt64(arrays, 0);
                    Read(arrays, 0, 8);
                    bamindices.UnMappedReadsCount = Helper.GetUInt64(arrays, 0);
                }
            }
            else if (bin.BinNumber > MaxBins)
            {
                // FormatException matches the signature check above and is still caught by
                // callers that catch Exception.
                throw new FormatException("BAM Index is incorrectly formatted. Bin number specified is higher than the maximum allowed.");
            }
            else
            {
                bamindices.Bins.Add(bin);
                for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    Chunk chunk = new Chunk();
                    bin.Chunks.Add(chunk);
                    Read(arrays, 0, 8);
                    chunk.ChunkStart = GetBAMOffset(arrays, 0);
                    Read(arrays, 0, 8);
                    chunk.ChunkEnd = GetBAMOffset(arrays, 0);
                }
            }
        }

        // Get number of linear bins, then one 8-byte offset per entry.
        Read(arrays, 0, 4);
        int n_intv = Helper.GetInt32(arrays, 0);

        for (Int32 offsetIndex = 0; offsetIndex < n_intv; offsetIndex++)
        {
            FileOffset value;
            Read(arrays, 0, 8);
            value = GetBAMOffset(arrays, 0);
            bamindices.LinearIndex.Add(value);
        }
    }

    return bamIndex;
}
// Records one aligned sequence in the BAM index structures (bin chunks and linear offsets)
// when index creation is enabled, then returns the sequence unchanged.
// Extracted from GetAlignmentMap().
private SAMAlignedSequence BamIndexing(SAMAlignedSequence alignedSeq, BAMReferenceIndexes refIndices, BAMIndex index, ulong lastcOffset, ushort lastuOffset, ref Chunk lastChunk)
{
    // Nothing to record unless index creation was requested.
    if (!createBamIndex)
    {
        return alignedSeq;
    }

    // NOTE(review): both trackers are locals, so they start from these values on every
    // call; only lastChunk (passed by ref) carries over between calls - confirm intended.
    int previousRefSeqIndex = 0;
    int previousBin = int.MaxValue;

    int currentRefSeqIndex = refSeqNames.IndexOf(alignedSeq.RName);
    if (currentRefSeqIndex != previousRefSeqIndex)
    {
        // Switch to the index entry of the reference this read belongs to.
        refIndices = index.RefIndexes[currentRefSeqIndex];
        previousBin = int.MaxValue;
        previousRefSeqIndex = currentRefSeqIndex;
    }

    if (alignedSeq.Bin != previousBin)
    {
        // Find the bin for this read, creating it on first use.
        Bin targetBin = refIndices.Bins.FirstOrDefault(candidate => candidate.BinNumber == alignedSeq.Bin);
        if (targetBin == null)
        {
            targetBin = new Bin { BinNumber = (uint)alignedSeq.Bin };
            refIndices.Bins.Add(targetBin);
        }

        // Close the chunk that was open so far at the current offsets.
        if (lastChunk != null)
        {
            lastChunk.ChunkEnd.CompressedBlockOffset = lastcOffset;
            lastChunk.ChunkEnd.UncompressedBlockOffset = lastuOffset;
        }

        // Open a new chunk starting at the current offsets; its end is filled in later.
        Chunk openedChunk = new Chunk
        {
            ChunkStart = new FileOffset
            {
                CompressedBlockOffset = lastcOffset,
                UncompressedBlockOffset = lastuOffset
            },
            ChunkEnd = new FileOffset()
        };
        targetBin.Chunks.Add(openedChunk);

        lastChunk = openedChunk;
        previousBin = alignedSeq.Bin;
    }

    // Store linear index other than 16k bins, that is bin number less than 4681.
    if (alignedSeq.Bin < 4681)
    {
        int firstWindow = alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : 0;
        int lastWindow = alignedSeq.RefEndPos > 0 ? alignedSeq.RefEndPos - 1 : 0;

        // Shifting by 14 converts a position into its 16384-wide window number.
        firstWindow >>= 14;
        lastWindow >>= 14;

        // lastWindow is never negative, so this always leaves at least one entry and
        // guarantees that index lastWindow exists.
        while (refIndices.LinearOffsets.Count <= lastWindow)
        {
            refIndices.LinearOffsets.Add(new FileOffset());
        }

        // Fill the still-unset windows after the first one with the current offsets.
        for (int window = firstWindow + 1; window <= lastWindow; window++)
        {
            // Assumes FileOffset is a reference type so the update is visible through the list - TODO confirm.
            FileOffset slot = refIndices.LinearOffsets[window];
            bool isUnset = slot.CompressedBlockOffset == 0 && slot.UncompressedBlockOffset == 0;
            if (isUnset)
            {
                slot.CompressedBlockOffset = lastcOffset;
                slot.UncompressedBlockOffset = lastuOffset;
            }
        }
    }

    return alignedSeq;
}
// Gets chunks for specified ref seq index, start and end co-ordinate.
// This method considers the linear index also: chunks of bins numbered below 4681 are
// dropped when they end at or before the linear offset of the window containing start.
// Returns the selected chunks after SortAndMergeChunks.
private static IList<Chunk> GetChunks(BAMReferenceIndexes refIndex, int start, int end)
{
    List<Chunk> chunks = new List<Chunk>();

    // A set keeps the membership test below constant-time; scanning the candidate list
    // once per bin is quadratic when the requested range covers many bins.
    HashSet<uint> binnumbers = new HashSet<uint>(Reg2Bins((uint)start, (uint)end));
    List<Bin> bins = refIndex.Bins.Where(B => binnumbers.Contains(B.BinNumber)).ToList();

    // Consider linear indexing only for the bins less than 4681.
    foreach (Bin bin in bins.Where(B => B.BinNumber < 4681))
    {
        chunks.AddRange(bin.Chunks);
    }

    int index = start / (16 * 1024); // Linear indexing window size is 16K

    if (refIndex.LinearOffsets.Count > index)
    {
        FileOffset offset = refIndex.LinearOffsets[index];

        // Keep only the chunks that end after the linear offset of the start window.
        chunks = chunks.Where(C => C.ChunkEnd.CompressedBlockOffset > offset.CompressedBlockOffset
            || (C.ChunkEnd.CompressedBlockOffset == offset.CompressedBlockOffset
                && C.ChunkEnd.UncompressedBlockOffset > offset.UncompressedBlockOffset)).ToList();
    }

    // Add chunks for the bin numbers of 4681 and above; these are not filtered.
    foreach (Bin bin in bins.Where(B => B.BinNumber >= 4681))
    {
        chunks.AddRange(bin.Chunks);
    }

    return SortAndMergeChunks(chunks);
}
// Collects the chunks of every bin of the given reference index, in bin order,
// and returns them after SortAndMergeChunks.
private static IList<Chunk> GetChunks(BAMReferenceIndexes refIndex)
{
    List<Chunk> allChunks = refIndex.Bins.SelectMany(b => b.Chunks).ToList();
    return SortAndMergeChunks(allChunks);
}