// Refactored to remove this block from GetAlignmentMap()
/// <summary>
/// Records one aligned sequence in the BAM index being built: opens a new chunk in the
/// sequence's bin and fills in linear offsets for the 16 Kbp windows it spans.
/// Does nothing when createBamIndex is false.
/// </summary>
/// <param name="alignedSeq">Aligned sequence whose RName, Bin, Pos and RefEndPos are read.</param>
/// <param name="refIndices">
/// Per-reference index to update. It is replaced, for the remainder of this call only, by
/// index.RefIndexes[n] when the position n of alignedSeq.RName in refSeqNames is not 0.
/// </param>
/// <param name="index">BAM index that supplies the per-reference indexes.</param>
/// <param name="lastcOffset">Compressed block offset written into chunk boundaries and linear offsets.</param>
/// <param name="lastuOffset">Uncompressed block offset written into chunk boundaries and linear offsets.</param>
/// <param name="lastChunk">
/// Most recently opened chunk, if any. Its end is set to the supplied offsets and it is then
/// replaced by the newly created chunk.
/// </param>
/// <returns>The same <paramref name="alignedSeq"/> instance that was passed in.</returns>
private SAMAlignedSequence BamIndexing(SAMAlignedSequence alignedSeq, BAMReferenceIndexes refIndices, BAMIndex index, ulong lastcOffset, ushort lastuOffset, ref Chunk lastChunk)
{
    // Nothing to record unless index creation was requested.
    if (!createBamIndex)
    {
        return alignedSeq;
    }

    // NOTE(review): both trackers are locals that start from these values on every call,
    // so nothing is remembered between calls. As written, the bin branch below runs for
    // every sequence whose Bin is not int.MaxValue - confirm this is intended.
    int previousBin = int.MaxValue;
    int previousRefIndex = 0;

    int currentRefIndex = refSeqNames.IndexOf(alignedSeq.RName);
    if (currentRefIndex != previousRefIndex)
    {
        // NOTE(review): refIndices is passed by value, so the caller does not see this switch.
        refIndices = index.RefIndexes[currentRefIndex];
        previousBin = int.MaxValue;
        previousRefIndex = currentRefIndex;
    }

    if (previousBin != alignedSeq.Bin)
    {
        // Reuse the bin if this reference already has one with that number, otherwise create it.
        Bin targetBin = refIndices.Bins.FirstOrDefault(B => B.BinNumber == alignedSeq.Bin);
        if (targetBin == null)
        {
            targetBin = new Bin { BinNumber = (uint)alignedSeq.Bin };
            refIndices.Bins.Add(targetBin);
        }

        // Close the previously opened chunk at the supplied offsets.
        if (lastChunk != null)
        {
            lastChunk.ChunkEnd.CompressedBlockOffset = lastcOffset;
            lastChunk.ChunkEnd.UncompressedBlockOffset = lastuOffset;
        }

        // Open a new chunk starting at the same offsets; its end is left at the
        // FileOffset defaults here.
        FileOffset chunkStart = new FileOffset
        {
            CompressedBlockOffset = lastcOffset,
            UncompressedBlockOffset = lastuOffset
        };
        Chunk openedChunk = new Chunk
        {
            ChunkStart = chunkStart,
            ChunkEnd = new FileOffset()
        };

        targetBin.Chunks.Add(openedChunk);
        lastChunk = openedChunk;
        previousBin = alignedSeq.Bin;
    }

    // Store linear index entries only for bins other than the 16k bins,
    // that is for bin numbers less than 4681.
    if (alignedSeq.Bin < 4681)
    {
        // Convert the positions to zero-based values (clamped at 0), then to
        // window numbers by dropping the low 14 bits.
        int firstWindow = (alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : 0) >> 14;
        int lastWindow = (alignedSeq.RefEndPos > 0 ? alignedSeq.RefEndPos - 1 : 0) >> 14;

        // Grow the list until it has an entry for lastWindow. lastWindow is never
        // negative, so an empty list always receives at least one entry.
        while (refIndices.LinearOffsets.Count <= lastWindow)
        {
            refIndices.LinearOffsets.Add(new FileOffset());
        }

        // Fill the windows after the first one, leaving any entry that already
        // holds a non-zero offset untouched.
        for (int window = firstWindow + 1; window <= lastWindow; window++)
        {
            FileOffset entry = refIndices.LinearOffsets[window];
            bool entryIsUnset = entry.CompressedBlockOffset == 0 && entry.UncompressedBlockOffset == 0;
            if (entryIsUnset)
            {
                entry.CompressedBlockOffset = lastcOffset;
                entry.UncompressedBlockOffset = lastuOffset;
            }
        }
    }

    return alignedSeq;
}
/// <summary>
/// Returns BAMIndex instance by parsing BAM index source.
/// </summary>
/// <returns>
/// A BAMIndex holding one BAMReferenceIndexes entry per reference declared in the source,
/// each with its bins, chunks and linear index offsets.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when Source is null.</exception>
/// <exception cref="FormatException">
/// Thrown when the first four bytes are not 'B', 'A', 'I', 1, or when a bin number is
/// greater than MaxBins.
/// </exception>
public BAMIndex Read()
{
    if (Source == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    BAMIndex bamIndex = new BAMIndex();
    byte[] arrays = new byte[20];

    // Magic bytes: 66 = 'B', 65 = 'A', 73 = 'I', followed by the byte value 1.
    Read(arrays, 0, 4);
    if (arrays[0] != 66 || arrays[1] != 65 || arrays[2] != 73 || arrays[3] != 1)
    {
        throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
    }

    // Number of reference sequences.
    Read(arrays, 0, 4);
    int n_ref = Helper.GetInt32(arrays, 0);

    for (int refindex = 0; refindex < n_ref; refindex++)
    {
        BAMReferenceIndexes bamindices = new BAMReferenceIndexes();
        bamIndex.RefIndexes.Add(bamindices);

        // Number of bins for this reference.
        Read(arrays, 0, 4);
        int n_bin = Helper.GetInt32(arrays, 0);

        for (int binIndex = 0; binIndex < n_bin; binIndex++)
        {
            Bin bin = new Bin();

            Read(arrays, 0, 4);
            bin.BinNumber = Helper.GetUInt32(arrays, 0);

            Read(arrays, 0, 4);
            int n_chunk = Helper.GetInt32(arrays, 0);

            if (bin.BinNumber == MaxBins)
            {
                // Some groups use this bin number to place meta-data, such as the Picard
                // toolkit and now SAMTools; meta-data was later added to the SAMTools
                // specification. This pseudo-bin is not added to the Bins list.
                // Each 16-byte entry is read as two counts, so when there are several
                // entries the values from the last one are the ones kept.
                for (int chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    bamindices.HasMetaData = true;

                    Read(arrays, 0, 8);
                    bamindices.MappedReadsCount = Helper.GetUInt64(arrays, 0);

                    Read(arrays, 0, 8);
                    bamindices.UnMappedReadsCount = Helper.GetUInt64(arrays, 0);
                }
            }
            else if (bin.BinNumber > MaxBins)
            {
                // A malformed file is reported with FormatException, the same type used
                // for the magic-byte check above, rather than the bare Exception base type.
                throw new FormatException("BAM Index is incorrectly formatted. Bin number specified is higher than the maximum allowed.");
            }
            else
            {
                bamindices.Bins.Add(bin);

                for (int chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                {
                    Chunk chunk = new Chunk();
                    bin.Chunks.Add(chunk);

                    Read(arrays, 0, 8);
                    chunk.ChunkStart = GetBAMOffset(arrays, 0);

                    Read(arrays, 0, 8);
                    chunk.ChunkEnd = GetBAMOffset(arrays, 0);
                }
            }
        }

        // Get number of linear bins
        Read(arrays, 0, 4);
        int n_intv = Helper.GetInt32(arrays, 0);

        for (int offsetIndex = 0; offsetIndex < n_intv; offsetIndex++)
        {
            Read(arrays, 0, 8);
            FileOffset value = GetBAMOffset(arrays, 0);
            bamindices.LinearIndex.Add(value);
        }
    }

    return bamIndex;
}
/// <summary>
/// Parses the BAM index source and returns it as a BAMIndex instance.
/// </summary>
/// <returns>
/// A BAMIndex with one BAMReferenceIndexes entry per reference declared in the source,
/// each holding its bins, their chunks, and its linear offsets.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when sourceStream is null.</exception>
/// <exception cref="FormatException">Thrown when the first four bytes are not 'B', 'A', 'I', 1.</exception>
public BAMIndex Read()
{
    if (sourceStream == null)
    {
        throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
    }

    byte[] buffer = new byte[20];
    BAMIndex result = new BAMIndex();

    // The source must start with the bytes 66 ('B'), 65 ('A'), 73 ('I') and 1.
    Read(buffer, 0, 4);
    bool hasMagic = buffer[0] == 66 && buffer[1] == 65 && buffer[2] == 73 && buffer[3] == 1;
    if (!hasMagic)
    {
        throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
    }

    // Number of reference sequences that follow.
    Read(buffer, 0, 4);
    for (int referencesLeft = Helper.GetInt32(buffer, 0); referencesLeft > 0; referencesLeft--)
    {
        BAMReferenceIndexes referenceIndex = new BAMReferenceIndexes();
        result.RefIndexes.Add(referenceIndex);

        // Bins of this reference: each is a bin number followed by its chunk list.
        Read(buffer, 0, 4);
        for (int binsLeft = Helper.GetInt32(buffer, 0); binsLeft > 0; binsLeft--)
        {
            Read(buffer, 0, 4);
            Bin currentBin = new Bin { BinNumber = Helper.GetUInt32(buffer, 0) };
            referenceIndex.Bins.Add(currentBin);

            Read(buffer, 0, 4);
            for (int chunksLeft = Helper.GetInt32(buffer, 0); chunksLeft > 0; chunksLeft--)
            {
                // Each chunk is a pair of 8-byte offsets: start, then end.
                Read(buffer, 0, 8);
                FileOffset chunkStart = GetBAMOffset(buffer, 0);

                Read(buffer, 0, 8);
                FileOffset chunkEnd = GetBAMOffset(buffer, 0);

                currentBin.Chunks.Add(new Chunk { ChunkStart = chunkStart, ChunkEnd = chunkEnd });
            }
        }

        // Linear offsets of this reference: a count followed by 8-byte offsets.
        Read(buffer, 0, 4);
        for (int offsetsLeft = Helper.GetInt32(buffer, 0); offsetsLeft > 0; offsetsLeft--)
        {
            Read(buffer, 0, 8);
            referenceIndex.LinearOffsets.Add(GetBAMOffset(buffer, 0));
        }
    }

    return result;
}