Example #1
0
        /// <summary>
        /// Serializes the given BAMIndex to the index stream: the four magic bytes,
        /// the number of references and then, for every reference, its bins (each with
        /// its chunks) followed by its linear offsets.
        /// </summary>
        /// <param name="bamIndex">BAMIndex instance to write.</param>
        public void Write(BAMIndex bamIndex)
        {
            if (bamIndex == null)
            {
                throw new ArgumentNullException("bamIndex");
            }

            if (sourceStream == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            // Magic bytes: ASCII 'B', 'A', 'I' followed by the value 1.
            Write(new byte[] { 66, 65, 73, 1 }, 0, 4);

            // Number of reference entries that follow.
            Write(Helper.GetLittleEndianByteArray(bamIndex.RefIndexes.Count), 0, 4);

            foreach (BAMReferenceIndexes reference in bamIndex.RefIndexes)
            {
                // Bin section: bin count, then (bin number, chunk count, chunks...) per bin.
                Write(Helper.GetLittleEndianByteArray(reference.Bins.Count), 0, 4);

                foreach (Bin currentBin in reference.Bins)
                {
                    Write(Helper.GetLittleEndianByteArray(currentBin.BinNumber), 0, 4);
                    Write(Helper.GetLittleEndianByteArray(currentBin.Chunks.Count), 0, 4);

                    foreach (Chunk currentChunk in currentBin.Chunks)
                    {
                        // Each chunk is stored as two 8-byte offsets: start, then end.
                        Write(GetBAMOffsetArray(currentChunk.ChunkStart), 0, 8);
                        Write(GetBAMOffsetArray(currentChunk.ChunkEnd), 0, 8);
                    }
                }

                // Linear offset section: count, then one 8-byte offset per entry.
                Write(Helper.GetLittleEndianByteArray(reference.LinearOffsets.Count), 0, 4);

                foreach (FileOffset linearOffset in reference.LinearOffsets)
                {
                    Write(GetBAMOffsetArray(linearOffset), 0, 8);
                }

                // Flush after every reference, as well as once at the very end.
                sourceStream.Flush();
            }

            sourceStream.Flush();
        }
 /// <summary>
 /// Lazily enumerates the aligned sequences of a BAM file that belong to the given
 /// reference sequence and fall inside the requested range, using the BAM index file
 /// to locate the relevant chunks.
 /// </summary>
 /// <remarks>
 /// This method is an iterator: none of its code (including the argument check and the
 /// opening of the files) runs until the returned sequence is enumerated. The BAM file
 /// and its index stay open while enumeration is in progress.
 /// </remarks>
 /// <param name="fileName">Path of the BAM file; the index file name is derived from it via getBAMIndexFileName.</param>
 /// <param name="refSeqName">Name of the reference sequence to look up in the BAM header.</param>
 /// <param name="start">Start of the range passed to the chunk lookup and used in the position filter.</param>
 /// <param name="end">End of the range passed to the chunk lookup and used in the position filter.</param>
 /// <returns>
 /// Every non-null sequence whose RName is "*" or whose Pos is at least start - 1 and
 /// whose RefEndPos is less than end.
 /// </returns>
 /// <exception cref="ArgumentNullException">refSeqName is null.</exception>
 /// <exception cref="FileFormatException">The BAM file is empty.</exception>
 /// <exception cref="ArgumentException">refSeqName is not among the reference sequence names.</exception>
 public IEnumerable<CompactSAMSequence> ParseRangeAsEnumerableSequences(string fileName, string refSeqName, int start = 0, int end = Int32.MaxValue)
 {
     if (refSeqName == null)
     {
         throw new ArgumentNullException("refSeqName");
     }

     using (FileStream bamStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
     {
         string bamIndexFileName = getBAMIndexFileName(fileName);
         using (BAMIndexFile bamIndexFile = new BAMIndexFile(bamIndexFileName, FileMode.Open, FileAccess.Read))
         {
             readStream = bamStream;
             try
             {
                 if (readStream == null || readStream.Length == 0)
                 {
                     throw new FileFormatException(Properties.Resource.BAM_InvalidBAMFile);
                 }

                 ValidateReader();
                 SAMAlignmentHeader header = GetHeader();

                 // Verify that the requested reference sequence is known.
                 int refSeqIndex = refSeqNames.IndexOf(refSeqName);
                 if (refSeqIndex < 0)
                 {
                     string message = string.Format(CultureInfo.InvariantCulture, Properties.Resource.BAM_RefSeqNotFound, refSeqName);
                     throw new ArgumentException(message, "refSeqName");
                 }

                 BAMIndex bamIndexInfo = bamIndexFile.Read();
                 BAMReferenceIndexes refIndex = bamIndexInfo.RefIndexes[refSeqIndex];
                 IList<Chunk> chunks = GetChunks(refIndex, start, end);

                 foreach (var s in EnumerateAlignedSequences(chunks))
                 {
                     if (s != null && (s.RName == "*" || (s.Pos >= (start - 1) && s.RefEndPos < end)))
                     {
                         yield return s;
                     }
                 }
             }
             finally
             {
                 // Always detach the reader from the stream that is about to be disposed,
                 // including when an exception is thrown above or when the consumer stops
                 // enumerating early and the iterator is disposed.
                 readStream = null;
             }
         }
     }
 }
Example #3
0
        /// <summary>
        /// Writes specified BAMIndex data: the four magic bytes, the number of references
        /// and then, for every reference, its bins (each with its chunks), an optional
        /// meta-data pseudo-bin, and its linear index.
        /// </summary>
        /// <remarks>
        /// When a reference has meta data, one extra bin numbered BAMIndexFile.MAX_BINS is
        /// written holding two chunk-sized entries: the first/last offsets seen and the
        /// mapped/unmapped read counts. The counts are always emitted in little-endian byte
        /// order, so the meta data is written regardless of the host's endianness.
        /// </remarks>
        /// <param name="bamIndex">BAMIndex instance to write.</param>
        /// <exception cref="ArgumentNullException">bamIndex is null.</exception>
        /// <exception cref="InvalidOperationException">The underlying stream is no longer available.</exception>
        public void Write(BAMIndex bamIndex)
        {
            if (bamIndex == null)
            {
                throw new ArgumentNullException("bamIndex");
            }

            if (sourceStream == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            // Magic bytes: ASCII 'B', 'A', 'I' followed by the value 1.
            byte[] magic = new byte[] { 66, 65, 73, 1 };
            Write(magic, 0, 4);

            byte[] arrays = Helper.GetLittleEndianByteArray(bamIndex.RefIndexes.Count);
            Write(arrays, 0, 4);

            for (Int32 refindex = 0; refindex < bamIndex.RefIndexes.Count; refindex++)
            {
                BAMReferenceIndexes bamindices = bamIndex.RefIndexes[refindex];

                // The meta-data pseudo-bin counts as one additional bin in the header.
                bool addingMetaData = bamindices.HasMetaData;
                int binCount = bamindices.Bins.Count;
                if (addingMetaData)
                {
                    binCount++;
                }

                arrays = Helper.GetLittleEndianByteArray(binCount);
                Write(arrays, 0, 4);

                // Write each bin: bin number, chunk count, then start/end offsets per chunk.
                for (Int32 binIndex = 0; binIndex < bamindices.Bins.Count; binIndex++)
                {
                    Bin bin = bamindices.Bins[binIndex];
                    arrays = Helper.GetLittleEndianByteArray(bin.BinNumber);
                    Write(arrays, 0, 4);

                    arrays = Helper.GetLittleEndianByteArray(bin.Chunks.Count);
                    Write(arrays, 0, 4);

                    for (Int32 chunkIndex = 0; chunkIndex < bin.Chunks.Count; chunkIndex++)
                    {
                        Chunk chunk = bin.Chunks[chunkIndex];
                        arrays = GetBAMOffsetArray(chunk.ChunkStart);
                        Write(arrays, 0, 8);
                        arrays = GetBAMOffsetArray(chunk.ChunkEnd);
                        Write(arrays, 0, 8);
                    }
                }

                // Add meta data - this varies by implementation, .NET Bio stores the first and
                // last offsets of reads found in the file and then the mapped/unmapped counts.
                if (addingMetaData)
                {
                    // Dummy bin number that marks the entry as meta data.
                    arrays = Helper.GetLittleEndianByteArray(BAMIndexFile.MAX_BINS);
                    Write(arrays, 0, 4);

                    // Two chunks worth of meta data.
                    arrays = Helper.GetLittleEndianByteArray((int)2);
                    Write(arrays, 0, 4);

                    // First pseudo-chunk: the file offsets.
                    arrays = GetBAMOffsetArray(bamindices.FirstOffSetSeen);
                    Write(arrays, 0, 8);
                    arrays = GetBAMOffsetArray(bamindices.LastOffSetSeen);
                    Write(arrays, 0, 8);

                    // Second pseudo-chunk: the read counts. BitConverter uses the host byte
                    // order, so reverse the bytes on big-endian hosts to keep the file
                    // little-endian.
                    arrays = BitConverter.GetBytes(bamindices.MappedReadsCount);
                    if (!BitConverter.IsLittleEndian)
                    {
                        Array.Reverse(arrays);
                    }
                    Write(arrays, 0, 8);

                    arrays = BitConverter.GetBytes(bamindices.UnMappedReadsCount);
                    if (!BitConverter.IsLittleEndian)
                    {
                        Array.Reverse(arrays);
                    }
                    Write(arrays, 0, 8);
                }

                // Linear index: entry count, then one 8-byte offset per entry.
                arrays = Helper.GetLittleEndianByteArray(bamindices.LinearIndex.Count);
                Write(arrays, 0, 4);

                for (Int32 offsetIndex = 0; offsetIndex < bamindices.LinearIndex.Count; offsetIndex++)
                {
                    FileOffset value = bamindices.LinearIndex[offsetIndex];
                    arrays = GetBAMOffsetArray(value);
                    Write(arrays, 0, 8);
                }

                sourceStream.Flush();
            }

            sourceStream.Flush();
        }
Example #4
0
        /// <summary>
        /// Parses the BAM index source and builds the corresponding BAMIndex instance.
        /// </summary>
        /// <returns>A BAMIndex holding one BAMReferenceIndexes entry per reference read from the source.</returns>
        public BAMIndex Read()
        {
            if (sourceStream == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            byte[] buffer = new byte[20];
            BAMIndex parsedIndex = new BAMIndex();

            // The source must start with the magic bytes 66, 65, 73, 1.
            Read(buffer, 0, 4);
            bool magicMatches = buffer[0] == 66 && buffer[1] == 65 && buffer[2] == 73 && buffer[3] == 1;
            if (!magicMatches)
            {
                throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
            }

            Read(buffer, 0, 4);
            int referenceCount = Helper.GetInt32(buffer, 0);

            for (int referenceNumber = 0; referenceNumber < referenceCount; referenceNumber++)
            {
                BAMReferenceIndexes referenceIndexes = new BAMReferenceIndexes();
                parsedIndex.RefIndexes.Add(referenceIndexes);

                Read(buffer, 0, 4);
                int binCount = Helper.GetInt32(buffer, 0);

                for (int binNumber = 0; binNumber < binCount; binNumber++)
                {
                    Bin currentBin = new Bin();
                    Read(buffer, 0, 4);
                    currentBin.BinNumber = Helper.GetUInt32(buffer, 0);
                    Read(buffer, 0, 4);
                    int chunkCount = Helper.GetInt32(buffer, 0);

                    if (currentBin.BinNumber > MAX_BINS)
                    {
                        throw new FileFormatException("BAM Index is incorrectly formatted.  Bin number specified is higher than the maximum allowed.");
                    }

                    if (currentBin.BinNumber == MAX_BINS)
                    {
                        // Some groups use this bin number to place meta-data, such as the picard
                        // toolkit and now SAMTools (it was later added to the SAMTools specification).
                        // Every pseudo-chunk overwrites both counters, so the values of the last
                        // one read are the ones kept; the bin itself is not added to Bins.
                        for (int remaining = chunkCount; remaining > 0; remaining--)
                        {
                            referenceIndexes.HasMetaData = true;
                            Read(buffer, 0, 8);
                            referenceIndexes.MappedReadsCount = Helper.GetUInt64(buffer, 0);
                            Read(buffer, 0, 8);
                            referenceIndexes.UnMappedReadsCount = Helper.GetUInt64(buffer, 0);
                        }

                        continue;
                    }

                    // Regular bin: register it, then read the start/end offsets of each chunk.
                    referenceIndexes.Bins.Add(currentBin);
                    for (int remaining = chunkCount; remaining > 0; remaining--)
                    {
                        Chunk currentChunk = new Chunk();
                        currentBin.Chunks.Add(currentChunk);
                        Read(buffer, 0, 8);
                        currentChunk.ChunkStart = GetBAMOffset(buffer, 0);
                        Read(buffer, 0, 8);
                        currentChunk.ChunkEnd = GetBAMOffset(buffer, 0);
                    }
                }

                // Number of linear index entries, followed by one 8-byte offset per entry.
                Read(buffer, 0, 4);
                int intervalCount = Helper.GetInt32(buffer, 0);

                for (int remaining = intervalCount; remaining > 0; remaining--)
                {
                    Read(buffer, 0, 8);
                    FileOffset linearOffset = GetBAMOffset(buffer, 0);
                    referenceIndexes.LinearIndex.Add(linearOffset);
                }
            }

            return parsedIndex;
        }
Example #5
0
        /// <summary>
        /// Parses the BAM index source and builds the corresponding BAMIndex instance.
        /// </summary>
        /// <returns>A BAMIndex holding, per reference, the bins with their chunks and the linear offsets.</returns>
        public BAMIndex Read()
        {
            if (sourceStream == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            byte[] buffer = new byte[20];
            BAMIndex parsedIndex = new BAMIndex();

            // The source must start with the magic bytes 66, 65, 73, 1.
            Read(buffer, 0, 4);
            bool magicMatches = buffer[0] == 66 && buffer[1] == 65 && buffer[2] == 73 && buffer[3] == 1;
            if (!magicMatches)
            {
                throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
            }

            Read(buffer, 0, 4);
            int referenceCount = Helper.GetInt32(buffer, 0);

            for (int referenceNumber = 0; referenceNumber < referenceCount; referenceNumber++)
            {
                BAMReferenceIndexes referenceIndexes = new BAMReferenceIndexes();
                parsedIndex.RefIndexes.Add(referenceIndexes);

                // Bin section: bin count, then (bin number, chunk count, chunks...) per bin.
                Read(buffer, 0, 4);
                int binCount = Helper.GetInt32(buffer, 0);

                for (int remainingBins = binCount; remainingBins > 0; remainingBins--)
                {
                    Bin currentBin = new Bin();
                    referenceIndexes.Bins.Add(currentBin);

                    Read(buffer, 0, 4);
                    currentBin.BinNumber = Helper.GetUInt32(buffer, 0);

                    Read(buffer, 0, 4);
                    int chunkCount = Helper.GetInt32(buffer, 0);

                    for (int remainingChunks = chunkCount; remainingChunks > 0; remainingChunks--)
                    {
                        // Each chunk is two 8-byte offsets: start, then end.
                        Chunk currentChunk = new Chunk();
                        currentBin.Chunks.Add(currentChunk);
                        Read(buffer, 0, 8);
                        currentChunk.ChunkStart = GetBAMOffset(buffer, 0);
                        Read(buffer, 0, 8);
                        currentChunk.ChunkEnd = GetBAMOffset(buffer, 0);
                    }
                }

                // Linear offset section: entry count, then one 8-byte offset per entry.
                Read(buffer, 0, 4);
                int intervalCount = Helper.GetInt32(buffer, 0);

                for (int remainingOffsets = intervalCount; remainingOffsets > 0; remainingOffsets--)
                {
                    Read(buffer, 0, 8);
                    FileOffset linearOffset = GetBAMOffset(buffer, 0);
                    referenceIndexes.LinearOffsets.Add(linearOffset);
                }
            }

            return parsedIndex;
        }
Example #6
0
        /// <summary>
        /// Returns BAMIndex instance by parsing BAM index source.
        /// </summary>
        /// <returns>A BAMIndex holding one BAMReferenceIndexes entry per reference read from the source.</returns>
        /// <exception cref="InvalidOperationException">The index source is no longer available.</exception>
        /// <exception cref="FormatException">
        /// The source does not start with the expected magic bytes, or a bin number is
        /// higher than MaxBins.
        /// </exception>
        public BAMIndex Read()
        {
            if (Source == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            BAMIndex bamIndex = new BAMIndex();
            byte[] arrays = new byte[20];

            // The source must start with the magic bytes 66, 65, 73, 1.
            Read(arrays, 0, 4);
            if (arrays[0] != 66 || arrays[1] != 65 || arrays[2] != 73 || arrays[3] != 1)
            {
                throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
            }

            Read(arrays, 0, 4);
            int n_ref = Helper.GetInt32(arrays, 0);

            for (Int32 refindex = 0; refindex < n_ref; refindex++)
            {
                BAMReferenceIndexes bamindices = new BAMReferenceIndexes();
                bamIndex.RefIndexes.Add(bamindices);

                Read(arrays, 0, 4);
                int n_bin = Helper.GetInt32(arrays, 0);

                for (Int32 binIndex = 0; binIndex < n_bin; binIndex++)
                {
                    Bin bin = new Bin();
                    Read(arrays, 0, 4);
                    bin.BinNumber = Helper.GetUInt32(arrays, 0);
                    Read(arrays, 0, 4);
                    int n_chunk = Helper.GetInt32(arrays, 0);

                    if (bin.BinNumber == MaxBins)
                    {
                        // Some groups use this bin number to place meta-data, such as the picard
                        // toolkit and now SAMTools (it was later added to the SAMTools specification).
                        // Every pseudo-chunk overwrites both counters, so the values of the last
                        // one read are the ones kept; the bin itself is not added to Bins.
                        for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                        {
                            bamindices.HasMetaData = true;
                            Read(arrays, 0, 8);
                            bamindices.MappedReadsCount = Helper.GetUInt64(arrays, 0);
                            Read(arrays, 0, 8);
                            bamindices.UnMappedReadsCount = Helper.GetUInt64(arrays, 0);
                        }
                    }
                    else if (bin.BinNumber > MaxBins)
                    {
                        // Malformed input is reported with the same exception type as the
                        // invalid magic header above, rather than the bare Exception type.
                        throw new FormatException("BAM Index is incorrectly formatted.  Bin number specified is higher than the maximum allowed.");
                    }
                    else
                    {
                        // Regular bin: register it, then read the start/end offsets of each chunk.
                        bamindices.Bins.Add(bin);
                        for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                        {
                            Chunk chunk = new Chunk();
                            bin.Chunks.Add(chunk);
                            Read(arrays, 0, 8);
                            chunk.ChunkStart = GetBAMOffset(arrays, 0);
                            Read(arrays, 0, 8);
                            chunk.ChunkEnd = GetBAMOffset(arrays, 0);
                        }
                    }
                }

                // Number of linear index entries, followed by one 8-byte offset per entry.
                Read(arrays, 0, 4);
                int n_intv = Helper.GetInt32(arrays, 0);

                for (Int32 offsetIndex = 0; offsetIndex < n_intv; offsetIndex++)
                {
                    Read(arrays, 0, 8);
                    FileOffset value = GetBAMOffset(arrays, 0);
                    bamindices.LinearIndex.Add(value);
                }
            }

            return bamIndex;
        }
Example #7
0
        // Updates the BAM index structures for a single aligned sequence (block extracted from
        // GetAlignmentMap()). Does nothing when createBamIndex is false. The alignedSeq argument
        // is only read and is returned as-is.
        // NOTE(review): lastBin and lastRefSeqIndex are locals that are re-initialized on every
        // call, and refIndices is passed by value, so neither the "last bin / last reference"
        // tracking nor a re-pointed refIndices carries over to the next call - confirm that this
        // is what the refactoring intended.
        private SAMAlignedSequence BamIndexing(SAMAlignedSequence alignedSeq, BAMReferenceIndexes refIndices, BAMIndex index,
            ulong lastcOffset, ushort lastuOffset, ref Chunk lastChunk)
        {
            if (!createBamIndex)
            {
                return alignedSeq;
            }

            int lastBin = int.MaxValue;
            int lastRefSeqIndex = 0;

            // Switch to the index entry of the reference this sequence is aligned to.
            int curRefSeqIndex = refSeqNames.IndexOf(alignedSeq.RName);
            if (curRefSeqIndex != lastRefSeqIndex)
            {
                refIndices = index.RefIndexes[curRefSeqIndex];
                lastBin = int.MaxValue;
                lastRefSeqIndex = curRefSeqIndex;
            }

            if (alignedSeq.Bin != lastBin)
            {
                // Find the bin for this sequence, creating and registering it when missing.
                Bin targetBin = refIndices.Bins.FirstOrDefault(candidate => candidate.BinNumber == alignedSeq.Bin);
                if (targetBin == null)
                {
                    targetBin = new Bin();
                    targetBin.BinNumber = (uint)alignedSeq.Bin;
                    refIndices.Bins.Add(targetBin);
                }

                // Close the previously open chunk at the current offsets.
                if (lastChunk != null)
                {
                    lastChunk.ChunkEnd.CompressedBlockOffset = lastcOffset;
                    lastChunk.ChunkEnd.UncompressedBlockOffset = lastuOffset;
                }

                // Open a new chunk that starts at the current offsets; its end is filled in
                // by a later call through lastChunk.
                Chunk openedChunk = new Chunk();
                openedChunk.ChunkStart = new FileOffset();
                openedChunk.ChunkEnd = new FileOffset();
                openedChunk.ChunkStart.CompressedBlockOffset = lastcOffset;
                openedChunk.ChunkStart.UncompressedBlockOffset = lastuOffset;
                targetBin.Chunks.Add(openedChunk);

                lastChunk = openedChunk;
                lastBin = alignedSeq.Bin;
            }

            // The linear index is only maintained for bin numbers below 4681
            // (i.e. bins other than the 16k bins).
            if (alignedSeq.Bin < 4681)
            {
                // Zero-based positions converted to 16K (2^14) window numbers.
                int firstWindow = (alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : 0) >> 14;
                int lastWindow = (alignedSeq.RefEndPos > 0 ? alignedSeq.RefEndPos - 1 : 0) >> 14;

                if (refIndices.LinearOffsets.Count == 0)
                {
                    refIndices.LinearOffsets.Add(new FileOffset());
                }

                // Grow the linear index so that it has an entry for lastWindow.
                while (refIndices.LinearOffsets.Count <= lastWindow)
                {
                    refIndices.LinearOffsets.Add(new FileOffset());
                }

                // Fill in windows after the first one, but only those that are still unset (0, 0).
                for (int window = firstWindow + 1; window <= lastWindow; window++)
                {
                    FileOffset windowOffset = refIndices.LinearOffsets[window];
                    bool isUnset = windowOffset.CompressedBlockOffset == 0 && windowOffset.UncompressedBlockOffset == 0;
                    if (isUnset)
                    {
                        windowOffset.CompressedBlockOffset = lastcOffset;
                        windowOffset.UncompressedBlockOffset = lastuOffset;
                    }
                }
            }

            return alignedSeq;
        }
Example #8
0
        // Gets the chunks of the given reference index that belong to the bins returned by
        // Reg2Bins for the start and end co-ordinates. Chunks from bins numbered below 4681 are
        // additionally filtered through the linear index; chunks from bins numbered 4681 and
        // above are added unfiltered. The combined list is returned via SortAndMergeChunks.
        private static IList<Chunk> GetChunks(BAMReferenceIndexes refIndex, int start, int end)
        {
            // Hash the candidate bin numbers once so that matching the bins below is a
            // constant-time lookup per bin instead of a linear scan of the whole list.
            HashSet<uint> binNumbers = new HashSet<uint>(Reg2Bins((uint)start, (uint)end));
            List<Bin> bins = refIndex.Bins.Where(B => binNumbers.Contains(B.BinNumber)).ToList();

            // Consider linear indexing only for the bins less than 4681.
            List<Chunk> chunks = new List<Chunk>();
            foreach (Bin bin in bins.Where(B => B.BinNumber < 4681))
            {
                chunks.AddRange(bin.Chunks);
            }

            int index = start / (16 * 1024);  // Linear indexing window size is 16K

            if (refIndex.LinearOffsets.Count > index)
            {
                // Keep only the chunks that end after the linear offset of the start window
                // (compressed offset first, uncompressed offset as the tie-breaker).
                FileOffset offset = refIndex.LinearOffsets[index];
                chunks = chunks
                    .Where(C => C.ChunkEnd.CompressedBlockOffset > offset.CompressedBlockOffset
                        || (C.ChunkEnd.CompressedBlockOffset == offset.CompressedBlockOffset
                            && C.ChunkEnd.UncompressedBlockOffset > offset.UncompressedBlockOffset))
                    .ToList();
            }

            // Add chunks for the bin numbers 4681 and above.
            foreach (Bin bin in bins.Where(B => B.BinNumber >= 4681))
            {
                chunks.AddRange(bin.Chunks);
            }

            return SortAndMergeChunks(chunks);
        }
Example #9
0
        // Collects the chunks of every bin of the specified reference index and returns the
        // combined list after passing it through SortAndMergeChunks.
        private static IList<Chunk> GetChunks(BAMReferenceIndexes refIndex)
        {
            List<Chunk> allChunks = new List<Chunk>();

            foreach (Bin currentBin in refIndex.Bins)
            {
                // Append this bin's chunks to the end of the combined list.
                allChunks.AddRange(currentBin.Chunks);
            }

            return SortAndMergeChunks(allChunks);
        }