// Refactored to remove this block from GetAlignmentMap()
        /// <summary>
        /// Records one aligned sequence in the BAM index: opens a new chunk in the
        /// sequence's bin and fills in any linear offsets the sequence spans.
        /// Nothing is touched when createBamIndex is false.
        /// </summary>
        /// <param name="alignedSeq">Aligned sequence whose RName, Bin, Pos and RefEndPos are read.</param>
        /// <param name="refIndices">
        /// Reference indexes to update. Replaced locally by the matching entry of
        /// <paramref name="index"/> when the sequence's reference name is not found at
        /// position 0 of refSeqNames.
        /// </param>
        /// <param name="index">BAM index holding the per-reference index entries.</param>
        /// <param name="lastcOffset">Compressed block offset stored on chunk boundaries and linear offsets.</param>
        /// <param name="lastuOffset">Uncompressed block offset stored on chunk boundaries and linear offsets.</param>
        /// <param name="lastChunk">
        /// Previously opened chunk, or null. When not null its end offsets are set from
        /// the supplied offsets; on return it refers to the newly created chunk
        /// (whenever a new chunk was created).
        /// </param>
        /// <returns>The <paramref name="alignedSeq"/> instance that was passed in.</returns>
        private SAMAlignedSequence BamIndexing(SAMAlignedSequence alignedSeq, BAMReferenceIndexes refIndices, BAMIndex index,
            ulong lastcOffset, ushort lastuOffset, ref Chunk lastChunk)
        {
            if (!createBamIndex)
            {
                return alignedSeq;
            }

            // Within this method the "previous" reference index is always 0, so the
            // caller-supplied refIndices is only swapped out for any other position.
            int referenceIndex = refSeqNames.IndexOf(alignedSeq.RName);
            if (referenceIndex != 0)
            {
                refIndices = index.RefIndexes[referenceIndex];
            }

            // Likewise the "previous" bin is always int.MaxValue at this point.
            if (alignedSeq.Bin != int.MaxValue)
            {
                // Locate the bin for this sequence, creating it on first use.
                Bin targetBin = refIndices.Bins.FirstOrDefault(candidate => candidate.BinNumber == alignedSeq.Bin);
                if (targetBin == null)
                {
                    targetBin = new Bin { BinNumber = (uint)alignedSeq.Bin };
                    refIndices.Bins.Add(targetBin);
                }

                // Close the chunk that was open before this sequence.
                if (lastChunk != null)
                {
                    lastChunk.ChunkEnd.CompressedBlockOffset = lastcOffset;
                    lastChunk.ChunkEnd.UncompressedBlockOffset = lastuOffset;
                }

                // Open a new chunk starting at the current offsets; its end is filled in later.
                Chunk openedChunk = new Chunk
                {
                    ChunkStart = new FileOffset
                    {
                        CompressedBlockOffset = lastcOffset,
                        UncompressedBlockOffset = lastuOffset
                    },
                    ChunkEnd = new FileOffset()
                };
                targetBin.Chunks.Add(openedChunk);
                lastChunk = openedChunk;
            }

            // The linear index is only maintained for bin numbers below 4681.
            if (alignedSeq.Bin >= 4681)
            {
                return alignedSeq;
            }

            // Convert 1-based positions to 0-based (clamped at 0), then to window
            // numbers by shifting right 14 bits.
            int firstWindow = (alignedSeq.Pos > 0 ? alignedSeq.Pos - 1 : 0) >> 14;
            int lastWindow = (alignedSeq.RefEndPos > 0 ? alignedSeq.RefEndPos - 1 : 0) >> 14;

            // Make sure the linear offset list has an entry for every window up to lastWindow.
            if (refIndices.LinearOffsets.Count == 0)
            {
                refIndices.LinearOffsets.Add(new FileOffset());
            }

            while (refIndices.LinearOffsets.Count <= lastWindow)
            {
                refIndices.LinearOffsets.Add(new FileOffset());
            }

            // Fill only windows after the first one, and only those still at (0, 0).
            for (int window = firstWindow + 1; window <= lastWindow; window++)
            {
                FileOffset windowOffset = refIndices.LinearOffsets[window];
                bool isUnset = windowOffset.CompressedBlockOffset == 0 && windowOffset.UncompressedBlockOffset == 0;
                if (isUnset)
                {
                    windowOffset.CompressedBlockOffset = lastcOffset;
                    windowOffset.UncompressedBlockOffset = lastuOffset;
                }
            }

            return alignedSeq;
        }
Exemple #2
0
        /// <summary>
        /// Returns BAMIndex instance by parsing BAM index source.
        /// </summary>
        /// <remarks>
        /// For each reference the method reads its bins (each with a list of chunks)
        /// followed by its linear index offsets. A bin whose number equals MaxBins is
        /// treated as metadata: its entries are read as mapped / unmapped read counts
        /// and the bin itself is not added to the reference's bin list.
        /// </remarks>
        /// <returns>The parsed index, containing one BAMReferenceIndexes entry per reference read.</returns>
        /// <exception cref="InvalidOperationException">Source is null.</exception>
        /// <exception cref="FormatException">
        /// The first four bytes are not 'B', 'A', 'I', 1, or a bin number greater than
        /// MaxBins was encountered.
        /// </exception>
        public BAMIndex Read()
        {
            if (Source == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            BAMIndex bamIndex = new BAMIndex();
            byte[] arrays = new byte[20];

            Read(arrays, 0, 4);

            // Magic number: ASCII 'B' (66), 'A' (65), 'I' (73) followed by the byte 1.
            if (arrays[0] != 66 || arrays[1] != 65 || arrays[2] != 73 || arrays[3] != 1)
            {
                throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
            }

            // Number of references described by the index.
            Read(arrays, 0, 4);
            int n_ref = Helper.GetInt32(arrays, 0);
            for (Int32 refindex = 0; refindex < n_ref; refindex++)
            {
                BAMReferenceIndexes bamindices = new BAMReferenceIndexes();
                bamIndex.RefIndexes.Add(bamindices);

                // Number of bins for this reference.
                Read(arrays, 0, 4);
                int n_bin = Helper.GetInt32(arrays, 0);
                for (Int32 binIndex = 0; binIndex < n_bin; binIndex++)
                {
                    Bin bin = new Bin();
                    Read(arrays, 0, 4);
                    bin.BinNumber = Helper.GetUInt32(arrays, 0);
                    Read(arrays, 0, 4);
                    int n_chunk = Helper.GetInt32(arrays, 0);
                    if (bin.BinNumber == MaxBins)//some groups use this to place meta-data, such as the picard toolkit and now SAMTools
                    {
                        //Meta data was later added in to the SAMTools specification
                        // Every entry is read as a (mapped, unmapped) pair of 64-bit
                        // counts; when there are several entries the last one wins.
                        for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                        {
                            bamindices.HasMetaData = true;
                            Read(arrays, 0, 8);
                            bamindices.MappedReadsCount = Helper.GetUInt64(arrays, 0);
                            Read(arrays, 0, 8);
                            bamindices.UnMappedReadsCount = Helper.GetUInt64(arrays, 0);
                        }
                    }
                    else if (bin.BinNumber > MaxBins)
                    {
                        // A malformed file is a format problem, so report it with the same
                        // exception type as the magic-number check above instead of the
                        // bare System.Exception.
                        throw new FormatException("BAM Index is incorrectly formatted.  Bin number specified is higher than the maximum allowed.");
                    }
                    else
                    {
                        bamindices.Bins.Add(bin);
                        for (Int32 chunkIndex = 0; chunkIndex < n_chunk; chunkIndex++)
                        {
                            Chunk chunk = new Chunk();
                            bin.Chunks.Add(chunk);
                            Read(arrays, 0, 8);
                            chunk.ChunkStart = GetBAMOffset(arrays, 0);
                            Read(arrays, 0, 8);
                            chunk.ChunkEnd = GetBAMOffset(arrays, 0);
                        }
                    }
                }

                //Get number of linear bins
                Read(arrays, 0, 4);
                int n_intv = Helper.GetInt32(arrays, 0);

                for (Int32 offsetIndex = 0; offsetIndex < n_intv; offsetIndex++)
                {
                    FileOffset value;
                    Read(arrays, 0, 8);
                    value = GetBAMOffset(arrays, 0);
                    bamindices.LinearIndex.Add(value);
                }
            }

            return bamIndex;
        }
Exemple #3
0
        /// <summary>
        /// Parses the BAM index source and returns it as a BAMIndex instance.
        /// </summary>
        /// <remarks>
        /// After the four-byte magic number the method reads a reference count, then
        /// for every reference its bins (each with its chunks) and its linear offsets.
        /// </remarks>
        /// <returns>The parsed index, containing one BAMReferenceIndexes entry per reference read.</returns>
        /// <exception cref="InvalidOperationException">sourceStream is null.</exception>
        /// <exception cref="FormatException">The first four bytes are not 'B', 'A', 'I', 1.</exception>
        public BAMIndex Read()
        {
            if (sourceStream == null)
            {
                throw new InvalidOperationException(Properties.Resource.BAM_CantUseBAMIndexStreamDisposed);
            }

            BAMIndex parsedIndex = new BAMIndex();

            // Scratch buffer reused for every field that is read.
            byte[] buffer = new byte[20];

            // Magic number: ASCII 'B' (66), 'A' (65), 'I' (73) followed by the byte 1.
            Read(buffer, 0, 4);
            bool magicMatches = buffer[0] == 66 && buffer[1] == 65 && buffer[2] == 73 && buffer[3] == 1;
            if (!magicMatches)
            {
                throw new FormatException(Properties.Resource.BAM_InvalidIndexFile);
            }

            Read(buffer, 0, 4);
            int referenceCount = Helper.GetInt32(buffer, 0);

            for (int referencesLeft = referenceCount; referencesLeft > 0; referencesLeft--)
            {
                BAMReferenceIndexes referenceIndexes = new BAMReferenceIndexes();
                parsedIndex.RefIndexes.Add(referenceIndexes);

                // Bins of this reference.
                Read(buffer, 0, 4);
                int binCount = Helper.GetInt32(buffer, 0);

                for (int binsLeft = binCount; binsLeft > 0; binsLeft--)
                {
                    Bin currentBin = new Bin();
                    referenceIndexes.Bins.Add(currentBin);

                    Read(buffer, 0, 4);
                    currentBin.BinNumber = Helper.GetUInt32(buffer, 0);

                    Read(buffer, 0, 4);
                    int chunkCount = Helper.GetInt32(buffer, 0);

                    // Each chunk is a start offset followed by an end offset, 8 bytes each.
                    for (int chunksLeft = chunkCount; chunksLeft > 0; chunksLeft--)
                    {
                        Chunk currentChunk = new Chunk();
                        currentBin.Chunks.Add(currentChunk);

                        Read(buffer, 0, 8);
                        currentChunk.ChunkStart = GetBAMOffset(buffer, 0);

                        Read(buffer, 0, 8);
                        currentChunk.ChunkEnd = GetBAMOffset(buffer, 0);
                    }
                }

                // Linear offsets of this reference.
                Read(buffer, 0, 4);
                int intervalCount = Helper.GetInt32(buffer, 0);

                for (int intervalsLeft = intervalCount; intervalsLeft > 0; intervalsLeft--)
                {
                    Read(buffer, 0, 8);
                    referenceIndexes.LinearOffsets.Add(GetBAMOffset(buffer, 0));
                }
            }

            return parsedIndex;
        }