Esempio n. 1
0
        /// <summary>
        ///     Reads a BAM index from the supplied filename. The in-memory index is only
        ///     replaced once the whole file has been parsed successfully.
        /// </summary>
        /// <param name="filename">path of the BAM index file to load</param>
        /// <returns>true if the index was successfully loaded, false if the file does not exist</returns>
        /// <exception cref="InvalidDataException">
        ///     thrown when the file does not start with the expected BAM index magic number
        ///     (this includes files that are too short to contain it)
        /// </exception>
        /// <exception cref="EndOfStreamException">
        ///     thrown by the underlying reader when the file ends in the middle of the index data
        /// </exception>
        public bool ReadIndex(string filename)
        {
            // check if the file exists
            if (!File.Exists(filename))
            {
                return false;
            }

            // parse into local storage first so that a truncated or corrupt file
            // cannot leave the in-memory index half-populated
            List<BamReferenceIndex> referenceIndices = new List<BamReferenceIndex>();
            ulong numUnalignedWithoutCoordinates = 0;

            using (
                BinaryReader reader =
                    new BinaryReader(new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read)))
            {
                // check to see if we have a proper BAM index signature
                byte[] buffer = reader.ReadBytes(4);

                // ReadBytes returns fewer than 4 bytes for a very short file, so decode only
                // what was actually read; the comparison below then reports the bad signature
                string magicNumberString = Encoding.ASCII.GetString(buffer, 0, buffer.Length);

                if (magicNumberString != BamConstants.BaiMagicNumber)
                {
                    throw new InvalidDataException(
                              string.Format("ERROR: Expected the BAM index magic number to be {0}, but found {1}.",
                                            BamConstants.BaiMagicNumber, magicNumberString));
                }

                // read the number of reference sequences
                uint numReferenceSequences = reader.ReadUInt32();

                // iterate over each reference sequence
                for (uint refSeqIndex = 0; refSeqIndex < numReferenceSequences; ++refSeqIndex)
                {
                    // =======================
                    // read the binning index
                    // =======================

                    BamReferenceIndex refIndex = new BamReferenceIndex();

                    // read the number of bins
                    uint numBins = reader.ReadUInt32();

                    // iterate over each bin in the regions dictionary
                    for (uint binIndex = 0; binIndex < numBins; ++binIndex)
                    {
                        // read the bin and the number of index regions
                        uint bin             = reader.ReadUInt32();
                        uint numIndexRegions = reader.ReadUInt32();

                        // read all of the index regions
                        List<BamIndexRegion> bamIndexRegions = new List<BamIndexRegion>();

                        for (uint regionIndex = 0; regionIndex < numIndexRegions; ++regionIndex)
                        {
                            ulong begin = reader.ReadUInt64();
                            ulong end   = reader.ReadUInt64();
                            bamIndexRegions.Add(new BamIndexRegion(begin, end));
                        }

                        // add the index regions to our dictionary
                        refIndex.RegionsDictionary[bin] = bamIndexRegions;
                    }

                    // =====================
                    // read the linear index
                    // =====================

                    // read the linear index size
                    uint numOffsets = reader.ReadUInt32();

                    for (uint offsetIndex = 0; offsetIndex < numOffsets; ++offsetIndex)
                    {
                        refIndex.OffsetList.Add(reader.ReadUInt64());
                    }

                    // remember the reference index
                    referenceIndices.Add(refIndex);
                }

                // read the number of unaligned reads without coordinates; this trailing field
                // is optional in the BAI format, so a file that ends cleanly here is valid and
                // the count stays at 0 (a partially written field still fails in ReadUInt64)
                Stream stream = reader.BaseStream;
                if (stream.Position < stream.Length)
                {
                    numUnalignedWithoutCoordinates = reader.ReadUInt64();
                }
            }

            // everything parsed cleanly: commit the new index
            _index.Clear();

            foreach (BamReferenceIndex refIndex in referenceIndices)
            {
                _index.Add(refIndex);
            }

            _numUnalignedWithoutCoordinates = numUnalignedWithoutCoordinates;

            return true;
        }
Esempio n. 2
0
        /// <summary>
        ///     Looks up the index regions for the desired reference sequence and position and
        ///     stores them, sorted and merged, in the returned iterator
        /// </summary>
        /// <param name="refID">index of the reference sequence within the loaded BAM index</param>
        /// <param name="position">
        ///     position on the reference; negative values are treated as 0 for the linear index lookup
        /// </param>
        /// <param name="bamIterator">
        ///     always receives a newly created iterator; this method only assigns its Offsets
        ///     when it returns true
        /// </param>
        /// <returns>
        ///     true if at least one index region is left for the query after filtering,
        ///     false if there is nothing to read for it
        /// </returns>
        internal bool GetOffsets(int refID, int position, out BamIterator bamIterator)
        {
            // initialize the bam iterator
            // NOTE(review): the iterator receives the unadjusted position; 1 << 29 is presumably
            // the end coordinate of the query - confirm against the BamIterator constructor
            bamIterator = new BamIterator(refID, position, 1 << 29);

            // adjust the specified position
            if (position < 0)
            {
                position = 0;
            }

            // calculate which bins overlap this region
            List<uint> binKeys = GetBinKeysThatOverlapPos((uint)bamIterator.Begin, (uint)bamIterator.End);

            // get bins and offsets for this reference
            BamReferenceIndex refIndex = _index[refID];
            Dictionary<uint, List<BamIndexRegion>> binnedRegions = refIndex.RegionsDictionary;
            List<ulong> offsets   = refIndex.OffsetList;
            ulong       minOffset = 0;

            if (offsets.Count > 0)
            {
                // use the linear index entry for this position, falling back to the last
                // entry when the position lies beyond the end of the linear index
                int offsetIndex = position >> BamLidxShift;
                minOffset = (offsetIndex >= offsets.Count) ? offsets[offsets.Count - 1] : offsets[offsetIndex];

                // improvement for index files built by tabix prior to 0.1.4
                if (minOffset == 0)
                {
                    // Scan backward for a valid offset:
                    if (offsetIndex > offsets.Count)
                    {
                        offsetIndex = offsets.Count;
                    }

                    int i;
                    for (i = offsetIndex - 1; i >= 0; i--)
                    {
                        if (offsets[i] != 0)
                        {
                            break;
                        }
                    }
                    if (i >= 0)
                    {
                        minOffset = offsets[i];
                    }
                }
            }

            // get the total count of regions represented in each bin
            // this is a check to see if we can exit early
            int numIndexRegions = 0;
            List<BamIndexRegion> bamIndexRegions;

            foreach (uint binKey in binKeys)
            {
                if (binnedRegions.TryGetValue(binKey, out bamIndexRegions))
                {
                    numIndexRegions += bamIndexRegions.Count;
                }
            }

            if (numIndexRegions == 0)
            {
                return false;
            }

            // grab all of the index regions that end after the minimum offset
            List<BamIndexRegion> regionsAfterMinOffset = new List<BamIndexRegion>(numIndexRegions);

            foreach (uint binKey in binKeys)
            {
                if (binnedRegions.TryGetValue(binKey, out bamIndexRegions))
                {
                    foreach (BamIndexRegion indexRegion in bamIndexRegions)
                    {
                        if (indexRegion.End > minOffset)
                        {
                            regionsAfterMinOffset.Add(indexRegion);
                        }
                    }
                }
            }

            // every candidate region may end at or before the minimum offset; without this
            // check the code below would assume one surviving region and index into an
            // empty array
            if (regionsAfterMinOffset.Count == 0)
            {
                return false;
            }

            // sort the index regions
            // NOTE(review): if BamIndexRegion is a reference type, the End adjustments below
            // also modify the regions stored in the index - confirm it is a value type
            BamIndexRegion[] sortedList = regionsAfterMinOffset.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray();

            // consolidate completely contained adjacent blocks: a region is kept only if it
            // ends after the last region that was kept
            int prevIndex = 0;

            for (int currentIndex = 1; currentIndex < sortedList.Length; ++currentIndex)
            {
                if (sortedList[prevIndex].End < sortedList[currentIndex].End)
                {
                    sortedList[++prevIndex] = sortedList[currentIndex];
                }
            }

            numIndexRegions = prevIndex + 1;

            // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
            for (int currentIndex = 1; currentIndex < numIndexRegions; ++currentIndex)
            {
                if (sortedList[currentIndex - 1].End >= sortedList[currentIndex].Begin)
                {
                    sortedList[currentIndex - 1].End = sortedList[currentIndex].Begin;
                }
            }

            // merge adjacent blocks whose boundary offsets agree once the low 16 bits are
            // dropped (presumably the within-block part of a BAM virtual file offset)
            prevIndex = 0;

            for (int currentIndex = 1; currentIndex < numIndexRegions; ++currentIndex)
            {
                if ((sortedList[prevIndex].End >> 16) == (sortedList[currentIndex].Begin >> 16))
                {
                    sortedList[prevIndex].End = sortedList[currentIndex].End;
                }
                else
                {
                    sortedList[++prevIndex] = sortedList[currentIndex];
                }
            }

            numIndexRegions = prevIndex + 1;

            // add the index regions to our list
            bamIterator.Offsets = new BamIndexRegion[numIndexRegions];
            for (int currentIndex = 0; currentIndex < numIndexRegions; ++currentIndex)
            {
                bamIterator.Offsets[currentIndex] = sortedList[currentIndex];
            }

            return true;
        }