/// <summary>
/// Reads a BAM index from the supplied filename, replacing any previously
/// loaded index contents.
/// </summary>
/// <param name="filename">path of the BAM index file to load</param>
/// <returns>true if the index was successfully loaded; false if the file does not exist</returns>
/// <exception cref="InvalidDataException">
/// thrown when the file does not begin with the expected BAM index magic number
/// </exception>
public bool ReadIndex(string filename)
{
    // check if the file exists
    if (!File.Exists(filename))
    {
        return false;
    }

    using (BinaryReader reader = new BinaryReader(new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read)))
    {
        // check to see if we have a proper BAM index signature.
        // ReadBytes returns fewer than 4 bytes on a truncated file, so decode
        // whatever was actually read instead of assuming a full 4-byte buffer;
        // a short file then fails the signature check below rather than
        // raising an argument exception from GetString.
        byte[] buffer = reader.ReadBytes(4);
        string magicNumberString = Encoding.ASCII.GetString(buffer);

        if (magicNumberString != BamConstants.BaiMagicNumber)
        {
            throw new InvalidDataException(
                string.Format("ERROR: Expected the BAM index magic number to be {0}, but found {1}.",
                    BamConstants.BaiMagicNumber, magicNumberString));
        }

        // read the number of reference sequences
        uint numReferenceSequences = reader.ReadUInt32();

        // iterate over each reference sequence
        _index.Clear();

        for (uint refSeqIndex = 0; refSeqIndex < numReferenceSequences; ++refSeqIndex)
        {
            // =======================
            // read the binning index
            // =======================

            BamReferenceIndex refIndex = new BamReferenceIndex();

            // read the number of bins
            uint numBins = reader.ReadUInt32();

            // iterate over each bin in the regions dictionary
            for (uint binIndex = 0; binIndex < numBins; ++binIndex)
            {
                // read the bin and the number of index regions
                uint bin = reader.ReadUInt32();
                uint numIndexRegions = reader.ReadUInt32();

                // read all of the index regions
                List<BamIndexRegion> bamIndexRegions = new List<BamIndexRegion>();

                for (uint regionIndex = 0; regionIndex < numIndexRegions; ++regionIndex)
                {
                    ulong begin = reader.ReadUInt64();
                    ulong end = reader.ReadUInt64();
                    bamIndexRegions.Add(new BamIndexRegion(begin, end));
                }

                // add the index regions to our dictionary
                refIndex.RegionsDictionary[bin] = bamIndexRegions;
            }

            // =====================
            // read the linear index
            // =====================

            // read the linear index size
            uint numOffsets = reader.ReadUInt32();

            for (uint offsetIndex = 0; offsetIndex < numOffsets; ++offsetIndex)
            {
                refIndex.OffsetList.Add(reader.ReadUInt64());
            }

            // add the reference index
            _index.Add(refIndex);
        }

        // read the number of unaligned reads without coordinates.
        // This trailing field is optional in the BAI format, so only read it
        // when the stream still holds a full 8-byte value; otherwise reset the
        // count to zero so a value from a previously loaded index does not linger.
        Stream stream = reader.BaseStream;
        bool hasUnalignedCount = (stream.Length - stream.Position) >= sizeof(ulong);
        _numUnalignedWithoutCoordinates = hasUnalignedCount ? reader.ReadUInt64() : 0UL;
    }

    return true;
}
/// <summary>
/// returns a list of the index regions for the desired reference sequence and position
/// </summary>
/// <param name="refID">index of the reference sequence within the loaded index</param>
/// <param name="position">
/// position on the reference; negative values are treated as 0 when consulting the linear index
/// </param>
/// <param name="bamIterator">
/// receives a new iterator for the query; its Offsets are only populated when the method returns true
/// </param>
/// <returns>
/// true if at least one index region remains after filtering by the linear index; false otherwise
/// </returns>
internal bool GetOffsets(int refID, int position, out BamIterator bamIterator)
{
    // initialize the bam iterator
    bamIterator = new BamIterator(refID, position, 1 << 29);

    // adjust the specified position
    if (position < 0)
    {
        position = 0;
    }

    // calculate which bins overlap this region
    List<uint> binKeys = GetBinKeysThatOverlapPos((uint)bamIterator.Begin, (uint)bamIterator.End);

    // get bins and offsets for this reference
    BamReferenceIndex refIndex = _index[refID];
    Dictionary<uint, List<BamIndexRegion>> binnedRegions = refIndex.RegionsDictionary;
    List<ulong> offsets = refIndex.OffsetList;

    // use the linear index to find the smallest file offset worth considering
    ulong minOffset = 0;

    if (offsets.Count > 0)
    {
        int offsetIndex = position >> BamLidxShift;
        minOffset = (offsetIndex >= offsets.Count) ? offsets[offsets.Count - 1] : offsets[offsetIndex];

        // improvement for index files built by tabix prior to 0.1.4
        if (minOffset == 0)
        {
            // Scan backward for a valid offset:
            if (offsetIndex > offsets.Count)
            {
                offsetIndex = offsets.Count;
            }

            int i;
            for (i = offsetIndex - 1; i >= 0; i--)
            {
                if (offsets[i] != 0)
                {
                    break;
                }
            }

            if (i >= 0)
            {
                minOffset = offsets[i];
            }
        }
    }

    // get the total count of regions represented in each bin
    // this is a check to see if we can exit early
    int numIndexRegions = 0;
    List<BamIndexRegion> bamIndexRegions;

    foreach (uint binKey in binKeys)
    {
        if (binnedRegions.TryGetValue(binKey, out bamIndexRegions))
        {
            numIndexRegions += bamIndexRegions.Count;
        }
    }

    if (numIndexRegions == 0)
    {
        return false;
    }

    // grab all of the index regions that end after the minimum offset.
    // Each region is copied because the passes below rewrite End in place;
    // if BamIndexRegion is a reference type, working on the stored instances
    // would permanently alter the cached index and corrupt later queries.
    // NOTE(review): assumes the (begin, end) constructor only sets Begin/End - confirm.
    List<BamIndexRegion> regionsAfterMinOffset = new List<BamIndexRegion>(numIndexRegions);

    foreach (uint binKey in binKeys)
    {
        if (binnedRegions.TryGetValue(binKey, out bamIndexRegions))
        {
            foreach (BamIndexRegion indexRegion in bamIndexRegions)
            {
                if (indexRegion.End > minOffset)
                {
                    regionsAfterMinOffset.Add(new BamIndexRegion(indexRegion.Begin, indexRegion.End));
                }
            }
        }
    }

    // every candidate region may end at or before the minimum offset; without
    // this guard the code below would report one region and index into an
    // empty array
    if (regionsAfterMinOffset.Count == 0)
    {
        return false;
    }

    // sort the index regions
    BamIndexRegion[] sortedList = regionsAfterMinOffset.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray();

    // consolidate completely contained adjacent blocks
    int prevIndex = 0;
    for (int currentIndex = 1; currentIndex < sortedList.Length; ++currentIndex)
    {
        if (sortedList[prevIndex].End < sortedList[currentIndex].End)
        {
            sortedList[++prevIndex] = sortedList[currentIndex];
        }
    }

    numIndexRegions = prevIndex + 1;

    // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
    for (int currentIndex = 1; currentIndex < numIndexRegions; ++currentIndex)
    {
        if (sortedList[currentIndex - 1].End >= sortedList[currentIndex].Begin)
        {
            sortedList[currentIndex - 1].End = sortedList[currentIndex].Begin;
        }
    }

    // merge adjacent blocks: the next block is folded into the previous one
    // when the previous End and the next Begin agree once the low 16 bits are
    // shifted away
    prevIndex = 0;
    for (int currentIndex = 1; currentIndex < numIndexRegions; ++currentIndex)
    {
        if ((sortedList[prevIndex].End >> 16) == (sortedList[currentIndex].Begin >> 16))
        {
            sortedList[prevIndex].End = sortedList[currentIndex].End;
        }
        else
        {
            sortedList[++prevIndex] = sortedList[currentIndex];
        }
    }

    numIndexRegions = prevIndex + 1;

    // add the index regions to our list
    bamIterator.Offsets = new BamIndexRegion[numIndexRegions];
    for (int currentIndex = 0; currentIndex < numIndexRegions; ++currentIndex)
    {
        bamIterator.Offsets[currentIndex] = sortedList[currentIndex];
    }

    return true;
}