Beispiel #1
0
 // Builds a mapper from the integer stream stored in fileName.
 //
 // Stores the four numeric arguments, wraps the file contents in a CachedStream that is
 // decoded by a VarNybbleIntStreamDecompressor, and then makes a single pass over numUrls
 // decoded values to build a sparse index. An index entry is recorded for every i in
 // [0, numUrls] that is a multiple of strideLength; each entry holds the decompressor
 // position at that point and the sum of all values decoded before it.
 //
 // strideLength: distance (in decoded values) between two consecutive index entries.
 // newBaseUid / oldBaseUid: stored as-is; not otherwise used by this constructor.
 // numUrls: number of values expected in the stream.
 // fileName: path of the file to read.
 internal UidSpaceMapper(long strideLength, long newBaseUid, long oldBaseUid, long numUrls, string fileName)
 {
     this.strideLength = strideLength;
     this.numUrls = numUrls;
     this.newBaseUid = newBaseUid;
     this.oldBaseUid = oldBaseUid;

     // The file stream is disposed as soon as the CachedStream has been built from it.
     // NOTE(review): presumably CachedStream copies the data it needs eagerly -- confirm.
     using (var fileStream = new BufferedStream(new FileStream(fileName, FileMode.Open, FileAccess.Read))) {
         this.bytes = new CachedStream(fileStream, (ulong)fileStream.Length);
         this.deco = new VarNybbleIntStreamDecompressor(this.bytes);
     }

     // One entry per full stride, plus the entry at i == 0.
     long expectedEntries = (numUrls / strideLength) + 1;
     this.idxPosition = new ulong[expectedEntries];
     this.idxGapSum = new ulong[expectedEntries];

     ulong runningGapSum = 0;
     int slot = 0;
     long ordinal = 0;
     while (ordinal <= numUrls) {
         bool onStrideBoundary = (ordinal % strideLength) == 0;
         if (onStrideBoundary) {
             // Capture the position BEFORE decoding the value at this ordinal.
             this.idxPosition[slot] = deco.GetPosition();
             this.idxGapSum[slot] = runningGapSum;
             slot++;
         }
         // The iteration at ordinal == numUrls exists only to record a final index entry.
         if (ordinal < numUrls) {
             runningGapSum += deco.GetUInt64();
         }
         ordinal++;
     }

     Contract.Assert(slot == expectedEntries);
     Contract.Assert(deco.AtEnd());
 }
        // NOTE(review): presumably the 64-bit word currently being decoded; the constructor
        // shown below does not assign it -- confirm where it is first loaded.
        private UInt64 word; // bytes 7 (highest-order byte) to lastContBit are payload

        #endregion Fields

        #region Constructors

        // Creates a decompressor over the given stream with pos set to 0,
        // nextContBit set to -1 and lastContBit set to 0.
        // Throws FileFormatException when main.Size is not a multiple of 8.
        internal VarByteIntStreamDecompressor(CachedStream main)
        {
            this.main = main;
            this.pos = 0;
            this.nextContBit = -1;
            this.lastContBit = 0;

            // Reject streams whose size is not a whole number of 8-byte units.
            UInt64 streamLength = main.Size;
            bool isMultipleOfEight = (streamLength % 8) == 0;
            if (!isMultipleOfEight) {
                throw new FileFormatException("stream length (" + streamLength + ") is not a multiple of 8");
            }
        }
        // The constructor below initializes this to 0.
        // NOTE(review): presumably the current read offset into the stream -- confirm.
        private UInt64 pos; // mutable state

        #endregion Fields

        #region Constructors

        // Creates a decompressor over the given stream; pos starts at 0.
        internal DummyIntStreamDecompressor(CachedStream main)
        {
            this.pos = 0;
            this.main = main;
        }
        // Set to 0 by the constructor that follows.
        // NOTE(review): presumably the current read offset into the stream -- confirm.
        private UInt64 pos; // mutable state

        // Stores the stream to read from and sets pos to 0.
        internal DummyIntStreamDecompressor(CachedStream main)
        {
            this.pos = 0;
            this.main = main;
        }
Beispiel #5
0
 // Drops the references to the loaded bytes and to both index arrays by setting
 // bytes, idxUrls and idxOffsets to null. No other fields are touched.
 internal void Unload()
 {
     this.idxOffsets = null;
     this.idxUrls = null;
     this.bytes = null;
 }
Beispiel #6
0
            // Loads this cell's URL bytes from the cell file and rebuilds the sparse index
            // (idxUrls / idxOffsets) over them.
            //
            // The bytes are a sequence of numUrls entries, each stored as
            //   <prefLen> <suffLen> <suffLen suffix bytes>
            // where the full URL is the first prefLen bytes of the previous URL followed by
            // the suffix. For every stride of indexStride URLs (and for a trailing partial
            // stride, if any) one index item is stored:
            //   idxUrls[k]    = the last URL decoded in stride k
            //   idxOffsets[k] = the position in this.bytes at which stride k starts
            //
            // Requires numBytes >= 0 (asserted). Reads numBytes bytes starting at startPos
            // of this.cell.fileName.
            internal void Load()
            {
                Contract.Assert(this.numBytes >= 0);
                var sw = System.Diagnostics.Stopwatch.StartNew();
                using (var stream = new BufferedStream(new FileStream(this.cell.fileName, FileMode.Open, FileAccess.Read))) {
                    stream.Seek(this.startPos, SeekOrigin.Begin);
                    this.bytes = new CachedStream(stream, (ulong)this.numBytes);
                }
                // secs1/secs2 only feed the (currently disabled) PERF trace at the end.
                var secs1 = 0.001 * sw.ElapsedMilliseconds;
                sw.Restart();

                // Given the data in this.bytes, construct idxUrls and idxOffsets
                var numIdxItems = (int)((this.cell.numUrls + this.indexStride - 1) / this.indexStride);
                // Note that for numUrls=0 ^^this^^ is NOT the same as (int)((this.cell.numUrls - 1) / this.indexStride) + 1 !!
                this.idxUrls = new byte[numIdxItems][];
                this.idxOffsets = new ulong[numIdxItems];
                ulong pos = 0;      // position in this.bytes -- starts at 0
                ulong lastPos = 0;  // position at which the current stride started
                var res = new byte[0];  // holds the most recently decoded URL in res[0..resLen)
                int resLen = 0;
                long cuid = 0;      // number of URLs decoded so far

                while (cuid < this.cell.numUrls) {
                    int prefLen = this.ReadCompressedSize(ref pos);
                    int suffLen = this.ReadCompressedSize(ref pos);
                    resLen = prefLen + suffLen;

                    // Enlarge the result buffer if necessary
                    if (resLen > res.Length) {
                        var tmp = new byte[resLen];
                        for (int i = 0; i < prefLen; i++) {  // care only about first prefLen bytes
                            tmp[i] = res[i];
                        }
                        res = tmp;
                    }

                    // Read the suffix; the first prefLen bytes of res are kept from the previous URL
                    for (int i = prefLen; i < resLen; i++) {
                        res[i] = this.bytes.GetUInt8(pos + (ulong)(i - prefLen));
                    }
                    pos += (ulong)suffLen;

                    if (++cuid % indexStride == 0) {
                        // Save an index item for the stride that was just completed
                        int idx = (int)(cuid / indexStride) - 1;
                        idxUrls[idx] = SubArray(res, 0, resLen);
                        idxOffsets[idx] = lastPos;
                        lastPos = pos;  // the next stride starts here
                    }
                }
                // Finally, store a sentinel for a trailing partial stride
                if (cuid % indexStride != 0) {
                    idxUrls[numIdxItems - 1] = SubArray(res, 0, resLen);
                    idxOffsets[numIdxItems - 1] = lastPos;
                }
                var secs2 = 0.001 * sw.ElapsedMilliseconds;
                //Console.Error.WriteLine("PERF: Cell {0} url portion: Loading took {1} seconds, indexing took {2} seconds", this.cell.fileName, secs1, secs2);

                // Contract.ForAll merely RETURNS a bool; it has to be wrapped in an assertion
                // for the postcondition to actually be checked.
                Contract.Assert(Contract.ForAll(this.idxUrls, url => url != null));
            }
Beispiel #7
0
            // Set to null by the constructor below.
            private byte[][] idxUrls; // each byte[] is logically a UTF8-encoded string

            #endregion Fields

            #region Constructors

            // Creates the URL portion of a cell from its serialized header.
            // Consumes, in this order, an Int64 (numBytes) and an Int32 (indexStride) from rd.
            // startPos is set to -1, and the byte cache, both index arrays and the
            // hash-to-UID cache all start out null.
            internal UrlCell(Cell cell, BinaryReader rd)
            {
                this.cell = cell;

                // The order of these two reads is dictated by the layout of the data behind rd.
                this.numBytes = rd.ReadInt64();
                this.indexStride = rd.ReadInt32();

                this.startPos = -1;
                this.bytes = null;
                this.idxOffsets = null;
                this.idxUrls = null;
                this.hashToUidCache = null;
                this.hasher = new Hash64();
            }
Beispiel #8
0
            // Loads this link cell's bytes from the cell file and builds idxOffsets, which
            // holds the decompressor position of every indexStride-th PUID record.
            // Each record is a UInt32 link count followed by that many UInt64 values.
            // Returns without doing anything when numBytes is -1.
            internal void Load()
            {
                // Somewhat of a hack: If this LinkCell is not yet sealed, return immediately.
                // This is OK as long as noone tries to use the cell subsequently.
                // A better solution would be not to call Load on unsealed LinkCell objects.
                if (this.numBytes == -1) {
                    return;
                }

                var timer = Stopwatch.StartNew();
                using (var reader = new BinaryReader(new BufferedStream(new FileStream(this.cell.fileName, FileMode.Open, FileAccess.Read, FileShare.Read)))) {
                    var source = reader.BaseStream;
                    source.Seek(this.startPos, SeekOrigin.Begin);
                    this.bytes = new CachedStream(source, (ulong)this.numBytes);
                }
                // loadSecs/indexSecs only feed the (currently disabled) PERF trace at the end.
                var loadSecs = 0.001 * timer.ElapsedMilliseconds;
                timer.Restart();

                // Construct idxOffsets from main
                var supraPuid = this.cell.part.ping.PUID(this.cell.supraUID);
                long indexLength = (int)((supraPuid - 1) / this.indexStride) + 1;
                this.idxOffsets = new ulong[indexLength];

                var decoder = this.NewDecompressor();
                int nextSlot = 0;
                long puid = 0;
                while (puid < supraPuid) {
                    bool startsNewStride = (puid % this.indexStride) == 0;
                    if (startsNewStride) {
                        // Record where this record begins, before decoding it.
                        idxOffsets[nextSlot] = decoder.GetPosition();
                        nextSlot++;
                    }

                    // Skip over the record: a count followed by that many link values.
                    uint linkCount = decoder.GetUInt32();
                    for (uint remaining = linkCount; remaining > 0; remaining--) {
                        decoder.GetUInt64();  // don't care that first gap is signed
                    }
                    puid++;
                }
                var indexSecs = 0.001 * timer.ElapsedMilliseconds;
                //Console.Error.WriteLine("PERF: Cell {0} {1} portion: Loading took {2} seconds, indexing took {3} seconds", this.cell.fileName, this == this.cell.fwdCell ? "fwd" : "bwd", loadSecs, indexSecs);
            }
Beispiel #9
0
 // Creates the link portion of a cell from its serialized header.
 // Consumes, in this order, from rd: Int64 numBytes, Int64 numLinks, Int32 indexStride,
 // and a UInt32 that is cast to LinkCompression. startPos is set to -1; the byte cache
 // and the offset index start out null.
 internal LinkCell(Cell cell, BinaryReader rd)
 {
     this.cell = cell;

     // The order of these reads is dictated by the layout of the data behind rd.
     this.numBytes = rd.ReadInt64();
     this.numLinks = rd.ReadInt64();
     this.indexStride = rd.ReadInt32();
     uint rawCompressionCode = rd.ReadUInt32();
     this.compressionCode = (LinkCompression)rawCompressionCode;

     this.startPos = -1;
     this.bytes = null;
     this.idxOffsets = null;
 }