/// <summary>
/// Adds the item. Does NOT check for whether the item is already present.
/// </summary>
/// <param name="sequence">Array holding the values to add.</param>
/// <param name="min">Inclusive start index of the values within <paramref name="sequence"/>.</param>
/// <param name="lim">Exclusive end index of the values within <paramref name="sequence"/>.</param>
/// <param name="hash">Hash of the range; must equal <c>Hashing.HashSequence(sequence, min, lim)</c>.</param>
private void AddCore(uint[] sequence, int min, int lim, uint hash)
{
    Contracts.Assert(0 <= min && min <= lim && lim <= Utils.Size(sequence));
    Contracts.Assert(Hashing.HashSequence(sequence, min, lim) == hash);

    // _start is indexed up to _idLim + 1 below (after _idLim is incremented),
    // so it needs one more slot than _hash / _next.
    if (_idLim + 1 >= _start.Length)
    {
        Contracts.Check(_start.Length != Utils.ArrayMaxSize, "Cannot allocate memory for the sequence pool");
        Contracts.Assert(_idLim + 1 == _start.Length);
        // Grow by 50%, computed in long so the sum cannot wrap, then clamp to the array limit.
        long newSize = (long)_start.Length + _start.Length / 2;
        int size = (newSize > Utils.ArrayMaxSize) ? Utils.ArrayMaxSize : (int)newSize;
        Array.Resize(ref _start, size);
    }

    // _hash and _next are grown together to the same size.
    Contracts.Assert(_hash.Length >= _next.Length);
    if (_idLim >= _next.Length)
    {
        Contracts.Check(_next.Length != Utils.ArrayMaxSize, "Cannot allocate memory for the sequence pool");
        Contracts.Assert(_idLim == _next.Length);
        long newSize = (long)_next.Length + _next.Length / 2;
        int size = (newSize > Utils.ArrayMaxSize) ? Utils.ArrayMaxSize : (int)newSize;
        Array.Resize(ref _hash, size);
        Array.Resize(ref _next, size);
    }

    // Reserve 5 bytes per value: the worst case for a LEB128-encoded 32-bit value.
    var cbMax = checked(5 * (lim - min));
    var ibLim = _start[_idLim];
    if (ibLim > _bytes.Length - cbMax)
    {
        Contracts.Check(_bytes.Length != Utils.ArrayMaxSize, "Cannot allocate memory for the sequence pool");
        // Widen BEFORE adding: ibLim + cbMax evaluated as int can wrap to a negative
        // value, which would make Math.Max silently pick the (possibly too small)
        // 50% growth instead of the size that is actually required.
        long cbNeeded = (long)ibLim + cbMax;
        long newSize = Math.Max((long)_bytes.Length + _bytes.Length / 2, cbNeeded);
        // NOTE(review): when cbNeeded exceeds Utils.ArrayMaxSize the clamp below leaves
        // the buffer shorter than the worst case; the assert further down flags that.
        int size = (newSize > Utils.ArrayMaxSize) ? Utils.ArrayMaxSize : (int)newSize;
        Array.Resize(ref _bytes, size);
    }
    Contracts.Assert(_idLim < _next.Length);
    Contracts.Assert(ibLim <= _bytes.Length - cbMax);

    // Push the new id onto the front of its bucket's chain.
    int i = GetBucketIndex(hash);
    _next[_idLim] = _buckets[i];
    _hash[_idLim] = hash;
    _buckets[i] = _idLim;
    _idLim++;

    // The new entry's bytes begin where the previous entry ended; the encoder
    // receives the end offset by ref and is expected to advance it.
    _start[_idLim] = _start[_idLim - 1];
    UIntArrayToLeb128(sequence, min, lim, _bytes, ref _start[_idLim]);

    if (_idLim >= _buckets.Length)
    {
        GrowTable();
    }

    AssertValid();
}
/// <summary>
/// Reconstructs a pool from its binary form. Only the id count, the start offsets and
/// the encoded bytes are read; the hashes and bucket chains are recomputed from them.
/// </summary>
/// <param name="reader">Reader positioned at the start of the serialized pool.</param>
public SequencePool(BinaryReader reader)
{
    // *** Binary format ***
    // int: _idLim (the number of sequences)
    // int[]: _start (length is _idLim+1)
    // byte[]: _bytes (length is _start[_idLim])

    _idLim = reader.ReadInt32();
    Contracts.CheckDecode(0 <= _idLim && _idLim < int.MaxValue);

    _start = reader.ReadIntArray(_idLim + 1);
    Contracts.CheckDecode(Utils.Size(_start) > 0 && _start[0] == 0);

    // The last start offset is the total number of encoded bytes.
    int cbTotal = _start[_idLim];
    Contracts.CheckDecode(cbTotal >= 0);
    _bytes = reader.ReadByteArray(cbTotal);

    // Enforce minimum capacities: 11 start slots and 40 bytes.
    if (_idLim < 10)
    {
        Array.Resize(ref _start, 11);
    }
    if (Utils.Size(_bytes) < 40)
    {
        Array.Resize(ref _bytes, 40);
    }

    // Size the bucket table as a power of two based on the id count (at least 31).
    // IbitHigh presumably yields the index of the highest set bit; one more bit is
    // used unless that index is already 31.
    int bitCount = Utils.IbitHigh((uint)Math.Max(_idLim, 31));
    Contracts.Assert(4 <= bitCount && bitCount <= 31);
    if (bitCount < 31)
    {
        bitCount++;
    }
    _mask = (1 << bitCount) - 1;
    _buckets = Utils.CreateArray<int>(_mask + 1, -1);

    int capacity = Math.Max(_idLim, 10);
    _hash = new uint[capacity];
    _next = new int[capacity];

    // Decode every stored sequence to recompute its hash and link it into its bucket.
    uint[] scratch = null;
    for (int id = 0; id < _idLim; id++)
    {
        int ibMin = _start[id];
        int ibLim = _start[id + 1];
        Contracts.CheckDecode(ibMin <= ibLim && ibLim <= cbTotal);

        int count = Leb128ToUIntArray(_bytes, ibMin, ibLim, ref scratch);
        uint hash = Hashing.HashSequence(scratch, 0, count);
        _hash[id] = hash;

        // Insert at the head of the bucket's chain.
        int bucket = GetBucketIndex(hash);
        _next[id] = _buckets[bucket];
        _buckets[bucket] = id;
    }

    AssertValid();
}
// Looks up the sequence in sequence[min..lim) and returns its ID, or -1 if it is not found.
// The hash of the requested range is always computed and returned through the out parameter.
private int GetCore(uint[] sequence, int min, int lim, out uint hash)
{
    AssertValid();
    Contracts.Assert(0 <= min && min <= lim && lim <= Utils.Size(sequence));

    hash = Hashing.HashSequence(sequence, min, lim);

    // Walk the chain of ids that share this hash's bucket.
    int id = GetFirstIdInBucket(hash);
    while (id >= 0)
    {
        Contracts.Assert(0 <= id && id < _idLim);

        // Only entries with an identical hash are worth decoding.
        if (_hash[id] == hash)
        {
            int ib = _start[id];
            int ibEnd = _start[id + 1];
            int pos = min;
            Contracts.Assert(ib <= ibEnd);

            // Decode stored values one at a time, comparing against the request,
            // until either side runs out or a value differs.
            while (pos < lim && ib < ibEnd)
            {
                uint value;
                bool decodedOk = TryDecodeOne(_bytes, ref ib, ibEnd, out value);
                Contracts.Assert(decodedOk);
                if (sequence[pos] != value)
                {
                    break;
                }
                pos++;
                Contracts.Assert(ib <= ibEnd);
            }

            // It is a match only when the requested range and the stored bytes
            // were both consumed completely, at the same time.
            if (pos >= lim && ib == ibEnd)
            {
                return id;
            }
        }

        id = _next[id];
    }

    return -1;
}