/// <summary>
/// Reads one packed-integer block from the read buffer at the current position
/// (<c>indxInBuffer</c>) and advances the position past it.
/// </summary>
/// <remarks>
/// Block layout as decoded here: 1 byte bit width, a 4-byte little-endian value count,
/// then <c>(7 + count * bits) / 8</c> payload bytes. The header bytes are included in the
/// range handed to <c>PackedInts.Load</c>.
/// </remarks>
/// <returns>The packed integers loaded from the block.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// The buffer cannot supply the header or the full block, or the header describes a
/// negative or too large block.
/// </exception>
private IPackedInts ReadPacked()
{
    // 1 byte for the bit width + 4 bytes for the value count.
    const int headerSize = 1 + 4;

    if (!EnsureBuffer(headerSize))
    {
        throw new System.IO.InvalidDataException("Wrong data");
    }

    var bits = (int)buffer[indxInBuffer];
    var count = (int)buffer[indxInBuffer + 1]
        | ((int)buffer[indxInBuffer + 2] << 8)
        | ((int)buffer[indxInBuffer + 3] << 16)
        | ((int)buffer[indxInBuffer + 4] << 24);

    // A corrupt header must not yield a negative block size: that would move the read
    // position backwards instead of failing.
    if (bits < 0 || count < 0)
    {
        throw new System.IO.InvalidDataException("Wrong data: negative bit width or value count in packed block header");
    }

    // Compute in 64-bit: count * bits can exceed int.MaxValue and would silently wrap
    // in the default unchecked int arithmetic.
    long blockSize = headerSize + (7 + (long)count * bits) / 8;
    if (blockSize > int.MaxValue)
    {
        throw new System.IO.InvalidDataException("Wrong data: packed block size exceeds the supported range");
    }

    var size = (int)blockSize;
    if (!EnsureBuffer(size))
    {
        throw new System.IO.InvalidDataException("Wrong data");
    }

    // NOTE: Instead of creating an instance of packed integers,
    //       one can decode data from the read buffer directly.
    var packed = PackedInts.Load(buffer, indxInBuffer, size);
    indxInBuffer += size;

    return packed;
}
/// <summary>
/// Finishes the current posting list: writes out any values still held in the in-memory
/// buffer, stores the accumulated payload size in the list prefix and returns the list address.
/// </summary>
/// <remarks>
/// As used here, the list starts at <c>listStart</c> with <c>sizeof(long)</c> bytes
/// (their content is not written by this method), followed by a <c>sizeof(int)</c>-byte
/// length field, followed by <c>totalSize</c> bytes of packed data.
/// </remarks>
/// <returns>A <see cref="PostingListAddress"/> created from the list's start offset.</returns>
/// <exception cref="InvalidOperationException">
/// The number of bytes between the list start and the end of storage does not match the
/// prefix size plus the accumulated payload size.
/// </exception>
public PostingListAddress EndList()
{
    if (bufferIndex > 0)
    {
        // Store the (possibly partially filled) delta selector in its reserved slot
        // before the buffer is packed.
        buffer[deltaSelectorIndex] = deltaSelector;

        var packed = PackedInts.Convert(buffer, 0, bufferIndex).GetBytes();
        persistentStorage.WriteAll(persistentStorage.Length, packed, 0, packed.Length);
        totalSize += packed.Length;
    }

    // Write length of the list
    persistentStorage.WriteAll(listStart + sizeof(long), BitConverter.GetBytes(totalSize), 0, sizeof(int));

    // Sanity check: everything appended since the list start must be accounted for.
    var listEnd = persistentStorage.Length;
    var actualSize = listEnd - listStart;
    var expectedSize = totalSize + sizeof(long) + sizeof(int);
    if (actualSize != expectedSize)
    {
        throw new InvalidOperationException(
            "Posting list size mismatch: expected " + expectedSize +
            " bytes from the list start, but storage contains " + actualSize + " bytes.");
    }

    return new PostingListAddress(listStart);
}
/// <summary>
/// Appends one occurrence to the posting list that is currently being built.
/// </summary>
/// <remarks>
/// The very first occurrence is stored as absolute DocumentId, FieldId and TokenId.
/// Every later occurrence is stored relative to the previous one and is described by a
/// 2-bit code placed into the delta selector; the code equals the number of integers
/// appended to the buffer:
///   1 - same document and field: token delta;
///   2 - same document, different field: field delta, then absolute token;
///   3 - different document: document delta, then absolute field and token.
/// Code 0 is intentionally never produced: the last delta selector may have unused bits,
/// and a zero code would make them look like extra trailing occurrences.
/// </remarks>
/// <param name="occurrence">The occurrence to append.</param>
public void AddOccurrence(Occurrence occurrence)
{
    if (first)
    {
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.DocumentId;
            buffer[bufferIndex++] = (int)occurrence.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }

        first = false;
        previous = occurrence;

        // Reserve the slot that will receive the delta selector of the first block.
        deltaSelectorIndex = bufferIndex++;
        return;
    }

    int selectorCode;
    if (previous.DocumentId != occurrence.DocumentId)
    {
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.DocumentId - (int)previous.DocumentId;
            buffer[bufferIndex++] = (int)occurrence.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }
        selectorCode = 3;
    }
    else if (previous.FieldId != occurrence.FieldId)
    {
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.FieldId - (int)previous.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }
        selectorCode = 2;
    }
    else
    {
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.TokenId - (int)previous.TokenId;
        }
        selectorCode = 1;
    }

    previous = occurrence;
    deltaSelector |= (selectorCode << deltaSelectorOffset);
    deltaSelectorOffset += 2;

    // NOTE: Only the low 16 bits of the delta selector are used (8 occurrences per block),
    //       so that PackedInts can compress better. It might be worth experimenting with
    //       other widths (4 or 8 bits), or choosing one dynamically from the buffer contents.
    if (deltaSelectorOffset != 16)
    {
        return;
    }

    // The block is complete: store its selector in the reserved slot and start a new one.
    buffer[deltaSelectorIndex] = deltaSelector;
    deltaSelector = 0;
    deltaSelectorOffset = 0;

    if (remainingBlocks != 1)
    {
        // Reserve space for the delta selector of the next block.
        deltaSelectorIndex = bufferIndex++;
        remainingBlocks--;
        return;
    }

    // remainingBlocks reached 1: pack the buffered values, append them to storage
    // and restart the buffer with slot 0 reserved for the next selector.
    var packedBytes = PackedInts.Convert(buffer, 0, bufferIndex).GetBytes();
    persistentStorage.WriteAll(persistentStorage.Length, packedBytes, 0, packedBytes.Length);
    totalSize += packedBytes.Length;

    deltaSelectorIndex = 0;
    bufferIndex = 1;
    remainingBlocks = BlocksInMemory;
}