/// <summary>
/// Initializes the writer over the given storage and allocates its working buffers:
/// an integer buffer of <c>MemoryBufferSize</c> entries and a byte buffer sized by
/// <c>GroupVarint.GetMaxEncodedSize</c> for that many entries.
/// </summary>
/// <param name="storage">Storage the writer keeps a reference to; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="storage"/> is null.</exception>
public PostingListBinaryWriter(IPersistentStorage storage)
{
    // Fail fast here instead of with a NullReferenceException on the first use of the storage.
    if (storage == null)
    {
        throw new System.ArgumentNullException(nameof(storage));
    }

    this.persistentStorage = storage;
    this.buffer = new int[MemoryBufferSize];
    this.flushBuffer = new byte[GroupVarint.GetMaxEncodedSize(MemoryBufferSize)];
    this.bufferIndex = 0;
}
/// <summary>
/// Reads the next integer from the byte buffer. After every four integers a new selector
/// byte is consumed; it holds four 2-bit fields, and each field plus one is the byte width
/// passed to <c>GroupVarint.ReadInt</c> for the corresponding integer.
/// </summary>
/// <returns>The value produced by <c>GroupVarint.ReadInt</c> at the current buffer position.</returns>
/// <exception cref="Exception"><c>EnsureBuffer</c> returned false for the bytes required.</exception>
private int NextInteger()
{
    if (selectorIndex == 4)
    {
        if (!EnsureBuffer(1))
        {
            throw new Exception("Wrong data");
        }

        // Unpack from the lowest bit pair upwards: bits 0-1 fill slot 3, bits 6-7 fill slot 0.
        int packed = (int)buffer[indxInBuffer++];
        for (int slot = 3; slot >= 0; slot--)
        {
            selectors[slot] = (packed & 0b11) + 1;
            packed >>= 2;
        }

        selectorIndex = 0;
    }

    var width = selectors[selectorIndex];
    if (!EnsureBuffer(width))
    {
        throw new Exception("Wrong data");
    }

    var value = GroupVarint.ReadInt(buffer, indxInBuffer, width);
    indxInBuffer += width;
    selectorIndex++;
    return value;
}
/// <summary>
/// Encodes the first <c>bufferIndex</c> buffered integers with <c>GroupVarint.Encode</c>,
/// writes the encoded bytes at the current end of the storage, adds their count to
/// <c>totalSize</c> and resets the buffer position to zero.
/// </summary>
public void FlushBuffer()
{
    var byteCount = GroupVarint.Encode(buffer, 0, bufferIndex, flushBuffer, 0);

    // Append: the write offset is the storage length observed after encoding.
    var appendAt = persistentStorage.Length;
    persistentStorage.WriteAll(appendAt, flushBuffer, 0, byteCount);

    totalSize += byteCount;
    bufferIndex = 0;
}
/// <summary>
/// Finishes the current list: encodes and appends any integers still in the buffer,
/// writes the accumulated encoded size into the 4-byte field located
/// <c>sizeof(long)</c> bytes after the list start, and verifies the storage length.
/// </summary>
/// <returns>A <c>PostingListAddress</c> built from the list start offset.</returns>
/// <exception cref="InvalidOperationException">
/// The number of bytes between the list start and the end of the storage differs from
/// <c>totalSize + sizeof(long) + sizeof(int)</c>.
/// </exception>
public PostingListAddress EndList()
{
    if (bufferIndex > 0)
    {
        // The selector being accumulated is stored into its reserved slot before encoding.
        buffer[deltaSelectorIndex] = deltaSelector;
        var encodedSize = GroupVarint.Encode(buffer, 0, bufferIndex, flushBuffer, 0);
        persistentStorage.WriteAll(persistentStorage.Length, flushBuffer, 0, encodedSize);
        totalSize += encodedSize;
    }

    // Write length of the list
    persistentStorage.WriteAll(listStart + sizeof(long), BitConverter.GetBytes(totalSize), 0, sizeof(int));

    var listEnd = persistentStorage.Length;
    var actualLength = listEnd - listStart;
    var expectedLength = totalSize + sizeof(long) + sizeof(int);
    if (actualLength != expectedLength)
    {
        // Same exception type as before, now with the numbers needed to diagnose the mismatch.
        throw new InvalidOperationException(
            $"Posting list size mismatch: storage holds {actualLength} bytes from the list start, expected {expectedLength}.");
    }

    return new PostingListAddress(listStart);
}
/// <summary>
/// Decodes <paramref name="buffer"/> with <c>GroupVarint.Decode</c> and appends the
/// occurrences it describes to <paramref name="occurrences"/>.
/// </summary>
/// <remarks>
/// The first three numbers are the document, field and token ids of the first occurrence.
/// After that the stream alternates between a selector number and the values it describes.
/// The selector is consumed two bits at a time, lowest bits first, until no set bits remain;
/// each 2-bit field is the count of numbers used by the next occurrence:
/// 1 = token delta; 2 = field delta followed by the token id;
/// 3 = document delta followed by the field id and the token id.
/// </remarks>
/// <param name="buffer">Encoded bytes to decode.</param>
/// <param name="occurrences">List receiving the decoded occurrences in order.</param>
/// <exception cref="Exception">
/// A selector field is zero, or it asks for more numbers than remain in the decoded data.
/// </exception>
private static void ParseBufferTo(byte[] buffer, IList<Occurrence> occurrences)
{
    var decoded = GroupVarint.Decode(buffer);

    var last = new Occurrence((ulong)decoded[0], (ulong)decoded[1], (ulong)decoded[2]);
    occurrences.Add(last);

    int pos = 3;
    while (pos < decoded.Count)
    {
        // One selector number, then the values of every occurrence it describes.
        for (uint packed = (uint)decoded[pos++]; packed > 0; packed >>= 2)
        {
            int width = (int)(packed & 0b00000011);
            if (pos + width > decoded.Count)
            {
                throw new Exception("Attempt to read above data");
            }

            switch (width)
            {
                case 1:
                    last = Occurrence.O(last.DocumentId, last.FieldId, last.TokenId + (ulong)decoded[pos]);
                    break;

                case 2:
                    last = Occurrence.O(last.DocumentId, last.FieldId + (ulong)decoded[pos], (ulong)decoded[pos + 1]);
                    break;

                case 3:
                    last = Occurrence.O(last.DocumentId + (ulong)decoded[pos], (ulong)decoded[pos + 1], (ulong)decoded[pos + 2]);
                    break;

                case 0:
                    throw new Exception("Zero delta is not used, see comments in DeltaWriter");

                default:
                    throw new Exception("Something wrong");
            }

            // Every non-throwing case consumed exactly `width` numbers and produced one occurrence.
            pos += width;
            occurrences.Add(last);
        }
    }
}
/// <summary>
/// Advances to the next occurrence and stores it in <c>current</c>.
/// </summary>
/// <remarks>
/// State 0 reads a list header: an 8-byte continuation offset followed by a 4-byte value
/// that is added to the header end to compute <c>listEndOffset</c>. States 1, 4, 3 and 2
/// each read one occurrence (document, field, token); they differ only in which of
/// <c>selector1</c>..<c>selector4</c> give the byte widths and in where
/// <c>ReadSelectors</c> is called in between (presumably it refreshes those four fields;
/// it is defined elsewhere). After each occurrence the state becomes 0 when <c>isEof</c>
/// is set and the buffer index has reached <c>dataInBuffer</c>, otherwise the next state
/// in the 1 -> 4 -> 3 -> 2 -> 1 cycle.
/// </remarks>
/// <returns>
/// <c>true</c> when an occurrence was read; <c>false</c> when the state is 0, no positive
/// continuation offset is pending and <c>isEof</c> is set.
/// </returns>
/// <exception cref="Exception"><c>ReadSelectors</c> returned false, or the state is unknown.</exception>
public bool MoveNext()
{
    if (state == 0)
    {
        if (continuationOffset > 0)
        {
            // Continue with the chained part of the list and start from an empty buffer.
            readOffset = continuationOffset;
            isEof = false;
            dataInBuffer = 0;
            indxInBuffer = 0;
        }

        if (isEof)
        {
            return false;
        }

        var header = new byte[HeaderLength];
        persistentStorage.ReadAll(readOffset, header, 0, header.Length);
        continuationOffset = BitConverter.ToInt64(header, 0);
        listEndOffset = readOffset + HeaderLength + BitConverter.ToInt32(header, sizeof(long));
        readOffset += header.Length;
        state = 1;
    }

    switch (state)
    {
        case 1:
        {
            // All three values use widths from a freshly read selector set.
            if (!ReadSelectors())
            {
                throw new Exception("Wrong data");
            }

            EnsureBuffer(selector1 + selector2 + selector3);
            var doc = GroupVarint.ReadInt(buffer, indxInBuffer, selector1);
            indxInBuffer += selector1;
            var field = GroupVarint.ReadInt(buffer, indxInBuffer, selector2);
            indxInBuffer += selector2;
            var token = GroupVarint.ReadInt(buffer, indxInBuffer, selector3);
            indxInBuffer += selector3;

            if (isEof && indxInBuffer >= dataInBuffer)
            {
                state = 0;
            }
            else
            {
                state = 4;
            }

            current = Occurrence.O((ulong)doc, (ulong)field, (ulong)token);
            return true;
        }

        case 4:
        {
            // Document id uses the last width of the previous selector set; the rest use the next set.
            EnsureBuffer(selector4);
            var doc = GroupVarint.ReadInt(buffer, indxInBuffer, selector4);
            indxInBuffer += selector4;

            if (!ReadSelectors())
            {
                throw new Exception("Wrong data");
            }

            EnsureBuffer(selector1 + selector2);
            var field = GroupVarint.ReadInt(buffer, indxInBuffer, selector1);
            indxInBuffer += selector1;
            var token = GroupVarint.ReadInt(buffer, indxInBuffer, selector2);
            indxInBuffer += selector2;

            if (isEof && indxInBuffer >= dataInBuffer)
            {
                state = 0;
            }
            else
            {
                state = 3;
            }

            current = Occurrence.O((ulong)doc, (ulong)field, (ulong)token);
            return true;
        }

        case 3:
        {
            // Document and field ids use the previous selector set; the token id uses the next set.
            EnsureBuffer(selector3 + selector4);
            var doc = GroupVarint.ReadInt(buffer, indxInBuffer, selector3);
            indxInBuffer += selector3;
            var field = GroupVarint.ReadInt(buffer, indxInBuffer, selector4);
            indxInBuffer += selector4;

            if (!ReadSelectors())
            {
                throw new Exception("Wrong data");
            }

            EnsureBuffer(selector1);
            var token = GroupVarint.ReadInt(buffer, indxInBuffer, selector1);
            indxInBuffer += selector1;

            if (isEof && indxInBuffer >= dataInBuffer)
            {
                state = 0;
            }
            else
            {
                state = 2;
            }

            current = Occurrence.O((ulong)doc, (ulong)field, (ulong)token);
            return true;
        }

        case 2:
        {
            // All three values use the remaining widths of the current selector set.
            EnsureBuffer(selector2 + selector3 + selector4);
            var doc = GroupVarint.ReadInt(buffer, indxInBuffer, selector2);
            indxInBuffer += selector2;
            var field = GroupVarint.ReadInt(buffer, indxInBuffer, selector3);
            indxInBuffer += selector3;
            var token = GroupVarint.ReadInt(buffer, indxInBuffer, selector4);
            indxInBuffer += selector4;

            if (isEof && indxInBuffer >= dataInBuffer)
            {
                state = 0;
            }
            else
            {
                state = 1;
            }

            current = Occurrence.O((ulong)doc, (ulong)field, (ulong)token);
            return true;
        }

        default:
            throw new Exception("What?");
    }
}
/// <summary>
/// Appends an occurrence to the in-memory integer buffer, delta-encoded against the
/// previously added occurrence.
/// </summary>
/// <remarks>
/// The first occurrence is stored as three absolute values, after which one buffer slot is
/// reserved for a selector. Every later occurrence stores 1, 2 or 3 values (token delta;
/// field delta + token id; document delta + field id + token id) and records that count
/// in the next two bits of the selector. A count of zero is deliberately never produced:
/// the last selector may have unused bits, and a zero-valued field would then be read
/// back as extra trailing occurrences. When 32 selector bits are used the selector is
/// stored in its reserved slot; then either the buffer is flushed to storage (when
/// <c>remainingBlocks</c> is 1) or the block counter is decremented, and a slot for the
/// next selector is reserved.
/// </remarks>
/// <param name="occurrence">Occurrence to append.</param>
public void AddOccurrence(Occurrence occurrence)
{
    if (first)
    {
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.DocumentId;
            buffer[bufferIndex++] = (int)occurrence.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }

        previous = occurrence;
        first = false;

        // Reserve the slot that will receive the first selector.
        deltaSelectorIndex = bufferIndex++;
        return;
    }

    int valueCount;
    if (previous.DocumentId != occurrence.DocumentId)
    {
        valueCount = 3;
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.DocumentId - (int)previous.DocumentId;
            buffer[bufferIndex++] = (int)occurrence.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }
    }
    else if (previous.FieldId != occurrence.FieldId)
    {
        valueCount = 2;
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.FieldId - (int)previous.FieldId;
            buffer[bufferIndex++] = (int)occurrence.TokenId;
        }
    }
    else
    {
        // Same document and field: always store the token delta, even when it is zero.
        valueCount = 1;
        checked
        {
            buffer[bufferIndex++] = (int)occurrence.TokenId - (int)previous.TokenId;
        }
    }

    previous = occurrence;
    deltaSelector |= valueCount << deltaSelectorOffset;
    deltaSelectorOffset += 2;

    if (deltaSelectorOffset != 32)
    {
        return;
    }

    // The selector is full: store it in its reserved slot and start a new one.
    buffer[deltaSelectorIndex] = deltaSelector;
    deltaSelector = 0;
    deltaSelectorOffset = 0;

    if (remainingBlocks == 1)
    {
        // Encode only a multiple of four integers; the 0-3 left over move to the buffer start.
        var tail = bufferIndex % 4;
        var encodable = bufferIndex - tail;

        var byteCount = GroupVarint.Encode(buffer, 0, encodable, flushBuffer, 0);
        persistentStorage.WriteAll(persistentStorage.Length, flushBuffer, 0, byteCount);
        totalSize += byteCount;

        Array.Copy(buffer, encodable, buffer, 0, tail);
        bufferIndex = tail;
        remainingBlocks = BlocksInMemory;
    }
    else
    {
        remainingBlocks--;
    }

    // Reserve space for the next delta selector.
    deltaSelectorIndex = bufferIndex++;
}
/// <summary>
/// Initializes the delta writer over the given storage and allocates its working buffers:
/// an integer buffer of twice <c>FlushThreshold</c> entries and a byte buffer sized by
/// <c>GroupVarint.GetMaxEncodedSize</c> for that many entries.
/// </summary>
/// <param name="storage">Storage the writer keeps a reference to; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="storage"/> is null.</exception>
public PostingListBinaryDeltaWriter(IPersistentStorage storage)
{
    // Fail fast here instead of with a NullReferenceException on the first use of the storage.
    if (storage == null)
    {
        throw new System.ArgumentNullException(nameof(storage));
    }

    this.buffer = new int[FlushThreshold * 2];
    this.flushBuffer = new byte[GroupVarint.GetMaxEncodedSize(buffer.Length)];
    this.persistentStorage = storage;
}