public void ChainedRangeReadWriteTest()
{
    // Arrange: a chain mixing range-compressed lists with a plain id list.
    var memory = new MemoryStream();
    var expected = new ListChain<DocumentId>
    {
        new RangePostingsList() { 0, 1, 2 },
        GetDocIds(10, 11),
        new RangePostingsList() { 12, 13, 14, 15, 100, 111 },
    };

    // Act: serialize the chain, rewind, and read it back from offset 0.
    using var writer = new PostingsListWriter(memory);
    writer.Write(expected);
    memory.Seek(0, SeekOrigin.Begin);
    using var reader = new PostingsListReader(memory, leaveOpen: false);
    var storedCount = reader.ReadCount(0);
    var roundTripped = reader.Read(0);

    // Assert: the round trip preserves count, representation and contents.
    Assert.Equal(expected.Count, storedCount);
    Assert.True(roundTripped is RangePostingsList);
    Assert.Equal(expected, roundTripped);
}
// NOTE(review): this member is truncated in the visible chunk — the method body
// continues past `T currentTerm = default!;` and its closing brace is not shown.
// Documented in place only; no code changed.
//
// Merges the postings of every non-empty in-memory index into a single external
// index via a k-way merge: one enumerator per index is pushed onto a min-heap
// keyed by ComparePostingLists (comparer arguments swapped to invert ordering).
public ExternalIndex <T> Build()
{
    var composer = new ExternalIndexComposer <T>(stream);
    // NOTE(review): indexInfoList appears unused in the visible fragment, and
    // GetIndices() is called twice — confirm the first call has no side effect.
    var indexInfoList = indexManager.GetIndices();
    // Only indices that actually contain documents take part in the merge.
    var indices = indexManager.GetIndices().Where(i => i.GetCount() > 0).ToArray();
    AddAllDocs(composer, indices);
    // Min-heap ordering: (y, x) swap turns the natural comparison into a
    // smallest-first priority queue over each enumerator's current term.
    var minHeapComparer = Comparer <IEnumerator <(T Term, IReadOnlyCollection <DocumentId> PostingsList)> > .Create(
        (x, y) => ComparePostingLists(y.Current, x.Current));
    var queue = new PriorityQueue <IEnumerator <(T Term, IReadOnlyCollection <DocumentId> PostingsList)> >(
        indices.Length, minHeapComparer);
    // Seed the heap with one positioned enumerator per non-empty index;
    // enumerators that yield nothing are dropped up front.
    foreach (var index in indices)
    {
        var enumerator = ReadIndex(index);
        if (enumerator.MoveNext())
        {
            queue.Push(enumerator);
        }
    }
    // Accumulates the postings of the term currently being merged.
    var docs = new ListChain <DocumentId>(indices.Length);
    T currentTerm = default !;
/// <summary>
/// Flattens the whole chain into a single varint-encoded postings list
/// and writes it through the varint serialization path.
/// </summary>
/// <param name="chain">Chain of document-id lists to serialize.</param>
private void WriteChainedVarint(ListChain<DocumentId> chain)
{
    var encoded = new VarintPostingsList(32);
    foreach (var documentId in chain)
    {
        encoded.Add(documentId);
    }

    WriteVarint(encoded);
}
public void ListChainCreateTest()
{
    // Arrange: three segments added through the collection initializer.
    var chain = new ListChain<int>
    {
        new[] { 4, 5, 6, 7 },
        new[] { 1, 2 },
        new[] { 3 },
    };

    // Assert: Count spans all segments, enumeration preserves insertion
    // order, and each added list remains a distinct segment.
    Assert.Equal(7, chain.Count);
    Assert.Equal(new[] { 4, 5, 6, 7, 1, 2, 3 }, chain.ToArray());
    Assert.Equal(3, chain.Chains.Count);
}
public void ListChainClearTest()
{
    // Arrange: a chain holding three populated segments.
    var chain = new ListChain<int>
    {
        new[] { 4, 5, 6, 7 },
        new[] { 1, 2 },
        new[] { 3 },
    };

    // Act
    chain.Clear();

    // Assert: clearing removes every element from every segment.
    Assert.Empty(chain);
    Assert.Equal(Array.Empty<int>(), chain.ToArray());
}
public void ListChainSortTest()
{
    // Arrange: segments deliberately out of order by their first element.
    var chain = new ListChain<int>
    {
        new[] { 4, 5, 6, 7 },
        new[] { 1, 2 },
        new[] { 3 },
    };

    // Act: reorder the segments (not the elements) by each segment's head.
    chain.Sort((a, b) => a.First().CompareTo(b.First()));

    // Assert: element count is unchanged and enumeration is now sorted.
    Assert.Equal(7, chain.Count);
    Assert.Equal(new[] { 1, 2, 3, 4, 5, 6, 7 }, chain.ToArray());
}
/// <summary>
/// Picks a serialization format for a chain by inspecting its leading
/// sub-lists. Falls back to the uncompressed format when no compressed
/// representative is found.
/// </summary>
/// <param name="chain">Chain whose storage format is being chosen.</param>
/// <returns>The format the chain should be written in.</returns>
private static PostingsListType DetectType(ListChain<DocumentId> chain)
{
    var lists = chain.Chains;

    // The first sub-list decides between varint and ranged encoding.
    if (lists.Count > 0)
    {
        if (lists[0] is VarintPostingsList)
        {
            return PostingsListType.Varint;
        }

        if (lists[0] is RangePostingsList)
        {
            return PostingsListType.Ranged;
        }
    }

    // A ranged second sub-list still selects the ranged encoding.
    // NOTE(review): the second slot is only probed for RangePostingsList,
    // not VarintPostingsList — presumably intentional; confirm.
    if (lists.Count > 1 && lists[1] is RangePostingsList)
    {
        return PostingsListType.Ranged;
    }

    return PostingsListType.Uncompressed;
}
/// <summary>
/// Writes a chain using the most compact representation DetectType finds.
/// The complicated logic below exists for the sake of performance only:
/// any ListChain can always be processed as an uncompressed list of Ids.
/// </summary>
/// <param name="chain">List of chains</param>
private void WriteChained(ListChain<DocumentId> chain)
{
    // The format tag byte always precedes the payload it describes.
    var type = DetectType(chain);
    switch (type)
    {
        case PostingsListType.Ranged:
            writer.Write((byte)type);
            WriteChainedRanges(chain);
            break;
        case PostingsListType.Varint:
            writer.Write((byte)type);
            WriteChainedVarint(chain);
            break;
        default:
            writer.Write((byte)PostingsListType.Uncompressed);
            WriteUncompressed(chain);
            break;
    }
}
/// <summary>
/// Serializes a chain as a flat sequence of range blocks, prefixed with the
/// block count. The count is unknown until everything is written, so a
/// placeholder is reserved up front and patched afterwards.
/// </summary>
/// <param name="chain">Chain whose sub-lists are written as ranges.</param>
private void WriteChainedRanges(ListChain<DocumentId> chain)
{
    // Reserve a slot for the block count; its value is not known yet.
    var countPosition = stream.Position;
    var blockCount = 0;
    writer.Write(blockCount);

    // Emit every range of every sub-list, tallying the blocks written.
    foreach (var list in chain.Chains)
    {
        var asRange = GetRange(list);
        foreach (var block in asRange.Ranges)
        {
            writer.Write(block);
            ++blockCount;
        }
    }

    // Patch the reserved slot with the real count, then restore the
    // stream position so subsequent writes append after the payload.
    var endPosition = stream.Position;
    stream.Seek(countPosition, SeekOrigin.Begin);
    writer.Write(blockCount);
    stream.Seek(endPosition, SeekOrigin.Begin);
}