public void ChainedRangeReadWriteTest()
        {
            // Arrange: two RangePostingsList segments with the result of GetDocIds(10, 11) in between.
            var leadingRange  = new RangePostingsList { 0, 1, 2 };
            var middleIds     = GetDocIds(10, 11);
            var trailingRange = new RangePostingsList { 12, 13, 14, 15, 100, 111 };
            var expected      = new ListChain<DocumentId> { leadingRange, middleIds, trailingRange };

            var buffer = new MemoryStream();

            // Act: write the chain, rewind the buffer, then read the list back from offset 0.
            using var postingsWriter = new PostingsListWriter(buffer);
            postingsWriter.Write(expected);

            buffer.Position = 0;

            using var postingsReader = new PostingsListReader(buffer, leaveOpen: false);
            var storedCount  = postingsReader.ReadCount(0);
            var roundTripped = postingsReader.Read(0);

            // Assert: same number of ids, read back as a range list, with equal content.
            Assert.Equal(expected.Count, storedCount);
            Assert.True(roundTripped is RangePostingsList);
            Assert.Equal(expected, roundTripped);
        }
        public ExternalIndex <T> Build()
        {
            var composer = new ExternalIndexComposer <T>(stream);

            var indexInfoList = indexManager.GetIndices();
            var indices       = indexManager.GetIndices().Where(i => i.GetCount() > 0).ToArray();

            AddAllDocs(composer, indices);

            var minHeapComparer = Comparer <IEnumerator <(T Term, IReadOnlyCollection <DocumentId> PostingsList)> > .Create(
                (x, y) => ComparePostingLists(y.Current, x.Current));

            var queue = new PriorityQueue <IEnumerator <(T Term, IReadOnlyCollection <DocumentId> PostingsList)> >(
                indices.Length,
                minHeapComparer);

            foreach (var index in indices)
            {
                var enumerator = ReadIndex(index);
                if (enumerator.MoveNext())
                {
                    queue.Push(enumerator);
                }
            }

            var docs = new ListChain <DocumentId>(indices.Length);

            T currentTerm = default !;
        /// <summary>
        /// Copies every id enumerated from <paramref name="chain"/> into one
        /// <c>VarintPostingsList</c> and passes that list to <c>WriteVarint</c>.
        /// </summary>
        /// <param name="chain">Chain whose ids are written.</param>
        private void WriteChainedVarint(ListChain<DocumentId> chain)
        {
            // NOTE(review): 32 is presumably an initial capacity/size hint - confirm against VarintPostingsList.
            var flattened = new VarintPostingsList(32);

            foreach (var documentId in chain)
            {
                flattened.Add(documentId);
            }

            WriteVarint(flattened);
        }
Ejemplo n.º 4
0
        public void ListChainCreateTest()
        {
            // Arrange: three segments added in this order.
            int[] firstSegment  = { 4, 5, 6, 7 };
            int[] secondSegment = { 1, 2 };
            int[] thirdSegment  = { 3 };
            int[] expectedItems = { 4, 5, 6, 7, 1, 2, 3 };

            // Act
            var chain = new ListChain<int> { firstSegment, secondSegment, thirdSegment };

            // Assert: total item count, items in insertion order, and one chain per added segment.
            Assert.Equal(7, chain.Count);
            Assert.Equal(expectedItems, chain.ToArray());
            Assert.Equal(3, chain.Chains.Count);
        }
Ejemplo n.º 5
0
        public void ListChainClearTest()
        {
            // Arrange: a chain populated with three segments.
            int[] firstSegment  = { 4, 5, 6, 7 };
            int[] secondSegment = { 1, 2 };
            int[] thirdSegment  = { 3 };
            var chain = new ListChain<int> { firstSegment, secondSegment, thirdSegment };

            // Act
            chain.Clear();

            // Assert: nothing is left to enumerate after clearing.
            int[] noItems = Array.Empty<int>();
            Assert.Empty(chain);
            Assert.Equal(noItems, chain.ToArray());
        }
Ejemplo n.º 6
0
        public void ListChainSortTest()
        {
            // Arrange: segments deliberately added out of order.
            int[] firstSegment  = { 4, 5, 6, 7 };
            int[] secondSegment = { 1, 2 };
            int[] thirdSegment  = { 3 };
            int[] expectedItems = { 1, 2, 3, 4, 5, 6, 7 };
            var chain = new ListChain<int> { firstSegment, secondSegment, thirdSegment };

            // Act: order the segments by their first element.
            chain.Sort((left, right) => left.First().CompareTo(right.First()));

            // Assert: no items lost, and enumeration follows the sorted segment order.
            Assert.Equal(7, chain.Count);
            Assert.Equal(expectedItems, chain.ToArray());
        }
        /// <summary>
        /// Picks the <c>PostingsListType</c> for a chain by inspecting the runtime type
        /// of its first segment and, failing that, its second segment.
        /// </summary>
        /// <param name="chain">Chain to inspect.</param>
        /// <returns>
        /// <c>Varint</c> when the first segment is a <c>VarintPostingsList</c>;
        /// <c>Ranged</c> when the first or the second segment is a <c>RangePostingsList</c>;
        /// <c>Uncompressed</c> otherwise, including for a chain without segments.
        /// </returns>
        private static PostingsListType DetectType(ListChain<DocumentId> chain)
        {
            var segments     = chain.Chains;
            var segmentCount = segments.Count;

            if (segmentCount == 0)
            {
                return PostingsListType.Uncompressed;
            }

            // The first segment takes precedence; Varint is tested before Ranged.
            switch (segments[0])
            {
            case VarintPostingsList _:
                return PostingsListType.Varint;

            case RangePostingsList _:
                return PostingsListType.Ranged;
            }

            // Only the second segment is consulted as a fallback; later segments are not inspected.
            var secondIsRange = segmentCount > 1 && segments[1] is RangePostingsList;

            return secondIsRange ? PostingsListType.Ranged : PostingsListType.Uncompressed;
        }
        /// <summary>
        /// Writes a type tag (a <c>PostingsListType</c> value cast to a byte) followed by the
        /// chain encoded by the matching writer.
        /// The type-specific branches below exist for the sake of performance only:
        /// a ListChain can always be processed as an uncompressed list of ids.
        /// </summary>
        /// <param name="chain">List of chains</param>
        private void WriteChained(ListChain<DocumentId> chain)
        {
            var detected = DetectType(chain);

            if (detected == PostingsListType.Ranged)
            {
                writer.Write((byte)PostingsListType.Ranged);
                WriteChainedRanges(chain);
                return;
            }

            if (detected == PostingsListType.Varint)
            {
                writer.Write((byte)PostingsListType.Varint);
                WriteChainedVarint(chain);
                return;
            }

            // Any other detected type is stored as a plain, uncompressed list.
            writer.Write((byte)PostingsListType.Uncompressed);
            WriteUncompressed(chain);
        }
        /// <summary>
        /// Writes the ranges of every segment of <paramref name="chain"/>, prefixed by the
        /// number of ranges written. The number is not known up front, so a placeholder is
        /// written first and overwritten once all ranges have been emitted.
        /// </summary>
        /// <param name="chain">Chain whose segments are converted with <c>GetRange</c> and written.</param>
        private void WriteChainedRanges(ListChain<DocumentId> chain)
        {
            // Remember where the count lives so it can be patched afterwards.
            var countPosition = stream.Position;
            int blockCount    = 0;

            writer.Write(blockCount);  // placeholder: the real count is not known yet

            foreach (var segment in chain.Chains)
            {
                var segmentRanges = GetRange(segment).Ranges;
                foreach (var block in segmentRanges)
                {
                    writer.Write(block);
                    blockCount++;
                }
            }

            var endPosition = stream.Position;

            // Jump back, overwrite the placeholder with the final count, then return to the end.
            stream.Seek(countPosition, SeekOrigin.Begin);
            writer.Write(blockCount);
            stream.Seek(endPosition, SeekOrigin.Begin);
        }