/// <summary>
/// Builds a one-document index, writes its first inverted-index entry into a fresh
/// <see cref="LmdbIndex"/>, and checks that the entry can be read back by key and that
/// intersecting with the in-memory token set yields exactly one edge.
/// </summary>
public void Can_persist_inverted_index_entries()
        {
            using var store = new LmdbIndex(_tempDir.NewDirectory());

            var indexBuilder = new Builder();
            indexBuilder.AddField("title");

            var document = new Document
            {
                { "id", "id" },
                { "title", "test" },
                { "body", "missing" }
            };

            // The test method is synchronous, so the asynchronous add is awaited by blocking.
            indexBuilder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

            Index built = indexBuilder.Build();

            // Take whichever key the inverted index enumerates first; an empty index is an error.
            var key = built.InvertedIndex.Keys.FirstOrDefault();
            if (key is null)
            {
                throw new InvalidOperationException();
            }

            Assert.NotNull(key);

            var posting = built.InvertedIndex[key];
            var wasAdded = store.AddInvertedIndexEntry(key, posting, CancellationToken.None);
            Assert.True(wasAdded);

            var storedEntry = store.GetInvertedIndexEntryByKey(key);
            Assert.NotNull(storedEntry);

            var intersection = store.IntersectTokenSets(built.TokenSet);
            Assert.Single(intersection.Edges);
        }
        /// <summary>
        /// Rebuilds the index from every record streamed out of <paramref name="store"/> and
        /// assigns the result to <c>_index</c>.
        /// </summary>
        /// <param name="store">Source of the records to index.</param>
        /// <param name="cancellationToken">Token handed to <c>store.StreamRecordsAsync</c>.</param>
        /// <returns>A task that completes once the new index has been built and stored.</returns>
        public async Task RebuildAsync(IRecordStore store, CancellationToken cancellationToken = default)
        {
            _index = await Index.Build(async builder =>
            {
                // Column names that have already been registered as index fields.
                var fields = new HashSet<string>();
                var sw     = Stopwatch.StartNew();
                var count  = 0UL;

                await foreach (var entry in store.StreamRecordsAsync(cancellationToken))
                {
                    foreach (var column in entry.Columns)
                    {
                        // HashSet.Add returns false for a name that is already present, so a single
                        // lookup both detects and records a newly seen column.
                        if (fields.Add(column.Name))
                        {
                            builder.AddField(column.Name);
                        }
                    }

                    // Each record becomes one document: "id" holds the record's Uuid, and every
                    // column is added under its own name.
                    var document = new Document {
                        { "id", entry.Uuid }
                    };
                    foreach (var column in entry.Columns)
                    {
                        document.Add(column.Name, column.Value);
                    }

                    await builder.Add(document);
                    count++;
                }

                _logger?.LogInformation($"Indexing {count} documents took {sw.Elapsed.TotalMilliseconds}ms");
            });
        }
        // Example #3
        // 0
        /// <summary>
        /// Copies the fields, field vectors and inverted-index entries of <paramref name="index"/>
        /// into a new <see cref="LmdbIndex"/> created under a fresh temp directory, asserting that
        /// every add returns true, and wraps the store in a <see cref="DelegatedIndex"/>.
        /// </summary>
        /// <param name="index">The in-memory index to copy from.</param>
        /// <returns>A <see cref="DelegatedIndex"/> over the new store and the source index's pipeline.</returns>
        private DelegatedIndex CopyIndex(Lunr.Index index)
        {
            var store = new LmdbIndex(_tempDir.NewDirectory());

            foreach (var fieldName in index.Fields)
            {
                var fieldAdded = store.AddField(fieldName);
                Assert.True(fieldAdded);
            }

            foreach (var (vectorKey, vector) in index.FieldVectors)
            {
                var vectorAdded = store.AddFieldVector(vectorKey, vector);
                Assert.True(vectorAdded);
            }

            foreach (var (term, posting) in index.InvertedIndex)
            {
                var entryAdded = store.AddInvertedIndexEntry(term, posting);
                Assert.True(entryAdded);
            }

            return new DelegatedIndex(store, index.Pipeline);
        }
        // Example #4
        // 0
        /// <summary>
        /// Adds one document to a builder that only indexes the "title" field, builds the index,
        /// and checks the builder's inverted index, field vectors, token set, document count and
        /// average field length.
        /// </summary>
        public async Task BuilderBuildsInvertedIndex()
        {
            var indexBuilder = new Builder();
            indexBuilder.AddField("title");

            var document = new Document
            {
                { "id", "id" },
                { "title", "test" },
                { "body", "missing" }
            };
            await indexBuilder.Add(document);

            Index built = indexBuilder.Build();

            // "test" comes from the registered "title" field of document "id".
            var titlePosting = indexBuilder.InvertedIndex["test"]["title"]["id"];
            Assert.Empty(titlePosting);

            var titleVector = indexBuilder.FieldVectors["title/id"];
            Assert.IsType<Vector>(titleVector);

            // "body" was never registered as a field, so its term must not be present.
            var hasBodyTerm = indexBuilder.InvertedIndex.ContainsKey("missing");
            Assert.False(hasBodyTerm);

            var matches = indexBuilder.TokenSet.Intersect(TokenSet.FromString("test")).ToEnumeration();
            Assert.Contains("test", matches);

            var documentCount = indexBuilder.DocumentCount;
            Assert.Equal(1, documentCount);

            var averageTitleLength = indexBuilder.AverageFieldLength["title"];
            Assert.Equal(1, averageTitleLength);

            Assert.NotNull(built);
        }
        /// <summary>
        /// Serializes and then deserializes the inverted index of a one-document index and passes
        /// both copies to <c>AssertInvertedIndex</c>.
        /// </summary>
        public void Can_round_trip_inverted_indexes()
        {
            var indexBuilder = new Builder();
            indexBuilder.AddField("title");

            var document = new Document
            {
                { "id", "id" },
                { "title", "test" },
                { "body", "missing" }
            };

            // The test method is synchronous, so the asynchronous add is awaited by blocking.
            indexBuilder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

            Index built = indexBuilder.Build();

            var source   = built.InvertedIndex;
            var payload  = source.Serialize();
            var restored = payload.DeserializeInvertedIndex();

            AssertInvertedIndex(source, restored);
        }
        /// <summary>
        /// Serializes and then deserializes the token set of a one-document index and checks that
        /// the result is a different instance that enumerates the same values as the original.
        /// </summary>
        public void Can_round_trip_token_set()
        {
            var indexBuilder = new Builder();
            indexBuilder.AddField("title");

            var document = new Document
            {
                { "id", "id" },
                { "title", "test" },
                { "body", "missing" }
            };

            // The test method is synchronous, so the asynchronous add is awaited by blocking.
            indexBuilder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

            Index built = indexBuilder.Build();

            var source   = built.TokenSet;
            var payload  = source.Serialize();
            var restored = payload.DeserializeTokenSet();

            Assert.NotSame(source, restored);

            var expected = source.ToEnumeration();
            var actual   = restored.ToEnumeration();
            Assert.Equal(expected, actual);
        }
        // Example #7
        // 0
        /// <summary>
        /// Builds an index with "position" on the metadata allow list, searches for "developer",
        /// and checks that the single result records <c>Slice(5, 10)</c> as the position of
        /// "develop" in the "body" field.
        /// </summary>
        public async Task BuilderCanIncludeTokenPositions()
        {
            Index built = await Index.Build(async indexBuilder =>
            {
                indexBuilder.MetadataAllowList.Add("position");

                indexBuilder.AddField("href", 3);
                indexBuilder.AddField("title", 2);
                indexBuilder.AddField("body", 1);

                var document = new Document
                {
                    { "id", "me" },
                    { "href", "http://bertrandleroy.net" },
                    { "title", "Bertrand" },
                    { "body", "I am developer." }
                };
                await indexBuilder.Add(document);
            });

            var hits = await built.Search("developer").ToList();
            Result developer = hits.Single();

            var expected  = new Slice(5, 10);
            var positions = developer.MatchData.Posting["develop"]["body"]["position"];

            Assert.Equal(expected, (Slice?)positions.Single());
        }