/// <summary>
/// Builds a one-document index, writes its first inverted index entry to an <c>LmdbIndex</c>,
/// reads the entry back by key, and checks that <c>IntersectTokenSets</c> on the built token set
/// returns a result with exactly one edge.
/// </summary>
public void Can_persist_inverted_index_entries()
{
    using var store = new LmdbIndex(_tempDir.NewDirectory());

    var builder = new Builder();
    builder.AddField("title");

    var document = new Document
    {
        { "id", "id" },
        { "title", "test" },
        { "body", "missing" }
    };

    // The test method is synchronous, so block until the builder has finished adding the document.
    builder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

    Index built = builder.Build();

    // Fail fast if the build produced no inverted index keys at all.
    var key = built.InvertedIndex.Keys.FirstOrDefault() ?? throw new InvalidOperationException();
    Assert.NotNull(key);

    var entry = built.InvertedIndex[key];
    var wasAdded = store.AddInvertedIndexEntry(key, entry, CancellationToken.None);
    Assert.True(wasAdded);

    var persisted = store.GetInvertedIndexEntryByKey(key);
    Assert.NotNull(persisted);

    var intersection = store.IntersectTokenSets(built.TokenSet);
    Assert.Single(intersection.Edges);
}
/// <summary>
/// Rebuilds the search index from every record streamed out of <paramref name="store"/> and
/// assigns the result to <c>_index</c>.
/// </summary>
/// <param name="store">Source of the records to index.</param>
/// <param name="cancellationToken">Token forwarded to <c>StreamRecordsAsync</c>.</param>
/// <returns>A task that completes once the new index has been assigned to <c>_index</c>.</returns>
public async Task RebuildAsync(IRecordStore store, CancellationToken cancellationToken = default)
{
    _index = await Index.Build(async builder =>
    {
        var fields = new HashSet<string>();
        var sw = Stopwatch.StartNew();
        var count = 0UL;

        await foreach (var entry in store.StreamRecordsAsync(cancellationToken))
        {
            // NOTE(review): if Document rejects duplicate keys, a column named "id" would
            // collide with the key added here — confirm against the record schema.
            var document = new Document { { "id", entry.Uuid } };

            foreach (var column in entry.Columns)
            {
                // HashSet.Add returns false for a name that was already seen, so every
                // distinct column name is registered as a field exactly once.
                if (fields.Add(column.Name))
                {
                    builder.AddField(column.Name);
                }

                document.Add(column.Name, column.Value);
            }

            await builder.Add(document);
            count++;
        }

        // Constant message template with arguments (instead of an interpolated string) so the
        // values are captured as structured properties and only formatted when the level is enabled.
        _logger?.LogInformation(
            "Indexing {Count} documents took {ElapsedMilliseconds}ms",
            count,
            sw.Elapsed.TotalMilliseconds);
    });
}
/// <summary>
/// Copies the fields, field vectors and inverted index entries of <paramref name="index"/> into a
/// new <c>LmdbIndex</c> opened on a fresh directory from <c>_tempDir</c>, asserting that each
/// add call returns <c>true</c>.
/// </summary>
/// <param name="index">The index whose contents are copied.</param>
/// <returns>A <c>DelegatedIndex</c> built from the new store and the pipeline of <paramref name="index"/>.</returns>
private DelegatedIndex CopyIndex(Lunr.Index index)
{
    var directory = _tempDir.NewDirectory();
    var store = new LmdbIndex(directory);

    foreach (var fieldName in index.Fields)
    {
        var fieldAdded = store.AddField(fieldName);
        Assert.True(fieldAdded);
    }

    foreach (var (vectorKey, vector) in index.FieldVectors)
    {
        var vectorAdded = store.AddFieldVector(vectorKey, vector);
        Assert.True(vectorAdded);
    }

    foreach (var (term, entry) in index.InvertedIndex)
    {
        var entryAdded = store.AddInvertedIndexEntry(term, entry);
        Assert.True(entryAdded);
    }

    return new DelegatedIndex(store, index.Pipeline);
}
/// <summary>
/// Adds a single document to a <c>Builder</c> that only has the "title" field registered, builds
/// the index, and checks the builder's inverted index, field vectors, token set, document count
/// and average field length.
/// </summary>
public async Task BuilderBuildsInvertedIndex()
{
    var builder = new Builder();
    builder.AddField("title");

    var document = new Document
    {
        { "id", "id" },
        { "title", "test" },
        { "body", "missing" }
    };
    await builder.Add(document);

    Index built = builder.Build();

    // The posting for term "test" in field "title" of document "id" exists and is empty.
    var posting = builder.InvertedIndex["test"]["title"]["id"];
    Assert.Empty(posting);

    Assert.IsType<Vector>(builder.FieldVectors["title/id"]);

    // "missing" only occurs in "body", which was never registered as a field.
    var containsBodyTerm = builder.InvertedIndex.ContainsKey("missing");
    Assert.False(containsBodyTerm);

    var query = TokenSet.FromString("test");
    var matches = builder.TokenSet.Intersect(query).ToEnumeration();
    Assert.Contains("test", matches);

    Assert.Equal(1, builder.DocumentCount);
    Assert.Equal(1, builder.AverageFieldLength["title"]);
    Assert.NotNull(built);
}
/// <summary>
/// Serializes the inverted index of a freshly built one-document index, deserializes it again,
/// and compares the two with <c>AssertInvertedIndex</c>.
/// </summary>
public void Can_round_trip_inverted_indexes()
{
    var builder = new Builder();
    builder.AddField("title");

    var document = new Document
    {
        { "id", "id" },
        { "title", "test" },
        { "body", "missing" }
    };

    // The test method is synchronous, so block until the builder has finished adding the document.
    builder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

    Index built = builder.Build();

    var expected = built.InvertedIndex;
    var serialized = expected.Serialize();
    var actual = serialized.DeserializeInvertedIndex();

    AssertInvertedIndex(expected, actual);
}
/// <summary>
/// Serializes the token set of a freshly built one-document index, deserializes it again, and
/// checks that the result is a different instance that enumerates the same values.
/// </summary>
public void Can_round_trip_token_set()
{
    var builder = new Builder();
    builder.AddField("title");

    var document = new Document
    {
        { "id", "id" },
        { "title", "test" },
        { "body", "missing" }
    };

    // The test method is synchronous, so block until the builder has finished adding the document.
    builder.Add(document).ConfigureAwait(false).GetAwaiter().GetResult();

    Index built = builder.Build();

    var expected = built.TokenSet;
    var serialized = expected.Serialize();
    var actual = serialized.DeserializeTokenSet();

    // A round trip must produce a new object, not hand back the original reference.
    Assert.NotSame(expected, actual);
    Assert.Equal(expected.ToEnumeration(), actual.ToEnumeration());
}
/// <summary>
/// Builds an index with "position" on the metadata allow list, searches for "developer", and
/// checks that the single result carries a position of <c>Slice(5, 10)</c> under the
/// "develop" / "body" / "position" posting.
/// </summary>
public async Task BuilderCanIncludeTokenPositions()
{
    Index index = await Index.Build(async builder =>
    {
        builder.MetadataAllowList.Add("position");

        builder.AddField("href", 3);
        builder.AddField("title", 2);
        builder.AddField("body", 1);

        var document = new Document
        {
            { "id", "me" },
            { "href", "http://bertrandleroy.net" },
            { "title", "Bertrand" },
            { "body", "I am developer." }
        };
        await builder.Add(document);
    });

    var hits = await index.Search("developer").ToList();
    Result developer = hits.Single();

    var expected = new Slice(5, 10);
    var positions = developer.MatchData.Posting["develop"]["body"]["position"];

    Assert.Equal(expected, (Slice?)positions.Single());
}