/// <summary>
/// Writes one page per column in <c>_index</c> to that column's index files,
/// then clears the session factory's page info.
/// </summary>
/// <remarks>
/// Only the first call does any work. The <c>_flushed</c> flag is raised before
/// writing starts, so later calls return immediately even if an earlier call
/// threw part-way through.
/// </remarks>
public void Flush()
{
    if (_flushed)
    {
        return;
    }

    _flushed = true;

    foreach (var entry in _index)
    {
        var key = entry.Key;
        var tree = entry.Value;

        // Ids are set on the tree before any stream is opened for it.
        GraphBuilder.SetIdsOnAllNodes(tree);

        // Each column gets two files: "<collection>.<key>.ix" for the pages and
        // "<collection>.<key>.ixtp" for the page index.
        var indexPath = Path.Combine(_sessionFactory.Dir, $"{_collectionId}.{key}.ix");
        var pageIndexPath = Path.Combine(_sessionFactory.Dir, $"{_collectionId}.{key}.ixtp");

        using (var indexStream = _sessionFactory.CreateAppendStream(indexPath))
        using (var columnWriter = new ColumnStreamWriter(_collectionId, key, indexStream))
        using (var pageIndexWriter = new PageIndexWriter(_sessionFactory.CreateAppendStream(pageIndexPath)))
        {
            // The vector and postings streams are shared fields, not per-column streams.
            var size = columnWriter.CreatePage(tree, _vectorStream, _postingsStream, pageIndexWriter);

            _logger.LogInformation($"serialized column {key} weight {tree.Weight} {size}");
        }
    }

    _sessionFactory.ClearPageInfo();
}
// Round-trip test: builds a tree from _data, writes it as a single page to
// in-memory streams, then reads it back through ColumnStreamReader and checks
// that every token of every word in _data finds a match whose score is at
// least model.IdenticalAngle.
public void Can_traverse_streamed()
{
    var model = new TextModel();
    var tree = GraphBuilder.CreateTree(model, _data);

    using (var indexStream = new MemoryStream())
    using (var vectorStream = new MemoryStream())
    using (var pageStream = new MemoryStream())
    {
        // Both writers are disposed before reading starts. The page index writer
        // used to be created inline and never disposed; it now gets its own using.
        // keepStreamOpen: true is passed so the memory streams can still be read
        // afterwards.
        using (var writer = new ColumnStreamWriter(indexStream, keepStreamOpen: true))
        using (var pageIndexWriter = new PageIndexWriter(pageStream, keepStreamOpen: true))
        {
            writer.CreatePage(tree, vectorStream, pageIndexWriter);
        }

        // Rewind the page index so the reader starts from its first entry.
        pageStream.Position = 0;

        Assert.DoesNotThrow(() =>
        {
            using (var reader = new ColumnStreamReader(
                new PageIndexReader(pageStream),
                indexStream,
                vectorStream,
                _sessionFactory,
                _loggerFactory.CreateLogger<ColumnStreamReader>()))
            {
                foreach (var word in _data)
                {
                    foreach (var queryVector in model.Tokenize(word))
                    {
                        var hit = reader.ClosestMatch(queryVector, model);

                        // A missing hit means the token could not be located at all.
                        if (hit == null)
                        {
                            throw new Exception($"unable to find {word} in tree.");
                        }

                        // A hit below IdenticalAngle is not accepted as the same token.
                        if (hit.Score < model.IdenticalAngle)
                        {
                            throw new Exception($"unable to score {word}.");
                        }

                        Debug.WriteLine($"{word} matched vector in disk with {hit.Score * 100}% certainty.");
                    }
                }
            }
        });
    }
}