Exemple #1
0
        /// <summary>
        /// Serializes every column held in <c>_index</c> to its on-disk index files.
        /// Only the first call does any work; later calls return immediately.
        /// </summary>
        /// <remarks>
        /// For each column, node ids are assigned via <c>GraphBuilder.SetIdsOnAllNodes</c>, then a page is
        /// appended to <c>{collectionId}.{key}.ix</c> and recorded through a page index writer over
        /// <c>{collectionId}.{key}.ixtp</c>. After all columns are written, the session factory's
        /// page info is cleared.
        /// </remarks>
        public void Flush()
        {
            // The flag is raised before any I/O, so a second call is a no-op even if this one fails midway.
            if (_flushed)
            {
                return;
            }

            _flushed = true;

            foreach (var entry in _index)
            {
                var keyId = entry.Key;
                var tree = entry.Value;

                GraphBuilder.SetIdsOnAllNodes(tree);

                var indexFileName = Path.Combine(_sessionFactory.Dir, $"{_collectionId}.{keyId}.ix");

                // Disposal runs in reverse order: page index writer, then column writer, then index stream.
                using (var ixStream = _sessionFactory.CreateAppendStream(indexFileName))
                using (var pageWriter = new ColumnStreamWriter(_collectionId, keyId, ixStream))
                using (var pageIndex = new PageIndexWriter(_sessionFactory.CreateAppendStream(Path.Combine(_sessionFactory.Dir, $"{_collectionId}.{keyId}.ixtp"))))
                {
                    var size = pageWriter.CreatePage(tree, _vectorStream, _postingsStream, pageIndex);

                    _logger.LogInformation($"serialized column {keyId} weight {tree.Weight} {size}");
                }
            }

            _sessionFactory.ClearPageInfo();
        }
Exemple #2
0
        /// <summary>
        /// Builds a tree from <c>_data</c>, writes it as a single page to in-memory streams, and then
        /// verifies through a <c>ColumnStreamReader</c> that every token of every word can be found
        /// with a score of at least <c>model.IdenticalAngle</c>.
        /// </summary>
        public void Can_traverse_streamed()
        {
            var model = new TextModel();
            var tree = GraphBuilder.CreateTree(model, _data);

            using (var indexStream = new MemoryStream())
            using (var vectorStream = new MemoryStream())
            using (var pageStream = new MemoryStream())
            {
                // Both writers are disposed before reading starts; keepStreamOpen leaves the
                // underlying MemoryStreams usable for the read phase below.
                using (var writer = new ColumnStreamWriter(indexStream, keepStreamOpen: true))
                using (var pageIndexWriter = new PageIndexWriter(pageStream, keepStreamOpen: true))
                {
                    writer.CreatePage(tree, vectorStream, pageIndexWriter);
                }

                // Rewind so the page index reader starts from the first recorded page.
                pageStream.Position = 0;

                Assert.DoesNotThrow(() =>
                {
                    using (var reader = new ColumnStreamReader(new PageIndexReader(pageStream), indexStream, vectorStream, _sessionFactory, _loggerFactory.CreateLogger<ColumnStreamReader>()))
                    {
                        foreach (var word in _data)
                        {
                            foreach (var queryVector in model.Tokenize(word))
                            {
                                var hit = reader.ClosestMatch(queryVector, model);

                                // Any exception thrown here fails the surrounding DoesNotThrow assertion.
                                if (hit == null)
                                {
                                    throw new Exception($"unable to find {word} in tree.");
                                }

                                if (hit.Score < model.IdenticalAngle)
                                {
                                    throw new Exception($"unable to score {word}.");
                                }

                                Debug.WriteLine($"{word} matched vector in disk with {hit.Score * 100}% certainty.");
                            }
                        }
                    }
                });
            }
        }