Esempio n. 1
0
        /// <summary>
        /// Writes every entry of <c>_data</c> as its own document, streaming one in-memory index
        /// per entry to the collection's index stream, and then searches for each entry again.
        /// The test fails when an entry yields no document or when the first hit scores below
        /// <c>model.IdenticalAngle</c>.
        /// </summary>
        public void Can_search_filestreamed_with_multiple_pages()
        {
            const string collection = "Can_search_streamed_with_one_page_per_document";
            const string fieldName  = "description";

            var model        = new BagOfCharsModel();
            var collectionId = collection.ToHash();

            // Start from an empty collection.
            _sessionFactory.Truncate(collectionId);

            using (var indexStream = new WritableIndexStream(collectionId, _sessionFactory))
            using (var documents = new WriteSession(new DocumentWriter(collectionId, _sessionFactory)))
            {
                var keyId = documents.EnsureKeyExists(fieldName);

                foreach (var text in _data)
                {
                    // A fresh index session per entry, so each entry is written to the stream separately.
                    using (var page = new IndexSession<string>(model, model))
                    {
                        var fields = new Field[] { new Field(fieldName, text, index: true, store: true) };
                        var doc    = new Document(fields);

                        documents.Put(doc);
                        page.Put(doc.Id, keyId, text);
                        indexStream.Write(page.GetInMemoryIndex());
                    }
                }
            }

            var parser = new QueryParser<string>(_sessionFactory, model);

            using (var searcher = new SearchSession(_sessionFactory, model, new PostingsReader(_sessionFactory)))
            {
                Assert.DoesNotThrow(() =>
                {
                    foreach (var word in _data)
                    {
                        var query = parser.Parse(collection, word, fieldName, fieldName, and: true, or: false);

                        // Only the first hit is requested and inspected.
                        var hit = searcher.Search(query, 0, 1).Documents.FirstOrDefault();

                        if (hit == null)
                        {
                            throw new Exception($"unable to find {word}.");
                        }

                        if (hit.Score < model.IdenticalAngle)
                        {
                            throw new Exception($"unable to score {word}.");
                        }

                        Debug.WriteLine($"{word} matched with {hit.Score * 100}% certainty.");
                    }
                });
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Reads MNIST images, stores each one as a document with an indexed "image" field and a
        /// stored "label" field, writes the resulting in-memory index to the collection's index
        /// stream, logs the elapsed time and prints the tree built for the "image" key.
        /// </summary>
        /// <param name="args">
        /// Must contain the keys "dataDirectory", "collection", "imageFileName" and "labelFileName".
        /// </param>
        /// <param name="logger">Logger handed to the session factory, debugger and index stream.</param>
        public void Run(IDictionary<string, string> args, ILogger logger)
        {
            var stopwatch     = Stopwatch.StartNew();
            var dataDirectory = args["dataDirectory"];
            var collectionId  = args["collection"].ToHash();
            var images        = new MnistReader(args["imageFileName"], args["labelFileName"]).Read();
            var debugger      = new IndexDebugger(logger);
            var model         = new LinearClassifierImageModel();

            // Assigned inside the using blocks and printed once they have been disposed.
            VectorNode tree;

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            {
                // Start from an empty collection.
                sessionFactory.Truncate(collectionId);

                using (var documents = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
                using (var index = new IndexSession<IImage>(model, model))
                {
                    var imageIndexId = documents.EnsureKeyExists("image");

                    foreach (var image in images)
                    {
                        var imageField = new Field("image", image.Pixels, index: true, store: true);
                        var document   = new Document(new Field[]
                        {
                            imageField,
                            new Field("label", image.Label, index: false, store: true)
                        });

                        documents.Put(document);
                        index.Put(document.Id, imageField.KeyId, image);

                        debugger.Step(index);
                    }

                    var inMemoryIndex = index.GetInMemoryIndex();

                    tree = inMemoryIndex[imageIndexId];

                    using (var indexStream = new WritableIndexStream(collectionId, sessionFactory, logger: logger))
                    {
                        indexStream.Write(inMemoryIndex);
                    }
                }
            }

            logger.LogInformation($"indexed {debugger.Steps} mnist images in {stopwatch.Elapsed}");

            Print(tree);
        }