/// <summary>
/// Stores every entry of <c>_data</c> as a document, writing a separate in-memory index
/// to the index stream for each one, and then verifies that searching for each entry
/// returns a top hit whose score is at least <c>model.IdenticalAngle</c>.
/// </summary>
public void Can_search_filestreamed_with_multiple_pages()
{
    const string collection = "Can_search_streamed_with_one_page_per_document";
    const string fieldName = "description";

    var model = new BagOfCharsModel();
    var collectionId = collection.ToHash();

    // Truncate the collection up front (presumably removes data left by earlier runs).
    _sessionFactory.Truncate(collectionId);

    // Write phase: both writers are disposed before any search is issued.
    using (var indexStream = new WritableIndexStream(collectionId, _sessionFactory))
    using (var documents = new WriteSession(new DocumentWriter(collectionId, _sessionFactory)))
    {
        var keyId = documents.EnsureKeyExists(fieldName);

        for (long position = 0; position < _data.Length; position++)
        {
            var text = _data[position];

            // A fresh index session per entry, so each entry's index is written
            // to the stream on its own.
            using (var indexSession = new IndexSession<string>(model, model))
            {
                var field = new Field(fieldName, text, index: true, store: true);
                var doc = new Document(new Field[] { field });

                documents.Put(doc);
                indexSession.Put(doc.Id, keyId, text);
                indexStream.Write(indexSession.GetInMemoryIndex());
            }
        }
    }

    // Search phase: every entry must be found and must score at least IdenticalAngle.
    var parser = new QueryParser<string>(_sessionFactory, model);

    using (var searchSession = new SearchSession(_sessionFactory, model, new PostingsReader(_sessionFactory)))
    {
        Assert.DoesNotThrow(() =>
        {
            foreach (var word in _data)
            {
                var query = parser.Parse(collection, word, fieldName, fieldName, and: true, or: false);

                // Only the first hit is requested (skip 0, take 1).
                var document = searchSession.Search(query, 0, 1).Documents.FirstOrDefault();

                if (document == null)
                {
                    throw new Exception($"unable to find {word}.");
                }

                if (document.Score < model.IdenticalAngle)
                {
                    throw new Exception($"unable to score {word}.");
                }

                Debug.WriteLine($"{word} matched with {document.Score * 100}% certainty.");
            }
        });
    }
}
/// <summary>
/// Reads MNIST images, stores each as a document with an "image" and a "label" field,
/// indexes the image, writes the resulting in-memory index to the collection's index
/// stream, logs the elapsed time and passes the "image" index tree to <c>Print</c>.
/// </summary>
/// <param name="args">
/// Looked up by key: "dataDirectory", "collection", "imageFileName" and "labelFileName".
/// </param>
/// <param name="logger">
/// Handed to the session factory, the index debugger and the index stream, and used for
/// the final summary message.
/// </param>
public void Run(IDictionary<string, string> args, ILogger logger)
{
    var time = Stopwatch.StartNew();

    var dataDirectory = args["dataDirectory"];
    var collectionId = args["collection"].ToHash();
    var images = new MnistReader(args["imageFileName"], args["labelFileName"]).Read();
    var debugger = new IndexDebugger(logger);
    var model = new LinearClassifierImageModel();

    // Assigned inside the using blocks below; consumed after they have been disposed.
    VectorNode tree;

    using (var sessionFactory = new SessionFactory(dataDirectory, logger))
    {
        sessionFactory.Truncate(collectionId);

        using (var documents = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
        using (var indexSession = new IndexSession<IImage>(model, model))
        {
            var imageKeyId = documents.EnsureKeyExists("image");

            foreach (var image in images)
            {
                var pixels = new Field("image", image.Pixels, index: true, store: true);
                var label = new Field("label", image.Label, index: false, store: true);
                var document = new Document(new Field[] { pixels, label });

                documents.Put(document);

                // NOTE(review): KeyId is read only after Put; presumably Put assigns it. Confirm.
                indexSession.Put(document.Id, pixels.KeyId, image);
                debugger.Step(indexSession);
            }

            var index = indexSession.GetInMemoryIndex();
            tree = index[imageKeyId];

            // The whole index is written once, after all images have been indexed.
            using (var stream = new WritableIndexStream(collectionId, sessionFactory, logger: logger))
            {
                stream.Write(index);
            }
        }
    }

    logger.LogInformation($"indexed {debugger.Steps} mnist images in {time.Elapsed}");
    Print(tree);
}