/// <summary>
/// Reads MNIST images, stores each one as a document and builds an in-memory
/// image index that is then written to an index stream for the collection.
/// </summary>
/// <param name="args">
/// Command arguments. The keys "dataDirectory", "collection", "imageFileName"
/// and "labelFileName" are read via the indexer, so each must be present.
/// </param>
/// <param name="logger">Logger handed to the session factory, the debugger and the index stream.</param>
public void Run(IDictionary<string, string> args, ILogger logger)
{
    var stopwatch = Stopwatch.StartNew();
    var directory = args["dataDirectory"];
    var collectionKey = args["collection"].ToHash();
    var mnistReader = new MnistReader(args["imageFileName"], args["labelFileName"]);
    var mnistImages = mnistReader.Read();

    // Assigned inside the sessions below; printed once they have been disposed.
    VectorNode imageTree;

    var indexDebugger = new IndexDebugger(logger);
    var imageModel = new LinearClassifierImageModel();

    using (var factory = new SessionFactory(directory, logger))
    {
        // Truncate is called for the collection before anything is written to it.
        factory.Truncate(collectionKey);

        var documentWriter = new DocumentWriter(collectionKey, factory);

        using (var documents = new WriteSession(documentWriter))
        using (var indexer = new IndexSession<IImage>(imageModel, imageModel))
        {
            var imageKeyId = documents.EnsureKeyExists("image");

            foreach (var mnistImage in mnistImages)
            {
                // Pixels are both indexed and stored; the label is stored only.
                var pixelField = new Field("image", mnistImage.Pixels, index: true, store: true);
                var labelField = new Field("label", mnistImage.Label, index: false, store: true);
                var fields = new Field[] { pixelField, labelField };
                var doc = new Document(fields);

                // The document is written first, then indexed using its Id
                // and the image field's KeyId.
                documents.Put(doc);
                indexer.Put(doc.Id, pixelField.KeyId, mnistImage);

                indexDebugger.Step(indexer);
            }

            var inMemoryIndex = indexer.GetInMemoryIndex();

            imageTree = inMemoryIndex[imageKeyId];

            using (var indexStream = new WritableIndexStream(collectionKey, factory, logger: logger))
            {
                indexStream.Write(inMemoryIndex);
            }
        }
    }

    logger.LogInformation($"indexed {indexDebugger.Steps} mnist images in {stopwatch.Elapsed}");

    Print(imageTree);
}
/// <summary>
/// Validates an image index: every MNIST image is turned into a query against
/// the "image" field, the top hit's "label" is compared with the image's own
/// label, and a running error rate is logged after each image.
/// </summary>
/// <param name="args">
/// Command arguments. The keys "dataDirectory", "imageFileName", "labelFileName"
/// and "collection" are read via the indexer, so each must be present.
/// </param>
/// <param name="logger">Logger used for progress output and handed to the session factory and query parser.</param>
public void Run(IDictionary<string, string> args, ILogger logger)
{
    var time = Stopwatch.StartNew();
    var dataDirectory = args["dataDirectory"];
    var images = new MnistReader(args["imageFileName"], args["labelFileName"]).Read();
    var collection = args["collection"];
    var count = 0;
    var errors = 0;
    var model = new LinearClassifierImageModel();

    using (var sessionFactory = new SessionFactory(directory: dataDirectory, logger: logger))
    using (var querySession = sessionFactory.CreateSearchSession(model))
    {
        var queryParser = new QueryParser<IImage>(sessionFactory, model, logger);

        foreach (var image in images)
        {
            var query = queryParser.Parse(collection, image, field: "image", select: "label", and: true, or: false);

            // Only the single best hit is requested (skip 0, take 1).
            var result = querySession.Search(query, 0, 1);

            count++;

            if (result.Total == 0)
            {
                // No hit at all counts as a misclassification.
                errors++;
            }
            else
            {
                var documentLabel = (string)result.Documents.First().Get("label").Value;

                // object.Equals is null-safe: a hit whose stored label value is null
                // is counted as an error instead of aborting the run with a
                // NullReferenceException.
                if (!object.Equals(documentLabel, image.Label))
                {
                    errors++;

                    // The query image is rendered once; the original message rendered
                    // the very same image twice.
                    logger.LogDebug($"error. label: {image.Label} document label: {documentLabel}\n{((MnistImage)image).Print()}");
                }
            }

            // count is at least 1 here, so the division is always defined.
            logger.LogInformation($"errors: {errors}. total tests {count}. error rate: {(float)errors / count * 100}%");
        }
    }

    logger.LogInformation($"tested {count} mnist images in {time.Elapsed}");
}
// Builds a tree from _data with a LinearClassifierImageModel, then looks up
// every vector the model tokenizes from _data and requires that the closest
// match carries the same label. The test fails when a lookup returns null,
// when any label differs, or when no lookup was performed at all.
public void Can_train_in_memory()
{
    var model = new LinearClassifierImageModel();
    var tree = model.CreateTree(model, _data);

    Print(tree);

    Assert.DoesNotThrow(() =>
    {
        var count = 0;
        var errors = 0;

        foreach (var word in _data)
        {
            foreach (var queryVector in model.Tokenize(word))
            {
                var hit = PathFinder.ClosestMatch(tree, queryVector, model);

                if (hit == null)
                {
                    throw new Exception($"unable to find {word} in tree.");
                }

                if (!hit.Node.Vector.Label.Equals(word.Label))
                {
                    errors++;
                }

                Debug.WriteLine($"{word} matched with {hit.Node.Vector.Label} with {hit.Score * 100}% certainty.");

                count++;
            }
        }

        // Without this guard 0 / 0 yields NaN, "NaN > 0" is false and the test
        // would pass without having verified a single vector.
        if (count == 0)
        {
            throw new Exception("no query vectors were tested. the test data produced nothing to verify.");
        }

        var errorRate = (float)errors / count;

        if (errorRate > 0)
        {
            throw new Exception($"error rate: {errorRate * 100}%. too many errors.");
        }

        Debug.WriteLine($"error rate: {errorRate}");
    });
}