public void Index<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, int reportSize = 1000)
{
    using (var indexSession = new IndexSession<T>(model, model))
    {
        Index(collectionId, job, model, indexSession);

        using (var stream = new WritableIndexStream(collectionId, this, logger: Logger))
        {
            stream.Write(indexSession.GetInMemoryIndex());
        }
    }
}
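// Usage sketch (hypothetical caller, not part of this file): "sessionFactory" is assumed to be an
// instance of this class, and the model is assumed to satisfy both constructor arguments of
// IndexSession<T>, matching the new IndexSession<T>(model, model) pattern above.
//
//   var collectionId = "mycollection".ToHash();
//   IModel<string> model = ...;              // any IModel<string> implementation
//   IEnumerable<Document> batch = ...;       // documents whose indexable fields carry values
//   sessionFactory.Index(collectionId, batch, model);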
public void Write<T>(Document document, WriteSession writeSession, IndexSession<T> indexSession)
{
    writeSession.Put(document);

    foreach (var field in document.Fields)
    {
        if (field.Value != null && field.Index)
        {
            indexSession.Put(document.Id, field.KeyId, (T)field.Value);
        }
    }
}
// Analyzes (tokenizes) each indexable field on the calling thread, then hands the document
// to a background consumer that adds the resulting tokens to the index session.
public void Index<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, IndexSession<T> indexSession)
{
    LogInformation($"indexing collection {collectionId}");

    var time = Stopwatch.StartNew();

    using (var queue = new ProducerConsumerQueue<Document>(document =>
    {
        foreach (var field in document.Fields)
        {
            if (field.Value != null && field.Index)
            {
                indexSession.Put(field.DocumentId, field.KeyId, field.Tokens);
            }
        }
    }))
    {
        foreach (var document in job)
        {
            foreach (var field in document.Fields)
            {
                if (field.Value != null && field.Index)
                {
                    field.Analyze(model);
                }
            }

            queue.Enqueue(document);
        }
    }

    LogInformation($"processed indexing job (collection {collectionId}) in {time.Elapsed}");
}
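// Note (illustration, not part of the original file): ProducerConsumerQueue<T> pipelines tokenization
// on the calling thread with indexing on a background consumer. The same pattern can be expressed with
// the standard System.Collections.Concurrent.BlockingCollection<T>, roughly like this:
//
//   using (var queue = new BlockingCollection<Document>(boundedCapacity: 1000))
//   {
//       var consumer = Task.Run(() =>
//       {
//           foreach (var document in queue.GetConsumingEnumerable())
//               foreach (var field in document.Fields)
//                   if (field.Value != null && field.Index)
//                       indexSession.Put(field.DocumentId, field.KeyId, field.Tokens);
//       });
//
//       foreach (var document in job)
//       {
//           foreach (var field in document.Fields)
//               if (field.Value != null && field.Index)
//                   field.Analyze(model);
//
//           queue.Add(document);
//       }
//
//       queue.CompleteAdding();
//       consumer.Wait();
//   }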
// Writes each document to the store and indexes its indexable fields, reporting progress
// every reportSize documents via IndexDebugger.
public void Write<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, WriteSession writeSession, IndexSession<T> indexSession, int reportSize = 1000)
{
    LogInformation($"writing to collection {collectionId}");

    var time = Stopwatch.StartNew();
    var debugger = new IndexDebugger(Logger, reportSize);

    foreach (var document in job)
    {
        writeSession.Put(document);

        //Parallel.ForEach(document, kv =>
        foreach (var field in document.Fields)
        {
            if (field.Value != null && field.Index)
            {
                indexSession.Put(document.Id, field.KeyId, (T)field.Value);
            }
        }//);

        debugger.Step(indexSession);
    }

    Logger.LogInformation($"processed write&index job (collection {collectionId}) in {time.Elapsed}");
}
// Re-indexes (optimizes) a collection by streaming document vectors page by page,
// building an in-memory index per page in parallel, and flushing each page's index
// to disk on a background writer thread.
public void Optimize<T>(
    string collection,
    HashSet<string> selectFields,
    IModel<T> model,
    int skipDocuments = 0,
    int takeDocuments = 0,
    int reportFrequency = 1000,
    int pageSize = 100000,
    bool truncateIndex = true)
{
    var collectionId = collection.ToHash();

    if (truncateIndex)
    {
        TruncateIndex(collectionId);
    }

    using (var debugger = new IndexDebugger(Logger, reportFrequency))
    using (var documents = new DocumentStreamSession(this))
    {
        using (var writeQueue = new ProducerConsumerQueue<IndexSession<T>>(indexSession =>
        {
            using (var stream = new WritableIndexStream(collectionId, this, logger: Logger))
            {
                stream.Write(indexSession.GetInMemoryIndex());
            }
        }))
        {
            var took = 0;
            var skip = skipDocuments;

            while (took < takeDocuments)
            {
                var payload = documents.ReadDocumentVectors(
                    collectionId,
                    selectFields,
                    model,
                    skip,
                    pageSize);

                var count = 0;

                using (var indexSession = new IndexSession<T>(model, model))
                {
                    Parallel.ForEach(payload, document =>
                    {
                        foreach (var node in document.Nodes)
                        {
                            indexSession.Put(node);
                        }

                        Interlocked.Increment(ref count);

                        debugger.Step(indexSession);
                    });

                    //foreach (var document in payload)
                    //{
                    //    foreach (var node in document.Nodes)
                    //    {
                    //        indexSession.Put(node);
                    //    }

                    //    count++;

                    //    debugger.Step(indexSession);
                    //}

                    writeQueue.Enqueue(indexSession);
                }

                if (count == 0)
                {
                    break;
                }

                took += count;
                skip += pageSize;
            }
        }
    }

    LogInformation($"optimized collection {collection}");
}
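// Usage sketch (hypothetical caller, not part of this file): re-index a collection, selecting only the
// fields that were originally indexed. The collection name, field names, and model instance below are
// placeholders, not values taken from this repository.
//
//   sessionFactory.Optimize(
//       collection: "wikipedia",
//       selectFields: new HashSet<string> { "title", "text" },
//       model: myModel,                 // the same IModel<T> used when the collection was written
//       takeDocuments: int.MaxValue);   // the paging loop above runs only while took < takeDocuments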