Example #1
0
        /// <summary>
        /// Indexes a batch of documents into a newly created index session and then
        /// writes that session's in-memory index to a writable index stream opened
        /// for the collection.
        /// </summary>
        /// <typeparam name="T">Type argument shared by the model and the index session.</typeparam>
        /// <param name="collectionId">Identifier of the collection being indexed.</param>
        /// <param name="job">Documents to index.</param>
        /// <param name="model">Model handed to the index session (as both constructor arguments) and to the indexing overload.</param>
        /// <param name="reportSize">Not read by this overload.</param>
        public void Index<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, int reportSize = 1000)
        {
            using (var session = new IndexSession<T>(model, model))
            {
                // Populate the session before its in-memory index is written out.
                Index(collectionId, job, model, session);

                using (var indexStream = new WritableIndexStream(collectionId, this, logger: Logger))
                {
                    var inMemoryIndex = session.GetInMemoryIndex();

                    indexStream.Write(inMemoryIndex);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Stores a single document through the write session and puts each of its
        /// indexable fields into the index session.
        /// </summary>
        /// <typeparam name="T">Type the field values are cast to before being indexed.</typeparam>
        /// <param name="document">Document to store and index.</param>
        /// <param name="writeSession">Session that receives the document.</param>
        /// <param name="indexSession">Session that receives the field values.</param>
        public void Write<T>(Document document, WriteSession writeSession, IndexSession<T> indexSession)
        {
            writeSession.Put(document);

            foreach (var field in document.Fields)
            {
                // Only fields that carry a value and are flagged for indexing are put.
                if (field.Value == null || !field.Index)
                {
                    continue;
                }

                indexSession.Put(document.Id, field.KeyId, (T)field.Value);
            }
        }
Example #3
0
        /// <summary>
        /// Analyzes the indexable fields of every document in the job and hands each
        /// document to a producer/consumer queue whose callback puts the fields'
        /// tokens into the supplied index session.
        /// </summary>
        /// <typeparam name="T">Type argument shared by the model and the index session.</typeparam>
        /// <param name="collectionId">Identifier of the collection; used here only in log messages.</param>
        /// <param name="job">Documents to analyze and index.</param>
        /// <param name="model">Model passed to each field's <c>Analyze</c> call.</param>
        /// <param name="indexSession">Session that receives document id, key id and tokens per field.</param>
        public void Index<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, IndexSession<T> indexSession)
        {
            LogInformation($"indexing collection {collectionId}");

            var stopwatch = Stopwatch.StartNew();

            // Consumer side: the queue callback puts the tokens of already-analyzed fields.
            using (var indexQueue = new ProducerConsumerQueue<Document>(queued =>
            {
                foreach (var queuedField in queued.Fields)
                {
                    if (queuedField.Value == null || !queuedField.Index)
                    {
                        continue;
                    }

                    indexSession.Put(queuedField.DocumentId, queuedField.KeyId, queuedField.Tokens);
                }
            }))
            {
                // Producer side: analyze on the calling thread, then enqueue the document.
                foreach (var doc in job)
                {
                    foreach (var docField in doc.Fields)
                    {
                        if (docField.Value == null || !docField.Index)
                        {
                            continue;
                        }

                        docField.Analyze(model);
                    }

                    indexQueue.Enqueue(doc);
                }
            }

            LogInformation($"processed indexing job (collection {collectionId}) in {stopwatch.Elapsed}");
        }
Example #4
0
        /// <summary>
        /// Stores every document of the job through the write session and puts each
        /// indexable field value into the index session, stepping an
        /// <c>IndexDebugger</c> once per document.
        /// </summary>
        /// <typeparam name="T">Type the field values are cast to before being indexed.</typeparam>
        /// <param name="collectionId">Identifier of the collection; used here only in log messages.</param>
        /// <param name="job">Documents to store and index.</param>
        /// <param name="model">Not read by this overload.</param>
        /// <param name="writeSession">Session that receives each document.</param>
        /// <param name="indexSession">Session that receives the field values.</param>
        /// <param name="reportSize">Second constructor argument of the <c>IndexDebugger</c>.</param>
        public void Write<T>(ulong collectionId, IEnumerable<Document> job, IModel<T> model, WriteSession writeSession, IndexSession<T> indexSession, int reportSize = 1000)
        {
            LogInformation($"writing to collection {collectionId}");

            var time = Stopwatch.StartNew();

            // IndexDebugger is disposable, so scope it to the loop to make sure it is
            // released even when a Put call throws.
            using (var debugger = new IndexDebugger(Logger, reportSize))
            {
                foreach (var document in job)
                {
                    writeSession.Put(document);

                    foreach (var field in document.Fields)
                    {
                        // Only fields that carry a value and are flagged for indexing are put.
                        if (field.Value != null && field.Index)
                        {
                            indexSession.Put(document.Id, field.KeyId, (T)field.Value);
                        }
                    }

                    debugger.Step(indexSession);
                }
            }

            Logger.LogInformation($"processed write&index job (collection {collectionId}) in {time.Elapsed}");
        }
Example #5
0
        /// <summary>
        /// Rebuilds the index of a collection from its stored document vectors.
        /// Documents are read page by page, the nodes of each page are put into a
        /// new index session, and every session is handed to a queue whose callback
        /// writes the session's in-memory index to a writable index stream.
        /// </summary>
        /// <typeparam name="T">Type argument shared by the model and the index sessions.</typeparam>
        /// <param name="collection">Collection name; hashed with <c>ToHash()</c> to obtain the collection id.</param>
        /// <param name="selectFields">Field selection passed to <c>ReadDocumentVectors</c>.</param>
        /// <param name="model">Model passed to <c>ReadDocumentVectors</c> and to each index session.</param>
        /// <param name="skipDocuments">Initial skip value passed to <c>ReadDocumentVectors</c>.</param>
        /// <param name="takeDocuments">
        /// Paging continues while the number of processed documents is below this value.
        /// NOTE(review): with the default of 0 the paging loop never runs, yet the index
        /// is still truncated when <paramref name="truncateIndex"/> is true - confirm this is intended.
        /// NOTE(review): whole pages are processed, so the processed count can exceed this value.
        /// </param>
        /// <param name="reportFrequency">Second constructor argument of the <c>IndexDebugger</c>.</param>
        /// <param name="pageSize">Page size passed to <c>ReadDocumentVectors</c>; also the amount the skip value advances per page.</param>
        /// <param name="truncateIndex">When true, <c>TruncateIndex</c> is called for the collection before anything is read.</param>
        public void Optimize<T>(
            string collection,
            HashSet<string> selectFields,
            IModel<T> model,
            int skipDocuments   = 0,
            int takeDocuments   = 0,
            int reportFrequency = 1000,
            int pageSize        = 100000,
            bool truncateIndex  = true)
        {
            var collectionId = collection.ToHash();

            if (truncateIndex)
            {
                TruncateIndex(collectionId);
            }

            using (var debugger = new IndexDebugger(Logger, reportFrequency))
            using (var documents = new DocumentStreamSession(this))
            using (var writeQueue = new ProducerConsumerQueue<IndexSession<T>>(indexSession =>
            {
                // The consumer owns the session from here on: it is disposed only after
                // its in-memory index has been written. Disposing it on the producer side
                // right after Enqueue could release it while this callback still reads it.
                using (indexSession)
                using (var stream = new WritableIndexStream(collectionId, this, logger: Logger))
                {
                    stream.Write(indexSession.GetInMemoryIndex());
                }
            }))
            {
                var took = 0;
                var skip = skipDocuments;

                while (took < takeDocuments)
                {
                    var payload = documents.ReadDocumentVectors(
                        collectionId,
                        selectFields,
                        model,
                        skip,
                        pageSize);

                    var count = 0;

                    // Deliberately not wrapped in `using`: ownership is transferred to the
                    // queue consumer on a successful Enqueue.
                    var indexSession = new IndexSession<T>(model, model);

                    try
                    {
                        // NOTE(review): Put and Step are invoked concurrently here; this
                        // assumes both are safe to call from multiple threads - confirm.
                        Parallel.ForEach(payload, document =>
                        {
                            foreach (var node in document.Nodes)
                            {
                                indexSession.Put(node);
                            }

                            Interlocked.Increment(ref count);

                            debugger.Step(indexSession);
                        });

                        writeQueue.Enqueue(indexSession);
                    }
                    catch
                    {
                        // The hand-off did not complete, so the consumer will not dispose
                        // the session; release it here before propagating the failure.
                        indexSession.Dispose();
                        throw;
                    }

                    // An empty page means there is nothing more to read.
                    if (count == 0)
                    {
                        break;
                    }

                    took += count;
                    skip += pageSize;
                }
            }

            LogInformation($"optimized collection {collection}");
        }