/// <summary>
        /// Folds <paramref name="other"/> into this index and returns this index.
        /// The documents and document lengths of <paramref name="other"/> are appended to this
        /// index, and every entry of its store is either adopted or appended to the entry that
        /// already exists under the same key.
        /// Entries of <paramref name="other"/> are modified and adopted directly, so
        /// <paramref name="other"/> must be discarded afterwards.
        /// </summary>
        /// <param name="other">The index to merge into this one; it is changed by the call.</param>
        /// <returns>This index, to allow chaining.</returns>
        public WriteableIndex Merge(WriteableIndex other)
        {
            // The documents of 'other' are placed behind our own, so every document index
            // coming from 'other' has to be shifted by our current document count.
            var documentShift = _documents.Count;

            _documents.AddRange(other._documents);
            _documentLength.AddRange(other._documentLength);

            foreach (var incoming in other._store)
            {
                // Reserve (or look up) the slot for this key in our own store.
                var slot     = _store.InitOrGetPosition(incoming.Key);
                var existing = _store.GetAtPosition(slot);

                if (existing == null)
                {
                    // Key is new to us: shift the incoming entry and adopt it as-is.
                    incoming.Value.IncreaseDocumentIndex(documentShift);
                    _store.StoreAtPosition(slot, incoming.Value);
                    continue;
                }

                // Key already known: let the existing entry take over the incoming data.
                existing.Append(incoming.Value, documentShift);
            }

            return this;
        }
 /// <summary>
 /// Writes <paramref name="index"/> to the file at <paramref name="filePath"/> by handing
 /// an open stream to <c>index.Serialize</c>.
 /// </summary>
 /// <param name="index">The index to serialize.</param>
 /// <param name="filePath">Target file; created if missing, overwritten if it exists (<see cref="FileMode.Create"/>).</param>
 public static void SerializeIndexToDisk(WriteableIndex index, string filePath)
 {
     // Same stream File.Open(path, FileMode.Create) produces: read/write access, no sharing.
     using (Stream output = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite, FileShare.None))
     {
         index.Serialize(output);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Indexes every file below <paramref name="folder"/> (recursively) with one worker task per
        /// logical processor and merges the per-task indices into a single index.
        /// Each worker memory-maps its share of the files, parses them and feeds the parsed documents
        /// into its own local index; progress is written to the console every 100 files.
        /// </summary>
        /// <param name="options">Options passed to every per-task <see cref="WriteableIndex"/>.</param>
        /// <param name="folder">Root directory whose files (including subdirectories) are indexed.</param>
        /// <returns>
        /// The merged index, the number of files processed, the number of parsed documents and the
        /// total size in bytes of all processed files.
        /// </returns>
        public unsafe (WriteableIndex index, int files, int docs, long size) IndexAllParallel(IndexOptions options, string folder)
        {
            var timer = Stopwatch.StartNew();

            var files = Directory.GetFiles(folder, "*", SearchOption.AllDirectories);

            Console.WriteLine("Found files: " + files.Length + " - took: " + timer.ElapsedMilliseconds + "ms");

            timer.Restart();

            var tasks    = new List <Task <WriteableIndex> >();
            var parallel = Environment.ProcessorCount;

            // Counters shared by all workers; only ever touched through Interlocked.
            var docsCount = 0;
            var fileCount = 0;
            var sizeSum   = 0L;
            var sizeLocal = 0L; // bytes processed since the last progress line

            for (var p = 0; p < parallel; p++)
            {
                // Copy the loop variable so each lambda captures its own value.
                var taskNumber = p;
                tasks.Add(Task.Run(() =>
                {
                    var localIndex   = new WriteableIndex(options);
                    var localStemmer = new Stemmer();
                    var localParser  = new Parser();

                    // Contiguous slice of the file list; the last task also takes the remainder.
                    var localPart = (files.Length / parallel);
                    var from      = taskNumber * localPart;
                    var to        = taskNumber == parallel - 1 ? files.Length : from + localPart;

                    for (var i = from; i < to; i++)
                    {
                        var len = new FileInfo(files[i]).Length;

                        // A zero-length file cannot be memory-mapped (CreateFromFile throws
                        // ArgumentException) and contains nothing to index, so it is only counted.
                        if (len > 0)
                        {
                            using (var mmf = MemoryMappedFile.CreateFromFile(files[i], FileMode.Open))
                            using (var accessor = mmf.CreateViewAccessor())
                            {
                                byte *buffer = null;
                                try
                                {
                                    accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref buffer);

                                    var docs = localParser.ParseFileFast(buffer, len);

                                    Interlocked.Add(ref docsCount, docs.Count);
                                    Interlocked.Add(ref sizeSum, len);
                                    Interlocked.Add(ref sizeLocal, len);

                                    IndexDocuments(localIndex, localStemmer, buffer, docs);
                                }
                                finally
                                {
                                    // Every successful AcquirePointer must be matched by a ReleasePointer,
                                    // also when parsing or indexing throws.
                                    if (buffer != null)
                                    {
                                        accessor.SafeMemoryMappedViewHandle.ReleasePointer();
                                    }
                                }
                            }
                        }

                        // Use the value returned by the increment: re-reading the shared counter
                        // could already show the progress of other workers.
                        var finished = Interlocked.Increment(ref fileCount);
                        if (finished % 100 == 0)
                        {
                            // Fetch and reset the window byte count in one atomic step so that bytes
                            // added concurrently are not lost between reading and resetting.
                            var windowMb       = Interlocked.Exchange(ref sizeLocal, 0) * 0.000001d;
                            var elapsedSeconds = timer.ElapsedMilliseconds / 1000d;

                            // NOTE(review): the stopwatch is shared by all workers without locking;
                            // it only feeds the throughput figure of the progress output.
                            timer.Restart();

                            Console.WriteLine(
                                "Finished: " + finished + " - "
                                + Math.Round(windowMb) + " mb - "
                                + Math.Round(windowMb / elapsedSeconds, 2) + " mb/s");
                        }
                    }

                    Console.WriteLine("task finished: " + taskNumber);

                    return(localIndex);
                }
                                   ));
            }

            // Blocks until all workers are done; a worker failure surfaces as AggregateException.
            var all = Task.WhenAll(tasks).Result;

            Console.WriteLine("Index building completed. Merging indices ...");

            // The first worker's index becomes the master; all others are merged into it in task order.
            var master = all[0];

            var mergeTime = Stopwatch.StartNew();

            for (var i = 1; i < all.Length; i++)
            {
                master.Merge(all[i]);
            }

            mergeTime.Stop();
            Console.WriteLine("Merge complete after: " + mergeTime.ElapsedMilliseconds + " ms");

            timer.Stop();

            return(master, fileCount, docsCount, sizeSum);
        }
Ejemplo n.º 4
0
 private unsafe void IndexDocuments(WriteableIndex index, Stemmer stemmer, byte *buffer, List <(string id, int from, int length)> docs)