/// <summary>
/// Folds the contents of <paramref name="other"/> into this index and returns this index.
/// <paramref name="other"/> is modified in the process and must be discarded afterwards.
/// </summary>
/// <param name="other">The index whose documents and store entries are absorbed.</param>
/// <returns>This instance.</returns>
public WriteableIndex Merge(WriteableIndex other)
{
    // The other index's documents are appended behind ours, so every document
    // reference it carries is shifted by the number of documents we already hold.
    var shift = _documents.Count;

    _documents.AddRange(other._documents);
    _documentLength.AddRange(other._documentLength);

    foreach (var entry in other._store)
    {
        var slot = _store.InitOrGetPosition(entry.Key);
        var existing = _store.GetAtPosition(slot);

        if (existing == null)
        {
            // Nothing is stored at this slot yet: shift the incoming value and store it as is.
            entry.Value.IncreaseDocumentIndex(shift);
            _store.StoreAtPosition(slot, entry.Value);
            continue;
        }

        // The slot is already occupied: append the incoming value, applying the same shift.
        existing.Append(entry.Value, shift);
    }

    return this;
}
/// <summary>
/// Serializes <paramref name="index"/> into the file at <paramref name="filePath"/>.
/// The file is created, or overwritten if it already exists.
/// </summary>
/// <param name="index">The index to write.</param>
/// <param name="filePath">Path of the file to create or overwrite.</param>
public static void SerializeIndexToDisk(WriteableIndex index, string filePath)
{
    // File.Create opens with FileMode.Create, read/write access and no sharing.
    using (Stream target = File.Create(filePath))
    {
        index.Serialize(target);
    }
}
/// <summary>
/// Indexes every file below <paramref name="folder"/> (recursively) using one worker task per
/// processor, then merges the per-task indices into a single index.
/// </summary>
/// <param name="options">Options handed to every per-task <see cref="WriteableIndex"/>.</param>
/// <param name="folder">Root folder; all files in it and its subdirectories are indexed.</param>
/// <returns>
/// The merged index, the number of files processed (empty files are counted but contribute
/// nothing), the number of parsed documents and the total number of bytes read.
/// </returns>
/// <remarks>
/// Blocks until all workers have finished. A failure inside a worker surfaces as an
/// <see cref="AggregateException"/>.
/// </remarks>
public unsafe (WriteableIndex index, int files, int docs, long size) IndexAllParallel(IndexOptions options, string folder)
{
    var timer = Stopwatch.StartNew();
    var files = Directory.GetFiles(folder, "*", SearchOption.AllDirectories);
    Console.WriteLine("Found files: " + files.Length + " - took: " + timer.ElapsedMilliseconds + "ms");
    timer.Restart();

    var tasks = new List<Task<WriteableIndex>>();
    var parallel = Environment.ProcessorCount;

    // Serializes progress reporting: the stopwatch is shared by all workers.
    var progressLock = new object();

    var docsCount = 0;
    var fileCount = 0;
    var sizeSum = 0L;
    var sizeLocal = 0L; // bytes read since the last progress line

    for (var p = 0; p < parallel; p++)
    {
        var taskNumber = p;
        tasks.Add(Task.Run(() =>
        {
            var localIndex = new WriteableIndex(options);
            var localStemmer = new Stemmer();
            var localParser = new Parser();

            // Balanced, contiguous slice of the file list. Slices stay in file order, so the
            // merged document order does not depend on how many workers there are, and no
            // single worker is left with the whole remainder.
            var from = (int)((long)taskNumber * files.Length / parallel);
            var to = (int)((long)(taskNumber + 1) * files.Length / parallel);

            for (var i = from; i < to; i++)
            {
                var len = new FileInfo(files[i]).Length;

                // A zero-length file cannot be memory-mapped (CreateFromFile throws) and
                // holds no documents anyway, so it is skipped.
                if (len > 0)
                {
                    using (var mmf = MemoryMappedFile.CreateFromFile(files[i], FileMode.Open))
                    using (var accessor = mmf.CreateViewAccessor())
                    {
                        var handle = accessor.SafeMemoryMappedViewHandle;
                        byte* buffer = null;
                        try
                        {
                            handle.AcquirePointer(ref buffer);

                            var docs = localParser.ParseFileFast(buffer, len);
                            Interlocked.Add(ref docsCount, docs.Count);
                            Interlocked.Add(ref sizeSum, len);
                            Interlocked.Add(ref sizeLocal, len);
                            IndexDocuments(localIndex, localStemmer, buffer, docs);
                        }
                        finally
                        {
                            // Release only what was acquired, even when parsing or indexing throws.
                            if (buffer != null)
                            {
                                handle.ReleasePointer();
                            }
                        }
                    }
                }

                // Use the value returned by Increment; re-reading the shared counter would race.
                var finished = Interlocked.Increment(ref fileCount);
                if (finished % 100 == 0)
                {
                    lock (progressLock)
                    {
                        // Take and reset the byte counter in one atomic step so no bytes are lost.
                        var megabytes = Interlocked.Exchange(ref sizeLocal, 0) * 0.000001d;
                        var seconds = timer.ElapsedMilliseconds / 1000d;
                        timer.Restart();

                        Console.WriteLine(
                            "Finished: " + finished + " - " + Math.Round(megabytes) + " mb - " +
                            Math.Round(megabytes / seconds, 2) + " mb/s");
                    }
                }
            }

            Console.WriteLine("task finished: " + taskNumber);
            return localIndex;
        }));
    }

    var all = Task.WhenAll(tasks).Result;
    Console.WriteLine("Index building completed. Merging indices ...");

    // Merge in task order so documents keep the order of the file list.
    var master = all[0];
    var mergeTime = Stopwatch.StartNew();
    for (var i = 1; i < all.Length; i++)
    {
        master.Merge(all[i]);
    }

    mergeTime.Stop();
    Console.WriteLine("Merge complete after: " + mergeTime.ElapsedMilliseconds + " ms");
    timer.Stop();

    return (master, fileCount, docsCount, sizeSum);
}
private unsafe void IndexDocuments(WriteableIndex index, Stemmer stemmer, byte *buffer, List <(string id, int from, int length)> docs)