/// <summary>
/// Commits the queued, not-yet-persisted items once at least <c>_batchSize</c> of them
/// have accumulated.
/// </summary>
/// <returns>
/// The value produced by <c>Commit()</c>, or <c>null</c> when the queue holds fewer than
/// <c>_batchSize</c> items and nothing was committed.
/// </returns>
private async Task<DataFileCommitInfo> CommitIfRequired()
{
    // Cheap check before taking the lock: most calls are expected to leave here.
    if (_nonPersistedQ.Count < _batchSize)
    {
        return null;
    }

    await _commitLock.WaitAsync().ConfigureAwait(false);
    try
    {
        // The queue size is examined again under the lock because it may have
        // changed while this call was waiting for _commitLock.
        if (_nonPersistedQ.Count >= _batchSize)
        {
            using (Measured.Operation("datafile_commit"))
            {
                var commitInfo = await Commit().ConfigureAwait(false);
                return commitInfo;
            }
        }

        return null;
    }
    finally
    {
        _commitLock.Release();
    }
}
/// <summary>
/// Adds <paramref name="term"/> with its <paramref name="dataFileOffset"/> to the
/// in-memory index, then awaits <c>CommitIfHighMemoryPressure()</c>.
/// </summary>
/// <param name="term">The term to index.</param>
/// <param name="dataFileOffset">The offset value recorded for the term.</param>
public async Task AddTerm(string term, long dataFileOffset)
{
    // Only the in-memory insertion is measured; the follow-up commit check is
    // deliberately outside of the "add_term_to_in_memory_ix" scope.
    using (Measured.Operation("add_term_to_in_memory_ix"))
    {
        AddTermToInMemoryIndex(term, dataFileOffset);
    }

    var pendingCommit = CommitIfHighMemoryPressure();
    await pendingCommit;
}
/// <summary>
/// Lazily yields the documents from <paramref name="docs"/> for which
/// <c>IsMatch(query, doc)</c> returns true, preserving their order.
/// </summary>
/// <param name="query">The query every document is tested against.</param>
/// <param name="docs">The candidate documents.</param>
/// <returns>A deferred sequence of the matching documents.</returns>
/// <remarks>
/// This is an iterator: nothing runs until the result is enumerated, and the
/// "grep_filter" measurement scope stays open for as long as the enumeration lasts.
/// </remarks>
public IEnumerable<Document> Filter(SearchQuery query, IEnumerable<Document> docs)
{
    using (Measured.Operation("grep_filter"))
    {
        var matches =
            from candidate in docs
            where IsMatch(query, candidate)
            select candidate;

        foreach (var match in matches)
        {
            yield return match;
        }
    }
}
/// <summary>
/// Looks up <paramref name="targetTerm"/> in the index file at
/// <paramref name="indexFilePath"/> and lazily yields the entries of its posting list.
/// </summary>
/// <param name="targetTerm">The term to find; compared with <c>StringComparer.OrdinalIgnoreCase</c>.</param>
/// <param name="indexFilePath">Path of the index file to open for reading.</param>
/// <returns>
/// The posting list entries stored for the term, or an empty sequence when the footer
/// lookup yields no starting position or the term is not found.
/// </returns>
private IEnumerable<long> SearchIndexFile(string targetTerm, string indexFilePath)
{
    var termComparer = StringComparer.OrdinalIgnoreCase;

    using (Measured.Operation("search_index_file"))
    using (var indexStream = new FileStream(indexFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        // The footer lookup gives a rough starting point: the position in the
        // index file from which the term could be found by scanning forward.
        var startPosition = SearchIndexFileFooter(indexStream, targetTerm, termComparer, out var footerStart);
        if (startPosition == null)
        {
            yield break;
        }

        indexStream.Position = startPosition.Value;

        // Walk term records one by one until the footer begins.
        while (indexStream.Position != footerStart)
        {
            var offsetOfNextTerm = indexStream.ReadLong();
            var termByteCount = indexStream.ReadInt();
            var currentTerm = indexStream.ReadUtf8String(termByteCount);
            var ordering = termComparer.Compare(currentTerm, targetTerm);

            if (ordering < 0)
            {
                // Still before the target: jump straight to the next term record,
                // skipping this term's posting list.
                indexStream.Position = offsetOfNextTerm;
                continue;
            }

            if (ordering == 0)
            {
                // Exact match: stream out the posting list that follows the term.
                var remainingEntries = indexStream.ReadInt();
                while (remainingEntries > 0)
                {
                    yield return indexStream.ReadLong();
                    remainingEntries--;
                }
            }

            // Either the match has been fully emitted, or (ordering > 0) we have moved
            // past the place where the target term could have been -- terms are stored
            // sorted, so there is nothing further to look at.
            break;
        }
    }
}
/// <summary>
/// Scans the batches stored in <c>_file</c> from the end towards the beginning and, at the
/// first batch whose first document id is not greater than <paramref name="docId"/>,
/// truncates via <c>TruncateByFileOffset</c> at the stream position reached after reading
/// that batch.
/// </summary>
/// <param name="docId">The document id that determines where the scan stops.</param>
/// <remarks>
/// If no batch satisfies the condition, the scan reaches position 0 and nothing is truncated.
/// Awaits use <c>ConfigureAwait(false)</c>, matching the other awaits on this file
/// (see <c>CommitTran</c>), so continuations do not depend on the caller's context.
/// </remarks>
private async Task TruncateByDocId(long docId)
{
    using (Measured.Operation("wal_truncate"))
    {
        // Start at the very end; batches are consumed right-to-left.
        _file.Position = _file.Length;

        while (_file.Position != 0)
        {
            var batch = await DocumentSerializer.DeserializeBatchFromRightToLeft(_file).ConfigureAwait(false);

            // NOTE(review): assumes every deserialized batch has at least one document -- confirm.
            var firstDocIdInBatch = batch[0].Id;

            if (docId >= firstDocIdInBatch)
            {
                // NOTE(review): presumably _file.Position now points at the start of the
                // batch just read -- verify against DeserializeBatchFromRightToLeft.
                await TruncateByFileOffset(_file.Position).ConfigureAwait(false);
                break;
            }
        }
    }
}
/// <summary>
/// Commits a transaction: assigns a fresh id to each of its documents, serializes the
/// documents as one batch into <c>_file</c> and flushes the file.
/// </summary>
/// <param name="tran">The transaction whose <c>Documents</c> are written.</param>
/// <remarks>
/// The whole operation runs while holding <c>_commitLock</c>. Time spent waiting for the
/// lock is not part of the "wal_commit" measurement.
/// </remarks>
public async Task CommitTran(Tran tran)
{
    await _commitLock.WaitAsync().ConfigureAwait(false);
    try
    {
        using (Measured.Operation("wal_commit"))
        {
            // Ids are handed out under the lock, right before the batch is written.
            foreach (var document in tran.Documents)
            {
                document.Id = _docIdGenerator.GetNextId();
            }

            var serialization = DocumentSerializer.SerializeBatch(tran.Documents, _file);
            await serialization.ConfigureAwait(false);

            var flush = _file.FlushAsync();
            await flush.ConfigureAwait(false);
        }
    }
    finally
    {
        _commitLock.Release();
    }
}
/// <summary>
/// Swaps in a fresh in-memory index, resets the memory pressure score and writes the
/// detached (old) in-memory index to the next index file.
/// </summary>
/// <remarks>
/// Returns without touching the disk when the detached index is empty. Terms are written
/// in <c>StringComparer.OrdinalIgnoreCase</c> order, which is the comparer the index file
/// reader (<c>SearchIndexFile</c>) uses to decide when it has moved past a term.
/// </remarks>
private async Task CommitInternal()
{
    using (Measured.Operation("commit_index_file"))
    {
        // detaching old in memory index so that all new updates will go to the new
        // in memory index while the old one is being committed to disk
        var newInMemoryIx = new InMemoryIndex();
        var oldInMemoryIx = Interlocked.Exchange(ref _inMemoryIx, newInMemoryIx);
        Interlocked.Exchange(ref _memoryPressureScore, 0);

        if (oldInMemoryIx.IsEmpty)
        {
            // nothing to commit
            return;
        }

        /*
         * Index file structure:
         *
         * <file offset (in bytes) of footer (long)>
         * <main part>
         * <footer>
         *
         * Footer includes about sqrt(N) sorted terms with their position in the main part
         * and allows for faster seeks in O(2*sqrt(N))
         */

        /*
         * <main part> structure:
         * <file offset (in bytes) of term 2> <term 1 length in bytes> <term 1 UTF-8 bytes> <number of items in the posting list 1> <item 1 of the posting list 1> ... <item N of the posting list 1>
         * <file offset (in bytes) of term 3> <term 2 length in bytes> <term 2 UTF-8 bytes> <number of items in the posting list 2> <item 1 of the posting list 2> ... <item N of the posting list 2>
         * ...
         * (this goes on for all terms in the index)
         */

        /*
         * <footer> structure:
         * (N - total number of terms in the index file, K = (int)sqrt(N))
         *
         * One record per sampled term, where the sampled terms are those at 0-based
         * positions 0, K, 2K, ... of the sorted term sequence, plus the last term:
         *
         * <term length in bytes> <term UTF-8 bytes> <term position in main index part>
         * ...
         * <last term length in bytes> <last term UTF-8 bytes> <last term position in main index part>
         *
         * (note that 1st and last terms are always included)
         */

        var footeringFactor = (int)Math.Sqrt(oldInMemoryIx.Count);
        var footerItems = new List<KeyValuePair<string, long>>(footeringFactor + 1);
        var i = 0;

        using (var newIxFile = OpenNextIndexFileForWrite())
        {
            // Reserve room for the footer offset; it is only known once the main
            // part has been written, so it is filled in at the very end.
            newIxFile.Position += sizeof(long);

            // The sort order MUST match the comparer used when searching the file
            // (OrdinalIgnoreCase). The default culture-sensitive ordering differs from it
            // for some inputs (e.g. punctuation), which would make the reader stop early
            // and miss terms that are present.
            var sortedTerms = oldInMemoryIx.OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase);

            foreach (var termWithPostingList in sortedTerms)
            {
                var isLastItem = i == oldInMemoryIx.Count - 1;
                if (i % footeringFactor == 0 || isLastItem)
                {
                    // Remember where this term's record starts so the footer can point at it.
                    footerItems.Add(new KeyValuePair<string, long>(termWithPostingList.Key, newIxFile.Position));
                }

                await WriteTermAndPostingList(newIxFile, termWithPostingList.Key, termWithPostingList.Value);
                ++i;
            }

            var footerStartPosition = newIxFile.Position;
            await WriteIndexFooter(newIxFile, footerItems);

            // Go back and fill in the footer offset reserved at the start of the file.
            newIxFile.Position = 0;
            await newIxFile.WriteLongAsync(footerStartPosition);
            await newIxFile.FlushAsync();
        }
    }
}