Пример #1
0
        /// <summary>
        /// Commits the queued items once at least <c>_batchSize</c> of them are waiting.
        /// </summary>
        /// <returns>
        /// The result of <c>Commit()</c> when a commit was performed; <c>null</c> when the
        /// queue holds fewer than <c>_batchSize</c> items.
        /// </returns>
        private async Task <DataFileCommitInfo> CommitIfRequired()
        {
            // Cheap check first, so the commit lock is not taken while the batch is still filling up.
            if (_nonPersistedQ.Count >= _batchSize)
            {
                await _commitLock.WaitAsync().ConfigureAwait(false);

                try
                {
                    // The queue size is checked again now that the lock is held:
                    // it may have changed while this call was waiting.
                    if (_nonPersistedQ.Count >= _batchSize)
                    {
                        using (Measured.Operation("datafile_commit"))
                        {
                            var commitInfo = await Commit().ConfigureAwait(false);
                            return commitInfo;
                        }
                    }
                }
                finally
                {
                    _commitLock.Release();
                }
            }

            return null;
        }
Пример #2
0
        /// <summary>
        /// Adds a term to the in-memory index and then awaits <c>CommitIfHighMemoryPressure</c>.
        /// </summary>
        /// <param name="term">The term to add.</param>
        /// <param name="dataFileOffset">The data file offset passed along with the term.</param>
        public async Task AddTerm(string term, long dataFileOffset)
        {
            // Only the in-memory insertion is measured; the commit check below runs outside the measurement.
            using (Measured.Operation("add_term_to_in_memory_ix"))
            {
                AddTermToInMemoryIndex(term, dataFileOffset);
            }

            var pressureCommit = CommitIfHighMemoryPressure();
            await pressureCommit;
        }
Пример #3
0
 /// <summary>
 /// Lazily yields the documents from <paramref name="docs"/> for which <c>IsMatch</c> returns true.
 /// </summary>
 /// <param name="query">The query passed to <c>IsMatch</c> for every document.</param>
 /// <param name="docs">The candidate documents.</param>
 /// <returns>The matching documents, in their original order.</returns>
 public IEnumerable <Document> Filter(SearchQuery query, IEnumerable <Document> docs)
 {
     // This is an iterator: the measured operation starts when enumeration begins
     // and is disposed when enumeration finishes or the enumerator is disposed.
     using (Measured.Operation("grep_filter"))
     {
         var matches = docs.Where(candidate => IsMatch(query, candidate));

         foreach (var hit in matches)
         {
             yield return hit;
         }
     }
 }
Пример #4
0
        /// <summary>
        /// Lazily yields the posting list entries stored for <paramref name="targetTerm"/> in an index file.
        /// </summary>
        /// <param name="targetTerm">The term to look up (compared with <c>StringComparer.OrdinalIgnoreCase</c>).</param>
        /// <param name="indexFilePath">Path of the index file to read.</param>
        /// <returns>
        /// The posting list entries of the term; an empty sequence when the footer lookup gives
        /// no starting position or the term is not found in the main part.
        /// </returns>
        private IEnumerable <long> SearchIndexFile(string targetTerm, string indexFilePath)
        {
            var termComparer = StringComparer.OrdinalIgnoreCase;

            using (Measured.Operation("search_index_file"))
            {
                using (var indexStream = new FileStream(indexFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    // The footer lookup gives an approximate starting position for the scan below.
                    var startPosition = SearchIndexFileFooter(indexStream, targetTerm, termComparer, out var footerStart);
                    if (startPosition == null)
                    {
                        yield break;
                    }

                    indexStream.Position = startPosition.Value;

                    // Scan term records one by one until the footer is reached.
                    while (indexStream.Position != footerStart)
                    {
                        // Record layout: <offset of next term> <term length in bytes> <term UTF-8 bytes> ...
                        var offsetOfNextTerm = indexStream.ReadLong();
                        var termByteCount    = indexStream.ReadInt();
                        var currentTerm      = indexStream.ReadUtf8String(termByteCount);

                        var order = termComparer.Compare(currentTerm, targetTerm);

                        if (order < 0)
                        {
                            // Still before the target: skip this term's posting list entirely.
                            indexStream.Position = offsetOfNextTerm;
                            continue;
                        }

                        if (order > 0)
                        {
                            // Terms are sorted, so once we are past the target it cannot appear later.
                            yield break;
                        }

                        // Exact match: stream out the posting list and stop.
                        var entryCount = indexStream.ReadInt();

                        for (var entryIndex = 0; entryIndex < entryCount; entryIndex++)
                        {
                            yield return indexStream.ReadLong();
                        }

                        yield break;
                    }
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Walks the file backwards batch by batch and truncates it at the first batch
        /// (counting from the end) whose first document id is not greater than <paramref name="docId"/>.
        /// </summary>
        /// <param name="docId">The document id that decides where the file is truncated.</param>
        private async Task TruncateByDocId(long docId)
        {
            using (Measured.Operation("wal_truncate"))
            {
                // Start from the end of the file and read towards the beginning.
                _file.Position = _file.Length;

                while (_file.Position != 0)
                {
                    // NOTE(review): presumably this moves _file.Position back to the start of the
                    // batch just read - confirm against DocumentSerializer.
                    var batchFromTail       = await DocumentSerializer.DeserializeBatchFromRightToLeft(_file);
                    var earliestIdInBatch   = batchFromTail[0].Id;

                    if (docId >= earliestIdInBatch)
                    {
                        await TruncateByFileOffset(_file.Position);

                        return;
                    }
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Assigns ids to the transaction's documents, serializes them to the file and flushes it,
        /// all while holding the commit lock.
        /// </summary>
        /// <param name="tran">The transaction whose documents are written.</param>
        public async Task CommitTran(Tran tran)
        {
            await _commitLock.WaitAsync().ConfigureAwait(false);

            try
            {
                // Time spent waiting for the lock above is not part of the measured operation.
                using (Measured.Operation("wal_commit"))
                {
                    // Ids are handed out under the lock, in document order.
                    foreach (var document in tran.Documents)
                    {
                        document.Id = _docIdGenerator.GetNextId();
                    }

                    var serialization = DocumentSerializer.SerializeBatch(tran.Documents, _file);
                    await serialization.ConfigureAwait(false);

                    var flush = _file.FlushAsync();
                    await flush.ConfigureAwait(false);
                }
            }
            finally
            {
                _commitLock.Release();
            }
        }
Пример #7
0
        /// <summary>
        /// Detaches the current in-memory index, replaces it with a fresh one, resets the memory
        /// pressure score to 0 and, unless the detached index is empty, writes it to a new index file.
        /// </summary>
        /// <remarks>
        /// Terms are written in <see cref="StringComparer.OrdinalIgnoreCase"/> order. The index file
        /// reader (<c>SearchIndexFile</c>) compares terms with that same comparer and stops scanning
        /// at the first term that compares greater than the target, so the on-disk order must follow
        /// that comparer rather than the culture-sensitive default string ordering.
        /// </remarks>
        private async Task CommitInternal()
        {
            using (Measured.Operation("commit_index_file"))
            {
                // detaching old in memory index so that all new updates will go to the new
                // in memory index while the old one is being committed to disk
                var newInMemoryIx = new InMemoryIndex();
                var oldInMemoryIx = Interlocked.Exchange(ref _inMemoryIx, newInMemoryIx);
                Interlocked.Exchange(ref _memoryPressureScore, 0);

                if (oldInMemoryIx.IsEmpty)
                {
                    // nothing to commit
                    return;
                }

                /*
                 * Index file structure:
                 *
                 * <file offset (in bytes) of footer (long)>
                 * <main part>
                 * <footer>
                 *
                 * Footer includes sqrt(N) sorted terms with their position in the main part and allows for faster seeks in O(2*sqrt(N))
                 */

                /*
                 * <main part> structure:
                 * <file offset (in bytes) of term 2> <term 1 length in bytes> <term 1 UTF-8 bytes> <number of items in the posting list 1> <item 1 of the posting list 1> ... <item N of the posting list 1>
                 * <file offset (in bytes) of term 3> <term 2 length in bytes> <term 2 UTF-8 bytes> <number of items in the posting list 2> <item 1 of the posting list 2> ... <item N of the posting list 2>
                 * ...
                 * (this goes on for all terms in the index)
                 */

                /*
                 * <footer> structure:
                 * (N - total number of terms in the index file, K = (int)sqrt(N))
                 *
                 * <term 1 length in bytes> <term 1 UTF-8 bytes> <term 1 position in main index part>
                 * <term K+1 length in bytes> <term K+1 UTF-8 bytes> <term K+1 position in main index part>
                 * <term 2K+1 length in bytes> <term 2K+1 UTF-8 bytes> <term 2K+1 position in main index part>
                 * ...
                 * <last term length in bytes> <last term UTF-8 bytes> <last term position in main index part>
                 *
                 * (note that 1st and last terms are always included)
                 */

                var footeringFactor = (int)Math.Sqrt(oldInMemoryIx.Count);
                var footerItems     = new List <KeyValuePair <string, long> >(footeringFactor + 1);

                var i = 0;
                using (var newIxFile = OpenNextIndexFileForWrite())
                {
                    // Leave room for the footer offset; it is written last, once the footer position is known.
                    newIxFile.Position += sizeof(long);

                    // Sort with the same comparer the reader uses, so that the reader's
                    // "stop at the first greater term" scan stays valid for every term.
                    foreach (var termWithPostingList in oldInMemoryIx.OrderBy(x => x.Key, StringComparer.OrdinalIgnoreCase))
                    {
                        // Every footeringFactor-th term (zero-based) and the very last term go into the footer.
                        var isLastItem = i == oldInMemoryIx.Count - 1;
                        if (i % footeringFactor == 0 || isLastItem)
                        {
                            footerItems.Add(new KeyValuePair <string, long>(termWithPostingList.Key, newIxFile.Position));
                        }
                        await WriteTermAndPostingList(newIxFile, termWithPostingList.Key, termWithPostingList.Value);

                        ++i;
                    }

                    var footerStartPosition = newIxFile.Position;
                    await WriteIndexFooter(newIxFile, footerItems);

                    // Go back and fill in the footer offset reserved at the start of the file.
                    newIxFile.Position = 0;
                    await newIxFile.WriteLongAsync(footerStartPosition);

                    await newIxFile.FlushAsync();
                }
            }
        }