Esempio n. 1
0
            /// <summary>Estimate the number of keys in the specified range.</summary>
            /// <param name="db">Database used for the operation</param>
            /// <param name="beginInclusive">Key defining the beginning of the range</param>
            /// <param name="endExclusive">Key defining the end of the range</param>
            /// <param name="onProgress">Optional callback called every time the count is updated. The first argument is the current count, and the second argument is the last key that was found.</param>
            /// <param name="cancellationToken">Token used to cancel the operation</param>
            /// <returns>Number of keys k such that <paramref name="beginInclusive"/> &lt;= k &lt; <paramref name="endExclusive"/></returns>
            /// <exception cref="ArgumentNullException">If <paramref name="db"/> is null</exception>
            /// <exception cref="ArgumentException">If <paramref name="endExclusive"/> is less than <paramref name="beginInclusive"/></exception>
            /// <remarks>If the range contains a large number of keys, the operation may need more than one transaction to complete, meaning that the number will not be transactionally accurate.</remarks>
            public static async Task<long> EstimateCountAsync([NotNull] IFdbDatabase db, Slice beginInclusive, Slice endExclusive, IProgress<FdbTuple<long, Slice>> onProgress, CancellationToken cancellationToken)
            {
                const int INIT_WINDOW_SIZE = 1 << 8;            // start at 256
                const int MAX_WINDOW_SIZE  = 1 << 13;           // never use more than 8192
                const int MIN_WINDOW_SIZE  = 64;                // switch to a range read once the window size is 64 or less

                if (db == null)
                {
                    throw new ArgumentNullException("db");
                }
                if (endExclusive < beginInclusive)
                {
                    throw new ArgumentException("The end key cannot be less than the begin key", "endExclusive");
                }

                cancellationToken.ThrowIfCancellationRequested();

                // To count the number of items in the range, we will scan it using a key selector with an offset equal to our window size
                // > if the returned key is still strictly inside the range, we add the window size to the counter, and start again from the current key
                // > if the returned key is at or past the end of the range, we reduce the size of the window, and start again from the previous key
                // > once the window is small enough, the remaining keys are counted with a single range read, and we stop

                // Since we don't know in advance if the range contains 1 key or 1 Billion keys, choosing a good value for the window size is critical:
                // > if it is too small and the range is very large, we will need too many sequential reads and the network latency will quickly add up
                // > if it is too large and the range is small, we will spend too much time halving the window size until we get the correct value

                // A few optimizations are possible:
                // > we could start with a small window size, and then double its size on every full segment (up to a maximum)
                // > for the last segment, we don't need to wait for a GetKey to complete before issuing the next, so we could split the segment into 4 (or more), do the GetKeyAsync() in parallel, detect the quarter that crosses the boundary, and iterate again until the size is small
                // > once the window size is small enough, we can switch to using GetRange to read the last segment in one shot, instead of iterating with window size 16, 8, 4, 2 and 1 (the worst case being 2^N - 1 items remaining)

                // note: we make a copy of the keys because the operation could take a long time and the keys could prevent a potentially large underlying buffer from being GCed
                var cursor = beginInclusive.Memoize();
                var end    = endExclusive.Memoize();

                using (var tr = db.BeginReadOnlyTransaction(cancellationToken))
                {
#if TRACE_COUNTING
                    tr.Annotate("Estimating number of keys in range {0}", FdbKeyRange.Create(beginInclusive, endExclusive));
#endif

                    tr.SetOption(FdbTransactionOption.ReadYourWritesDisable);

                    // start looking for the first key in the range
                    cursor = await tr.Snapshot.GetKeyAsync(FdbKeySelector.FirstGreaterOrEqual(cursor)).ConfigureAwait(false);

                    if (cursor >= end)
                    {   // the range is empty !
                        return 0;
                    }

                    // we already have seen one key, so add it to the count
#if TRACE_COUNTING
                    int iter = 1;
#endif
                    long counter = 1;
                    // start with a medium-sized window
                    int  windowSize = INIT_WINDOW_SIZE;
                    bool last       = false;

                    while (cursor < end)
                    {
                        Contract.Assert(windowSize > 0);

                        // selects the key located 'windowSize' keys after the cursor
                        var          selector = FdbKeySelector.FirstGreaterOrEqual(cursor) + windowSize;
                        Slice        next     = Slice.Nil;
                        FdbException error    = null;
                        try
                        {
                            next = await tr.Snapshot.GetKeyAsync(selector).ConfigureAwait(false);

#if TRACE_COUNTING
                            ++iter;
#endif
                        }
                        catch (FdbException e)
                        {   // cannot await inside a catch block, so the error is handled below
                            error = e;
                        }

                        if (error != null)
                        {
                            // => from this point, the count returned will not be transactionally accurate
                            if (error.Code == FdbError.PastVersion)
                            {   // the transaction used up its time window
                                tr.Reset();
                            }
                            else
                            {   // check to see if we can continue...
                                await tr.OnErrorAsync(error.Code).ConfigureAwait(false);
                            }
                            // the options are re-applied after the transaction has been reset, then retry from the same cursor
                            tr.SetOption(FdbTransactionOption.ReadYourWritesDisable);
                            continue;
                        }

                        // The end of the range is exclusive: when 'next' is exactly equal to 'end', only (windowSize - 1) keys
                        // after the cursor are inside the range, so adding a full window would overcount by one.
                        // Also, GetKey(...) truncates the result to \xFF if the selected key would be past the end of the database,
                        // so a result equal to an end of \xFF does not tell us how many keys are really left.
                        // In both cases, we fall back to the binary search and/or the range read, which count the tail exactly.
                        if (next >= end)
                        {   // we have reached (or passed) the end, switch to binary search
                            last = true;

                            // if window size is already 1, then the key right after the cursor is not in the range: we have counted everything
                            if (windowSize == 1)
                            {
                                break;
                            }

                            if (windowSize <= MIN_WINDOW_SIZE)
                            {   // The window is small enough to switch to reading for counting (will be faster than binary search)
#if TRACE_COUNTING
                                tr.Annotate("Switch to reading all items (window size = {0})", windowSize);
#endif

                                // Count the keys by reading them. Also, we know that there can not be more than windowSize - 1 remaining
                                int n = await tr.Snapshot
                                    .GetRange(
                                        FdbKeySelector.FirstGreaterThan(cursor), // cursor has already been counted once
                                        FdbKeySelector.FirstGreaterOrEqual(end),
                                        new FdbRangeOptions() { Limit = windowSize - 1 }
                                    )
                                    .CountAsync()
                                    .ConfigureAwait(false);

                                counter += n;
                                if (onProgress != null)
                                {
                                    onProgress.Report(FdbTuple.Create(counter, end));
                                }
#if TRACE_COUNTING
                                ++iter;
#endif
                                break;
                            }

                            // halve the window and try again from the same cursor
                            windowSize >>= 1;
                            continue;
                        }

                        // the range is not finished: all the keys of the window are inside the range, advance the cursor
                        counter += windowSize;
                        cursor   = next;
                        if (onProgress != null)
                        {
                            onProgress.Report(FdbTuple.Create(counter, cursor));
                        }

                        if (!last)
                        {   // double the size of the window if we are not in the last segment
                            windowSize = Math.Min(windowSize << 1, MAX_WINDOW_SIZE);
                        }
                    }
#if TRACE_COUNTING
                    tr.Annotate("Found {0} keys in {1} iterations", counter, iter);
#endif
                    return counter;
                }
            }
Esempio n. 2
0
            /// <summary>Reads the keys stored under the <c>KeyServers</c> prefix between <paramref name="begin"/> and <paramref name="end"/>, and returns them with that prefix removed.</summary>
            /// <param name="trans">Transaction used for the reads; read access to the system keys is enabled on it, and it may be reset if an error occurs</param>
            /// <param name="begin">Key (without the prefix) where the scan starts</param>
            /// <param name="end">Key (without the prefix) where the scan stops</param>
            /// <returns>List of the keys found, in the order they were read</returns>
            /// <remarks>The scan is done chunk by chunk. If a chunk fails with an <see cref="FdbException"/>, the transaction is either reset or handed to OnErrorAsync, and the scan resumes from the last position reached.</remarks>
            private static async Task<List<Slice>> GetBoundaryKeysInternalAsync([NotNull] IFdbReadOnlyTransaction trans, Slice begin, Slice end)
            {
                Contract.Requires(trans != null && end >= begin);

#if TRACE_COUNTING
                trans.Annotate("Get boundary keys in range {0}", FdbKeyRange.Create(begin, end));
#endif

                trans.WithReadAccessToSystemKeys();

                var boundaries   = new List<Slice>();
                var rangeOptions = new FdbRangeOptions { Mode = FdbStreamingMode.WantAll };
                int iterations   = 0;

                while (begin < end)
                {
                    Slice        iterationStart = begin;
                    FdbException failure        = null;
                    try
                    {
                        var chunk = await trans.Snapshot.GetRangeAsync(KeyServers + begin, KeyServers + end, rangeOptions, iterations).ConfigureAwait(false);
                        iterations++;

                        if (chunk.Count > 0)
                        {
                            // strip the prefix from every key of the chunk
                            foreach (var entry in chunk.Chunk)
                            {
                                boundaries.Add(entry.Key.Substring(KeyServers.Count));
                            }
                            // resume right after the last key that was read
                            begin = chunk.Last.Key.Substring(KeyServers.Count) + (byte)0;
                        }
                        if (!chunk.HasMore)
                        {   // nothing left to read: this ends the loop
                            begin = end;
                        }
                    }
                    catch (FdbException e)
                    {   // the error is handled after the catch block, where awaiting is possible
                        failure = e;
                    }

                    if (failure == null)
                    {
                        continue;
                    }

                    // NOTE(review): 'begin' is only assigned after the read has completed, so this looks like it is always false when the read itself throws - confirm
                    bool movedForward = begin != iterationStart;
                    if (failure.Code == FdbError.PastVersion && movedForward)
                    {   // if we get a PastVersion and *something* has happened, then we are no longer transactional
                        trans.Reset();
                    }
                    else
                    {
                        await trans.OnErrorAsync(failure.Code).ConfigureAwait(false);
                    }

                    // start over with a fresh iteration counter, and re-apply the option on the transaction
                    iterations = 0;
                    trans.WithReadAccessToSystemKeys();
                }

#if TRACE_COUNTING
                if (boundaries.Count == 0)
                {
                    trans.Annotate("There is no chunk boundary in range {0}", FdbKeyRange.Create(begin, end));
                }
                else
                {
                    trans.Annotate("Found {0} boundaries in {1} iteration(s)", boundaries.Count, iterations);
                }
#endif

                return boundaries;
            }