Example #1
0
            /// <inheritdoc />
            /// <remarks>
            /// This overload does not batch deletions: each entry for which <paramref name="canCollect"/>
            /// returns true is removed individually, so the reported batch size is always 1.
            /// Iteration begins at <paramref name="startValue"/> when it is provided, otherwise at the first entry,
            /// and stops early once <paramref name="cancellationToken"/> is signaled.
            /// </remarks>
            public GarbageCollectResult GarbageCollect(Func<byte[], byte[], bool> canCollect, string columnFamilyName = null, CancellationToken cancellationToken = default, byte[] startValue = null)
            {
                // The implementation below ignores batching and removes keys one by one
                var gcResult = new GarbageCollectResult
                {
                    BatchSize = 1
                };

                var columnFamilyInfo = GetColumnFamilyInfo(columnFamilyName);

                var columnFamilyHandleToUse = columnFamilyInfo.Handle;

                using (var iterator = m_store.NewIterator(columnFamilyHandleToUse))
                {
                    if (startValue != null)
                    {
                        iterator.Seek(startValue);
                    }
                    else
                    {
                        iterator.SeekToFirst();
                    }

                    while (iterator.Valid() && !cancellationToken.IsCancellationRequested)
                    {
                        var startTime = TimestampUtilities.Timestamp;
                        gcResult.TotalCount++;
                        var bytesKey = iterator.Key();
                        var bytesValue = iterator.Value();

                        // Record every visited key (collected or not) as the last key seen by this pass.
                        gcResult.LastKey = bytesKey;
                        if (canCollect(bytesKey, bytesValue))
                        {
                            Remove(bytesKey, columnFamilyName);
                            gcResult.RemovedCount++;
                        }

                        iterator.Next();

                        // With a batch size of 1, the "batch" time is the time spent processing a single entry
                        // (predicate evaluation, optional removal and advancing the iterator).
                        var duration = TimestampUtilities.Timestamp - startTime;
                        if (duration > gcResult.MaxBatchEvictionTime)
                        {
                            gcResult.MaxBatchEvictionTime = duration;
                        }
                    }
                }

                // Report cancellation the same way GarbageCollectByKeyValue does, so callers can tell an
                // interrupted pass from a completed one.
                gcResult.Canceled = cancellationToken.IsCancellationRequested;
                return gcResult;
            }
Example #2
0
            /// <inheritdoc />
            /// <remarks>
            /// Keys for which <paramref name="canCollect"/> returns true are accumulated and removed in batches of
            /// <c>GarbageCollectionBatchSize</c> (plus one final partial batch when the iterator is exhausted).
            /// Each batch is removed from <paramref name="columnFamilyName"/> and from every entry of
            /// <paramref name="additionalColumnFamilies"/>.
            /// If <paramref name="cancellationToken"/> is signaled while a partial batch is pending, the pending keys
            /// are not removed; they are counted in the total but not in the removed count.
            /// </remarks>
            public GarbageCollectResult GarbageCollectByKeyValue(
                Func<Iterator, bool> canCollect,
                string columnFamilyName = null,
                IEnumerable<string> additionalColumnFamilies = null,
                CancellationToken cancellationToken = default,
                byte[] startValue = null)
            {
                var gcStats = new GarbageCollectResult
                {
                    BatchSize = GarbageCollectionBatchSize
                };

                var columnFamilyInfo = GetColumnFamilyInfo(columnFamilyName);

                var columnFamilyHandleToUse = columnFamilyInfo.Handle;

                // According to RocksDB documentation, an iterator always loads both the key
                // and the value. To avoid this, when possible, we use the key-tracked column with just keys
                // and empty values and use that for eviction to avoid the load cost of full content.
                if (columnFamilyInfo.UseKeyTracking)
                {
                    columnFamilyHandleToUse = columnFamilyInfo.KeyHandle;
                }

                var keysToRemove = new List<byte[]>();
                var primaryColumn = new string[] { columnFamilyName };

                // Materialize the target column list once, so a lazily-evaluated caller sequence is enumerated
                // a single time instead of on every batch removal.
                IEnumerable<string> columnsToUse = additionalColumnFamilies == null
                    ? primaryColumn
                    : additionalColumnFamilies.Concat(primaryColumn).ToArray();

                using (Iterator iterator = m_store.NewIterator(columnFamilyHandleToUse, m_readOptions))
                {
                    if (startValue != null)
                    {
                        iterator.Seek(startValue);
                    }
                    else
                    {
                        iterator.SeekToFirst();
                    }

                    bool reachedEnd = !iterator.Valid();
                    while (!reachedEnd && !cancellationToken.IsCancellationRequested)
                    {
                        gcStats.TotalCount++;

                        // The predicate receives the positioned iterator itself and decides what to read from it.
                        if (canCollect(iterator))
                        {
                            keysToRemove.Add(iterator.Key());
                        }

                        iterator.Next();
                        reachedEnd = !iterator.Valid();

                        // Flush when the batch is full, or when the iterator is exhausted and keys are still pending.
                        if (keysToRemove.Count == GarbageCollectionBatchSize ||
                            (reachedEnd && keysToRemove.Count > 0))
                        {
                            var startTime = TimestampUtilities.Timestamp;

                            // Remove the key across all specified columns
                            RemoveBatch(keysToRemove, columnFamilyNames: columnsToUse);

                            var duration = TimestampUtilities.Timestamp - startTime;

                            if (duration > gcStats.MaxBatchEvictionTime)
                            {
                                gcStats.MaxBatchEvictionTime = duration;
                            }

                            // LastKey tracks the last key that was actually removed, not the last key visited.
                            gcStats.LastKey = keysToRemove[keysToRemove.Count - 1];
                            gcStats.RemovedCount += keysToRemove.Count;
                            keysToRemove.Clear();
                        }
                    }
                }

                gcStats.Canceled = cancellationToken.IsCancellationRequested;
                return gcStats;
            }