/// <inheritdoc />
/// <remarks>
/// Walks the selected column family one entry at a time, starting at <paramref name="startValue"/>
/// when it is supplied and at the first entry otherwise. Every entry for which
/// <paramref name="canCollect"/> returns true is removed immediately, so no batching takes place
/// and the reported batch size is always 1.
/// </remarks>
public GarbageCollectResult GarbageCollect(
    Func<byte[], byte[], bool> canCollect,
    string columnFamilyName = null,
    CancellationToken cancellationToken = default,
    byte[] startValue = null)
{
    // The implementation below ignores batching and removes keys one by one
    var gcResult = new GarbageCollectResult { BatchSize = 1 };

    var columnFamilyInfo = GetColumnFamilyInfo(columnFamilyName);
    var columnFamilyHandleToUse = columnFamilyInfo.Handle;

    using (var iterator = m_store.NewIterator(columnFamilyHandleToUse))
    {
        if (startValue != null)
        {
            iterator.Seek(startValue);
        }
        else
        {
            iterator.SeekToFirst();
        }

        while (iterator.Valid() && !cancellationToken.IsCancellationRequested)
        {
            // Each entry is timed on its own because each entry is its own "batch" here.
            var startTime = TimestampUtilities.Timestamp;
            gcResult.TotalCount++;

            var bytesKey = iterator.Key();
            var bytesValue = iterator.Value();

            // LastKey tracks the last key visited, whether or not it ends up being removed.
            gcResult.LastKey = bytesKey;

            if (canCollect(bytesKey, bytesValue))
            {
                Remove(bytesKey, columnFamilyName);
                gcResult.RemovedCount++;
            }

            iterator.Next();

            var duration = TimestampUtilities.Timestamp - startTime;
            if (duration > gcResult.MaxBatchEvictionTime)
            {
                gcResult.MaxBatchEvictionTime = duration;
            }
        }
    }

    // Report cancellation the same way GarbageCollectByKeyValue does, so callers can tell a
    // sweep that was cut short from one that reached the end of the column family.
    gcResult.Canceled = cancellationToken.IsCancellationRequested;
    return gcResult;
}
/// <inheritdoc />
/// <remarks>
/// Iterates the selected column family (or its key-tracking column when key tracking is enabled),
/// asks <paramref name="canCollect"/> about each entry, and hands the selected keys to
/// <c>RemoveBatch</c> in groups of <c>GarbageCollectionBatchSize</c>. Each batch is removed from the
/// primary column family and from every column family in <paramref name="additionalColumnFamilies"/>.
/// Keys selected but not yet flushed when cancellation is observed are not removed by this call.
/// </remarks>
public GarbageCollectResult GarbageCollectByKeyValue(
    Func<Iterator, bool> canCollect,
    string columnFamilyName = null,
    IEnumerable<string> additionalColumnFamilies = null,
    CancellationToken cancellationToken = default,
    byte[] startValue = null)
{
    var gcStats = new GarbageCollectResult { BatchSize = GarbageCollectionBatchSize };

    var columnFamilyInfo = GetColumnFamilyInfo(columnFamilyName);
    var columnFamilyHandleToUse = columnFamilyInfo.Handle;

    // According to RocksDB documentation, an iterator always loads both the key
    // and the value. To avoid this, when possible, we use the key-tracked column with just keys
    // and empty values and use that for eviction to avoid the load cost of full content.
    if (columnFamilyInfo.UseKeyTracking)
    {
        columnFamilyHandleToUse = columnFamilyInfo.KeyHandle;
    }

    var keysToRemove = new List<byte[]>();

    // Materialize the column list once up front: it is passed to RemoveBatch for every batch,
    // and a lazily evaluated additionalColumnFamilies sequence would otherwise be re-enumerated
    // on each flush.
    var primaryColumn = new string[] { columnFamilyName };
    IEnumerable<string> columnsToUse = additionalColumnFamilies == null
        ? primaryColumn
        : additionalColumnFamilies.Concat(primaryColumn).ToArray();

    using (Iterator iterator = m_store.NewIterator(columnFamilyHandleToUse, m_readOptions))
    {
        if (startValue != null)
        {
            iterator.Seek(startValue);
        }
        else
        {
            iterator.SeekToFirst();
        }

        bool reachedEnd = !iterator.Valid();
        while (!reachedEnd && !cancellationToken.IsCancellationRequested)
        {
            gcStats.TotalCount++;

            bool canCollectResult = canCollect(iterator);
            if (canCollectResult)
            {
                var bytesKey = iterator.Key();
                keysToRemove.Add(bytesKey);
            }

            // Advance before deciding whether to flush so that a final, partially filled
            // batch is still removed once the iterator runs out of entries.
            iterator.Next();
            reachedEnd = !iterator.Valid();

            if (keysToRemove.Count == GarbageCollectionBatchSize || (reachedEnd && keysToRemove.Count > 0))
            {
                var startTime = TimestampUtilities.Timestamp;

                // Remove the key across all specified columns
                RemoveBatch(keysToRemove, columnFamilyNames: columnsToUse);

                var duration = TimestampUtilities.Timestamp - startTime;
                if (duration > gcStats.MaxBatchEvictionTime)
                {
                    gcStats.MaxBatchEvictionTime = duration;
                }

                // LastKey here is the last key handed to RemoveBatch, not the last key visited.
                gcStats.LastKey = keysToRemove.Last();
                gcStats.RemovedCount += keysToRemove.Count;
                keysToRemove.Clear();
            }
        }
    }

    gcStats.Canceled = cancellationToken.IsCancellationRequested;
    return gcStats;
}