Exemplo n.º 1
0
            /// <summary>
            /// Provides access to and/or creates a RocksDb persistent key-value store.
            /// </summary>
            /// <param name="arguments">
            /// Settings used to open the store: the store directory, bulk-load / read-only / fast-open flags,
            /// statistics, information log rotation, automatic compaction and the column family schema.
            /// </param>
            public RocksDbStore(RocksDbStoreArguments arguments)
            {
                m_storeDirectory = arguments.StoreDirectory;
                m_openBulkLoad   = arguments.OpenBulkLoad;

                m_defaults.DbOptions = new DbOptions()
                                       .SetCreateIfMissing(true)
                                       .SetCreateMissingColumnFamilies(true)
                                       // The background compaction threads run in low priority, so they should not hamper the rest of
                                       // the system. The number of cores in the system is what we want here according to official docs,
                                       // and we are setting this to the number of logical processors, which may be higher.
                                       // See: https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide#parallelism-options
#if !PLATFORM_OSX
                                       .SetMaxBackgroundCompactions(Environment.ProcessorCount)
                                       .SetMaxBackgroundFlushes(1)
#else
                                       // The memtable uses significant chunks of available system memory on macOS, we increase the number
                                       // of background flushing threads (low priority) and set the DB write buffer size. This allows for
                                       // up to 128 MB in memtables across all column families before we flush to disk.
                                       .SetMaxBackgroundCompactions(Environment.ProcessorCount / 4)
                                       .SetMaxBackgroundFlushes(Environment.ProcessorCount / 4)
                                       .SetDbWriteBufferSize(128 << 20)
#endif
                                       .IncreaseParallelism(Environment.ProcessorCount / 2);

                if (arguments.EnableStatistics)
                {
                    m_defaults.DbOptions.EnableStatistics();
                }

                if (arguments.OpenBulkLoad)
                {
                    m_defaults.DbOptions.PrepareForBulkLoad();
                }

                // Maximum number of information log files
                if (arguments.RotateLogsNumFiles != null)
                {
                    m_defaults.DbOptions.SetKeepLogFileNum(arguments.RotateLogsNumFiles.Value);
                }

                // Maximum size of a single information log file, as requested by the caller
                if (arguments.RotateLogsMaxFileSizeBytes != null)
                {
                    m_defaults.DbOptions.SetMaxLogFileSize(arguments.RotateLogsMaxFileSizeBytes.Value);
                }

                // How long before we rotate the current information log file
                if (arguments.RotateLogsMaxAge != null)
                {
                    // TotalSeconds is the whole duration expressed in seconds. TimeSpan.Seconds is only the
                    // 0-59 seconds component, which would turn e.g. 12 hours into 0.
                    m_defaults.DbOptions.SetLogFileTimeToRoll((ulong)arguments.RotateLogsMaxAge.Value.TotalSeconds);
                }

                if (arguments.FastOpen)
                {
                    // max_file_opening_threads is defaulted to 16, so no need to update here.
                    RocksDbSharp.Native.Instance.rocksdb_options_set_skip_stats_update_on_db_open(m_defaults.DbOptions.Handle, true);
                }

                if (arguments.DisableAutomaticCompactions)
                {
                    m_defaults.DbOptions.SetDisableAutoCompactions(1);
                }

                // A small comment on things tested that did not work:
                //  * SetAllowMmapReads(true) and SetAllowMmapWrites(true) produce a dramatic performance drop
                //  * SetUseDirectReads(true) disables the OS cache, and although that's good for random point lookups,
                //    it produces a dramatic performance drop otherwise.

                m_defaults.WriteOptions = new WriteOptions()
                                          // Disable the write ahead log to reduce disk IO. The write ahead log
                                          // is used to recover the store on crashes, so a crash will lose some writes.
                                          // Writes will be made in-memory only until the write buffer size
                                          // is reached and then they will be flushed to storage files.
                                          .DisableWal(1)
                                          // This option is off by default, but just making sure that the C# wrapper
                                          // doesn't change anything. The idea is that the DB won't wait for fsync to
                                          // return before acknowledging the write as successful. This affects
                                          // correctness, because a write may be ACKd before it is actually on disk,
                                          // but it is much faster.
                                          .SetSync(false);


                var blockBasedTableOptions = new BlockBasedTableOptions()
                                             // Use a bloom filter to help reduce read amplification on point lookups. 10 bits per key yields a
                                             // ~1% false positive rate as per the RocksDB documentation. This builds one filter per SST, which
                                             // means it is optimized for not having a key.
                                             .SetFilterPolicy(BloomFilterPolicy.Create(10, false))
                                             // Use a hash index in SST files to speed up point lookup.
                                             .SetIndexType(BlockBasedTableIndexType.HashSearch)
                                             // Whether to filter on the whole key or on a prefix of it (obtained through the prefix extractor
                                             // below). Since the prefix extractor is a no-op, filter on the whole key (i.e. set this to true).
                                             .SetWholeKeyFiltering(true);

                m_defaults.ColumnFamilyOptions = new ColumnFamilyOptions()
#if PLATFORM_OSX
                                                 // As advised by the official documentation, LZ4 is the preferred compression algorithm, our RocksDB
                                                 // dynamic library has been compiled to support this on macOS. Fallback to Snappy on other systems (default).
                                                 .SetCompression(CompressionTypeEnum.rocksdb_lz4_compression)
#endif
                                                 .SetBlockBasedTableFactory(blockBasedTableOptions)
                                                 .SetPrefixExtractor(SliceTransform.CreateNoOp());

                m_columns = new Dictionary<string, ColumnFamilyInfo>();

                // The columns that exist in the store on disk may not be in sync with the columns being passed into the constructor
                HashSet<string> existingColumns;
                try
                {
                    existingColumns = new HashSet<string>(RocksDb.ListColumnFamilies(m_defaults.DbOptions, m_storeDirectory));
                }
                catch (RocksDbException)
                {
                    // If there is no existing store, an exception will be thrown, ignore it
                    existingColumns = new HashSet<string>();
                }

                // In read-only mode, open all existing columns in the store without attempting to validate it against the expected column families
                if (arguments.ReadOnly)
                {
                    var columnFamilies = new ColumnFamilies();
                    foreach (var name in existingColumns)
                    {
                        columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
                    }

                    m_store = RocksDb.OpenReadOnly(m_defaults.DbOptions, m_storeDirectory, columnFamilies, errIfLogFileExists: false);
                }
                else
                {
                    // For read-write mode, column families may be added, so set up column families schema
                    var additionalColumns = arguments.AdditionalColumns ?? CollectionUtilities.EmptyArray<string>();
                    var columnsSchema     = new HashSet<string>(additionalColumns);

                    // Default column
                    columnsSchema.Add(ColumnFamilies.DefaultName);

                    // For key-tracked column families, create two columns:
                    // 1: Normal column of { key : value }
                    // 2: Key-tracking column of { key : empty-value }
                    if (arguments.DefaultColumnKeyTracked)
                    {
                        // To be robust to the RocksDB-selected default column name changing,
                        // just name the default column's key-tracking column KeyColumnSuffix
                        columnsSchema.Add(KeyColumnSuffix);
                    }

                    var additionalKeyTrackedColumns = arguments.AdditionalKeyTrackedColumns ?? CollectionUtilities.EmptyArray<string>();
                    foreach (var name in additionalKeyTrackedColumns)
                    {
                        columnsSchema.Add(name);
                        columnsSchema.Add(name + KeyColumnSuffix);
                    }

                    // Figure out which columns are not part of the schema
                    var outsideSchemaColumns = new List<string>(existingColumns.Except(columnsSchema));

                    // RocksDB requires all columns in the store to be opened in read-write mode, so merge existing columns
                    // with the columns schema that was passed into the constructor
                    existingColumns.UnionWith(columnsSchema);

                    var columnFamilies = new ColumnFamilies();
                    foreach (var name in existingColumns)
                    {
                        columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
                    }

                    m_store = RocksDb.Open(m_defaults.DbOptions, m_storeDirectory, columnFamilies);

                    // Provide an opportunity to update the store to the new column family schema
                    if (arguments.DropMismatchingColumns)
                    {
                        foreach (var name in outsideSchemaColumns)
                        {
                            m_store.DropColumnFamily(name);
                            existingColumns.Remove(name);
                        }
                    }
                }

                // Column names are identifiers, so compare the suffix ordinally rather than with culture-sensitive rules.
                var userFacingColumns = existingColumns.Where(name => !name.EndsWith(KeyColumnSuffix, StringComparison.Ordinal));

                foreach (var name in userFacingColumns)
                {
                    // A column is key-tracked when its companion "<name> + KeyColumnSuffix" column is also open.
                    var isKeyTracked = existingColumns.Contains(name + KeyColumnSuffix);
                    m_columns.Add(name, new ColumnFamilyInfo()
                    {
                        Handle         = m_store.GetColumnFamily(name),
                        UseKeyTracking = isKeyTracked,
                        KeyHandle      = isKeyTracked ? m_store.GetColumnFamily(name + KeyColumnSuffix) : null,
                    });
                }

                m_columns.TryGetValue(ColumnFamilies.DefaultName, out m_defaultColumnFamilyInfo);
            }
Exemplo n.º 2
0
            /// <summary>
            /// Provides access to and/or creates a RocksDb persistent key-value store.
            /// </summary>
            /// <param name="storeDirectory">
            /// The directory containing the key-value store.
            /// </param>
            /// <param name="defaultColumnKeyTracked">
            /// Whether the default column should be key-tracked.
            /// This will create two columns for the same data,
            /// one with just keys and the other with key and value.
            /// </param>
            /// <param name="additionalColumns">
            /// The names of any additional column families in the key-value store.
            /// If no additional column families are provided, all entries will be stored
            /// in the default column.
            /// Column families are analogous to tables in relational databases.
            /// </param>
            /// <param name="additionalKeyTrackedColumns">
            /// The names of any additional column families in the key-value store that
            /// should also be key-tracked. This will create two columns for the same data,
            /// one with just keys and the other with key and value.
            /// Column families are analogous to tables in relational databases.
            /// </param>
            /// <param name="readOnly">
            /// Whether the database should be opened read-only. This prevents modifications and
            /// creating unnecessary metadata files related to write sessions.
            /// </param>
            /// <param name="dropMismatchingColumns">
            /// If a store already exists at the given directory, whether any columns that mismatch the columns that were passed into the constructor
            /// should be dropped. This will cause data loss and can only be applied in read-write mode.
            /// </param>
            /// <param name="rotateLogs">
            /// Have RocksDb rotate logs, useful for debugging performance issues. It will rotate logs every 12 hours,
            /// up to a maximum of 60 logs (i.e. 30 days). When the maximum amount of logs is reached, the oldest logs
            /// are overwritten in a circular fashion.
            ///
            /// Every time the RocksDb instance is open, the current log file is truncated, which means that if you
            /// open the DB more than once in a 12 hour period, you will only have partial information.
            /// </param>
            public RocksDbStore(
                string storeDirectory,
                bool defaultColumnKeyTracked                     = false,
                IEnumerable<string> additionalColumns            = null,
                IEnumerable<string> additionalKeyTrackedColumns  = null,
                bool readOnly = false,
                bool dropMismatchingColumns = false,
                bool rotateLogs             = false)
            {
                m_storeDirectory = storeDirectory;

                m_defaults.DbOptions = new DbOptions()
                                       .SetCreateIfMissing(true)
                                       .SetCreateMissingColumnFamilies(true)
                                       // The background compaction threads run in low priority, so they should not hamper the rest of
                                       // the system. The number of cores in the system is what we want here according to official docs,
                                       // and we are setting this to the number of logical processors, which may be higher.
                                       .SetMaxBackgroundCompactions(Environment.ProcessorCount)
                                       .SetMaxBackgroundFlushes(1)
                                       .IncreaseParallelism(Environment.ProcessorCount / 2)
                                       // Ensure we have performance statistics for profiling
                                       .EnableStatistics();

                // A small comment on things tested that did not work:
                //  * SetAllowMmapReads(true) and SetAllowMmapWrites(true) produce a dramatic performance drop
                //  * SetUseDirectReads(true) disables the OS cache, and although that's good for random point lookups,
                //    it produces a dramatic performance drop otherwise.

                m_defaults.WriteOptions = new WriteOptions()
                                          // Disable the write ahead log to reduce disk IO. The write ahead log
                                          // is used to recover the store on crashes, so a crash will lose some writes.
                                          // Writes will be made in-memory only until the write buffer size
                                          // is reached and then they will be flushed to storage files.
                                          .DisableWal(1)
                                          // This option is off by default, but just making sure that the C# wrapper
                                          // doesn't change anything. The idea is that the DB won't wait for fsync to
                                          // return before acknowledging the write as successful. This affects
                                          // correctness, because a write may be ACKd before it is actually on disk,
                                          // but it is much faster.
                                          .SetSync(false);


                var blockBasedTableOptions = new BlockBasedTableOptions()
                                             // Use a bloom filter to help reduce read amplification on point lookups. 10 bits per key yields a
                                             // ~1% false positive rate as per the RocksDB documentation. This builds one filter per SST, which
                                             // means it is optimized for not having a key.
                                             .SetFilterPolicy(BloomFilterPolicy.Create(10, false))
                                             // Use a hash index in SST files to speed up point lookup.
                                             .SetIndexType(BlockBasedTableIndexType.HashSearch)
                                             // Whether to filter on the whole key or on a prefix of it (obtained through the prefix extractor
                                             // below). Since the prefix extractor is a no-op, filter on the whole key (i.e. set this to true).
                                             .SetWholeKeyFiltering(true);

                m_defaults.ColumnFamilyOptions = new ColumnFamilyOptions()
                                                 .SetBlockBasedTableFactory(blockBasedTableOptions)
                                                 .SetPrefixExtractor(SliceTransform.CreateNoOp());

                if (rotateLogs)
                {
                    // Maximum number of information log files
                    m_defaults.DbOptions.SetKeepLogFileNum(60);

                    // Do not rotate information logs based on file size
                    m_defaults.DbOptions.SetMaxLogFileSize(0);

                    // How long before we rotate the current information log file.
                    // TotalSeconds (43200 for 12 hours) is required here: TimeSpan.Seconds is only the 0-59
                    // seconds component and evaluates to 0 for a whole number of hours.
                    m_defaults.DbOptions.SetLogFileTimeToRoll((ulong)TimeSpan.FromHours(12).TotalSeconds);
                }

                m_columns = new Dictionary<string, ColumnFamilyInfo>();

                additionalColumns           = additionalColumns ?? CollectionUtilities.EmptyArray<string>();
                additionalKeyTrackedColumns = additionalKeyTrackedColumns ?? CollectionUtilities.EmptyArray<string>();

                // The columns that exist in the store on disk may not be in sync with the columns being passed into the constructor
                HashSet<string> existingColumns;

                try
                {
                    existingColumns = new HashSet<string>(RocksDb.ListColumnFamilies(m_defaults.DbOptions, m_storeDirectory));
                }
                catch (RocksDbException)
                {
                    // If there is no existing store, an exception will be thrown, ignore it
                    existingColumns = new HashSet<string>();
                }

                // In read-only mode, open all existing columns in the store without attempting to validate it against the expected column families
                if (readOnly)
                {
                    var columnFamilies = new ColumnFamilies();
                    foreach (var name in existingColumns)
                    {
                        columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
                    }

                    m_store = RocksDb.OpenReadOnly(m_defaults.DbOptions, m_storeDirectory, columnFamilies, errIfLogFileExists: false);
                }
                else
                {
                    // For read-write mode, column families may be added, so set up column families schema
                    var columnsSchema = new HashSet<string>(additionalColumns);

                    // Default column
                    columnsSchema.Add(ColumnFamilies.DefaultName);

                    // For key-tracked column families, create two columns:
                    // 1: Normal column of { key : value }
                    // 2: Key-tracking column of { key : empty-value }
                    if (defaultColumnKeyTracked)
                    {
                        // To be robust to the RocksDB-selected default column name changing,
                        // just name the default column's key-tracking column KeyColumnSuffix
                        columnsSchema.Add(KeyColumnSuffix);
                    }

                    foreach (var name in additionalKeyTrackedColumns)
                    {
                        columnsSchema.Add(name);
                        columnsSchema.Add(name + KeyColumnSuffix);
                    }

                    // Figure out which columns are not part of the schema
                    var outsideSchemaColumns = new List<string>(existingColumns.Except(columnsSchema));

                    // RocksDB requires all columns in the store to be opened in read-write mode, so merge existing columns
                    // with the columns schema that was passed into the constructor
                    existingColumns.UnionWith(columnsSchema);

                    var columnFamilies = new ColumnFamilies();
                    foreach (var name in existingColumns)
                    {
                        columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
                    }

                    m_store = RocksDb.Open(m_defaults.DbOptions, m_storeDirectory, columnFamilies);

                    // Provide an opportunity to update the store to the new column family schema
                    if (dropMismatchingColumns)
                    {
                        foreach (var name in outsideSchemaColumns)
                        {
                            m_store.DropColumnFamily(name);
                            existingColumns.Remove(name);
                        }
                    }
                }

                // Column names are identifiers, so compare the suffix ordinally rather than with culture-sensitive rules.
                var userFacingColumns = existingColumns.Where(name => !name.EndsWith(KeyColumnSuffix, StringComparison.Ordinal));

                foreach (var name in userFacingColumns)
                {
                    // A column is key-tracked when its companion "<name> + KeyColumnSuffix" column is also open.
                    var isKeyTracked = existingColumns.Contains(name + KeyColumnSuffix);
                    m_columns.Add(name, new ColumnFamilyInfo()
                    {
                        Handle         = m_store.GetColumnFamily(name),
                        UseKeyTracking = isKeyTracked,
                        KeyHandle      = isKeyTracked ? m_store.GetColumnFamily(name + KeyColumnSuffix) : null,
                    });
                }

                m_columns.TryGetValue(ColumnFamilies.DefaultName, out m_defaultColumnFamilyInfo);
            }
Exemplo n.º 3
0
        /// <summary>
        /// Builds the fixture state: a <c>StreamConfig</c> whose <c>RocksDbConfigHandler</c> applies a broad set of
        /// RocksDB options, a task id / partition / state manager, a mocked task, a processor context, and finally
        /// a <c>RocksDbKeyValueStore</c> named "test-store" that is initialized against that context.
        /// </summary>
        public void Begin()
        {
            config = new StreamConfig();
            config.ApplicationId = "RocksDbOptionsTests";
            config.UseRandomRocksDbConfigForTest();

            // Invoked with the store name and its options object; every setter below is chained on that object.
            config.RocksDbConfigHandler = (name, options) =>
            {
                options
                .EnableStatistics()
                .IncreaseParallelism(parallelism)
                .OptimizeForPointLookup(blockCacheSizeMb)
                .OptimizeLevelStyleCompaction(memTableMemoryBudget)
                .OptimizeUniversalStyleCompaction(memtableMemoryBudget2)
                .PrepareForBulkLoad()
                .SetAccessHintOnCompactionStart(accessHintOnCompactionStart)
                .SetAdviseRandomOnOpen(adviseRandomOnOpen)
                .SetAllowConcurrentMemtableWrite(allowConcurrentMemtableWrite)
                .SetAllowMmapReads(false)
                .SetAllowMmapWrites(false)
                .SetArenaBlockSize(arenaBlockSize)
                .SetBaseBackgroundCompactions(baseBackgroundCompactions)
                .SetBloomLocality(bloomLocality)
                .SetBytesPerSync(bytesPerSync)
                // .SetCompactionFilter(IntPtr.Zero)
                // .SetCompactionFilterFactory(IntPtr.Zero)
                .SetCompactionReadaheadSize(1200)
                .SetCompactionStyle(RocksDbSharp.Compaction.Level)
                //.SetComparator(IntPtr.Zero)
                .SetCompression(RocksDbSharp.Compression.Lz4)
                .SetCompressionOptions(1, 2, 3, 4)
                .SetCompressionPerLevel(new[] { RocksDbSharp.Compression.Lz4 }, 1)
                .SetCreateIfMissing()
                .SetCreateMissingColumnFamilies()
                .SetDbLogDir("test")
                .SetDbWriteBufferSize(1000)
                .SetDeleteObsoleteFilesPeriodMicros(50)
                .SetDisableAutoCompactions(1)
                .SetEnableWriteThreadAdaptiveYield(true)
                //.SetEnv(IntPtr.Zero)
                .SetErrorIfExists()
                //.SetFifoCompactionOptions(IntPtr.Zero)
                .SetHardPendingCompactionBytesLimit(1)
                .SetHardRateLimit(40597)
                .SetHashLinkListRep(12)
                .SetHashSkipListRep(56, 4, 2)
                //.SetInfoLog(IntPtr.Zero)
                .SetInfoLogLevel(RocksLogLevel.NUM_INFO_LOG)
                .SetInplaceUpdateNumLocks(134)
                .SetIsFdCloseOnExec(false)
                .SetKeepLogFileNum(1)
                .SetLevel0FileNumCompactionTrigger(14)
                .SetLevel0SlowdownWritesTrigger(144)
                .SetInplaceUpdateSupport(true)
                .SetLevel0StopWritesTrigger(24)
                .SetLevelCompactionDynamicLevelBytes(true)
                .SetLogFileTimeToRoll(154)
                .SetManifestPreallocationSize(153)
                .SetMaxBackgroundFlushes(3)
                .SetMaxBytesForLevelBase(1453)
                .SetMaxBytesForLevelMultiplier(2)
                .SetMaxBytesForLevelMultiplierAdditional(new[] { 1 }, 1)
                .SetMaxCompactionBytes(345678)
                .SetMaxFileOpeningThreads(2)
                .SetMaxLogFileSize(1)
                .SetMaxManifestFileSize(131)
                .SetMaxMemCompactionLevel(12)
                .SetMaxOpenFiles(20)
                .SetMaxSequentialSkipInIterations(13)
                .SetMaxSuccessiveMerges(12)
                .SetMaxTotalWalSize(12)
                .SetMaxWriteBufferNumber(15543)
                .SetMaxWriteBufferNumberToMaintain(126)
                .SetMemtableHugePageSize(64317)
                .SetMemtablePrefixBloomSizeRatio(12)
                //.SetMergeOperator(IntPtr.Zero)
                .SetMinLevelToCompress(1)
                .SetMemtableVectorRep()
                .SetMinWriteBufferNumberToMerge(1312)
                .SetNumLevels(1)
                .SetOptimizeFiltersForHits(56)
                .SetParanoidChecks()
                .SetSkipLogErrorOnRecovery(true)
                .SetSoftPendingCompactionBytesLimit(134)
                .SetSoftRateLimit(131)
                .SetStatsDumpPeriodSec(532)
                .SetTableCacheNumShardbits(12)
                .SetTableCacheRemoveScanCountLimit(66)
                .SetTargetFileSizeBase(6)
                .SetTargetFileSizeMultiplier(2)
                .SetUint64addMergeOperator()
                //.SetUniversalCompactionOptions(IntPtr.Zero)
                .SetUseAdaptiveMutex(false)
                .SetUseDirectIoForFlushAndCompaction(true)
                .SetUseDirectReads(true)
                .SetUseFsync(1)
                .SetWalRecoveryMode(RocksDbSharp.Recovery.SkipAnyCorruptedRecords)
                .SetWALSizeLimitMB(40)
                .SetWALTtlSeconds(1454151413)
                .SetWriteBufferSize(45678976543)
                .SetPrefixExtractor(SliceTransform.CreateNoOp())
                .SetPurgeRedundantKvsWhileFlush(false)
                .SetRateLimitDelayMaxMilliseconds(1762)
                .SetRecycleLogFileNum(1)
                .SetReportBgIoStats(true)
                .SetPlainTableFactory(1, 23, 4, 2)
                .SetMaxBackgroundCompactions(1);
            };

            id = new TaskId
            {
                Id = 0,
                Partition = 0
            };
            partition    = new TopicPartition("source", 0);
            stateManager = new ProcessorStateManager(
                id,
                new List<TopicPartition> { partition },
                null,
                new MockChangelogRegister(),
                new MockOffsetCheckpointManager());

            // The mocked task only needs to report its id.
            task = new Mock<AbstractTask>();
            task.Setup(k => k.Id).Returns(id);

            context = new ProcessorContext(task.Object, config, stateManager, new StreamMetricsRegistry());

            store = new RocksDbKeyValueStore("test-store");
            store.Init(context, store);
        }