/// <summary>
/// Opens a versioned key value store, creating it when no store directory exists yet, and returns a
/// <see cref="KeyValueStoreAccessor"/> to the store.
/// </summary>
/// <param name="storeArguments">
/// Arguments for the underlying key value store.
/// </param>
/// <param name="storeVersion">
/// The version of the caller's store.
/// </param>
/// <param name="failureHandler">
/// Optional hook for custom exception handling such as context-specific logging.
/// </param>
/// <param name="onFailureDeleteExistingStoreAndRetry">
/// When the first attempt to open the store fails, whether a second attempt should be made that creates a
/// new store in place of the existing one. This will cause data loss of the old store. The retry is never
/// made for read-only stores.
/// </param>
/// <param name="invalidationHandler">
/// <see cref="m_invalidationHandler"/>
/// </param>
/// <param name="onStoreReset">
/// Callback invoked with the failure of the first attempt right before the store gets reset due to
/// <paramref name="onFailureDeleteExistingStoreAndRetry"/>.
/// </param>
/// <returns>
/// The accessor on success; otherwise the failure of the last attempt that was made.
/// </returns>
public static Possible<KeyValueStoreAccessor> OpenWithVersioning(
    RocksDbStoreArguments storeArguments,
    int storeVersion,
    Action<RocksDbFailureEvent> failureHandler = null,
    bool onFailureDeleteExistingStoreAndRetry = false,
    Action<Failure<Exception>> invalidationHandler = null,
    Action<Failure> onStoreReset = null)
{
    // First attempt: a new store is only created when there is no directory to open.
    bool storeDirectoryExists = FileUtilities.DirectoryExistsNoFollow(storeArguments.StoreDirectory);
    var firstAttempt = OpenInternal(
        storeArguments,
        storeVersion,
        failureHandler,
        createNewStore: !storeDirectoryExists,
        invalidationHandler: invalidationHandler);

    // Nothing more to do when the store opened, when the caller did not opt in to a reset, or when the
    // store is read-only (there is no point in reading from an empty store).
    if (firstAttempt.Succeeded || !onFailureDeleteExistingStoreAndRetry || storeArguments.ReadOnly)
    {
        return firstAttempt;
    }

    // Fall back on deleting the store and creating a new one in its place.
    onStoreReset?.Invoke(firstAttempt.Failure);

    return OpenInternal(
        storeArguments,
        storeVersion,
        failureHandler,
        createNewStore: true,
        invalidationHandler: invalidationHandler);
}
/// <summary>
/// Opens or creates an unversioned key value store and returns a <see cref="KeyValueStoreAccessor"/> to the store.
/// </summary>
/// <remarks>
/// Forwards to <see cref="OpenWithVersioning"/> with <see cref="VersionConstants.UnversionedStore"/> as the store version.
/// </remarks>
/// <param name="storeArguments">
/// Arguments for the underlying key value store.
/// </param>
/// <param name="failureHandler">
/// Optional hook for custom exception handling such as context-specific logging.
/// </param>
/// <param name="onFailureDeleteExistingStoreAndRetry">
/// On failure to open an existing store at the given directory, whether an attempt should be made to delete
/// the existing store and create a new one in its place. This will cause data loss of the old store.
/// </param>
/// <param name="invalidationHandler">
/// <see cref="m_invalidationHandler"/>
/// </param>
/// <param name="onStoreReset">
/// Callback for when the store gets reset due to <paramref name="onFailureDeleteExistingStoreAndRetry"/>.
/// </param>
public static Possible<KeyValueStoreAccessor> Open(
    RocksDbStoreArguments storeArguments,
    Action<RocksDbFailureEvent> failureHandler = null,
    bool onFailureDeleteExistingStoreAndRetry = false,
    Action<Failure<Exception>> invalidationHandler = null,
    Action<Failure> onStoreReset = null)
    => OpenWithVersioning(
        storeArguments,
        storeVersion: VersionConstants.UnversionedStore,
        failureHandler: failureHandler,
        onFailureDeleteExistingStoreAndRetry: onFailureDeleteExistingStoreAndRetry,
        invalidationHandler: invalidationHandler,
        onStoreReset: onStoreReset);
/// <summary>
/// Creates an accessor over a <see cref="RocksDbStore"/> opened with the given arguments.
/// </summary>
/// <param name="storeArguments">Arguments for the underlying key value store.</param>
/// <param name="storeVersion">The version recorded for the store; must not be <see cref="VersionConstants.InvalidStore"/>.</param>
/// <param name="failureHandler">Stored as the accessor's failure handler; may be null.</param>
/// <param name="createdNewStore">Whether the store was newly created for this accessor.</param>
/// <param name="invalidationHandler">Stored as the accessor's invalidation handler; may be null.</param>
private KeyValueStoreAccessor(
    RocksDbStoreArguments storeArguments,
    int storeVersion,
    Action<RocksDbFailureEvent> failureHandler,
    bool createdNewStore,
    Action<Failure<Exception>> invalidationHandler)
{
    Contract.Assert(
        storeVersion != VersionConstants.InvalidStore,
        "No store should pass the invalid store version since it is not safe to open an invalid store.");

    // Version bookkeeping.
    StoreVersion = storeVersion;
    CreatedNewStore = createdNewStore;

    // Values mirrored from the store arguments.
    ReadOnly = storeArguments.ReadOnly;
    StoreDirectory = storeArguments.StoreDirectory;

    // Opening the underlying store happens before the handlers are assigned, as in the original ordering.
    m_store = new RocksDbStore(storeArguments);

    m_invalidationHandler = invalidationHandler;
    m_failureHandler = failureHandler;
}
/// <summary>
/// Provides access to and/or creates a RocksDb persistent key-value store.
/// </summary>
/// <remarks>
/// Configures the database, write, table and column family options from <paramref name="arguments"/>, opens
/// the store at <see cref="RocksDbStoreArguments.StoreDirectory"/> (read-only or read-write), and records a
/// <see cref="ColumnFamilyInfo"/> for every user-facing column family.
/// </remarks>
/// <param name="arguments">
/// Arguments describing the store location, open mode, log rotation settings and column family schema.
/// </param>
public RocksDbStore(RocksDbStoreArguments arguments)
{
    m_storeDirectory = arguments.StoreDirectory;
    m_openBulkLoad = arguments.OpenBulkLoad;

    m_defaults.DbOptions = new DbOptions()
        .SetCreateIfMissing(true)
        .SetCreateMissingColumnFamilies(true)
        // The background compaction threads run in low priority, so they should not hamper the rest of
        // the system. The number of cores in the system is what we want here according to official docs,
        // and we are setting this to the number of logical processors, which may be higher.
        // See: https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide#parallelism-options
#if !PLATFORM_OSX
        .SetMaxBackgroundCompactions(Environment.ProcessorCount)
        .SetMaxBackgroundFlushes(1)
#else
        // The memtable uses significant chunks of available system memory on macOS, we increase the number
        // of background flushing threads (low priority) and set the DB write buffer size. This allows for
        // up to 128 MB in memtables across all column families before we flush to disk.
        .SetMaxBackgroundCompactions(Environment.ProcessorCount / 4)
        .SetMaxBackgroundFlushes(Environment.ProcessorCount / 4)
        .SetDbWriteBufferSize(128 << 20)
#endif
        .IncreaseParallelism(Environment.ProcessorCount / 2);

    if (arguments.EnableStatistics)
    {
        m_defaults.DbOptions.EnableStatistics();
    }

    if (arguments.OpenBulkLoad)
    {
        m_defaults.DbOptions.PrepareForBulkLoad();
    }

    // Maximum number of information log files
    if (arguments.RotateLogsNumFiles != null)
    {
        m_defaults.DbOptions.SetKeepLogFileNum(arguments.RotateLogsNumFiles.Value);
    }

    // Maximum size of an information log file
    if (arguments.RotateLogsMaxFileSizeBytes != null)
    {
        m_defaults.DbOptions.SetMaxLogFileSize(arguments.RotateLogsMaxFileSizeBytes.Value);
    }

    // How long before we rotate the current information log file
    if (arguments.RotateLogsMaxAge != null)
    {
        // TotalSeconds is the whole duration. The Seconds property is only the seconds component (0-59) of
        // the time span, which would turn e.g. a one hour max age into 0.
        m_defaults.DbOptions.SetLogFileTimeToRoll((ulong)arguments.RotateLogsMaxAge.Value.TotalSeconds);
    }

    if (arguments.FastOpen)
    {
        // max_file_opening_threads is defaulted to 16, so no need to update here.
        RocksDbSharp.Native.Instance.rocksdb_options_set_skip_stats_update_on_db_open(m_defaults.DbOptions.Handle, true);
    }

    if (arguments.DisableAutomaticCompactions)
    {
        m_defaults.DbOptions.SetDisableAutoCompactions(1);
    }

    // A small comment on things tested that did not work:
    //  * SetAllowMmapReads(true) and SetAllowMmapWrites(true) produce a dramatic performance drop
    //  * SetUseDirectReads(true) disables the OS cache, and although that's good for random point lookups,
    //    it produces a dramatic performance drop otherwise.

    m_defaults.WriteOptions = new WriteOptions()
        // Disable the write ahead log to reduce disk IO. The write ahead log
        // is used to recover the store on crashes, so a crash will lose some writes.
        // Writes will be made in-memory only until the write buffer size
        // is reached and then they will be flushed to storage files.
        .DisableWal(1)
        // This option is off by default, but just making sure that the C# wrapper
        // doesn't change anything. The idea is that the DB won't wait for fsync to
        // return before acknowledging the write as successful. This affects
        // correctness, because a write may be ACKd before it is actually on disk,
        // but it is much faster.
        .SetSync(false);

    var blockBasedTableOptions = new BlockBasedTableOptions()
        // Use a bloom filter to help reduce read amplification on point lookups. 10 bits per key yields a
        // ~1% false positive rate as per the RocksDB documentation. This builds one filter per SST, which
        // means it's optimized for not having a key.
        .SetFilterPolicy(BloomFilterPolicy.Create(10, false))
        // Use a hash index in SST files to speed up point lookup.
        .SetIndexType(BlockBasedTableIndexType.HashSearch)
        // Whether to use the whole key or a prefix of it (obtained through the prefix extractor below).
        // Since the prefix extractor is a no-op, better performance is achieved by turning this off (i.e.
        // setting it to true).
        .SetWholeKeyFiltering(true);

    m_defaults.ColumnFamilyOptions = new ColumnFamilyOptions()
#if PLATFORM_OSX
        // As advised by the official documentation, LZ4 is the preferred compression algorithm, our RocksDB
        // dynamic library has been compiled to support this on macOS. Fallback to Snappy on other systems (default).
        .SetCompression(CompressionTypeEnum.rocksdb_lz4_compression)
#endif
        .SetBlockBasedTableFactory(blockBasedTableOptions)
        .SetPrefixExtractor(SliceTransform.CreateNoOp());

    m_columns = new Dictionary<string, ColumnFamilyInfo>();

    // The columns that exist in the store on disk may not be in sync with the columns being passed into the constructor
    HashSet<string> existingColumns;
    try
    {
        existingColumns = new HashSet<string>(RocksDb.ListColumnFamilies(m_defaults.DbOptions, m_storeDirectory));
    }
    catch (RocksDbException)
    {
        // If there is no existing store, an exception will be thrown, ignore it
        existingColumns = new HashSet<string>();
    }

    // In read-only mode, open all existing columns in the store without attempting to validate it against the expected column families
    if (arguments.ReadOnly)
    {
        var columnFamilies = new ColumnFamilies();
        foreach (var name in existingColumns)
        {
            columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
        }

        m_store = RocksDb.OpenReadOnly(m_defaults.DbOptions, m_storeDirectory, columnFamilies, errIfLogFileExists: false);
    }
    else
    {
        // For read-write mode, column families may be added, so set up column families schema
        var additionalColumns = arguments.AdditionalColumns ?? CollectionUtilities.EmptyArray<string>();
        var columnsSchema = new HashSet<string>(additionalColumns);

        // Default column
        columnsSchema.Add(ColumnFamilies.DefaultName);

        // For key-tracked column families, create two columns:
        // 1: Normal column of { key : value }
        // 2: Key-tracking column of { key : empty-value }
        if (arguments.DefaultColumnKeyTracked)
        {
            // To be robust to the RocksDB-selected default column name changing,
            // just name the default column's key-tracking column KeyColumnSuffix
            columnsSchema.Add(KeyColumnSuffix);
        }

        var additionalKeyTrackedColumns = arguments.AdditionalKeyTrackedColumns ?? CollectionUtilities.EmptyArray<string>();
        foreach (var name in additionalKeyTrackedColumns)
        {
            columnsSchema.Add(name);
            columnsSchema.Add(name + KeyColumnSuffix);
        }

        // Figure out which columns are not part of the schema
        var outsideSchemaColumns = new List<string>(existingColumns.Except(columnsSchema));

        // RocksDB requires all columns in the store to be opened in read-write mode, so merge existing columns
        // with the columns schema that was passed into the constructor
        existingColumns.UnionWith(columnsSchema);

        var columnFamilies = new ColumnFamilies();
        foreach (var name in existingColumns)
        {
            columnFamilies.Add(name, m_defaults.ColumnFamilyOptions);
        }

        m_store = RocksDb.Open(m_defaults.DbOptions, m_storeDirectory, columnFamilies);

        // Provide an opportunity to update the store to the new column family schema
        if (arguments.DropMismatchingColumns)
        {
            foreach (var name in outsideSchemaColumns)
            {
                m_store.DropColumnFamily(name);
                existingColumns.Remove(name);
            }
        }
    }

    // Column names are identifiers, so the suffix match is ordinal rather than culture-sensitive.
    var userFacingColumns = existingColumns.Where(name => !name.EndsWith(KeyColumnSuffix, StringComparison.Ordinal));

    foreach (var name in userFacingColumns)
    {
        // NOTE(review): the default column's key-tracking column is registered above as KeyColumnSuffix alone,
        // while this lookup uses name + KeyColumnSuffix; confirm key tracking of the default column is
        // detected as intended.
        var isKeyTracked = existingColumns.Contains(name + KeyColumnSuffix);
        m_columns.Add(name, new ColumnFamilyInfo()
        {
            Handle = m_store.GetColumnFamily(name),
            UseKeyTracking = isKeyTracked,
            KeyHandle = isKeyTracked ? m_store.GetColumnFamily(name + KeyColumnSuffix) : null,
        });
    }

    m_columns.TryGetValue(ColumnFamilies.DefaultName, out m_defaultColumnFamilyInfo);
}
/// <summary>
/// Makes a single attempt to open (or create) the store at the directory given by
/// <paramref name="storeArguments"/> and wraps it in a <see cref="KeyValueStoreAccessor"/>.
/// </summary>
/// <param name="storeArguments">Arguments for the underlying key value store.</param>
/// <param name="storeVersion">
/// The version the caller expects. <see cref="VersionConstants.IgnoreStore"/> turns off the version file
/// write and the version compatibility check.
/// </param>
/// <param name="failureHandler">Passed through to the accessor.</param>
/// <param name="createNewStore">
/// When true, whatever exists at the store directory (a file, or the contents of a directory) is deleted and
/// a fresh store is created. When false, the persisted store version is read and validated before opening.
/// </param>
/// <param name="invalidationHandler">Passed through to the accessor.</param>
/// <returns>
/// The accessor on success. A failure when the persisted version cannot be read, marks the store invalid,
/// or is incompatible with <paramref name="storeVersion"/>, or when any exception is thrown while preparing
/// or opening the store.
/// </returns>
private static Possible<KeyValueStoreAccessor> OpenInternal(
    RocksDbStoreArguments storeArguments,
    int storeVersion,
    Action<RocksDbFailureEvent> failureHandler,
    bool createNewStore,
    Action<Failure<Exception>> invalidationHandler)
{
    bool useVersioning = storeVersion != VersionConstants.IgnoreStore;

    try
    {
        int persistedStoreVersion;

        if (createNewStore)
        {
            // Clear out anything occupying the store location before creating the directory.
            if (FileUtilities.FileExistsNoFollow(storeArguments.StoreDirectory))
            {
                FileUtilities.DeleteFile(storeArguments.StoreDirectory);
            }
            else if (FileUtilities.DirectoryExistsNoFollow(storeArguments.StoreDirectory))
            {
                FileUtilities.DeleteDirectoryContents(storeArguments.StoreDirectory);
            }

            FileUtilities.CreateDirectory(storeArguments.StoreDirectory);

            if (useVersioning)
            {
                WriteVersionFile(storeArguments.StoreDirectory, storeVersion);
            }

            persistedStoreVersion = storeVersion;
        }
        else
        {
            var possibleStoreVersion = ReadStoreVersion(storeArguments.StoreDirectory);
            if (!possibleStoreVersion.Succeeded)
            {
                return possibleStoreVersion.Failure;
            }

            persistedStoreVersion = possibleStoreVersion.Result;

            // Even if the store does not use the built in versioning, checks for an invalid store will be done to ensure a corrupt store is not opened
            if (persistedStoreVersion == VersionConstants.InvalidStore)
            {
                return new Failure<string>("The existing store is invalid and and may not be safe to open.");
            }

            // First check for invalid (corrupt) stores before incompatible store format versions
            if (useVersioning && persistedStoreVersion != storeVersion)
            {
                return new Failure<string>($"The existing store format version is incompatible expected format version. Existing store version: {persistedStoreVersion}, expected format version: {storeVersion}.");
            }
        }

        return new KeyValueStoreAccessor(
            storeArguments,
            persistedStoreVersion,
            failureHandler,
            createNewStore,
            invalidationHandler);
    }
    catch (Exception ex)
    {
        // Any exception thrown while preparing the directory or opening the store is reported as a failure
        // rather than propagated.
        return new Failure<Exception>(ex);
    }
}