/// <summary>
/// Remove an object from the deduplication index, then ask the supplied DeleteChunk callback
/// to remove every chunk key the database reports back for garbage collection.
/// Only the callbacks passed to this method are used.
/// </summary>
/// <param name="objectName">The name of the object.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; DeleteChunk must be set.</param>
/// <returns>Always true; a chunk that cannot be deleted is logged but does not fail the call.</returns>
public bool DeleteObject(string objectName, CallbackMethods callbacks)
{
    if (String.IsNullOrEmpty(objectName))
    {
        throw new ArgumentNullException(nameof(objectName));
    }

    if (callbacks == null)
    {
        throw new ArgumentNullException(nameof(callbacks));
    }

    if (callbacks.DeleteChunk == null)
    {
        throw new ArgumentException("DeleteChunk callback must be specified.");
    }

    string sanitizedName = DedupeCommon.SanitizeString(objectName);

    // The index update and the chunk deletions happen under the same lock.
    lock (_ChunkLock)
    {
        _Database.DeleteObjectChunks(sanitizedName, out List<string> collectableKeys);

        if (collectableKeys == null || collectableKeys.Count == 0)
        {
            return true;
        }

        foreach (string chunkKey in collectableKeys)
        {
            bool deleted = callbacks.DeleteChunk(chunkKey);
            if (!deleted)
            {
                Log("Unable to delete chunk: " + chunkKey);
            }
        }
    }

    return true;
}
/// <summary>
/// Initialize an existing index using an external database. Tables must be created ahead of time.
/// </summary>
/// <param name="database">Database provider implemented using the Database.DbProvider class.</param>
/// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
/// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
/// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
/// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
/// <param name="debugSql">Enable console logging for SQL operations.</param>
public DedupeLibrary(
    DbProvider database,
    Func<Chunk, bool> writeChunkMethod,
    Func<string, byte[]> readChunkMethod,
    Func<string, bool> deleteChunkMethod,
    bool debugDedupe,
    bool debugSql)
{
    // Callback arguments are validated before the database argument.
    if (writeChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(writeChunkMethod));
    }

    if (readChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(readChunkMethod));
    }

    if (deleteChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(deleteChunkMethod));
    }

    if (database == null)
    {
        throw new ArgumentNullException(nameof(database));
    }

    _Database = database;

    CallbackMethods configured = new CallbackMethods();
    configured.WriteChunk = writeChunkMethod;
    configured.ReadChunk = readChunkMethod;
    configured.DeleteChunk = deleteChunkMethod;
    Callbacks = configured;

    DebugDedupe = debugDedupe;
    DebugSql = debugSql;
    _ChunkLock = new object();

    InitFromExistingIndex();
}
/// <summary>
/// Obtain a DedupeStream over a stored object.
/// The stream is built from the object's metadata, the index database, and the supplied callbacks.
/// </summary>
/// <param name="objectName">The name of the object.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; ReadChunk must be set.</param>
/// <param name="stream">Read-only stream, or null when the object metadata could not be retrieved.</param>
/// <returns>True if successful.</returns>
public bool RetrieveObjectStream(string objectName, CallbackMethods callbacks, out DedupeStream stream)
{
    stream = null;

    if (String.IsNullOrEmpty(objectName))
    {
        throw new ArgumentNullException(nameof(objectName));
    }

    if (callbacks == null)
    {
        throw new ArgumentNullException(nameof(callbacks));
    }

    if (callbacks.ReadChunk == null)
    {
        throw new ArgumentException("ReadChunk callback must be specified.");
    }

    string sanitizedName = DedupeCommon.SanitizeString(objectName);

    bool metadataFound = RetrieveObjectMetadata(sanitizedName, out ObjectMetadata metadata);
    if (metadataFound)
    {
        stream = new DedupeStream(metadata, _Database, callbacks);
    }

    return metadataFound;
}
/// <summary>
/// Retrieve an object from the deduplication index as a byte array.
/// This method will use the callbacks supplied in the method signature.
/// </summary>
/// <param name="objectName">The name of the object.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods.</param>
/// <param name="data">The byte data from the object, or null when retrieval fails.</param>
/// <returns>True if successful.</returns>
public bool RetrieveObject(string objectName, CallbackMethods callbacks, out byte[] data)
{
    data = null;

    bool success = RetrieveObject(objectName, callbacks, out long contentLength, out Stream stream);
    if (!success)
    {
        // On failure the stream overload leaves the stream either null or only
        // partially populated; neither should be converted and handed to the caller.
        stream?.Dispose();
        return false;
    }

    // The intermediate stream is owned by this method, so release it once copied.
    using (stream)
    {
        data = DedupeCommon.StreamToBytes(stream);
    }

    return true;
}
/// <summary>
/// Store an object from a byte array, replacing any object already stored under the same name.
/// The bytes are wrapped in a stream and handed to the stream-based overload.
/// This method will use the callbacks supplied in the method signature.
/// </summary>
/// <param name="objectName">The name of the object. Must be unique in the index.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods.</param>
/// <param name="data">The byte data for the object; must contain at least one byte.</param>
/// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
/// <returns>True if successful.</returns>
public bool StoreOrReplaceObject(string objectName, CallbackMethods callbacks, byte[] data, out List<Chunk> chunks)
{
    bool hasContent = data != null && data.Length >= 1;
    if (!hasContent)
    {
        throw new ArgumentNullException(nameof(data));
    }

    long length = data.Length;
    var source = DedupeCommon.BytesToStream(data);

    return StoreOrReplaceObject(objectName, callbacks, length, source, out chunks);
}
/// <summary>
/// Retrieve an object from the deduplication index as a stream.
/// Each chunk listed in the object's metadata is read through the supplied ReadChunk callback
/// and appended, in order, to an in-memory stream.
/// This method will use the callbacks supplied in the method signature.
/// </summary>
/// <param name="objectName">The name of the object.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; ReadChunk must be set.</param>
/// <param name="contentLength">The length of the data, accumulated from the chunks read.</param>
/// <param name="stream">The stream containing the data, positioned at the beginning on success.</param>
/// <returns>True if successful.</returns>
public bool RetrieveObject(string objectName, CallbackMethods callbacks, out long contentLength, out Stream stream)
{
    stream = null;
    contentLength = 0;

    if (String.IsNullOrEmpty(objectName))
    {
        throw new ArgumentNullException(nameof(objectName));
    }

    if (callbacks == null)
    {
        throw new ArgumentNullException(nameof(callbacks));
    }

    if (callbacks.ReadChunk == null)
    {
        throw new ArgumentException("ReadChunk callback must be specified.");
    }

    objectName = DedupeCommon.SanitizeString(objectName);

    lock (_ChunkLock)
    {
        bool found = _Database.GetObjectMetadata(objectName, out ObjectMetadata metadata);
        if (!found)
        {
            Log("Unable to retrieve object metadata for object " + objectName);
            return false;
        }

        bool hasChunks = metadata.Chunks != null && metadata.Chunks.Count >= 1;
        if (!hasChunks)
        {
            Log("No chunks returned");
            return false;
        }

        stream = new MemoryStream();

        foreach (Chunk entry in metadata.Chunks)
        {
            byte[] payload = callbacks.ReadChunk(entry.Key);

            bool payloadUsable = payload != null && payload.Length >= 1;
            if (!payloadUsable)
            {
                // The out parameters keep whatever was accumulated so far.
                Log("Unable to read chunk " + entry.Key);
                return false;
            }

            stream.Write(payload, 0, payload.Length);
            contentLength += payload.Length;
        }

        if (contentLength > 0)
        {
            stream.Seek(0, SeekOrigin.Begin);
        }
    }

    return true;
}
/// <summary>
/// Store an object within a container in the deduplication index if it doesn't already exist, or, replace the object if it does.
/// This method will use the callbacks supplied in the method signature, both for deleting
/// the chunks of an existing object and for writing the new chunks.
/// </summary>
/// <param name="objectName">The name of the object. Must be unique in the index.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; WriteChunk and DeleteChunk must be set.</param>
/// <param name="contentLength">The length of the data; must be at least one byte.</param>
/// <param name="stream">The stream containing the data.</param>
/// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
/// <returns>True if successful.</returns>
public bool StoreOrReplaceObject(string objectName, CallbackMethods callbacks, long contentLength, Stream stream, out List<Chunk> chunks)
{
    #region Initialize

    chunks = new List<Chunk>();

    if (String.IsNullOrEmpty(objectName))
    {
        throw new ArgumentNullException(nameof(objectName));
    }

    if (callbacks == null)
    {
        throw new ArgumentNullException(nameof(callbacks));
    }

    if (callbacks.WriteChunk == null)
    {
        throw new ArgumentException("WriteChunk callback must be specified.");
    }

    if (callbacks.DeleteChunk == null)
    {
        throw new ArgumentException("DeleteChunk callback must be specified.");
    }

    // StoreObject rejects this with the same exception, but only after the existing
    // object has been deleted; reject it here so invalid input cannot destroy data.
    if (contentLength < 1)
    {
        throw new ArgumentException("Content length must be at least one byte.");
    }

    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (!stream.CanRead)
    {
        throw new ArgumentException("Cannot read from supplied stream.");
    }

    objectName = DedupeCommon.SanitizeString(objectName);

    #endregion

    #region Delete-if-Exists

    if (_Database.ObjectExists(objectName))
    {
        Log("Object " + objectName + " already exists, deleting");

        // Pass the caller's callbacks so the old chunks are removed from the same
        // storage the replacement chunks will be written to.
        if (!DeleteObject(objectName, callbacks))
        {
            Log("Unable to delete existing object");
            return false;
        }

        Log("Successfully deleted object for replacement");
    }

    #endregion

    return StoreObject(objectName, callbacks, contentLength, stream, out chunks);
}
/// <summary>
/// Instantiate the stream from an object's metadata, the index database provider, and the chunk callbacks.
/// </summary>
/// <param name="md">Object metadata; must not be null.</param>
/// <param name="db">Database provider; must not be null.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; must not be null.</param>
internal DedupeStream(ObjectMetadata md, DbProvider db, CallbackMethods callbacks)
{
    // Arguments are checked in declaration order: md, db, callbacks.
    _Metadata = md ?? throw new ArgumentNullException(nameof(md));
    _Database = db ?? throw new ArgumentNullException(nameof(db));
    _Callbacks = callbacks ?? throw new ArgumentNullException(nameof(callbacks));
}
/// <summary>
/// Initialize an existing index using an internal Sqlite database.
/// </summary>
/// <param name="indexFile">Path and filename; the file must already exist.</param>
/// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
/// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
/// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
/// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
/// <param name="debugSql">Enable console logging for SQL operations.</param>
public DedupeLibrary(
    string indexFile,
    Func<Chunk, bool> writeChunkMethod,
    Func<string, byte[]> readChunkMethod,
    Func<string, bool> deleteChunkMethod,
    bool debugDedupe,
    bool debugSql)
{
    if (String.IsNullOrEmpty(indexFile))
    {
        throw new ArgumentNullException(nameof(indexFile));
    }

    // Existence is checked against the path exactly as supplied, before sanitizing.
    bool indexPresent = File.Exists(indexFile);
    if (!indexPresent)
    {
        throw new FileNotFoundException("Index file not found.");
    }

    if (writeChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(writeChunkMethod));
    }

    if (readChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(readChunkMethod));
    }

    if (deleteChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(deleteChunkMethod));
    }

    _IndexFile = DedupeCommon.SanitizeString(indexFile);

    CallbackMethods configured = new CallbackMethods();
    configured.WriteChunk = writeChunkMethod;
    configured.ReadChunk = readChunkMethod;
    configured.DeleteChunk = deleteChunkMethod;
    Callbacks = configured;

    DebugDedupe = debugDedupe;
    DebugSql = debugSql;
    _ChunkLock = new object();

    _Database = new SqliteProvider(_IndexFile, DebugSql);

    InitFromExistingIndex();
}
/// <summary>
/// Store an object in the deduplication index.
/// This method will use the callbacks supplied in the method signature.
/// If adding a chunk to the index, writing a chunk, or chunking the stream fails, the chunks
/// already recorded for the object are removed from the index and the chunk keys the database
/// reports for garbage collection are deleted through the DeleteChunk callback.
/// </summary>
/// <param name="objectName">The name of the object. Must be unique in the index.</param>
/// <param name="callbacks">CallbackMethods object containing callback methods; WriteChunk and DeleteChunk must be set.</param>
/// <param name="contentLength">The length of the data; must be at least one byte.</param>
/// <param name="stream">The stream containing the data.</param>
/// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
/// <returns>True if successful; false if the object already exists or storing failed.</returns>
public bool StoreObject(string objectName, CallbackMethods callbacks, long contentLength, Stream stream, out List<Chunk> chunks)
{
    #region Initialize

    chunks = new List<Chunk>();

    if (String.IsNullOrEmpty(objectName))
    {
        throw new ArgumentNullException(nameof(objectName));
    }

    if (callbacks == null)
    {
        throw new ArgumentNullException(nameof(callbacks));
    }

    if (callbacks.WriteChunk == null)
    {
        throw new ArgumentException("WriteChunk callback must be specified.");
    }

    if (callbacks.DeleteChunk == null)
    {
        throw new ArgumentException("DeleteChunk callback must be specified.");
    }

    if (contentLength < 1)
    {
        throw new ArgumentException("Content length must be at least one byte.");
    }

    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (!stream.CanRead)
    {
        throw new ArgumentException("Cannot read from supplied stream.");
    }

    objectName = DedupeCommon.SanitizeString(objectName);

    if (_Database.ObjectExists(objectName))
    {
        Log("Object " + objectName + " already exists");
        return false;
    }

    bool garbageCollectionRequired = false;

    #endregion

    #region Chunk-Data

    try
    {
        // Invoked by ChunkStream for each chunk: record it in the index, then persist it.
        Func<Chunk, bool> processChunk = delegate (Chunk chunk)
        {
            if (chunk == null)
            {
                return false;
            }

            lock (_ChunkLock)
            {
                if (!_Database.AddObjectChunk(objectName, contentLength, chunk))
                {
                    Log("Unable to add chunk key " + chunk.Key);
                    garbageCollectionRequired = true;
                    return false;
                }

                if (!callbacks.WriteChunk(chunk))
                {
                    Log("Unable to write chunk key " + chunk.Key);
                    garbageCollectionRequired = true;
                    return false;
                }
            }

            return true;
        };

        if (!ChunkStream(contentLength, stream, processChunk, out chunks))
        {
            Log("Unable to chunk object " + objectName);
            garbageCollectionRequired = true;
            return false;
        }
    }
    finally
    {
        if (garbageCollectionRequired)
        {
            // Hold the chunk lock across both the index cleanup and the storage deletes,
            // as DeleteObject does, so a concurrent store cannot re-add a chunk key
            // between the index reporting it as collectable and its removal from storage.
            lock (_ChunkLock)
            {
                _Database.DeleteObjectChunks(objectName, out List<string> garbageCollectKeys);

                if (garbageCollectKeys != null && garbageCollectKeys.Count > 0)
                {
                    foreach (string key in garbageCollectKeys)
                    {
                        if (!callbacks.DeleteChunk(key))
                        {
                            Log("Unable to garbage collect chunk " + key);
                        }
                    }
                }
            }
        }
    }

    #endregion

    return true;
}
/// <summary>
/// Create a new index using an external database. Tables must be created ahead of time.
/// </summary>
/// <param name="database">Database provider implemented using the Database.DbProvider class.</param>
/// <param name="minChunkSize">Minimum chunk size, must be divisible by 64 (and therefore by 8), and 1024 or greater.</param>
/// <param name="maxChunkSize">Maximum chunk size, must be divisible by 64 (and therefore by 8), and at least 8 times larger than minimum chunk size.</param>
/// <param name="shiftCount">Number of bytes to shift while identifying chunk boundaries, must be less than or equal to minimum chunk size.</param>
/// <param name="boundaryCheckBytes">Number of bytes to examine while checking for a chunk boundary, must be between 1 and 8.</param>
/// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
/// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
/// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
/// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
/// <param name="debugSql">Enable console logging for SQL operations.</param>
public DedupeLibrary(
    DbProvider database,
    int minChunkSize,
    int maxChunkSize,
    int shiftCount,
    int boundaryCheckBytes,
    Func<Chunk, bool> writeChunkMethod,
    Func<string, byte[]> readChunkMethod,
    Func<string, bool> deleteChunkMethod,
    bool debugDedupe,
    bool debugSql)
{
    if (minChunkSize % 8 != 0)
    {
        throw new ArgumentException("Value for minChunkSize must be evenly divisible by 8.");
    }

    if (maxChunkSize % 8 != 0)
    {
        throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 8.");
    }

    if (minChunkSize % 64 != 0)
    {
        throw new ArgumentException("Value for minChunkSize must be evenly divisible by 64.");
    }

    if (maxChunkSize % 64 != 0)
    {
        throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 64.");
    }

    // Computed as long so that a very large minChunkSize cannot overflow and slip past the range checks.
    long requiredMaxChunkSize = 8L * minChunkSize;

    // ArgumentOutOfRangeException(string) treats its argument as the parameter name,
    // so the (paramName, message) overload is used to surface the message properly.
    if (minChunkSize < 1024)
    {
        throw new ArgumentOutOfRangeException(nameof(minChunkSize), "Value for minChunkSize must be 1024 or greater.");
    }

    if (maxChunkSize <= minChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be greater than minChunkSize and " + requiredMaxChunkSize + " or greater.");
    }

    if (maxChunkSize < requiredMaxChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be " + requiredMaxChunkSize + " or greater.");
    }

    if (shiftCount > minChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(shiftCount), "Value for shiftCount must be less than or equal to minChunkSize.");
    }

    if (writeChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(writeChunkMethod));
    }

    if (readChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(readChunkMethod));
    }

    if (deleteChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(deleteChunkMethod));
    }

    if (boundaryCheckBytes < 1 || boundaryCheckBytes > 8)
    {
        // The exception type is kept as ArgumentNullException so existing catch blocks
        // keep working; the message now states the actual constraint.
        throw new ArgumentNullException(nameof(boundaryCheckBytes), "Value for boundaryCheckBytes must be between 1 and 8.");
    }

    _Database = database ?? throw new ArgumentNullException(nameof(database));

    _MinChunkSize = minChunkSize;
    _MaxChunkSize = maxChunkSize;
    _ShiftCount = shiftCount;
    _BoundaryCheckBytes = boundaryCheckBytes;

    Callbacks = new CallbackMethods
    {
        WriteChunk = writeChunkMethod,
        ReadChunk = readChunkMethod,
        DeleteChunk = deleteChunkMethod
    };

    DebugDedupe = debugDedupe;
    DebugSql = debugSql;
    _ChunkLock = new object();

    InitNewIndex();
}
/// <summary>
/// Create a new index using an internal Sqlite database.
/// </summary>
/// <param name="indexFile">Path and filename; the file must not already exist.</param>
/// <param name="minChunkSize">Minimum chunk size, must be divisible by 64 (and therefore by 8), and 1024 or greater.</param>
/// <param name="maxChunkSize">Maximum chunk size, must be divisible by 64 (and therefore by 8), and at least 8 times larger than minimum chunk size.</param>
/// <param name="shiftCount">Number of bytes to shift while identifying chunk boundaries, must be less than or equal to minimum chunk size.</param>
/// <param name="boundaryCheckBytes">Number of bytes to examine while checking for a chunk boundary, must be between 1 and 8.</param>
/// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
/// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
/// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
/// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
/// <param name="debugSql">Enable console logging for SQL operations.</param>
public DedupeLibrary(
    string indexFile,
    int minChunkSize,
    int maxChunkSize,
    int shiftCount,
    int boundaryCheckBytes,
    Func<Chunk, bool> writeChunkMethod,
    Func<string, byte[]> readChunkMethod,
    Func<string, bool> deleteChunkMethod,
    bool debugDedupe,
    bool debugSql)
{
    if (String.IsNullOrEmpty(indexFile))
    {
        throw new ArgumentNullException(nameof(indexFile));
    }

    if (minChunkSize % 8 != 0)
    {
        throw new ArgumentException("Value for minChunkSize must be evenly divisible by 8.");
    }

    if (maxChunkSize % 8 != 0)
    {
        throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 8.");
    }

    if (minChunkSize % 64 != 0)
    {
        throw new ArgumentException("Value for minChunkSize must be evenly divisible by 64.");
    }

    if (maxChunkSize % 64 != 0)
    {
        throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 64.");
    }

    // Computed as long so that a very large minChunkSize cannot overflow and slip past the range checks.
    long requiredMaxChunkSize = 8L * minChunkSize;

    // ArgumentOutOfRangeException(string) treats its argument as the parameter name,
    // so the (paramName, message) overload is used to surface the message properly.
    if (minChunkSize < 1024)
    {
        throw new ArgumentOutOfRangeException(nameof(minChunkSize), "Value for minChunkSize must be 1024 or greater.");
    }

    if (maxChunkSize <= minChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be greater than minChunkSize and " + requiredMaxChunkSize + " or greater.");
    }

    if (maxChunkSize < requiredMaxChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be " + requiredMaxChunkSize + " or greater.");
    }

    if (shiftCount > minChunkSize)
    {
        throw new ArgumentOutOfRangeException(nameof(shiftCount), "Value for shiftCount must be less than or equal to minChunkSize.");
    }

    if (writeChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(writeChunkMethod));
    }

    if (readChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(readChunkMethod));
    }

    if (deleteChunkMethod == null)
    {
        throw new ArgumentNullException(nameof(deleteChunkMethod));
    }

    if (boundaryCheckBytes < 1 || boundaryCheckBytes > 8)
    {
        // The exception type is kept as ArgumentNullException so existing catch blocks
        // keep working; the message now states the actual constraint.
        throw new ArgumentNullException(nameof(boundaryCheckBytes), "Value for boundaryCheckBytes must be between 1 and 8.");
    }

    // Existence is checked against the path exactly as supplied, before sanitizing.
    if (File.Exists(indexFile))
    {
        throw new IOException("Index file already exists.");
    }

    _IndexFile = DedupeCommon.SanitizeString(indexFile);

    _MinChunkSize = minChunkSize;
    _MaxChunkSize = maxChunkSize;
    _ShiftCount = shiftCount;
    _BoundaryCheckBytes = boundaryCheckBytes;

    Callbacks = new CallbackMethods
    {
        WriteChunk = writeChunkMethod,
        ReadChunk = readChunkMethod,
        DeleteChunk = deleteChunkMethod
    };

    DebugDedupe = debugDedupe;
    DebugSql = debugSql;
    _ChunkLock = new object();

    _Database = new SqliteProvider(_IndexFile, DebugSql);

    InitNewIndex();
}