Example #1
0
        /// <summary>
        /// Remove an object's chunk records from the index and delete, via the supplied DeleteChunk callback,
        /// every chunk key the database hands back for clean-up.
        /// This method will use the callbacks supplied in the method signature.
        /// </summary>
        /// <param name="objectName">The name of the object.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods; DeleteChunk must be set.</param>
        /// <returns>Always true; a chunk that cannot be deleted is logged rather than reported as a failure.</returns>
        /// <exception cref="ArgumentNullException">Thrown when objectName is null or empty, or callbacks is null.</exception>
        /// <exception cref="ArgumentException">Thrown when callbacks.DeleteChunk is null.</exception>
        public bool DeleteObject(string objectName, CallbackMethods callbacks)
        {
            if (String.IsNullOrEmpty(objectName))
            {
                throw new ArgumentNullException(nameof(objectName));
            }

            if (callbacks == null)
            {
                throw new ArgumentNullException(nameof(callbacks));
            }

            if (callbacks.DeleteChunk == null)
            {
                throw new ArgumentException("DeleteChunk callback must be specified.");
            }

            string sanitizedName = DedupeCommon.SanitizeString(objectName);

            // Index update and chunk removal happen under the same lock.
            lock (_ChunkLock)
            {
                _Database.DeleteObjectChunks(sanitizedName, out List<string> keysToRemove);

                if (keysToRemove != null)
                {
                    foreach (string chunkKey in keysToRemove)
                    {
                        bool removed = callbacks.DeleteChunk(chunkKey);
                        if (removed)
                        {
                            continue;
                        }

                        // Best effort: a failed chunk delete does not abort the operation.
                        Log("Unable to delete chunk: " + chunkKey);
                    }
                }
            }

            return true;
        }
Example #2
0
        /// <summary>
        /// Open an existing index backed by an external database provider.  Tables must be created ahead of time.
        /// </summary>
        /// <param name="database">Database provider implemented using the Database.DbProvider class.</param>
        /// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
        /// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
        /// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
        /// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
        /// <param name="debugSql">Enable console logging for SQL operations.</param>
        /// <exception cref="ArgumentNullException">Thrown when any callback or the database provider is null.</exception>
        public DedupeLibrary(DbProvider database, Func<Chunk, bool> writeChunkMethod, Func<string, byte[]> readChunkMethod, Func<string, bool> deleteChunkMethod, bool debugDedupe, bool debugSql)
        {
            // Callbacks are validated before the database, in this order.
            Func<Chunk, bool> writer = writeChunkMethod ?? throw new ArgumentNullException(nameof(writeChunkMethod));
            Func<string, byte[]> reader = readChunkMethod ?? throw new ArgumentNullException(nameof(readChunkMethod));
            Func<string, bool> deleter = deleteChunkMethod ?? throw new ArgumentNullException(nameof(deleteChunkMethod));

            if (database == null)
            {
                throw new ArgumentNullException(nameof(database));
            }

            _Database = database;

            CallbackMethods configured = new CallbackMethods();
            configured.WriteChunk = writer;
            configured.ReadChunk = reader;
            configured.DeleteChunk = deleter;
            Callbacks = configured;

            DebugDedupe = debugDedupe;
            DebugSql = debugSql;
            _ChunkLock = new object();

            InitFromExistingIndex();
        }
Example #3
0
        /// <summary>
        /// Open a DedupeStream over a previously stored object.
        /// </summary>
        /// <param name="objectName">The name of the object.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods; ReadChunk must be set.</param>
        /// <param name="stream">The resulting stream, or null when the object's metadata could not be retrieved.</param>
        /// <returns>True if the metadata was retrieved and the stream was created.</returns>
        /// <exception cref="ArgumentNullException">Thrown when objectName is null or empty, or callbacks is null.</exception>
        /// <exception cref="ArgumentException">Thrown when callbacks.ReadChunk is null.</exception>
        public bool RetrieveObjectStream(string objectName, CallbackMethods callbacks, out DedupeStream stream)
        {
            // The out parameter is assigned up front so every exit path leaves it defined.
            stream = null;

            if (String.IsNullOrEmpty(objectName))
            {
                throw new ArgumentNullException(nameof(objectName));
            }

            if (callbacks == null)
            {
                throw new ArgumentNullException(nameof(callbacks));
            }

            if (callbacks.ReadChunk == null)
            {
                throw new ArgumentException("ReadChunk callback must be specified.");
            }

            string sanitizedName = DedupeCommon.SanitizeString(objectName);

            bool found = RetrieveObjectMetadata(sanitizedName, out ObjectMetadata metadata);
            if (found)
            {
                stream = new DedupeStream(metadata, _Database, callbacks);
            }

            return found;
        }
Example #4
0
        /// <summary>
        /// Retrieve an object from the deduplication index as a byte array.
        /// This method will use the callbacks supplied in the method signature.
        /// </summary>
        /// <param name="objectName">The name of the object.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods.</param>
        /// <param name="data">The byte data from the object, or null when retrieval fails.</param>
        /// <returns>True if successful.</returns>
        public bool RetrieveObject(string objectName, CallbackMethods callbacks, out byte[] data)
        {
            data = null;

            // Argument validation is performed by the stream-based overload.
            bool success = RetrieveObject(objectName, callbacks, out long _, out Stream stream);

            // 'using' tolerates a null stream and guarantees disposal on every path,
            // including a failed retrieval that left a partially filled stream behind.
            using (stream)
            {
                if (!success || stream == null)
                {
                    // Do not convert a missing or partial stream; report the failure instead.
                    return false;
                }

                data = DedupeCommon.StreamToBytes(stream);
                return true;
            }
        }
Example #5
0
 /// <summary>
 /// Store a byte array as an object in the deduplication index, replacing any object already stored under the same name.
 /// The bytes are wrapped in a stream and handed to the stream-based overload.
 /// This method will use the callbacks supplied in the method signature.
 /// </summary>
 /// <param name="objectName">The name of the object.  Must be unique in the index.</param>
 /// <param name="callbacks">CallbackMethods object containing callback methods.</param>
 /// <param name="data">The byte data for the object; must contain at least one byte.</param>
 /// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
 /// <returns>True if successful.</returns>
 /// <exception cref="ArgumentNullException">Thrown when data is null or empty.</exception>
 public bool StoreOrReplaceObject(string objectName, CallbackMethods callbacks, byte[] data, out List<Chunk> chunks)
 {
     bool hasPayload = data != null && data.Length >= 1;
     if (!hasPayload)
     {
         throw new ArgumentNullException(nameof(data));
     }

     int payloadLength = data.Length;
     var payload = DedupeCommon.BytesToStream(data);

     return StoreOrReplaceObject(objectName, callbacks, payloadLength, payload, out chunks);
 }
Example #6
0
        /// <summary>
        /// Retrieve an object from the deduplication index as a stream.
        /// Each chunk listed in the object's metadata is read through the supplied ReadChunk callback and
        /// appended, in metadata order, to an in-memory stream.
        /// This method will use the callbacks supplied in the method signature.
        /// </summary>
        /// <param name="objectName">The name of the object.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods; ReadChunk must be set.</param>
        /// <param name="contentLength">The number of bytes written to the stream.</param>
        /// <param name="stream">
        /// The stream containing the data, rewound to the beginning on success.
        /// Null when the metadata lookup fails or lists no chunks; if a chunk read fails part-way,
        /// the stream and contentLength reflect only the chunks read so far.
        /// </param>
        /// <returns>True if successful.</returns>
        /// <exception cref="ArgumentNullException">Thrown when objectName is null or empty, or callbacks is null.</exception>
        /// <exception cref="ArgumentException">Thrown when callbacks.ReadChunk is null.</exception>
        public bool RetrieveObject(string objectName, CallbackMethods callbacks, out long contentLength, out Stream stream)
        {
            contentLength = 0;
            stream = null;

            if (String.IsNullOrEmpty(objectName))
            {
                throw new ArgumentNullException(nameof(objectName));
            }

            if (callbacks == null)
            {
                throw new ArgumentNullException(nameof(callbacks));
            }

            if (callbacks.ReadChunk == null)
            {
                throw new ArgumentException("ReadChunk callback must be specified.");
            }

            string sanitizedName = DedupeCommon.SanitizeString(objectName);

            lock (_ChunkLock)
            {
                bool found = _Database.GetObjectMetadata(sanitizedName, out ObjectMetadata metadata);
                if (!found)
                {
                    Log("Unable to retrieve object metadata for object " + sanitizedName);
                    return false;
                }

                bool hasChunks = metadata.Chunks != null && metadata.Chunks.Count >= 1;
                if (!hasChunks)
                {
                    Log("No chunks returned");
                    return false;
                }

                stream = new MemoryStream();

                foreach (Chunk entry in metadata.Chunks)
                {
                    byte[] payload = callbacks.ReadChunk(entry.Key);

                    bool readable = payload != null && payload.Length >= 1;
                    if (!readable)
                    {
                        Log("Unable to read chunk " + entry.Key);
                        return false;
                    }

                    stream.Write(payload, 0, payload.Length);
                    contentLength += payload.Length;
                }

                // Rewind so the caller reads from the first byte.
                if (contentLength > 0)
                {
                    stream.Seek(0, SeekOrigin.Begin);
                }
            }

            return true;
        }
Example #7
0
        /// <summary>
        /// Store an object within a container in the deduplication index if it doesn't already exist, or, replace the object if it does.
        /// An existing object with the same name is deleted first, then the new data is stored.
        /// This method will use the callbacks supplied in the method signature for both the delete and the store.
        /// </summary>
        /// <param name="objectName">The name of the object.  Must be unique in the index.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods; WriteChunk and DeleteChunk must be set.</param>
        /// <param name="contentLength">The length of the data.</param>
        /// <param name="stream">The stream containing the data; must be readable.</param>
        /// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
        /// <returns>True if successful; false if the existing object could not be deleted or the store failed.</returns>
        /// <exception cref="ArgumentNullException">Thrown when objectName is null or empty, or callbacks or stream is null.</exception>
        /// <exception cref="ArgumentException">Thrown when a required callback is missing or the stream is not readable.</exception>
        public bool StoreOrReplaceObject(string objectName, CallbackMethods callbacks, long contentLength, Stream stream, out List<Chunk> chunks)
        {
            #region Initialize

            chunks = new List<Chunk>();
            if (String.IsNullOrEmpty(objectName))
            {
                throw new ArgumentNullException(nameof(objectName));
            }
            if (callbacks == null)
            {
                throw new ArgumentNullException(nameof(callbacks));
            }
            if (callbacks.WriteChunk == null)
            {
                throw new ArgumentException("WriteChunk callback must be specified.");
            }
            if (callbacks.DeleteChunk == null)
            {
                throw new ArgumentException("DeleteChunk callback must be specified.");
            }
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (!stream.CanRead)
            {
                throw new ArgumentException("Cannot read from supplied stream.");
            }
            objectName = DedupeCommon.SanitizeString(objectName);

            #endregion

            #region Delete-if-Exists

            if (_Database.ObjectExists(objectName))
            {
                Log("Object " + objectName + " already exists, deleting");

                // Pass the caller's callbacks through: the DeleteChunk callback validated above
                // is the one that must remove the old object's chunks.
                if (!DeleteObject(objectName, callbacks))
                {
                    Log("Unable to delete existing object");
                    return false;
                }

                Log("Successfully deleted object for replacement");
            }

            #endregion

            return StoreObject(objectName, callbacks, contentLength, stream, out chunks);
        }
Example #8
0
        /// <summary>
        /// Create a stream bound to an object's metadata, a database provider, and a set of callbacks.
        /// </summary>
        /// <param name="md">Metadata of the object.</param>
        /// <param name="db">Database provider.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        internal DedupeStream(ObjectMetadata md, DbProvider db, CallbackMethods callbacks)
        {
            // Arguments are checked in declaration order; the first null one is reported.
            _Metadata = md ?? throw new ArgumentNullException(nameof(md));
            _Database = db ?? throw new ArgumentNullException(nameof(db));
            _Callbacks = callbacks ?? throw new ArgumentNullException(nameof(callbacks));
        }
Example #9
0
        /// <summary>
        /// Open an existing index stored in an internal Sqlite database file.
        /// </summary>
        /// <param name="indexFile">Path and filename; the file must already exist.</param>
        /// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
        /// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
        /// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
        /// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
        /// <param name="debugSql">Enable console logging for SQL operations.</param>
        /// <exception cref="ArgumentNullException">Thrown when indexFile is null or empty, or any callback is null.</exception>
        /// <exception cref="FileNotFoundException">Thrown when indexFile does not exist.</exception>
        public DedupeLibrary(string indexFile, Func<Chunk, bool> writeChunkMethod, Func<string, byte[]> readChunkMethod, Func<string, bool> deleteChunkMethod, bool debugDedupe, bool debugSql)
        {
            if (String.IsNullOrEmpty(indexFile))
            {
                throw new ArgumentNullException(nameof(indexFile));
            }

            bool indexPresent = File.Exists(indexFile);
            if (!indexPresent)
            {
                throw new FileNotFoundException("Index file not found.");
            }

            Func<Chunk, bool> writer = writeChunkMethod ?? throw new ArgumentNullException(nameof(writeChunkMethod));
            Func<string, byte[]> reader = readChunkMethod ?? throw new ArgumentNullException(nameof(readChunkMethod));
            Func<string, bool> deleter = deleteChunkMethod ?? throw new ArgumentNullException(nameof(deleteChunkMethod));

            _IndexFile = DedupeCommon.SanitizeString(indexFile);

            CallbackMethods configured = new CallbackMethods();
            configured.WriteChunk = writer;
            configured.ReadChunk = reader;
            configured.DeleteChunk = deleter;
            Callbacks = configured;

            DebugDedupe = debugDedupe;
            DebugSql = debugSql;
            _ChunkLock = new object();

            // The provider is opened on the sanitized path.
            _Database = new SqliteProvider(_IndexFile, DebugSql);

            InitFromExistingIndex();
        }
Example #10
0
        /// <summary>
        /// Store an object in the deduplication index.
        /// The stream is split into chunks; each chunk is recorded in the index and then written through the WriteChunk callback.
        /// If storing fails part-way (a chunk cannot be added or written, chunking fails, or an exception is thrown),
        /// the chunk records already added for this object are removed and the chunk keys the database hands back
        /// are deleted through the DeleteChunk callback.
        /// This method will use the callbacks supplied in the method signature.
        /// </summary>
        /// <param name="objectName">The name of the object.  Must be unique in the index.</param>
        /// <param name="callbacks">CallbackMethods object containing callback methods; WriteChunk and DeleteChunk must be set.</param>
        /// <param name="contentLength">The length of the data; must be at least one byte.</param>
        /// <param name="stream">The stream containing the data; must be readable.</param>
        /// <param name="chunks">The list of chunks identified during the deduplication operation.</param>
        /// <returns>True if successful; false if the object already exists or the data could not be chunked and stored.</returns>
        /// <exception cref="ArgumentNullException">Thrown when objectName is null or empty, or callbacks or stream is null.</exception>
        /// <exception cref="ArgumentException">Thrown when a required callback is missing, contentLength is less than one, or the stream is not readable.</exception>
        public bool StoreObject(string objectName, CallbackMethods callbacks, long contentLength, Stream stream, out List<Chunk> chunks)
        {
            #region Initialize

            chunks = new List<Chunk>();
            if (String.IsNullOrEmpty(objectName))
            {
                throw new ArgumentNullException(nameof(objectName));
            }
            if (callbacks == null)
            {
                throw new ArgumentNullException(nameof(callbacks));
            }
            if (callbacks.WriteChunk == null)
            {
                throw new ArgumentException("WriteChunk callback must be specified.");
            }
            if (callbacks.DeleteChunk == null)
            {
                throw new ArgumentException("DeleteChunk callback must be specified.");
            }
            if (contentLength < 1)
            {
                throw new ArgumentException("Content length must be at least one byte.");
            }
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (!stream.CanRead)
            {
                throw new ArgumentException("Cannot read from supplied stream.");
            }
            objectName = DedupeCommon.SanitizeString(objectName);

            if (_Database.ObjectExists(objectName))
            {
                Log("Object " + objectName + " already exists");
                return false;
            }

            bool garbageCollectionRequired = false;

            #endregion

            #region Chunk-Data

            try
            {
                // Invoked for each chunk identified in the stream: record it in the index, then persist it.
                Func<Chunk, bool> processChunk = delegate(Chunk chunk)
                {
                    if (chunk == null)
                    {
                        return false;
                    }

                    lock (_ChunkLock)
                    {
                        if (!_Database.AddObjectChunk(objectName, contentLength, chunk))
                        {
                            Log("Unable to add chunk key " + chunk.Key);
                            garbageCollectionRequired = true;
                            return false;
                        }

                        if (!callbacks.WriteChunk(chunk))
                        {
                            Log("Unable to write chunk key " + chunk.Key);
                            garbageCollectionRequired = true;
                            return false;
                        }
                    }

                    return true;
                };

                if (!ChunkStream(contentLength, stream, processChunk, out chunks))
                {
                    Log("Unable to chunk object " + objectName);
                    garbageCollectionRequired = true;
                    return false;
                }
            }
            catch
            {
                // An exception part-way through leaves chunk records behind just like a
                // reported failure does, so request the same clean-up before rethrowing.
                garbageCollectionRequired = true;
                throw;
            }
            finally
            {
                if (garbageCollectionRequired)
                {
                    // Clean up under the same lock used by the chunk-add path and by DeleteObject,
                    // so index updates and chunk deletions are not interleaved with other operations.
                    lock (_ChunkLock)
                    {
                        _Database.DeleteObjectChunks(objectName, out List<string> garbageCollectKeys);

                        if (garbageCollectKeys != null && garbageCollectKeys.Count > 0)
                        {
                            foreach (string key in garbageCollectKeys)
                            {
                                // Best effort: a chunk that cannot be removed is logged, not fatal.
                                if (!callbacks.DeleteChunk(key))
                                {
                                    Log("Unable to garbage collect chunk " + key);
                                }
                            }
                        }
                    }
                }
            }

            #endregion

            return true;
        }
Example #11
0
        /// <summary>
        /// Create a new index using an external database.  Tables must be created ahead of time.
        /// </summary>
        /// <param name="database">Database provider implemented using the Database.DbProvider class.</param>
        /// <param name="minChunkSize">Minimum chunk size, must be divisible by 64 and 1024 or greater.</param>
        /// <param name="maxChunkSize">Maximum chunk size, must be divisible by 64 and at least 8 times larger than minimum chunk size.</param>
        /// <param name="shiftCount">Number of bytes to shift while identifying chunk boundaries, must be less than or equal to minimum chunk size.</param>
        /// <param name="boundaryCheckBytes">Number of bytes to examine while checking for a chunk boundary, must be between 1 and 8.</param>
        /// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
        /// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
        /// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
        /// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
        /// <param name="debugSql">Enable console logging for SQL operations.</param>
        /// <exception cref="ArgumentException">Thrown when a chunk size is not evenly divisible by 8 or 64.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when minChunkSize, maxChunkSize or shiftCount is outside its permitted range.</exception>
        /// <exception cref="ArgumentNullException">Thrown when a callback or the database is null, or when boundaryCheckBytes is outside 1 to 8.</exception>
        public DedupeLibrary(
            DbProvider database,
            int minChunkSize,
            int maxChunkSize,
            int shiftCount,
            int boundaryCheckBytes,
            Func<Chunk, bool> writeChunkMethod,
            Func<string, byte[]> readChunkMethod,
            Func<string, bool> deleteChunkMethod,
            bool debugDedupe,
            bool debugSql)
        {
            if (minChunkSize % 8 != 0)
            {
                throw new ArgumentException("Value for minChunkSize must be evenly divisible by 8.");
            }
            if (maxChunkSize % 8 != 0)
            {
                throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 8.");
            }
            if (minChunkSize % 64 != 0)
            {
                throw new ArgumentException("Value for minChunkSize must be evenly divisible by 64.");
            }
            if (maxChunkSize % 64 != 0)
            {
                throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 64.");
            }
            if (minChunkSize < 1024)
            {
                // The message states the bound that is actually enforced.
                throw new ArgumentOutOfRangeException(nameof(minChunkSize), "Value for minChunkSize must be 1024 or greater.");
            }

            // Computed in 64-bit arithmetic so a very large minChunkSize cannot overflow and slip past the checks.
            long smallestAllowedMax = 8L * minChunkSize;

            if (maxChunkSize <= minChunkSize)
            {
                throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be greater than minChunkSize and " + smallestAllowedMax + " or greater.");
            }
            if (maxChunkSize < smallestAllowedMax)
            {
                throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be " + smallestAllowedMax + " or greater.");
            }
            if (shiftCount > minChunkSize)
            {
                throw new ArgumentOutOfRangeException(nameof(shiftCount), "Value for shiftCount must be less than or equal to minChunkSize.");
            }
            if (writeChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(writeChunkMethod));
            }
            if (readChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(readChunkMethod));
            }
            if (deleteChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(deleteChunkMethod));
            }
            if (boundaryCheckBytes < 1 || boundaryCheckBytes > 8)
            {
                // Exception type kept as-is so existing callers that catch it keep working; a message is added.
                throw new ArgumentNullException(nameof(boundaryCheckBytes), "Value for boundaryCheckBytes must be between 1 and 8.");
            }

            _Database           = database ?? throw new ArgumentNullException(nameof(database));
            _MinChunkSize       = minChunkSize;
            _MaxChunkSize       = maxChunkSize;
            _ShiftCount         = shiftCount;
            _BoundaryCheckBytes = boundaryCheckBytes;

            Callbacks = new CallbackMethods
            {
                WriteChunk  = writeChunkMethod,
                ReadChunk   = readChunkMethod,
                DeleteChunk = deleteChunkMethod
            };

            DebugDedupe = debugDedupe;
            DebugSql    = debugSql;
            _ChunkLock  = new object();

            InitNewIndex();
        }
Example #12
0
        /// <summary>
        /// Create a new index using an internal Sqlite database.
        /// </summary>
        /// <param name="indexFile">Path and filename; the file must not already exist.</param>
        /// <param name="minChunkSize">Minimum chunk size, must be divisible by 64 and 1024 or greater.</param>
        /// <param name="maxChunkSize">Maximum chunk size, must be divisible by 64 and at least 8 times larger than minimum chunk size.</param>
        /// <param name="shiftCount">Number of bytes to shift while identifying chunk boundaries, must be less than or equal to minimum chunk size.</param>
        /// <param name="boundaryCheckBytes">Number of bytes to examine while checking for a chunk boundary, must be between 1 and 8.</param>
        /// <param name="writeChunkMethod">Method to call to write a chunk to storage.</param>
        /// <param name="readChunkMethod">Method to call to read a chunk from storage.</param>
        /// <param name="deleteChunkMethod">Method to call to delete a chunk from storage.</param>
        /// <param name="debugDedupe">Enable console logging for deduplication operations.</param>
        /// <param name="debugSql">Enable console logging for SQL operations.</param>
        /// <exception cref="ArgumentException">Thrown when a chunk size is not evenly divisible by 8 or 64.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when minChunkSize, maxChunkSize or shiftCount is outside its permitted range.</exception>
        /// <exception cref="ArgumentNullException">Thrown when indexFile is null or empty, a callback is null, or boundaryCheckBytes is outside 1 to 8.</exception>
        /// <exception cref="IOException">Thrown when indexFile already exists.</exception>
        public DedupeLibrary(
            string indexFile,
            int minChunkSize,
            int maxChunkSize,
            int shiftCount,
            int boundaryCheckBytes,
            Func<Chunk, bool> writeChunkMethod,
            Func<string, byte[]> readChunkMethod,
            Func<string, bool> deleteChunkMethod,
            bool debugDedupe,
            bool debugSql)
        {
            if (String.IsNullOrEmpty(indexFile))
            {
                throw new ArgumentNullException(nameof(indexFile));
            }
            if (minChunkSize % 8 != 0)
            {
                throw new ArgumentException("Value for minChunkSize must be evenly divisible by 8.");
            }
            if (maxChunkSize % 8 != 0)
            {
                throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 8.");
            }
            if (minChunkSize % 64 != 0)
            {
                throw new ArgumentException("Value for minChunkSize must be evenly divisible by 64.");
            }
            if (maxChunkSize % 64 != 0)
            {
                throw new ArgumentException("Value for maxChunkSize must be evenly divisible by 64.");
            }
            if (minChunkSize < 1024)
            {
                // The message states the bound that is actually enforced.
                throw new ArgumentOutOfRangeException(nameof(minChunkSize), "Value for minChunkSize must be 1024 or greater.");
            }

            // Computed in 64-bit arithmetic so a very large minChunkSize cannot overflow and slip past the checks.
            long smallestAllowedMax = 8L * minChunkSize;

            if (maxChunkSize <= minChunkSize)
            {
                throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be greater than minChunkSize and " + smallestAllowedMax + " or greater.");
            }
            if (maxChunkSize < smallestAllowedMax)
            {
                throw new ArgumentOutOfRangeException(nameof(maxChunkSize), "Value for maxChunkSize must be " + smallestAllowedMax + " or greater.");
            }
            if (shiftCount > minChunkSize)
            {
                throw new ArgumentOutOfRangeException(nameof(shiftCount), "Value for shiftCount must be less than or equal to minChunkSize.");
            }
            if (writeChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(writeChunkMethod));
            }
            if (readChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(readChunkMethod));
            }
            if (deleteChunkMethod == null)
            {
                throw new ArgumentNullException(nameof(deleteChunkMethod));
            }
            if (boundaryCheckBytes < 1 || boundaryCheckBytes > 8)
            {
                // Exception type kept as-is so existing callers that catch it keep working; a message is added.
                throw new ArgumentNullException(nameof(boundaryCheckBytes), "Value for boundaryCheckBytes must be between 1 and 8.");
            }

            // A new index must not overwrite an existing file.
            if (File.Exists(indexFile))
            {
                throw new IOException("Index file already exists.");
            }

            _IndexFile          = DedupeCommon.SanitizeString(indexFile);
            _MinChunkSize       = minChunkSize;
            _MaxChunkSize       = maxChunkSize;
            _ShiftCount         = shiftCount;
            _BoundaryCheckBytes = boundaryCheckBytes;

            Callbacks             = new CallbackMethods();
            Callbacks.WriteChunk  = writeChunkMethod;
            Callbacks.ReadChunk   = readChunkMethod;
            Callbacks.DeleteChunk = deleteChunkMethod;

            DebugDedupe = debugDedupe;
            DebugSql    = debugSql;
            _ChunkLock  = new object();

            _Database = new SqliteProvider(_IndexFile, DebugSql);

            InitNewIndex();
        }