/// <summary>
/// Splits the content of <paramref name="stream"/> into chunks and passes every chunk,
/// together with its object-map entry, to <paramref name="processChunk"/>.
/// Content no longer than <c>_Settings.MinChunkSize</c> is emitted as one chunk; anything
/// larger is walked with a sliding window and cut where the window's MD5 passes the
/// <c>_Settings.BoundaryCheckBytes</c> zero-byte test or the pending chunk reaches
/// <c>_Settings.MaxChunkSize</c>.
/// </summary>
/// <param name="key">Object key recorded in every <c>DedupeObjectMap</c>; must be non-empty.</param>
/// <param name="contentLength">Number of bytes to read from the stream; must be at least 1.</param>
/// <param name="stream">Readable source stream.</param>
/// <param name="processChunk">Callback invoked once per chunk while the chunk still carries its data.</param>
/// <returns>
/// The chunks in the order they were produced. In the sliding-window path each chunk's
/// <c>Data</c> is set to null after the callback returns; the single-chunk path leaves it populated.
/// </returns>
/// <exception cref="ArgumentNullException">key is null or empty, or stream / processChunk is null.</exception>
/// <exception cref="ArgumentException">contentLength is below 1, or the stream is not readable.</exception>
private List <DedupeChunk> ChunkStream(string key, long contentLength, Stream stream, Action <DedupeChunk, DedupeObjectMap> processChunk)
        {
            // Validation order is part of the observable behaviour; keep it as is.
            if (String.IsNullOrEmpty(key))
            {
                throw new ArgumentNullException(nameof(key));
            }
            if (contentLength < 1)
            {
                throw new ArgumentException("Content length must be greater than zero.");
            }
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (!stream.CanRead)
            {
                throw new ArgumentException("Cannot read from supplied stream.");
            }
            if (processChunk == null)
            {
                throw new ArgumentNullException(nameof(processChunk));
            }

            List <DedupeChunk> produced = new List <DedupeChunk>();

            // Small payload: the whole content becomes a single chunk.
            if (contentLength <= _Settings.MinChunkSize)
            {
                long   readCount;
                byte[] payload = DedupeCommon.ReadBytesFromStream(stream, contentLength, out readCount);
                string soleKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(payload));

                DedupeChunk sole = new DedupeChunk(soleKey, payload.Length, 1, payload);
                produced.Add(sole);

                DedupeObjectMap soleMap = new DedupeObjectMap(key, soleKey, sole.Length, 0, 0);
                processChunk(sole, soleMap);
                return(produced);
            }

            // Larger payload: slide a window over the stream and cut at boundaries.
            Streams slider       = new Streams(stream, contentLength, _Settings.MinChunkSize, _Settings.ShiftCount);
            byte[]  pending      = null;  // bytes gathered for the chunk currently being built
            long    pendingStart = 0;     // captured only when a new chunk begins

            while (true)
            {
                long   windowStart;
                byte[] appended;
                bool   isLast;

                byte[] window = slider.GetNextChunk(out windowStart, out appended, out isLast);
                if (window == null)
                {
                    // No window returned: nothing further to process.
                    return(produced);
                }

                if (pending != null)
                {
                    // Extend the chunk in progress with the bytes the window just exposed.
                    pending = DedupeCommon.AppendBytes(pending, appended);
                }
                else
                {
                    // Begin a new chunk: remember where it starts and seed it with the window.
                    pendingStart = windowStart;
                    pending      = new byte[window.Length];
                    Buffer.BlockCopy(window, 0, pending, 0, window.Length);
                }

                byte[] windowHash = DedupeCommon.Md5(window);
                bool   cutHere    = DedupeCommon.IsZeroBytes(windowHash, _Settings.BoundaryCheckBytes) ||
                                    pending.Length >= _Settings.MaxChunkSize;

                if (cutHere)
                {
                    string boundaryKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));

                    DedupeChunk     boundaryChunk = new DedupeChunk(boundaryKey, pending.Length, 1, pending);
                    DedupeObjectMap boundaryMap   = new DedupeObjectMap(key, boundaryChunk.Key, pending.Length, produced.Count, pendingStart);
                    processChunk(boundaryChunk, boundaryMap);

                    // Drop the payload once the callback has seen it; only metadata is returned.
                    boundaryChunk.Data = null;
                    produced.Add(boundaryChunk);

                    pending = null;
                    slider.AdvanceToNewChunk();
                }

                if (isLast && pending != null)
                {
                    // End of input with bytes still pending: flush them as the last chunk.
                    string tailKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));

                    DedupeChunk     tailChunk = new DedupeChunk(tailKey, pending.Length, 1, pending);
                    DedupeObjectMap tailMap   = new DedupeObjectMap(key, tailChunk.Key, tailChunk.Length, produced.Count, pendingStart);
                    processChunk(tailChunk, tailMap);

                    tailChunk.Data = null;
                    produced.Add(tailChunk);
                    return(produced);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Console loop that repeatedly prompts for an input file, an optional output file,
        /// a chunk size and a shift size, then walks the input through a <c>Streams</c>
        /// sliding-window reader and appends each window's new data to the output file.
        /// An empty input file name ends the loop.
        /// </summary>
        /// <remarks>
        /// The output file is opened with <c>FileMode.Append</c>, so an existing file is
        /// extended rather than overwritten. The reader is stored in the static
        /// <c>_Streams</c> field.
        /// </remarks>
        static void RewriteFile()
        {
            while (true)
            {
                Console.Write("Input file [ENTER to exit]: ");
                string inFile = Console.ReadLine();
                if (String.IsNullOrEmpty(inFile))
                {
                    break;
                }

                Console.Write("Output file: ");
                string outFile = Console.ReadLine();

                // Re-prompt on a typo instead of crashing the whole loop with FormatException.
                Console.Write("Chunk size: ");
                int chunkSize;
                if (!Int32.TryParse(Console.ReadLine(), out chunkSize))
                {
                    Console.WriteLine("Invalid chunk size.");
                    continue;
                }

                Console.Write("Shift size: ");
                int shiftSize;
                if (!Int32.TryParse(Console.ReadLine(), out shiftSize))
                {
                    Console.WriteLine("Invalid shift size.");
                    continue;
                }

                FileInfo fi            = new FileInfo(inFile);
                long     contentLength = fi.Length;

                // The input is only read; request read access so read-only files can be opened.
                using (FileStream fs = new FileStream(inFile, FileMode.Open, FileAccess.Read))
                {
                    _Streams = new Streams(fs, contentLength, chunkSize, shiftSize);

                    Console.WriteLine("Input data size : " + contentLength);
                    Console.WriteLine("Chunk count     : " + _Streams.ChunkCount());

                    byte[] bytes      = null;
                    int    chunkCount = 1;

                    // Open the output once for the whole run instead of once per chunk.
                    // A null resource is permitted by `using`; nothing is disposed in that case.
                    using (FileStream outFs = String.IsNullOrEmpty(outFile) ? null : new FileStream(outFile, FileMode.Append))
                    {
                        while (true)
                        {
                            bool   finalChunk = false;
                            long   position   = 0;
                            byte[] newData    = null;
                            bytes = _Streams.GetNextChunk(out position, out newData, out finalChunk);

                            if (bytes == null)
                            {
                                // No window returned; presumably the reader is exhausted — stop
                                // rather than dereference null below.
                                break;
                            }

                            /*
                             * Uncomment these lines to debug on small files
                             *
                             * Console.WriteLine(
                             *  "Chunk " + chunkCount + " at index " + position + " [" + bytes.Length + " bytes]: " + Environment.NewLine +
                             *  "   Chunk data : '" + Encoding.UTF8.GetString(bytes) + "'" + Environment.NewLine +
                             *  "   New data   : '" + Encoding.UTF8.GetString(newData) + "'");
                             */

                            if (outFs != null && newData != null && newData.Length > 0)
                            {
                                outFs.Write(newData, 0, newData.Length);
                            }

                            if (finalChunk)
                            {
                                break;
                            }
                            chunkCount++;
                        }
                    }
                }

                Console.WriteLine();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Breaks the content of <paramref name="stream"/> into chunks, offering each one to
        /// <paramref name="processChunk"/> and collecting them in <paramref name="chunks"/>.
        /// Content no longer than <c>_MinChunkSize</c> becomes a single chunk; longer content
        /// is scanned with a sliding window and cut where the window's MD5 passes the
        /// <c>_BoundaryCheckBytes</c> zero-byte test or the pending chunk reaches <c>_MaxChunkSize</c>.
        /// </summary>
        /// <param name="contentLength">Number of bytes to read; values below 1 are rejected.</param>
        /// <param name="stream">Source stream; must be non-null and readable.</param>
        /// <param name="processChunk">Callback run for every chunk; returning false aborts chunking.</param>
        /// <param name="chunks">
        /// Receives the chunks produced so far. In the sliding-window path a chunk is added only
        /// after the callback accepted it and its <c>Value</c> has been set to null; in the
        /// single-chunk path the chunk is added before the callback runs and keeps its data.
        /// </param>
        /// <returns>
        /// False when the arguments are unusable or the callback returned false; otherwise true
        /// (for the single-chunk path, whatever the callback returned).
        /// </returns>
        private bool ChunkStream(long contentLength, Stream stream, Func <Chunk, bool> processChunk, out List <Chunk> chunks)
        {
            // The out parameter is always assigned, even when the arguments are rejected.
            chunks = new List <Chunk>();

            bool inputUsable = stream != null && stream.CanRead && contentLength >= 1;
            if (!inputUsable)
            {
                return(false);
            }

            // Small payload: one chunk covering everything.
            if (contentLength <= _MinChunkSize)
            {
                long   readCount;
                byte[] payload = DedupeCommon.ReadBytesFromStream(stream, contentLength, out readCount);
                string soleKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(payload));

                Chunk sole = new Chunk(soleKey, contentLength, 0, 0, payload);
                chunks.Add(sole);
                return(processChunk(sole));
            }

            // Larger payload: slide a window across the stream and cut at boundaries.
            Streams slider       = new Streams(stream, contentLength, _MinChunkSize, _ShiftCount);
            byte[]  pending      = null;  // bytes gathered for the chunk currently being built
            long    pendingStart = 0;     // captured only when a new chunk begins

            while (true)
            {
                long   windowStart;
                byte[] appended;
                bool   isLast;

                byte[] window = slider.GetNextChunk(out windowStart, out appended, out isLast);
                if (window == null)
                {
                    // No window returned: nothing further to process.
                    return(true);
                }

                if (pending != null)
                {
                    // Extend the chunk in progress with the bytes the window just exposed.
                    pending = DedupeCommon.AppendBytes(pending, appended);
                }
                else
                {
                    // Begin a new chunk: remember where it starts and seed it with the window.
                    pendingStart = windowStart;
                    pending      = new byte[window.Length];
                    Buffer.BlockCopy(window, 0, pending, 0, window.Length);
                }

                byte[] windowHash = DedupeCommon.Md5(window);
                bool   cutHere    = DedupeCommon.IsZeroBytes(windowHash, _BoundaryCheckBytes) ||
                                    (pending.Length >= _MaxChunkSize);

                if (cutHere)
                {
                    string boundaryKey   = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));
                    Chunk  boundaryChunk = new Chunk(boundaryKey, pending.Length, chunks.Count, pendingStart, pending);

                    if (!processChunk(boundaryChunk))
                    {
                        return(false);
                    }

                    // Drop the payload once the callback has accepted it; only metadata is kept.
                    boundaryChunk.Value = null;
                    chunks.Add(boundaryChunk);

                    pending = null;
                    slider.AdvanceToNewChunk();
                }

                if (isLast && pending != null)
                {
                    // End of input with bytes still pending: flush them as the last chunk.
                    string tailKey   = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));
                    Chunk  tailChunk = new Chunk(tailKey, pending.Length, chunks.Count, pendingStart, pending);

                    if (!processChunk(tailChunk))
                    {
                        return(false);
                    }

                    tailChunk.Value = null;
                    chunks.Add(tailChunk);
                    return(true);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Interactive console session for stepping through a file with the <c>Streams</c>
        /// sliding-window reader. Prompts for an input file, chunk size and shift size, then
        /// accepts commands until "q" is entered or standard input ends.
        /// </summary>
        /// <remarks>
        /// Commands: <c>next</c> fetches and prints the next window, <c>advance</c> calls
        /// <c>AdvanceToNewChunk</c>, <c>chunksize</c> / <c>shiftsize</c> / <c>nextstart</c> /
        /// <c>remaining</c> / <c>prev</c> print the matching reader property, and <c>q</c> exits.
        /// Unrecognised commands are ignored. The reader is stored in the static
        /// <c>_Streams</c> field.
        /// </remarks>
        static void Interactive()
        {
            Console.Write("Input file [ENTER to exit]: ");
            string inFile = Console.ReadLine();

            if (String.IsNullOrEmpty(inFile))
            {
                return;
            }

            // Report bad numeric input instead of dying with an unhandled FormatException.
            Console.Write("Chunk size: ");
            int chunkSize;
            if (!Int32.TryParse(Console.ReadLine(), out chunkSize))
            {
                Console.WriteLine("Invalid chunk size.");
                return;
            }

            Console.Write("Shift size: ");
            int shiftSize;
            if (!Int32.TryParse(Console.ReadLine(), out shiftSize))
            {
                Console.WriteLine("Invalid shift size.");
                return;
            }

            FileInfo fi            = new FileInfo(inFile);
            long     contentLength = fi.Length;

            // The file is only read; request read access so read-only files can be opened.
            using (FileStream fs = new FileStream(inFile, FileMode.Open, FileAccess.Read))
            {
                _Streams = new Streams(fs, contentLength, chunkSize, shiftSize);

                Console.WriteLine("Input data size : " + contentLength);
                Console.WriteLine("Chunk count     : " + _Streams.ChunkCount());

                byte[] bytes      = null;
                bool   finalChunk = false;
                long   position   = 0;
                byte[] newData    = null;

                while (true)
                {
                    Console.Write("Command [next advance q chunksize shiftsize nextstart remaining prev]: ");
                    string userInput = null;
                    while (String.IsNullOrEmpty(userInput))
                    {
                        userInput = Console.ReadLine();
                        if (userInput == null)
                        {
                            // ReadLine returns null once stdin is exhausted; without this
                            // check the loop would spin forever on end-of-input.
                            return;
                        }
                    }

                    switch (userInput)
                    {
                    case "next":
                        bytes = _Streams.GetNextChunk(out position, out newData, out finalChunk);
                        if (bytes != null && bytes.Length > 0 &&
                            newData != null && newData.Length > 0)
                        {
                            Console.WriteLine("Position   : " + position);
                            Console.WriteLine("Chunk data : '" + Encoding.UTF8.GetString(bytes) + "'");
                            Console.WriteLine("New data   : '" + Encoding.UTF8.GetString(newData) + "'");
                            if (finalChunk)
                            {
                                Console.WriteLine("*** Final chunk ***");
                            }
                        }
                        else
                        {
                            Console.WriteLine("No data");
                        }
                        break;

                    case "advance":
                        Console.WriteLine("Advancing to next new chunk");
                        _Streams.AdvanceToNewChunk();
                        break;

                    case "q":
                        return;

                    case "chunksize":
                        Console.WriteLine("Chunk size: " + _Streams.ChunkSize);
                        break;

                    case "shiftsize":
                        Console.WriteLine("Shift size: " + _Streams.ShiftSize);
                        break;

                    case "nextstart":
                        Console.WriteLine("Next start position: " + _Streams.NextStartPosition);
                        break;

                    case "remaining":
                        Console.WriteLine("Remaining bytes: " + _Streams.BytesRemaining);
                        break;

                    case "prev":
                        if (_Streams.PreviousChunk == null || _Streams.PreviousChunk.Length < 1)
                        {
                            Console.WriteLine("(null)");
                        }
                        else
                        {
                            Console.WriteLine("Previous chunk: '" + Encoding.UTF8.GetString(_Streams.PreviousChunk) + "'");
                        }
                        break;

                    default:
                        // Unknown command: ignore and prompt again.
                        break;
                    }
                }
            }
        }