/// <summary>
/// Breaks the supplied stream into chunks and hands every chunk, together with its
/// object-map entry, to <paramref name="processChunk"/>.
/// </summary>
/// <remarks>
/// Content no longer than <c>_Settings.MinChunkSize</c> is read in one piece and emitted as a
/// single chunk (its data is left attached to the returned chunk). Larger content is walked with
/// a <c>Streams</c> sliding window; a chunk is closed when <c>DedupeCommon.IsZeroBytes</c> returns
/// true for the MD5 of the current window (checked with <c>_Settings.BoundaryCheckBytes</c>) or
/// when the accumulated data reaches <c>_Settings.MaxChunkSize</c>. On that path each chunk's
/// <c>Data</c> is set to null after the callback returns.
/// </remarks>
/// <param name="key">Object key stored in every map entry; must not be null or empty.</param>
/// <param name="contentLength">Number of bytes of content; must be at least 1.</param>
/// <param name="stream">Readable stream supplying the content.</param>
/// <param name="processChunk">Callback invoked once per chunk with the chunk and its map entry.</param>
/// <returns>The chunks, in the order they were produced.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="key"/> is null or empty, or <paramref name="stream"/> or
/// <paramref name="processChunk"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="contentLength"/> is less than 1, or the stream cannot be read.
/// </exception>
private List<DedupeChunk> ChunkStream(string key, long contentLength, Stream stream, Action<DedupeChunk, DedupeObjectMap> processChunk)
{
    if (string.IsNullOrEmpty(key))
    {
        throw new ArgumentNullException(nameof(key));
    }

    if (contentLength < 1)
    {
        throw new ArgumentException("Content length must be greater than zero.");
    }

    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (!stream.CanRead)
    {
        throw new ArgumentException("Cannot read from supplied stream.");
    }

    if (processChunk == null)
    {
        throw new ArgumentNullException(nameof(processChunk));
    }

    List<DedupeChunk> results = new List<DedupeChunk>();

    // Small content: one read, one chunk, one callback.
    if (contentLength <= _Settings.MinChunkSize)
    {
        long bytesRead = 0;
        byte[] payload = DedupeCommon.ReadBytesFromStream(stream, contentLength, out bytesRead);
        string soleKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(payload));
        DedupeChunk sole = new DedupeChunk(soleKey, payload.Length, 1, payload);

        results.Add(sole);
        DedupeObjectMap soleMap = new DedupeObjectMap(key, soleKey, sole.Length, 0, 0);
        processChunk(sole, soleMap);
        return results;
    }

    // Larger content: slide a window over the stream and cut chunks at boundaries.
    Streams slider = new Streams(stream, contentLength, _Settings.MinChunkSize, _Settings.ShiftCount);
    byte[] pending = null;   // data gathered for the chunk currently being built
    long pendingStart = 0;   // only updated when a new chunk is started

    while (true)
    {
        long windowStart = 0;
        byte[] fresh = null;
        bool atEnd = false;

        byte[] window = slider.GetNextChunk(out windowStart, out fresh, out atEnd);
        if (window == null)
        {
            // Nothing further to read from the window.
            break;
        }

        if (pending == null)
        {
            // First window of a new chunk: remember where it starts and copy the window.
            pendingStart = windowStart;
            pending = (byte[])window.Clone();
        }
        else
        {
            // Same chunk continues: extend it with the data reported as new by the window.
            pending = DedupeCommon.AppendBytes(pending, fresh);
        }

        bool hashBoundary = DedupeCommon.IsZeroBytes(DedupeCommon.Md5(window), _Settings.BoundaryCheckBytes);

        if (hashBoundary || pending.Length >= _Settings.MaxChunkSize)
        {
            string boundaryKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));
            DedupeChunk cut = new DedupeChunk(boundaryKey, pending.Length, 1, pending);
            DedupeObjectMap cutMap = new DedupeObjectMap(key, cut.Key, pending.Length, results.Count, pendingStart);

            processChunk(cut, cutMap);

            // Drop the payload reference once the callback has seen it.
            cut.Data = null;
            results.Add(cut);

            pending = null;
            slider.AdvanceToNewChunk();
        }

        if (atEnd && pending != null)
        {
            // The window reports the final chunk while data is still pending: emit it as the last chunk.
            string tailKey = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(pending));
            DedupeChunk tail = new DedupeChunk(tailKey, pending.Length, 1, pending);
            DedupeObjectMap tailMap = new DedupeObjectMap(key, tail.Key, tail.Length, results.Count, pendingStart);

            processChunk(tail, tailMap);
            tail.Data = null;
            results.Add(tail);
            break;
        }
    }

    return results;
}
/// <summary>
/// Console loop that repeatedly prompts for an input file, an output file, a chunk size and a
/// shift size, walks the input with a <c>Streams</c> window and appends the data reported as new
/// by each window to the output file. An empty input file name ends the loop.
/// </summary>
/// <remarks>
/// The output file is opened in append mode, so existing content in that file is kept.
/// If no output file name is entered the input is still walked but nothing is written.
/// </remarks>
static void RewriteFile()
{
    while (true)
    {
        Console.Write("Input file [ENTER to exit]: ");
        string inFile = Console.ReadLine();
        if (String.IsNullOrEmpty(inFile))
        {
            break;
        }

        Console.Write("Output file: ");
        string outFile = Console.ReadLine();
        Console.Write("Chunk size: ");
        int chunkSize = Convert.ToInt32(Console.ReadLine());
        Console.Write("Shift size: ");
        int shiftSize = Convert.ToInt32(Console.ReadLine());

        FileInfo fi = new FileInfo(inFile);
        long contentLength = fi.Length;

        // The input is only read, so request read access; the default (read/write) fails on read-only files.
        using (FileStream fs = new FileStream(inFile, FileMode.Open, FileAccess.Read))
        {
            _Streams = new Streams(fs, contentLength, chunkSize, shiftSize);
            Console.WriteLine("Input data size : " + contentLength);
            Console.WriteLine("Chunk count : " + _Streams.ChunkCount());

            // Open the output once for the whole run instead of once per chunk.
            // A null resource is permitted in a using statement, so no output file means no writes.
            using (FileStream outFs = String.IsNullOrEmpty(outFile) ? null : new FileStream(outFile, FileMode.Append))
            {
                int chunkCount = 1; // only referenced by the optional debug output below

                while (true)
                {
                    bool finalChunk = false;
                    long position = 0;
                    byte[] newData = null;

                    byte[] bytes = _Streams.GetNextChunk(out position, out newData, out finalChunk);
                    if (bytes == null || newData == null)
                    {
                        // No window or no new data was returned: stop instead of dereferencing null.
                        break;
                    }

                    // Uncomment to debug on small files:
                    // Console.WriteLine(
                    //     "Chunk " + chunkCount + " at index " + position + " [" + bytes.Length + " bytes]: " + Environment.NewLine +
                    //     " Chunk data : '" + Encoding.UTF8.GetString(bytes) + "'" + Environment.NewLine +
                    //     " New data : '" + Encoding.UTF8.GetString(newData) + "'");

                    if (outFs != null)
                    {
                        outFs.Write(newData, 0, newData.Length);
                    }

                    if (finalChunk)
                    {
                        break;
                    }

                    chunkCount++;
                }
            }
        }

        Console.WriteLine();
    }
}
/// <summary>
/// Splits the supplied stream into chunks and passes each one to <paramref name="processChunk"/>.
/// </summary>
/// <remarks>
/// Content no longer than <c>_MinChunkSize</c> is read in one piece and emitted as a single chunk.
/// Larger content is walked with a <c>Streams</c> sliding window; a chunk is closed when
/// <c>DedupeCommon.IsZeroBytes</c> returns true for the MD5 of the current window (checked with
/// <c>_BoundaryCheckBytes</c>) or when the accumulated data reaches <c>_MaxChunkSize</c>.
/// On the sliding-window path each chunk's <c>Value</c> is set to null after the callback returns.
/// </remarks>
/// <param name="contentLength">Number of bytes of content; must be at least 1.</param>
/// <param name="stream">Readable stream supplying the content.</param>
/// <param name="processChunk">Callback invoked per chunk; returning false aborts chunking.</param>
/// <param name="chunks">Receives the chunks produced so far; always assigned, possibly empty.</param>
/// <returns>
/// False when an argument is unusable (null or unreadable stream, content length below 1,
/// null callback) or when the callback returns false; otherwise true.
/// </returns>
private bool ChunkStream(long contentLength, Stream stream, Func<Chunk, bool> processChunk, out List<Chunk> chunks)
{
    chunks = new List<Chunk>();
    Chunk chunk = null;
    long bytesRead = 0;
    string key = null;

    // Reject unusable arguments before touching the stream. A null callback is rejected here as
    // well; otherwise it would only fail with a NullReferenceException after data had been read.
    if (stream == null || !stream.CanRead || contentLength < 1 || processChunk == null)
    {
        return false;
    }

    // Single chunk: the whole content fits within the minimum chunk size.
    if (contentLength <= _MinChunkSize)
    {
        byte[] chunkData = DedupeCommon.ReadBytesFromStream(stream, contentLength, out bytesRead);
        key = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(chunkData));

        // NOTE(review): the chunk length is taken from contentLength, not from the number of bytes
        // actually read - confirm ReadBytesFromStream always returns contentLength bytes.
        chunk = new Chunk(key, contentLength, 0, 0, chunkData);
        chunks.Add(chunk);
        return processChunk(chunk);
    }

    // Sliding window: accumulate data until a boundary is detected.
    Streams streamWindow = new Streams(stream, contentLength, _MinChunkSize, _ShiftCount);
    byte[] currChunk = null;
    long chunkPosition = 0; // should only be set at the beginning of a new chunk

    while (true)
    {
        byte[] newData = null;
        bool finalChunk = false;
        long tempPosition = 0;

        byte[] window = streamWindow.GetNextChunk(out tempPosition, out newData, out finalChunk);
        if (window == null)
        {
            // No further window available.
            return true;
        }

        if (currChunk == null)
        {
            // Starting a new chunk: remember its position and seed it with the whole window.
            chunkPosition = tempPosition;
            currChunk = new byte[window.Length];
            Buffer.BlockCopy(window, 0, currChunk, 0, window.Length);
        }
        else
        {
            // Continuing the current chunk: append the data reported as new by the window.
            currChunk = DedupeCommon.AppendBytes(currChunk, newData);
        }

        byte[] md5Hash = DedupeCommon.Md5(window);

        if (DedupeCommon.IsZeroBytes(md5Hash, _BoundaryCheckBytes) || currChunk.Length >= _MaxChunkSize)
        {
            // Chunk boundary: emit the accumulated data and restart the window.
            key = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(currChunk));
            chunk = new Chunk(key, currChunk.Length, chunks.Count, chunkPosition, currChunk);

            if (!processChunk(chunk))
            {
                return false;
            }

            // Drop the payload reference once the callback has seen it.
            chunk.Value = null;
            chunks.Add(chunk);

            chunk = null;
            currChunk = null;
            streamWindow.AdvanceToNewChunk();
        }

        if (finalChunk && currChunk != null)
        {
            // The window reports the final chunk while data is still pending: emit it and stop.
            key = DedupeCommon.BytesToBase64(DedupeCommon.Sha256(currChunk));
            chunk = new Chunk(key, currChunk.Length, chunks.Count, chunkPosition, currChunk);

            if (!processChunk(chunk))
            {
                return false;
            }

            chunk.Value = null;
            chunks.Add(chunk);

            chunk = null;
            currChunk = null;
            break;
        }
    }

    return true;
}
/// <summary>
/// Interactive console session for stepping through a file with a <c>Streams</c> window.
/// Prompts for an input file, chunk size and shift size, then executes commands until
/// <c>q</c> is entered or standard input ends.
/// </summary>
/// <remarks>
/// Commands: <c>next</c> (fetch and print the next window), <c>advance</c> (call
/// <c>AdvanceToNewChunk</c>), <c>chunksize</c>, <c>shiftsize</c>, <c>nextstart</c>,
/// <c>remaining</c>, <c>prev</c> (print the corresponding <c>Streams</c> members) and <c>q</c> (quit).
/// Unrecognized commands are ignored.
/// </remarks>
static void Interactive()
{
    Console.Write("Input file [ENTER to exit]: ");
    string inFile = Console.ReadLine();
    if (String.IsNullOrEmpty(inFile))
    {
        return;
    }

    Console.Write("Chunk size: ");
    int chunkSize = Convert.ToInt32(Console.ReadLine());
    Console.Write("Shift size: ");
    int shiftSize = Convert.ToInt32(Console.ReadLine());

    FileInfo fi = new FileInfo(inFile);
    long contentLength = fi.Length;

    // The file is only inspected, so request read access; the default (read/write) fails on read-only files.
    using (FileStream fs = new FileStream(inFile, FileMode.Open, FileAccess.Read))
    {
        _Streams = new Streams(fs, contentLength, chunkSize, shiftSize);
        Console.WriteLine("Input data size : " + contentLength);
        Console.WriteLine("Chunk count : " + _Streams.ChunkCount());

        byte[] bytes = null;
        bool finalChunk = false;
        long position = 0;
        byte[] newData = null;

        while (true)
        {
            Console.Write("Command [next advance q chunksize shiftsize nextstart remaining prev]: ");

            string userInput = null;
            while (String.IsNullOrEmpty(userInput))
            {
                userInput = Console.ReadLine();
                if (userInput == null)
                {
                    // ReadLine returns null once standard input is exhausted; without this
                    // check the loop would spin forever on a closed or redirected input.
                    return;
                }
            }

            switch (userInput)
            {
                case "next":
                    bytes = _Streams.GetNextChunk(out position, out newData, out finalChunk);
                    if (bytes != null && bytes.Length > 0 && newData != null && newData.Length > 0)
                    {
                        Console.WriteLine("Position : " + position);
                        Console.WriteLine("Chunk data : '" + Encoding.UTF8.GetString(bytes) + "'");
                        Console.WriteLine("New data : '" + Encoding.UTF8.GetString(newData) + "'");
                        if (finalChunk)
                        {
                            Console.WriteLine("*** Final chunk ***");
                        }
                    }
                    else
                    {
                        Console.WriteLine("No data");
                    }
                    break;

                case "advance":
                    Console.WriteLine("Advancing to next new chunk");
                    _Streams.AdvanceToNewChunk();
                    break;

                case "q":
                    return;

                case "chunksize":
                    Console.WriteLine("Chunk size: " + _Streams.ChunkSize);
                    break;

                case "shiftsize":
                    Console.WriteLine("Shift size: " + _Streams.ShiftSize);
                    break;

                case "nextstart":
                    Console.WriteLine("Next start position: " + _Streams.NextStartPosition);
                    break;

                case "remaining":
                    Console.WriteLine("Remaining bytes: " + _Streams.BytesRemaining);
                    break;

                case "prev":
                    if (_Streams.PreviousChunk == null || _Streams.PreviousChunk.Length < 1)
                    {
                        Console.WriteLine("(null)");
                    }
                    else
                    {
                        Console.WriteLine("Previous chunk: '" + Encoding.UTF8.GetString(_Streams.PreviousChunk) + "'");
                    }
                    break;

                default:
                    break;
            }
        }
    }
}