public static string Md5Line(Md5Item obj)
{
    return string.Format(
        "{1}{0}{2}{0}{3}{0}{4}",
        Seperator,
        obj.Md5,
        obj.Key,
        obj.Length,
        obj.ModifiedUtc.ToEpoch()
    );
}
private static Dictionary<string, Md5Item> LoadMd5File(string md5file)
{
    Log.InfoFormat("Reading md5 file '{0}'...", md5file);
    var md5Dictionary = new Dictionary<string, Md5Item>();
    if (File.Exists(md5file))
    {
        var md5lines = File.ReadAllLines(md5file, encoding);
        foreach (var l in md5lines)
        {
            var item = Md5Item.Parse(l);
            md5Dictionary[item.Key] = item;
        }
    }
    Log.InfoFormat("Done, {0:N0} items", md5Dictionary.Count);
    return md5Dictionary;
}
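Md5Item.Parse is not part of this listing. The sketch below is an assumption based on the field order that Md5Line writes (Md5, Key, Length, epoch seconds, joined by Seperator); the FromEpoch(long) extension is hypothetical, mirroring the ToEpoch() call used above, and the constructor signature is taken from the new Md5Item(...) call in CopyTo.

// Sketch only, assuming the Md5Line field order; not the original implementation.
public static Md5Item Parse(string line)
{
    var parts = line.Split(new[] { Seperator }, StringSplitOptions.None);
    return new Md5Item(
        parts[0],                         // Md5
        parts[1],                         // Key (the file path)
        long.Parse(parts[2]),             // Length in bytes
        long.Parse(parts[3]).FromEpoch()  // ModifiedUtc, stored as epoch seconds (hypothetical FromEpoch helper)
    );
}

Note that LoadMd5File keys the dictionary by item.Key, so whatever Parse puts into Key has to match the x.Path lookup used in CopyTo.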
private static void CopyTo(string src, string dst)
{
    var uri = new Uri(dst);
    var dstBucket = uri.Host;
    var dstPath = uri.PathAndQuery.Substring(1);
    Log.InfoFormat("Bucket: {0}, Path: {1}", dstBucket, dstPath);

    var md5File = Path.Combine(src, md5fileName);
    var md5Dictionary = LoadMd5File(md5File);

    var files = FetchFiles(src)
        .Where(x => x.Length > 0)
        .Where(x => !x.Attributes.HasFlag(FileAttributes.Hidden))
        .Where(x => !x.Name.StartsWith("."))
        .Where(x => x.FullName != md5File)
        .Where(x => (x.Attributes & FileAttributes.Archive) == FileAttributes.Archive)
        .Where(x => (x.Attributes & FileAttributes.Temporary) != FileAttributes.Temporary)
        .Select(x => Map(src, x));

    using (var md5FileWriter = File.AppendText(md5File))
    {
        files = files.Select(x =>
        {
            Md5Item item;
            // Recalculate the hash only when the file is new or its length/timestamp changed.
            if (!md5Dictionary.TryGetValue(x.Path, out item)
                || x.Length != item.Length
                || x.ModifiedUtc != item.ModifiedUtc)
            {
                x.Hash = Md5Hash.Calculate(x.Path).ToHex();
                item = new Md5Item(x.Hash, x.Path, x.Length, x.ModifiedUtc);
                md5Dictionary[x.Path] = item;
                lock (md5FileWriter)
                {
                    // Assumes Md5Item.ToString() emits the Md5Line format.
                    md5FileWriter.WriteLine(item);
                }
            }
            x.Hash = item.Md5;
            return x;
        }).ToList();
    }
    WriteMd5Dictionary(md5Dictionary, md5File);

    var objects = FetchObjects(dstBucket, dstPath)
        .Select(x => Map(dstPath, x))
        .ToDictionary(x => x.Relative, x => x);

    int chunkSize = 1024 * 1024 * 8; // 8 MB
    Log.DebugFormat("Chunk size: {0} ({1} bytes)", Pretty(chunkSize), chunkSize);

    /*
     * Only mirror files that:
     * - do not exist at the destination yet
     * - or have a different length
     * - or have a different md5 hash
     * - or have a different s3md5 hash
     */
    var items = files
        .Where(x =>
        {
            var destinationNotExists = !objects.ContainsKey(x.Relative);
            if (destinationNotExists)
            {
                return true;
            }
            var obj = objects[x.Relative];
            var differentSize = x.Length != obj.Length;
            if (differentSize)
            {
                return true;
            }
            var etag = obj.S3Object.ETag;
            var isMultiPart = etag.Contains("-");
            if (isMultiPart)
            {
                // Multipart objects: compare against the recomputed multipart-style ETag.
                var parts = ExtractParts(etag);
                var guestimateChunkSize = GuessChunkSize(obj.Length, parts);
                var s3md5mismatch = !obj.Hash.Contains(S3Md5.Calculate(x.Path, guestimateChunkSize));
                return s3md5mismatch;
            }
            else
            {
                var md5mismatch = x.Hash != obj.Hash;
                return md5mismatch;
            }
        })
        .ToList();
    Log.InfoFormat("Items to be mirrored: {0}", items.Count);

    using (IAmazonS3 client = CreateS3Client())
    {
        foreach (var item in items)
        {
            var key = dstPath + item.Relative;
            Log.DebugFormat("Uploading {0} => {1}", item.Relative, key);
            if (item.Length < chunkSize)
            {
                // Small files: single PUT, verified by comparing the returned ETag to the local MD5.
                client.UploadObjectFromFilePath(dstBucket, key, item.Path, null);
                var isMatch = client.GetObject(dstBucket, key).ETag.Contains(item.Hash);
                if (!isMatch)
                {
                    Log.ErrorFormat("Upload failed: {0}", item.Relative);
                }
            }
            else
            {
                Log.Debug("Multi-part");
                var response = client.InitiateMultipartUpload(dstBucket, key);
                try
                {
                    long index = 0;
                    var md5s = new List<PartETag>();
                    for (int part = 1; index < item.Length; part++)
                    {
                        var md5 = Md5Hash.Calculate(item.Path, index, chunkSize);
                        var partSize = Math.Min(chunkSize, item.Length - index);
                        Log.DebugFormat("\tPart {0} ({1:N0}): {2}", part, partSize, md5.ToHex());
                        client.UploadPart(new UploadPartRequest
                        {
                            Key = key,
                            BucketName = dstBucket,
                            FilePath = item.Path,
                            FilePosition = index,
                            PartNumber = part,
                            PartSize = partSize, // the last part may be shorter than chunkSize
                            UploadId = response.UploadId,
                            MD5Digest = System.Convert.ToBase64String(md5),
                        });
                        md5s.Add(new PartETag(part, md5.ToHex()));
                        index += partSize;
                    }
                    client.CompleteMultipartUpload(new CompleteMultipartUploadRequest
                    {
                        Key = key,
                        BucketName = dstBucket,
                        PartETags = md5s,
                        UploadId = response.UploadId,
                    });
                }
                catch (Exception ex)
                {
                    Log.Error(item.Relative, ex);
                    client.AbortMultipartUpload(dstBucket, key, response.UploadId);
                }
            }
            // Clear the archive bit so the file is skipped on the next run until it changes again.
            File.SetAttributes(item.Path, File.GetAttributes(item.Path) & ~FileAttributes.Archive);
        }
    }
}
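ExtractParts, GuessChunkSize and S3Md5.Calculate are not shown in this listing. The sketch below is one possible implementation, assuming the commonly observed S3 multipart ETag convention (the hex MD5 of the concatenated binary part MD5s, followed by "-" and the part count); the class name S3Md5Sketch and the chunk-size heuristic are assumptions, not the original code.

using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;

static class S3Md5Sketch
{
    // "9b2cf535f27731c974343645a3985328-12" => 12 parts.
    public static int ExtractParts(string etag)
    {
        return int.Parse(etag.Trim('"').Split('-')[1]);
    }

    // Heuristic: guess the part size that would split 'length' bytes into 'parts' parts,
    // rounded up to a whole number of MiB (a common client default). The part count only
    // constrains the size to a range, so this is a best guess, not an exact recovery.
    public static long GuessChunkSize(long length, int parts)
    {
        const long mib = 1024 * 1024;
        var raw = (length + parts - 1) / parts;   // ceiling division
        return ((raw + mib - 1) / mib) * mib;     // round up to whole MiB
    }

    // S3 multipart ETag convention: hex MD5 of the concatenated binary MD5s of the parts,
    // followed by "-" and the part count. Assumes each Read fills the buffer except at the
    // final part, which holds for local FileStreams.
    public static string Calculate(string path, long chunkSize)
    {
        using (var md5 = MD5.Create())
        using (var stream = File.OpenRead(path))
        {
            var partHashes = new List<byte>();
            var buffer = new byte[chunkSize];
            int parts = 0, read;
            while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
            {
                partHashes.AddRange(md5.ComputeHash(buffer, 0, read));
                parts++;
            }
            var hex = BitConverter.ToString(md5.ComputeHash(partHashes.ToArray()))
                                  .Replace("-", "")
                                  .ToLowerInvariant();
            return hex + "-" + parts;
        }
    }
}

Because GuessChunkSize can only narrow the part size to whatever the part count permits, a false mismatch (and hence a re-upload) is possible when the original upload used an uncommon part size.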