// Filters the incoming (S3Object, localPath, name) tuples down to those whose
// content differs from (or is missing at) the local destination; tuples proven
// equal by size + MD5 (or s3md5 for multipart uploads) are skipped.
// Tuple layout (as used below): Item1 = S3 object, Item2 = local file path,
// Item3 = human-readable name used in log messages.
// `hashes` maps local path -> cached Md5Item (length + md5) for that path.
static IEnumerable <Tuple <S3Object, string, string> > FilterEqualMD5( IEnumerable <Tuple <S3Object, string, string> > objects, IDictionary <string, Md5Item> hashes )
{
    foreach (var o in objects)
    {
        Md5Item item;
        var hashExists = hashes.TryGetValue(o.Item2, out item);
        if (hashExists)
        {
            // Cheapest check first: differing sizes can never be equal.
            if (o.Item1.Size != item.Length)
            {
                yield return(o);
                continue;
            }
            // A single-part ETag is 32 hex chars wrapped in quotes (34 chars);
            // anything longer is a multipart ETag of the form "<md5>-<parts>".
            var isMultipart = o.Item1.ETag.Length > 34;// md5 hex length = 32 + quotes
            if (isMultipart)
            {
                var etag = o.Item1.ETag;
                etag = etag.Substring(1, etag.Length - 2); // strip surrounding quotes
                etag = etag.Substring(33);                 // skip "<32 hex chars>-" to leave the part count
                var chunks = int.Parse(etag);
                // Reconstruct the chunk size the uploader most likely used so the
                // local s3md5 is computed over the same part boundaries.
                var chunkSize = GuessChunkSize(o.Item1.Size, chunks);
                Log.InfoFormat("Calculating s3md5 checksum with chunk size {0} : {1}", Pretty(chunkSize), o.Item3);
                var s3md5hash = S3Md5.Calculate(o.Item2, chunkSize);
                if (o.Item1.ETag.Contains(s3md5hash))
                {
                    Log.DebugFormat("Multipart object '{0}' is equal.", o.Item3);
                    continue;
                }
                else
                {
                    Log.WarnFormat("Multipart object '{0}' is NOT equal.", o.Item3);
                    // ignoreMultipartMd5 suppresses re-downloading on an s3md5
                    // mismatch (the guessed chunk size may simply be wrong).
                    if (!ignoreMultipartMd5)
                    {
                        yield return(o);
                    }
                    continue;
                }
            }
            // Single-part object: compare the unquoted ETag to the cached MD5.
            var sameHash = o.Item1.ETag.Replace("\"", "") == item.Md5;
            if (sameHash)
            {
                continue; // hashes match — nothing to download for this object
            }
            Log.ErrorFormat("Hashes unequal for '{0}' (src:{1}, dst:{2})", o.Item3, o.Item1.ETag, item.Md5);
            // Fall through: mismatched hash is treated like a missing cache
            // entry — log any size difference and yield for (re)download.
        }
        if (File.Exists(o.Item2))
        {
            var fi = new FileInfo(o.Item2);
            if (o.Item1.Size != fi.Length)
            {
                Log.ErrorFormat("File size unequal for '{0}' (src:{1}, dst:{2}, diff: {3})", o.Item3, o.Item1.Size, fi.Length, o.Item1.Size - fi.Length);
            }
        }
        else
        {
            Log.DebugFormat("Destination does not exists: {0}", o.Item2);
        }
        yield return(o);
    }
}
/// <summary>
/// Mirrors the local directory <paramref name="src"/> to an S3 destination.
/// Only files that are missing remotely or differ (by length, MD5, or s3md5
/// for multipart objects) are uploaded; successfully processed files get
/// their Archive attribute cleared so unchanged files are skipped next run.
/// </summary>
/// <param name="src">Local source directory root.</param>
/// <param name="dst">Destination URI; host = bucket, path = key prefix
/// (e.g. "s3://bucket/prefix/").</param>
private static void CopyTo(string src, string dst)
{
    var uri = new Uri(dst);
    var dstBucket = uri.Host;
    var dstPath = uri.PathAndQuery.Substring(1); // drop the leading '/'
    Log.InfoFormat("Bucket: {0}, Path: {1}", dstBucket, dstPath);

    var md5File = Path.Combine(src, md5fileName);
    var md5Dictionary = LoadMd5File(md5File);

    // Candidate files: non-empty, not hidden/dot-files, not the md5 cache
    // itself, Archive bit set (modified since last run), not Temporary.
    var files = FetchFiles(src)
        .Where(x => x.Length > 0)
        .Where(x => !x.Attributes.HasFlag(FileAttributes.Hidden))
        .Where(x => !x.Name.StartsWith("."))
        .Where(x => x.FullName != md5File)
        .Where(x => (x.Attributes & FileAttributes.Archive) == FileAttributes.Archive)
        .Where(x => (x.Attributes & FileAttributes.Temporary) != FileAttributes.Temporary)
        .Select(x => Map(src, x));

    using (var md5FileWriter = File.AppendText(md5File))
    {
        // .ToList() inside the using so every hash is computed (and appended
        // to the cache file) while the writer is still open.
        files = files.Select(x =>
        {
            Md5Item item;
            // Recompute the MD5 only when the cache has no entry or the file
            // changed (length or modification time differs).
            if (!md5Dictionary.TryGetValue(x.Path, out item)
                || x.Length != item.Length
                || x.ModifiedUtc != item.ModifiedUtc)
            {
                x.Hash = Md5Hash.Calculate(x.Path).ToHex();
                item = new Md5Item(x.Hash, x.Path, x.Length, x.ModifiedUtc);
                md5Dictionary[x.Path] = item;
                lock (md5FileWriter)
                {
                    md5FileWriter.WriteLine(item);
                }
            }
            x.Hash = item.Md5;
            return x;
        }).ToList();
    }
    // Rewrite the cache in compacted form (the append-only log above may
    // contain superseded entries).
    WriteMd5Dictionary(md5Dictionary, md5File);

    var objects = FetchObjects(dstBucket, dstPath)
        .Select(x => Map(dstPath, x))
        .ToDictionary(x => x.Relative, x => x);

    int chunkSize = 1024 * 1024 * 8; //8MB
    Log.DebugFormat("Chunk size: {0} ({1} bytes)", Pretty(chunkSize), chunkSize);

    /*
     * Keep only files that:
     * - do not exist at the destination yet
     * - or have a different length
     * - or have a different md5 hash
     * - or have a different s3md5 hash (multipart objects)
     */
    var items = files
        .Where(x =>
        {
            var destinationNotExists = !objects.ContainsKey(x.Relative);
            if (destinationNotExists)
            {
                return true;
            }
            var obj = objects[x.Relative];
            var differentSize = x.Length != obj.Length;
            if (differentSize)
            {
                return true;
            }
            var etag = obj.S3Object.ETag;
            // Multipart ETags have the form "<md5>-<parts>".
            var isMultiPart = etag.Contains("-");
            if (isMultiPart)
            {
                var parts = ExtractParts(etag);
                // Guess the chunk size the original upload used so the local
                // s3md5 is computed over the same part boundaries.
                var guestimateChunkSize = GuessChunkSize(obj.Length, parts);
                var s3md5mismatch =
                    !obj.Hash.Contains(S3Md5.Calculate(x.Path, guestimateChunkSize));
                return s3md5mismatch;
            }
            else
            {
                var md5mismatch = x.Hash != obj.Hash;
                return md5mismatch;
            }
        })
        .ToList();
    Log.InfoFormat("Items to be mirrored: {0}", items.Count);

    using (IAmazonS3 client = CreateS3Client())
        foreach (var item in items)
        {
            var key = dstPath + item.Relative;
            Log.DebugFormat("Uploading {0} => {1}", item.Relative, key);
            if (item.Length < chunkSize)
            {
                // Small file: single-shot upload, then verify via the ETag.
                client.UploadObjectFromFilePath(dstBucket, key, item.Path, null);
                // BUGFIX: dispose the GetObject response — GetObjectResponse
                // holds an open response stream; only the ETag is needed here.
                bool isMatch;
                using (var getResponse = client.GetObject(dstBucket, key))
                {
                    isMatch = getResponse.ETag.Contains(item.Hash);
                }
                if (!isMatch)
                {
                    Log.ErrorFormat("Upload failed: {0}", item.Relative);
                }
            }
            else
            {
                Log.Debug("Multi-part");
                var response = client.InitiateMultipartUpload(dstBucket, key);
                try
                {
                    long index = 0;
                    var md5s = new List<PartETag>();
                    for (int part = 1; index < item.Length; part++)
                    {
                        // Per-part MD5 for the Content-MD5 integrity check
                        // (presumably clamped to the remaining bytes by
                        // Md5Hash.Calculate — same clamp as partSize below).
                        var md5 = Md5Hash.Calculate(item.Path, index, chunkSize);
                        var partSize = Math.Min(chunkSize, item.Length - index);
                        Log.DebugFormat("\tPart {0} ({1:N0}): {2}", part, partSize, md5.ToHex());
                        client.UploadPart(new UploadPartRequest
                        {
                            Key = key,
                            BucketName = dstBucket,
                            FilePath = item.Path,
                            FilePosition = index,
                            PartNumber = part,
                            // BUGFIX: use the clamped part size. The original
                            // passed the full chunkSize even for the (shorter)
                            // final part, mismatching the computed partSize and
                            // the MD5 digest sent with the request.
                            PartSize = partSize,
                            UploadId = response.UploadId,
                            MD5Digest = System.Convert.ToBase64String(md5),
                        });
                        md5s.Add(new PartETag(part, md5.ToHex()));
                        index += partSize;
                    }
                    client.CompleteMultipartUpload(new CompleteMultipartUploadRequest
                    {
                        Key = key,
                        BucketName = dstBucket,
                        PartETags = md5s,
                        UploadId = response.UploadId,
                    });
                }
                catch (Exception ex)
                {
                    Log.Error(item.Relative, ex);
                    // Abort so S3 does not keep billing for orphaned parts.
                    client.AbortMultipartUpload(dstBucket, key, response.UploadId);
                }
            }
            // Clear the Archive bit so this file is skipped on the next run.
            File.SetAttributes(item.Path, File.GetAttributes(item.Path) & ~FileAttributes.Archive);
        }
}