Пример #1
0
        /// <summary>
        /// Lazily yields the entries of <paramref name="objects"/> that are NOT considered equal to
        /// their local counterpart; entries that are considered equal are dropped from the sequence.
        /// </summary>
        /// <param name="objects">
        /// Tuples of (S3 object, Item2, Item3). Item2 is used both as the key into
        /// <paramref name="hashes"/> and as a local file path; Item3 is only used in log messages.
        /// </param>
        /// <param name="hashes">Known MD5 entries, looked up by the tuple's Item2.</param>
        /// <returns>The entries that differ or could not be verified.</returns>
        static IEnumerable <Tuple <S3Object, string, string> > FilterEqualMD5(
            IEnumerable <Tuple <S3Object, string, string> > objects,
            IDictionary <string, Md5Item> hashes
            )
        {
            foreach (var candidate in objects)
            {
                var remote    = candidate.Item1;
                var localPath = candidate.Item2;
                var label     = candidate.Item3;

                Md5Item known;

                if (hashes.TryGetValue(localPath, out known))
                {
                    // A size mismatch settles it without looking at any hash.
                    if (remote.Size != known.Length)
                    {
                        yield return(candidate);

                        continue;
                    }

                    // A plain md5 ETag is 32 hex characters plus two quotes (34 characters);
                    // anything longer is treated as a multipart ETag with a part-count suffix.
                    if (remote.ETag.Length > 34)
                    {
                        // Drop the surrounding quotes, then skip the 32 hex characters and the
                        // separator so that only the part count remains.
                        var partCountText = remote.ETag
                                            .Substring(1, remote.ETag.Length - 2)
                                            .Substring(33);
                        var partCount = int.Parse(partCountText);

                        var partSize = GuessChunkSize(remote.Size, partCount);

                        Log.InfoFormat("Calculating s3md5 checksum with chunk size {0} : {1}", Pretty(partSize), label);

                        var localS3Md5 = S3Md5.Calculate(localPath, partSize);

                        if (!remote.ETag.Contains(localS3Md5))
                        {
                            Log.WarnFormat("Multipart object '{0}' is NOT equal.", label);

                            // Multipart mismatches are only reported when they are not ignored.
                            if (!ignoreMultipartMd5)
                            {
                                yield return(candidate);
                            }
                        }
                        else
                        {
                            Log.DebugFormat("Multipart object '{0}' is equal.", label);
                        }

                        // Multipart objects never fall through to the plain md5 comparison.
                        continue;
                    }

                    // Single-part object: the unquoted ETag is compared to the stored md5.
                    if (remote.ETag.Replace("\"", "") == known.Md5)
                    {
                        continue;
                    }

                    Log.ErrorFormat("Hashes unequal for '{0}' (src:{1}, dst:{2})", label, remote.ETag, known.Md5);
                }

                // Reached when there is no stored hash or the md5 differs: log what is known
                // about the local file, then hand the entry to the caller.
                if (!File.Exists(localPath))
                {
                    Log.DebugFormat("Destination does not exists: {0}", localPath);
                }
                else
                {
                    var localInfo = new FileInfo(localPath);
                    if (remote.Size != localInfo.Length)
                    {
                        Log.ErrorFormat("File size unequal for '{0}' (src:{1}, dst:{2}, diff: {3})", label, remote.Size,
                                        localInfo.Length, remote.Size - localInfo.Length);
                    }
                }

                yield return(candidate);
            }
        }
Пример #2
0
        /// <summary>
        /// Mirrors the files found under the local directory <paramref name="src"/> to the S3
        /// location described by <paramref name="dst"/>.
        /// </summary>
        /// <remarks>
        /// The method works in four steps:
        /// 1. Select the candidate files: non-empty, not hidden, name not starting with a dot,
        ///    not the md5 file itself, Archive attribute set and Temporary attribute not set.
        /// 2. Resolve an MD5 hash per file. A cached entry from the md5 file is reused when its
        ///    length and modification time still match; otherwise the hash is recalculated and
        ///    the new entry is appended to the md5 file.
        /// 3. Compare the files with the objects already present at the destination and keep
        ///    only those that are missing or differ in length or hash.
        /// 4. Upload those files (single request below the chunk size, multipart otherwise) and
        ///    clear the Archive attribute of every file whose upload succeeded.
        /// </remarks>
        /// <param name="src">Local source directory.</param>
        /// <param name="dst">
        /// Destination URI. The host is used as the bucket name and the path-and-query part
        /// (without its leading character) as the key prefix.
        /// </param>
        private static void CopyTo(string src, string dst)
        {
            var uri       = new Uri(dst);
            var dstBucket = uri.Host;
            var dstPath   = uri.PathAndQuery.Substring(1);

            Log.InfoFormat("Bucket: {0}, Path: {1}", dstBucket, dstPath);

            var md5File       = Path.Combine(src, md5fileName);
            var md5Dictionary = LoadMd5File(md5File);

            var files = FetchFiles(src)
                        .Where(x => x.Length > 0)
                        .Where(x => !x.Attributes.HasFlag(FileAttributes.Hidden))
                        // Ordinal: a dot-file check is not a linguistic comparison.
                        .Where(x => !x.Name.StartsWith(".", StringComparison.Ordinal))
                        .Where(x => x.FullName != md5File)
                        .Where(x => (x.Attributes & FileAttributes.Archive) == FileAttributes.Archive)
                        .Where(x => (x.Attributes & FileAttributes.Temporary) != FileAttributes.Temporary)
                        .Select(x => Map(src, x));

            using (var md5FileWriter = File.AppendText(md5File))
            {
                // ToList() forces the hashing to run while the writer is still open.
                files = files.Select(x =>
                {
                    Md5Item item;
                    var cacheIsStale = !md5Dictionary.TryGetValue(x.Path, out item) ||
                                       x.Length != item.Length ||
                                       x.ModifiedUtc != item.ModifiedUtc;

                    if (cacheIsStale)
                    {
                        x.Hash = Md5Hash.Calculate(x.Path).ToHex();
                        item   = new Md5Item(x.Hash, x.Path, x.Length, x.ModifiedUtc);
                        md5Dictionary[x.Path] = item;
                        lock (md5FileWriter)
                        {
                            md5FileWriter.WriteLine(item);
                        }
                    }
                    x.Hash = item.Md5;
                    return(x);
                }).ToList();
            }

            WriteMd5Dictionary(md5Dictionary, md5File);

            var objects = FetchObjects(dstBucket, dstPath)
                          .Select(x => Map(dstPath, x))
                          .ToDictionary(x => x.Relative, x => x);

            int chunkSize = 1024 * 1024 * 8; //8MB

            Log.DebugFormat("Chunk size: {0} ({1} bytes)", Pretty(chunkSize), chunkSize);

            /*
             * Only files that:
             *  - do not exist at the destination yet,
             *  - or have a different length,
             *  - or have a different md5 hash,
             *  - or have a different s3md5 (multipart) hash.
             */
            var items = files
                        .Where(x =>
                        {
                            var destinationNotExists = !objects.ContainsKey(x.Relative);

                            if (destinationNotExists)
                            {
                                return(true);
                            }

                            var obj = objects[x.Relative];

                            var differentSize = x.Length != obj.Length;

                            if (differentSize)
                            {
                                return(true);
                            }

                            var etag        = obj.S3Object.ETag;
                            var isMultiPart = etag.Contains("-");

                            if (isMultiPart)
                            {
                                // The multipart hash depends on the part size of the original upload,
                                // which is derived here from the object length and the part count.
                                var parts = ExtractParts(etag);
                                var guestimateChunkSize = GuessChunkSize(obj.Length, parts);
                                var s3md5mismatch       = !obj.Hash.Contains(S3Md5.Calculate(x.Path, guestimateChunkSize));
                                return(s3md5mismatch);
                            }

                            var md5mismatch = x.Hash != obj.Hash;
                            return(md5mismatch);
                        })
                        .ToList();

            Log.InfoFormat("Items to be mirrored: {0}", items.Count);

            using (IAmazonS3 client = CreateS3Client())
            {
                foreach (var item in items)
                {
                    var key = dstPath + item.Relative;

                    Log.DebugFormat("Uploading {0} => {1}", item.Relative, key);

                    // Tracks whether this file actually reached the bucket; only then may the
                    // Archive attribute be cleared, otherwise the file must be retried next run.
                    var uploaded = false;

                    if (item.Length < chunkSize)
                    {
                        client.UploadObjectFromFilePath(dstBucket, key, item.Path, null);

                        // The response is disposed right away: only its ETag is needed and it
                        // would otherwise keep the response stream open.
                        bool isMatch;
                        using (var verification = client.GetObject(dstBucket, key))
                        {
                            isMatch = verification.ETag.Contains(item.Hash);
                        }

                        if (!isMatch)
                        {
                            Log.ErrorFormat("Upload failed: {0}", item.Relative);
                        }

                        uploaded = isMatch;
                    }
                    else
                    {
                        Log.Debug("Multi-part");
                        var response = client.InitiateMultipartUpload(dstBucket, key);
                        try
                        {
                            long index = 0;

                            var md5s = new List <PartETag>();

                            for (int part = 1; index < item.Length; part++)
                            {
                                var md5      = Md5Hash.Calculate(item.Path, index, chunkSize);
                                var partSize = Math.Min(chunkSize, item.Length - index);

                                Log.DebugFormat("\tPart {0} ({1:N0}): {2}", part, partSize, md5.ToHex());

                                client.UploadPart(new UploadPartRequest
                                {
                                    Key          = key,
                                    BucketName   = dstBucket,
                                    FilePath     = item.Path,
                                    FilePosition = index,
                                    PartNumber   = part,
                                    // Actual byte count of this part; the last part is usually
                                    // shorter than chunkSize.
                                    PartSize     = partSize,
                                    UploadId     = response.UploadId,
                                    MD5Digest    = System.Convert.ToBase64String(md5),
                                });

                                md5s.Add(new PartETag(part, md5.ToHex()));

                                index += partSize;
                            }

                            client.CompleteMultipartUpload(new CompleteMultipartUploadRequest
                            {
                                Key        = key,
                                BucketName = dstBucket,
                                PartETags  = md5s,
                                UploadId   = response.UploadId,
                            });

                            uploaded = true;
                        }
                        catch (Exception ex)
                        {
                            Log.Error(item.Relative, ex);
                            client.AbortMultipartUpload(dstBucket, key, response.UploadId);
                        }
                    }

                    if (uploaded)
                    {
                        // Files are selected by their Archive attribute, so clearing it marks
                        // the file as mirrored. A failed upload keeps the attribute set.
                        File.SetAttributes(item.Path, File.GetAttributes(item.Path) & ~FileAttributes.Archive);
                    }
                }
            }
        }