Example #1
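Uploads content that has already been hashed into a DedupNode: single-chunk content goes through PutChunkAsync, node trees through PutNodeAsync, and the resulting content hash is recorded in the backing store's expiry cache. Failures are captured in the returned BoolResult rather than rethrown.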
        private async Task<BoolResult> UploadWithDedupAsync(
            OperationContext context,
            AbsolutePath path,
            HashType hashType,
            DedupNode dedupNode)
        {
            // Puts are effectively implicitly pinned regardless of configuration.
            try
            {
                if (dedupNode.Type == DedupNode.NodeType.ChunkLeaf)
                {
                    await PutChunkAsync(context, dedupNode, path);
                }
                else
                {
                    await PutNodeAsync(context, dedupNode, path);
                }

                BackingContentStoreExpiryCache.Instance.AddExpiry(dedupNode.ToContentHash(hashType), EndDateTime);
                return BoolResult.Success;
            }
            catch (Exception ex)
            {
                return new BoolResult(ex);
            }
        }
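Example #2
Verifies an in-memory buffer against an expected DedupNode by streaming it through ChunkDedupedFileContentHashVerifier.VerifyStreamAsync with the node's chunk list and its truncated single-chunk hash.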
        private Task<bool> VerifyContentAsync(byte[] bytes, DedupNode expectedNode)
        {
            return ChunkDedupedFileContentHashVerifier.VerifyStreamAsync(
                new MemoryStream(bytes),
                expectedNode.GetChunks().ToList(),
                new ChunkDedupedFileContentHash(expectedNode.Hash.Take(DedupSingleChunkHashInfo.Length).ToArray()),
                CancellationToken.None);
        }
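Example #3
Chunks a single attachment file, wraps the chunk in a root DedupNode, and returns the node's Dedup64K identifier together with its transitive content length.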
        public async Task<(DedupIdentifier dedupId, ulong length)> UploadAttachmentToBlobStore(bool verbose, string itemPath, Guid planId, Guid jobId, CancellationToken cancellationToken)
        {
            UploadedAttachmentBlobFiles.Add(itemPath);
            var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);

            var rootNode = new DedupNode(new[] { chunk });
            var dedupId  = rootNode.GetDedupIdentifier(HashType.Dedup64K);

            return (dedupId, rootNode.TransitiveContentBytes);
        }
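Example #4
A test helper that runs the stream verifier synchronously over a buffer and asserts that the chunked content matches the expected hash.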
        protected void HashCanBeVerified(DedupNode node, byte[] bytes, string expectedHash, int seed)
        {
            List<ChunkInfo> chunks = node.GetChunks().ToList();

            using (var stream = new MemoryStream(bytes))
            {
                bool hashMatch = ChunkDedupedFileContentHashVerifier.VerifyStreamAsync(stream, chunks, new ChunkDedupedFileContentHash(expectedHash), CancellationToken.None)
                                 .ConfigureAwait(false).GetAwaiter().GetResult();

                Assert.True(hashMatch);
            }
        }
Example #5
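Uploads a single chunk: the file is read into an uncompressed DedupCompressedBuffer and sent via PutChunkAndKeepUntilReferenceAsync with a keep-until reference, routed through the TryGatedArtifactOperationAsync wrapper.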
        private Task PutChunkAsync(OperationContext context, DedupNode dedupNode, AbsolutePath path)
        {
            var dedupIdentifier = dedupNode.GetDedupId();

            return TryGatedArtifactOperationAsync(
                context,
                dedupIdentifier.ValueString,
                "PutChunkAndKeepUntilReferenceAsync",
                innerCts => DedupStoreClient.Client.PutChunkAndKeepUntilReferenceAsync(
                    dedupIdentifier.CastToChunkDedupIdentifier(),
                    DedupCompressedBuffer.FromUncompressed(File.ReadAllBytes(path.Path)),
                    new KeepUntilBlobReference(EndDateTime),
                    innerCts));
        }
Example #6
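The node counterpart of Example #5: uploads an entire DedupNode tree through the shared upload session, mapping the node's dedup identifier to its source file path.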
        private async Task PutNodeAsync(OperationContext context, DedupNode dedupNode, AbsolutePath path)
        {
            var dedupIdentifier = dedupNode.GetDedupId();

            await TryGatedArtifactOperationAsync<object>(
                context,
                dedupIdentifier.ValueString,
                "DedupUploadSession.UploadAsync",
                async innerCts =>
                {
                    await _uploadSession.UploadAsync(
                        dedupNode,
                        new Dictionary<VstsDedupIdentifier, string> { { dedupIdentifier, path.Path } },
                        innerCts);
                    return null;
                });
        }
Example #7
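Recursively walks a DedupNode tree while keeping running totals of nodes, chunks, and bytes; in the log output, '*' marks a hash seen for the first time and 'd' marks a duplicate.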
        private void LogNode(bool displayChildNodes, string indent, DedupNode root, AbsolutePath path, ref ulong offset)
        {
            _totalNodes++;
            bool newNode = _allNodes.Add(root.Hash);

            if (displayChildNodes)
            {
                var  hash        = new ContentHash(HashType.DedupNode, root.Hash);
                char newNodeChar = newNode ? '*' : 'd';
                _logger.Always($"{indent}{hash} {newNodeChar} {path}");
            }

            if (root.ChildNodes != null)
            {
                foreach (var child in root.ChildNodes)
                {
                    switch (child.Type)
                    {
                    case DedupNode.NodeType.ChunkLeaf:
                        _totalBytes += child.TransitiveContentBytes;
                        _totalChunks++;

                        bool newChunk = _allChunks.Add(child.Hash);
                        if (newChunk)
                        {
                            _uniqueBytes += child.TransitiveContentBytes;
                        }

                        if (_displayChunks)
                        {
                            char newChunkChar = newChunk ? '*' : 'd';
                            _logger.Always($"{indent} {offset} {child.Hash.ToHex()} {newChunkChar}");
                        }

                        offset += child.TransitiveContentBytes;
                        break;

                    case DedupNode.NodeType.InnerNode:
                        LogNode(_displayChildNodes, indent + " ", child, null, ref offset);
                        break;

                    default:
                        throw new NotImplementedException();
                    }
                }
            }
        }
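Example #8
An end-to-end upload helper: it chunks the file, derives the Dedup64K identifier, opens a keep-until upload session, and runs the upload under telemetry measurement with up to three HTTP retries.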
        public static async Task<(DedupIdentifier dedupId, ulong length)> UploadToBlobStore(
            bool verbose,
            string itemPath,
            Func<TelemetryInformationLevel, Uri, string, BlobStoreTelemetryRecord> telemetryRecordFactory,
            Action<string> traceOutput,
            DedupStoreClient dedupClient,
            BlobStoreClientTelemetry clientTelemetry,
            CancellationToken cancellationToken)
        {
            // Create chunks and identifier
            var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);

            var rootNode = new DedupNode(new[] { chunk });
            // ChunkerHelper uses a 64K default block size
            var dedupId = rootNode.GetDedupIdentifier(HashType.Dedup64K);

            // Set up the upload session to keep the file for a minimum of one day.
            // Blobs need to be associated with the server via an ID reference;
            // otherwise they will be garbage collected after one day.
            var tracer        = DedupManifestArtifactClientFactory.CreateArtifactsTracer(verbose, traceOutput);
            var keepUntilRef  = new KeepUntilBlobReference(DateTime.UtcNow.AddDays(1));
            var uploadSession = dedupClient.CreateUploadSession(keepUntilRef, tracer, FileSystem.Instance);

            // Upload the chunks
            var uploadRecord = clientTelemetry.CreateRecord<BlobStoreTelemetryRecord>(telemetryRecordFactory);
            await clientTelemetry.MeasureActionAsync(
                record: uploadRecord,
                actionAsync: async () => await AsyncHttpRetryHelper.InvokeAsync(
                    async () =>
                    {
                        await uploadSession.UploadAsync(
                            rootNode,
                            new Dictionary<DedupIdentifier, string>() { [dedupId] = itemPath },
                            cancellationToken);
                        return uploadSession.UploadStatistics;
                    },
                    maxRetries: 3,
                    tracer: tracer,
                    canRetryDelegate: e => true, // this isn't great, but failing on upload stinks, so just try a couple of times
                    cancellationToken: cancellationToken,
                    continueOnCapturedContext: false));

            return (dedupId, rootNode.TransitiveContentBytes);
        }
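Example #9
Reduces an arbitrary sequence of nodes to a single root by repeatedly paging them into parents of at most DedupNode.MaxDirectChildrenPerNode children; a lone chunk leaf is wrapped in a parent node so the result is always an inner node.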
        private static DedupNode CreateNodeToUpload(IEnumerable <DedupNode> nodes)
        {
            while (nodes.Count() > 1)
            {
                nodes = nodes
                        .GetPages(DedupNode.MaxDirectChildrenPerNode)
                        .Select(children => new DedupNode(children))
                        .ToList();
            }

            DedupNode root = nodes.Single();

            if (root.Type == DedupNode.NodeType.ChunkLeaf)
            {
                root = new DedupNode(new[] { root });
            }

            return root;
        }
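The same bounded-fanout paging can be shown in isolation. Below is a minimal, self-contained sketch of the idea with a toy node type; ToyNode, TreePaging, and BuildRoot are illustrative stand-ins and not part of the DedupNode API.

    using System.Collections.Generic;
    using System.Linq;

    internal sealed class ToyNode
    {
        public IReadOnlyList<ToyNode> Children { get; }
        public ToyNode(IEnumerable<ToyNode> children) => Children = children.ToList();
    }

    internal static class TreePaging
    {
        // Repeatedly group up to `fanout` nodes under new parents until a
        // single root remains, mirroring the CreateNodeToUpload loop above.
        public static ToyNode BuildRoot(IEnumerable<ToyNode> nodes, int fanout)
        {
            var level = nodes.ToList();
            while (level.Count > 1)
            {
                level = level
                    .Select((node, index) => (node, index))
                    .GroupBy(pair => pair.index / fanout)
                    .Select(group => new ToyNode(group.Select(pair => pair.node)))
                    .ToList();
            }
            return level.Single();
        }
    }

With 10,000 leaves and a fanout of 512, the loop converges in two passes (10,000 nodes collapse to 20 parents, then to one root), so tree height grows logarithmically with the chunk count.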
Example #10
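A variant of Example #8 embedded in a command context: verbosity is taken from the system.debug variable and telemetry is recorded through BuildArtifactActionRecord.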
        private async Task<(DedupIdentifier dedupId, ulong length)> UploadToBlobStore(IAsyncCommandContext context, string itemPath, CancellationToken cancellationToken)
        {
            // Create chunks and identifier
            var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);

            var rootNode = new DedupNode(new[] { chunk });
            var dedupId  = rootNode.GetDedupIdentifier(HashType.Dedup64K);

            // Set up the upload session to keep the file for a minimum of one day.
            var verbose       = String.Equals(context.GetVariableValueOrDefault("system.debug"), "true", StringComparison.InvariantCultureIgnoreCase);
            var tracer        = DedupManifestArtifactClientFactory.CreateArtifactsTracer(verbose, (str) => context.Output(str));
            var keepUntilRef  = new KeepUntilBlobReference(DateTime.UtcNow.AddDays(1));
            var uploadSession = _dedupClient.CreateUploadSession(keepUntilRef, tracer, FileSystem.Instance);

            // Upload the chunks
            var uploadRecord = _blobTelemetry.CreateRecord<BuildArtifactActionRecord>(
                (level, uri, type) => new BuildArtifactActionRecord(level, uri, type, nameof(UploadAsync), context));
            await _blobTelemetry.MeasureActionAsync(
                record: uploadRecord,
                actionAsync: async () => await AsyncHttpRetryHelper.InvokeAsync(
                    async () =>
                    {
                        return await uploadSession.UploadAsync(
                            rootNode,
                            new Dictionary<DedupIdentifier, string>() { [dedupId] = itemPath },
                            cancellationToken);
                    },
                    maxRetries: 3,
                    tracer: tracer,
                    canRetryDelegate: e => true, // this isn't great, but failing on upload stinks, so just try a couple of times
                    cancellationToken: cancellationToken,
                    continueOnCapturedContext: false));

            return (dedupId, rootNode.TransitiveContentBytes);
        }
Example #11
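A command that hashes files or whole directory trees with DedupNodeHashAlgorithm (rolling-hash or maximally-packed chunking), logs each resulting node tree via LogNode, and prints unique and total byte, chunk, and node counts.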
        public void DedupHashFile
        (
            [Required] string[] path,
            [DefaultValue(false)] bool chunks,
            [DefaultValue(false)] bool childNodes,
            [DefaultValue(false)] bool rollingHash,
            [DefaultValue(FileSystemConstants.FileIOBufferSize)] int bufferSize,
            [DefaultValue((long)0)] long startOffset
        )
        {
            Initialize();

            _displayChunks     = chunks;
            _displayChildNodes = childNodes;

            var paths = new List<AbsolutePath>();

            foreach (AbsolutePath root in path.Select(p => new AbsolutePath(p)))
            {
                if (_fileSystem.DirectoryExists(root))
                {
                    paths.AddRange(_fileSystem.EnumerateFiles(root, EnumerateOptions.Recurse).Select(fileInfo => fileInfo.FullPath));
                }
                else if (_fileSystem.FileExists(root))
                {
                    paths.Add(root);
                }
                else
                {
                    throw new ArgumentException("given path is not an existing file or directory");
                }
            }

            var buffer = new byte[bufferSize];

            using (var hasher = new DedupNodeHashAlgorithm(rollingHash ? DedupNodeTree.Algorithm.RollingHash : DedupNodeTree.Algorithm.MaximallyPacked))
            {
                foreach (var p in paths)
                {
                    hasher.Initialize();
                    TaskSafetyHelpers.SyncResultOnThreadPool(async () =>
                    {
                        using (var fs = await _fileSystem.OpenReadOnlySafeAsync(p, FileShare.Read | FileShare.Delete))
                        {
                            fs.Position = startOffset;
                            int bytesRead;
                            while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
                            {
                                hasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                            }
                            hasher.TransformFinalBlock(new byte[0], 0, 0);
                            DedupNode root = hasher.GetNode();
                            ulong offset   = 0;
                            LogNode(true, string.Empty, root, p, ref offset);
                        }

                        return 0;
                    });
                }
            }

            _logger.Always("Totals:");
            _logger.Always($"Bytes: Unique={_uniqueBytes:N0} Total={_totalBytes:N0}");
            _logger.Always($"Chunks: Unique={_allChunks.Count:N0} Total={_totalChunks:N0}");
            _logger.Always($"Nodes: Unique={_allNodes.Count:N0} Total={_totalNodes:N0}");
        }
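Example #12
A test helper that builds a node tree from synthetic 64 KiB chunks, then checks the root hash, tree height, node count, serialization round-trip of every inner node, and the in-order chunk enumeration against expected values.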
        private void NodeTreeChecker(int chunkCount, int expectedNodeCount, uint expectedHeight, string expectedHash)
        {
            var r            = new Random(Seed: 0);
            var actualChunks = Enumerable
                .Range(0, chunkCount)
                .Select(i =>
                {
                    unchecked
                    {
                        byte[] hash = new byte[32];
                        r.NextBytes(hash);
                        hash[0] = (byte)i;
                        hash[1] = (byte)(i >> 8);
                        hash[2] = (byte)(i >> 16);
                        hash[3] = (byte)(i >> 24);

                        return new ChunkInfo(0, 64 * 1024, hash);
                    }
                })
                .ToList();

            var node = DedupNodeTree.Create(actualChunks);

            Assert.Equal <string>(expectedHash, node.Hash.ToHex());
            Assert.NotNull(node.Height);
            Assert.Equal(expectedHeight, node.Height.Value);
            var nodes      = node.EnumerateInnerNodesDepthFirst().ToList();
            var nodeChunks = node.EnumerateChunkLeafsInOrder().ToList();

            var node2 = PackedDedupNodeTree.EnumerateTree(actualChunks).Last();

            Assert.Equal(node.Hash.ToHex(), node2.Hash.ToHex());

            foreach (var n in nodes)
            {
                var roundTrip = DedupNode.Deserialize(n.Serialize());
                Assert.Equal(n.Hash, roundTrip.Hash, ByteArrayComparer.Instance);
                Assert.Equal(n.ChildNodes.Count, roundTrip.ChildNodes.Count);
                Assert.True(
                    n.ChildNodes.Zip(roundTrip.ChildNodes, (e1, e2) =>
                    {
                        if (e1.Type != e2.Type)
                        {
                            return false;
                        }
                        else if (e1.TransitiveContentBytes != e2.TransitiveContentBytes)
                        {
                            return false;
                        }
                        else if (!ByteArrayComparer.Instance.Equals(e1.Hash, e2.Hash))
                        {
                            return false;
                        }

                        return true;
                    }).All(result => result));
            }

            Assert.Equal(
                actualChunks.Select(c => c.Hash.ToHex()),
                nodeChunks.Select(c => c.Hash.ToHex()));
            Assert.Equal(chunkCount, nodeChunks.Count);
            Assert.Equal(expectedNodeCount, nodes.Count);
        }
Example #13
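Like Example #11, but the dedup hash type is parsed from a required argument and selects the chunker configuration for DedupNodeOrChunkHashAlgorithm.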
        public void DedupHashFile
        (
            [Required] string[] path,
            [Required] string hashType,
            [DefaultValue(false)] bool chunks,
            [DefaultValue(false)] bool childNodes,
            [DefaultValue(FileSystemConstants.FileIOBufferSize)] int bufferSize,
            [DefaultValue((long)0)] long startOffset
        )
        {
            Initialize();

            _displayChunks     = chunks;
            _displayChildNodes = childNodes;

            if (!Enum.TryParse(hashType, out HashType dedupHashType))
            {
                throw new ArgumentException($"HashType couldn't be inferred - {hashType}. Valid HashType is required.");
            }

            var paths = new List<AbsolutePath>();

            foreach (AbsolutePath root in path.Select(p => new AbsolutePath(Path.GetFullPath(p))))
            {
                if (_fileSystem.DirectoryExists(root))
                {
                    paths.AddRange(_fileSystem.EnumerateFiles(root, EnumerateOptions.Recurse).Select(fileInfo => fileInfo.FullPath));
                }
                else if (_fileSystem.FileExists(root))
                {
                    paths.Add(root);
                }
                else
                {
                    throw new ArgumentException("given path is not an existing file or directory");
                }
            }

            var buffer = new byte[bufferSize];

            using (var contentHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(dedupHashType.GetChunkerConfiguration())))
            {
                foreach (var p in paths)
                {
                    contentHasher.Initialize();
                    TaskSafetyHelpers.SyncResultOnThreadPool(async () =>
                    {
                        using (Stream fs = _fileSystem.OpenReadOnly(p, FileShare.Read | FileShare.Delete))
                        {
                            fs.Position = startOffset;
                            int bytesRead;
                            while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
                            {
                                contentHasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                            }
                            contentHasher.TransformFinalBlock(new byte[0], 0, 0);
                            DedupNode root = contentHasher.GetNode();
                            ulong offset   = 0;
                            LogNode(true, string.Empty, root, p, ref offset);
                        }

                        return 0;
                    });
                }
            }

            _logger.Always("Totals:");
            _logger.Always($"Bytes: Unique={_uniqueBytes:N0} Total={_totalBytes:N0}");
            _logger.Always($"Chunks: Unique={_allChunks.Count:N0} Total={_totalChunks:N0}");
            _logger.Always($"Nodes: Unique={_allNodes.Count:N0} Total={_totalNodes:N0}");
        }
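For reference, the hashing pattern shared by Examples #11 and #13 can be pulled out into a small helper. This is a sketch under assumptions: it reuses only the calls the examples make (DedupNodeHashAlgorithm, TransformBlock/TransformFinalBlock, GetNode), the namespace import is assumed, and HashStreamToNode is a hypothetical name.

    using System.IO;
    using BuildXL.Cache.ContentStore.Hashing; // assumed namespace for the dedup hashing types

    internal static class DedupHashSketch
    {
        // Stream content through the hasher the same way the commands above do:
        // feed blocks, finalize with an empty block, then read back the root node.
        public static DedupNode HashStreamToNode(Stream stream)
        {
            using (var hasher = new DedupNodeHashAlgorithm(DedupNodeTree.Algorithm.MaximallyPacked))
            {
                hasher.Initialize();
                var buffer = new byte[64 * 1024];
                int bytesRead;
                while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    hasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                }
                hasher.TransformFinalBlock(new byte[0], 0, 0);
                return hasher.GetNode();
            }
        }
    }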