private async Task<BoolResult> UploadWithDedupAsync(
    OperationContext context,
    AbsolutePath path,
    HashType hashType,
    DedupNode dedupNode)
{
    // Puts are effectively implicitly pinned regardless of configuration.
    try
    {
        if (dedupNode.Type == DedupNode.NodeType.ChunkLeaf)
        {
            await PutChunkAsync(context, dedupNode, path);
        }
        else
        {
            await PutNodeAsync(context, dedupNode, path);
        }

        BackingContentStoreExpiryCache.Instance.AddExpiry(dedupNode.ToContentHash(hashType), EndDateTime);
        return BoolResult.Success;
    }
    catch (Exception ex)
    {
        return new BoolResult(ex);
    }
}
private Task<bool> VerifyContentAsync(byte[] bytes, DedupNode expectedNode)
{
    return ChunkDedupedFileContentHashVerifier.VerifyStreamAsync(
        new MemoryStream(bytes),
        expectedNode.GetChunks().ToList(),
        new ChunkDedupedFileContentHash(expectedNode.Hash.Take(DedupSingleChunkHashInfo.Length).ToArray()),
        CancellationToken.None);
}
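// A minimal usage sketch (not from the original source): hash an in-memory
// buffer with the same DedupNodeHashAlgorithm API that DedupHashFile uses
// later in this section, then verify those bytes against the resulting node.
// The buffer contents and this wrapper test method are assumptions made for
// illustration only.
private async Task VerifyContentRoundTripsAsync()
{
    byte[] bytes = new byte[256 * 1024];
    new Random(Seed: 0).NextBytes(bytes);

    using (var hasher = new DedupNodeHashAlgorithm(DedupNodeTree.Algorithm.MaximallyPacked))
    {
        hasher.ComputeHash(bytes); // standard HashAlgorithm API; builds the chunk tree internally
        DedupNode expected = hasher.GetNode();
        Assert.True(await VerifyContentAsync(bytes, expected));
    }
}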
public async Task<(DedupIdentifier dedupId, ulong length)> UploadAttachmentToBlobStore(
    bool verbose,
    string itemPath,
    Guid planId,
    Guid jobId,
    CancellationToken cancellationToken)
{
    UploadedAttachmentBlobFiles.Add(itemPath);

    var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);
    var rootNode = new DedupNode(new[] { chunk });
    var dedupId = rootNode.GetDedupIdentifier(HashType.Dedup64K);

    return (dedupId, rootNode.TransitiveContentBytes);
}
protected void HashCanBeVerified(DedupNode node, byte[] bytes, string expectedHash, int seed)
{
    List<ChunkInfo> chunks = node.GetChunks().ToList();

    using (var stream = new MemoryStream(bytes))
    {
        bool hashMatch = ChunkDedupedFileContentHashVerifier.VerifyStreamAsync(
                stream,
                chunks,
                new ChunkDedupedFileContentHash(expectedHash),
                CancellationToken.None)
            .ConfigureAwait(false).GetAwaiter().GetResult();
        Assert.True(hashMatch);
    }
}
private Task PutChunkAsync(OperationContext context, DedupNode dedupNode, AbsolutePath path)
{
    var dedupIdentifier = dedupNode.GetDedupId();
    return TryGatedArtifactOperationAsync(
        context,
        dedupIdentifier.ValueString,
        "PutChunkAndKeepUntilReferenceAsync",
        innerCts => DedupStoreClient.Client.PutChunkAndKeepUntilReferenceAsync(
            dedupIdentifier.CastToChunkDedupIdentifier(),
            DedupCompressedBuffer.FromUncompressed(File.ReadAllBytes(path.Path)),
            new KeepUntilBlobReference(EndDateTime),
            innerCts));
}
private async Task PutNodeAsync(OperationContext context, DedupNode dedupNode, AbsolutePath path)
{
    var dedupIdentifier = dedupNode.GetDedupId();
    await TryGatedArtifactOperationAsync<object>(
        context,
        dedupIdentifier.ValueString,
        "DedupUploadSession.UploadAsync",
        async innerCts =>
        {
            await _uploadSession.UploadAsync(
                dedupNode,
                new Dictionary<VstsDedupIdentifier, string> { { dedupIdentifier, path.Path } },
                innerCts);
            return null;
        });
}
private void LogNode(bool displayChildNodes, string indent, DedupNode root, AbsolutePath path, ref ulong offset)
{
    _totalNodes++;
    bool newNode = _allNodes.Add(root.Hash);

    if (displayChildNodes)
    {
        var hash = new ContentHash(HashType.DedupNode, root.Hash);
        char newNodeChar = newNode ? '*' : 'd'; // '*' = first sighting, 'd' = already-seen duplicate
        _logger.Always($"{indent}{hash} {newNodeChar} {path}");
    }

    if (root.ChildNodes != null)
    {
        foreach (var child in root.ChildNodes)
        {
            switch (child.Type)
            {
                case DedupNode.NodeType.ChunkLeaf:
                    _totalBytes += child.TransitiveContentBytes;
                    _totalChunks++;
                    bool newChunk = _allChunks.Add(child.Hash);
                    if (newChunk)
                    {
                        _uniqueBytes += child.TransitiveContentBytes;
                    }

                    if (_displayChunks)
                    {
                        char newChunkChar = newChunk ? '*' : 'd';
                        _logger.Always($"{indent} {offset} {child.Hash.ToHex()} {newChunkChar}");
                    }

                    offset += child.TransitiveContentBytes;
                    break;
                case DedupNode.NodeType.InnerNode:
                    // Recurse with extra indentation; offset advances across the whole file.
                    LogNode(_displayChildNodes, indent + " ", child, null, ref offset);
                    break;
                default:
                    throw new NotImplementedException();
            }
        }
    }
}
public static async Task<(DedupIdentifier dedupId, ulong length)> UploadToBlobStore(
    bool verbose,
    string itemPath,
    Func<TelemetryInformationLevel, Uri, string, BlobStoreTelemetryRecord> telemetryRecordFactory,
    Action<string> traceOutput,
    DedupStoreClient dedupClient,
    BlobStoreClientTelemetry clientTelemetry,
    CancellationToken cancellationToken)
{
    // Create chunks and identifier
    var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);
    var rootNode = new DedupNode(new[] { chunk }); // ChunkerHelper uses a 64K default block size
    var dedupId = rootNode.GetDedupIdentifier(HashType.Dedup64K);

    // Set up an upload session that keeps the file for at minimum one day.
    // Blobs must be associated with the server via an ID reference;
    // otherwise they are garbage collected after one day.
    var tracer = DedupManifestArtifactClientFactory.CreateArtifactsTracer(verbose, traceOutput);
    var keepUntilRef = new KeepUntilBlobReference(DateTime.UtcNow.AddDays(1));
    var uploadSession = dedupClient.CreateUploadSession(keepUntilRef, tracer, FileSystem.Instance);

    // Upload the chunks
    var uploadRecord = clientTelemetry.CreateRecord<BlobStoreTelemetryRecord>(telemetryRecordFactory);
    await clientTelemetry.MeasureActionAsync(
        record: uploadRecord,
        actionAsync: async () => await AsyncHttpRetryHelper.InvokeAsync(
            async () =>
            {
                await uploadSession.UploadAsync(rootNode, new Dictionary<DedupIdentifier, string>() { [dedupId] = itemPath }, cancellationToken);
                return uploadSession.UploadStatistics;
            },
            maxRetries: 3,
            tracer: tracer,
            canRetryDelegate: e => true, // this isn't great, but a failed upload is worse, so retry a few times
            cancellationToken: cancellationToken,
            continueOnCapturedContext: false));

    return (dedupId, rootNode.TransitiveContentBytes);
}
private static DedupNode CreateNodeToUpload(IEnumerable<DedupNode> nodes)
{
    // Repeatedly pack the nodes into pages of at most MaxDirectChildrenPerNode,
    // building one parent per page, until a single root remains.
    while (nodes.Count() > 1)
    {
        nodes = nodes
            .GetPages(DedupNode.MaxDirectChildrenPerNode)
            .Select(children => new DedupNode(children))
            .ToList();
    }

    DedupNode root = nodes.Single();
    // Ensure the root is always an inner node, wrapping a lone chunk leaf if needed.
    if (root.Type == DedupNode.NodeType.ChunkLeaf)
    {
        root = new DedupNode(new[] { root });
    }

    return root;
}
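// A minimal sketch (not from the original source) of how CreateNodeToUpload
// collapses several trees into one uploadable root. ChunkInfo and
// DedupNodeTree.Create are the same APIs exercised by NodeTreeChecker below;
// the synthetic hashes and this wrapper method are assumptions made for
// illustration only.
private static DedupNode BuildRootFromTwoTrees()
{
    List<ChunkInfo> MakeChunks(byte tag) =>
        Enumerable.Range(0, 3)
            .Select(i =>
            {
                var hash = new byte[32];
                hash[0] = (byte)i;
                hash[31] = tag; // keep the two batches of chunk hashes distinct
                return new ChunkInfo(0, 64 * 1024, hash);
            })
            .ToList();

    var trees = new List<DedupNode>
    {
        DedupNodeTree.Create(MakeChunks(tag: 1)),
        DedupNodeTree.Create(MakeChunks(tag: 2)),
    };

    // The returned root is always an inner node, even for a single chunk leaf.
    return CreateNodeToUpload(trees);
}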
private async Task<(DedupIdentifier dedupId, ulong length)> UploadToBlobStore(IAsyncCommandContext context, string itemPath, CancellationToken cancellationToken)
{
    // Create chunks and identifier
    var chunk = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, cancellationToken, false);
    var rootNode = new DedupNode(new[] { chunk });
    var dedupId = rootNode.GetDedupIdentifier(HashType.Dedup64K);

    // Set up an upload session that keeps the file for at minimum one day
    var verbose = String.Equals(context.GetVariableValueOrDefault("system.debug"), "true", StringComparison.InvariantCultureIgnoreCase);
    var tracer = DedupManifestArtifactClientFactory.CreateArtifactsTracer(verbose, (str) => context.Output(str));
    var keepUntilRef = new KeepUntilBlobReference(DateTime.UtcNow.AddDays(1));
    var uploadSession = _dedupClient.CreateUploadSession(keepUntilRef, tracer, FileSystem.Instance);

    // Upload the chunks
    var uploadRecord = _blobTelemetry.CreateRecord<BuildArtifactActionRecord>((level, uri, type) => new BuildArtifactActionRecord(level, uri, type, nameof(UploadAsync), context));
    await _blobTelemetry.MeasureActionAsync(
        record: uploadRecord,
        actionAsync: async () => await AsyncHttpRetryHelper.InvokeAsync(
            async () =>
            {
                return await uploadSession.UploadAsync(rootNode, new Dictionary<DedupIdentifier, string>() { [dedupId] = itemPath }, cancellationToken);
            },
            maxRetries: 3,
            tracer: tracer,
            canRetryDelegate: e => true, // this isn't great, but a failed upload is worse, so retry a few times
            cancellationToken: cancellationToken,
            continueOnCapturedContext: false));

    return (dedupId, rootNode.TransitiveContentBytes);
}
public void DedupHashFile
(
    [Required] string[] path,
    [DefaultValue(false)] bool chunks,
    [DefaultValue(false)] bool childNodes,
    [DefaultValue(false)] bool rollingHash,
    [DefaultValue(FileSystemConstants.FileIOBufferSize)] int bufferSize,
    [DefaultValue((long)0)] long startOffset
)
{
    Initialize();

    _displayChunks = chunks;
    _displayChildNodes = childNodes;

    var paths = new List<AbsolutePath>();
    foreach (AbsolutePath root in path.Select(p => new AbsolutePath(p)))
    {
        if (_fileSystem.DirectoryExists(root))
        {
            paths.AddRange(_fileSystem.EnumerateFiles(root, EnumerateOptions.Recurse).Select(fileInfo => fileInfo.FullPath));
        }
        else if (_fileSystem.FileExists(root))
        {
            paths.Add(root);
        }
        else
        {
            throw new ArgumentException("Given path is not an existing file or directory.");
        }
    }

    var buffer = new byte[bufferSize];
    using (var hasher = new DedupNodeHashAlgorithm(rollingHash ? DedupNodeTree.Algorithm.RollingHash : DedupNodeTree.Algorithm.MaximallyPacked))
    {
        foreach (var p in paths)
        {
            hasher.Initialize();
            TaskSafetyHelpers.SyncResultOnThreadPool(async () =>
            {
                using (var fs = await _fileSystem.OpenReadOnlySafeAsync(p, FileShare.Read | FileShare.Delete))
                {
                    fs.Position = startOffset;
                    int bytesRead;
                    while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
                    {
                        hasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                    }

                    hasher.TransformFinalBlock(new byte[0], 0, 0);
                    DedupNode root = hasher.GetNode();
                    ulong offset = 0;
                    LogNode(true, string.Empty, root, p, ref offset);
                }

                return 0;
            });
        }
    }

    _logger.Always("Totals:");
    _logger.Always($"Bytes: Unique={_uniqueBytes:N0} Total={_totalBytes:N0}");
    _logger.Always($"Chunks: Unique={_allChunks.Count:N0} Total={_totalChunks:N0}");
    _logger.Always($"Nodes: Unique={_allNodes.Count:N0} Total={_totalNodes:N0}");
}
private void NodeTreeChecker(int chunkCount, int expectedNodeCount, uint expectedHeight, string expectedHash)
{
    var r = new Random(Seed: 0);
    var actualChunks = Enumerable
        .Range(0, chunkCount)
        .Select(i =>
        {
            unchecked
            {
                byte[] hash = new byte[32];
                r.NextBytes(hash);
                // Stamp the chunk index into the first four hash bytes (little-endian)
                // so every chunk hash is unique and deterministic.
                hash[0] = (byte)i;
                hash[1] = (byte)(i >> 8);
                hash[2] = (byte)(i >> 16);
                hash[3] = (byte)(i >> 24);
                return new ChunkInfo(0, 64 * 1024, hash);
            }
        })
        .ToList();

    var node = DedupNodeTree.Create(actualChunks);
    Assert.Equal<string>(expectedHash, node.Hash.ToHex());
    Assert.NotNull(node.Height);
    Assert.Equal(expectedHeight, node.Height.Value);

    var nodes = node.EnumerateInnerNodesDepthFirst().ToList();
    var nodeChunks = node.EnumerateChunkLeafsInOrder().ToList();

    // The packed-tree enumeration must converge on the same root hash.
    var node2 = PackedDedupNodeTree.EnumerateTree(actualChunks).Last();
    Assert.Equal(node.Hash.ToHex(), node2.Hash.ToHex());

    // Every inner node must survive a serialize/deserialize round trip intact.
    foreach (var n in nodes)
    {
        var roundTrip = DedupNode.Deserialize(n.Serialize());
        Assert.Equal(n.Hash, roundTrip.Hash, ByteArrayComparer.Instance);
        Assert.Equal(n.ChildNodes.Count, roundTrip.ChildNodes.Count);
        Assert.True(
            n.ChildNodes.Zip(roundTrip.ChildNodes, (e1, e2) =>
            {
                if (e1.Type != e2.Type)
                {
                    return false;
                }
                else if (e1.TransitiveContentBytes != e2.TransitiveContentBytes)
                {
                    return false;
                }
                else if (!ByteArrayComparer.Instance.Equals(e1.Hash, e2.Hash))
                {
                    return false;
                }

                return true;
            }).All(result => result));
    }

    Assert.Equal(
        actualChunks.Select(c => c.Hash.ToHex()),
        nodeChunks.Select(c => c.Hash.ToHex()));
    Assert.Equal(chunkCount, nodeChunks.Count);
    Assert.Equal(expectedNodeCount, nodes.Count);
}
public void DedupHashFile
(
    [Required] string[] path,
    [Required] string hashType,
    [DefaultValue(false)] bool chunks,
    [DefaultValue(false)] bool childNodes,
    [DefaultValue(FileSystemConstants.FileIOBufferSize)] int bufferSize,
    [DefaultValue((long)0)] long startOffset
)
{
    Initialize();

    _displayChunks = chunks;
    _displayChildNodes = childNodes;

    if (!Enum.TryParse(hashType, out HashType dedupHashType))
    {
        throw new ArgumentException($"HashType couldn't be inferred - {hashType}. A valid HashType is required.");
    }

    var paths = new List<AbsolutePath>();
    foreach (AbsolutePath root in path.Select(p => new AbsolutePath(Path.GetFullPath(p))))
    {
        if (_fileSystem.DirectoryExists(root))
        {
            paths.AddRange(_fileSystem.EnumerateFiles(root, EnumerateOptions.Recurse).Select(fileInfo => fileInfo.FullPath));
        }
        else if (_fileSystem.FileExists(root))
        {
            paths.Add(root);
        }
        else
        {
            throw new ArgumentException("Given path is not an existing file or directory.");
        }
    }

    var buffer = new byte[bufferSize];
    using (var contentHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(dedupHashType.GetChunkerConfiguration())))
    {
        foreach (var p in paths)
        {
            contentHasher.Initialize();
            TaskSafetyHelpers.SyncResultOnThreadPool(async () =>
            {
                using (Stream fs = _fileSystem.OpenReadOnly(p, FileShare.Read | FileShare.Delete))
                {
                    fs.Position = startOffset;
                    int bytesRead;
                    while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
                    {
                        contentHasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                    }

                    contentHasher.TransformFinalBlock(new byte[0], 0, 0);
                    DedupNode root = contentHasher.GetNode();
                    ulong offset = 0;
                    LogNode(true, string.Empty, root, p, ref offset);
                }

                return 0;
            });
        }
    }

    _logger.Always("Totals:");
    _logger.Always($"Bytes: Unique={_uniqueBytes:N0} Total={_totalBytes:N0}");
    _logger.Always($"Chunks: Unique={_allChunks.Count:N0} Total={_totalChunks:N0}");
    _logger.Always($"Nodes: Unique={_allNodes.Count:N0} Total={_totalNodes:N0}");
}