/// <summary>
/// Create a node out of the list of chunks.
/// </summary>
/// <remarks>
/// On the single-chunk hot path the chunk list is expected to be empty and no session is
/// expected to exist, so the node is a chunk leaf built from the chunk hasher's final hash.
/// Otherwise the session is disposed and the node is derived from the collected chunks:
/// an empty-content chunk for zero chunks, a chunk leaf for exactly one chunk, or a tree
/// built by <c>DedupNodeTree.Create</c> for several chunks.
/// </remarks>
/// <returns>The node describing the content processed so far.</returns>
protected internal virtual DedupNode CreateNode()
{
    if (SingleChunkHotPath)
    {
        Contract.Check(_chunks.Count == 0)?.Assert($"Chunk count: {_chunks.Count} sizehint: {_sizeHint} chunker min chunk size: {_chunker.Configuration.MinChunkSize}");
        Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");

        // The invariant checked here is that the session IS null; the previous message
        // ("Dedup session cannot be null.") stated the opposite and misled failure triage.
        Contract.Assert(_session == null, "Dedup session must be null on the single chunk hot path.");

        byte[] chunkHash = _chunkHasher.HashFinalInternal();
        return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, chunkHash, 0);
    }

    // Regular path: release the session before turning the collected chunks into a node.
    _session?.Dispose();
    _session = null;

    if (_chunks.Count == 0)
    {
        // No chunks were produced: represent the content with the well-known empty hash.
        return new DedupNode(new ChunkInfo(0, 0, DedupSingleChunkHashInfo.Instance.EmptyHash.ToHashByteArray()));
    }

    if (_chunks.Count == 1)
    {
        // Content is small enough to track as a chunk.
        var node = new DedupNode(_chunks.Single());
        Contract.Check(node.Type == DedupNode.NodeType.ChunkLeaf)?.Assert($"{nameof(CreateNode)}: expected chunk leaf: {DedupNode.NodeType.ChunkLeaf} got {node.Type} instead.");
        return node;
    }

    return DedupNodeTree.Create(_chunks);
}
/// <summary>
/// Produces the node that describes the content chunked so far.
/// </summary>
/// <returns>
/// A chunk leaf on the single-chunk hot path; otherwise a node derived from the
/// collected chunks (empty-content chunk, single chunk leaf, or a tree).
/// </returns>
protected internal virtual DedupNode CreateNode()
{
    if (!SingleChunkHotPath)
    {
        // Regular path: drop the session, then shape the result by the number of chunks.
        _session?.Dispose();
        _session = null;

        switch (_chunks.Count)
        {
            case 0:
            {
                return new DedupNode(new ChunkInfo(0, 0, DedupChunkHashInfo.Instance.EmptyHash.ToHashByteArray()));
            }

            case 1:
            {
                // Content is small enough to track as a chunk.
                var onlyChunkNode = new DedupNode(_chunks.Single());
                Contract.Assert(onlyChunkNode.Type == DedupNode.NodeType.ChunkLeaf);
                return onlyChunkNode;
            }

            default:
            {
                return DedupNodeTree.Create(_chunks, _treeAlgorithm);
            }
        }
    }

    // Hot path: nothing was queued as a chunk, every hinted byte was consumed, no session exists.
    Contract.Assert(_chunks.Count == 0);
    Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");
    Contract.Assert(_session == null);

    byte[] finalHash = _chunkHasher.HashFinalInternal();
    return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, finalHash, 0);
}
/// <summary>
/// Builds the <see cref="NodeDedupIdentifier"/> for an inner node, using the algorithm id
/// looked up for <paramref name="hashType"/>.
/// </summary>
/// <param name="node">The node whose hash is wrapped; must be an inner node.</param>
/// <param name="hashType">Hash type passed to <c>AlgorithmIdLookup.Find</c>.</param>
/// <returns>The identifier wrapping the node hash and the looked-up algorithm id.</returns>
/// <exception cref="ArgumentException">The node is not an inner node.</exception>
public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node, HashType hashType)
{
    bool isInnerNode = node.Type == DedupNode.NodeType.InnerNode;
    if (!isInnerNode)
    {
        throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
    }

    var nodeHash = node.Hash;
    var algorithmId = (NodeAlgorithmId)AlgorithmIdLookup.Find(hashType);
    return new NodeDedupIdentifier(nodeHash, algorithmId);
}
/// <summary>
/// Wraps the hash of an inner node in a <see cref="NodeDedupIdentifier"/>.
/// </summary>
/// <param name="node">The node whose hash is wrapped; must be an inner node.</param>
/// <returns>The identifier built from the node hash.</returns>
/// <exception cref="ArgumentException">The node is not an inner node.</exception>
public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node)
{
    if (node.Type == DedupNode.NodeType.InnerNode)
    {
        return new NodeDedupIdentifier(node.Hash);
    }

    // Anything other than an inner node cannot be expressed as a node identifier.
    throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
}
/// <summary>
/// Wraps the hash of a chunk leaf in a <see cref="ChunkDedupIdentifier"/>.
/// </summary>
/// <param name="node">The node whose hash is wrapped; must be a chunk leaf.</param>
/// <returns>The identifier built from the node hash.</returns>
/// <exception cref="ArgumentException">The node is not a chunk leaf.</exception>
public static ChunkDedupIdentifier GetChunkIdentifier(this DedupNode node)
{
    bool isChunkLeaf = node.Type == DedupNode.NodeType.ChunkLeaf;
    if (!isChunkLeaf)
    {
        throw new ArgumentException($"The given hash does not represent a {nameof(ChunkDedupIdentifier)}");
    }

    var chunkHash = node.Hash;
    return new ChunkDedupIdentifier(chunkHash);
}
/// <summary>
/// Creates a <see cref="DedupIdentifier"/> from a node's hash and an algorithm id chosen by
/// the node type.
/// </summary>
/// <param name="node">The node to create an identifier for.</param>
/// <returns>The identifier produced by the hash/algorithm-id overload of <c>Create</c>.</returns>
public static DedupIdentifier Create(DedupNode node)
{
    Contract.Requires(node != null);

    var nodeHash = node.Hash;

    // TODO: We need to fix this.
    // Chunk leaves use the chunk algorithm id; every other node is labeled Node64K.
    var algorithmId = (node.Type == DedupNode.NodeType.ChunkLeaf)
        ? ChunkDedupIdentifier.ChunkAlgorithmId
        : (byte)NodeAlgorithmId.Node64K;

    return Create(nodeHash, algorithmId);
}
/// <summary>
/// Wraps the hash of an inner node in a <see cref="NodeDedupIdentifier"/> tagged with the
/// Node64K algorithm id.
/// </summary>
/// <param name="node">The node whose hash is wrapped; must be an inner node.</param>
/// <returns>The identifier built from the node hash and the Node64K algorithm id.</returns>
/// <exception cref="ArgumentException">The node is not an inner node.</exception>
public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node)
{
    if (node.Type == DedupNode.NodeType.InnerNode)
    {
        // TODO: Chunk size optimization - the hash-algo mapper will take care of this.
        // for now use default.
        var defaultAlgorithmId = (byte)NodeAlgorithmId.Node64K;
        return new NodeDedupIdentifier(node.Hash, defaultAlgorithmId);
    }

    throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
}
/// <inheritdoc />
protected internal override DedupNode CreateNode()
{
    var created = base.CreateNode();

    // A bare chunk leaf is wrapped in a parent node holding it as its only child;
    // any other node type is returned as produced by the base implementation.
    return created.Type == DedupNode.NodeType.ChunkLeaf
        ? new DedupNode(new[] { created })
        : created;
}
/// <summary>
/// Returns the identifier matching the node's type: a node identifier for inner nodes,
/// a chunk identifier for everything else.
/// </summary>
/// <param name="node">The node to identify.</param>
/// <param name="hashType">Hash type forwarded to <c>GetNodeIdentifier</c> for inner nodes.</param>
/// <returns>The node or chunk identifier for <paramref name="node"/>.</returns>
public static DedupIdentifier GetDedupIdentifier(this DedupNode node, HashType hashType)
{
    switch (node.Type)
    {
        case DedupNode.NodeType.InnerNode:
            return node.GetNodeIdentifier(hashType);

        default:
            // The hash type is not used when producing a chunk identifier.
            return node.GetChunkIdentifier();
    }
}
/// <summary>
/// Throws unless <paramref name="node"/> is an inner node that has at least one child.
/// </summary>
/// <param name="node">The node to validate.</param>
/// <exception cref="ArgumentException">
/// The node is not an inner node, or its child list is null or empty.
/// </exception>
public static void AssertFilled(this DedupNode node)
{
    bool isInnerNode = node.Type == DedupNode.NodeType.InnerNode;
    if (!isInnerNode)
    {
        throw new ArgumentException($"Expected a filled {nameof(DedupNode.NodeType.InnerNode)}, but this is a {node.Type}: {node.HashString}");
    }

    var children = node.ChildNodes;
    bool hasChildren = children != null && children.Count != 0;
    if (!hasChildren)
    {
        throw new ArgumentException($"Expected a filled {nameof(DedupNode.NodeType.InnerNode)}, but ChildNodes is empty for: {node.HashString}");
    }
}
/// <summary>
/// Converts a node into a <see cref="ContentHash"/> whose hash type is chosen from the
/// algorithm id of the node's dedup identifier.
/// </summary>
/// <param name="node">The node to convert.</param>
/// <returns>A content hash carrying the bytes of the node's blob identifier.</returns>
/// <exception cref="InvalidEnumArgumentException">
/// The identifier's algorithm id is neither Node64K nor Node1024K.
/// </exception>
public static ContentHash ToContentHash(this DedupNode node)
{
    var identifier = node.GetDedupIdentifier();

    // First map the algorithm id onto a hash type, then build the content hash once.
    HashType mappedHashType;
    switch (identifier.AlgorithmId)
    {
        case (byte)NodeAlgorithmId.Node64K:
            mappedHashType = HashType.DedupNodeOrChunk;
            break;

        case (byte)NodeAlgorithmId.Node1024K:
            mappedHashType = HashType.Dedup1024K;
            break;

        default:
            throw new InvalidEnumArgumentException($"Unknown algorithm id detected for blob {identifier.ToBlobIdentifier()} : {identifier.AlgorithmId}");
    }

    return new ContentHash(mappedHashType, identifier.ToBlobIdentifier().Bytes);
}
/// <summary>
/// Verifies that <paramref name="stream"/> consists of exactly the expected chunks, in order,
/// and that the node built from those chunks matches <paramref name="expectedHash"/>.
/// </summary>
/// <param name="stream">The stream to read the content from, starting at its current position.</param>
/// <param name="expectedChunks">The chunks (size and hash) the content is expected to split into.</param>
/// <param name="expectedHash">The hash the node built from the verified chunks must equal.</param>
/// <param name="cancellationToken">Token passed to every asynchronous read.</param>
/// <returns>
/// <c>true</c> when every chunk has the expected length and hash, the stream holds no extra
/// bytes, and the resulting hash equals <paramref name="expectedHash"/>; otherwise <c>false</c>.
/// </returns>
internal static async Task<bool> VerifyStreamAsync(Stream stream, IList<ChunkInfo> expectedChunks, ChunkDedupedFileContentHash expectedHash, CancellationToken cancellationToken)
{
    ulong totalBytesChunked = 0;
    var producedChunks = new List<ChunkInfo>(expectedChunks.Count);

    // One buffer sized for the largest chunk is reused for every read.
    // DefaultIfEmpty keeps Max from throwing when no chunks are expected.
    var maxChunkSize = expectedChunks.Select(chunk => chunk.Size).DefaultIfEmpty().Max();
    var buffer = new byte[maxChunkSize];

    foreach (var currentChunk in expectedChunks)
    {
        int chunkSize = (int)currentChunk.Size;

        // A single ReadAsync call may legitimately return fewer bytes than requested even
        // though more data follows, so keep reading until the chunk is complete or the
        // stream ends. Treating a short read as a mismatch would fail valid content.
        int bytesRead = 0;
        while (bytesRead < chunkSize)
        {
            int read = await stream.ReadAsync(buffer, bytesRead, chunkSize - bytesRead, cancellationToken);
            if (read == 0)
            {
                // End of stream before the chunk was filled.
                break;
            }

            bytesRead += read;
        }

        if (bytesRead != currentChunk.Size)
        {
            // File content is shorter than expected.
            return false;
        }

        byte[] chunkHash = ChunkHasher.GetContentHash(buffer, 0, bytesRead).ToHashByteArray();
        if (!chunkHash.SequenceEqual(currentChunk.Hash))
        {
            // Hash mismatch
            return false;
        }

        producedChunks.Add(new ChunkInfo(totalBytesChunked, currentChunk.Size, chunkHash));
        totalBytesChunked += (ulong)bytesRead;
    }

    if (stream.ReadByte() != -1)
    {
        // File content is longer
        return false;
    }

    var node = DedupNode.Create(producedChunks);

    // Only the leading DedupSingleChunkHashInfo.Length bytes of the node hash are compared.
    var hashBytesExcludingAlgorithm = node.Hash.Take(DedupSingleChunkHashInfo.Length).ToArray();
    var actualHash = new ChunkDedupedFileContentHash(hashBytesExcludingAlgorithm);
    return expectedHash == actualHash;
}
/// <summary>
/// Create a node out of the list of chunks.
/// </summary>
/// <remarks>
/// On the single-chunk hot path the chunk list is expected to be empty and no session is
/// expected to exist, so the node is a chunk leaf built from the chunk hasher's final hash.
/// Otherwise the session is disposed and the node is built by <c>DedupNode.Create</c> from
/// the collected chunks.
/// </remarks>
/// <returns>The node describing the content processed so far.</returns>
protected internal virtual DedupNode CreateNode()
{
    if (SingleChunkHotPath)
    {
        Contract.Check(_chunks.Count == 0)?.Assert($"Chunk count: {_chunks.Count} sizehint: {_sizeHint} chunker min chunk size: {_chunker.Configuration.MinChunkSize}");
        Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");

        // The invariant checked here is that the session IS null; the previous message
        // ("Dedup session cannot be null.") stated the opposite and misled failure triage.
        Contract.Assert(_session == null, "Dedup session must be null on the single chunk hot path.");

        byte[] chunkHash = _chunkHasher.HashFinalInternal();
        return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, chunkHash, 0);
    }

    // Regular path: release the session before turning the collected chunks into a node.
    _session?.Dispose();
    _session = null;
    return DedupNode.Create(_chunks);
}
/// <summary>
/// Builds a tree over the given chunks, level by level, until the top level fits under a
/// single root. Children are grouped to increase the likelihood of node reuse.
/// </summary>
/// <param name="chunks">The nodes forming the lowest level of the tree.</param>
/// <returns>The root node whose children are the final remaining level.</returns>
private static DedupNode CreateRollingHashTree(IReadOnlyList<DedupNode> chunks)
{
    // Group boundaries come from a rolling hash over the children's hashes so that
    // groupings stay consistent even when children are inserted or removed
    // (i.e. when the underlying file changes).
    var boundaryDetector = new RollingHash(
        windowLength: 4,
        bitMask: VariableChildCountBitMask,
        minCount: MinVariableChildCount);

    var currentLevel = new Queue<DedupNode>(chunks);
    while (currentLevel.Count > DedupNode.MaxDirectChildrenPerNode)
    {
        var parentLevel = new Queue<DedupNode>();

        while (currentLevel.Count > 0)
        {
            boundaryDetector.Reset();
            var group = new List<DedupNode>();

            // Fill one group until the level is drained, the group is full, or a boundary is hit.
            while (currentLevel.Count > 0
                && group.Count < DedupNode.MaxDirectChildrenPerNode
                && !boundaryDetector.IsAtBoundary)
            {
                var child = currentLevel.Dequeue();

                // XOR-fold the first four 64-bit words of the child's hash into one value.
                ulong foldedHash = 0;
                for (int offset = 0; offset < 4 * sizeof(ulong); offset += sizeof(ulong))
                {
                    foldedHash ^= BitConverter.ToUInt64(child.Hash, offset);
                }

                boundaryDetector.Add(foldedHash);
                group.Add(child);
            }

            parentLevel.Enqueue(new DedupNode(group));
        }

        currentLevel = parentLevel;
    }

    return new DedupNode(currentLevel.ToList());
}
/// <summary>
/// Lazily enumerates nodes of a tree that is built bottom-up from the given collection of nodes.
/// </summary>
/// <remarks>
/// Each pass groups the current level into batches of <c>DedupNode.MaxDirectChildrenPerNode</c>
/// nodes. Every full batch becomes a new parent node that is yielded and carried to the next
/// level; nodes left over after the last full batch are carried to the next level unchanged
/// and yielded as they are. Passes repeat while the next level holds more than
/// <c>DedupNode.MaxDirectChildrenPerNode</c> nodes. The last item yielded is either the single
/// remaining node or a new node wrapping the remaining nodes.
/// NOTE(review): nodes consumed into a full batch are not themselves yielded by this method,
/// while leftover nodes can be yielded again on a later pass - confirm callers expect this.
/// </remarks>
/// <param name="nodes">The nodes forming the lowest level.</param>
/// <returns>A deferred sequence of nodes; nothing runs until it is enumerated.</returns>
public static IEnumerable <DedupNode> EnumerateTree(IEnumerable <DedupNode> nodes)
{
    var nextLevel = new List <DedupNode>();
    int nextLevelCount;
    do
    {
        var thisLevel = new List <DedupNode>();
        foreach (var node in nodes)
        {
            thisLevel.Add(node);
            if (thisLevel.Count == DedupNode.MaxDirectChildrenPerNode)
            {
                // A full batch: wrap it in a parent, emit the parent, and promote it.
                var newNode = new DedupNode(thisLevel);
                yield return(newNode);
                nextLevel.Add(newNode);
                // NOTE(review): the list just handed to the DedupNode constructor is cleared
                // and reused; this presumably relies on the constructor copying its input -
                // TODO confirm.
                thisLevel.Clear();
            }
        }
        // Whatever did not fill a batch moves up a level as-is and is emitted too.
        nextLevel.AddRange(thisLevel);
        foreach (var node in thisLevel)
        {
            yield return(node);
        }
        // The next pass iterates the level just built; a fresh list collects the one after it.
        nodes = nextLevel;
        nextLevelCount = nextLevel.Count;
        nextLevel = new List <DedupNode>();
    }while (nextLevelCount > DedupNode.MaxDirectChildrenPerNode);
    if (nextLevelCount == 1)
    {
        // NOTE(review): this single node was already yielded inside the loop above, so it
        // appears to be emitted a second time here - confirm that is intended.
        yield return(nodes.Single());
    }
    else
    {
        // Zero or several nodes remain: wrap them in one final node.
        yield return(new DedupNode(nodes));
    }
}
/// <summary>
/// Converts a node into a <see cref="ContentHash"/> of the requested hash type.
/// </summary>
/// <param name="node">The node to convert.</param>
/// <param name="hashType">The hash type to produce; also selects where the hash bytes come from.</param>
/// <returns>A content hash tagged with <paramref name="hashType"/>.</returns>
/// <exception cref="NotImplementedException">The hash type is not one of the handled dedup types.</exception>
public static ContentHash ToContentHash(this DedupNode node, HashType hashType)
{
    switch (hashType)
    {
        case HashType.DedupSingleChunk:
        case HashType.DedupNode:
        {
            // These types carry the node's own hash bytes unchanged.
            byte[] rawHash = node.Hash;
            return new ContentHash(hashType, rawHash);
        }

        case HashType.Dedup64K:
        case HashType.Dedup1024K:
        {
            // These types carry the bytes of the blob identifier derived from the dedup identifier.
            byte[] blobBytes = node.GetDedupIdentifier(hashType).ToBlobIdentifier().Bytes;
            return new ContentHash(hashType, blobBytes);
        }

        default:
            throw new NotImplementedException($"Unexpected HashType '{hashType}' for DedupNode.");
    }
}
/// <summary>
/// Builds a <see cref="NodeDedupIdentifier"/> from the node's content hash for the given
/// hash type and the node algorithm id associated with that hash type.
/// </summary>
/// <param name="node">The node to identify.</param>
/// <param name="hashType">Hash type used for both the content hash and the algorithm id.</param>
/// <returns>The identifier built from the hash bytes and algorithm id.</returns>
public static NodeDedupIdentifier CalculateNodeDedupIdentifier(this DedupNode node, HashType hashType)
{
    var hashBytes = node.ToContentHash(hashType).ToHashByteArray();
    var algorithmId = hashType.GetNodeAlgorithmId();
    return new NodeDedupIdentifier(hashBytes, algorithmId);
}
/// <summary>
/// Builds a <see cref="NodeDedupIdentifier"/> by hashing the serialized node with
/// <c>ChunkHasher</c> and pairing the result with the node algorithm id of the hash type.
/// </summary>
/// <param name="node">The node to serialize and hash.</param>
/// <param name="hashType">Hash type that supplies the algorithm id.</param>
/// <returns>The identifier built from the computed hash bytes and algorithm id.</returns>
public static NodeDedupIdentifier CalculateNodeDedupIdentifier(this DedupNode node, HashType hashType)
{
    var serializedNode = node.Serialize();
    var hashBytes = ChunkHasher.GetContentHash(serializedNode).ToHashByteArray();
    var algorithmId = hashType.GetNodeAlgorithmId();
    return new NodeDedupIdentifier(hashBytes, algorithmId);
}
/// <summary>
/// Converts a node into a <see cref="ContentHash"/> of type <c>HashType.DedupNodeOrChunk</c>
/// carrying the bytes of the node's blob identifier.
/// </summary>
/// <param name="node">The node to convert.</param>
/// <returns>The content hash for <paramref name="node"/>.</returns>
public static ContentHash ToContentHash(this DedupNode node)
{
    var blobIdentifier = node.GetDedupIdentifier().ToBlobIdentifier();
    return new ContentHash(HashType.DedupNodeOrChunk, blobIdentifier.Bytes);
}