/// <summary>
        /// Creates a <see cref="DedupNode"/> out of the chunks accumulated so far.
        /// </summary>
        /// <remarks>
        /// On the single-chunk hot path no chunks and no session are expected; the result is a chunk leaf of
        /// <c>_sizeHint</c> bytes whose hash comes from finalizing <c>_chunkHasher</c>.
        /// Otherwise the session is disposed and the result depends on the chunk count: zero chunks produce a
        /// zero-length chunk carrying the empty hash, one chunk becomes a leaf node, and more than one chunk
        /// is handed to <c>DedupNodeTree.Create</c>.
        /// </remarks>
        /// <returns>The node describing the chunked content.</returns>
        protected internal virtual DedupNode CreateNode()
        {
            if (SingleChunkHotPath)
            {
                Contract.Check(_chunks.Count == 0)?.Assert($"Chunk count: {_chunks.Count} sizehint: {_sizeHint} chunker min chunk size: {_chunker.Configuration.MinChunkSize}");
                Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");

                // The condition requires the session to be null here; the previous message
                // ("cannot be null") stated the opposite and was misleading when the assert fired.
                Contract.Assert(_session == null, "Dedup session must be null on the single-chunk hot path.");

                byte[] chunkHash = _chunkHasher.HashFinalInternal();
                return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, chunkHash, 0);
            }
            else
            {
                // The session is no longer needed once the node is being created.
                _session?.Dispose();
                _session = null;

                if (_chunks.Count == 0)
                {
                    // No chunks: represent the content as a zero-offset, zero-size chunk with the empty hash.
                    return new DedupNode(new ChunkInfo(0, 0, DedupSingleChunkHashInfo.Instance.EmptyHash.ToHashByteArray()));
                }
                else if (_chunks.Count == 1)
                {
                    // Content is small enough to track as a chunk.
                    var node = new DedupNode(_chunks.Single());
                    Contract.Check(node.Type == DedupNode.NodeType.ChunkLeaf)?.Assert($"{nameof(CreateNode)}: expected chunk leaf: {DedupNode.NodeType.ChunkLeaf} got {node.Type} instead.");
                    return node;
                }
                else
                {
                    return DedupNodeTree.Create(_chunks);
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the <see cref="DedupNode"/> representing everything chunked so far.
        /// </summary>
        /// <returns>
        /// A chunk leaf on the single-chunk hot path; otherwise a node chosen by the number of collected chunks.
        /// </returns>
        protected internal virtual DedupNode CreateNode()
        {
            if (!SingleChunkHotPath)
            {
                // Regular path: release the session before building the node.
                _session?.Dispose();
                _session = null;

                switch (_chunks.Count)
                {
                    case 0:
                    {
                        // Nothing was chunked: emit a zero-size chunk with the empty hash.
                        var emptyHash = DedupChunkHashInfo.Instance.EmptyHash.ToHashByteArray();
                        return new DedupNode(new ChunkInfo(0, 0, emptyHash));
                    }

                    case 1:
                    {
                        // Content is small enough to track as a chunk.
                        var leaf = new DedupNode(_chunks.Single());
                        Contract.Assert(leaf.Type == DedupNode.NodeType.ChunkLeaf);
                        return leaf;
                    }

                    default:
                        return DedupNodeTree.Create(_chunks, _treeAlgorithm);
                }
            }

            // Hot path: no chunk list and no session are expected, and the hinted size must have been consumed.
            Contract.Assert(_chunks.Count == 0);
            Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");
            Contract.Assert(_session == null);

            byte[] finalHash = _chunkHasher.HashFinalInternal();
            return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, finalHash, 0);
        }
 /// <summary>
 /// Returns the <see cref="NodeDedupIdentifier"/> for an inner node, using the algorithm id
 /// that <c>AlgorithmIdLookup.Find</c> returns for <paramref name="hashType"/>.
 /// </summary>
 /// <exception cref="ArgumentException">Thrown when <paramref name="node"/> is not an inner node.</exception>
 public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node, HashType hashType)
 {
     if (node.Type == DedupNode.NodeType.InnerNode)
     {
         var hash = node.Hash;
         var algorithmId = (NodeAlgorithmId)AlgorithmIdLookup.Find(hashType);
         return new NodeDedupIdentifier(hash, algorithmId);
     }

     throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
 }
Esempio n. 4
0
        /// <summary>
        /// Returns the <see cref="NodeDedupIdentifier"/> built from the hash of an inner node.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when <paramref name="node"/> is not an inner node.</exception>
        public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node)
        {
            bool isInnerNode = node.Type == DedupNode.NodeType.InnerNode;
            if (isInnerNode)
            {
                return new NodeDedupIdentifier(node.Hash);
            }

            throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
        }
        /// <summary>
        /// Returns the <see cref="ChunkDedupIdentifier"/> built from the hash of a chunk leaf.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when <paramref name="node"/> is not a chunk leaf.</exception>
        public static ChunkDedupIdentifier GetChunkIdentifier(this DedupNode node)
        {
            bool isChunkLeaf = node.Type == DedupNode.NodeType.ChunkLeaf;
            if (isChunkLeaf)
            {
                return new ChunkDedupIdentifier(node.Hash);
            }

            throw new ArgumentException($"The given hash does not represent a {nameof(ChunkDedupIdentifier)}");
        }
Esempio n. 6
0
        /// <summary>
        /// Creates the identifier for <paramref name="node"/> from its hash and an algorithm id
        /// chosen by node type: the chunk algorithm id for chunk leaves, <c>Node64K</c> for everything else.
        /// </summary>
        public static DedupIdentifier Create(DedupNode node)
        {
            Contract.Requires(node != null);

            var hash = node.Hash;

            // TODO: We need to fix this.
            // Any node that is not a chunk leaf is currently given the Node64K algorithm id.
            var algorithmId = (node.Type == DedupNode.NodeType.ChunkLeaf)
                ? ChunkDedupIdentifier.ChunkAlgorithmId
                : (byte)NodeAlgorithmId.Node64K;

            return Create(hash, algorithmId);
        }
Esempio n. 7
0
 /// <summary>
 /// Returns the <see cref="NodeDedupIdentifier"/> for an inner node, always using the <c>Node64K</c> algorithm id.
 /// </summary>
 /// <exception cref="ArgumentException">Thrown when <paramref name="node"/> is not an inner node.</exception>
 public static NodeDedupIdentifier GetNodeIdentifier(this DedupNode node)
 {
     if (node.Type == DedupNode.NodeType.InnerNode)
     {
         // TODO: Chunk size optimization - the hash-algo mapper will take care of this.
         // Until then the default algorithm id is used.
         return new NodeDedupIdentifier(node.Hash, (byte)NodeAlgorithmId.Node64K);
     }

     throw new ArgumentException($"The given hash does not represent a {nameof(NodeDedupIdentifier)}");
 }
        /// <inheritdoc />
        protected internal override DedupNode CreateNode()
        {
            var created = base.CreateNode();

            // A chunk leaf from the base implementation is wrapped in a new node built from a
            // one-element array; any other node type is returned unchanged.
            return created.Type == DedupNode.NodeType.ChunkLeaf
                ? new DedupNode(new[] { created })
                : created;
        }
 /// <summary>
 /// Returns the dedup identifier for <paramref name="node"/>: the node identifier (for
 /// <paramref name="hashType"/>) when it is an inner node, otherwise the chunk identifier.
 /// </summary>
 public static DedupIdentifier GetDedupIdentifier(this DedupNode node, HashType hashType)
 {
     if (node.Type != DedupNode.NodeType.InnerNode)
     {
         return node.GetChunkIdentifier();
     }

     return node.GetNodeIdentifier(hashType);
 }
Esempio n. 10
0
        /// <summary>
        /// Verifies that <paramref name="node"/> is an inner node with at least one child.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown when the node is not an inner node, or when its <c>ChildNodes</c> is null or empty.
        /// </exception>
        public static void AssertFilled(this DedupNode node)
        {
            bool isInnerNode = node.Type == DedupNode.NodeType.InnerNode;
            if (!isInnerNode)
            {
                throw new ArgumentException($"Expected a filled {nameof(DedupNode.NodeType.InnerNode)}, but this is a {node.Type}: {node.HashString}");
            }

            bool hasChildren = node.ChildNodes != null && node.ChildNodes.Count != 0;
            if (!hasChildren)
            {
                throw new ArgumentException($"Expected a filled {nameof(DedupNode.NodeType.InnerNode)}, but ChildNodes is empty for: {node.HashString}");
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Converts <paramref name="node"/> to a <see cref="ContentHash"/> whose hash type is selected by the
        /// algorithm id of the node's dedup identifier and whose bytes are the identifier's blob identifier bytes.
        /// </summary>
        /// <exception cref="InvalidEnumArgumentException">
        /// Thrown when the algorithm id is neither <c>Node64K</c> nor <c>Node1024K</c>.
        /// </exception>
        public static ContentHash ToContentHash(this DedupNode node)
        {
            var identifier = node.GetDedupIdentifier();

            // Map the algorithm id to the hash type first; the bytes are produced the same way for both.
            HashType hashType;
            switch (identifier.AlgorithmId)
            {
                case (byte)NodeAlgorithmId.Node64K:
                    hashType = HashType.DedupNodeOrChunk;
                    break;

                case (byte)NodeAlgorithmId.Node1024K:
                    hashType = HashType.Dedup1024K;
                    break;

                default:
                    throw new InvalidEnumArgumentException($"Unknown algorithm id detected for blob {identifier.ToBlobIdentifier()} : {identifier.AlgorithmId}");
            }

            return new ContentHash(hashType, identifier.ToBlobIdentifier().Bytes);
        }
        /// <summary>
        /// Verifies that <paramref name="stream"/> consists of exactly the chunks listed in
        /// <paramref name="expectedChunks"/> and that the node built from those chunks matches
        /// <paramref name="expectedHash"/>.
        /// </summary>
        /// <param name="stream">Stream to verify; it is read sequentially from its current position.</param>
        /// <param name="expectedChunks">
        /// Expected chunks, in stream order. Sizing the read buffer uses <c>Max</c> over this list, which
        /// throws for an empty sequence of non-nullable values, so the list is expected to be non-empty.
        /// </param>
        /// <param name="expectedHash">Hash the reconstructed node must equal.</param>
        /// <param name="cancellationToken">Token passed to every asynchronous read.</param>
        /// <returns>
        /// <c>true</c> when every chunk has the expected size and hash, the stream has no trailing bytes,
        /// and the hash of the node created from the chunks equals <paramref name="expectedHash"/>;
        /// otherwise <c>false</c>.
        /// </returns>
        internal static async Task<bool> VerifyStreamAsync(Stream stream, IList<ChunkInfo> expectedChunks, ChunkDedupedFileContentHash expectedHash, CancellationToken cancellationToken)
        {
            ulong totalBytesChunked = 0;
            var producedChunks = new List<ChunkInfo>(expectedChunks.Count);

            // One buffer, sized for the largest chunk, is reused for every read.
            var maxChunkSize = expectedChunks.Max((chunk) => chunk.Size);
            var buffer = new byte[maxChunkSize];

            foreach (var currentChunk in expectedChunks)
            {
                int chunkSize = (int)currentChunk.Size;
                int bytesRead = 0;

                // Stream.ReadAsync may return fewer bytes than requested even when more data is available,
                // so keep reading until the chunk is complete or the stream ends.
                while (bytesRead < chunkSize)
                {
                    int read = await stream.ReadAsync(buffer, bytesRead, chunkSize - bytesRead, cancellationToken);
                    if (read == 0)
                    {
                        break;
                    }

                    bytesRead += read;
                }

                if (bytesRead != chunkSize)
                {
                    // The stream ended before the chunk was complete: file content is shorter.
                    return false;
                }

                byte[] chunkHash = ChunkHasher.GetContentHash(
                    buffer,
                    0,
                    bytesRead).ToHashByteArray();

                if (!chunkHash.SequenceEqual(currentChunk.Hash))
                {
                    // Hash mismatch
                    return false;
                }

                producedChunks.Add(new ChunkInfo(
                    totalBytesChunked,
                    currentChunk.Size,
                    chunkHash));

                totalBytesChunked += (ulong)bytesRead;
            }

            // Probe for trailing data asynchronously so the check is cancellable and does not block.
            var trailingProbe = new byte[1];
            if (await stream.ReadAsync(trailingProbe, 0, 1, cancellationToken) != 0)
            {
                // File content is longer
                return false;
            }

            var node = DedupNode.Create(producedChunks);

            // Only the first DedupSingleChunkHashInfo.Length bytes of the node hash are compared.
            var hashBytesExcludingAlgorithm = node.Hash.Take(DedupSingleChunkHashInfo.Length).ToArray();
            var actualHash = new ChunkDedupedFileContentHash(hashBytesExcludingAlgorithm);

            return expectedHash == actualHash;
        }
        /// <summary>
        /// Creates a <see cref="DedupNode"/> out of the chunks accumulated so far.
        /// </summary>
        /// <remarks>
        /// On the single-chunk hot path no chunks and no session are expected; the result is a chunk leaf of
        /// <c>_sizeHint</c> bytes whose hash comes from finalizing <c>_chunkHasher</c>.
        /// Otherwise the session is disposed and the chunk list is handed to <c>DedupNode.Create</c>.
        /// </remarks>
        /// <returns>The node describing the chunked content.</returns>
        protected internal virtual DedupNode CreateNode()
        {
            if (SingleChunkHotPath)
            {
                Contract.Check(_chunks.Count == 0)?.Assert($"Chunk count: {_chunks.Count} sizehint: {_sizeHint} chunker min chunk size: {_chunker.Configuration.MinChunkSize}");
                Contract.Check(_bytesChunked == _sizeHint)?.Assert($"_bytesChunked != _sizeHint. _bytesChunked={_bytesChunked} _sizeHint={_sizeHint}");

                // The condition requires the session to be null here; the previous message
                // ("cannot be null") stated the opposite and was misleading when the assert fired.
                Contract.Assert(_session == null, "Dedup session must be null on the single-chunk hot path.");

                byte[] chunkHash = _chunkHasher.HashFinalInternal();
                return new DedupNode(DedupNode.NodeType.ChunkLeaf, (ulong)_sizeHint, chunkHash, 0);
            }
            else
            {
                // The session is no longer needed once the node is being created.
                _session?.Dispose();
                _session = null;

                return DedupNode.Create(_chunks);
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Builds a tree over the given chunks, level by level, choosing child-group boundaries with a rolling hash
        /// so that groupings are more likely to be reused across similar inputs.
        /// </summary>
        private static DedupNode CreateRollingHashTree(IReadOnlyList<DedupNode> chunks)
        {
            // A rolling hash picks the group boundaries so that insertions/removals of children
            // (i.e. changes to the underlying file) still yield consistent groupings elsewhere.
            var boundaryDetector = new RollingHash(
                windowLength: 4,
                bitMask: VariableChildCountBitMask,
                minCount: MinVariableChildCount);

            var currentLevel = new Queue<DedupNode>(chunks);

            // Keep collapsing levels until the remaining nodes fit under a single root.
            while (currentLevel.Count > DedupNode.MaxDirectChildrenPerNode)
            {
                var parents = new Queue<DedupNode>();

                while (currentLevel.Count > 0)
                {
                    boundaryDetector.Reset();
                    var group = new List<DedupNode>();

                    // A group closes when the level is drained, the group is full, or the rolling hash hits a boundary.
                    while (currentLevel.Count > 0
                           && group.Count < DedupNode.MaxDirectChildrenPerNode
                           && !boundaryDetector.IsAtBoundary)
                    {
                        var child = currentLevel.Dequeue();

                        // Fold the first 32 bytes of the hash into one 64-bit value by XOR-ing its four 8-byte words.
                        ulong folded = 0;
                        for (int offset = 0; offset < 4 * sizeof(ulong); offset += sizeof(ulong))
                        {
                            folded ^= BitConverter.ToUInt64(child.Hash, offset);
                        }

                        boundaryDetector.Add(folded);
                        group.Add(child);
                    }

                    parents.Enqueue(new DedupNode(group));
                }

                currentLevel = parents;
            }

            return new DedupNode(currentLevel.ToList());
        }
Esempio n. 15
0
        /// <summary>
        /// Non-blocking enumerable of the whole tree given a collection of nodes.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nodes are produced lazily as the caller enumerates.
        /// Each level is processed in batches of <c>DedupNode.MaxDirectChildrenPerNode</c>. Every full batch
        /// becomes a new <see cref="DedupNode"/> that is yielded and promoted to the next level; nodes left
        /// in a final partial batch are yielded and carried to the next level unchanged.
        /// Levels repeat while the next level holds more than <c>DedupNode.MaxDirectChildrenPerNode</c> nodes.
        /// </remarks>
        /// <param name="nodes">Nodes forming the first level; enumerated once by the first pass.</param>
        /// <returns>The nodes yielded level by level, ending with the final node.</returns>
        public static IEnumerable <DedupNode> EnumerateTree(IEnumerable <DedupNode> nodes)
        {
            var nextLevel = new List <DedupNode>();
            int nextLevelCount;

            do
            {
                var thisLevel = new List <DedupNode>();
                foreach (var node in nodes)
                {
                    thisLevel.Add(node);
                    if (thisLevel.Count == DedupNode.MaxDirectChildrenPerNode)
                    {
                        // Batch is full: turn it into a parent node, emit it, and promote it to the next level.
                        // NOTE(review): thisLevel is cleared right after being passed to the constructor, so this
                        // presumably relies on DedupNode copying the list rather than keeping a reference - confirm.
                        var newNode = new DedupNode(thisLevel);
                        yield return(newNode);

                        nextLevel.Add(newNode);
                        thisLevel.Clear();
                    }
                }

                // Leftover nodes (a partial batch) move to the next level as-is and are emitted too.
                nextLevel.AddRange(thisLevel);
                foreach (var node in thisLevel)
                {
                    yield return(node);
                }

                // Advance: the next level becomes the input of the following pass.
                nodes          = nextLevel;
                nextLevelCount = nextLevel.Count;
                nextLevel      = new List <DedupNode>();
            }while (nextLevelCount > DedupNode.MaxDirectChildrenPerNode);

            if (nextLevelCount == 1)
            {
                // A single remaining node is emitted as the final element. Every node in the next level was
                // already yielded inside the loop, so this node appears twice in the output.
                // NOTE(review): confirm callers expect the duplicate.
                yield return(nodes.Single());
            }
            else
            {
                // Zero or several remaining nodes: emit a new node constructed over them.
                yield return(new DedupNode(nodes));
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Converts <paramref name="node"/> to a <see cref="ContentHash"/> of the given <paramref name="hashType"/>.
        /// </summary>
        /// <remarks>
        /// <c>DedupSingleChunk</c> and <c>DedupNode</c> use the node hash directly; <c>Dedup64K</c> and
        /// <c>Dedup1024K</c> use the bytes of the blob identifier derived from the node's dedup identifier.
        /// </remarks>
        /// <exception cref="NotImplementedException">Thrown for any other <paramref name="hashType"/>.</exception>
        public static ContentHash ToContentHash(this DedupNode node, HashType hashType)
        {
            switch (hashType)
            {
                case HashType.DedupSingleChunk:
                case HashType.DedupNode:
                    return new ContentHash(hashType, node.Hash);

                case HashType.Dedup64K:
                case HashType.Dedup1024K:
                    return new ContentHash(hashType, node.GetDedupIdentifier(hashType).ToBlobIdentifier().Bytes);

                default:
                    throw new NotImplementedException($"Unexpected HashType '{hashType}' for DedupNode.");
            }
        }
Esempio n. 17
0
 /// <summary>
 /// Builds a <see cref="NodeDedupIdentifier"/> from the hash bytes of the node's content hash for
 /// <paramref name="hashType"/> and the node algorithm id of that hash type.
 /// </summary>
 public static NodeDedupIdentifier CalculateNodeDedupIdentifier(this DedupNode node, HashType hashType)
 {
     var hashBytes = node.ToContentHash(hashType).ToHashByteArray();
     var algorithmId = hashType.GetNodeAlgorithmId();

     return new NodeDedupIdentifier(hashBytes, algorithmId);
 }
Esempio n. 18
0
 /// <summary>
 /// Builds a <see cref="NodeDedupIdentifier"/> by hashing the serialized node with <c>ChunkHasher</c>
 /// and pairing the hash bytes with the node algorithm id of <paramref name="hashType"/>.
 /// </summary>
 public static NodeDedupIdentifier CalculateNodeDedupIdentifier(this DedupNode node, HashType hashType)
 {
     var serializedNode = node.Serialize();
     var hashBytes = ChunkHasher.GetContentHash(serializedNode).ToHashByteArray();
     var algorithmId = hashType.GetNodeAlgorithmId();

     return new NodeDedupIdentifier(hashBytes, algorithmId);
 }
Esempio n. 19
0
 /// <summary>
 /// Converts <paramref name="node"/> to a <c>DedupNodeOrChunk</c> <see cref="ContentHash"/> using the bytes
 /// of the blob identifier derived from the node's dedup identifier.
 /// </summary>
 public static ContentHash ToContentHash(this DedupNode node)
 {
     var blobIdentifier = node.GetDedupIdentifier().ToBlobIdentifier();

     return new ContentHash(HashType.DedupNodeOrChunk, blobIdentifier.Bytes);
 }