// Beispiel #1 ("Example #1" — scraped-site separator, not part of the original source)
// 0
 /// <summary>
 /// Initializes a new instance of the <see cref="Dedup1024KHashAlgorithm"/> class.
 /// Delegates to the chunker-accepting constructor, building the chunker from the
 /// configuration of <c>TargetHashType</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <c>TargetHashType</c> is declared outside this excerpt — presumably
 /// the 1024K dedup hash type given the class name; confirm against the class declaration.
 /// </remarks>
 public Dedup1024KHashAlgorithm()
     : this(Chunker.Create(TargetHashType.GetChunkerConfiguration()))
 {
 }
        /// <summary>
        /// Verifies that hashing <paramref name="byteCount"/> bytes of deterministic test
        /// content produces <paramref name="expectedHash"/>, repeatedly and across hasher
        /// reuse, for both the COM chunker (when applicable) and the managed chunker.
        /// </summary>
        /// <param name="hashType">Dedup hash type whose chunker configuration is exercised.</param>
        /// <param name="byteCount">Size of the generated test buffer.</param>
        /// <param name="expectedHash">Expected root hash, as hex.</param>
        /// <param name="seed">Seed for the deterministic test content.</param>
        /// <returns>The node produced by the final managed-chunker hashing pass.</returns>
        protected DedupNode HashIsStable(HashType hashType, uint byteCount, string expectedHash, int seed = 0)
        {
            var content = new byte[byteCount];
            if (byteCount > 0)
            {
                FillBufferWithTestContent(seed, content);
            }

            DedupNode node = default;

            // The COM chunker only supports the 64K configuration, so it is exercised
            // only for that hash type and only where the platform provides it.
            if (hashType == HashType.Dedup64K && Chunker.IsComChunkerSupported)
            {
                using (var comChunker = new ComChunker(ChunkerConfiguration.SupportedComChunkerConfiguration))
                using (var comHasher = new DedupNodeOrChunkHashAlgorithm(comChunker))
                {
                    // Hash the same content four times, alternating the flag, to confirm
                    // the result is stable when the hasher instance is reused.
                    foreach (bool flag in new[] { false, true, false, true })
                    {
                        node = HashIsStableForChunker(comHasher, content, expectedHash, seed, flag);
                    }
                }
            }

            // The managed chunker is exercised for every hash type.
            using (var managedHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration())))
            {
                foreach (bool flag in new[] { false, true, false, true })
                {
                    node = HashIsStableForChunker(managedHasher, content, expectedHash, seed, flag);
                }
            }

            HashCanBeVerified(node, content, expectedHash, seed);

            return node;
        }
        /// <summary>
        /// Hashes a 50MB random buffer and the same buffer with its first 200KB removed,
        /// then asserts the two "files" share at least 90% of their chunks while sharing
        /// no inner nodes, and that a preorder visit of the second tree touches every node.
        /// </summary>
        private void ChunksAndNodesInCommonInSimilarFilesInternal(HashType hashType)
        {
            using var hasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration()));

            const int offsetForSecondFile = 200 * 1024;
            var content = new byte[50 * 1024 * 1024];
            new Random(Seed: 0).NextBytes(content);

            // First "file": the entire buffer.
            hasher.SetInputLength(content.Length);
            byte[] firstHash = hasher.ComputeHash(content, 0, content.Length);
            var firstNode = hasher.GetNode();
            var firstChunks = firstNode.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToHashSet();
            var firstInnerNodes = firstNode.EnumerateInnerNodesDepthFirst().Select(c => c.Hash.ToHex()).ToHashSet();

            // Second "file": the same buffer minus its leading 200KB.
            hasher.SetInputLength(content.Length);
            byte[] secondHash = hasher.ComputeHash(content, offsetForSecondFile, content.Length - offsetForSecondFile);
            var secondNode = hasher.GetNode();
            var secondChunks = secondNode.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToHashSet();
            var secondInnerNodes = secondNode.EnumerateInnerNodesDepthFirst().Select(c => c.Hash.ToHex()).ToHashSet();

            Assert.NotEqual(firstHash, secondHash, ByteArrayComparer.Instance);

            // Content-defined chunking should resynchronize after the removed prefix,
            // so at least 90% of each file's chunks appear in the other.
            var sharedChunks = new HashSet<string>(firstChunks);
            sharedChunks.IntersectWith(secondChunks);
            Assert.Subset(firstChunks, sharedChunks);
            Assert.Subset(secondChunks, sharedChunks);
            Assert.InRange(sharedChunks.Count, firstChunks.Count - (firstChunks.Count / 10), firstChunks.Count);
            Assert.InRange(sharedChunks.Count, secondChunks.Count - (secondChunks.Count / 10), secondChunks.Count);

            var sharedInnerNodes = new HashSet<string>(firstInnerNodes);
            sharedInnerNodes.IntersectWith(secondInnerNodes);
            Assert.Subset(firstInnerNodes, sharedInnerNodes);
            Assert.Subset(secondInnerNodes, sharedInnerNodes);

            // Walk the second tree in preorder, counting visits; the visitor returns
            // false (stop descending) at any node whose hash the first tree contains.
            int innerVisits = 0;
            int leafVisits = 0;
            secondNode.VisitPreorder(n =>
            {
                if (n.Type == DedupNode.NodeType.ChunkLeaf)
                {
                    leafVisits++;
                }
                else if (n.Type == DedupNode.NodeType.InnerNode)
                {
                    innerVisits++;
                }

                return !firstInnerNodes.Contains(n.Hash.ToHex());
            });

            // No inner nodes are shared, so the visit never prunes: every inner node
            // and every chunk leaf of the second tree is visited exactly once.
            Assert.Equal(0, sharedInnerNodes.Count);
            Assert.Equal(innerVisits, secondInnerNodes.Count);
            Assert.Equal(leafVisits, secondChunks.Count);
        }
        /// <summary>
        /// Streams <paramref name="blockCount"/> blocks of <paramref name="blockSize"/>
        /// deterministic test bytes through the managed chunker — and, when the platform
        /// supports it and the hash type is Dedup64K, the COM chunker in lockstep — then
        /// asserts both produce <paramref name="expected"/> as the root hash.
        /// </summary>
        /// <param name="hashType">Dedup hash type whose chunker configuration is used.</param>
        /// <param name="blockSize">Size in bytes of each block pushed per iteration.</param>
        /// <param name="blockCount">Number of blocks; total input is blockSize * blockCount.</param>
        /// <param name="expected">Expected root hash, as hex.</param>
        /// <returns>The root node produced by the managed hasher.</returns>
        protected DedupNode CanChunkLargeFilesHelper(HashType hashType, int blockSize, int blockCount, string expected)
        {
            var r = new Random(Seed: 0);

            // Double buffering: one buffer is being hashed while the other is refilled.
            byte[] bytes     = new byte[blockSize];
            byte[] tempBytes = new byte[blockSize];

            using (var mgdHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration())))
                // comHasher is null when COM chunking is unavailable or the hash type is
                // not 64K; all subsequent uses are null-conditional.
                using (var comHasher = (Chunker.IsComChunkerSupported && hashType == HashType.Dedup64K) ?
                                       new DedupNodeOrChunkHashAlgorithm(new ComChunker(ChunkerConfiguration.SupportedComChunkerConfiguration)) :
                                       null)
                {
                    long totalLength = (long)blockSize * blockCount;
                    mgdHasher.SetInputLength(totalLength);
                    comHasher?.SetInputLength(totalLength);

                    FillBufferWithTestContent(seed: r.Next(), bytes);

                    for (int i = 0; i < blockCount; i++)
                    {
                        // Both hashers consume 'bytes' while 'tempBytes' is refilled;
                        // WaitAll completes before the buffers are swapped, so the
                        // concurrent tasks never touch the same buffer.
                        Task.WaitAll(
                            Task.Run(() => mgdHasher.TransformBlock(bytes, 0, bytes.Length, null, 0)),
                            Task.Run(() => comHasher?.TransformBlock(bytes, 0, bytes.Length, null, 0)),
                            // Filling the buffer for the next iteration in parallel with actual work
                            // to speed up the tests.
                            Task.Run(
                                () =>
                        {
                            if (i < blockCount - 1)
                            {
                                FillBufferWithTestContent(seed: r.Next(), tempBytes);
                            }
                        })
                            );

                        swap(ref bytes, ref tempBytes);
                    }

                    // Finish both hashes with an empty final block.
                    mgdHasher.TransformFinalBlock(new byte[0], 0, 0);
                    comHasher?.TransformFinalBlock(new byte[0], 0, 0);

                    var node = mgdHasher.GetNode();
                    // The chunk leaves must account for exactly the total input length.
                    Assert.Equal <long>(
                        (long)blockSize * blockCount,
                        node.EnumerateChunkLeafsInOrder().Sum(c => (long)c.TransitiveContentBytes));

                    Assert.Equal <string>(expected, node.Hash.ToHex());
                    if (comHasher != null)
                    {
                        // Managed and COM chunkers must agree on the root hash.
                        Assert.Equal <string>(expected, comHasher.GetNode().Hash.ToHex());
                    }

                    return(node);
                }
                // NOTE(review): the method's closing brace appears to have been truncated
                // from this excerpt (the text cuts to an example separator here) — confirm
                // against the original source.
// Beispiel #5 ("Example #5" — scraped-site separator, not part of the original source)
// 0
        /// <summary>
        /// Verifies that hashing <paramref name="byteCount"/> bytes produces
        /// <paramref name="expectedHash"/>, repeatedly and across hasher reuse, for both
        /// the COM chunker (when applicable) and the managed chunker.
        /// </summary>
        /// <param name="hashType">Dedup hash type whose chunker configuration is exercised.</param>
        /// <param name="byteCount">Number of bytes of test content to hash.</param>
        /// <param name="expectedHash">Expected root hash, as hex.</param>
        /// <param name="seed">Seed for the deterministic test content.</param>
        /// <returns>The node produced by the final managed-chunker hashing pass.</returns>
        protected DedupNode HashIsStable(HashType hashType, uint byteCount, string expectedHash, int seed = 0)
        {
            DedupNode node = default;

            // The COM chunker only supports the 64K configuration, so it is exercised
            // only for that hash type and only where the platform provides it.
            if (hashType == HashType.Dedup64K && Chunker.IsComChunkerSupported)
            {
                using (var comChunker = new ComChunker(ChunkerConfiguration.SupportedComChunkerConfiguration))
                using (var comHasher = new DedupNodeOrChunkHashAlgorithm(comChunker))
                {
                    // Hash four times, alternating the flag, to confirm the result is
                    // stable when the hasher instance is reused.
                    foreach (bool flag in new[] { false, true, false, true })
                    {
                        node = HashIsStableForChunker(comHasher, byteCount, expectedHash, seed, flag);
                    }
                }
            }

            // The managed chunker is exercised for every hash type.
            using (var managedHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration())))
            {
                foreach (bool flag in new[] { false, true, false, true })
                {
                    node = HashIsStableForChunker(managedHasher, byteCount, expectedHash, seed, flag);
                }
            }

            return node;
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="DedupNodeOrChunkHashAlgorithm"/> class,
 /// delegating to the chunker-accepting constructor with a chunker created from the
 /// configuration of <c>NodeOrChunkTargetHashType</c> (declared outside this excerpt).
 /// </summary>
 public DedupNodeOrChunkHashAlgorithm() :
     this(Chunker.Create(NodeOrChunkTargetHashType.GetChunkerConfiguration()))
 {
 }
// Beispiel #7 ("Example #7" — scraped-site separator, not part of the original source)
// 0
        /// <summary>
        /// Pushes random content into a chunker in large increments and asserts that
        /// chunks are reported incrementally as data arrives, that the hash algorithm
        /// over the same content yields the same chunk leaves, and that a tree rebuilt
        /// from the raw chunk list matches the hasher's root node.
        /// </summary>
        private void ChunksEnumeratedAsFileIsRead(Func <IChunker> chunkerFactory, HashType hashType)
        {
            var observedChunks = new List<ChunkInfo>();
            byte[] content;

            using (var chunker = chunkerFactory())
            {
                content = new byte[4 * chunker.Configuration.MinPushBufferSize];
                new Random(Seed: 0).NextBytes(content);

                using (var session = chunker.BeginChunking(chunk => observedChunks.Add(chunk)))
                {
                    // Two pushes of half the buffer each; every push must surface
                    // at least one new chunk.
                    int pushSize = 2 * chunker.Configuration.MinPushBufferSize;
                    int previousCount = 0;
                    for (int offset = 0; offset < content.Length; offset += pushSize)
                    {
                        session.PushBuffer(content, offset, Math.Min(pushSize, content.Length - offset));
                        Assert.True(observedChunks.Count > previousCount);
                        previousCount = observedChunks.Count;
                    }
                }
            }

            string[] expectedChunkHashes = observedChunks.Select(c => c.Hash.ToHex()).ToArray();

            // Hashing the same content in one shot must yield the same chunk leaves.
            DedupNode rootFromHasher;
            string[] actualChunkHashes;

            using (var hasher = new DedupNodeOrChunkHashAlgorithm(Chunker.Create(hashType.GetChunkerConfiguration())))
            {
                hasher.SetInputLength(content.Length);
                hasher.ComputeHash(content);
                rootFromHasher = hasher.GetNode();
                actualChunkHashes = rootFromHasher.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToArray();
                Assert.Equal(expectedChunkHashes, actualChunkHashes);
            }

            // Rebuild the tree from the raw chunk list: each inner node may reference
            // only hashes already seen, and the last node enumerated is the root.
            var knownHashes = new HashSet<byte[]>(observedChunks.Select(c => c.Hash), ByteArrayComparer.Instance);
            DedupNode? treeRoot = null;

            foreach (var innerNode in PackedDedupNodeTree.EnumerateTree(observedChunks)
                     .Where(n => n.Type != DedupNode.NodeType.ChunkLeaf))
            {
                foreach (var child in innerNode.ChildNodes)
                {
                    Assert.True(knownHashes.Contains(child.Hash));
                }

                Assert.True(knownHashes.Add(innerNode.Hash));
                treeRoot = innerNode;
            }

            Assert.True(treeRoot.HasValue);

            // ReSharper disable once PossibleInvalidOperationException
            Assert.Equal(rootFromHasher, treeRoot.Value);
            actualChunkHashes = treeRoot.Value.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToArray();
            Assert.Equal(expectedChunkHashes, actualChunkHashes);
        }