/// <summary>
/// Streams <paramref name="blockCount"/> blocks of <paramref name="blockSize"/> pseudo-random
/// bytes through the managed chunker and, when supported, the COM chunker, then asserts that
/// both produce the <paramref name="expected"/> root hash and that the chunk leaves account
/// for every input byte. Returns the root node produced by the managed hasher.
/// </summary>
protected DedupNode CanChunkLargeFilesHelper(HashType hashType, int blockSize, int blockCount, string expected)
{
    var r = new Random(Seed: 0);
    byte[] bytes = new byte[blockSize];
    byte[] tempBytes = new byte[blockSize];

    using (var mgdHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration())))
    using (var comHasher = (Chunker.IsComChunkerSupported && hashType == HashType.Dedup64K)
        ? new DedupNodeOrChunkHashAlgorithm(new ComChunker(ChunkerConfiguration.SupportedComChunkerConfiguration))
        : null)
    {
        long totalLength = (long)blockSize * blockCount;
        mgdHasher.SetInputLength(totalLength);
        comHasher?.SetInputLength(totalLength);

        FillBufferWithTestContent(seed: r.Next(), bytes);

        for (int i = 0; i < blockCount; i++)
        {
            // Copy the loop variable before capturing it in the lambda below: 'for' loop
            // variables are shared across iterations, so capturing 'i' directly is only
            // safe while WaitAll blocks the increment. The local removes that coupling.
            bool isLastBlock = i == blockCount - 1;

            Task.WaitAll(
                Task.Run(() => mgdHasher.TransformBlock(bytes, 0, bytes.Length, null, 0)),
                Task.Run(() => comHasher?.TransformBlock(bytes, 0, bytes.Length, null, 0)),
                // Filling the buffer for the next iteration in parallel with actual work
                // to speed up the tests.
                Task.Run(() =>
                {
                    if (!isLastBlock)
                    {
                        FillBufferWithTestContent(seed: r.Next(), tempBytes);
                    }
                }));

            // Hand the freshly filled buffer to the hashers and reuse the old one as scratch.
            swap(ref bytes, ref tempBytes);
        }

        mgdHasher.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
        comHasher?.TransformFinalBlock(Array.Empty<byte>(), 0, 0);

        var node = mgdHasher.GetNode();

        // Every input byte must be covered by exactly the chunk leaves of the tree.
        Assert.Equal<long>(
            (long)blockSize * blockCount,
            node.EnumerateChunkLeafsInOrder().Sum(c => (long)c.TransitiveContentBytes));

        Assert.Equal<string>(expected, node.Hash.ToHex());
        if (comHasher != null)
        {
            Assert.Equal<string>(expected, comHasher.GetNode().Hash.ToHex());
        }

        return node;
    }
}
/// <summary>
/// A single-byte input yields a root node that is itself the only chunk leaf,
/// so enumerating the leaves must return exactly that root.
/// </summary>
public void CanEnumerateChunksInChunk()
{
    DedupNode rootFromhash;
    using (var hasher = new DedupNodeOrChunkHashAlgorithm())
    {
        hasher.SetInputLength(1);
        hasher.ComputeHash(new byte[1]);
        rootFromhash = hasher.GetNode();
    }

    var onlyLeaf = rootFromhash.EnumerateChunkLeafsInOrder().Single();
    Assert.Equal(rootFromhash.HashString, onlyLeaf.HashString);
}
/// <summary>
/// Verifies that each chunk leaf produced by the node hasher carries the same hash that
/// <c>DedupChunkHashAlgorithm</c> computes over the corresponding byte range, that the leaves
/// cover the whole input, and that the realized chunk count lands near the expected count.
/// </summary>
private void HashOfChunksInNodeMatchesChunkHashAlgorithmInner(int expectedChunkCount, ChunkerConfiguration config, IChunker chunker)
{
    using (DedupNodeOrChunkHashAlgorithm nodeHasher = new DedupNodeOrChunkHashAlgorithm(chunker))
    using (DedupChunkHashAlgorithm chunkHasher = new DedupChunkHashAlgorithm())
    {
        byte[] content = new byte[expectedChunkCount * config.AvgChunkSize];
        nodeHasher.SetInputLength(content.Length);
        new Random(Seed: 0).NextBytes(content);

        nodeHasher.ComputeHash(content, 0, content.Length);
        var node = nodeHasher.GetNode();
        Assert.NotNull(node.Height);

        // With at least two full fan-outs of chunks, the tree must be exactly two levels deep.
        if (expectedChunkCount >= 2 * DedupNode.MaxDirectChildrenPerNode)
        {
            Assert.Equal((uint)2, node.Height.Value);
        }

        ulong position = 0;
        int realizedChunkCount = 0;
        foreach (var leaf in node.EnumerateChunkLeafsInOrder())
        {
            // Rehash the exact byte range the leaf claims and compare.
            byte[] standaloneHash = chunkHasher.ComputeHash(content, (int)position, (int)leaf.TransitiveContentBytes);
            Assert.Equal(standaloneHash.ToHex(), leaf.Hash.ToHex());
            position += leaf.TransitiveContentBytes;
            realizedChunkCount++;
        }

        Assert.Equal(position, node.TransitiveContentBytes);

        // Chunking is content-defined, so only require the count within 30% of expected.
        double ratio = (1.0 * expectedChunkCount) / realizedChunkCount;
        Assert.True(Math.Abs(ratio - 1.0) < 0.3);
    }
}
/// <summary>
/// Hashes one 50 MB random buffer, then the same buffer shifted by 200 KB, and checks that
/// the two trees share at least 90% of their chunks while their roots differ; finally walks
/// the second tree, pruning at nodes already present in the first, and checks every node
/// and chunk of the second tree was visited (i.e. no inner node was shared).
/// </summary>
private void ChunksAndNodesInCommonInSimilarFilesInternal(HashType hashType)
{
    using var hasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(hashType.GetChunkerConfiguration()));

    byte[] content = new byte[50 * 1024 * 1024];
    int offsetForSecondFile = 200 * 1024;
    new Random(Seed: 0).NextBytes(content);

    // Collects the hex hashes of all chunk leaves and all inner nodes of a tree.
    (HashSet<string> Chunks, HashSet<string> Nodes) Fingerprint(DedupNode n) =>
        (n.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToHashSet(),
         n.EnumerateInnerNodesDepthFirst().Select(c => c.Hash.ToHex()).ToHashSet());

    hasher.SetInputLength(content.Length);
    byte[] hash1 = hasher.ComputeHash(content, 0, content.Length);
    var node1 = hasher.GetNode();
    var (chunks1, nodes1) = Fingerprint(node1);

    hasher.SetInputLength(content.Length);
    byte[] hash2 = hasher.ComputeHash(content, offsetForSecondFile, content.Length - offsetForSecondFile);
    var node2 = hasher.GetNode();
    var (chunks2, nodes2) = Fingerprint(node2);

    Assert.NotEqual(hash1, hash2, ByteArrayComparer.Instance);

    // Content-defined chunking resynchronizes quickly, so ≥90% of chunks are shared.
    var commonChunks = new HashSet<string>(chunks1);
    commonChunks.IntersectWith(chunks2);
    Assert.Subset(chunks1, commonChunks);
    Assert.Subset(chunks2, commonChunks);
    Assert.InRange(commonChunks.Count, chunks1.Count - (chunks1.Count / 10), chunks1.Count);
    Assert.InRange(commonChunks.Count, chunks2.Count - (chunks2.Count / 10), chunks2.Count);

    var commonNodes = new HashSet<string>(nodes1);
    commonNodes.IntersectWith(nodes2);
    Assert.Subset(nodes1, commonNodes);
    Assert.Subset(nodes2, commonNodes);

    // Walk the second tree, stopping descent at any node the first tree already has.
    int nodeQueries = 0;
    int chunkQueries = 0;
    node2.VisitPreorder(n =>
    {
        if (n.Type == DedupNode.NodeType.ChunkLeaf)
        {
            chunkQueries++;
        }
        else if (n.Type == DedupNode.NodeType.InnerNode)
        {
            nodeQueries++;
        }

        return !nodes1.Contains(n.Hash.ToHex());
    });

    // No inner node is shared, so the walk visits every node and chunk of the second tree.
    Assert.Equal(0, commonNodes.Count);
    Assert.Equal(nodeQueries, nodes2.Count);
    Assert.Equal(chunkQueries, chunks2.Count);
}
/// <summary>
/// Hashes every file under the given path(s) with the requested dedup hash type, logging the
/// per-file chunk/node structure and, at the end, aggregate unique/total byte, chunk, and
/// node statistics.
/// </summary>
/// <param name="path">Files and/or directories (directories are enumerated recursively).</param>
/// <param name="hashType">Name of a <see cref="HashType"/> enum member.</param>
/// <param name="chunks">Whether to log individual chunks.</param>
/// <param name="childNodes">Whether to log child nodes.</param>
/// <param name="bufferSize">Read buffer size in bytes.</param>
/// <param name="startOffset">Byte offset at which to start reading each file.</param>
public void DedupHashFile
    (
        [Required] string[] path,
        [Required] string hashType,
        [DefaultValue(false)] bool chunks,
        [DefaultValue(false)] bool childNodes,
        [DefaultValue(FileSystemConstants.FileIOBufferSize)] int bufferSize,
        [DefaultValue((long)0)] long startOffset
    )
{
    Initialize();

    _displayChunks = chunks;
    _displayChildNodes = childNodes;

    if (!Enum.TryParse(hashType, out HashType dedupHashType))
    {
        throw new ArgumentException($"HashType couldn't be inferred - {hashType}. Valid HashType is required.");
    }

    // Expand directories recursively; plain files are taken as-is.
    var paths = new List<AbsolutePath>();
    foreach (AbsolutePath root in path.Select(p => new AbsolutePath(Path.GetFullPath(p))))
    {
        if (_fileSystem.DirectoryExists(root))
        {
            paths.AddRange(_fileSystem.EnumerateFiles(root, EnumerateOptions.Recurse).Select(fileInfo => fileInfo.FullPath));
        }
        else if (_fileSystem.FileExists(root))
        {
            paths.Add(root);
        }
        else
        {
            throw new ArgumentException("given path is not an existing file or directory");
        }
    }

    var buffer = new byte[bufferSize];
    using (var contentHasher = new DedupNodeOrChunkHashAlgorithm(new ManagedChunker(dedupHashType.GetChunkerConfiguration())))
    {
        foreach (var p in paths)
        {
            // Reset hasher state so each file produces an independent tree.
            contentHasher.Initialize();

            TaskSafetyHelpers.SyncResultOnThreadPool(async () =>
            {
                using (Stream fs = await _fileSystem.OpenReadOnlySafeAsync(p, FileShare.Read | FileShare.Delete))
                {
                    fs.Position = startOffset;
                    int bytesRead;
                    while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length)) > 0)
                    {
                        contentHasher.TransformBlock(buffer, 0, bytesRead, null, 0);
                    }

                    contentHasher.TransformFinalBlock(Array.Empty<byte>(), 0, 0);
                    DedupNode root = contentHasher.GetNode();
                    ulong offset = 0;
                    LogNode(true, string.Empty, root, p, ref offset);
                }

                return 0;
            });
        }
    }

    _logger.Always("Totals:");
    _logger.Always($"Bytes: Unique={_uniqueBytes:N0} Total={_totalBytes:N0}");
    _logger.Always($"Chunks: Unique={_allChunks.Count:N0} Total={_totalChunks:N0}");
    _logger.Always($"Nodes: Unique={_allNodes.Count:N0} Total={_totalNodes:N0}");
}
/// <summary>
/// Checks three views of the same content agree: (1) chunks reported incrementally by a
/// chunking session as buffers are pushed, (2) the chunk leaves of the tree built by the
/// node hasher, and (3) the tree reconstructed from the raw chunk list via
/// <c>PackedDedupNodeTree</c>, whose nodes must only reference already-seen hashes and whose
/// last-enumerated node must equal the hasher's root.
/// </summary>
private void ChunksEnumeratedAsFileIsRead(Func<IChunker> chunkerFactory, HashType hashType)
{
    var reportedChunks = new List<ChunkInfo>();
    byte[] content;

    // Phase 1: push the content in two large buffers; every push must surface new chunks.
    using (var chunker = chunkerFactory())
    {
        content = new byte[4 * chunker.Configuration.MinPushBufferSize];
        new Random(Seed: 0).NextBytes(content);

        using (var session = chunker.BeginChunking(reportedChunks.Add))
        {
            int pushSize = 2 * chunker.Configuration.MinPushBufferSize;
            int countBeforePush = 0;
            for (int start = 0; start < content.Length; start += pushSize)
            {
                session.PushBuffer(content, start, Math.Min(pushSize, content.Length - start));
                Assert.True(reportedChunks.Count > countBeforePush);
                countBeforePush = reportedChunks.Count;
            }
        }
    }

    string[] expectedChunkHashes = reportedChunks.Select(c => c.Hash.ToHex()).ToArray();

    // Phase 2: the node hasher's chunk leaves must match the session's chunks in order.
    DedupNode rootFromhash;
    string[] actualChunkHashes;
    using (var hasher = new DedupNodeOrChunkHashAlgorithm(Chunker.Create(hashType.GetChunkerConfiguration())))
    {
        hasher.SetInputLength(content.Length);
        hasher.ComputeHash(content);
        rootFromhash = hasher.GetNode();
        actualChunkHashes = rootFromhash.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToArray();
        Assert.Equal(expectedChunkHashes, actualChunkHashes);
    }

    // Phase 3: rebuild the tree from the chunk list; each inner node may only reference
    // hashes seen earlier, its own hash must be new, and the final node is the root.
    var seenNodes = new HashSet<byte[]>(reportedChunks.Select(c => c.Hash), ByteArrayComparer.Instance);
    DedupNode? root = null;
    foreach (var node in PackedDedupNodeTree.EnumerateTree(reportedChunks)
        .Where(n => n.Type != DedupNode.NodeType.ChunkLeaf))
    {
        foreach (var child in node.ChildNodes)
        {
            Assert.True(seenNodes.Contains(child.Hash));
        }

        Assert.True(seenNodes.Add(node.Hash));
        root = node;
    }

    Assert.True(root.HasValue);
    // ReSharper disable once PossibleInvalidOperationException
    Assert.Equal(rootFromhash, root.Value);
    actualChunkHashes = root.Value.EnumerateChunkLeafsInOrder().Select(c => c.Hash.ToHex()).ToArray();
    Assert.Equal(expectedChunkHashes, actualChunkHashes);
}