/// <summary>
/// Starts with every datanode on one rack, adds datanodes on a second rack,
/// raises the file's replication factor and waits for the block to be
/// reported on two racks.
/// </summary>
public virtual void TestUnderReplicatedUsesNewRacks()
{
    Configuration conf = GetConf();
    string testFileName = "/testFile";
    Path testFile = new Path(testFileName);
    short initialReplication = 3;
    short raisedReplication = 5;
    // Five datanodes, all of them on /rack1.
    string[] initialRacks = new string[] { "/rack1", "/rack1", "/rack1", "/rack1", "/rack1" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(initialRacks.Length).Racks(initialRacks).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Single-block file; only one rack is available at this point.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, initialReplication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, initialReplication, 0);

        // Bring up two datanodes on /rack2, then raise the replication
        // factor so the block becomes under-replicated. At least one of the
        // new-rack hosts must end up holding a replica (2 racks expected).
        string[] addedRacks = new string[] { "/rack2", "/rack2" };
        cluster.StartDataNodes(conf, 2, true, null, addedRacks);
        NameNodeAdapter.SetReplication(namesystem, testFileName, raisedReplication);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, raisedReplication, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Creates a single-replica block in a cluster that spans two racks, raises
/// the replication factor to two and waits for the block to be reported on
/// both racks.
/// </summary>
public virtual void TestSufficientlySingleReplBlockUsesNewRack()
{
    Configuration conf = GetConf();
    string testFileName = "/testFile";
    Path testFile = new Path(testFileName);
    short singleReplica = 1;
    short twoReplicas = 2;
    // Three datanodes on /rack1 and one on /rack2.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack1", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // One block, one replica: a single rack is all that can be used.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, singleReplica, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, singleReplica, 0);

        // With two replicas requested the block is expected on two racks.
        NameNodeAdapter.SetReplication(namesystem, testFileName, twoReplicas);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, twoReplicas, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Writes a file with replication 2, lowers the replication to 1 while the
/// output stream is still open, closes the stream and asserts that the block
/// manager counts exactly one live replica for the first block.
/// </summary>
public virtual void TestInvalidateOverReplicatedBlock()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    try
    {
        BlockManager blockManager = cluster.GetNamesystem().GetBlockManager();
        FileSystem dfs = cluster.GetFileSystem();
        Path target = new Path(MiniDFSCluster.GetBaseDirectory(), "/foo1");

        // The replication change is issued after hsync but before close.
        FSDataOutputStream stream = dfs.Create(target, (short)2);
        stream.WriteBytes("HDFS-3119: " + target);
        stream.Hsync();
        dfs.SetReplication(target, (short)1);
        stream.Close();

        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, target);
        int liveReplicas = blockManager.CountNodes(firstBlock.GetLocalBlock()).LiveReplicas();
        NUnit.Framework.Assert.AreEqual("Expected only one live replica for the block", 1, liveReplicas);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Creates a fully replicated block on a single-rack cluster, adds one
/// datanode on a second rack and waits for the block to be reported on two
/// racks without changing the replication factor.
/// </summary>
public virtual void TestSufficientlyReplBlocksUsesNewRack()
{
    Configuration conf = GetConf();
    short replication = 3;
    Path testFile = new Path("/testFile");
    // Three datanodes sharing /rack1.
    string[] initialRacks = new string[] { "/rack1", "/rack1", "/rack1" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(initialRacks.Length).Racks(initialRacks).Build();
    try
    {
        // One block, three replicas, one rack.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, replication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, replication, 0);

        // A datanode on /rack2 joins; the block should spread to it.
        string[] addedRacks = new string[] { "/rack2" };
        cluster.StartDataNodes(conf, 1, true, null, addedRacks);
        cluster.WaitActive();
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Removes one replica of a block directly from the block manager's blocks
/// map and then runs <c>-setrep -w</c> through <c>FsShell</c> with a higher
/// replication factor, asserting that the shell command exits with 0.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSetrepIncWithUnderReplicatedBlocks()
{
    // NOTE(review): the original carried a "1 min timeout" remark, but no
    // timeout is enforced inside this method - confirm the harness applies it.
    Configuration conf = new HdfsConfiguration();
    short replication = 2;
    string testFileName = "/testFile";
    Path testFile = new Path(testFileName);
    int datanodeCount = replication + 1;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(datanodeCount).Build();
    try
    {
        // One block with two replicas.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, replication, 1L);
        DFSTestUtil.WaitReplication(dfs, testFile, replication);

        // Drop one replica from the blocks map so the block is short of
        // replicas without being placed in the under-replicated queue.
        BlockManager blockManager = cluster.GetNamesystem().GetBlockManager();
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DatanodeDescriptor victim = blockManager.blocksMap.GetStorages(firstBlock.GetLocalBlock())
            .GetEnumerator().Next().GetDatanodeDescriptor();
        blockManager.AddToInvalidates(firstBlock.GetLocalBlock(), victim);
        Sharpen.Thread.Sleep(5000);
        blockManager.blocksMap.RemoveNode(firstBlock.GetLocalBlock(), victim);

        // Raise the replication factor by one and wait (-w) for completion.
        FsShell shell = new FsShell(conf);
        string[] setrepArgs = new string[]
        {
            "-setrep",
            "-w",
            Sharpen.Extensions.ToString(1 + replication),
            testFileName
        };
        NUnit.Framework.Assert.AreEqual(0, shell.Run(setrepArgs));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Lowers the replication factor of a block that spans two racks and waits
/// for the block to still be reported on two racks afterwards.
/// </summary>
public virtual void TestReduceReplFactorRespectsRackPolicy()
{
    Configuration conf = GetConf();
    string testFileName = "/testFile";
    Path testFile = new Path(testFileName);
    short initialReplication = 3;
    short reducedReplication = 2;
    // Two datanodes on each of two racks.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Single-block file, expected on both racks.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, initialReplication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, initialReplication, 0);

        // Dropping to two replicas must not delete the replica that is alone
        // on its rack: two racks are still expected afterwards.
        NameNodeAdapter.SetReplication(namesystem, testFileName, reducedReplication);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, reducedReplication, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Reads one byte from a file through a short-circuit enabled client, visits
/// the short-circuit cache, shuts down the only datanode and visits the cache
/// again.
/// </summary>
/// <remarks>
/// The cluster and the temporary socket directory are released in
/// <c>finally</c> blocks so that a failing assertion does not leak them.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestShmBasedStaleness()
{
    BlockReaderTestUtil.EnableShortCircuitShmTracing();
    TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
    try
    {
        Configuration conf = CreateShortCircuitConf("testShmBasedStaleness", sockDir);
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        try
        {
            cluster.WaitActive();
            DistributedFileSystem fs = cluster.GetFileSystem();
            ShortCircuitCache cache = fs.GetClient().GetClientContext().GetShortCircuitCache();
            string TestFile = "/test_file";
            int TestFileLen = 8193;
            int Seed = unchecked((int)(0xFADED));
            DFSTestUtil.CreateFile(fs, new Path(TestFile), TestFileLen, (short)1, Seed);

            // Read a single byte so the client has touched the block.
            FSDataInputStream fis = fs.Open(new Path(TestFile));
            int first = fis.Read();
            ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, new Path(TestFile));
            NUnit.Framework.Assert.IsTrue(first != -1);

            // First cache inspection, taken while the datanode is running.
            // NOTE(review): the visitor classes are declared elsewhere;
            // presumably this one checks that the replica is not yet stale.
            cache.Accept(new _CacheVisitor_502(block));

            // Stop the datanode. This will close the socket keeping the
            // client's shared memory segment alive, and make it stale.
            cluster.GetDataNodes()[0].Shutdown();

            // Second cache inspection, taken after the datanode is gone.
            cache.Accept(new _CacheVisitor_518(block));
        }
        finally
        {
            cluster.Shutdown();
        }
    }
    finally
    {
        sockDir.Close();
    }
}
/// <summary>
/// Over-replicates a block across two racks, lowers the replication factor,
/// decommissions one replica holder that is not on /rack2 and waits for the
/// block to still be reported on two racks.
/// </summary>
public virtual void TestNodeDecomissionWithOverreplicationRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short ReplicationFactor = 5;
    Path filePath = new Path("/testFile");

    // Configure empty include and exclude host files.
    FileSystem localFileSys = FileSystem.GetLocal(conf);
    Path workingDir = localFileSys.GetWorkingDirectory();
    Path dir = new Path(workingDir, "build/test/data/temp/decommission");
    Path excludeFile = new Path(dir, "exclude");
    Path includeFile = new Path(dir, "include");
    NUnit.Framework.Assert.IsTrue(localFileSys.Mkdirs(dir));
    DFSTestUtil.WriteFile(localFileSys, excludeFile, string.Empty);
    DFSTestUtil.WriteFile(localFileSys, includeFile, string.Empty);
    conf.Set(DFSConfigKeys.DfsHosts, includeFile.ToUri().GetPath());
    conf.Set(DFSConfigKeys.DfsHostsExclude, excludeFile.ToUri().GetPath());

    // All hosts are on two racks, only one host on /rack2
    string[] racks = new string[] { "/rack1", "/rack2", "/rack1", "/rack1", "/rack1" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length)
        .Racks(racks).Build();
    FSNamesystem ns = cluster.GetNameNode().GetNamesystem();
    try
    {
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
        ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);

        // Lower the replication factor so the blocks are over replicated
        ReplicationFactor = 2;
        fs.SetReplication(filePath, ReplicationFactor);

        // Decommission one of the hosts with the block that is not on
        // the lone host on rack2 (if we decommission that host it would
        // be impossible to respect the rack policy).
        BlockLocation[] locs = fs.GetFileBlockLocations(fs.GetFileStatus(filePath), 0, long.MaxValue);
        foreach (string top in locs[0].GetTopologyPaths())
        {
            // Rack paths are identifiers, so compare them ordinally rather
            // than with the current culture's rules.
            if (!top.StartsWith("/rack2", System.StringComparison.Ordinal))
            {
                // Strip the "/rack1/" prefix to obtain the host name.
                string name = Sharpen.Runtime.Substring(top, "/rack1".Length + 1);
                DFSTestUtil.WriteFile(localFileSys, excludeFile, name);
                ns.GetBlockManager().GetDatanodeManager().RefreshNodes(conf);
                DFSTestUtil.WaitForDecommission(fs, name);
                break;
            }
        }

        // Check the block still has sufficient # replicas across racks,
        // ie we didn't remove the replica on the lone host on /rack2.
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Decommissions one replica holder through the exclude file, waits for the
/// block to be re-replicated across two racks and asserts that the NameNode's
/// <c>LiveNodes</c> MXBean attribute mentions a decommissioned node.
/// </summary>
public virtual void TestHostsExcludeInUI()
{
    Configuration conf = GetConf();
    short ReplicationFactor = 2;
    Path filePath = new Path("/testFile");

    // Configure empty include and exclude host files.
    FileSystem localFileSys = FileSystem.GetLocal(conf);
    Path workingDir = localFileSys.GetWorkingDirectory();
    Path dir = new Path(workingDir, "build/test/data/temp/decommission");
    Path excludeFile = new Path(dir, "exclude");
    Path includeFile = new Path(dir, "include");
    NUnit.Framework.Assert.IsTrue(localFileSys.Mkdirs(dir));
    DFSTestUtil.WriteFile(localFileSys, excludeFile, string.Empty);
    DFSTestUtil.WriteFile(localFileSys, includeFile, string.Empty);
    conf.Set(DFSConfigKeys.DfsHostsExclude, excludeFile.ToUri().GetPath());
    conf.Set(DFSConfigKeys.DfsHosts, includeFile.ToUri().GetPath());

    // Four datanodes spread over two racks.
    string[] racks = new string[] { "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length)
        .Racks(racks).Build();
    FSNamesystem ns = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Create a file with one block
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
        ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);

        // Decommission one of the hosts with the block, this should cause
        // the block to get replicated to another host on the same rack,
        // otherwise the rack policy is violated.
        BlockLocation[] locs = fs.GetFileBlockLocations(fs.GetFileStatus(filePath), 0, long.MaxValue);
        string name = locs[0].GetNames()[0];
        // Log exactly what is written to the exclude file below.
        Log.Info("adding '" + name + "' to exclude file " + excludeFile.ToUri().GetPath());
        DFSTestUtil.WriteFile(localFileSys, excludeFile, name);
        ns.GetBlockManager().GetDatanodeManager().RefreshNodes(conf);
        DFSTestUtil.WaitForDecommission(fs, name);

        // Check the block still has sufficient # replicas across racks
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);

        // Read the live-node report from the NameNodeInfo MXBean.
        MBeanServer mbs = ManagementFactory.GetPlatformMBeanServer();
        ObjectName mxbeanName = new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
        string nodes = (string)mbs.GetAttribute(mxbeanName, "LiveNodes");
        NUnit.Framework.Assert.IsTrue("Live nodes should contain the decommissioned node",
            nodes.Contains("Decommissioned"));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Over-replicates a file on nn1, fails over to nn2 while nn1 still believes
/// it is active, and checks that nn2 ends up with no postponed,
/// under-replicated or pending-replication blocks and that the file remains
/// readable.
/// </summary>
public virtual void TestDnFencing()
{
    // Start from a file written with three replicas.
    DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)3, 1L);
    ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestFilePath);

    // Cut the replication to 1 so the file is over-replicated, compute the
    // resulting invalidations on nn1 and push them out with heartbeats.
    nn1.GetRpcServer().SetReplication(TestFile, (short)1);
    BlockManager nn1BlockManager = nn1.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.ComputeInvalidationWork(nn1BlockManager);
    cluster.TriggerHeartbeats();

    // Make nn2 active while nn1 is left believing it is still active.
    Banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.AbortEditLogs(nn1);
    NameNodeAdapter.EnterSafeMode(nn1, false);
    cluster.TransitionToActive(1);

    // nn2 must have seen the replication change.
    NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

    // Debug dumps around the heartbeat / block report round.
    Banner("NN2 Metadata immediately after failover");
    DoMetasave(nn2);
    Banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.TriggerHeartbeats();
    cluster.TriggerBlockReports();
    Banner("Metadata after nodes have all block-reported");
    DoMetasave(nn2);

    // Re-examine the postponed mis-replicated blocks on nn2.
    BlockManager standbyBlockManager = nn2.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.CheckHeartbeat(standbyBlockManager);
    BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(standbyBlockManager);

    // Nothing should remain postponed.
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

    // Let nn2 carry out its deletions (the replication monitor has to run, etc).
    BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());

    Banner("Making sure the file is still readable");
    FileSystem nn2FileSystem = cluster.GetFileSystem(1);
    DFSTestUtil.ReadFile(nn2FileSystem, TestFilePath);

    Banner("Waiting for the actual block files to get deleted from DNs.");
    WaitForTrueReplication(cluster, firstBlock, 1);
}
/// <summary>
/// Corrupts all three replicas of a block and asserts that the namesystem
/// reports 0 live and 3 corrupt replicas, both before and after the file's
/// replication factor is lowered to 1.
/// </summary>
public virtual void TestWithAllCorruptReplicas()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    FileSystem dfs = cluster.GetFileSystem();
    FSNamesystem namesystem = cluster.GetNamesystem();
    try
    {
        Path target = new Path("/foo1");
        DFSTestUtil.CreateFile(dfs, target, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(dfs, target, (short)3);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, target);

        // Corrupt the replica on each of the three datanodes in turn.
        for (int dnIndex = 0; dnIndex < 3; dnIndex++)
        {
            CorruptBlock(cluster, dfs, target, dnIndex, firstBlock);
        }

        // Give the block reports three seconds to be processed.
        try
        {
            Sharpen.Thread.Sleep(3000);
        }
        catch (Exception)
        {
        }
        NUnit.Framework.Assert.AreEqual(0, CountReplicas(namesystem, firstBlock).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(3, CountReplicas(namesystem, firstBlock).CorruptReplicas());

        // Lowering the replication factor must not change the replica counts.
        namesystem.SetReplication(target.ToString(), (short)1);

        // Again allow three seconds for block reports.
        try
        {
            Sharpen.Thread.Sleep(3000);
        }
        catch (Exception)
        {
        }
        NUnit.Framework.Assert.AreEqual(0, CountReplicas(namesystem, firstBlock).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(3, CountReplicas(namesystem, firstBlock).CorruptReplicas());
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Creates a single-replica file, changes the length of one replica by
/// <paramref name="lenDelta"/>, raises the replication factor by one and
/// polls the NameNode until the expected outcome is visible, then deletes
/// the file.
/// </summary>
/// <remarks>
/// When <paramref name="lenDelta"/> is negative the loop waits until the block
/// is flagged corrupt and still has the original number of locations.
/// Otherwise it waits until the block has one more location than before.
/// The polling loops have no timeout of their own.
/// </remarks>
/// <param name="cluster">Running mini cluster to operate on.</param>
/// <param name="lenDelta">Amount by which the replica length is changed.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="Sharpen.TimeoutException"/>
private void ChangeBlockLen(MiniDFSCluster cluster, int lenDelta)
{
    Path fileName = new Path("/file1");
    short ReplicationFactor = (short)1;
    FileSystem fs = cluster.GetFileSystem();
    int fileLen = fs.GetConf().GetInt(DFSConfigKeys.DfsBytesPerChecksumKey, 512);
    DFSTestUtil.CreateFile(fs, fileName, fileLen, ReplicationFactor, 0);
    DFSTestUtil.WaitReplication(fs, fileName, ReplicationFactor);
    ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);

    // Change the length of a replica; stop at the first datanode that
    // reports the change was applied.
    for (int i = 0; i < cluster.GetDataNodes().Count; i++)
    {
        if (DFSTestUtil.ChangeReplicaLength(cluster, block, i, lenDelta))
        {
            break;
        }
    }

    // increase the file's replication factor
    fs.SetReplication(fileName, (short)(ReplicationFactor + 1));

    // block replication triggers corrupt block detection
    DFSClient dfsClient = new DFSClient(new IPEndPoint("localhost", cluster.GetNameNodePort()),
        fs.GetConf());
    try
    {
        LocatedBlocks blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString(), 0, fileLen);
        if (lenDelta < 0)
        {
            // replica truncated
            while (!blocks.Get(0).IsCorrupt() || ReplicationFactor != blocks.Get(0).GetLocations().Length)
            {
                Sharpen.Thread.Sleep(100);
                blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString(), 0, fileLen);
            }
        }
        else
        {
            // no corruption detected; block replicated
            while (ReplicationFactor + 1 != blocks.Get(0).GetLocations().Length)
            {
                Sharpen.Thread.Sleep(100);
                blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString(), 0, fileLen);
            }
        }
        fs.Delete(fileName, true);
    }
    finally
    {
        // Release the client created above even when polling or delete throws.
        dfsClient.Close();
    }
}
/// <summary>
/// Corrupts the only replica of a block, raises the replication factor to
/// trigger a transfer, waits until the NameNode reports the block as corrupt
/// and asserts that it still has exactly one location.
/// </summary>
/// <remarks>
/// The cluster and the client are released in <c>finally</c> blocks so that a
/// failing assertion does not leak them. The wait loop has no timeout of its
/// own.
/// </remarks>
/// <param name="corruptBlockByDeletingBlockFile">
/// When true the block is corrupted through
/// <c>CorruptBlockOnDataNodesByDeletingBlockFile</c>, otherwise through
/// <c>CorruptBlockOnDataNodes</c>.
/// </param>
/// <exception cref="System.Exception"/>
private void TestBadBlockReportOnTransfer(bool corruptBlockByDeletingBlockFile)
{
    Configuration conf = new HdfsConfiguration();
    short replFactor = 1;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build();
    try
    {
        cluster.WaitActive();
        FileSystem fs = cluster.GetFileSystem();
        DFSClient dfsClient = new DFSClient(new IPEndPoint("localhost", cluster.GetNameNodePort()), conf);
        try
        {
            // Create file with replication factor of 1
            Path file1 = new Path("/tmp/testBadBlockReportOnTransfer/file1");
            DFSTestUtil.CreateFile(fs, file1, 1024, replFactor, 0);
            DFSTestUtil.WaitReplication(fs, file1, replFactor);

            // Corrupt the block belonging to the created file
            ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, file1);
            int blockFilesCorrupted = corruptBlockByDeletingBlockFile
                ? cluster.CorruptBlockOnDataNodesByDeletingBlockFile(block)
                : cluster.CorruptBlockOnDataNodes(block);
            NUnit.Framework.Assert.AreEqual("Corrupted too few blocks", replFactor, blockFilesCorrupted);

            // Increase replication factor, this should invoke transfer request
            // Receiving datanode fails on checksum and reports it to namenode
            replFactor = 2;
            fs.SetReplication(file1, replFactor);

            // Now get block details and check if the block is corrupt
            LocatedBlocks blocks = dfsClient.GetNamenode().GetBlockLocations(file1.ToString(), 0, long.MaxValue);
            while (!blocks.Get(0).IsCorrupt())
            {
                try
                {
                    Log.Info("Waiting until block is marked as corrupt...");
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception)
                {
                    // Deliberately best-effort: a failed pause only shortens
                    // the interval before the next poll.
                }
                blocks = dfsClient.GetNamenode().GetBlockLocations(file1.ToString(), 0, long.MaxValue);
            }

            // The corrupt replica must not have been copied anywhere.
            int replicaCount = blocks.Get(0).GetLocations().Length;
            NUnit.Framework.Assert.AreEqual(1, replicaCount);
        }
        finally
        {
            dfsClient.Close();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Corrupts the only replica of a file and asserts that a <c>ByteBuffer</c>
/// read which fails with <c>ChecksumException</c> leaves the buffer's
/// position and limit unchanged.
/// </summary>
/// <remarks>
/// The check is performed twice: once with a fresh buffer (position 0, limit
/// equal to capacity) and once with position 3 and limit 25. The cluster is
/// shut down in a <c>finally</c> block so that a failing assertion does not
/// leak it.
/// </remarks>
public virtual void TestStablePositionAfterCorruptRead()
{
    short ReplFactor = 1;
    long FileLength = 512L;
    HdfsConfiguration conf = GetConfiguration(null);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    try
    {
        cluster.WaitActive();
        FileSystem fs = cluster.GetFileSystem();
        Path path = new Path("/corrupted");
        DFSTestUtil.CreateFile(fs, path, FileLength, ReplFactor, 12345L);
        DFSTestUtil.WaitReplication(fs, path, ReplFactor);

        // Corrupt every replica (there is only one).
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, path);
        int blockFilesCorrupted = cluster.CorruptBlockOnDataNodes(block);
        NUnit.Framework.Assert.AreEqual("All replicas not corrupted", ReplFactor, blockFilesCorrupted);

        // First attempt: untouched buffer.
        FSDataInputStream dis = cluster.GetFileSystem().Open(path);
        ByteBuffer buf = ByteBuffer.AllocateDirect((int)FileLength);
        bool sawException = false;
        try
        {
            dis.Read(buf);
        }
        catch (ChecksumException)
        {
            sawException = true;
        }
        NUnit.Framework.Assert.IsTrue(sawException);
        NUnit.Framework.Assert.AreEqual(0, buf.Position());
        NUnit.Framework.Assert.AreEqual(buf.Capacity(), buf.Limit());

        // Second attempt: non-default position and limit.
        dis = cluster.GetFileSystem().Open(path);
        buf.Position(3);
        buf.Limit(25);
        sawException = false;
        try
        {
            dis.Read(buf);
        }
        catch (ChecksumException)
        {
            sawException = true;
        }
        NUnit.Framework.Assert.IsTrue(sawException);
        NUnit.Framework.Assert.AreEqual(3, buf.Position());
        NUnit.Framework.Assert.AreEqual(25, buf.Limit());
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Stops and removes the two datanodes on the second rack one after another,
/// waiting after each removal for the block to regain its replicas; after
/// the second removal only one rack is left and one replica is still needed.
/// </summary>
public virtual void TestReplDueToNodeFailRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short replication = 3;
    Path testFile = new Path("/testFile");
    // Three datanodes on /rack1, the last two on /rack2.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    DatanodeManager datanodeManager = namesystem.GetBlockManager().GetDatanodeManager();
    try
    {
        // One block with three replicas, expected across both racks.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, replication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Simulate a missed heartbeat on the last datanode: stop it and
        // remove it from the datanode manager.
        AList<DataNode> liveNodes = cluster.GetDataNodes();
        int lastIndex = liveNodes.Count - 1;
        DatanodeID lastId = liveNodes[lastIndex].GetDatanodeId();
        cluster.StopDataNode(lastIndex);
        datanodeManager.RemoveDatanode(lastId);

        // The block must still be fully replicated across two racks. The
        // removed node may not have held a replica, but if it did the copy
        // should have been re-created within the same rack.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Fail the new last datanode as well; it is also on /rack2, so all
        // remaining replicas can only live on one rack.
        AList<DataNode> remainingNodes = cluster.GetDataNodes();
        int newLastIndex = remainingNodes.Count - 1;
        DatanodeID newLastId = remainingNodes[newLastIndex].GetDatanodeId();
        cluster.StopDataNode(newLastIndex);
        datanodeManager.RemoveDatanode(newLastId);

        // Enough live replicas are expected even though a rack is missing,
        // which leaves one replica counted as needed.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, replication, 1);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Corrupts one replica of a two-rack block, restarts the affected datanode,
/// waits for the NameNode to register the corrupt replica and for the block
/// to span two racks again, then compares the other replicas' contents with
/// the original file content.
/// </summary>
public virtual void TestCorruptBlockRereplicatedAcrossRacks()
{
    Configuration conf = GetConf();
    short replication = 2;
    int fileLength = 512;
    Path testFile = new Path("/testFile");
    // Two datanodes on each of two racks.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // One block, two replicas; remember the content for later comparison.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, fileLength, replication, 1L);
        string expectedContent = DFSTestUtil.ReadFile(dfs, testFile);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Damage the replica on the first datanode that holds the block.
        int corruptedDn = DFSTestUtil.FirstDnWithBlock(cluster, firstBlock);
        NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(corruptedDn, firstBlock));

        // A restart makes the datanode re-scan its blocks and notice the damage.
        cluster.RestartDataNode(corruptedDn);

        // The NameNode should learn about exactly one corrupt replica.
        DFSTestUtil.WaitCorruptReplicas(dfs, namesystem, testFile, firstBlock, 1);

        // The rack policy must hold again.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Every replica other than the damaged one must match the file. The
        // damaged replica is skipped because it may not be cleaned up yet.
        for (int dnIndex = 0; dnIndex < rackLayout.Length; dnIndex++)
        {
            string replicaContent = cluster.ReadBlockOnDataNode(dnIndex, firstBlock);
            if (replicaContent == null || dnIndex == corruptedDn)
            {
                continue;
            }
            NUnit.Framework.Assert.AreEqual("Corrupt replica", expectedContent, replicaContent);
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Removes the only datanode on the second rack, waits for the block to be
/// re-replicated on the remaining rack, then starts a datanode on the second
/// rack again and waits for the block to span two racks with no replica
/// still needed.
/// </summary>
public virtual void TestReduceReplFactorDueToRejoinRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short replication = 2;
    Path testFile = new Path("/testFile");
    // Two datanodes on /rack1, the last one on /rack2.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    DatanodeManager datanodeManager = namesystem.GetBlockManager().GetDatanodeManager();
    try
    {
        // Single-block file, expected on both racks.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, replication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Simulate a missed heartbeat on the cross-rack datanode (index 2):
        // stop it and remove it from the datanode manager.
        AList<DataNode> liveNodes = cluster.GetDataNodes();
        NUnit.Framework.Assert.AreEqual(3, liveNodes.Count);
        DatanodeID crossRackId = liveNodes[2].GetDatanodeId();
        cluster.StopDataNode(2);
        datanodeManager.RemoveDatanode(crossRackId);

        // The block is copied to another datanode, so it has enough replicas
        // but only one rack; one replica is therefore still counted as
        // needed, although 2 hosts are available and only 2 replicas are
        // required.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, replication, 1);

        // Start the "failed" datanode again. It holds a replica, so the block
        // becomes over-replicated; the surplus replica must not be removed
        // from the restarted node, as that would violate the rack policy.
        string[] rejoiningRack = new string[] { "/rack2" };
        cluster.StartDataNodes(conf, 1, true, null, rejoiningRack);
        cluster.WaitActive();

        // Two racks, full replication, nothing outstanding.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Decommissions one replica holder through the exclude file and waits for
/// the block to be reported on two racks with full replication afterwards.
/// </summary>
public virtual void TestNodeDecomissionRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short replication = 2;
    Path testFile = new Path("/testFile");

    // Prepare empty include and exclude host files on the local file system.
    FileSystem localFs = FileSystem.GetLocal(conf);
    Path hostsDir = new Path(localFs.GetWorkingDirectory(), "build/test/data/temp/decommission");
    Path excludePath = new Path(hostsDir, "exclude");
    Path includePath = new Path(hostsDir, "include");
    NUnit.Framework.Assert.IsTrue(localFs.Mkdirs(hostsDir));
    DFSTestUtil.WriteFile(localFs, excludePath, string.Empty);
    DFSTestUtil.WriteFile(localFs, includePath, string.Empty);
    conf.Set(DFSConfigKeys.DfsHostsExclude, excludePath.ToUri().GetPath());
    conf.Set(DFSConfigKeys.DfsHosts, includePath.ToUri().GetPath());

    // Four datanodes spread over two racks.
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    MiniDFSCluster cluster = builder.NumDataNodes(rackLayout.Length).Racks(rackLayout).Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Single-block file, expected on both racks.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1L, replication, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);

        // Exclude the first host holding the block. The replica has to move
        // to another host on the same rack, otherwise the rack policy would
        // be violated.
        FileStatus status = dfs.GetFileStatus(testFile);
        BlockLocation[] locations = dfs.GetFileBlockLocations(status, 0, long.MaxValue);
        string excludedHost = locations[0].GetNames()[0];
        DFSTestUtil.WriteFile(localFs, excludePath, excludedHost);
        namesystem.GetBlockManager().GetDatanodeManager().RefreshNodes(conf);
        DFSTestUtil.WaitForDecommission(dfs, excludedHost);

        // Two racks and full replication are still expected.
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replication, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Checks that the number of outstanding replication requests queued for a
/// single datanode does not exceed the block manager's
/// <c>replicationStreamsHardLimit</c>
/// (dfs.namenode.replication.max-streams-hard-limit).
/// </summary>
/// <remarks>
/// Steps:
/// 1. Start a mini cluster with 2 datanodes and a heartbeat interval setting
/// of 100, so that queued replication requests are not picked up right away.
/// 2. Create a 10-block file with replication factor 2, so each of the two
/// datanodes holds one replica of every block.
/// 3. Add a third datanode as a replication target.
/// 4. Remove one of the datanodes that holds data.
/// 5. Ask the block manager to compute its datanode work, which assigns
/// replication requests to the only datanode that still has the data.
/// 6. Assert that this datanode's pending request count is within the limit.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNumberOfBlocksToBeReplicated()
{
    // NOTE(review): the original carried a "1 min timeout" remark, but no
    // timeout is enforced inside this method - confirm the harness applies it.
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsNamenodeMinBlockSizeKey, 0);
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 1);
    conf.SetInt(DFSConfigKeys.DfsBytesPerChecksumKey, 1);
    // Large enough that pending requests stay queued on the datanode
    // descriptor until the assertions have run.
    conf.SetInt(DFSConfigKeys.DfsHeartbeatIntervalKey, 100);
    // Lets the block manager pull every under-replicated block in one pass.
    conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationWorkMultiplierPerIteration, 5);

    int blockCount = 10;
    short replication = 2;
    string testFileName = "/testFile";
    Path testFile = new Path(testFileName);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(replication).Build();
    try
    {
        // Ten blocks (block size is 1), two replicas each.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, blockCount, replication, 1L);
        DFSTestUtil.WaitReplication(dfs, testFile, replication);

        // Extra datanode that can receive the re-replicated blocks.
        cluster.StartDataNodes(conf, 1, true, null, null, null, null);

        BlockManager blockManager = cluster.GetNamesystem().GetBlockManager();
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, testFile);
        IEnumerator<DatanodeStorageInfo> holders = blockManager.blocksMap
            .GetStorages(firstBlock.GetLocalBlock()).GetEnumerator();
        DatanodeDescriptor removedDn = holders.Next().GetDatanodeDescriptor();
        DatanodeDescriptor survivingDn = holders.Next().GetDatanodeDescriptor();

        // Losing one holder leaves every block under-replicated.
        blockManager.GetDatanodeManager().RemoveDatanode(removedDn);
        NUnit.Framework.Assert.AreEqual(blockCount, blockManager.GetUnderReplicatedNotMissingBlocks());

        blockManager.ComputeDatanodeWork();
        NUnit.Framework.Assert.IsTrue(
            "The number of blocks to be replicated should be less than " + "or equal to "
                + blockManager.replicationStreamsHardLimit,
            survivingDn.GetNumberOfBlocksToBeReplicated() <= blockManager.replicationStreamsHardLimit);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Reads a lazily persisted file while its replica is on RAM_DISK, then
/// writes a second file and reads the first one again once its replica is
/// expected on DEFAULT storage.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void DoShortCircuitReadAfterEvictionTest()
{
    string testName = GenericTestUtils.GetMethodName();
    Path evictedFile = new Path("/" + testName + ".01.dat");
    Path evictingFile = new Path("/" + testName + ".02.dat");
    int seed = unchecked((int)(0xFADED));

    MakeRandomTestFile(evictedFile, BlockSize, true, seed);

    // Round 1: replica on RAM_DISK; the meta file must not exceed the bare
    // header size, and the content must read back correctly.
    EnsureFileReplicasOnStorageType(evictedFile, StorageType.RamDisk);
    ExtendedBlock currentBlock = DFSTestUtil.GetFirstBlock(fs, evictedFile);
    FilePath meta = cluster.GetBlockMetadataFile(0, currentBlock);
    NUnit.Framework.Assert.IsTrue(meta.Length() <= BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(evictedFile, BlockSize, seed));

    // Give the lazy writer thread a few intervals to do its job.
    Sharpen.Thread.Sleep(3 * LazyWriterIntervalSec * 1000);

    // Round 2: same checks, still against RAM_DISK.
    EnsureFileReplicasOnStorageType(evictedFile, StorageType.RamDisk);
    currentBlock = DFSTestUtil.GetFirstBlock(fs, evictedFile);
    meta = cluster.GetBlockMetadataFile(0, currentBlock);
    NUnit.Framework.Assert.IsTrue(meta.Length() <= BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(evictedFile, BlockSize, seed));

    // A second RAM_DISK file evicts the first one.
    MakeRandomTestFile(evictingFile, BlockSize, true, seed);
    Sharpen.Thread.Sleep(3 * LazyWriterIntervalSec * 1000);
    TriggerBlockReport();

    // Round 3: replica now on DEFAULT storage. Lazy persistence wrote a
    // checksum, so the meta file must be larger than the header alone.
    EnsureFileReplicasOnStorageType(evictedFile, StorageType.Default);
    currentBlock = DFSTestUtil.GetFirstBlock(fs, evictedFile);
    meta = cluster.GetBlockMetadataFile(0, currentBlock);
    NUnit.Framework.Assert.IsTrue(meta.Length() > BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(evictedFile, BlockSize, seed));

    // Legacy short-circuit reads trap any failure silently, fall back to a
    // remote read and disable all later legacy short-circuit reads in the
    // ClientContext. When legacy mode is in use, make sure that did not happen.
    ClientContext context = client.GetClientContext();
    if (context.GetUseLegacyBlockReaderLocal())
    {
        NUnit.Framework.Assert.IsFalse(context.GetDisableLegacyBlockReaderLocal());
    }
}
/// <summary>
/// Corrupts the lazy-persisted block file of an evicted replica and expects
/// reading the file to raise a ChecksumException.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void DoShortCircuitReadBlockFileCorruptionTest()
{
    string testName = GenericTestUtils.GetMethodName();
    Path victim = new Path("/" + testName + ".01.dat");
    Path evictor = new Path("/" + testName + ".02.dat");
    int seed = unchecked((int)(0xFADED));

    MakeRandomTestFile(victim, BlockSize, true, seed);
    EnsureFileReplicasOnStorageType(victim, StorageType.RamDisk);

    // Writing a second RAM_DISK file evicts the first one.
    MakeRandomTestFile(evictor, BlockSize, true, seed);

    // Let the lazy writer thread run for a few intervals, then report blocks.
    Sharpen.Thread.Sleep(3 * LazyWriterIntervalSec * 1000);
    TriggerBlockReport();

    // The first file should now live on DEFAULT storage; damage its block
    // file so that checksum verification has something to catch.
    EnsureFileReplicasOnStorageType(victim, StorageType.Default);
    ExtendedBlock persistedBlock = DFSTestUtil.GetFirstBlock(fs, victim);
    cluster.CorruptReplica(0, persistedBlock);

    exception.Expect(typeof(ChecksumException));
    DFSTestUtil.ReadFileBuffer(fs, victim);
}
/// <summary>
/// Runs the "verify" command with a block file only, a meta file only, and
/// both together, checking the exact output string returned by RunCmd.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestVerifyBlockChecksumCommand()
{
    DFSTestUtil.CreateFile(fs, new Path("/bar"), 1234, (short)1,
        unchecked((int)(0xdeadbeef)));
    FsDatasetSpi<object> dataset = datanode.GetFSDataset();
    ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, new Path("/bar"));
    FilePath blockFile = FsDatasetTestUtil.GetBlockFile(dataset, blk.GetBlockPoolId(),
        blk.GetLocalBlock());

    // Only -block given: the command must ask for a meta file.
    string[] blockOnlyArgs = new string[] { "verify", "-block", blockFile.GetAbsolutePath() };
    NUnit.Framework.Assert.AreEqual("ret: 1, You must specify a meta file with -meta",
        RunCmd(blockOnlyArgs));

    FilePath metaFile = FsDatasetTestUtil.GetMetaFile(dataset, blk.GetBlockPoolId(),
        blk.GetLocalBlock());

    // Only -meta given: the checksum type is printed.
    string checksumTypeOutput = "ret: 0, Checksum type: "
        + "DataChecksum(type=CRC32C, chunkSize=512)";
    string[] metaOnlyArgs = new string[] { "verify", "-meta", metaFile.GetAbsolutePath() };
    NUnit.Framework.Assert.AreEqual(checksumTypeOutput, RunCmd(metaOnlyArgs));

    // Both given: checksum type followed by the verification success line.
    string fullOutput = "ret: 0, Checksum type: "
        + "DataChecksum(type=CRC32C, chunkSize=512)"
        + "Checksum verification succeeded on block file "
        + blockFile.GetAbsolutePath();
    string[] metaAndBlockArgs = new string[]
    {
        "verify", "-meta", metaFile.GetAbsolutePath(), "-block", blockFile.GetAbsolutePath()
    };
    NUnit.Framework.Assert.AreEqual(fullOutput, RunCmd(metaAndBlockArgs));
}
/// <summary>
/// Creates a file replicated on every DataNode of a fresh mini cluster,
/// corrupts the first block on the DataNodes, and verifies that reading the
/// file fails with an IOException.
/// </summary>
/// <param name="numDataNodes">
/// Number of DataNodes to start; also used as the replication factor.
/// </param>
/// <exception cref="System.Exception"/>
private void DoTestEntirelyCorruptFile(int numDataNodes)
{
    long fileSize = 4096;
    Path file = new Path("/testFile");
    short replFactor = (short)numDataNodes;
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsReplicationKey, numDataNodes);
    // Set short retry timeouts so this test runs faster
    conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDataNodes)
        .Build();
    try
    {
        cluster.WaitActive();
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, file, fileSize, replFactor, 12345L /*seed*/);
        DFSTestUtil.WaitReplication(fs, file, replFactor);

        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, file);
        int blockFilesCorrupted = cluster.CorruptBlockOnDataNodes(block);
        // NUnit order is (expected, actual, message); the message-first form
        // is the JUnit signature and does not bind to any NUnit overload.
        NUnit.Framework.Assert.AreEqual(replFactor, blockFilesCorrupted,
            "All replicas not corrupted");

        try
        {
            // CopyBytes is asked to close the streams (last argument).
            IOUtils.CopyBytes(fs.Open(file), new IOUtils.NullOutputStream(), conf, true);
            NUnit.Framework.Assert.Fail("Didn't get exception");
        }
        catch (IOException ioe)
        {
            DFSClient.Log.Info("Got expected exception", ioe);
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Starts four DataNodes, stops the fourth, writes a file with replication
/// factor 3 and corrupts one of its blocks. After checking for 2 live and
/// 1 corrupt replica, the fourth DataNode is restarted and the test expects
/// 3 live and 0 corrupt replicas.
/// </summary>
public virtual void TestByAddingAnExtraDataNode()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey,
        Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(4).Build();
    try
    {
        // Everything that touches the running cluster happens inside the try
        // so that a failure here still reaches cluster.Shutdown().
        FileSystem fs = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        MiniDFSCluster.DataNodeProperties dnPropsFourth = cluster.StopDataNode(3);

        Path fileName = new Path("/foo1");
        DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);

        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        // NOTE(review): the literal 0 presumably selects the DataNode whose
        // replica is corrupted - confirm against CorruptBlock.
        CorruptBlock(cluster, fs, fileName, 0, block);

        DFSTestUtil.WaitReplication(fs, fileName, (short)2);
        NUnit.Framework.Assert.AreEqual(2, CountReplicas(namesystem, block).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(1, CountReplicas(namesystem, block).CorruptReplicas());

        // Bring back the stopped DataNode and wait for full replication.
        cluster.RestartDataNode(dnPropsFourth);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);
        NUnit.Framework.Assert.AreEqual(3, CountReplicas(namesystem, block).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(0, CountReplicas(namesystem, block).CorruptReplicas());
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Calls UpdateBlockForPipeline on a finalized block, a non-existent block,
/// and an RBW block (with a wrong, a null and the correct lease holder) and
/// checks which calls are rejected and with what message.
/// </summary>
public virtual void TestGetNewStamp()
{
    int dnCount = 1;
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(dnCount)
        .Build();
    try
    {
        cluster.WaitActive();
        FileSystem dfsFs = cluster.GetFileSystem();
        NamenodeProtocols nnRpc = cluster.GetNameNodeRpc();

        /* Finalized replica */
        Path target = new Path("dataprotocol.dat");
        DFSTestUtil.CreateFile(dfsFs, target, 1L, (short)dnCount, 0L);

        // First block of the freshly written file.
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(dfsFs, target);

        // A finalized block must be rejected.
        try
        {
            nnRpc.UpdateBlockForPipeline(blk, string.Empty);
            NUnit.Framework.Assert.Fail("Can not get a new GS from a finalized block");
        }
        catch (IOException finalizedFailure)
        {
            NUnit.Framework.Assert.IsTrue(
                finalizedFailure.Message.Contains("is not under Construction"));
        }

        // A block id the NameNode has never seen must be rejected.
        try
        {
            long unknownId = blk.GetBlockId() + 1;
            ExtendedBlock unknownBlock = new ExtendedBlock(blk.GetBlockPoolId(), unknownId,
                0, blk.GetGenerationStamp());
            nnRpc.UpdateBlockForPipeline(unknownBlock, string.Empty);
            NUnit.Framework.Assert.Fail("Cannot get a new GS from a non-existent block");
        }
        catch (IOException missingFailure)
        {
            NUnit.Framework.Assert.IsTrue(missingFailure.Message.Contains("does not exist"));
        }

        /* RBW replica */
        // Appending and flushing turns the first block into an RBW one.
        DFSOutputStream appendOut = null;
        try
        {
            appendOut = (DFSOutputStream)(dfsFs.Append(target).GetWrappedStream());
            appendOut.Write(1);
            appendOut.Hflush();

            // Re-read the block so that the current state is used below.
            FSDataInputStream blockIn = null;
            try
            {
                blockIn = dfsFs.Open(target);
                blk = DFSTestUtil.GetAllBlocks(blockIn)[0].GetBlock();
            }
            finally
            {
                IOUtils.CloseStream(blockIn);
            }

            DFSClient client = ((DistributedFileSystem)dfsFs).dfs;

            // A client name that does not hold the lease.
            try
            {
                nnRpc.UpdateBlockForPipeline(blk, "test" + client.clientName);
                NUnit.Framework.Assert.Fail("Cannot get a new GS for a non lease holder");
            }
            catch (LeaseExpiredException wrongHolder)
            {
                NUnit.Framework.Assert.IsTrue(wrongHolder.Message.StartsWith("Lease mismatch"));
            }

            // No client name at all.
            try
            {
                nnRpc.UpdateBlockForPipeline(blk, null);
                NUnit.Framework.Assert.Fail("Cannot get a new GS for a null lease holder");
            }
            catch (LeaseExpiredException nullHolder)
            {
                NUnit.Framework.Assert.IsTrue(nullHolder.Message.StartsWith("Lease mismatch"));
            }

            // The real lease holder on an RBW block: expected to go through.
            nnRpc.UpdateBlockForPipeline(blk, client.clientName);
        }
        finally
        {
            IOUtils.CloseStream(appendOut);
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// On an HA cluster, makes the standby NameNode queue a pending DataNode
/// message for a replica with a rolled-back generation stamp. The DataNode
/// is then wiped and restarted so that it registers under a different ID,
/// after which the queue must be empty and a failover is performed.
/// </summary>
public virtual void TestChangedStorageId()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(1)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .Build();
    try
    {
        cluster.TransitionToActive(0);

        FileSystem haFs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        OutputStream writer = haFs.Create(filePath);
        writer.Write(Sharpen.Runtime.GetBytesForString("foo bar baz"));
        writer.Close();

        HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1));

        // Move the replica's gen stamp back in time on the datanode (gen
        // stamps start at 1000).
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(haFs, filePath);
        NUnit.Framework.Assert.IsTrue(cluster.ChangeGenStampOfBlock(0, blk, 900));

        // Stop the DN; the replica with the altered gen stamp gets reported
        // when the DN comes back.
        MiniDFSCluster.DataNodeProperties stoppedDn = cluster.StopDataNode(0);

        // Restart the namenode so that it sees an initial block report once
        // the DN is up again.
        cluster.RestartNameNode(1, false);
        NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(stoppedDn, true));

        // Poll until the standby NN has queued the corrupt block in its
        // pending DN message queue.
        while (cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount() < 1)
        {
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
        }
        NUnit.Framework.Assert.AreEqual(1,
            cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount());

        string oldStorageId = GetRegisteredDatanodeUid(cluster, 1);

        // Reformat/restart the DN.
        NUnit.Framework.Assert.IsTrue(WipeAndRestartDn(cluster, 0));

        // Poll until the DN has registered again under a different ID, which
        // makes the DatanodeManager dissociate the old storage ID from the DN
        // xfer addr.
        string newStorageId;
        while (true)
        {
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            newStorageId = GetRegisteredDatanodeUid(cluster, 1);
            System.Console.Out.WriteLine("====> oldStorageId: " + oldStorageId
                + " newStorageId: " + newStorageId);
            if (!newStorageId.Equals(oldStorageId))
            {
                break;
            }
        }

        NUnit.Framework.Assert.AreEqual(0,
            cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount());

        // Finally, fail over to the other NameNode.
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Regression test for HDFS-7960.
/// Shutting down a datanode, removing one of its storage directories and
/// restarting the datanode must not leave zombie storages behind.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRemovingStorageDoesNotProduceZombies()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 1);
    int storagesPerDn = 2;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(3)
        .StoragesPerDatanode(storagesPerDn)
        .Build();
    try
    {
        cluster.WaitActive();

        // Every datanode must start out with the configured storage count.
        foreach (DataNode node in cluster.GetDataNodes())
        {
            NUnit.Framework.Assert.AreEqual(storagesPerDn,
                cluster.GetNamesystem().GetBlockManager().GetDatanodeManager()
                    .GetDatanode(node.GetDatanodeId()).GetStorageInfos().Length);
        }

        // Write a file with replication 3 so it ends up on all 3 datanodes.
        Path testFile = new Path("/foo1");
        DistributedFileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, testFile, 1024, (short)3,
            unchecked((int)(0xcafecafe)));
        foreach (DataNode node_1 in cluster.GetDataNodes())
        {
            DataNodeTestUtils.TriggerBlockReport(node_1);
        }

        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(dfs, new Path("/foo1"));

        // Under the namesystem write lock, pick the first storage that holds
        // the block and remember its ID and owning datanode.
        cluster.GetNamesystem().WriteLock();
        string storageIdToRemove;
        string ownerUuid;
        try
        {
            IEnumerator<DatanodeStorageInfo> storages = cluster.GetNamesystem()
                .GetBlockManager().GetStorages(blk.GetLocalBlock()).GetEnumerator();
            NUnit.Framework.Assert.IsTrue(storages.HasNext());
            DatanodeStorageInfo firstStorage = storages.Next();
            storageIdToRemove = firstStorage.GetStorageID();
            ownerUuid = firstStorage.GetDatanodeDescriptor().GetDatanodeUuid();
        }
        finally
        {
            cluster.GetNamesystem().WriteUnlock();
        }

        // Locate the DataNode instance (and its index) owning that storage.
        DataNode owner = null;
        int ownerIdx;
        for (ownerIdx = 0; ownerIdx < cluster.GetDataNodes().Count; ownerIdx++)
        {
            DataNode candidate = cluster.GetDataNodes()[ownerIdx];
            if (candidate.GetDatanodeUuid().Equals(ownerUuid))
            {
                owner = candidate;
                break;
            }
        }
        if (owner == null)
        {
            NUnit.Framework.Assert.Fail("failed to find datanode with uuid " + ownerUuid);
        }

        // Locate the volume inside that datanode backing the storage.
        IList<FsVolumeSpi> volumes = owner.GetFSDataset().GetVolumes();
        NUnit.Framework.Assert.AreEqual(storagesPerDn, volumes.Count);
        string volumeDirectoryToRemove = null;
        foreach (FsVolumeSpi vol in volumes)
        {
            if (vol.GetStorageID().Equals(storageIdToRemove))
            {
                volumeDirectoryToRemove = vol.GetBasePath();
            }
        }

        // Shut the datanode down and swap the volume directory for a regular
        // file. That triggers a volume failure; merely deleting the directory
        // would get it re-initialized with a new storage ID.
        NUnit.Framework.Assert.IsNotNull(volumeDirectoryToRemove);
        owner.Shutdown();
        FileUtil.FullyDelete(new FilePath(volumeDirectoryToRemove));
        FileOutputStream placeholder = new FileOutputStream(volumeDirectoryToRemove);
        try
        {
            placeholder.Write(1);
        }
        finally
        {
            placeholder.Close();
        }
        cluster.RestartDataNode(ownerIdx);

        // Poll (every 10, up to 30000) until the supplier reports success.
        Log.Info("waiting for the datanode to remove " + storageIdToRemove);
        GenericTestUtils.WaitFor(
            new _Supplier_227(cluster, owner, storageIdToRemove, storagesPerDn),
            10, 30000);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Truncates the block file of one file to zero length while the cluster is
/// down, restarts the cluster with short-circuit reads enabled, and verifies
/// that reading the truncated file fails with an IOException while a second,
/// untouched file can still be read back unchanged.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestHandleTruncatedBlockFile()
{
    MiniDFSCluster cluster = null;
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetBoolean(DFSConfigKeys.DfsClientReadShortcircuitKey, true);
    conf.SetBoolean(DFSConfigKeys.DfsClientReadShortcircuitSkipChecksumKey, false);
    conf.Set(DFSConfigKeys.DfsDomainSocketPathKey,
        "/tmp/testHandleTruncatedBlockFile._PORT");
    conf.Set(DFSConfigKeys.DfsChecksumTypeKey, "CRC32C");
    Path TestPath = new Path("/a");
    Path TestPath2 = new Path("/b");
    long RandomSeed = 4567L;
    long RandomSeed2 = 4568L;
    FSDataInputStream fsIn = null;
    int TestLength = 3456;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        cluster.WaitActive();
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, TestPath, TestLength, (short)1, RandomSeed);
        DFSTestUtil.CreateFile(fs, TestPath2, TestLength, (short)1, RandomSeed2);

        // Remember the content of the second file for the final comparison.
        fsIn = cluster.GetFileSystem().Open(TestPath2);
        byte[] original = new byte[TestLength];
        IOUtils.ReadFully(fsIn, original, 0, TestLength);
        fsIn.Close();
        fsIn = null;

        try
        {
            DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
        }
        catch (TimeoutException e)
        {
            // The more specific handler has to come first: a catch clause for
            // a derived type placed after catch (Exception) is unreachable and
            // rejected by the C# compiler (CS0160).
            NUnit.Framework.Assert.Fail("unexpected TimeoutException during "
                + "waitReplication: " + e);
        }
        catch (Exception e)
        {
            NUnit.Framework.Assert.Fail("unexpected InterruptedException during "
                + "waitReplication: " + e);
        }

        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, TestPath);
        FilePath dataFile = cluster.GetBlockFile(0, block);
        cluster.Shutdown();
        cluster = null;

        // Truncate the block file to zero bytes while the cluster is down.
        RandomAccessFile raf = null;
        try
        {
            raf = new RandomAccessFile(dataFile, "rw");
            raf.SetLength(0);
        }
        finally
        {
            if (raf != null)
            {
                raf.Close();
            }
        }

        // Restart on top of the existing (now damaged) data: Format(false).
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Format(false).Build();
        cluster.WaitActive();
        fs = cluster.GetFileSystem();
        fsIn = fs.Open(TestPath);
        try
        {
            byte[] buf = new byte[100];
            fsIn.Seek(2000);
            fsIn.ReadFully(buf, 0, buf.Length);
            NUnit.Framework.Assert.Fail("shouldn't be able to read from corrupt 0-length "
                + "block file.");
        }
        catch (IOException e)
        {
            DFSClient.Log.Error("caught exception ", e);
        }
        fsIn.Close();
        fsIn = null;

        // We should still be able to read the other file.
        // This is important because it indicates that we detected that the
        // previous block was corrupt, rather than blaming the problem on
        // communication.
        fsIn = fs.Open(TestPath2);
        byte[] buf_1 = new byte[original.Length];
        fsIn.ReadFully(buf_1, 0, buf_1.Length);
        TestBlockReaderLocal.AssertArrayRegionsEqual(original, 0, buf_1, 0, original.Length);
        fsIn.Close();
        fsIn = null;
    }
    finally
    {
        if (fsIn != null)
        {
            fsIn.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Sends a series of malformed and well-formed data transfer requests
/// (wrong version, wrong op code, OP_WRITE_BLOCK and OP_READ_BLOCK variants)
/// to a single DataNode and compares each reply via SendRecvData. Finishes
/// by reading the test file back.
/// </summary>
public virtual void TestDataTransferProtocol()
{
    Random rng = new Random();
    int oneMebi = 1024 * 1024;
    Path testFile = new Path("dataprotocol.dat");
    int dnCount = 1;
    Configuration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsReplicationKey, dnCount);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(dnCount)
        .Build();
    try
    {
        cluster.WaitActive();
        datanode = cluster.GetFileSystem().GetDataNodeStats(
            HdfsConstants.DatanodeReportType.Live)[0];
        dnAddr = NetUtils.CreateSocketAddr(datanode.GetXferAddr());
        FileSystem dfsFs = cluster.GetFileSystem();

        // File length: the configured block size, capped at 4096.
        int length = Math.Min(conf.GetInt(DFSConfigKeys.DfsBlockSizeKey, 4096), 4096);
        CreateFile(dfsFs, testFile, length);

        // First block of the file, plus a block id one past it.
        ExtendedBlock first = DFSTestUtil.GetFirstBlock(dfsFs, testFile);
        string writePoolId = first.GetBlockPoolId();
        long nextBlockId = first.GetBlockId() + 1;

        recvBuf.Reset();
        sendBuf.Reset();

        // --- bad version ---
        short staleVersion = (short)(DataTransferProtocol.DataTransferVersion - 1);
        recvOut.WriteShort(staleVersion);
        sendOut.WriteShort(staleVersion);
        SendRecvData("Wrong Version", true);

        // --- bad op code ---
        sendBuf.Reset();
        sendOut.WriteShort((short)DataTransferProtocol.DataTransferVersion);
        sendOut.WriteByte(OP.WriteBlock.code - 1);
        SendRecvData("Wrong Op Code", true);

        /* OP_WRITE_BLOCK */

        // Checksum whose bytes-per-checksum is stubbed to -1.
        sendBuf.Reset();
        DataChecksum brokenChecksum = Org.Mockito.Mockito.Spy(DefaultChecksum);
        Org.Mockito.Mockito.DoReturn(-1).When(brokenChecksum).GetBytesPerChecksum();
        WriteBlock(writePoolId, nextBlockId, brokenChecksum);
        recvBuf.Reset();
        SendResponse(DataTransferProtos.Status.Error, null, null, recvOut);
        SendRecvData("wrong bytesPerChecksum while writing", true);

        // Packet carrying a negative data length.
        sendBuf.Reset();
        recvBuf.Reset();
        WriteBlock(writePoolId, ++nextBlockId, DefaultChecksum);
        int negativeDataLen = -1 - rng.Next(oneMebi);
        PacketHeader header = new PacketHeader(
            4,                  // size of packet
            0,                  // offset in block
            100,                // seqno
            false,              // last packet
            negativeDataLen,    // bad datalen
            false);
        header.Write(sendOut);
        SendResponse(DataTransferProtos.Status.Success, string.Empty, null, recvOut);
        int[] errorReply = new int[]
        {
            PipelineAck.CombineHeader(PipelineAck.ECN.Disabled, DataTransferProtos.Status.Error)
        };
        new PipelineAck(100, errorReply).Write(recvOut);
        SendRecvData("negative DATA_CHUNK len while writing block " + nextBlockId, true);

        // A valid zero-size block.
        sendBuf.Reset();
        recvBuf.Reset();
        WriteBlock(writePoolId, ++nextBlockId, DefaultChecksum);
        header = new PacketHeader(
            8,      // size of packet
            0,      // OffsetInBlock
            100,    // sequencenumber
            true,   // lastPacketInBlock
            0,      // chunk length
            false);
        header.Write(sendOut);
        sendOut.WriteInt(0);    // zero checksum
        sendOut.Flush();
        // Now actually write the block with 0 length.
        SendResponse(DataTransferProtos.Status.Success, string.Empty, null, recvOut);
        int[] successReply = new int[]
        {
            PipelineAck.CombineHeader(PipelineAck.ECN.Disabled, DataTransferProtos.Status.Success)
        };
        new PipelineAck(100, successReply).Write(recvOut);
        SendRecvData("Writing a zero len block blockid " + nextBlockId, false);

        /* OP_READ_BLOCK */

        string nsPoolId = cluster.GetNamesystem().GetBlockPoolId();
        ExtendedBlock readBlk = new ExtendedBlock(nsPoolId, first.GetLocalBlock());
        long realId = readBlk.GetBlockId();

        // Wrong block id.
        sendBuf.Reset();
        recvBuf.Reset();
        readBlk.SetBlockId(realId - 1);
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", 0L, length,
            true, CachingStrategy.NewDefaultStrategy());
        SendRecvData("Wrong block ID " + nextBlockId + " for read", false);

        // Negative start offset (-1L), with the real block id restored.
        sendBuf.Reset();
        readBlk.SetBlockId(realId);
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", -1L, length,
            true, CachingStrategy.NewDefaultStrategy());
        SendRecvData("Negative start-offset for read for block " + first.GetBlockId(),
            false);

        // Start offset equal to the file length.
        sendBuf.Reset();
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", length, length,
            true, CachingStrategy.NewDefaultStrategy());
        SendRecvData("Wrong start-offset for reading block " + first.GetBlockId(), false);

        // A negative length is accepted: the datanode assumes we want to read
        // the whole block.
        recvBuf.Reset();
        ((DataTransferProtos.BlockOpResponseProto)DataTransferProtos.BlockOpResponseProto
            .NewBuilder()
            .SetStatus(DataTransferProtos.Status.Success)
            .SetReadOpChecksumInfo(
                DataTransferProtos.ReadOpChecksumInfoProto.NewBuilder()
                    .SetChecksum(DataTransferProtoUtil.ToProto(DefaultChecksum))
                    .SetChunkOffset(0L))
            .Build()).WriteDelimitedTo(recvOut);
        sendBuf.Reset();
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", 0L,
            -1L - rng.Next(oneMebi), true, CachingStrategy.NewDefaultStrategy());
        SendRecvData("Negative length for reading block " + first.GetBlockId(), false);

        // Length one byte larger than the block.
        recvBuf.Reset();
        SendResponse(DataTransferProtos.Status.Error, null,
            "opReadBlock " + first + " received exception java.io.IOException: "
            + "Offset 0 and length 4097 don't match block " + first
            + " ( blockLen 4096 )", recvOut);
        sendBuf.Reset();
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", 0L, length + 1,
            true, CachingStrategy.NewDefaultStrategy());
        SendRecvData("Wrong length for reading block " + first.GetBlockId(), false);

        // After all of the above, a plain read of the file must still succeed.
        sendBuf.Reset();
        sender.ReadBlock(readBlk, BlockTokenSecretManager.DummyToken, "cl", 0L, length,
            true, CachingStrategy.NewDefaultStrategy());
        ReadFile(dfsFs, testFile, length);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Drives TestWrite through the block construction stages against finalized
/// replicas, blocks that do not exist yet, and RBW replicas, passing for each
/// case a description and whether an error is expected.
/// </summary>
public virtual void TestOpWrite()
{
    int dnCount = 1;
    long idGap = 128;
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(dnCount)
        .Build();
    try
    {
        cluster.WaitActive();
        string bpId = cluster.GetNamesystem().GetBlockPoolId();
        datanode = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[0], bpId);
        dnAddr = NetUtils.CreateSocketAddr(datanode.GetXferAddr());
        FileSystem dfsFs = cluster.GetFileSystem();

        /* Finalized replicas */
        Path finalizedFile = new Path("dataprotocol.dat");
        DFSTestUtil.CreateFile(dfsFs, finalizedFile, 1L, (short)dnCount, 0L);

        // First block of the file.
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(dfsFs, finalizedFile);

        // PIPELINE_SETUP_CREATE on a finalized block
        TestWrite(blk, BlockConstructionStage.PipelineSetupCreate, 0L,
            "Cannot create an existing block", true);

        // PIPELINE_DATA_STREAMING on a finalized block
        TestWrite(blk, BlockConstructionStage.DataStreaming, 0L,
            "Unexpected stage", true);

        // PIPELINE_SETUP_STREAMING_RECOVERY on an existing block
        long nextGs = blk.GetGenerationStamp() + 1;
        TestWrite(blk, BlockConstructionStage.PipelineSetupStreamingRecovery, nextGs,
            "Cannot recover data streaming to a finalized replica", true);

        // PIPELINE_SETUP_APPEND on an existing block
        nextGs = blk.GetGenerationStamp() + 1;
        TestWrite(blk, BlockConstructionStage.PipelineSetupAppend, nextGs,
            "Append to a finalized replica", false);
        blk.SetGenerationStamp(nextGs);

        // PIPELINE_SETUP_APPEND_RECOVERY on an existing block
        Path appendRecoveryFile = new Path("dataprotocol1.dat");
        DFSTestUtil.CreateFile(dfsFs, appendRecoveryFile, 1L, (short)dnCount, 0L);
        blk = DFSTestUtil.GetFirstBlock(dfsFs, appendRecoveryFile);
        nextGs = blk.GetGenerationStamp() + 1;
        TestWrite(blk, BlockConstructionStage.PipelineSetupAppendRecovery, nextGs,
            "Recover appending to a finalized replica", false);

        // PIPELINE_CLOSE_RECOVERY on an existing block
        Path closeRecoveryFile = new Path("dataprotocol2.dat");
        DFSTestUtil.CreateFile(dfsFs, closeRecoveryFile, 1L, (short)dnCount, 0L);
        blk = DFSTestUtil.GetFirstBlock(dfsFs, closeRecoveryFile);
        nextGs = blk.GetGenerationStamp() + 1;
        TestWrite(blk, BlockConstructionStage.PipelineCloseRecovery, nextGs,
            "Recover failed close to a finalized replica", false);
        blk.SetGenerationStamp(nextGs);

        /* New blocks */
        // Skip ahead by idGap rather than taking the next sequential block ID,
        // to avoid conflicting with IDs chosen by the NN.
        long freshId = blk.GetBlockId() + idGap;
        ExtendedBlock freshBlk = new ExtendedBlock(blk.GetBlockPoolId(), freshId, 0,
            blk.GetGenerationStamp());

        // PIPELINE_SETUP_CREATE on a new block
        TestWrite(freshBlk, BlockConstructionStage.PipelineSetupCreate, 0L,
            "Create a new block", false);

        // PIPELINE_SETUP_STREAMING_RECOVERY on a new block
        nextGs = freshBlk.GetGenerationStamp() + 1;
        freshBlk.SetBlockId(freshBlk.GetBlockId() + 1);
        TestWrite(freshBlk, BlockConstructionStage.PipelineSetupStreamingRecovery, nextGs,
            "Recover a new block", true);

        // PIPELINE_SETUP_APPEND on a new block
        nextGs = freshBlk.GetGenerationStamp() + 1;
        TestWrite(freshBlk, BlockConstructionStage.PipelineSetupAppend, nextGs,
            "Cannot append to a new block", true);

        // PIPELINE_SETUP_APPEND_RECOVERY on a new block
        freshBlk.SetBlockId(freshBlk.GetBlockId() + 1);
        nextGs = freshBlk.GetGenerationStamp() + 1;
        TestWrite(freshBlk, BlockConstructionStage.PipelineSetupAppendRecovery, nextGs,
            "Cannot append to a new block", true);

        /* RBW replicas */
        Path rbwFile = new Path("dataprotocol1.dat");
        DFSTestUtil.CreateFile(dfsFs, rbwFile, 1L, (short)dnCount, 0L);
        DFSOutputStream appendOut = (DFSOutputStream)(dfsFs.Append(rbwFile).GetWrappedStream());
        appendOut.Write(1);
        appendOut.Hflush();
        FSDataInputStream blockIn = dfsFs.Open(rbwFile);
        blk = DFSTestUtil.GetAllBlocks(blockIn)[0].GetBlock();
        blk.SetNumBytes(2L);
        try
        {
            // PIPELINE_SETUP_CREATE on a RBW block
            TestWrite(blk, BlockConstructionStage.PipelineSetupCreate, 0L,
                "Cannot create a RBW block", true);

            // PIPELINE_SETUP_APPEND on a RBW block
            nextGs = blk.GetGenerationStamp() + 1;
            TestWrite(blk, BlockConstructionStage.PipelineSetupAppend, nextGs,
                "Cannot append to a RBW replica", true);

            // PIPELINE_SETUP_APPEND_RECOVERY on a RBW block
            TestWrite(blk, BlockConstructionStage.PipelineSetupAppendRecovery, nextGs,
                "Recover append to a RBW replica", false);
            blk.SetGenerationStamp(nextGs);

            // PIPELINE_SETUP_STREAMING_RECOVERY on a RBW block
            Path streamingRecoveryFile = new Path("dataprotocol2.dat");
            DFSTestUtil.CreateFile(dfsFs, streamingRecoveryFile, 1L, (short)dnCount, 0L);
            appendOut = (DFSOutputStream)(dfsFs.Append(streamingRecoveryFile)
                .GetWrappedStream());
            appendOut.Write(1);
            appendOut.Hflush();
            blockIn = dfsFs.Open(streamingRecoveryFile);
            blk = DFSTestUtil.GetAllBlocks(blockIn)[0].GetBlock();
            blk.SetNumBytes(2L);
            nextGs = blk.GetGenerationStamp() + 1;
            TestWrite(blk, BlockConstructionStage.PipelineSetupStreamingRecovery, nextGs,
                "Recover a RBW replica", false);
        }
        finally
        {
            IOUtils.CloseStream(blockIn);
            IOUtils.CloseStream(appendOut);
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}