/// <summary>
/// Writes a one-block file at replication 3 on a four-node cluster split evenly
/// over two racks, lowers the replication factor to 2, and waits until the block
/// again satisfies the replication check with two racks.
/// </summary>
public virtual void TestReduceReplFactorRespectsRackPolicy()
{
    Configuration conf = GetConf();
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack2", "/rack2" };
    Path target = new Path("/testFile");
    short replicas = 3;
    int nodeCount = rackLayout.Length;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(nodeCount)
        .Racks(rackLayout)
        .Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Write a single-block file and wait for it to be fully replicated.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, target, 1L, replicas, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, target);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replicas, 0);

        // Drop one replica. The replica that gets removed must not be the
        // only one on its rack, i.e. the block must still span 2 racks.
        replicas = 2;
        NameNodeAdapter.SetReplication(namesystem, "/testFile", replicas);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replicas, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Starts with five datanodes that all sit on one rack, writes a one-block file
/// at replication 3, then adds two datanodes on a second rack, raises the
/// replication factor to 5 and waits for the block to pass the replication check
/// with two racks.
/// </summary>
public virtual void TestUnderReplicatedUsesNewRacks()
{
    Configuration conf = GetConf();
    // Every initial datanode lives on the same rack.
    string[] initialRacks = new string[] { "/rack1", "/rack1", "/rack1", "/rack1", "/rack1" };
    Path target = new Path("/testFile");
    short replicas = 3;
    int nodeCount = initialRacks.Length;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(nodeCount)
        .Racks(initialRacks)
        .Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Write a single-block file; all replicas are on the one rack.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, target, 1L, replicas, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, target);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, replicas, 0);

        // Bring up datanodes on a second rack, then raise the replication
        // factor so the block becomes under-replicated. At least one host
        // on the new rack has to be picked for the extra replicas.
        string[] extraRacks = new string[] { "/rack2", "/rack2" };
        cluster.StartDataNodes(conf, 2, true, null, extraRacks);
        replicas = 5;
        NameNodeAdapter.SetReplication(namesystem, "/testFile", replicas);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replicas, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Writes a one-block file at replication 1 on a cluster with three datanodes on
/// one rack and one on another, raises the replication factor to 2, and waits for
/// the block to pass the replication check with two racks.
/// </summary>
public virtual void TestSufficientlySingleReplBlockUsesNewRack()
{
    Configuration conf = GetConf();
    string[] rackLayout = new string[] { "/rack1", "/rack1", "/rack1", "/rack2" };
    Path target = new Path("/testFile");
    short replicas = 1;
    int nodeCount = rackLayout.Length;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(nodeCount)
        .Racks(rackLayout)
        .Build();
    FSNamesystem namesystem = cluster.GetNameNode().GetNamesystem();
    try
    {
        // Single block, single replica: only one rack is in use at first.
        FileSystem dfs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(dfs, target, 1L, replicas, 1L);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(dfs, target);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 1, replicas, 0);

        // Ask for a second replica and wait for the two-rack check to pass.
        replicas = 2;
        NameNodeAdapter.SetReplication(namesystem, "/testFile", replicas);
        DFSTestUtil.WaitForReplication(cluster, firstBlock, 2, replicas, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Asks the NameNode for the locations of the last located block of
/// <c>path</c> and reports whether the number of locations equals
/// <c>expectedReplicas</c>. Deletion reports are triggered when there are too
/// many locations, and heartbeats are triggered on every successful lookup.
/// </summary>
/// <returns>
/// true when the location count matches; false when it does not or when the
/// lookup throws an <c>IOException</c>.
/// </returns>
public bool Get()
{
    try
    {
        LocatedBlocks located = NameNodeAdapter.GetBlockLocations(nn, path, 0, 1000);
        DatanodeInfo[] holders = located.GetLastLocatedBlock().GetLocations();
        for (int i = 0; i < holders.Length; i++)
        {
            NUnit.Framework.Assert.IsNotNull(holders[i]);
        }
        int found = holders.Length;
        Org.Apache.Hadoop.Hdfs.Server.Namenode.HA.TestStandbyIsHot.Log.Info(
            "Got " + found + " locs: " + located);
        if (found > expectedReplicas)
        {
            // Too many locations: trigger the datanodes' deletion reports.
            cluster.TriggerDeletionReports();
        }
        cluster.TriggerHeartbeats();
        return found == expectedReplicas;
    }
    catch (IOException e)
    {
        Org.Apache.Hadoop.Hdfs.Server.Namenode.HA.TestStandbyIsHot.Log.Warn(
            "No block locations yet: " + e.Message);
        return false;
    }
}
/// <summary>
/// Verifies that a client call issued while the active NN is in safemode is
/// retried and eventually succeeds once the NN leaves safemode.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestClientRetrySafeMode()
{
    Path dir = new Path("/test");
    IDictionary<Path, bool> outcomes =
        Collections.SynchronizedMap(new Dictionary<Path, bool>());

    // Put nn0 into safemode and overwrite its internal "extension" field.
    NameNodeAdapter.EnterSafeMode(nn0, false);
    FSNamesystem.SafeModeInfo safeModeInfo = (FSNamesystem.SafeModeInfo)
        Whitebox.GetInternalState(nn0.GetNamesystem(), "safeMode");
    Whitebox.SetInternalState(safeModeInfo, "extension",
        Sharpen.Extensions.ValueOf(30000));
    Log.Info("enter safemode");

    // Helper thread; it is handed this test instance, the path and the
    // shared result map.
    new _Thread_133(this, dir, outcomes).Start();

    // make sure the client's call has actually been handled by the active NN
    bool existsInSafemode = fs.Exists(dir);
    NUnit.Framework.Assert.IsFalse(
        "The directory should not be created while NN in safemode", existsInSafemode);
    Sharpen.Thread.Sleep(1000);

    // Release nn0 from safemode.
    NameNodeAdapter.LeaveSafeMode(nn0);
    Log.Info("leave safemode");

    // Block on this instance's monitor until a result for the path shows up.
    // NOTE(review): presumably the helper thread notifies on `this` after
    // storing its result; its body is not visible here.
    lock (this)
    {
        while (!outcomes.Contains(dir))
        {
            Sharpen.Runtime.Wait(this);
        }
        NUnit.Framework.Assert.IsTrue(outcomes[dir]);
    }
}
/// <summary>
/// Walks a two-NameNode HA cluster through startup, activation of NN0, a restart
/// of the standby and a failover to NN1, checking after each step which name
/// directories and the shared edits directory contain edit log files, and that
/// NN1 only sees the directories created on NN0 once it becomes active.
/// </summary>
public virtual void TestStartup()
{
    Configuration conf = new Configuration();
    HAUtil.SetAllowStandbyReads(conf, true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0)
        .Build();
    try
    {
        // Right after HA startup both NNs are standby, so no edits
        // directory (local or shared) may contain any edits file.
        IList<URI> everyDir = Lists.NewArrayList();
        Sharpen.Collections.AddAll(everyDir, cluster.GetNameDirs(0));
        Sharpen.Collections.AddAll(everyDir, cluster.GetNameDirs(1));
        everyDir.AddItem(cluster.GetSharedEditsDir(0, 1));
        AssertNoEditFiles(everyDir);

        // Activate NN0: it must open an in-progress segment in its own
        // dirs and in the shared dir. The standby must still have none.
        cluster.TransitionToActive(0);
        AssertEditFiles(cluster.GetNameDirs(0),
            NNStorage.GetInProgressEditsFileName(1));
        AssertEditFiles(
            Sharpen.Collections.SingletonList(cluster.GetSharedEditsDir(0, 1)),
            NNStorage.GetInProgressEditsFileName(1));
        AssertNoEditFiles(cluster.GetNameDirs(1));

        cluster.GetNameNode(0).GetRpcServer().Mkdirs(
            "/test", FsPermission.CreateImmutable((short)0x1ed), true);

        // A standby restart must not finalize any edits file in the
        // shared directory.
        cluster.RestartNameNode(1);
        AssertEditFiles(cluster.GetNameDirs(0),
            NNStorage.GetInProgressEditsFileName(1));
        AssertEditFiles(
            Sharpen.Collections.SingletonList(cluster.GetSharedEditsDir(0, 1)),
            NNStorage.GetInProgressEditsFileName(1));
        AssertNoEditFiles(cluster.GetNameDirs(1));

        // Nor may it have applied in-progress logs at start-up. Otherwise
        // it would be half-way into the current segment and, on the next
        // roll, would have to either replay from the middle of the segment
        // (not allowed) or double-replay the edits (incorrect).
        NUnit.Framework.Assert.IsNull(
            NameNodeAdapter.GetFileInfo(cluster.GetNameNode(1), "/test", true));

        cluster.GetNameNode(0).GetRpcServer().Mkdirs(
            "/test2", FsPermission.CreateImmutable((short)0x1ed), true);

        // Restarting NN0 brings it back as standby, so NN1 can be made
        // active and must read the edits correctly at this point.
        cluster.RestartNameNode(0);
        cluster.TransitionToActive(1);

        // NN1 must now see the edits from before and after its restart.
        NUnit.Framework.Assert.IsNotNull(
            NameNodeAdapter.GetFileInfo(cluster.GetNameNode(1), "/test", true));
        NUnit.Framework.Assert.IsNotNull(
            NameNodeAdapter.GetFileInfo(cluster.GetNameNode(1), "/test2", true));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Marks the only replica of a replication-1 file as corrupt and checks that the
/// UnderReplicatedBlocks, MissingBlocks and MissingReplOneBlocks gauges each read
/// 1, then deletes the file and waits for UnderReplicatedBlocks to return to 0.
/// </summary>
public virtual void TestMissingBlock()
{
    // Create a file whose data is written with a replication factor of 1.
    Path target = GetTestPath("testMissingBlocks");
    CreateFile(target, 100, (short)1);

    // Corrupt the only replica of the block so the block becomes missing.
    LocatedBlock onlyBlock = NameNodeAdapter.GetBlockLocations(
        cluster.GetNameNode(), target.ToString(), 0, 1).Get(0);
    cluster.GetNamesystem().WriteLock();
    try
    {
        bm.FindAndMarkBlockAsCorrupt(
            onlyBlock.GetBlock(), onlyBlock.GetLocations()[0], "STORAGE_ID", "TEST");
    }
    finally
    {
        cluster.GetNamesystem().WriteUnlock();
    }

    // Refresh the metrics and check the three gauges.
    UpdateMetrics();
    MetricsRecordBuilder metrics = MetricsAsserts.GetMetrics(NsMetrics);
    MetricsAsserts.AssertGauge("UnderReplicatedBlocks", 1L, metrics);
    MetricsAsserts.AssertGauge("MissingBlocks", 1L, metrics);
    MetricsAsserts.AssertGauge("MissingReplOneBlocks", 1L, metrics);

    // Removing the file must clear the under-replicated count again.
    fs.Delete(target, true);
    WaitForDnMetricValue(NsMetrics, "UnderReplicatedBlocks", 0L);
}
/// <summary>
/// Checks that the NN initializes its under-replicated blocks queue before it
/// is ready to leave safemode (HDFS-1476).
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInitializeReplQueuesEarly()
{
    Log.Info("Starting testInitializeReplQueuesEarly");

    // Have the blocks spread over the cluster when DNs are added, rather
    // than piling them all up on the first node.
    BlockManagerTestUtil.SetWritingPrefersLocalNode(
        cluster.GetNamesystem().GetBlockManager(), false);
    cluster.StartDataNodes(conf, 2, true, HdfsServerConstants.StartupOption.Regular, null);
    cluster.WaitActive();

    Log.Info("Creating files");
    DFSTestUtil.CreateFile(fs, TestPath, 15 * BlockSize, (short)1, 1L);

    Log.Info("Stopping all DataNodes");
    // Always stop index 0; three datanodes are stopped in total.
    IList<MiniDFSCluster.DataNodeProperties> stoppedNodes = Lists.NewLinkedList();
    for (int i = 0; i < 3; i++)
    {
        stoppedNodes.AddItem(cluster.StopDataNode(0));
    }

    // Repl queues are to be initialized once 1/15 of the blocks is reported.
    cluster.GetConfiguration(0).SetFloat(
        DFSConfigKeys.DfsNamenodeReplQueueThresholdPctKey, 1f / 15f);

    Log.Info("Restarting NameNode");
    cluster.RestartNameNode();
    NameNode nn = cluster.GetNameNode();
    string safemodeStatus = nn.GetNamesystem().GetSafemode();
    NUnit.Framework.Assert.AreEqual(
        "Safe mode is ON. The reported blocks 0 needs additional "
            + "15 blocks to reach the threshold 0.9990 of total blocks 15." + Newline
            + "The number of live datanodes 0 has reached the minimum number 0. "
            + "Safe mode will be turned off automatically once the thresholds "
            + "have been reached.",
        safemodeStatus);
    NUnit.Framework.Assert.IsFalse(
        "Mis-replicated block queues should not be initialized "
            + "until threshold is crossed",
        NameNodeAdapter.SafeModeInitializedReplQueues(nn));

    Log.Info("Restarting one DataNode");
    cluster.RestartDataNode(stoppedNodes.Remove(0));

    // Wait until the block reports from all attached storages of the
    // restarted DN have come in.
    GenericTestUtils.WaitFor(new _Supplier_214(this), 10, 10000);

    int safeBlocks = NameNodeAdapter.GetSafeModeSafeBlocks(nn);
    NUnit.Framework.Assert.IsTrue(
        "Expected first block report to make some blocks safe.", safeBlocks > 0);
    NUnit.Framework.Assert.IsTrue(
        "Did not expect first block report to make all blocks safe.", safeBlocks < 15);
    NUnit.Framework.Assert.IsTrue(NameNodeAdapter.SafeModeInitializedReplQueues(nn));

    // UnderReplicatedBlocks has to climb to 15 - safeBlocks. Misreplicated
    // blocks are processed asynchronously, so this can take a few seconds;
    // a failure here shows up as a test timeout.
    int expectedUnderReplicated = 15 - safeBlocks;
    BlockManagerTestUtil.UpdateState(nn.GetNamesystem().GetBlockManager());
    long actualUnderReplicated = nn.GetNamesystem().GetUnderReplicatedBlocks();
    while (actualUnderReplicated != expectedUnderReplicated)
    {
        Log.Info("UnderReplicatedBlocks expected=" + expectedUnderReplicated
            + ", actual=" + actualUnderReplicated);
        Sharpen.Thread.Sleep(100);
        BlockManagerTestUtil.UpdateState(nn.GetNamesystem().GetBlockManager());
        actualUnderReplicated = nn.GetNamesystem().GetUnderReplicatedBlocks();
    }

    cluster.RestartDataNodes();
}
/// <summary>
/// Restarts both NameNodes of <paramref name="cluster"/>, makes NN0 active, creates
/// <paramref name="path"/> through a failover file system and checks that NN1
/// reports it as a directory once it has caught up.
/// </summary>
/// <param name="cluster">the HA mini cluster whose NameNodes are restarted</param>
/// <param name="conf">configuration used to build the failover file system</param>
/// <param name="path">directory to create as the HA smoke test</param>
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
private void AssertCanStartHANameNodes(MiniDFSCluster cluster, Configuration conf
    , string path)
{
    // Both NNs should be startable now. The first restart passes "false" so
    // that we do not waitActive on all NNs: the second NN doesn't exist yet.
    cluster.RestartNameNode(0, false);
    cluster.RestartNameNode(1, true);

    // Exercise HA: make NN0 active on behalf of a user request.
    HAServiceProtocol.StateChangeRequestInfo userRequest =
        new HAServiceProtocol.StateChangeRequestInfo(
            HAServiceProtocol.RequestSource.RequestByUser);
    cluster.GetNameNode(0).GetRpcServer().TransitionToActive(userRequest);

    FileSystem failoverFs = null;
    try
    {
        Path dir = new Path(path);
        failoverFs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        bool created = failoverFs.Mkdirs(dir);
        NUnit.Framework.Assert.IsTrue(created);

        // The standby must see the new directory after catching up.
        HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1));
        bool visibleOnStandby = NameNodeAdapter.GetFileInfo(
            cluster.GetNameNode(1), dir.ToString(), false).IsDir();
        NUnit.Framework.Assert.IsTrue(visibleOnStandby);
    }
    finally
    {
        if (failoverFs != null)
        {
            failoverFs.Close();
        }
    }
}
/// <summary>
/// Creates a file with one empty block under a snapshotted directory, renames and
/// deletes its parent, saves the namespace and restarts the NameNode.
/// </summary>
public virtual void TestOpenFilesWithRename()
{
    Path snapshotRoot = new Path("/test");
    DoWriteAndAbort(fs, snapshotRoot);

    // Also cover a file that only has a zero sized block.
    Path emptyBlockFile = new Path("/test/test/test4");
    fs.Create(emptyBlockFile);
    NamenodeProtocols rpc = cluster.GetNameNodeRpc();
    string client = fs.GetClient().GetClientName();
    // Allocate one block for the file without writing any data into it.
    rpc.AddBlock(emptyBlockFile.ToString(), client, null, null,
        INodeId.GrandfatherInodeId, null);

    // Snapshot, then rename and delete the directory holding the file.
    fs.CreateSnapshot(snapshotRoot, "s2");
    Path renamed = new Path("/test/test-renamed");
    fs.Rename(new Path("/test/test"), renamed);
    fs.Delete(new Path("/test/test-renamed"), true);

    // Save the namespace while in safemode, then restart the NameNode.
    NameNode active = cluster.GetNameNode();
    NameNodeAdapter.EnterSafeMode(active, false);
    NameNodeAdapter.SaveNamespace(active);
    NameNodeAdapter.LeaveSafeMode(active);
    cluster.RestartNameNode(true);
}
/// <summary>
/// Writes one mkdir op, created from the string <c>"tx " + txid</c> and stamped
/// with transaction id <paramref name="txid"/>, to <paramref name="stm"/>.
/// </summary>
/// <param name="stm">edit log stream that receives the op</param>
/// <param name="txid">transaction id assigned to the op</param>
/// <exception cref="System.IO.IOException"/>
public static void WriteOp(EditLogOutputStream stm, long txid)
{
    string label = "tx " + txid;
    FSEditLogOp mkdirOp = NameNodeAdapter.CreateMkdirOp(label);
    mkdirOp.SetTransactionId(txid);
    stm.Write(mkdirOp);
}
/// <summary>
/// Checks that getBlockLocations does not take the FSNamesystem write lock
/// when no access time update is needed.
/// </summary>
/// <remarks>Regression test for HDFS-3981.</remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void TestGetBlockLocationsOnlyUsesReadLock()
{
    Configuration conf = new HdfsConfiguration();
    // Access time precision of 100 seconds, expressed in milliseconds.
    conf.SetInt(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 100 * 1000);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(0)
        .Build();
    ReentrantReadWriteLock fsLockSpy = NameNodeAdapter.SpyOnFsLock(cluster.GetNamesystem());
    try
    {
        // Put an empty file into the namespace.
        Path emptyFile = new Path("/empty-file");
        DFSTestUtil.CreateFile(cluster.GetFileSystem(), emptyFile, 0, (short)1, 0L);

        // The file was just created, so its access time is already within
        // the precision configured above and getBlockLocations() has no
        // reason to take the write lock. Make any such attempt throw.
        MockitoUtil.DoThrowWhenCallStackMatches(
            new Exception("Should not need write lock"),
            ".*getBlockLocations.*").When(fsLockSpy).WriteLock();
        cluster.GetFileSystem().GetFileBlockLocations(emptyFile, 0, 100);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Restarts the standby so that it comes up in safemode, then enters safemode
/// on it manually twice and checks after each call that it is in safemode.
/// </summary>
public virtual void TestEnterSafeModeInSBNShouldNotThrowNPE()
{
    Path original = new Path("/test");
    Path unlogged = new Path("/test2");

    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, original, 3 * BlockSize, (short)3, 1L);

    // Roll the edit log so that the restarted SBN loads the namespace
    // during startup and enters safemode.
    nn0.GetRpcServer().RollEditLog();

    Banner("Creating some blocks that won't be in the edit log");
    DFSTestUtil.CreateFile(fs, unlogged, 5 * BlockSize, (short)3, 1L);

    Banner("Deleting the original blocks");
    fs.Delete(new Path("/test"), true);

    Banner("Restarting standby");
    RestartStandby();

    FSNamesystem standbyNs = nn1.GetNamesystem();
    string status = standbyNs.GetSafemode();
    NUnit.Framework.Assert.IsTrue(
        "Bad safemode status: '" + status + "'",
        status.StartsWith("Safe mode is ON."));

    // Enter safemode by hand twice; each call must leave the standby in
    // safemode.
    for (int attempt = 0; attempt < 2; attempt++)
    {
        NameNodeAdapter.EnterSafeMode(nn1, false);
        NUnit.Framework.Assert.IsTrue(
            "Failed to enter into safemode in standby", standbyNs.IsInSafeMode());
    }
}
/// <summary>
/// Regression test for HDFS-2693: state transitions have to lock the
/// FSNamesystem so that no writes happen while it is "in between" states.
/// </summary>
/// <remarks>
/// Starts several client threads that perform mutation operations while one
/// more thread flips a NN back and forth between active and standby.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestTransitionSynchronization()
{
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0)
        .Build();
    try
    {
        cluster.WaitActive();

        // Answer every write-lock acquisition on NN0 with a SleepAnswer(50).
        ReentrantReadWriteLock fsLockSpy =
            NameNodeAdapter.SpyOnFsLock(cluster.GetNameNode(0).GetNamesystem());
        Org.Mockito.Mockito.DoAnswer(new GenericTestUtils.SleepAnswer(50))
            .When(fsLockSpy).WriteLock();

        FileSystem failoverFs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        MultithreadedTestUtil.TestContext context = new MultithreadedTestUtil.TestContext();

        // Fifty repeating threads, each identified by its own index ...
        int worker = 0;
        while (worker < 50)
        {
            context.AddThread(new _RepeatingTestThread_256(worker, failoverFs, context));
            worker++;
        }
        // ... plus one repeating thread that is given the cluster.
        context.AddThread(new _RepeatingTestThread_266(cluster, context));

        context.StartThreads();
        context.WaitFor(20000);
        context.Stop();
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Checks that <c>-getServiceState</c> exits with 0 for both NameNodes at the
/// start, and for nn1 after NN0 is made active and again after it enters safemode.
/// </summary>
public virtual void TestGetServiceState()
{
    string command = "-getServiceState";

    int nn1Initial = RunTool(command, "nn1");
    NUnit.Framework.Assert.AreEqual(0, nn1Initial);
    int nn2Initial = RunTool(command, "nn2");
    NUnit.Framework.Assert.AreEqual(0, nn2Initial);

    cluster.TransitionToActive(0);
    int nn1Active = RunTool(command, "nn1");
    NUnit.Framework.Assert.AreEqual(0, nn1Active);

    NameNodeAdapter.EnterSafeMode(cluster.GetNameNode(0), false);
    int nn1InSafemode = RunTool(command, "nn1");
    NUnit.Framework.Assert.AreEqual(0, nn1InSafemode);
}
/// <summary>
/// Puts NN0 into safemode and checks that <c>-failover nn2 nn1</c> exits with -1
/// and reports that the target is not ready because it is in safemode.
/// </summary>
public virtual void TestTryFailoverToSafeMode()
{
    string fencer = TestDFSHAAdmin.GetFencerTrueCommand();
    conf.Set(DFSConfigKeys.DfsHaFenceMethodsKey, fencer);
    tool.SetConf(conf);

    NameNodeAdapter.EnterSafeMode(cluster.GetNameNode(0), false);

    int exitCode = RunTool("-failover", "nn2", "nn1");
    NUnit.Framework.Assert.AreEqual(-1, exitCode);
    NUnit.Framework.Assert.IsTrue(
        "Bad output: " + errOutput,
        errOutput.Contains("is not ready to become active: "
            + "The NameNode is in safemode"));
}
/// <summary>
/// Starts a block write to a second datanode and closes the connection before any
/// block content is sent, waits until that datanode's rbw directories are empty
/// again, and then raises the file's replication factor.
/// </summary>
public virtual void TestReplicationError()
{
    // create a file of replication factor of 1
    Path fileName = new Path("/test.txt");
    int fileLen = 1;
    DFSTestUtil.CreateFile(fs, fileName, fileLen, (short)1, 1L);
    DFSTestUtil.WaitReplication(fs, fileName, (short)1);

    // get the block belonged to the created file
    LocatedBlocks blocks = NameNodeAdapter.GetBlockLocations(cluster.GetNameNode(),
        fileName.ToString(), 0, (long)fileLen);
    // Expected value first, actual second, so a failure reports the two
    // values the right way round.
    NUnit.Framework.Assert.AreEqual("Should only find 1 block", 1,
        blocks.LocatedBlockCount());
    LocatedBlock block = blocks.Get(0);

    // bring up a second datanode
    cluster.StartDataNodes(conf, 1, true, null, null);
    cluster.WaitActive();
    int sndNode = 1;
    DataNode datanode = cluster.GetDataNodes()[sndNode];

    // replicate the block to the second datanode
    IPEndPoint target = datanode.GetXferAddress();
    Socket s = Sharpen.Extensions.CreateSocket(target.Address, target.Port);

    // write the header.
    DataOutputStream @out = new DataOutputStream(s.GetOutputStream());
    DataChecksum checksum = DataChecksum.NewDataChecksum(DataChecksum.Type.Crc32, 512);
    new Sender(@out).WriteBlock(block.GetBlock(), StorageType.Default,
        BlockTokenSecretManager.DummyToken, string.Empty, new DatanodeInfo[0],
        new StorageType[0], null, BlockConstructionStage.PipelineSetupCreate,
        1, 0L, 0L, 0L, checksum, CachingStrategy.NewDefaultStrategy(),
        false, false, null);
    @out.Flush();

    // close the connection before sending the content of the block
    @out.Close();

    // the temporary block & meta files should be deleted
    string bpid = cluster.GetNamesystem().GetBlockPoolId();
    FilePath storageDir = cluster.GetInstanceStorageDir(sndNode, 0);
    FilePath dir1 = MiniDFSCluster.GetRbwDir(storageDir, bpid);
    storageDir = cluster.GetInstanceStorageDir(sndNode, 1);
    FilePath dir2 = MiniDFSCluster.GetRbwDir(storageDir, bpid);
    while (dir1.ListFiles().Length != 0 || dir2.ListFiles().Length != 0)
    {
        Sharpen.Thread.Sleep(100);
    }

    // then increase the file's replication factor
    fs.SetReplication(fileName, (short)2);

    // replication should succeed
    // NOTE(review): this waits for replication 1 although the factor was
    // just raised to 2; left unchanged, confirm whether 2 was intended.
    DFSTestUtil.WaitReplication(fs, fileName, (short)1);

    // clean up the file
    fs.Delete(fileName, false);
}
/// <summary>
/// Makes a file over-replicated on NN1, fails over to NN2 while NN1 keeps
/// believing it is active, and checks that NN2 ends up with no postponed,
/// under-replicated or pending-replication blocks and that the file is still
/// readable with a single true replica per block.
/// </summary>
public virtual void TestDnFencing()
{
    // Start from a file written at replication level 3.
    DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)3, 1L);
    ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestFilePath);

    // Lower the replication to 1 so the file is over-replicated, compute
    // the invalidations for the surplus replicas, and trigger heartbeats
    // so that those invalidations are flushed to the DNs.
    nn1.GetRpcServer().SetReplication(TestFile, (short)1);
    BlockManager nn1Blocks = nn1.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.ComputeInvalidationWork(nn1Blocks);
    cluster.TriggerHeartbeats();

    // Make nn2 active while nn1 still considers itself active.
    Banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.AbortEditLogs(nn1);
    NameNodeAdapter.EnterSafeMode(nn1, false);
    cluster.TransitionToActive(1);

    // The former standby must have seen the replication change.
    NUnit.Framework.Assert.AreEqual(1,
        nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

    // Metadata dump, for debugging only.
    Banner("NN2 Metadata immediately after failover");
    DoMetasave(nn2);

    Banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.TriggerHeartbeats();
    cluster.TriggerBlockReports();

    Banner("Metadata after nodes have all block-reported");
    DoMetasave(nn2);

    // Force postponedMisreplicatedBlocks to be rescanned.
    BlockManager nn2Blocks = nn2.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.CheckHeartbeat(nn2Blocks);
    BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(nn2Blocks);

    // Nothing may remain postponed after the rescan.
    NUnit.Framework.Assert.AreEqual(0,
        nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

    // Give NN2 the chance to enact its deletions (the replication monitor
    // has to run, etc).
    BlockManagerTestUtil.ComputeInvalidationWork(
        nn2.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    NUnit.Framework.Assert.AreEqual(0,
        nn2.GetNamesystem().GetUnderReplicatedBlocks());
    NUnit.Framework.Assert.AreEqual(0,
        nn2.GetNamesystem().GetPendingReplicationBlocks());

    Banner("Making sure the file is still readable");
    FileSystem nn2Fs = cluster.GetFileSystem(1);
    DFSTestUtil.ReadFile(nn2Fs, TestFilePath);

    Banner("Waiting for the actual block files to get deleted from DNs.");
    WaitForTrueReplication(cluster, firstBlock, 1);
}
/// <summary>
/// Spies on nn1's edit log and routes every <c>SelectInputStreams</c> call on it
/// through a new <c>LimitedEditLogAnswer</c>.
/// </summary>
/// <returns>the answer object installed on the spied edit log</returns>
/// <exception cref="System.IO.IOException"/>
private TestFailureToReadEdits.LimitedEditLogAnswer CauseFailureOnEditLogRead()
{
    FSEditLog editLogSpy = NameNodeAdapter.SpyOnEditLog(nn1);
    TestFailureToReadEdits.LimitedEditLogAnswer limitedAnswer =
        new TestFailureToReadEdits.LimitedEditLogAnswer();
    // The matchers stay inline in the stubbed call.
    Org.Mockito.Mockito.DoAnswer(limitedAnswer).When(editLogSpy).SelectInputStreams(
        Matchers.AnyLong(),
        Matchers.AnyLong(),
        (MetaRecoveryContext)Matchers.AnyObject(),
        Matchers.AnyBoolean());
    return limitedAnswer;
}
/// <summary>
/// Shuts the cluster down and initializes it again, first saving a new
/// checkpoint when asked to.
/// </summary>
/// <param name="checkpoint">true to enter safemode and save the namespace before restarting</param>
/// <exception cref="System.Exception">if restart fails</exception>
private static void Restart(bool checkpoint)
{
    NameNode active = cluster.GetNameNode();
    if (checkpoint)
    {
        // The namespace is saved while the NameNode is in safemode.
        NameNodeAdapter.EnterSafeMode(active, false);
        NameNodeAdapter.SaveNamespace(active);
    }

    Shutdown();
    InitCluster(false);
}
/// <summary>
/// Serializes <paramref name="numTxns"/> consecutive mkdir ops, starting at
/// transaction id <paramref name="startTxn"/>, into a byte array.
/// </summary>
/// <param name="startTxn">transaction id of the first op</param>
/// <param name="numTxns">number of ops to write; nothing is written when it is not positive</param>
/// <returns>a copy of the written bytes, trimmed to the length actually used</returns>
/// <exception cref="System.Exception"/>
public static byte[] CreateTxnData(int startTxn, int numTxns)
{
    DataOutputBuffer buf = new DataOutputBuffer();
    FSEditLogOp.Writer writer = new FSEditLogOp.Writer(buf);

    // Widen to long BEFORE adding: the int sum startTxn + numTxns can
    // overflow and wrap negative, which would silently write no ops.
    long endTxnExclusive = (long)startTxn + numTxns;
    for (long txid = startTxn; txid < endTxnExclusive; txid++)
    {
        FSEditLogOp op = NameNodeAdapter.CreateMkdirOp("tx " + txid);
        op.SetTransactionId(txid);
        writer.WriteOp(op);
    }

    // Copy only the bytes written so far, not the whole backing buffer.
    return Arrays.CopyOf(buf.GetData(), buf.GetLength());
}
/// <summary>
/// Builds the configuration (WebHDFS on, delegation-token lifetime and renew
/// interval, auth_to_local rule, default URI), starts a mini cluster with no
/// datanodes and fetches the namesystem's delegation token secret manager.
/// </summary>
public virtual void SetUp()
{
    config = new HdfsConfiguration();

    // WebHDFS and delegation token settings.
    config.SetBoolean(DFSConfigKeys.DfsWebhdfsEnabledKey, true);
    config.SetLong(DFSConfigKeys.DfsNamenodeDelegationTokenMaxLifetimeKey, 10000);
    config.SetLong(DFSConfigKeys.DfsNamenodeDelegationTokenRenewIntervalKey, 5000);
    config.SetBoolean(DFSConfigKeys.DfsNamenodeDelegationTokenAlwaysUseKey, true);

    // Principal-to-local-name mapping rule.
    string authToLocalRule = "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT";
    config.Set("hadoop.security.auth_to_local", authToLocalRule);

    string defaultUri = "hdfs://localhost:" + "0";
    FileSystem.SetDefaultUri(config, defaultUri);

    cluster = new MiniDFSCluster.Builder(config)
        .NumDataNodes(0)
        .Build();
    cluster.WaitActive();
    dtSecretManager = NameNodeAdapter.GetDtSecretManager(cluster.GetNamesystem());
}
/// <summary>
/// Persists a four-block file in a saved image, then logs the creation of a
/// two-block file and the deletion of the first file, and restarts the active NN.
/// </summary>
public virtual void TestBlocksDeletedInEditLog()
{
    Path inImage = new Path("/test");
    Path inEditLog = new Path("/test2");

    Banner("Starting with NN0 active and NN1 standby, creating some blocks");

    // Four blocks that end up persisted in the image.
    DFSTestUtil.CreateFile(fs, inImage, 4 * BlockSize, (short)3, 1L);
    NameNodeAdapter.EnterSafeMode(nn0, false);
    NameNodeAdapter.SaveNamespace(nn0);
    NameNodeAdapter.LeaveSafeMode(nn0);

    // OP_ADD for 2 blocks
    DFSTestUtil.CreateFile(fs, inEditLog, 2 * BlockSize, (short)3, 1L);

    // OP_DELETE for 4 blocks
    fs.Delete(new Path("/test"), true);

    RestartActive();
}
/// <summary>
/// Checks who holds the lease on <paramref name="f"/>: nobody when
/// <paramref name="size"/> is 0, the NameNode lease holder otherwise.
/// </summary>
/// <param name="f">path whose lease holder is looked up</param>
/// <param name="size">0 selects the "no lease holder" expectation</param>
internal static void CheckLease(string f, int size)
{
    string leaseHolder = NameNodeAdapter.GetLeaseHolderForPath(cluster.GetNameNode(), f);
    if (size != 0)
    {
        NUnit.Framework.Assert.AreEqual("lease holder should now be the NN",
            HdfsServerConstants.NamenodeLeaseHolder, leaseHolder);
        return;
    }

    NUnit.Framework.Assert.AreEqual("lease holder should null, file is closed",
        null, leaseHolder);
}
/// <summary>
/// Calls the real method and returns its op, unless failures are switched on
/// and the op is the mkdir of <c>TestDir3</c>, in which case an
/// <c>IOException</c> is thrown instead.
/// </summary>
/// <param name="invocation">the intercepted invocation</param>
/// <returns>the op produced by the real method</returns>
/// <exception cref="System.Exception"/>
public FSEditLogOp Answer(InvocationOnMock invocation)
{
    FSEditLogOp realOp = (FSEditLogOp)invocation.CallRealMethod();

    // The path is only inspected when failures are enabled.
    bool mustFail = this._enclosing.throwExceptionOnRead
        && TestFailureToReadEdits.TestDir3.Equals(NameNodeAdapter.GetMkdirOpPath(realOp));
    if (!mustFail)
    {
        return realOp;
    }

    throw new IOException("failed to read op creating " + TestFailureToReadEdits.TestDir3);
}
/// <summary>
/// Bootstraps NN1 from an active NN0 right after a saveNamespace: expects return
/// code 6 without <c>-skipSharedEditsCheck</c> and 0 with it, then restarts NN1
/// as standby and checks that it checkpoints and that the NN files match.
/// </summary>
public virtual void TestBootstrapStandbyWithActiveNN()
{
    // NN0 becomes the active node.
    cluster.TransitionToActive(0);

    // Run some ops so that there is in-progress edit log data.
    Configuration standbyConf = cluster.GetConfiguration(1);
    DistributedFileSystem failoverFs =
        (DistributedFileSystem)HATestUtil.ConfigureFailoverFs(cluster, standbyConf);
    int dirIndex = 1;
    while (dirIndex <= 10)
    {
        failoverFs.Mkdirs(new Path("/test" + dirIndex));
        dirIndex++;
    }
    failoverFs.Close();

    // Stop nn1 and remove its edit log files.
    cluster.ShutdownNameNode(1);
    DeleteEditLogIfExists(standbyConf);

    // Save the namespace on NN0 while it is in safemode.
    cluster.GetNameNodeRpc(0).SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter, true);
    cluster.GetNameNodeRpc(0).SaveNamespace();
    cluster.GetNameNodeRpc(0).SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave, true);

    // Without -skipSharedEditsCheck the bootstrap should fail for BKJM
    // immediately after saveNamespace.
    string[] strictArgs = new string[] { "-force", "-nonInteractive" };
    int exitCode = BootstrapStandby.Run(strictArgs, standbyConf);
    NUnit.Framework.Assert.AreEqual("Mismatches return code", 6, exitCode);

    // With -skipSharedEditsCheck it has to succeed.
    string[] lenientArgs = new string[] { "-force", "-nonInteractive", "-skipSharedEditsCheck" };
    exitCode = BootstrapStandby.Run(lenientArgs, standbyConf);
    NUnit.Framework.Assert.AreEqual("Mismatches return code", 0, exitCode);

    // Checkpoint as fast as we can, in a tight loop.
    standbyConf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointPeriodKey, 1);
    cluster.RestartNameNode(1);
    cluster.TransitionToStandby(1);

    NameNode nn0 = cluster.GetNameNode(0);
    HATestUtil.WaitForStandbyToCatchUp(nn0, cluster.GetNameNode(1));
    long expectedCheckpointTxId =
        NameNodeAdapter.GetNamesystem(nn0).GetFSImage().GetMostRecentCheckpointTxId();
    HATestUtil.WaitForCheckpoint(cluster, 1,
        ImmutableList.Of((int)expectedCheckpointTxId));

    // The namespace should have been copied over.
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1,
        ImmutableList.Of((int)expectedCheckpointTxId));
    FSImageTestUtil.AssertNNFilesMatch(cluster);
}
/// <summary>
/// Checks that clients still get StandbyExceptions while a checkpoint is in
/// progress on the SBN, i.e. while the StandbyCheckpointer thread holds the
/// FSNS lock.
/// </summary>
/// <remarks>Regression test for HDFS-4591.</remarks>
/// <exception cref="System.Exception"/>
public virtual void TestStandbyExceptionThrownDuringCheckpoint()
{
    // Intercept saveNamespace on nn1's image so the test knows when the
    // SBN checkpoint starts and can decide when it ends.
    FSImage imageSpy = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(delayer).When(imageSpy).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Eq(NNStorage.NameNodeFile.Image),
        Org.Mockito.Mockito.Any<Canceler>());

    // Generate edits, roll the log, and wait for the checkpoint to start.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    delayer.WaitForCall();
    bool checkpointStarted = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 0;
    NUnit.Framework.Assert.IsTrue(
        "SBN is not performing checkpoint but it should be.", checkpointStarted);

    // Leave time for the checkpointing thread to actually take the lock.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    try
    {
        // An RPC to the SBN has to be rejected with a StandbyException.
        nn1.GetRpcServer().GetFileInfo("/");
        NUnit.Framework.Assert.Fail(
            "Should have thrown StandbyException, but instead succeeded.");
    }
    catch (StandbyException se)
    {
        GenericTestUtils.AssertExceptionContains("is not supported", se);
    }

    // New incremental block reports must be processed on the SBN during
    // checkpointing: the pending message count starts at 0 and must be
    // positive after a create.
    NUnit.Framework.Assert.AreEqual(0,
        cluster.GetNamesystem(1).GetPendingDataNodeMessageCount());
    DoCreate();
    Sharpen.Thread.Sleep(1000);
    NUnit.Framework.Assert.IsTrue(
        cluster.GetNamesystem(1).GetPendingDataNodeMessageCount() > 0);

    // The checkpoint must still be running, which implies that the client
    // RPC to the SBN happened during the checkpoint.
    bool stillCheckpointing = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 0;
    NUnit.Framework.Assert.IsTrue(
        "SBN should have still been checkpointing.", stillCheckpointing);

    // Let the checkpoint run to completion.
    delayer.Proceed();
    delayer.WaitForResult();
    bool checkpointFinished = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 1;
    NUnit.Framework.Assert.IsTrue(
        "SBN should have finished checkpointing.", checkpointFinished);
}
/// <summary>
/// Lowers a file's replication from 2 to 1 and checks that the ExcessBlocks gauge
/// reads 1, then deletes the file and checks that the gauge reads 0 and that the
/// block manager's excess-replica map is empty.
/// </summary>
public virtual void TestExcessBlocks()
{
    Path target = GetTestPath("testExcessBlocks");
    CreateFile(target, 100, (short)2);

    // One of the two replicas becomes excess.
    NameNodeAdapter.SetReplication(namesystem, target.ToString(), (short)1);
    UpdateMetrics();
    MetricsRecordBuilder metrics = MetricsAsserts.GetMetrics(NsMetrics);
    MetricsAsserts.AssertGauge("ExcessBlocks", 1L, metrics);

    // Deleting the file must decrement the ExcessBlocks metric and clear
    // excessReplicateMap.
    fs.Delete(target, true);
    metrics = MetricsAsserts.GetMetrics(NsMetrics);
    MetricsAsserts.AssertGauge("ExcessBlocks", 0L, metrics);
    bool noExcessLeft = bm.excessReplicateMap.IsEmpty();
    NUnit.Framework.Assert.IsTrue(noExcessLeft);
}
/// <summary>
/// Holds an SBN checkpoint open and checks that, meanwhile, the FS lock has no
/// queued threads and is not write-locked, the checkpoint lock has queued threads,
/// and the standby's /jmx page can still be fetched.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReadsAllowedDuringCheckpoint()
{
    // Intercept saveNamespace on nn1's image so the test knows when the
    // SBN checkpoint starts and can decide when it ends.
    FSImage imageSpy = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(delayer).When(imageSpy).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Any<NNStorage.NameNodeFile>(),
        Org.Mockito.Mockito.Any<Canceler>());

    // Generate edits, roll the log, and wait for the checkpoint to start.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    delayer.WaitForCall();
    bool checkpointStarted = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 0;
    NUnit.Framework.Assert.IsTrue(
        "SBN is not performing checkpoint but it should be.", checkpointStarted);

    // Leave time for the checkpointing thread to actually take the lock.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    // Issue an RPC that needs the write lock from a separate thread.
    Sharpen.Thread lockSeeker = new _Thread_404(this);
    lockSeeker.Start();

    // Give that thread time to block. It is expected to queue on the
    // checkpoint lock, leaving the FS lock free of waiters and writers.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    NUnit.Framework.Assert.IsFalse(
        nn1.GetNamesystem().GetFsLockForTests().HasQueuedThreads());
    NUnit.Framework.Assert.IsFalse(
        nn1.GetNamesystem().GetFsLockForTests().IsWriteLocked());
    NUnit.Framework.Assert.IsTrue(
        nn1.GetNamesystem().GetCpLockForTests().HasQueuedThreads());

    // Fetch /jmx from the standby NN web UI, which causes the FSNS read
    // lock to be taken.
    string jmxUrl = "http://" + nn1.GetHttpAddress().GetHostName() + ":"
        + nn1.GetHttpAddress().Port + "/jmx";
    string pageContents = DFSTestUtil.UrlGet(new Uri(jmxUrl));
    NUnit.Framework.Assert.IsTrue(pageContents.Contains("NumLiveDataNodes"));

    // The checkpoint must still be running, which implies that the read
    // above happened during the checkpoint.
    bool stillCheckpointing = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 0;
    NUnit.Framework.Assert.IsTrue(
        "SBN should have still been checkpointing.", stillCheckpointing);

    // Let the checkpoint finish, then wait for the helper thread.
    delayer.Proceed();
    delayer.WaitForResult();
    bool checkpointFinished = delayer.GetFireCount() == 1 && delayer.GetResultCount() == 1;
    NUnit.Framework.Assert.IsTrue(
        "SBN should have finished checkpointing.", checkpointFinished);
    lockSeeker.Join();
}
/// <summary>
/// Sets the utilization of every storage of the first datanode to test values,
/// sets the datanode's total remaining space to their sum, and then creates a
/// 100k file. A <c>RemoteException</c> from the create is tolerated only if it
/// mentions "nodes instead of minReplication".
/// </summary>
/// <remarks>
/// Cleanup is nested so that the cluster is always shut down, even when a
/// cleanup assertion fails or the test failed before the file path was set.
/// </remarks>
public virtual void TestStorageWithRemainingCapacity()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    FileSystem fs = FileSystem.Get(conf);
    Path file1 = null;
    try
    {
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        string poolId = namesystem.GetBlockPoolId();
        DatanodeRegistration nodeReg = DataNodeTestUtils.GetDNRegistrationForBP(
            cluster.GetDataNodes()[0], poolId);
        DatanodeDescriptor dd = NameNodeAdapter.GetDatanode(namesystem, nodeReg);

        // By default, MiniDFSCluster will create 1 datanode with 2 storages.
        // Assigning 64k for remaining storage capacity and will
        // create a file with 100k.
        foreach (DatanodeStorageInfo storage in dd.GetStorageInfos())
        {
            storage.SetUtilizationForTesting(65536, 0, 65536, 0);
        }

        // sum of the remaining capacity of both the storages
        dd.SetRemaining(131072);
        file1 = new Path("testRemainingStorage.dat");
        try
        {
            DFSTestUtil.CreateFile(fs, file1, 102400, 102400, 102400, (short)1,
                unchecked((int)(0x1BAD5EED)));
        }
        catch (RemoteException re)
        {
            GenericTestUtils.AssertExceptionContains(
                "nodes instead of " + "minReplication", re);
        }
    }
    finally
    {
        try
        {
            // Clean up. Skipped when the test failed before the path was
            // assigned, so the original failure is not masked by a
            // follow-up error on a null path.
            if (file1 != null)
            {
                NUnit.Framework.Assert.IsTrue(fs.Exists(file1));
                fs.Delete(file1, true);
                NUnit.Framework.Assert.IsTrue(!fs.Exists(file1));
            }
        }
        finally
        {
            // Always release the cluster, even if a cleanup assertion fails.
            cluster.Shutdown();
        }
    }
}