/// <summary>
/// Make sure that clients will receive StandbyExceptions even when a
/// checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
/// thread will have FSNS lock.
/// </summary>
/// <remarks>
/// Regression test for HDFS-4591. The spied <c>SaveNamespace</c> call is
/// routed through a <c>DelayAnswer</c>, and the test only lets it continue
/// (via <c>Proceed</c>) after the client RPC and the file creation have been
/// issued against the SBN.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestStandbyExceptionThrownDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Eq(NNStorage.NameNodeFile.Image),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    answerer.WaitForCall();
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN is not performing checkpoint but it should be.");

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    try
    {
        // Perform an RPC to the SBN and make sure it throws a StandbyException.
        nn1.GetRpcServer().GetFileInfo("/");
        NUnit.Framework.Assert.Fail("Should have thrown StandbyException, but instead succeeded.");
    }
    catch (StandbyException se)
    {
        GenericTestUtils.AssertExceptionContains("is not supported", se);
    }

    // Make sure new incremental block reports are processed during
    // checkpointing on the SBN.
    NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetPendingDataNodeMessageCount());
    DoCreate();
    Sharpen.Thread.Sleep(1000);
    NUnit.Framework.Assert.IsTrue(cluster.GetNamesystem(1).GetPendingDataNodeMessageCount() > 0);

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN should have still been checkpointing.");

    // Release the delayed SaveNamespace call and wait for it to return.
    answerer.Proceed();
    answerer.WaitForResult();
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
        "SBN should have finished checkpointing.");
}
/// <summary>
/// Checks that the standby NN still answers a request for its <c>/jmx</c>
/// page while a checkpoint is in progress, and that a thread started during
/// the checkpoint queues on the checkpoint lock rather than on the FSNS lock.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReadsAllowedDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Any<NNStorage.NameNodeFile>(),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    answerer.WaitForCall();
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN is not performing checkpoint but it should be.");

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    // Perform an RPC that needs to take the write lock.
    Sharpen.Thread t = new _Thread_404(this);
    t.Start();

    // Make sure that our thread is waiting for the lock: nothing may be
    // queued on (or write-holding) the FSNS lock, while the checkpoint lock
    // must have a waiter.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().HasQueuedThreads());
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().IsWriteLocked());
    NUnit.Framework.Assert.IsTrue(nn1.GetNamesystem().GetCpLockForTests().HasQueuedThreads());

    // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
    // be taken.
    string pageContents = DFSTestUtil.UrlGet(new Uri("http://" + nn1.GetHttpAddress().GetHostName()
        + ":" + nn1.GetHttpAddress().Port + "/jmx"));
    NUnit.Framework.Assert.IsTrue(pageContents.Contains("NumLiveDataNodes"));

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN should have still been checkpointing.");

    // Release the delayed SaveNamespace call and wait for it to return.
    answerer.Proceed();
    answerer.WaitForResult();
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
        "SBN should have finished checkpointing.");

    // Wait for the helper thread started above to finish.
    t.Join();
}
/// <summary>
/// Creates a file, closes it after a short pause, and checks that the
/// modification time observed after the close is also the one reported once
/// the NameNode has been restarted.
/// </summary>
public virtual void TestModTimePersistsAfterRestart()
{
    // Pause between creating and closing the file, in milliseconds.
    long pauseMillis = 10;
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).Build();
        fs = cluster.GetFileSystem();
        Path file = new Path("/test");

        // Open the file and record the modification time it starts with.
        OutputStream stream = fs.Create(file);
        long createdAt = fs.GetFileStatus(file).GetModificationTime();
        bool hasInitialModTime = createdAt > 0;
        NUnit.Framework.Assert.IsTrue(hasInitialModTime);

        // Pause, close, and require the mod time to have advanced by at least
        // the length of the pause.
        ThreadUtil.SleepAtLeastIgnoreInterrupts(pauseMillis);
        stream.Close();
        long closedAt = fs.GetFileStatus(file).GetModificationTime();
        long earliestExpected = createdAt + pauseMillis;
        NUnit.Framework.Assert.IsTrue(closedAt >= earliestExpected);

        // After a NameNode restart the close-time mod time must still be reported.
        cluster.RestartNameNode();
        long afterRestart = fs.GetFileStatus(file).GetModificationTime();
        NUnit.Framework.Assert.AreEqual(closedAt, afterRestart);
    }
    finally
    {
        // Tear down in the same order as the original: file system, then cluster.
        if (fs != null)
        {
            fs.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Queues a DataNode message for a block with a rolled-back generation stamp
/// on the standby NN, then wipes and restarts the DataNode so it registers
/// under a new storage ID. Expects the pending message count on the standby
/// to drop back to zero, and a failover to the standby to succeed afterwards.
/// </summary>
/// <remarks>
/// Both polling loops are bounded: if the awaited condition is not reached
/// within the deadline the test fails instead of spinning forever.
/// </remarks>
public virtual void TestChangedStorageId()
{
    // Upper bound, in milliseconds, for each of the polling loops below.
    const long maxWaitMillis = 60000;

    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology()).Build();
    try
    {
        cluster.TransitionToActive(0);
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        OutputStream @out = fs.Create(filePath);
        @out.Write(Sharpen.Runtime.GetBytesForString("foo bar baz"));
        @out.Close();
        HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1));

        // Change the gen stamp of the block on datanode to go back in time (gen
        // stamps start at 1000)
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, filePath);
        NUnit.Framework.Assert.IsTrue(cluster.ChangeGenStampOfBlock(0, block, 900));

        // Stop the DN so the replica with the changed gen stamp will be reported
        // when this DN starts up.
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);

        // Restart the namenode so that when the DN comes up it will see an initial
        // block report.
        cluster.RestartNameNode(1, false);
        NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(dnProps, true));

        // Wait until the standby NN queues up the corrupt block in the pending DN
        // message queue, giving up once the deadline has passed.
        System.Diagnostics.Stopwatch queueWait = System.Diagnostics.Stopwatch.StartNew();
        while (cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount() < 1)
        {
            if (queueWait.ElapsedMilliseconds > maxWaitMillis)
            {
                NUnit.Framework.Assert.Fail("Timed out waiting for the standby NN to queue a pending "
                    + "DataNode message.");
            }
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
        }
        NUnit.Framework.Assert.AreEqual(1, cluster.GetNamesystem(1).GetBlockManager()
            .GetPendingDataNodeMessageCount());
        string oldStorageId = GetRegisteredDatanodeUid(cluster, 1);

        // Reformat/restart the DN.
        NUnit.Framework.Assert.IsTrue(WipeAndRestartDn(cluster, 0));

        // Give the DN time to start up and register, which will cause the
        // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
        string newStorageId = string.Empty;
        System.Diagnostics.Stopwatch registerWait = System.Diagnostics.Stopwatch.StartNew();
        do
        {
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            newStorageId = GetRegisteredDatanodeUid(cluster, 1);
            System.Console.Out.WriteLine("====> oldStorageId: " + oldStorageId + " newStorageId: "
                 + newStorageId);
            // Same deadline as above: do not poll forever if the ID never changes.
            if (newStorageId.Equals(oldStorageId) && registerWait.ElapsedMilliseconds > maxWaitMillis)
            {
                NUnit.Framework.Assert.Fail("Timed out waiting for the DataNode to register with a new "
                    + "storage ID.");
            }
        }
        while (newStorageId.Equals(oldStorageId));

        // The message queued for the old storage ID must be gone by now.
        NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetBlockManager()
            .GetPendingDataNodeMessageCount());

        // Now try to fail over.
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Writes a file across three DataNodes, stops two of them mid-write so the
/// client puts them on its exclude list, restarts them, waits past the
/// configured exclude-cache expiry, then stops the remaining DataNode and
/// checks that a further write still succeeds.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestExcludedNodesForgiveness()
{
    // Forgive nodes in under 2.5s for this test case.
    conf.SetLong(DFSConfigKeys.DfsClientWriteExcludeNodesCacheExpiryInterval, 2500);

    // We'll be using a 512 bytes block size just for tests
    // so making sure the checksum bytes too match it.
    conf.SetInt("io.bytes.per.checksum", 512);
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    FileSystem fs = cluster.GetFileSystem();
    Path filePath = new Path("/testForgivingExcludedNodes");

    // 256 bytes data chunk for writes
    byte[] bytes = new byte[256];
    for (int index = 0; index < bytes.Length; index++)
    {
        bytes[index] = (byte)('0');
    }

    // File with a 512 bytes block size
    FSDataOutputStream @out = fs.Create(filePath, true, 4096, (short)3, 512);

    // Write a block to all 3 DNs (2x256bytes).
    @out.Write(bytes);
    @out.Write(bytes);
    @out.Hflush();

    // Remove two DNs, to put them into the exclude list.
    MiniDFSCluster.DataNodeProperties two = cluster.StopDataNode(2);
    MiniDFSCluster.DataNodeProperties one = cluster.StopDataNode(1);

    // Write another block.
    // At this point, we have two nodes already in excluded list.
    @out.Write(bytes);
    @out.Write(bytes);
    @out.Hflush();

    // Bring back the older DNs, since they are gonna be forgiven only
    // afterwards of this previous block write.
    NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(one, true));
    NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(two, true));
    cluster.WaitActive();

    // Sleep for 5s, to let the excluded nodes be expired
    // from the excludes list (i.e. forgiven after the configured wait period).
    // [Sleeping just in case the restart of the DNs completed < 5s cause
    // otherwise, we'll end up quickly excluding those again.]
    ThreadUtil.SleepAtLeastIgnoreInterrupts(5000);

    // Terminate the last good DN, to assert that there's no
    // single-DN-available scenario, caused by not forgiving the other
    // two by now.
    cluster.StopDataNode(0);
    try
    {
        // Attempt writing another block, which should still pass
        // cause the previous two should have been forgiven by now,
        // while the last good DN added to excludes this time.
        @out.Write(bytes);
        @out.Hflush();
        @out.Close();
    }
    catch (Exception e)
    {
        // Convert any write failure into a test failure that names the cause.
        NUnit.Framework.Assert.Fail("Excluded DataNodes should be forgiven after a while and "
             + "not cause file writing exception of: '" + e.Message + "'");
    }
}
/// <summary>
/// Pauses via <c>ThreadUtil.SleepAtLeastIgnoreInterrupts(millisToSleep)</c>
/// and then sets the identifier of <c>impl1</c> to "renamed-impl1".
/// </summary>
public override void Run()
{
    // Delay first, so the rename below happens only after the pause.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(millisToSleep);

    string renamedIdentifier = "renamed-impl1";
    impl1.SetIdentifier(renamedIdentifier);
}