/// <summary>
/// Fills the channel's edit queue up to <c>LimitQueueSizeBytes</c> while the
/// mocked proxy is stalled, checks that one more edit is rejected with a
/// <c>LoggerTooFarBehindException</c>, then releases the proxy and waits for
/// the supplied condition to hold.
/// </summary>
public virtual void TestQueueLimiting()
{
    // Stall the underlying fake proxy so that no journal call can complete yet.
    GenericTestUtils.DelayAnswer stall = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(stall).When(mockProxy).Journal(
        Org.Mockito.Mockito.Any<RequestInfo>(),
        Org.Mockito.Mockito.Eq(1L),
        Org.Mockito.Mockito.Eq(1L),
        Org.Mockito.Mockito.Eq(1),
        Org.Mockito.Mockito.Same(FakeData));

    // Enqueue as many edits as fit within the byte limit.
    int capacity = LimitQueueSizeBytes / FakeData.Length;
    int txId = 1;
    while (txId <= capacity)
    {
        ch.SendEdits(1L, (long)txId, 1, FakeData);
        txId++;
    }

    // The byte accounting must report exactly the configured limit.
    NUnit.Framework.Assert.AreEqual(LimitQueueSizeBytes, ch.GetQueuedEditsSize());

    // One more edit must be refused now that the queue is full.
    try
    {
        ch.SendEdits(1L, capacity + 1, 1, FakeData).Get(1, TimeUnit.Seconds);
        NUnit.Framework.Assert.Fail("Did not fail to queue more calls after queue was full"
            );
    }
    catch (ExecutionException ee)
    {
        if (ee.InnerException is LoggerTooFarBehindException)
        {
            // Expected: the logger reported that it is too far behind.
        }
        else
        {
            throw;
        }
    }

    // Release the stalled proxy; the original queue should now drain.
    stall.Proceed();
    GenericTestUtils.WaitFor(new _Supplier_124(this), 10, 1000);
}
/// <summary>
/// Test case that stops a writer after finalizing a block but
/// before calling completeFile, recovers the file from another writer,
/// starts writing from that writer, and then has the old lease holder
/// call completeFile.
/// </summary>
/// <remarks>
/// The old writer's close is expected to fail with an <c>IOException</c>
/// whose message contains "Lease mismatch"; any other failure is rethrown
/// with its original stack trace intact.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestCompleteOtherLeaseHoldersFile()
{
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(5).Build();
    try
    {
        cluster.WaitActive();
        NamenodeProtocols preSpyNN = cluster.GetNameNodeRpc();
        NamenodeProtocols spyNN = Org.Mockito.Mockito.Spy(preSpyNN);

        // Delay completeFile
        GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(delayer).When(spyNN).Complete(
            Matchers.AnyString(),
            Matchers.AnyString(),
            (ExtendedBlock)Matchers.AnyObject(),
            Matchers.AnyLong());

        DFSClient client = new DFSClient(null, spyNN, conf, null);
        file1 = new Path("/testCompleteOtherLease");
        OutputStream stm = client.Create("/testCompleteOtherLease", true);

        // write 1/2 block
        AppendTestUtil.Write(stm, 0, 4096);
        AtomicReference<Exception> err = new AtomicReference<Exception>();
        // NOTE(review): _Thread_242 is defined elsewhere; presumably it closes
        // stm and stores any failure in err -- confirm against its definition.
        Sharpen.Thread t = new _Thread_242(stm, err);
        t.Start();
        Log.Info("Waiting for close to get to latch...");
        delayer.WaitForCall();

        // At this point, the block is finalized on the DNs, but the file
        // has not been completed in the NN.
        // Lose the leases
        Log.Info("Killing lease checker");
        client.GetLeaseRenewer().InterruptAndJoin();

        FileSystem fs1 = cluster.GetFileSystem();
        FileSystem fs2 = AppendTestUtil.CreateHdfsWithDifferentUsername(fs1.GetConf());

        Log.Info("Recovering file");
        RecoverFile(fs2);

        Log.Info("Opening file for append from new fs");
        FSDataOutputStream appenderStream = fs2.Append(file1);

        Log.Info("Writing some data from new appender");
        AppendTestUtil.Write(appenderStream, 0, 4096);

        Log.Info("Telling old close to proceed.");
        delayer.Proceed();
        Log.Info("Waiting for close to finish.");
        t.Join();
        Log.Info("Close finished.");

        // We expect that close will get a "Lease mismatch"
        // error.
        Exception thrownByClose = err.Get();
        NUnit.Framework.Assert.IsNotNull(thrownByClose);
        NUnit.Framework.Assert.IsTrue(thrownByClose is IOException);
        if (!thrownByClose.Message.Contains("Lease mismatch"))
        {
            // "throw thrownByClose;" would overwrite the stack trace recorded
            // on the closing thread; rethrow the same exception object with
            // that trace preserved instead.
            System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(thrownByClose).Throw();
        }

        // The appender should be able to close properly
        appenderStream.Close();
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Make sure that clients will receive StandbyExceptions even when a
/// checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
/// thread will have FSNS lock.
/// </summary>
/// <remarks>
/// Regression test for HDFS-4591.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestStandbyExceptionThrownDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Eq(NNStorage.NameNodeFile.Image),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    answerer.WaitForCall();
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN is not performing checkpoint but it should be.");

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    try
    {
        // Perform an RPC to the SBN and make sure it throws a StandbyException.
        nn1.GetRpcServer().GetFileInfo("/");
        NUnit.Framework.Assert.Fail("Should have thrown StandbyException, but instead succeeded."
            );
    }
    catch (StandbyException se)
    {
        GenericTestUtils.AssertExceptionContains("is not supported", se);
    }

    // Make sure new incremental block reports are processed during
    // checkpointing on the SBN.
    NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetPendingDataNodeMessageCount
        ());
    DoCreate();
    Sharpen.Thread.Sleep(1000);
    NUnit.Framework.Assert.IsTrue(cluster.GetNamesystem(1).GetPendingDataNodeMessageCount
        () > 0);

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN should have still been checkpointing.");
    answerer.Proceed();
    answerer.WaitForResult();
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
        "SBN should have finished checkpointing.");
}
/// <summary>
/// Checks that, while a checkpoint on the SBN is held in progress, the
/// namesystem lock is neither write-locked nor contended, a thread is queued
/// on the checkpoint lock, and the standby's /jmx page can still be fetched.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReadsAllowedDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Any<NNStorage.NameNodeFile>(),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    answerer.WaitForCall();
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN is not performing checkpoint but it should be.");

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    // Perform an RPC that needs to take the write lock.
    // NOTE(review): the RPC itself lives in _Thread_404, defined elsewhere.
    Sharpen.Thread t = new _Thread_404(this);
    t.Start();

    // Make sure that our thread is waiting for the lock.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().HasQueuedThreads
        ());
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().IsWriteLocked
        ());
    NUnit.Framework.Assert.IsTrue(nn1.GetNamesystem().GetCpLockForTests().HasQueuedThreads
        ());

    // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
    // be taken.
    string pageContents = DFSTestUtil.UrlGet(new Uri("http://" + nn1.GetHttpAddress()
        .GetHostName() + ":" + nn1.GetHttpAddress().Port + "/jmx"));
    NUnit.Framework.Assert.IsTrue(pageContents.Contains("NumLiveDataNodes"));

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
        "SBN should have still been checkpointing.");
    answerer.Proceed();
    answerer.WaitForResult();
    NUnit.Framework.Assert.IsTrue(
        answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
        "SBN should have finished checkpointing.");
    t.Join();
}
/// <summary>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed.
/// </summary>
/// <remarks>
/// The DN running the recovery should then fail to commit the
/// synchronization, and a later retry will succeed.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFailoverRightBeforeCommitSynchronization()
{
    Configuration haConf = new Configuration();
    // Permissions are switched off so that a different user may recover the lease.
    haConf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    haConf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);

    FSDataOutputStream writer = null;
    MiniDFSCluster haCluster = new MiniDFSCluster.Builder(haConf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");

        FileSystem failoverFs = HATestUtil.ConfigureFailoverFs(haCluster, haConf);
        writer = failoverFs.Create(TestPath);

        // Write half a block and flush it out.
        AppendTestUtil.Write(writer, 0, BlockSize / 2);
        writer.Hflush();

        // Ask the active node which DN is expected to act as primary for the
        // block that is still under construction.
        NameNode activeNN = haCluster.GetNameNode(0);
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(failoverFs, TestPath);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(activeNN,
            block);
        Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

        // Locate the matching DN daemon and spy on its connection to the active.
        DataNode primaryDN = haCluster.GetDataNode(expectedPrimary.GetIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(
            primaryDN, activeNN);

        // Hold back the commitBlockSynchronization call.
        GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
            Org.Mockito.Mockito.Eq(block),
            Org.Mockito.Mockito.AnyInt(),                      // new genstamp
            Org.Mockito.Mockito.AnyLong(),                     // new length
            Org.Mockito.Mockito.Eq(true),                      // close file
            Org.Mockito.Mockito.Eq(false),                     // delete block
            (DatanodeID[])Org.Mockito.Mockito.AnyObject(),     // new targets
            (string[])Org.Mockito.Mockito.AnyObject());        // new target storages

        DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(haCluster, haConf);
        NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));

        Log.Info("Waiting for commitBlockSynchronization call from primary");
        delayer.WaitForCall();

        Log.Info("Failing over to NN 1");
        haCluster.TransitionToStandby(0);
        haCluster.TransitionToActive(1);

        // Release the commitBlockSynchronization call and verify that it
        // failed with the expected exception.
        delayer.Proceed();
        delayer.WaitForResult();
        Exception thrown = delayer.GetThrown();
        if (thrown == null)
        {
            NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby"
                );
        }
        GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported"
            , thrown);

        // A fresh recovery attempt should now succeed against the new active.
        LoopRecoverLease(fsOtherUser, TestPath);
        AppendTestUtil.Check(failoverFs, TestPath, BlockSize / 2);
    }
    finally
    {
        IOUtils.CloseStream(writer);
        haCluster.Shutdown();
    }
}
/// <summary>
/// Test race between delete operation and commitBlockSynchronization method.
/// </summary>
/// <remarks>
/// See HDFS-6825. For each test path the file is written, lease recovery is
/// started, the file's top-level parent is deleted while the
/// commitBlockSynchronization call is held back, and finally the NameNodes
/// are restarted.
/// </remarks>
/// <param name="hasSnapshot">
/// when true, a snapshot of the root is created after each file is written
/// </param>
/// <exception cref="System.Exception"/>
private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
{
    Log.Info("Start testing, hasSnapshot: " + hasSnapshot);
    // Each entry is (file path, whether to recreate the deleted parent dir).
    AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
        new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file",
        false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file1",
        true));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
        "/testdir/testdir1/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
        "/testdir/testdir1/test-file1", true));

    Path rootPath = new Path("/");
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    // One spy per DataNode, reused across iterations.
    IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
        new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        int stId = 0;
        foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
        {
            string testPath = stest.Key;
            bool mkSameDir = stest.Value;
            Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot
                );
            Path fPath = new Path(testPath);

            //find grandest non-root parent
            Path grandestNonRootParent = fPath;
            while (!grandestNonRootParent.GetParent().Equals(rootPath))
            {
                grandestNonRootParent = grandestNonRootParent.GetParent();
            }

            stm = fs.Create(fPath);
            Log.Info("test on " + testPath + " created " + fPath);

            // write a half block
            AppendTestUtil.Write(stm, 0, BlockSize / 2);
            stm.Hflush();
            if (hasSnapshot)
            {
                SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                ++stId;
            }

            // Look into the block manager on the active node for the block
            // under construction.
            NameNode nn = cluster.GetNameNode();
            ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
            DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
            Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

            // Find the corresponding DN daemon, and spy on its connection to the
            // active.
            DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
            // The dictionary indexer throws KeyNotFoundException for a missing
            // key, so a DataNode seen for the first time must be looked up
            // with TryGetValue.
            DatanodeProtocolClientSideTranslatorPB nnSpy;
            if (!dnMap.TryGetValue(primaryDN, out nnSpy) || nnSpy == null)
            {
                nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                dnMap[primaryDN] = nnSpy;
            }

            // Delay the commitBlockSynchronization call
            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                Org.Mockito.Mockito.Eq(blk),
                Org.Mockito.Mockito.AnyInt(),                      // new genstamp
                Org.Mockito.Mockito.AnyLong(),                     // new length
                Org.Mockito.Mockito.Eq(true),                      // close file
                Org.Mockito.Mockito.Eq(false),                     // delete block
                (DatanodeID[])Org.Mockito.Mockito.AnyObject(),     // new targets
                (string[])Org.Mockito.Mockito.AnyObject());        // new target storages

            fs.RecoverLease(fPath);
            Log.Info("Waiting for commitBlockSynchronization call from primary");
            delayer.WaitForCall();

            Log.Info("Deleting recursively " + grandestNonRootParent);
            fs.Delete(grandestNonRootParent, true);
            if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
            {
                Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                fs.Mkdirs(grandestNonRootParent);
            }

            delayer.Proceed();
            Log.Info("Now wait for result");
            delayer.WaitForResult();
            Exception t = delayer.GetThrown();
            if (t != null)
            {
                // A failure of the delayed call is only logged, not asserted on.
                Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
            }
        }
        // end of loop each fPath
        Log.Info("Now check we can restart");
        cluster.RestartNameNodes();
        Log.Info("Restart finished");
    }
    finally
    {
        if (stm != null)
        {
            IOUtils.CloseStream(stm);
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Starts a saveNamespace on a worker thread, holds it inside a spied
/// <c>GetGenerationStampV2</c> call, submits a cancel task, and then checks
/// that the save fails with SaveNamespaceCancelledException and that only the
/// original fsimage files remain in each storage directory.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCancelSaveNamespace()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage image = fsn.GetFSImage();
    NNStorage storage = image.GetStorage();
    // unlock any directories that FSNamesystem's initialization may have locked
    storage.Close();
    storage.SetStorageDirectories(FSNamesystem.GetNamespaceDirs(conf), FSNamesystem.GetNamespaceEditsDirs
        (conf));

    FSNamesystem spyFsn = Org.Mockito.Mockito.Spy(fsn);
    FSNamesystem finalFsn = spyFsn;
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
    BlockIdManager bid = Org.Mockito.Mockito.Spy(spyFsn.GetBlockIdManager());
    Whitebox.SetInternalState(finalFsn, "blockIdManager", bid);
    Org.Mockito.Mockito.DoAnswer(delayer).When(bid).GetGenerationStampV2();

    ExecutorService pool = Executors.NewFixedThreadPool(2);
    try
    {
        DoAnEdit(fsn, 1);
        Canceler canceler = new Canceler();

        // Save namespace
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            // NOTE(review): _Callable_561 / _Callable_572 are defined elsewhere;
            // presumably they run saveNamespace and the cancel, respectively.
            Future<Void> saverFuture = pool.Submit(new _Callable_561(image, finalFsn, canceler
                ));

            // Wait until saveNamespace calls getGenerationStamp
            delayer.WaitForCall();

            // then cancel the saveNamespace
            Future<Void> cancelFuture = pool.Submit(new _Callable_572(canceler));

            // give the cancel call time to run
            Sharpen.Thread.Sleep(500);

            // allow saveNamespace to proceed - it should check the cancel flag after
            // this point and throw an exception
            delayer.Proceed();
            cancelFuture.Get();
            saverFuture.Get();
            NUnit.Framework.Assert.Fail("saveNamespace did not fail even though cancelled!");
        }
        catch (Exception t)
        {
            GenericTestUtils.AssertExceptionContains("SaveNamespaceCancelledException", t);
        }
        Log.Info("Successfully cancelled a saveNamespace");

        // Check that we have only the original image and not any
        // cruft left over from half-finished images
        FSImageTestUtil.LogStorageContents(Log, storage);
        foreach (Storage.StorageDirectory sd in storage.DirIterable(null))
        {
            FilePath curDir = sd.GetCurrentDir();
            GenericTestUtils.AssertGlobEquals(curDir, "fsimage_.*", NNStorage.GetImageFileName
                (0), NNStorage.GetImageFileName(0) + MD5FileUtils.Md5Suffix);
        }
    }
    finally
    {
        try
        {
            // Stop the worker threads so that a task still blocked in the
            // delayer (e.g. after an early failure) does not outlive the test.
            pool.ShutdownNow();
        }
        finally
        {
            fsn.Close();
        }
    }
}