/// <summary>
/// Verifies the case where the active NN fails over after a block
/// synchronization request has been issued, but before it is committed.
/// </summary>
/// <remarks>
/// The primary DN's commitBlockSynchronization call is held back while NN 0 is
/// moved to standby and NN 1 is made active. Once released, the delayed call is
/// expected to have thrown an exception whose message contains
/// "Operation category WRITE is not supported". A subsequent lease recovery is
/// then run again and the file contents are checked.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFailoverRightBeforeCommitSynchronization()
{
    Configuration configuration = new Configuration();
    // Permissions are switched off so that a different user may recover the lease.
    configuration.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    configuration.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);

    FSDataOutputStream outputStream = null;
    MiniDFSCluster haCluster = new MiniDFSCluster.Builder(configuration)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");

        FileSystem failoverFs = HATestUtil.ConfigureFailoverFs(haCluster, configuration);
        outputStream = failoverFs.Create(TestPath);

        // Write half a block and flush it.
        AppendTestUtil.Write(outputStream, 0, BlockSize / 2);
        outputStream.Hflush();

        // Ask the active NN which DN is expected to act as recovery primary
        // for the block under construction.
        NameNode activeNameNode = haCluster.GetNameNode(0);
        ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(failoverFs, TestPath);
        DatanodeDescriptor primaryDescriptor =
            DFSTestUtil.GetExpectedPrimaryNode(activeNameNode, firstBlock);
        Log.Info("Expecting block recovery to be triggered on DN " + primaryDescriptor);

        // Locate the matching DN daemon and spy on its connection to the active NN.
        DataNode primaryDataNode = haCluster.GetDataNode(primaryDescriptor.GetIpcPort());
        DatanodeProtocolClientSideTranslatorPB spiedNnProxy =
            DataNodeTestUtils.SpyOnBposToNN(primaryDataNode, activeNameNode);

        // Hold back the commitBlockSynchronization call. The matchers are kept
        // inline and in argument order on purpose.
        GenericTestUtils.DelayAnswer commitDelay = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(commitDelay).When(spiedNnProxy).CommitBlockSynchronization(
            Org.Mockito.Mockito.Eq(firstBlock),
            Org.Mockito.Mockito.AnyInt(),                       // new genstamp
            Org.Mockito.Mockito.AnyLong(),                      // new length
            Org.Mockito.Mockito.Eq(true),                       // close file
            Org.Mockito.Mockito.Eq(false),                      // delete block
            (DatanodeID[])Org.Mockito.Mockito.AnyObject(),      // new targets
            (string[])Org.Mockito.Mockito.AnyObject());         // new target storages

        DistributedFileSystem otherUserFs = CreateFsAsOtherUser(haCluster, configuration);
        NUnit.Framework.Assert.IsFalse(otherUserFs.RecoverLease(TestPath));

        Log.Info("Waiting for commitBlockSynchronization call from primary");
        commitDelay.WaitForCall();

        Log.Info("Failing over to NN 1");
        haCluster.TransitionToStandby(0);
        haCluster.TransitionToActive(1);

        // Release the delayed call and verify that it failed with the
        // expected exception.
        commitDelay.Proceed();
        commitDelay.WaitForResult();
        Exception thrown = commitDelay.GetThrown();
        if (thrown == null)
        {
            NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported", thrown);

        // A further recovery attempt should now succeed against the new active.
        LoopRecoverLease(otherUserFs, TestPath);
        AppendTestUtil.Check(failoverFs, TestPath, BlockSize / 2);
    }
    finally
    {
        IOUtils.CloseStream(outputStream);
        haCluster.Shutdown();
    }
}
/// <summary>Test race between delete operation and commitBlockSynchronization method.
/// </summary>
/// <remarks>
/// For every test path a file is created, half a block is written and flushed,
/// lease recovery is started, and the primary DataNode's
/// commitBlockSynchronization call is held back while the file's top-most
/// non-root ancestor is deleted recursively (and, for some entries, re-created
/// as a directory). The delayed call is then released and any exception it
/// threw is only logged. Finally the NameNodes are restarted.
/// See HDFS-6825.
/// </remarks>
/// <param name="hasSnapshot">
/// when true, a snapshot of the root path is created after each file has been
/// written and flushed
/// </param>
/// <exception cref="System.Exception"/>
private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
{
    Log.Info("Start testing, hasSnapshot: " + hasSnapshot);

    // Each entry pairs a file path with a flag saying whether the deleted
    // ancestor directory should be re-created before the commit proceeds.
    AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
        new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file1", true));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
        "/testdir/testdir1/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
        "/testdir/testdir1/test-file1", true));

    Path rootPath = new Path("/");
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    // Spies already installed, keyed by DataNode, so each DataNode is spied on once.
    IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
        new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        int stId = 0;
        foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
        {
            string testPath = stest.Key;
            bool mkSameDir = stest.Value;
            Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: "
                + hasSnapshot);
            Path fPath = new Path(testPath);

            // Find grandest non-root parent (the file itself when it lives directly under root).
            Path grandestNonRootParent = fPath;
            while (!grandestNonRootParent.GetParent().Equals(rootPath))
            {
                grandestNonRootParent = grandestNonRootParent.GetParent();
            }

            stm = fs.Create(fPath);
            Log.Info("test on " + testPath + " created " + fPath);

            // Write a half block.
            AppendTestUtil.Write(stm, 0, BlockSize / 2);
            stm.Hflush();

            if (hasSnapshot)
            {
                SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                ++stId;
            }

            // Look into the block manager on the active node for the block
            // under construction.
            NameNode nn = cluster.GetNameNode();
            ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
            DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
            Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

            // Find the corresponding DN daemon, and spy on its connection to the
            // active.
            DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());

            // The Dictionary indexer throws KeyNotFoundException for a missing key
            // (it does not return null), so probe with TryGetValue before
            // installing a new spy.
            DatanodeProtocolClientSideTranslatorPB nnSpy;
            if (!dnMap.TryGetValue(primaryDN, out nnSpy) || nnSpy == null)
            {
                nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                dnMap[primaryDN] = nnSpy;
            }

            // Delay the commitBlockSynchronization call.
            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                Org.Mockito.Mockito.Eq(blk),
                Org.Mockito.Mockito.AnyInt(),                       // new genstamp
                Org.Mockito.Mockito.AnyLong(),                      // new length
                Org.Mockito.Mockito.Eq(true),                       // close file
                Org.Mockito.Mockito.Eq(false),                      // delete block
                (DatanodeID[])Org.Mockito.Mockito.AnyObject(),      // new targets
                (string[])Org.Mockito.Mockito.AnyObject());         // new target storages

            fs.RecoverLease(fPath);
            Log.Info("Waiting for commitBlockSynchronization call from primary");
            delayer.WaitForCall();

            Log.Info("Deleting recursively " + grandestNonRootParent);
            fs.Delete(grandestNonRootParent, true);
            // Only re-create the ancestor when it is a directory above the file,
            // not when it is the file itself.
            if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
            {
                Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                fs.Mkdirs(grandestNonRootParent);
            }

            delayer.Proceed();
            Log.Info("Now wait for result");
            delayer.WaitForResult();
            // An exception from the delayed call is logged, not treated as a failure.
            Exception t = delayer.GetThrown();
            if (t != null)
            {
                Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
            }
        }
        // end of loop each fPath

        Log.Info("Now check we can restart");
        cluster.RestartNameNodes();
        Log.Info("Restart finished");
    }
    finally
    {
        if (stm != null)
        {
            IOUtils.CloseStream(stm);
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}