public virtual void TestBlockReportsWhileFileBeingWritten()
{
    FSDataOutputStream @out = fs.Create(TestFilePath);
    try
    {
        AppendTestUtil.Write(@out, 0, 10);
        @out.Hflush();
        // Block report will include the RBW replica, but will be
        // queued on the StandbyNode.
        cluster.TriggerBlockReports();
    }
    finally
    {
        IOUtils.CloseStream(@out);
    }
    cluster.TransitionToStandby(0);
    cluster.TransitionToActive(1);
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
    BlockManagerTestUtil.UpdateState(nn2.GetNamesystem().GetBlockManager());
    NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetCorruptReplicaBlocks());
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetCorruptReplicaBlocks());
    DFSTestUtil.ReadFile(fs, TestFilePath);
}
/// <exception cref="System.Exception"/>
private void DoWriteOverFailoverTest(TestPipelinesFailover.TestScenario scenario,
    TestPipelinesFailover.MethodToTestIdempotence methodToTest)
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    // Don't check replication periodically.
    conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, 1000);
    FSDataOutputStream stm = null;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        int sizeWritten = 0;
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        stm = fs.Create(TestPath);
        // write a block and a half
        AppendTestUtil.Write(stm, 0, BlockAndAHalf);
        sizeWritten += BlockAndAHalf;
        // Make sure all of the blocks are written out before failover.
        stm.Hflush();
        Log.Info("Failing over to NN 1");
        scenario.Run(cluster);
        // NOTE: explicitly do *not* make any further metadata calls
        // to the NN here. The next IPC call should be to allocate the next
        // block. Any other call would notice the failover and not test
        // idempotence of the operation (HDFS-3031)
        FSNamesystem ns1 = cluster.GetNameNode(1).GetNamesystem();
        BlockManagerTestUtil.UpdateState(ns1.GetBlockManager());
        NUnit.Framework.Assert.AreEqual(0, ns1.GetPendingReplicationBlocks());
        NUnit.Framework.Assert.AreEqual(0, ns1.GetCorruptReplicaBlocks());
        NUnit.Framework.Assert.AreEqual(0, ns1.GetMissingBlocksCount());
        // If we're testing allocateBlock()'s idempotence, write another
        // block and a half, so we have to allocate a new block.
        // Otherwise, don't write anything, so our next RPC will be
        // completeFile() if we're testing idempotence of that operation.
        if (methodToTest == TestPipelinesFailover.MethodToTestIdempotence.AllocateBlock)
        {
            // write another block and a half
            AppendTestUtil.Write(stm, sizeWritten, BlockAndAHalf);
            sizeWritten += BlockAndAHalf;
        }
        stm.Close();
        stm = null;
        AppendTestUtil.Check(fs, TestPath, sizeWritten);
    }
    finally
    {
        IOUtils.CloseStream(stm);
        cluster.Shutdown();
    }
}
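// A minimal usage sketch (not part of the original file): this helper is meant
// to be driven by small per-scenario test wrappers, as in the upstream Hadoop
// TestPipelinesFailover. The enum member names below (GracefulFailover,
// AllocateBlock) are assumptions based on the Sharpen naming convention, so
// the sketch is left commented out:
//
// [NUnit.Framework.Test]
// public virtual void TestWriteOverGracefulFailover()
// {
//     DoWriteOverFailoverTest(TestPipelinesFailover.TestScenario.GracefulFailover,
//         TestPipelinesFailover.MethodToTestIdempotence.AllocateBlock);
// }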
/// <summary>
/// Test for the case where one of the DNs in the pipeline is in the
/// process of doing a block report exactly when the block is closed.
/// </summary>
/// <remarks>
/// Test for the case where one of the DNs in the pipeline is in the
/// process of doing a block report exactly when the block is closed.
/// In this case, the block report becomes delayed until after the
/// block is marked completed on the NN, and hence it reports an RBW
/// replica for a COMPLETE block. Such a report should not be marked
/// corrupt.
/// This is a regression test for HDFS-2791.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestOneReplicaRbwReportArrivesAfterBlockCompleted()
{
    CountDownLatch brFinished = new CountDownLatch(1);
    // The delayer counts down brFinished to inform the test that our
    // block report went through.
    GenericTestUtils.DelayAnswer delayer = new _DelayAnswer_579(brFinished, Log);
    string MethodName = GenericTestUtils.GetMethodName();
    Path filePath = new Path("/" + MethodName + ".dat");
    // Start a second DN for this test -- we're checking
    // what happens when one of the DNs is slowed for some reason.
    ReplFactor = 2;
    StartDNandWait(null, false);
    NameNode nn = cluster.GetNameNode();
    FSDataOutputStream @out = fs.Create(filePath, ReplFactor);
    try
    {
        AppendTestUtil.Write(@out, 0, 10);
        @out.Hflush();
        // Set up a spy so that we can delay the block report coming
        // from this node.
        DataNode dn = cluster.GetDataNodes()[0];
        DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
        Org.Mockito.Mockito.DoAnswer(delayer).When(spy).BlockReport(
            Org.Mockito.Mockito.AnyObject<DatanodeRegistration>(),
            Org.Mockito.Mockito.AnyString(),
            Org.Mockito.Mockito.AnyObject<StorageBlockReport[]>(),
            Org.Mockito.Mockito.AnyObject<BlockReportContext>());
        // Force a block report to be generated. The block report will have
        // an RBW replica in it. Wait for the RPC to be sent, but block
        // it before it gets to the NN.
        dn.ScheduleAllBlockReport(0);
        delayer.WaitForCall();
    }
    finally
    {
        IOUtils.CloseStream(@out);
    }
    // Now that the stream is closed, the NN will have the block in COMPLETE
    // state.
    delayer.Proceed();
    brFinished.Await();
    // Verify that no replicas are marked corrupt, and that the
    // file is still readable.
    BlockManagerTestUtil.UpdateState(nn.GetNamesystem().GetBlockManager());
    NUnit.Framework.Assert.AreEqual(0, nn.GetNamesystem().GetCorruptReplicaBlocks());
    DFSTestUtil.ReadFile(fs, filePath);
    // Ensure that the file is readable even from the DN that we futzed with.
    cluster.StopDataNode(1);
    DFSTestUtil.ReadFile(fs, filePath);
}
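// For reference, a sketch of what the Sharpen-generated anonymous class
// _DelayAnswer_579 most likely looks like, reconstructed from the Java
// original's DelayAnswer-subclass pattern. The override name PassThrough and
// the exact parameter types are assumptions, so it is shown commented out:
//
// private sealed class _DelayAnswer_579 : GenericTestUtils.DelayAnswer
// {
//     private readonly CountDownLatch brFinished;
//
//     public _DelayAnswer_579(CountDownLatch brFinished, Org.Apache.Commons.Logging.Log log)
//         : base(log)
//     {
//         this.brFinished = brFinished;
//     }
//
//     protected override object PassThrough(InvocationOnMock invocation)
//     {
//         // inform the test that our block report went through.
//         brFinished.CountDown();
//         return base.PassThrough(invocation);
//     }
// }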
/// <exception cref="System.Exception"/>
private void DoTestWriteOverFailoverWithDnFail(TestPipelinesFailover.TestScenario scenario)
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(5)
        .Build();
    try
    {
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        stm = fs.Create(TestPath);
        // write a block and a half
        AppendTestUtil.Write(stm, 0, BlockAndAHalf);
        // Make sure all the blocks are written before failover
        stm.Hflush();
        Log.Info("Failing over to NN 1");
        scenario.Run(cluster);
        NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
        cluster.StopDataNode(0);
        // write another block and a half
        AppendTestUtil.Write(stm, BlockAndAHalf, BlockAndAHalf);
        stm.Hflush();
        Log.Info("Failing back to NN 0");
        cluster.TransitionToStandby(1);
        cluster.TransitionToActive(0);
        cluster.StopDataNode(1);
        AppendTestUtil.Write(stm, BlockAndAHalf * 2, BlockAndAHalf);
        stm.Hflush();
        stm.Close();
        stm = null;
        AppendTestUtil.Check(fs, TestPath, BlockAndAHalf * 3);
    }
    finally
    {
        IOUtils.CloseStream(stm);
        cluster.Shutdown();
    }
}
/// <summary>Tests lease recovery if a client crashes.</summary>
/// <remarks>
/// Tests lease recovery if a client crashes. This approximates the
/// use case of HBase WALs being recovered after a NN failover.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestLeaseRecoveryAfterFailover()
{
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        stm = fs.Create(TestPath);
        // write a block and a half
        AppendTestUtil.Write(stm, 0, BlockAndAHalf);
        stm.Hflush();
        Log.Info("Failing over to NN 1");
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
        FileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
        LoopRecoverLease(fsOtherUser, TestPath);
        AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
        // Fail back to ensure that the block locations weren't lost on the
        // original node.
        cluster.TransitionToStandby(1);
        cluster.TransitionToActive(0);
        AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
    }
    finally
    {
        IOUtils.CloseStream(stm);
        cluster.Shutdown();
    }
}
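// For context, LoopRecoverLease (defined elsewhere in this class) polls
// DistributedFileSystem.RecoverLease(path) until it returns true, which means
// the lease has been fully recovered and the file closed. A minimal sketch of
// that contract, not the actual helper from this file (retry count and sleep
// interval are illustrative assumptions), shown commented out:
//
// private static void LoopRecoverLease(FileSystem fsOtherUser, Path testPath)
// {
//     // Retry for up to ~30 seconds.
//     for (int i = 0; i < 30; i++)
//     {
//         if (((DistributedFileSystem)fsOtherUser).RecoverLease(testPath))
//         {
//             return;
//         }
//         Sharpen.Thread.Sleep(1000);
//     }
//     NUnit.Framework.Assert.Fail("Timed out recovering lease for " + testPath);
// }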
/// <exception cref="System.Exception"/>
public override void DoAnAction()
{
    FSDataOutputStream stm = fs.Create(path, true);
    try
    {
        AppendTestUtil.Write(stm, 0, 100);
        stm.Hflush();
        LoopRecoverLease(fsOtherUser, path);
        AppendTestUtil.Check(fs, path, 100);
    }
    finally
    {
        try
        {
            stm.Close();
        }
        catch (IOException)
        {
            // Expected: the lease was recovered out from under this writer,
            // so the close() of the now-dead stream fails.
        }
    }
}
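// In the upstream Hadoop test this DoAnAction override belongs to a repeating
// stress thread that hammers create/hflush/recoverLease while another thread
// fails over the NameNodes. A hedged sketch of how such a thread is driven;
// the MultithreadedTestUtil.TestContext API and the _RepeatingTestThread /
// StressRuntime names are assumptions about this conversion, so it is left
// commented out:
//
// MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext();
// ctx.AddThread(new _RepeatingTestThread(ctx));  // calls DoAnAction() in a loop
// ctx.StartThreads();
// ctx.WaitFor(StressRuntime);  // let the stress workload run
// ctx.Stop();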
public virtual void TestRBWReportArrivesAfterEdits()
{
    CountDownLatch brFinished = new CountDownLatch(1);
    // The delayer counts down brFinished to inform the test that our
    // block report went through.
    GenericTestUtils.DelayAnswer delayer = new _DelayAnswer_521(brFinished, Log);
    FSDataOutputStream @out = fs.Create(TestFilePath);
    try
    {
        AppendTestUtil.Write(@out, 0, 10);
        @out.Hflush();
        DataNode dn = cluster.GetDataNodes()[0];
        DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(dn, nn2);
        Org.Mockito.Mockito.DoAnswer(delayer).When(spy).BlockReport(
            Org.Mockito.Mockito.AnyObject<DatanodeRegistration>(),
            Org.Mockito.Mockito.AnyString(),
            Org.Mockito.Mockito.AnyObject<StorageBlockReport[]>(),
            Org.Mockito.Mockito.AnyObject<BlockReportContext>());
        dn.ScheduleAllBlockReport(0);
        delayer.WaitForCall();
    }
    finally
    {
        IOUtils.CloseStream(@out);
    }
    cluster.TransitionToStandby(0);
    cluster.TransitionToActive(1);
    delayer.Proceed();
    brFinished.Await();
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
    BlockManagerTestUtil.UpdateState(nn2.GetNamesystem().GetBlockManager());
    NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetCorruptReplicaBlocks());
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetCorruptReplicaBlocks());
    DFSTestUtil.ReadFile(fs, TestFilePath);
}
/// <summary>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed.
/// </summary>
/// <remarks>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed. The
/// DN running the recovery should then fail to commit the synchronization
/// and a later retry will succeed.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFailoverRightBeforeCommitSynchronization()
{
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        stm = fs.Create(TestPath);
        // write a half block
        AppendTestUtil.Write(stm, 0, BlockSize / 2);
        stm.Hflush();
        // Look into the block manager on the active node for the block
        // under construction.
        NameNode nn0 = cluster.GetNameNode(0);
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, TestPath);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn0, blk);
        Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
        // Find the corresponding DN daemon, and spy on its connection to the
        // active.
        DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy =
            DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn0);
        // Delay the commitBlockSynchronization call
        GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
            Org.Mockito.Mockito.Eq(blk),
            Org.Mockito.Mockito.AnyInt(),   // new genstamp
            Org.Mockito.Mockito.AnyLong(),  // new length
            Org.Mockito.Mockito.Eq(true),   // close file
            Org.Mockito.Mockito.Eq(false),  // delete block
            (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
            (string[])Org.Mockito.Mockito.AnyObject());     // new target storages
        DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
        NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));
        Log.Info("Waiting for commitBlockSynchronization call from primary");
        delayer.WaitForCall();
        Log.Info("Failing over to NN 1");
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        // Let the commitBlockSynchronization call go through, and check that
        // it failed with the correct exception.
        delayer.Proceed();
        delayer.WaitForResult();
        Exception t = delayer.GetThrown();
        if (t == null)
        {
            NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported", t);
        // Now, if we try again to recover the block, it should succeed on the new
        // active.
        LoopRecoverLease(fsOtherUser, TestPath);
        AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
    }
    finally
    {
        IOUtils.CloseStream(stm);
        cluster.Shutdown();
    }
}
public virtual void TestQueueingWithAppend()
{
    int numQueued = 0;
    int numDN = cluster.GetDataNodes().Count;
    // case 1: create file and call hflush after write
    FSDataOutputStream @out = fs.Create(TestFilePath);
    try
    {
        AppendTestUtil.Write(@out, 0, 10);
        @out.Hflush();
        // Opening the file will report RBW replicas, but will be
        // queued on the StandbyNode.
        // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
        // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
        cluster.TriggerBlockReports();
        numQueued += numDN;  // RBW messages
        // The cluster.triggerBlockReports() call above does a full
        // block report that incurs 3 extra RBW messages
        numQueued += numDN;  // RBW messages
    }
    finally
    {
        IOUtils.CloseStream(@out);
        numQueued += numDN;  // blockReceived messages
    }
    cluster.TriggerBlockReports();
    numQueued += numDN;
    NUnit.Framework.Assert.AreEqual(numQueued,
        cluster.GetNameNode(1).GetNamesystem().GetPendingDataNodeMessageCount());
    // case 2: append to file and call hflush after write
    try
    {
        @out = fs.Append(TestFilePath);
        AppendTestUtil.Write(@out, 10, 10);
        @out.Hflush();
        cluster.TriggerBlockReports();
        numQueued += numDN * 2;  // RBW messages, see comments in case 1
    }
    finally
    {
        IOUtils.CloseStream(@out);
        numQueued += numDN;  // blockReceived
    }
    NUnit.Framework.Assert.AreEqual(numQueued,
        cluster.GetNameNode(1).GetNamesystem().GetPendingDataNodeMessageCount());
    // case 3: similar to case 2, except no hflush is called.
    try
    {
        @out = fs.Append(TestFilePath);
        AppendTestUtil.Write(@out, 20, 10);
    }
    finally
    {
        // The write operation in the try block is buffered, thus no RBW message
        // is reported yet until the closeStream call here. When closeStream is
        // called, before HDFS-7217 fix, there would be three RBW messages
        // (blockReceiving), plus three FINALIZED messages (blockReceived)
        // delivered to NN. However, because of HDFS-7217 fix, the reporting of
        // RBW messages is postponed. In this case, they are even overwritten
        // by the blockReceived messages of the same block when they are waiting
        // to be delivered. All this happens within the closeStream() call.
        // What's delivered to NN is the three blockReceived messages. See
        // BPServiceActor#addPendingReplicationBlockInfo
        //
        IOUtils.CloseStream(@out);
        numQueued += numDN;  // blockReceived
    }
    cluster.TriggerBlockReports();
    numQueued += numDN;
    Log.Info("Expect " + numQueued + " and got: " +
        cluster.GetNameNode(1).GetNamesystem().GetPendingDataNodeMessageCount());
    NUnit.Framework.Assert.AreEqual(numQueued,
        cluster.GetNameNode(1).GetNamesystem().GetPendingDataNodeMessageCount());
    cluster.TransitionToStandby(0);
    cluster.TransitionToActive(1);
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
    BlockManagerTestUtil.UpdateState(nn2.GetNamesystem().GetBlockManager());
    NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetCorruptReplicaBlocks());
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetCorruptReplicaBlocks());
    AppendTestUtil.Check(fs, TestFilePath, 30);
}
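// Bookkeeping summary for the test above (assuming the usual 3-DN mini
// cluster, so numDN == 3): case 1 ends with numQueued == 12 (RBW +
// full-report RBW + blockReceived + post-close trigger), case 2 adds 9
// (two rounds of RBW plus blockReceived) for 21, and case 3 adds 6
// (blockReceived + trigger) for a final expected total of 27 pending
// DataNode messages on the standby.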
/// <summary>
/// Test that we cannot read a file beyond its snapshot length
/// when accessing it via a snapshot path.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSnapshotfileLength()
{
    hdfs.Mkdirs(sub);
    int bytesRead;
    byte[] buffer = new byte[Blocksize * 8];
    int origLen = Blocksize + 1;
    int toAppend = Blocksize;
    FSDataInputStream fis = null;
    FileStatus fileStatus = null;
    // Create and write a file.
    Path file1 = new Path(sub, file1Name);
    DFSTestUtil.CreateFile(hdfs, file1, Blocksize, 0, Blocksize, Replication, Seed);
    DFSTestUtil.AppendFile(hdfs, file1, origLen);
    // Create a snapshot on the parent directory.
    hdfs.AllowSnapshot(sub);
    hdfs.CreateSnapshot(sub, snapshot1);
    Path file1snap1 = SnapshotTestHelper.GetSnapshotPath(sub, snapshot1, file1Name);
    FileChecksum snapChksum1 = hdfs.GetFileChecksum(file1snap1);
    Assert.AssertThat("file and snapshot file checksums are not equal",
        hdfs.GetFileChecksum(file1), CoreMatchers.Is(snapChksum1));
    // Append to the file.
    FSDataOutputStream @out = hdfs.Append(file1);
    // Nothing has been appended yet. All checksums should still be equal.
    Assert.AssertThat("file and snapshot checksums (open for append) are not equal",
        hdfs.GetFileChecksum(file1), CoreMatchers.Is(snapChksum1));
    Assert.AssertThat("snapshot checksum (post-open for append) has changed",
        hdfs.GetFileChecksum(file1snap1), CoreMatchers.Is(snapChksum1));
    try
    {
        AppendTestUtil.Write(@out, 0, toAppend);
        // Test reading from snapshot of file that is open for append
        byte[] dataFromSnapshot = DFSTestUtil.ReadFileBuffer(hdfs, file1snap1);
        Assert.AssertThat("Wrong data size in snapshot.",
            dataFromSnapshot.Length, CoreMatchers.Is(origLen));
        // Verify that checksum didn't change
        Assert.AssertThat("snapshot file checksum (pre-close) has changed",
            hdfs.GetFileChecksum(file1), CoreMatchers.Is(snapChksum1));
        Assert.AssertThat("snapshot checksum (post-append) has changed",
            hdfs.GetFileChecksum(file1snap1), CoreMatchers.Is(snapChksum1));
    }
    finally
    {
        @out.Close();
    }
    Assert.AssertThat("file and snapshot file checksums (post-close) are equal",
        hdfs.GetFileChecksum(file1), CoreMatchers.Not(snapChksum1));
    Assert.AssertThat("snapshot file checksum (post-close) has changed",
        hdfs.GetFileChecksum(file1snap1), CoreMatchers.Is(snapChksum1));
    // Make sure we can read the entire file via its non-snapshot path.
    fileStatus = hdfs.GetFileStatus(file1);
    Assert.AssertThat(fileStatus.GetLen(), CoreMatchers.Is((long)origLen + toAppend));
    fis = hdfs.Open(file1);
    bytesRead = fis.Read(0, buffer, 0, buffer.Length);
    Assert.AssertThat(bytesRead, CoreMatchers.Is(origLen + toAppend));
    fis.Close();
    // Try to open the file via its snapshot path.
    fis = hdfs.Open(file1snap1);
    fileStatus = hdfs.GetFileStatus(file1snap1);
    Assert.AssertThat(fileStatus.GetLen(), CoreMatchers.Is((long)origLen));
    // Make sure we can only read up to the snapshot length.
    bytesRead = fis.Read(0, buffer, 0, buffer.Length);
    Assert.AssertThat(bytesRead, CoreMatchers.Is(origLen));
    fis.Close();
    byte[] dataFromSnapshot_1 = DFSTestUtil.ReadFileBuffer(hdfs, file1snap1);
    Assert.AssertThat("Wrong data size in snapshot.",
        dataFromSnapshot_1.Length, CoreMatchers.Is(origLen));
}
/// <summary>
/// Test race between delete operation and commitBlockSynchronization method.
/// </summary>
/// <remarks>
/// Test race between delete operation and commitBlockSynchronization method.
/// See HDFS-6825.
/// </remarks>
/// <param name="hasSnapshot"/>
/// <exception cref="System.Exception"/>
private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
{
    Log.Info("Start testing, hasSnapshot: " + hasSnapshot);
    AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
        new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file1", true));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file1", true));
    Path rootPath = new Path("/");
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
        new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        int stId = 0;
        foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
        {
            string testPath = stest.Key;
            bool mkSameDir = stest.Value;
            Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir +
                " snapshot: " + hasSnapshot);
            Path fPath = new Path(testPath);
            // find grandest non-root parent
            Path grandestNonRootParent = fPath;
            while (!grandestNonRootParent.GetParent().Equals(rootPath))
            {
                grandestNonRootParent = grandestNonRootParent.GetParent();
            }
            stm = fs.Create(fPath);
            Log.Info("test on " + testPath + " created " + fPath);
            // write a half block
            AppendTestUtil.Write(stm, 0, BlockSize / 2);
            stm.Hflush();
            if (hasSnapshot)
            {
                SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                ++stId;
            }
            // Look into the block manager on the active node for the block
            // under construction.
            NameNode nn = cluster.GetNameNode();
            ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
            DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
            Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
            // Find the corresponding DN daemon, and spy on its connection to the
            // active.
            DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
            DatanodeProtocolClientSideTranslatorPB nnSpy = dnMap[primaryDN];
            if (nnSpy == null)
            {
                nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                dnMap[primaryDN] = nnSpy;
            }
            // Delay the commitBlockSynchronization call
            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                Org.Mockito.Mockito.Eq(blk),
                Org.Mockito.Mockito.AnyInt(),   // new genstamp
                Org.Mockito.Mockito.AnyLong(),  // new length
                Org.Mockito.Mockito.Eq(true),   // close file
                Org.Mockito.Mockito.Eq(false),  // delete block
                (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
                (string[])Org.Mockito.Mockito.AnyObject());     // new target storages
            fs.RecoverLease(fPath);
            Log.Info("Waiting for commitBlockSynchronization call from primary");
            delayer.WaitForCall();
            Log.Info("Deleting recursively " + grandestNonRootParent);
            fs.Delete(grandestNonRootParent, true);
            if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
            {
                Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                fs.Mkdirs(grandestNonRootParent);
            }
            delayer.Proceed();
            Log.Info("Now wait for result");
            delayer.WaitForResult();
            Exception t = delayer.GetThrown();
            if (t != null)
            {
                Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
            }
        }
        // end of loop over each fPath
        Log.Info("Now check we can restart");
        cluster.RestartNameNodes();
        Log.Info("Restart finished");
    }
    finally
    {
        if (stm != null)
        {
            IOUtils.CloseStream(stm);
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
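// In the upstream Hadoop test this parameterized helper is driven by two thin
// wrappers, one per snapshot mode. A sketch of what they look like (the
// wrapper names are assumptions based on the upstream test, not confirmed by
// this file, so the sketch is commented out):
//
// [NUnit.Framework.Test]
// public virtual void TestDeleteAndCommitBlockSynchronizationRaceNoSnapshot()
// {
//     TestDeleteAndCommitBlockSynchronizationRace(false);
// }
//
// [NUnit.Framework.Test]
// public virtual void TestDeleteAndCommitBlockSynchronizationRaceHasSnapshot()
// {
//     TestDeleteAndCommitBlockSynchronizationRace(true);
// }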