public virtual void TestDisplayRecentEditLogOpCodes()
{
    // Verifies that when the NN fails to replay a corrupted edit log, the
    // resulting error message reports the replay offset and the most recent
    // opcode offsets, so an operator can locate the corruption.
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    FilePath editFile;
    try
    {
        // Redundant managed dfs dirs are disabled so the single edits
        // directory we corrupt below is the one the NN depends on.
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes)
            .EnableManagedDfsDirsRedundancy(false).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        FSImage fsimage = namesystem.GetFSImage();
        // Generate a run of edit log transactions.
        for (int i = 0; i < 20; i++)
        {
            fileSys.Mkdirs(new Path("/tmp/tmp" + i));
        }
        Storage.StorageDirectory sd = fsimage.GetStorage()
            .DirIterator(NNStorage.NameNodeDirType.Edits).Next();
        cluster.Shutdown();
        cluster = null;
        editFile = FSImageTestUtil.FindLatestEditsLog(sd).GetFile();
    }
    finally
    {
        // BUGFIX: the original never closed the FileSystem handle and leaked
        // the cluster on any failure before the explicit Shutdown() above.
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    NUnit.Framework.Assert.IsTrue("Should exist: " + editFile, editFile.Exists());
    // Corrupt the tail of the edits file by overwriting the last 40 bytes
    // with a run of OP_DELETE opcodes.
    long fileLen = editFile.Length();
    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
    try
    {
        rwf.Seek(fileLen - 40);
        for (int i = 0; i < 20; i++)
        {
            rwf.Write(FSEditLogOpCodes.OpDelete.GetOpCode());
        }
    }
    finally
    {
        // BUGFIX: ensure the file handle is released even if Seek/Write throws.
        rwf.Close();
    }
    // Expected failure message: replay offset, expected txid, and the four
    // most recent opcode offsets.
    StringBuilder bld = new StringBuilder();
    bld.Append("^Error replaying edit log at offset \\d+. ");
    bld.Append("Expected transaction ID was \\d+\n");
    bld.Append("Recent opcode offsets: (\\d+\\s*){4}$");
    try
    {
        // Restarting on the corrupted log must fail with the message above.
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes)
            .EnableManagedDfsDirsRedundancy(false).Format(false).Build();
        NUnit.Framework.Assert.Fail("should not be able to start");
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.IsTrue("error message contains opcodes message",
            e.Message.Matches(bld.ToString()));
    }
    finally
    {
        // BUGFIX: if the restart unexpectedly succeeded, the original leaked
        // the second cluster (Assert.Fail throws past it without cleanup).
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
public virtual void TestBackupNodeTailsEdits()
{
    // Verifies that a BackupNode tails the active NN's edit log: it rolls
    // when the NN rolls, stays in sync after a checkpoint, does not finalize
    // the NN's open log segment on shutdown, and catches up after a restart.
    Configuration conf = new HdfsConfiguration();
    HAUtil.SetAllowStandbyReads(conf, true);
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    BackupNode backup = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Build();
        fileSys = cluster.GetFileSystem();
        backup = StartBackupNode(conf, HdfsServerConstants.StartupOption.Backup, 1);
        BackupImage bnImage = (BackupImage)backup.GetFSImage();
        TestBNInSync(cluster, backup, 1);
        // Force a roll -- BN should roll with NN.
        NameNode nn = cluster.GetNameNode();
        NamenodeProtocols nnRpc = nn.GetRpcServer();
        nnRpc.RollEditLog();
        NUnit.Framework.Assert.AreEqual(bnImage.GetEditLog().GetCurSegmentTxId(),
            nn.GetFSImage().GetEditLog().GetCurSegmentTxId());
        // BN should stay in sync after roll
        TestBNInSync(cluster, backup, 2);
        long nnImageBefore = nn.GetFSImage().GetStorage().GetMostRecentCheckpointTxId();
        // BN checkpoint
        backup.DoCheckpoint();
        // NN should have received a new image
        long nnImageAfter = nn.GetFSImage().GetStorage().GetMostRecentCheckpointTxId();
        // BUGFIX: the original assert message contained a raw line break,
        // which is not legal inside a regular C# string literal.
        NUnit.Framework.Assert.IsTrue("nn should have received new checkpoint. before: "
            + nnImageBefore + " after: " + nnImageAfter, nnImageAfter > nnImageBefore);
        // BN should stay in sync after checkpoint
        TestBNInSync(cluster, backup, 3);
        // Stop BN
        Storage.StorageDirectory sd = bnImage.GetStorage().GetStorageDir(0);
        backup.Stop();
        backup = null;
        // When shutting down the BN, it shouldn't finalize logs that are
        // still open on the NN
        FileJournalManager.EditLogFile editsLog = FSImageTestUtil.FindLatestEditsLog(sd);
        NUnit.Framework.Assert.AreEqual(editsLog.GetFirstTxId(),
            nn.GetFSImage().GetEditLog().GetCurSegmentTxId());
        NUnit.Framework.Assert.IsTrue("Should not have finalized " + editsLog,
            editsLog.IsInProgress());
        // do some edits while the BN is down
        NUnit.Framework.Assert.IsTrue(fileSys.Mkdirs(new Path("/edit-while-bn-down")));
        // start a new backup node; it must catch up on the missed edit
        backup = StartBackupNode(conf, HdfsServerConstants.StartupOption.Backup, 1);
        TestBNInSync(cluster, backup, 4);
        NUnit.Framework.Assert.IsNotNull(backup.GetNamesystem()
            .GetFileInfo("/edit-while-bn-down", false));
    }
    finally
    {
        Log.Info("Shutting down...");
        if (backup != null)
        {
            backup.Stop();
        }
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    // NOTE(review): runs after shutdown by design (matches upstream Hadoop);
    // only reached when the try body completed, so both references are live.
    AssertStorageDirsMatch(cluster.GetNameNode(), backup);
}
/// <summary>
/// Drives a NameNode recovery scenario: corrupt an edit log with the given
/// corruptor, confirm normal startup fails when recovery is required, run
/// the NN with the recovery startup option, and verify a normal restart
/// succeeds afterward with the pre-corruption namespace intact.
/// </summary>
/// <exception cref="System.IO.IOException"/>
internal static void TestNameNodeRecoveryImpl(TestNameNodeRecovery.Corruptor corruptor
    , bool finalize)
{
    string TestPath = "/test/path/dir";
    string TestPath2 = "/second/dir";
    bool needRecovery = corruptor.NeedRecovery(finalize);
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    SetupRecoveryTestConf(conf);
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    Storage.StorageDirectory sd = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0)
            .ManageNameDfsDirs(false).Build();
        cluster.WaitActive();
        if (!finalize)
        {
            // Normally, the in-progress edit log would be finalized by
            // FSEditLog#endCurrentLogSegment. For testing purposes, we
            // disable that here.
            FSEditLog spyLog = Org.Mockito.Mockito.Spy(
                cluster.GetNameNode().GetFSImage().GetEditLog());
            Org.Mockito.Mockito.DoNothing().When(spyLog).EndCurrentLogSegment(true);
            DFSTestUtil.SetEditLogForTesting(cluster.GetNamesystem(), spyLog);
        }
        fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        FSImage fsimage = namesystem.GetFSImage();
        fileSys.Mkdirs(new Path(TestPath));
        fileSys.Mkdirs(new Path(TestPath2));
        sd = fsimage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Edits).Next();
    }
    finally
    {
        // BUGFIX: the original never closed the FileSystem handle.
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    FilePath editFile = FSImageTestUtil.FindLatestEditsLog(sd).GetFile();
    NUnit.Framework.Assert.IsTrue("Should exist: " + editFile, editFile.Exists());
    // Corrupt the edit log
    Log.Info("corrupting edit log file '" + editFile + "'");
    corruptor.Corrupt(editFile);
    // If needRecovery == true, make sure that we can't start the
    // cluster normally before recovery
    cluster = null;
    try
    {
        Log.Debug("trying to start normally (this should fail)...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0)
            .EnableManagedDfsDirsRedundancy(false).Format(false).Build();
        cluster.WaitActive();
        cluster.Shutdown();
        if (needRecovery)
        {
            // BUGFIX: the original message string contained a raw line break,
            // which is not legal inside a regular C# string literal.
            NUnit.Framework.Assert.Fail(
                "expected the corrupted edit log to prevent normal startup");
        }
    }
    catch (IOException e)
    {
        if (!needRecovery)
        {
            Log.Error("Got unexpected failure with " + corruptor.GetName() + corruptor, e);
            NUnit.Framework.Assert.Fail("got unexpected exception " + e.Message);
        }
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    // Perform NameNode recovery.
    // Even if there was nothing wrong previously (needRecovery == false),
    // this should still work fine.
    cluster = null;
    try
    {
        Log.Debug("running recovery...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0)
            .EnableManagedDfsDirsRedundancy(false).Format(false)
            .StartupOption(recoverStartOpt).Build();
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.Fail("caught IOException while trying to recover. "
            + "message was " + e.Message + "\nstack trace\n"
            + StringUtils.StringifyException(e));
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    // Make sure that we can start the cluster normally after recovery
    cluster = null;
    try
    {
        Log.Debug("starting cluster normally after recovery...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0)
            .EnableManagedDfsDirsRedundancy(false).Format(false).Build();
        Log.Debug("successfully recovered the " + corruptor.GetName()
            + " corrupted edit log");
        cluster.WaitActive();
        NUnit.Framework.Assert.IsTrue(cluster.GetFileSystem().Exists(new Path(TestPath)));
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.Fail("failed to recover. Error message: " + e.Message);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
public virtual void TestSaveNamespace()
{
    // Verifies that delegation tokens survive saveNamespace plus repeated
    // cluster restarts: tokens issued before and between restarts remain
    // renewable and cancellable afterward. Uses the class-level `cluster`
    // field so the fixture teardown can also reach it.
    DistributedFileSystem fs = null;
    try
    {
        Configuration conf = new HdfsConfiguration();
        conf.SetBoolean(DFSConfigKeys.DfsNamenodeDelegationTokenAlwaysUseKey, true);
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes).Build();
        cluster.WaitActive();
        fs = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        string renewer = UserGroupInformation.GetLoginUser().GetUserName();
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token1
            = namesystem.GetDelegationToken(new Text(renewer));
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token2
            = namesystem.GetDelegationToken(new Text(renewer));
        // Saving image without safe mode should fail
        DFSAdmin admin = new DFSAdmin(conf);
        string[] args = new string[] { "-saveNamespace" };
        // verify that the edits file is NOT empty
        NameNode nn = cluster.GetNameNode();
        foreach (Storage.StorageDirectory sd in nn.GetFSImage().GetStorage()
            .DirIterable(null))
        {
            FileJournalManager.EditLogFile log = FSImageTestUtil.FindLatestEditsLog(sd);
            NUnit.Framework.Assert.IsTrue(log.IsInProgress());
            log.ValidateLog();
            long numTransactions = (log.GetLastTxId() - log.GetFirstTxId()) + 1;
            NUnit.Framework.Assert.AreEqual("In-progress log " + log
                + " should have 5 transactions", 5, numTransactions);
        }
        // Saving image in safe mode should succeed
        fs.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            admin.Run(args);
        }
        catch (Exception e)
        {
            // BUGFIX: preserve the original exception as the inner exception
            // instead of discarding its type and stack trace.
            throw new IOException(e.Message, e);
        }
        // verify that the edits file is empty except for the START txn
        foreach (Storage.StorageDirectory sd_1 in nn.GetFSImage().GetStorage()
            .DirIterable(null))
        {
            FileJournalManager.EditLogFile log = FSImageTestUtil.FindLatestEditsLog(sd_1);
            NUnit.Framework.Assert.IsTrue(log.IsInProgress());
            log.ValidateLog();
            long numTransactions = (log.GetLastTxId() - log.GetFirstTxId()) + 1;
            NUnit.Framework.Assert.AreEqual("In-progress log " + log
                + " should only have START txn", 1, numTransactions);
        }
        // restart cluster
        cluster.Shutdown();
        cluster = null;
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes)
            .Format(false).Build();
        cluster.WaitActive();
        //Should be able to renew & cancel the delegation token after cluster restart
        try
        {
            RenewToken(token1);
            RenewToken(token2);
        }
        catch (IOException)
        {
            NUnit.Framework.Assert.Fail("Could not renew or cancel the token");
        }
        namesystem = cluster.GetNamesystem();
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token3
            = namesystem.GetDelegationToken(new Text(renewer));
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token4
            = namesystem.GetDelegationToken(new Text(renewer));
        // restart cluster again
        cluster.Shutdown();
        cluster = null;
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes)
            .Format(false).Build();
        cluster.WaitActive();
        namesystem = cluster.GetNamesystem();
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token5
            = namesystem.GetDelegationToken(new Text(renewer));
        try
        {
            RenewToken(token1);
            RenewToken(token2);
            RenewToken(token3);
            RenewToken(token4);
            RenewToken(token5);
        }
        catch (IOException)
        {
            NUnit.Framework.Assert.Fail("Could not renew or cancel the token");
        }
        // restart cluster again
        cluster.Shutdown();
        cluster = null;
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes)
            .Format(false).Build();
        cluster.WaitActive();
        namesystem = cluster.GetNamesystem();
        try
        {
            RenewToken(token1);
            CancelToken(token1);
            RenewToken(token2);
            CancelToken(token2);
            RenewToken(token3);
            CancelToken(token3);
            RenewToken(token4);
            CancelToken(token4);
            RenewToken(token5);
            CancelToken(token5);
        }
        catch (IOException)
        {
            NUnit.Framework.Assert.Fail("Could not renew or cancel the token");
        }
    }
    finally
    {
        if (fs != null)
        {
            fs.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}