/// <summary>
/// Starts a node via <c>NameNode.CreateNameNode</c> using a copy of <paramref name="conf"/>
/// and asserts that the resulting node is in safe mode and in the standby HA state.
/// </summary>
/// <remarks>
/// The copy has its name directory set to <c>GetBackupNodeDir(startupOpt, idx)</c>, its edits
/// directory set to a <c>${...}</c> reference to the name-directory key, and both the backup
/// RPC and backup HTTP addresses set to <c>127.0.0.1:0</c>. The caller's configuration object
/// is not modified by this method.
/// </remarks>
/// <param name="conf">Base configuration that is copied before being adjusted.</param>
/// <param name="startupOpt">Startup option; its name is passed as the sole startup argument.</param>
/// <param name="idx">Index forwarded to <c>GetBackupNodeDir</c> to pick the storage directory.</param>
/// <returns>The started node, cast to <c>BackupNode</c>.</returns>
/// <exception cref="System.IO.IOException"/>
internal virtual BackupNode StartBackupNode(Configuration conf, HdfsServerConstants.StartupOption startupOpt, int idx)
{
    const string loopbackAddress = "127.0.0.1:0";

    // Work on a private copy so the caller's configuration stays untouched.
    Configuration nodeConf = new HdfsConfiguration(conf);
    string storageDirs = GetBackupNodeDir(startupOpt, idx);

    nodeConf.Set(DFSConfigKeys.DfsNamenodeNameDirKey, storageDirs);
    // The edits directory is expressed as a reference to the name-directory key.
    nodeConf.Set(DFSConfigKeys.DfsNamenodeEditsDirKey,
        "${" + DFSConfigKeys.DfsNamenodeNameDirKey + "}");
    nodeConf.Set(DFSConfigKeys.DfsNamenodeBackupAddressKey, loopbackAddress);
    nodeConf.Set(DFSConfigKeys.DfsNamenodeBackupHttpAddressKey, loopbackAddress);

    string[] startupArgs = new string[] { startupOpt.GetName() };
    BackupNode node = (BackupNode)NameNode.CreateNameNode(startupArgs, nodeConf);

    string safeModeMessage = node.GetRole() + " must be in SafeMode.";
    NUnit.Framework.Assert.IsTrue(safeModeMessage, node.IsInSafeMode());

    string standbyMessage = node.GetRole() + " must be in StandbyState";
    NUnit.Framework.Assert.IsTrue(
        standbyMessage,
        Sharpen.Runtime.EqualsIgnoreCase(
            node.GetNamesystem().GetHAState(),
            HAServiceProtocol.HAServiceState.Standby.ToString()));

    return node;
}
/// <summary>
/// Exercises checkpointing against a node started with <paramref name="op"/> in three phases,
/// each using its own <c>MiniDFSCluster</c> instance.
/// </summary>
/// <remarks>
/// Phase 1 builds a fresh cluster, creates <c>/checkpoint.dat</c> (as a directory), starts the
/// node and waits for a checkpoint. Phase 2 restarts the cluster without formatting, replaces
/// file1 with file2, takes further checkpoints, and checks write/read behaviour against the
/// node. Phase 3 restarts once more and verifies that file2 exists and file1 does not. After
/// phases 1 and 2 the node's and the name-node's <c>current</c> directories are compared,
/// ignoring <c>VERSION</c>.
/// </remarks>
/// <param name="op">Startup option handed to <c>StartBackupNode</c> and <c>GetBackupNodeDir</c>.</param>
/// <exception cref="System.Exception"/>
internal virtual void TestCheckpoint(HdfsServerConstants.StartupOption op)
{
    Path file1 = new Path("/checkpoint.dat");
    Path file2 = new Path("/checkpoint2.dat");
    Path file3 = new Path("/backup.dat");

    Configuration conf = new HdfsConfiguration();
    HAUtil.SetAllowStandbyReads(conf, true);
    short replication = (short)conf.GetInt("dfs.replication", 3);
    int numDatanodes = Math.Max(3, replication);
    conf.Set(DFSConfigKeys.DfsNamenodeBackupHttpAddressKey, "localhost:0");
    conf.Set(DFSConfigKeys.DfsBlockreportInitialDelayKey, "0");
    // disable block scanner
    conf.SetInt(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, -1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 1);

    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    BackupNode backup = null;

    // ---- Phase 1: fresh cluster, create file1, take a checkpoint ----
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Build();
        fileSys = cluster.GetFileSystem();

        //
        // verify that 'format' really blew away all pre-existing files
        //
        NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file1));
        NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file2));

        //
        // Create file1 (created as a directory through Mkdirs)
        //
        NUnit.Framework.Assert.IsTrue(fileSys.Mkdirs(file1));

        //
        // Take a checkpoint
        //
        long txid = cluster.GetNameNodeRpc().GetTransactionID();
        backup = StartBackupNode(conf, op, 1);
        WaitCheckpointDone(cluster, txid);
    }
    catch (IOException e)
    {
        Log.Error("Error in TestBackupNode:", e);
        NUnit.Framework.Assert.IsTrue(e.GetLocalizedMessage(), false);
    }
    finally
    {
        if (backup != null)
        {
            backup.Stop();
        }
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }

    // Drop the references to the instances that were just shut down, so that a
    // failure while rebuilding the cluster in the next phase cannot make that
    // phase's cleanup stop/close them a second time.
    backup = null;
    fileSys = null;
    cluster = null;

    FilePath nnCurDir = new FilePath(BaseDir, "name1/current/");
    FilePath bnCurDir = new FilePath(GetBackupNodeDir(op, 1), "/current/");
    FSImageTestUtil.AssertParallelFilesAreIdentical(
        ImmutableList.Of(bnCurDir, nnCurDir), ImmutableSet.Of<string>("VERSION"));

    // ---- Phase 2: restart, swap file1 for file2, checkpoint, probe the node ----
    try
    {
        //
        // Restart cluster and verify that file1 still exist.
        //
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes).Format(false).Build();
        fileSys = cluster.GetFileSystem();

        // check that file1 still exists
        NUnit.Framework.Assert.IsTrue(fileSys.Exists(file1));
        fileSys.Delete(file1, true);

        // create new file file2
        fileSys.Mkdirs(file2);

        //
        // Take a checkpoint
        //
        long txid = cluster.GetNameNodeRpc().GetTransactionID();
        backup = StartBackupNode(conf, op, 1);
        WaitCheckpointDone(cluster, txid);

        for (int i = 0; i < 10; i++)
        {
            fileSys.Mkdirs(new Path("file_" + i));
        }

        // Two explicit checkpoints in a row, each awaited before continuing.
        txid = cluster.GetNameNodeRpc().GetTransactionID();
        backup.DoCheckpoint();
        WaitCheckpointDone(cluster, txid);

        txid = cluster.GetNameNodeRpc().GetTransactionID();
        backup.DoCheckpoint();
        WaitCheckpointDone(cluster, txid);

        // Try BackupNode operations
        IPEndPoint add = backup.GetNameNodeAddress();

        // Write to BN
        FileSystem bnFS = FileSystem.Get(
            new Path("hdfs://" + NetUtils.GetHostPortString(add)).ToUri(), conf);
        bool canWrite = true;
        try
        {
            Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.WriteFile(bnFS, file3, replication);
        }
        catch (IOException eio)
        {
            Log.Info("Write to " + backup.GetRole() + " failed as expected: ", eio);
            canWrite = false;
        }
        NUnit.Framework.Assert.IsFalse("Write to BackupNode must be prohibited.", canWrite);

        // Reads are allowed for BackupNode, but not for CheckpointNode
        bool canRead = true;
        try
        {
            bnFS.Exists(file2);
        }
        catch (IOException eio)
        {
            Log.Info("Read from " + backup.GetRole() + " failed: ", eio);
            canRead = false;
        }
        NUnit.Framework.Assert.AreEqual(
            "Reads to BackupNode are allowed, but not CheckpointNode.",
            canRead, backup.IsRole(HdfsServerConstants.NamenodeRole.Backup));

        Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.WriteFile(fileSys, file3, replication);
        Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.CheckFile(fileSys, file3, replication);

        // should also be on BN right away
        NUnit.Framework.Assert.IsTrue(
            "file3 does not exist on BackupNode",
            op != HdfsServerConstants.StartupOption.Backup
                || backup.GetNamesystem().GetFileInfo(file3.ToUri().GetPath(), false) != null);
    }
    catch (IOException e)
    {
        Log.Error("Error in TestBackupNode:", e);
        throw new Exception(e);
    }
    finally
    {
        if (backup != null)
        {
            backup.Stop();
        }
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }

    // Same reasoning as after phase 1: phase 3 must only clean up what it created.
    backup = null;
    fileSys = null;
    cluster = null;

    FSImageTestUtil.AssertParallelFilesAreIdentical(
        ImmutableList.Of(bnCurDir, nnCurDir), ImmutableSet.Of<string>("VERSION"));

    // ---- Phase 3: restart and verify the namespace survived ----
    try
    {
        //
        // Restart cluster and verify that file2 exists and
        // file1 does not exist.
        //
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Format(false).Build();
        fileSys = cluster.GetFileSystem();

        NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file1));

        // verify that file2 exists
        NUnit.Framework.Assert.IsTrue(fileSys.Exists(file2));
    }
    catch (IOException e)
    {
        Log.Error("Error in TestBackupNode: ", e);
        NUnit.Framework.Assert.IsTrue(e.GetLocalizedMessage(), false);
    }
    finally
    {
        // Guarded like the earlier phases: if building the cluster failed, there
        // is nothing to close and the original failure must not be masked.
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>Create a new checkpoint</summary>
/// <remarks>
/// Sequence, as implemented below: ask the remote name-node to start a checkpoint, wait until
/// the local namespace is frozen, validate the returned signature against the local image,
/// fetch the edit-log manifest, download the image first when the logs alone do not connect
/// to the last applied transaction, download and apply the edit logs, save the image under
/// the namesystem write lock, optionally upload it, and finally signal the end of the
/// checkpoint to the remote name-node.
/// </remarks>
/// <exception cref="System.IO.IOException">
/// Thrown when the name-node requests shutdown, replies with an unsupported command, or
/// when no logs are available to roll forward from the last applied transaction.
/// </exception>
internal virtual void DoCheckpoint()
{
    BackupImage image = GetFSImage();
    NNStorage storage = image.GetStorage();
    long startedAt = Time.MonotonicNow();
    image.FreezeNamespaceAtNextRoll();

    NamenodeCommand command = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration());
    CheckpointCommand checkpointCommand = null;
    switch (command.GetAction())
    {
        case NamenodeProtocol.ActCheckpoint:
        {
            checkpointCommand = (CheckpointCommand)command;
            break;
        }

        case NamenodeProtocol.ActShutdown:
        {
            Shutdown();
            throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown.");
        }

        default:
        {
            throw new IOException("Unsupported NamenodeCommand: " + command.GetAction());
        }
    }

    image.WaitUntilNamespaceFrozen();
    CheckpointSignature signature = checkpointCommand.GetSignature();

    // Make sure we're talking to the same NN!
    signature.ValidateStorageInfo(image);

    long lastApplied = image.GetLastAppliedTxId();
    Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
    RemoteEditLogManifest manifest =
        GetRemoteNamenodeProxy().GetEditLogManifest(image.GetLastAppliedTxId() + 1);

    bool mustReloadImage = false;
    if (!manifest.GetLogs().IsEmpty())
    {
        RemoteEditLog earliestLog = manifest.GetLogs()[0];

        // we don't have enough logs to roll forward using only logs. Need
        // to download and load the image.
        if (earliestLog.GetStartTxId() > lastApplied + 1)
        {
            Log.Info("Unable to roll forward using only logs. Downloading "
                + "image with txid " + signature.mostRecentCheckpointTxId);
            MD5Hash imageDigest = TransferFsImage.DownloadImageToStorage(
                backupNode.nnHttpAddress, signature.mostRecentCheckpointTxId, storage, true);
            image.SaveDigestAndRenameCheckpointImage(
                NNStorage.NameNodeFile.Image, signature.mostRecentCheckpointTxId, imageDigest);
            lastApplied = signature.mostRecentCheckpointTxId;
            mustReloadImage = true;
        }

        // Re-checked on purpose: lastApplied may have just been advanced to the
        // downloaded image's txid, and the logs must connect to it.
        if (earliestLog.GetStartTxId() > lastApplied + 1)
        {
            throw new IOException("No logs to roll forward from " + lastApplied);
        }

        // get edits files
        foreach (RemoteEditLog remoteLog in manifest.GetLogs())
        {
            TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, remoteLog, storage);
        }

        if (mustReloadImage)
        {
            Log.Info("Loading image with txid " + signature.mostRecentCheckpointTxId);
            FilePath imageFile = storage.FindImageFile(
                NNStorage.NameNodeFile.Image, signature.mostRecentCheckpointTxId);
            image.ReloadFromImageFile(imageFile, backupNode.GetNamesystem());
        }

        RollForwardByApplyingLogs(manifest, image, backupNode.GetNamesystem());
    }

    long txid = image.GetLastAppliedTxId();

    // The image is saved while holding the namesystem write lock.
    backupNode.namesystem.WriteLock();
    try
    {
        backupNode.namesystem.SetImageLoaded();
        if (backupNode.namesystem.GetBlocksTotal() > 0)
        {
            backupNode.namesystem.SetBlockTotal();
        }
        image.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
        storage.WriteAll();
    }
    finally
    {
        backupNode.namesystem.WriteUnlock();
    }

    if (checkpointCommand.NeedToReturnImage())
    {
        TransferFsImage.UploadImageFromStorage(
            backupNode.nnHttpAddress, conf, storage, NNStorage.NameNodeFile.Image, txid);
    }

    GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), signature);

    if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
    {
        image.ConvergeJournalSpool();
    }

    // keep registration up to date
    backupNode.SetRegistration();

    long imageSize = image.GetStorage().GetFsImageName(txid).Length();
    long elapsedSeconds = (Time.MonotonicNow() - startedAt) / 1000;
    Log.Info("Checkpoint completed in " + elapsedSeconds + " seconds." + " New Image Size: " + imageSize);
}