Example 1
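        /// <summary>
        /// Starts a BackupNode (or CheckpointNode, depending on the startup option) on a copy of the
        /// given configuration and asserts that it comes up in SafeMode and in the Standby HA state.
        /// </summary>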
        /// <exception cref="System.IO.IOException"/>
        internal virtual BackupNode StartBackupNode(Configuration conf,
                                                    HdfsServerConstants.StartupOption startupOpt, int idx)
        {
            Configuration c    = new HdfsConfiguration(conf);
            string        dirs = GetBackupNodeDir(startupOpt, idx);

            c.Set(DFSConfigKeys.DfsNamenodeNameDirKey, dirs);
            c.Set(DFSConfigKeys.DfsNamenodeEditsDirKey, "${" + DFSConfigKeys.DfsNamenodeNameDirKey
                  + "}");
            c.Set(DFSConfigKeys.DfsNamenodeBackupAddressKey, "127.0.0.1:0");
            c.Set(DFSConfigKeys.DfsNamenodeBackupHttpAddressKey, "127.0.0.1:0");
            BackupNode bn = (BackupNode)NameNode.CreateNameNode(new string[] { startupOpt.GetName() }, c);

            NUnit.Framework.Assert.IsTrue(bn.GetRole() + " must be in SafeMode.", bn.IsInSafeMode());
            NUnit.Framework.Assert.IsTrue(bn.GetRole() + " must be in StandbyState",
                                          Sharpen.Runtime.EqualsIgnoreCase(bn.GetNamesystem().GetHAState(),
                                              HAServiceProtocol.HAServiceState.Standby.ToString()));
            return bn;
        }
Example 2
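        /// <summary>
        /// Verifies that a BackupNode tails edits from the active NameNode: it stays in sync across
        /// edit-log rolls and checkpoints, does not finalize the NameNode's in-progress edit segment
        /// when stopped, and catches up with edits made while it was down once restarted.
        /// </summary>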
        public virtual void TestBackupNodeTailsEdits()
        {
            Configuration conf = new HdfsConfiguration();

            HAUtil.SetAllowStandbyReads(conf, true);
            MiniDFSCluster cluster = null;
            FileSystem     fileSys = null;
            BackupNode     backup  = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Build();
                fileSys = cluster.GetFileSystem();
                backup  = StartBackupNode(conf, HdfsServerConstants.StartupOption.Backup, 1);
                BackupImage bnImage = (BackupImage)backup.GetFSImage();
                TestBNInSync(cluster, backup, 1);
                // Force a roll -- BN should roll with NN.
                NameNode          nn    = cluster.GetNameNode();
                NamenodeProtocols nnRpc = nn.GetRpcServer();
                nnRpc.RollEditLog();
                NUnit.Framework.Assert.AreEqual(bnImage.GetEditLog().GetCurSegmentTxId(),
                                                nn.GetFSImage().GetEditLog().GetCurSegmentTxId());
                // BN should stay in sync after roll
                TestBNInSync(cluster, backup, 2);
                long nnImageBefore = nn.GetFSImage().GetStorage().GetMostRecentCheckpointTxId();
                // BN checkpoint
                backup.DoCheckpoint();
                // NN should have received a new image
                long nnImageAfter = nn.GetFSImage().GetStorage().GetMostRecentCheckpointTxId();
                NUnit.Framework.Assert.IsTrue("nn should have received new checkpoint. before: "
                                              + nnImageBefore + " after: " + nnImageAfter, nnImageAfter > nnImageBefore);
                // BN should stay in sync after checkpoint
                TestBNInSync(cluster, backup, 3);
                // Stop BN
                Storage.StorageDirectory sd = bnImage.GetStorage().GetStorageDir(0);
                backup.Stop();
                backup = null;
                // When shutting down the BN, it shouldn't finalize logs that are
                // still open on the NN
                FileJournalManager.EditLogFile editsLog = FSImageTestUtil.FindLatestEditsLog(sd);
                NUnit.Framework.Assert.AreEqual(editsLog.GetFirstTxId(),
                                                nn.GetFSImage().GetEditLog().GetCurSegmentTxId());
                NUnit.Framework.Assert.IsTrue("Should not have finalized " + editsLog, editsLog.IsInProgress());
                // do some edits
                NUnit.Framework.Assert.IsTrue(fileSys.Mkdirs(new Path("/edit-while-bn-down")));
                // start a new backup node
                backup = StartBackupNode(conf, HdfsServerConstants.StartupOption.Backup, 1);
                TestBNInSync(cluster, backup, 4);
                NUnit.Framework.Assert.IsNotNull(
                    backup.GetNamesystem().GetFileInfo("/edit-while-bn-down", false));
            }
            finally
            {
                Log.Info("Shutting down...");
                if (backup != null)
                {
                    backup.Stop();
                }
                if (fileSys != null)
                {
                    fileSys.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
            AssertStorageDirsMatch(cluster.GetNameNode(), backup);
        }
Example 3
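        /// <summary>
        /// Exercises the checkpoint path for a BackupNode or CheckpointNode: takes checkpoints against
        /// a MiniDFSCluster, restarts the cluster to confirm the checkpointed namespace survives, and
        /// checks that writes to the node are rejected while reads succeed only in Backup mode.
        /// </summary>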
        /// <exception cref="System.Exception"/>
        internal virtual void TestCheckpoint(HdfsServerConstants.StartupOption op)
        {
            Path          file1 = new Path("/checkpoint.dat");
            Path          file2 = new Path("/checkpoint2.dat");
            Path          file3 = new Path("/backup.dat");
            Configuration conf  = new HdfsConfiguration();

            HAUtil.SetAllowStandbyReads(conf, true);
            short replication  = (short)conf.GetInt("dfs.replication", 3);
            int   numDatanodes = Math.Max(3, replication);

            conf.Set(DFSConfigKeys.DfsNamenodeBackupHttpAddressKey, "localhost:0");
            conf.Set(DFSConfigKeys.DfsBlockreportInitialDelayKey, "0");
            conf.SetInt(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, -1); // disable block scanner
            conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 1);
            MiniDFSCluster cluster = null;
            FileSystem     fileSys = null;
            BackupNode     backup  = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Build();
                fileSys = cluster.GetFileSystem();
                //
                // verify that 'format' really blew away all pre-existing files
                //
                NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file1));
                NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file2));
                //
                // Create file1
                //
                NUnit.Framework.Assert.IsTrue(fileSys.Mkdirs(file1));
                //
                // Take a checkpoint
                //
                long txid = cluster.GetNameNodeRpc().GetTransactionID();
                backup = StartBackupNode(conf, op, 1);
                WaitCheckpointDone(cluster, txid);
            }
            catch (IOException e)
            {
                Log.Error("Error in TestBackupNode:", e);
                NUnit.Framework.Assert.IsTrue(e.GetLocalizedMessage(), false);
            }
            finally
            {
                if (backup != null)
                {
                    backup.Stop();
                }
                if (fileSys != null)
                {
                    fileSys.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
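            // After shutdown, the BackupNode's current/ directory should mirror the NameNode's,
            // apart from the VERSION file.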
            FilePath nnCurDir = new FilePath(BaseDir, "name1/current/");
            FilePath bnCurDir = new FilePath(GetBackupNodeDir(op, 1), "/current/");

            FSImageTestUtil.AssertParallelFilesAreIdentical(ImmutableList.Of(bnCurDir, nnCurDir),
                                                            ImmutableSet.Of<string>("VERSION"));
            try
            {
                //
                // Restart cluster and verify that file1 still exist.
                //
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes).Format(false).Build();
                fileSys = cluster.GetFileSystem();
                // check that file1 still exists
                NUnit.Framework.Assert.IsTrue(fileSys.Exists(file1));
                fileSys.Delete(file1, true);
                // create new file file2
                fileSys.Mkdirs(file2);
                //
                // Take a checkpoint
                //
                long txid = cluster.GetNameNodeRpc().GetTransactionID();
                backup = StartBackupNode(conf, op, 1);
                WaitCheckpointDone(cluster, txid);
                for (int i = 0; i < 10; i++)
                {
                    fileSys.Mkdirs(new Path("file_" + i));
                }
                txid = cluster.GetNameNodeRpc().GetTransactionID();
                backup.DoCheckpoint();
                WaitCheckpointDone(cluster, txid);
                txid = cluster.GetNameNodeRpc().GetTransactionID();
                backup.DoCheckpoint();
                WaitCheckpointDone(cluster, txid);
                // Try BackupNode operations
                IPEndPoint add = backup.GetNameNodeAddress();
                // Write to BN
                FileSystem bnFS = FileSystem.Get(
                    new Path("hdfs://" + NetUtils.GetHostPortString(add)).ToUri(), conf);
                bool canWrite = true;
                try
                {
                    Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.WriteFile(bnFS, file3, replication);
                }
                catch (IOException eio)
                {
                    Log.Info("Write to " + backup.GetRole() + " failed as expected: ", eio);
                    canWrite = false;
                }
                NUnit.Framework.Assert.IsFalse("Write to BackupNode must be prohibited.", canWrite);
                // Reads are allowed for BackupNode, but not for CheckpointNode
                bool canRead = true;
                try
                {
                    bnFS.Exists(file2);
                }
                catch (IOException eio)
                {
                    Log.Info("Read from " + backup.GetRole() + " failed: ", eio);
                    canRead = false;
                }
                NUnit.Framework.Assert.AreEqual("Reads to BackupNode are allowed, but not CheckpointNode."
                                                , canRead, backup.IsRole(HdfsServerConstants.NamenodeRole.Backup));
                Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.WriteFile(fileSys, file3, replication
                                                                                );
                Org.Apache.Hadoop.Hdfs.Server.Namenode.TestCheckpoint.CheckFile(fileSys, file3, replication
                                                                                );
                // should also be on BN right away
                NUnit.Framework.Assert.IsTrue("file3 does not exist on BackupNode", op != HdfsServerConstants.StartupOption
                                              .Backup || backup.GetNamesystem().GetFileInfo(file3.ToUri().GetPath(), false) !=
                                              null);
            }
            catch (IOException e)
            {
                Log.Error("Error in TestBackupNode:", e);
                throw new Exception(e.Message, e);
            }
            finally
            {
                if (backup != null)
                {
                    backup.Stop();
                }
                if (fileSys != null)
                {
                    fileSys.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
            FSImageTestUtil.AssertParallelFilesAreIdentical(ImmutableList.Of(bnCurDir, nnCurDir),
                                                            ImmutableSet.Of<string>("VERSION"));
            try
            {
                //
                // Restart cluster and verify that file2 exists and
                // file1 does not exist.
                //
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Format(false).Build();
                fileSys = cluster.GetFileSystem();
                NUnit.Framework.Assert.IsTrue(!fileSys.Exists(file1));
                // verify that file2 exists
                NUnit.Framework.Assert.IsTrue(fileSys.Exists(file2));
            }
            catch (IOException e)
            {
                Log.Error("Error in TestBackupNode: ", e);
                NUnit.Framework.Assert.IsTrue(e.GetLocalizedMessage(), false);
            }
            finally
            {
                fileSys.Close();
                cluster.Shutdown();
            }
        }
Example 4
        /// <summary>Create a new checkpoint</summary>
        /// <exception cref="System.IO.IOException"/>
        internal virtual void DoCheckpoint()
        {
            BackupImage bnImage   = GetFSImage();
            NNStorage   bnStorage = bnImage.GetStorage();
            long        startTime = Time.MonotonicNow();

            bnImage.FreezeNamespaceAtNextRoll();
            NamenodeCommand cmd = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration());
            CheckpointCommand cpCmd = null;

            switch (cmd.GetAction())
            {
            case NamenodeProtocol.ActShutdown:
            {
                Shutdown();
                throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown."
                                      );
            }

            case NamenodeProtocol.ActCheckpoint:
            {
                cpCmd = (CheckpointCommand)cmd;
                break;
            }

            default:
            {
                throw new IOException("Unsupported NamenodeCommand: " + cmd.GetAction());
            }
            }
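            // Wait for the edit-log roll triggered by StartCheckpoint to take effect, so the local
            // namespace is frozen at a consistent transaction id before the image is saved.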
            bnImage.WaitUntilNamespaceFrozen();
            CheckpointSignature sig = cpCmd.GetSignature();

            // Make sure we're talking to the same NN!
            sig.ValidateStorageInfo(bnImage);
            long lastApplied = bnImage.GetLastAppliedTxId();

            Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
            RemoteEditLogManifest manifest =
                GetRemoteNamenodeProxy().GetEditLogManifest(bnImage.GetLastAppliedTxId() + 1);
            bool needReloadImage = false;

            if (!manifest.GetLogs().IsEmpty())
            {
                RemoteEditLog firstRemoteLog = manifest.GetLogs()[0];
                // we don't have enough logs to roll forward using only logs. Need
                // to download and load the image.
                if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
                {
                    Log.Info("Unable to roll forward using only logs. Downloading " + "image with txid "
                             + sig.mostRecentCheckpointTxId);
                    MD5Hash downloadedHash = TransferFsImage.DownloadImageToStorage(backupNode.nnHttpAddress
                                                                                    , sig.mostRecentCheckpointTxId, bnStorage, true);
                    bnImage.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId
                                                               , downloadedHash);
                    lastApplied     = sig.mostRecentCheckpointTxId;
                    needReloadImage = true;
                }
                if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
                {
                    throw new IOException("No logs to roll forward from " + lastApplied);
                }
                // get edits files
                foreach (RemoteEditLog log in manifest.GetLogs())
                {
                    TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, log, bnStorage);
                }
                if (needReloadImage)
                {
                    Log.Info("Loading image with txid " + sig.mostRecentCheckpointTxId);
                    FilePath file = bnStorage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId);
                    bnImage.ReloadFromImageFile(file, backupNode.GetNamesystem());
                }
                RollForwardByApplyingLogs(manifest, bnImage, backupNode.GetNamesystem());
            }
            long txid = bnImage.GetLastAppliedTxId();

            backupNode.namesystem.WriteLock();
            try
            {
                backupNode.namesystem.SetImageLoaded();
                if (backupNode.namesystem.GetBlocksTotal() > 0)
                {
                    backupNode.namesystem.SetBlockTotal();
                }
                bnImage.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
                bnStorage.WriteAll();
            }
            finally
            {
                backupNode.namesystem.WriteUnlock();
            }
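            // If the NameNode requested the image back, upload the newly saved checkpoint image to it.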
            if (cpCmd.NeedToReturnImage())
            {
                TransferFsImage.UploadImageFromStorage(backupNode.nnHttpAddress, conf, bnStorage,
                                                       NNStorage.NameNodeFile.Image, txid);
            }
            GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), sig);
            if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
            {
                bnImage.ConvergeJournalSpool();
            }
            backupNode.SetRegistration(); // keep registration up to date
            long imageSize = bnImage.GetStorage().GetFsImageName(txid).Length();

            Log.Info("Checkpoint completed in " + (Time.MonotonicNow() - startTime) / 1000 +
                     " seconds." + " New Image Size: " + imageSize);
        }