/// <summary>
        /// Runs <c>DoWriteAndAbort</c> on <c>/test</c>, creates a file and adds a single block to it
        /// through the NameNode RPC, snapshots the directory, renames and deletes the parent
        /// directory, then saves the namespace and restarts the NameNode.
        /// </summary>
        public virtual void TestOpenFilesWithRename()
        {
            Path snapshotRoot = new Path("/test");
            DoWriteAndAbort(fs, snapshotRoot);

            // check for zero sized blocks
            Path emptyBlockFile = new Path("/test/test/test4");
            // The value returned by Create is discarded; nothing in this method closes the file.
            fs.Create(emptyBlockFile);

            NamenodeProtocols rpc = cluster.GetNameNodeRpc();
            string clientId = fs.GetClient().GetClientName();
            // create one empty block
            rpc.AddBlock(emptyBlockFile.ToString(), clientId, null, null, INodeId.GrandfatherInodeId, null);

            fs.CreateSnapshot(snapshotRoot, "s2");
            Path originalDir = new Path("/test/test");
            Path renamedDir = new Path("/test/test-renamed");
            fs.Rename(originalDir, renamedDir);
            fs.Delete(new Path("/test/test-renamed"), true);

            // Checkpoint inside safe mode, then restart the NameNode.
            NameNode activeNameNode = cluster.GetNameNode();
            NameNodeAdapter.EnterSafeMode(activeNameNode, false);
            NameNodeAdapter.SaveNamespace(activeNameNode);
            NameNodeAdapter.LeaveSafeMode(activeNameNode);
            cluster.RestartNameNode(true);
        }
Beispiel #2
0
        /// <summary>
        /// Restarts the standby NameNode after an edit-log roll, a second file creation and a
        /// delete, checks that it reports safe mode, and verifies that two consecutive manual
        /// <c>EnterSafeMode</c> calls leave it in safe mode.
        /// </summary>
        public virtual void TestEnterSafeModeInSBNShouldNotThrowNPE()
        {
            Banner("Starting with NN0 active and NN1 standby, creating some blocks");
            DFSTestUtil.CreateFile(fs, new Path("/test"), 3 * BlockSize, (short)3, 1L);
            // Roll edit log so that, when the SBN restarts, it will load
            // the namespace during startup and enter safemode.
            nn0.GetRpcServer().RollEditLog();
            Banner("Creating some blocks that won't be in the edit log");
            DFSTestUtil.CreateFile(fs, new Path("/test2"), 5 * BlockSize, (short)3, 1L);
            Banner("Deleting the original blocks");
            fs.Delete(new Path("/test"), true);
            Banner("Restarting standby");
            RestartStandby();
            FSNamesystem namesystem = nn1.GetNamesystem();
            string       status     = namesystem.GetSafemode();

            // NUnit takes the condition first and the failure message second.
            // The prefix check is ordinal so it does not depend on the current culture.
            NUnit.Framework.Assert.IsTrue(
                status.StartsWith("Safe mode is ON.", System.StringComparison.Ordinal),
                "Bad safemode status: '" + status + "'");
            NameNodeAdapter.EnterSafeMode(nn1, false);
            NUnit.Framework.Assert.IsTrue(namesystem.IsInSafeMode(),
                "Failed to enter into safemode in standby");
            // Entering safe mode a second time must also leave the namesystem in safe mode.
            NameNodeAdapter.EnterSafeMode(nn1, false);
            NUnit.Framework.Assert.IsTrue(namesystem.IsInSafeMode(),
                "Failed to enter into safemode in standby");
        }
Beispiel #3
0
        /// <summary>Make sure the client retries when the active NN is in safemode</summary>
        /// <remarks>
        /// Puts nn0 into safe mode with a 30000 safe-mode extension, starts a helper thread,
        /// asserts that the test path does not exist yet, leaves safe mode after a one second
        /// sleep, and finally waits until the helper thread has recorded a result for the path.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestClientRetrySafeMode()
        {
            IDictionary <Path, bool> results = Collections.SynchronizedMap(new Dictionary <Path
                                                                                           , bool>());
            Path test = new Path("/test");

            // let nn0 enter safemode
            NameNodeAdapter.EnterSafeMode(nn0, false);
            FSNamesystem.SafeModeInfo safeMode = (FSNamesystem.SafeModeInfo)Whitebox.GetInternalState
                                                     (nn0.GetNamesystem(), "safeMode");
            Whitebox.SetInternalState(safeMode, "extension", Sharpen.Extensions.ValueOf(30000
                                                                                        ));
            Log.Info("enter safemode");
            new _Thread_133(this, test, results).Start();
            // make sure the client's call has actually been handled by the active NN
            // (NUnit takes the condition first and the failure message second)
            NUnit.Framework.Assert.IsFalse(fs.Exists(test),
                "The directory should not be created while NN in safemode");
            Sharpen.Thread.Sleep(1000);
            // let nn0 leave safemode
            NameNodeAdapter.LeaveSafeMode(nn0);
            Log.Info("leave safemode");
            // NOTE(review): the helper thread presumably signals on this same monitor once it
            // has stored its result, so the lock object must stay `this` - confirm in _Thread_133.
            lock (this)
            {
                // Look the path up by key; IDictionary.Contains expects a key/value pair.
                while (!results.ContainsKey(test))
                {
                    Sharpen.Runtime.Wait(this);
                }
                NUnit.Framework.Assert.IsTrue(results[test]);
            }
        }
Beispiel #4
0
 /// <summary>
 /// Checks that <c>-getServiceState</c> exits with 0 for both NameNodes initially, and for nn1
 /// after NameNode 0 is made active and again after it is put into safe mode.
 /// </summary>
 public virtual void TestGetServiceState()
 {
     const string StateCommand = "-getServiceState";
     const int Success = 0;

     // Initial state: query both NameNodes.
     NUnit.Framework.Assert.AreEqual(Success, RunTool(StateCommand, "nn1"));
     NUnit.Framework.Assert.AreEqual(Success, RunTool(StateCommand, "nn2"));

     // After NameNode 0 becomes active.
     cluster.TransitionToActive(0);
     NUnit.Framework.Assert.AreEqual(Success, RunTool(StateCommand, "nn1"));

     // After NameNode 0 enters safe mode.
     NameNodeAdapter.EnterSafeMode(cluster.GetNameNode(0), false);
     NUnit.Framework.Assert.AreEqual(Success, RunTool(StateCommand, "nn1"));
 }
Beispiel #5
0
 /// <summary>
 /// Verifies that <c>-failover nn2 nn1</c> returns -1 and reports that the target is in
 /// safemode when NameNode 0 has been put into safe mode.
 /// </summary>
 public virtual void TestTryFailoverToSafeMode()
 {
     conf.Set(DFSConfigKeys.DfsHaFenceMethodsKey, TestDFSHAAdmin.GetFencerTrueCommand());
     tool.SetConf(conf);
     NameNodeAdapter.EnterSafeMode(cluster.GetNameNode(0), false);
     NUnit.Framework.Assert.AreEqual(-1, RunTool("-failover", "nn2", "nn1"));
     // NUnit takes the condition first and the failure message second.
     NUnit.Framework.Assert.IsTrue(
         errOutput.Contains("is not ready to become active: " + "The NameNode is in safemode"),
         "Bad output: " + errOutput);
 }
Beispiel #6
0
        /// <summary>
        /// Lowers a file's replication from 3 to 1 on nn1, fails over to nn2 while nn1 is left in
        /// safe mode with aborted edit logs, and checks that nn2 ends up with no postponed,
        /// under-replicated or pending-replication blocks and that the file can still be read.
        /// </summary>
        public virtual void TestDnFencing()
        {
            // Create a file with replication level 3.
            DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)3, 1L);
            ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestFilePath);

            // Drop its replication count to 1, so it becomes over-replicated.
            // Then compute the invalidation of the extra blocks and trigger
            // heartbeats so the invalidations are flushed to the DNs.
            nn1.GetRpcServer().SetReplication(TestFile, (short)1);
            BlockManager nn1BlockManager = nn1.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.ComputeInvalidationWork(nn1BlockManager);
            cluster.TriggerHeartbeats();

            // Transition nn2 to active even though nn1 still thinks it's active.
            Banner("Failing to NN2 but let NN1 continue to think it's active");
            NameNodeAdapter.AbortEditLogs(nn1);
            NameNodeAdapter.EnterSafeMode(nn1, false);
            cluster.TransitionToActive(1);

            // Check that the standby picked up the replication change.
            NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

            // Dump some info for debugging purposes.
            Banner("NN2 Metadata immediately after failover");
            DoMetasave(nn2);

            Banner("Triggering heartbeats and block reports so that fencing is completed");
            cluster.TriggerHeartbeats();
            cluster.TriggerBlockReports();

            Banner("Metadata after nodes have all block-reported");
            DoMetasave(nn2);

            // Force a rescan of postponedMisreplicatedBlocks.
            BlockManager nn2BlockManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.CheckHeartbeat(nn2BlockManager);
            BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(nn2BlockManager);

            // The blocks should no longer be postponed.
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

            // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
            cluster.TriggerHeartbeats();
            HATestUtil.WaitForDNDeletions(cluster);
            cluster.TriggerDeletionReports();
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());

            Banner("Making sure the file is still readable");
            FileSystem nn2FileSystem = cluster.GetFileSystem(1);
            DFSTestUtil.ReadFile(nn2FileSystem, TestFilePath);

            Banner("Waiting for the actual block files to get deleted from DNs.");
            WaitForTrueReplication(cluster, firstBlock, 1);
        }
        /// <summary>
        /// Shuts the cluster down and initializes it again, saving the namespace beforehand when
        /// requested.
        /// </summary>
        /// <param name="checkpoint">
        /// true to enter safe mode and save the namespace before the shutdown
        /// </param>
        /// <exception cref="System.Exception">if restart fails</exception>
        private static void Restart(bool checkpoint)
        {
            NameNode activeNameNode = cluster.GetNameNode();
            if (checkpoint)
            {
                // Safe mode is entered before saving the namespace; it is not left again
                // because the cluster is shut down immediately afterwards.
                NameNodeAdapter.EnterSafeMode(activeNameNode, false);
                NameNodeAdapter.SaveNamespace(activeNameNode);
            }

            Shutdown();
            InitCluster(false);
        }
Beispiel #8
0
 /// <summary>
 /// Saves a namespace containing a four-block file, then creates a two-block file and deletes
 /// the first one before restarting the active NameNode.
 /// </summary>
 public virtual void TestBlocksDeletedInEditLog()
 {
     Banner("Starting with NN0 active and NN1 standby, creating some blocks");

     // Make 4 blocks persisted in the image.
     Path imageFile = new Path("/test");
     DFSTestUtil.CreateFile(fs, imageFile, 4 * BlockSize, (short)3, 1L);
     NameNodeAdapter.EnterSafeMode(nn0, false);
     NameNodeAdapter.SaveNamespace(nn0);
     NameNodeAdapter.LeaveSafeMode(nn0);

     // OP_ADD for 2 blocks
     Path editLogFile = new Path("/test2");
     DFSTestUtil.CreateFile(fs, editLogFile, 2 * BlockSize, (short)3, 1L);

     // OP_DELETE for 4 blocks
     Path deletedFile = new Path("/test");
     fs.Delete(deletedFile, true);

     RestartActive();
 }
        /// <summary>
        /// Runs <c>DoWriteAndAbort</c> on <c>/test</c>, creates and then deletes snapshot
        /// <c>s2</c> around a delete of <c>/test/test</c>, triggers block reports and restarts
        /// the NameNode.
        /// </summary>
        /// <param name="saveNamespace">
        /// true to save the namespace (inside safe mode) before the restart
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void DoTestMultipleSnapshots(bool saveNamespace)
        {
            const string SnapshotName = "s2";
            Path snapshotRoot = new Path("/test");

            DoWriteAndAbort(fs, snapshotRoot);
            fs.CreateSnapshot(snapshotRoot, SnapshotName);
            fs.Delete(new Path("/test/test"), true);
            fs.DeleteSnapshot(snapshotRoot, SnapshotName);
            cluster.TriggerBlockReports();

            if (saveNamespace)
            {
                // Checkpoint: the namespace is saved while the NameNode is in safe mode.
                NameNode activeNameNode = cluster.GetNameNode();
                NameNodeAdapter.EnterSafeMode(activeNameNode, false);
                NameNodeAdapter.SaveNamespace(activeNameNode);
                NameNodeAdapter.LeaveSafeMode(activeNameNode);
            }

            cluster.RestartNameNode(true);
        }
Beispiel #10
0
        /// <summary>
        /// Restarts the active NameNode, transitions it back to active, checks that it reports
        /// safe mode, and verifies that two consecutive manual <c>EnterSafeMode</c> calls leave
        /// it in safe mode.
        /// </summary>
        public virtual void TestEnterSafeModeInANNShouldNotThrowNPE()
        {
            Banner("Restarting active");
            DFSTestUtil.CreateFile(fs, new Path("/test"), 3 * BlockSize, (short)3, 1L);
            RestartActive();
            nn0.GetRpcServer().TransitionToActive(new HAServiceProtocol.StateChangeRequestInfo
                                                      (HAServiceProtocol.RequestSource.RequestByUser));
            FSNamesystem namesystem = nn0.GetNamesystem();
            string       status     = namesystem.GetSafemode();

            // NUnit takes the condition first and the failure message second.
            // The prefix check is ordinal so it does not depend on the current culture.
            NUnit.Framework.Assert.IsTrue(
                status.StartsWith("Safe mode is ON.", System.StringComparison.Ordinal),
                "Bad safemode status: '" + status + "'");
            NameNodeAdapter.EnterSafeMode(nn0, false);
            NUnit.Framework.Assert.IsTrue(namesystem.IsInSafeMode(),
                "Failed to enter into safemode in active");
            // Entering safe mode a second time must also leave the namesystem in safe mode.
            NameNodeAdapter.EnterSafeMode(nn0, false);
            NUnit.Framework.Assert.IsTrue(namesystem.IsInSafeMode(),
                "Failed to enter into safemode in active");
        }
        /// <summary>
        /// Checks the delegation token secret manager's running state as the NameNode moves in
        /// and out of safe mode: not running after a restart into safe mode, running after
        /// leaving it, stopped again after manually re-entering it, and running after a second
        /// restart with the safe-mode extension set to 0.
        /// </summary>
        public virtual void TestDTManagerInSafeMode()
        {
            cluster.StartDataNodes(config, 1, true, HdfsServerConstants.StartupOption.Regular
                                   , null);
            FileSystem fs = cluster.GetFileSystem();

            for (int i = 0; i < 5; i++)
            {
                DFSTestUtil.CreateFile(fs, new Path("/test-" + i), 100, (short)1, 1L);
            }
            cluster.GetConfiguration(0).SetInt(DFSConfigKeys.DfsNamenodeDelegationKeyUpdateIntervalKey
                                               , 500);
            cluster.GetConfiguration(0).SetInt(DFSConfigKeys.DfsNamenodeSafemodeExtensionKey,
                                               30000);
            // Restart without waiting for safe mode to end, so the NameNode can be inspected
            // while it is still in safe mode.
            cluster.SetWaitSafeMode(false);
            cluster.RestartNameNode();
            NameNode nn = cluster.GetNameNode();

            NUnit.Framework.Assert.IsTrue(nn.IsInSafeMode());
            DelegationTokenSecretManager sm = NameNodeAdapter.GetDtSecretManager(nn.GetNamesystem
                                                                                     ());

            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsFalse(sm.IsRunning(),
                "Secret manager should not run in safe mode");
            NameNodeAdapter.LeaveSafeMode(nn);
            NUnit.Framework.Assert.IsTrue(sm.IsRunning(),
                "Secret manager should start when safe mode is exited");
            Log.Info("========= entering safemode again");
            NameNodeAdapter.EnterSafeMode(nn, false);
            NUnit.Framework.Assert.IsFalse(sm.IsRunning(),
                "Secret manager should stop again when safe mode " + "is manually entered");
            // Set the cluster to leave safemode quickly on its own.
            cluster.GetConfiguration(0).SetInt(DFSConfigKeys.DfsNamenodeSafemodeExtensionKey,
                                               0);
            cluster.SetWaitSafeMode(true);
            cluster.RestartNameNode();
            // Fetch the NameNode and its secret manager again after the restart.
            nn = cluster.GetNameNode();
            sm = NameNodeAdapter.GetDtSecretManager(nn.GetNamesystem());
            NUnit.Framework.Assert.IsFalse(nn.IsInSafeMode());
            NUnit.Framework.Assert.IsTrue(sm.IsRunning());
        }
        /// <summary>
        /// Runs <c>DoWriteAndAbort</c> on <c>/test</c>, deletes <c>/test/test</c>, saves the
        /// namespace, restarts the NameNode and then reads two files through snapshot
        /// <c>s1</c>.
        /// </summary>
        public virtual void TestWithCheckpoint()
        {
            Path snapshotRoot = new Path("/test");
            DoWriteAndAbort(fs, snapshotRoot);
            fs.Delete(new Path("/test/test"), true);

            // Checkpoint inside safe mode, then restart.
            NameNode activeNameNode = cluster.GetNameNode();
            NameNodeAdapter.EnterSafeMode(activeNameNode, false);
            NameNodeAdapter.SaveNamespace(activeNameNode);
            NameNodeAdapter.LeaveSafeMode(activeNameNode);
            cluster.RestartNameNode(true);

            // read snapshot file after restart
            foreach (string relativePath in new[] { "s1/test/test2", "s1/test/test3" })
            {
                string snapshotFile = Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot
                                      .GetSnapshotPath(snapshotRoot.ToString(), relativePath);
                DFSTestUtil.ReadFile(fs, new Path(snapshotFile));
            }
        }
Beispiel #13
0
        /// <summary>
        /// Try to read the files inside snapshot but renamed to different file and
        /// deleted after restarting post checkpoint. refer HDFS-5427
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestReadRenamedSnapshotFileWithCheckpoint()
        {
            Path sourceDir = new Path("/foo");
            Path targetDir = new Path("/foo2");

            // Both directories are made snapshottable.
            hdfs.Mkdirs(sourceDir);
            hdfs.Mkdirs(targetDir);
            hdfs.AllowSnapshot(sourceDir);
            hdfs.AllowSnapshot(targetDir);

            Path sourceFile = new Path(sourceDir, "bar");
            Path targetFile = new Path(targetDir, "bar");
            DFSTestUtil.CreateFile(hdfs, sourceFile, 100, (short)2, 100024L);
            hdfs.CreateSnapshot(sourceDir, "s1");

            // rename to another snapshottable directory and take snapshot
            NUnit.Framework.Assert.IsTrue(hdfs.Rename(sourceFile, targetFile));
            hdfs.CreateSnapshot(targetDir, "s2");

            // delete the original renamed file to make sure blocks are not updated by
            // the original file
            NUnit.Framework.Assert.IsTrue(hdfs.Delete(targetFile, true));

            // checkpoint
            NameNode activeNameNode = cluster.GetNameNode();
            NameNodeAdapter.EnterSafeMode(activeNameNode, false);
            NameNodeAdapter.SaveNamespace(activeNameNode);
            NameNodeAdapter.LeaveSafeMode(activeNameNode);

            // restart namenode to load snapshot files from fsimage
            cluster.RestartNameNode(true);

            // file in first snapshot
            string firstSnapshotFile = Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot
                                       .GetSnapshotPath(sourceDir.ToString(), "s1/bar");
            DFSTestUtil.ReadFile(hdfs, new Path(firstSnapshotFile));

            // file in second snapshot after rename+delete
            string secondSnapshotFile = Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot
                                        .GetSnapshotPath(targetDir.ToString(), "s2/bar");
            DFSTestUtil.ReadFile(hdfs, new Path(secondSnapshotFile));
        }
        /// <summary>
        /// Rolls nn0's edit log twice, saves a checkpoint, bootstraps NameNode 1 with
        /// <c>-force</c>, and checks that it holds the same checkpoint and files before
        /// restarting it.
        /// </summary>
        public virtual void TestDownloadingLaterCheckpoint()
        {
            // Roll edit logs a few times to inflate txid
            nn0.GetRpcServer().RollEditLog();
            nn0.GetRpcServer().RollEditLog();

            // Make checkpoint
            NameNodeAdapter.EnterSafeMode(nn0, false);
            NameNodeAdapter.SaveNamespace(nn0);
            NameNodeAdapter.LeaveSafeMode(nn0);

            long checkpointTxId = NameNodeAdapter.GetNamesystem(nn0).GetFSImage().GetMostRecentCheckpointTxId();
            NUnit.Framework.Assert.AreEqual(6, checkpointTxId);

            string[] bootstrapArgs = new string[] { "-force" };
            int exitCode = BootstrapStandby.Run(bootstrapArgs, cluster.GetConfiguration(1));
            NUnit.Framework.Assert.AreEqual(0, exitCode);

            // Should have copied over the namespace from the active
            FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of((int)checkpointTxId));
            FSImageTestUtil.AssertNNFilesMatch(cluster);

            // We should now be able to start the standby successfully.
            cluster.RestartNameNode(1);
        }
Beispiel #15
0
        /// <summary>
        /// Try to read the files inside snapshot but deleted in original place after
        /// restarting post checkpoint. refer HDFS-5427
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestReadSnapshotFileWithCheckpoint()
        {
            Path snapshotDir = new Path("/foo");
            hdfs.Mkdirs(snapshotDir);
            hdfs.AllowSnapshot(snapshotDir);

            Path snapshottedFile = new Path("/foo/bar");
            DFSTestUtil.CreateFile(hdfs, snapshottedFile, 100, (short)2, 100024L);
            hdfs.CreateSnapshot(snapshotDir, "s1");

            // Remove the file from its original location; the assertion requires Delete to return true.
            NUnit.Framework.Assert.IsTrue(hdfs.Delete(snapshottedFile, true));

            // checkpoint
            NameNode activeNameNode = cluster.GetNameNode();
            NameNodeAdapter.EnterSafeMode(activeNameNode, false);
            NameNodeAdapter.SaveNamespace(activeNameNode);
            NameNodeAdapter.LeaveSafeMode(activeNameNode);

            // restart namenode to load snapshot files from fsimage
            cluster.RestartNameNode(true);

            string fileInSnapshot = Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot
                                    .GetSnapshotPath(snapshotDir.ToString(), "s1/bar");
            DFSTestUtil.ReadFile(hdfs, new Path(fileInSnapshot));
        }
Beispiel #16
0
        /// <summary>
        /// Checks that the secondary NameNode's snapshot and snapshottable-directory counts
        /// follow the NameNode's across two checkpoints: one taken after a snapshot is created
        /// and one taken after the snapshot is deleted, snapshots are disallowed and the
        /// NameNode has saved its namespace.
        /// </summary>
        public virtual void TestCheckpoint()
        {
            MiniDFSCluster    cluster   = null;
            SecondaryNameNode secondary = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).Build();
                cluster.WaitActive();
                secondary = new SecondaryNameNode(conf);
                SnapshotManager nnSnapshotManager        = cluster.GetNamesystem().GetSnapshotManager();
                SnapshotManager secondarySnapshotManager = secondary.GetFSNamesystem().GetSnapshotManager
                                                               ();
                FileSystem fs    = cluster.GetFileSystem();
                HdfsAdmin  admin = new HdfsAdmin(FileSystem.GetDefaultUri(conf), conf);
                NUnit.Framework.Assert.AreEqual(0, nnSnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(0, nnSnapshotManager.GetNumSnapshottableDirs());
                NUnit.Framework.Assert.AreEqual(0, secondarySnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(0, secondarySnapshotManager.GetNumSnapshottableDirs
                                                    ());
                // 1. Create a snapshottable directory foo on the NN.
                fs.Mkdirs(TestPath);
                admin.AllowSnapshot(TestPath);
                NUnit.Framework.Assert.AreEqual(0, nnSnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(1, nnSnapshotManager.GetNumSnapshottableDirs());
                // 2. Create a snapshot of the dir foo. This will be referenced both in
                // the SnapshotManager as well as in the file system tree. The snapshot
                // count will go up to 1.
                Path snapshotPath = fs.CreateSnapshot(TestPath);
                NUnit.Framework.Assert.AreEqual(1, nnSnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(1, nnSnapshotManager.GetNumSnapshottableDirs());
                // 3. Start up a 2NN and have it do a checkpoint. It will have foo and its
                // snapshot in its list of snapshottable dirs referenced from the
                // SnapshotManager, as well as in the file system tree.
                secondary.DoCheckpoint();
                NUnit.Framework.Assert.AreEqual(1, secondarySnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(1, secondarySnapshotManager.GetNumSnapshottableDirs
                                                    ());
                // 4. Disallow snapshots on and delete foo on the NN. The snapshot count
                // will go down to 0 and the snapshottable dir will be removed from the fs
                // tree.
                fs.DeleteSnapshot(TestPath, snapshotPath.GetName());
                admin.DisallowSnapshot(TestPath);
                NUnit.Framework.Assert.AreEqual(0, nnSnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(0, nnSnapshotManager.GetNumSnapshottableDirs());
                // 5. Have the NN do a saveNamespace, writing out a new fsimage with
                // snapshot count 0.
                NameNodeAdapter.EnterSafeMode(cluster.GetNameNode(), false);
                NameNodeAdapter.SaveNamespace(cluster.GetNameNode());
                NameNodeAdapter.LeaveSafeMode(cluster.GetNameNode());
                // 6. Have the still-running 2NN do a checkpoint. It will notice that the
                // fsimage has changed on the NN and redownload/reload from that image.
                // This will replace all INodes in the file system tree as well as reset
                // the snapshot counter to 0 in the SnapshotManager. However, it will not
                // clear the list of snapshottable dirs referenced from the
                // SnapshotManager. When it writes out an fsimage, the 2NN will write out
                // 0 for the snapshot count, but still serialize the snapshottable dir
                // referenced in the SnapshotManager even though it no longer appears in
                // the file system tree. The NN will not be able to start up with this.
                secondary.DoCheckpoint();
                NUnit.Framework.Assert.AreEqual(0, secondarySnapshotManager.GetNumSnapshots());
                NUnit.Framework.Assert.AreEqual(0, secondarySnapshotManager.GetNumSnapshottableDirs
                                                    ());
            }
            finally
            {
                // Nested try/finally: the 2NN is shut down even if stopping the cluster throws,
                // so a failing cluster shutdown cannot leak the secondary NameNode.
                try
                {
                    if (cluster != null)
                    {
                        cluster.Shutdown();
                    }
                }
                finally
                {
                    if (secondary != null)
                    {
                        secondary.Shutdown();
                    }
                }
            }
        }
Beispiel #17
0
        /// <summary>
        /// Raises a file's replication from 1 to 2 and lowers it back to 1 on nn1, computes
        /// invalidation work on both NameNodes, fails over to nn2 while nn1 is left in safe mode
        /// with aborted edit logs, and checks that nn2 ends up with no postponed,
        /// under-replicated or pending-replication blocks and that the file can still be read.
        /// </summary>
        public virtual void TestNNClearsCommandsOnFailoverWithReplChanges()
        {
            // Make lots of blocks to increase chances of triggering a bug.
            DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)1, 1L);

            Banner("rolling NN1's edit log, forcing catch-up");
            HATestUtil.WaitForStandbyToCatchUp(nn1, nn2);

            // Get some new replicas reported so that NN2 now considers
            // them over-replicated and schedules some more deletions
            nn1.GetRpcServer().SetReplication(TestFile, (short)2);
            // Keep computing datanode work until a pass reports nothing more to do.
            while (BlockManagerTestUtil.GetComputedDatanodeWork(nn1.GetNamesystem().GetBlockManager()) > 0)
            {
                Log.Info("Getting more replication work computed");
            }

            // Poll once per second until nn1 has no pending replications left.
            BlockManager nn1BlockManager = nn1.GetNamesystem().GetBlockManager();
            while (nn1BlockManager.GetPendingReplicationBlocksCount() > 0)
            {
                BlockManagerTestUtil.UpdateState(nn1BlockManager);
                cluster.TriggerHeartbeats();
                Sharpen.Thread.Sleep(1000);
            }

            Banner("triggering BRs");
            cluster.TriggerBlockReports();
            nn1.GetRpcServer().SetReplication(TestFile, (short)1);

            Banner("computing invalidation on nn1");
            BlockManagerTestUtil.ComputeInvalidationWork(nn1.GetNamesystem().GetBlockManager());
            DoMetasave(nn1);

            Banner("computing invalidation on nn2");
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
            DoMetasave(nn2);

            // Dump some info for debugging purposes.
            Banner("Metadata immediately before failover");
            DoMetasave(nn2);

            // Transition nn2 to active even though nn1 still thinks it's active
            Banner("Failing to NN2 but let NN1 continue to think it's active");
            NameNodeAdapter.AbortEditLogs(nn1);
            NameNodeAdapter.EnterSafeMode(nn1, false);
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
            cluster.TransitionToActive(1);

            // Check that the standby picked up the replication change.
            NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

            // Dump some info for debugging purposes.
            Banner("Metadata immediately after failover");
            DoMetasave(nn2);

            Banner("Triggering heartbeats and block reports so that fencing is completed");
            cluster.TriggerHeartbeats();
            cluster.TriggerBlockReports();

            Banner("Metadata after nodes have all block-reported");
            DoMetasave(nn2);

            // Force a rescan of postponedMisreplicatedBlocks.
            BlockManager nn2BlockManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.CheckHeartbeat(nn2BlockManager);
            BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(nn2BlockManager);

            // The block should no longer be postponed.
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

            // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
            HATestUtil.WaitForNNToIssueDeletions(nn2);
            cluster.TriggerHeartbeats();
            HATestUtil.WaitForDNDeletions(cluster);
            cluster.TriggerDeletionReports();
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());

            Banner("Making sure the file is still readable");
            FileSystem nn2FileSystem = cluster.GetFileSystem(1);
            DFSTestUtil.ReadFile(nn2FileSystem, TestFilePath);
        }
Beispiel #18
0
        /// <summary>
        /// Lowers a file's replication to 1 while NN2 is shut down, restarts NN2, computes
        /// invalidation work on both NameNodes, fails over to nn2 while nn1 is left in safe mode
        /// with aborted edit logs, and checks that nn2 ends up with no postponed,
        /// under-replicated or pending-replication blocks and that the file can still be read.
        /// </summary>
        public virtual void TestNNClearsCommandsOnFailoverAfterStartup()
        {
            // Make lots of blocks to increase chances of triggering a bug.
            DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)3, 1L);

            Banner("Shutting down NN2");
            cluster.ShutdownNameNode(1);

            Banner("Setting replication to 1, rolling edit log.");
            nn1.GetRpcServer().SetReplication(TestFile, (short)1);
            nn1.GetRpcServer().RollEditLog();

            // Start NN2 again. When it starts up, it will see all of the
            // blocks as over-replicated, since it has the metadata for
            // replication=1, but the DNs haven't yet processed the deletions.
            Banner("Starting NN2 again.");
            cluster.RestartNameNode(1);
            // Re-read the NameNode reference from the cluster after the restart.
            nn2 = cluster.GetNameNode(1);

            Banner("triggering BRs");
            cluster.TriggerBlockReports();

            // We expect that both NN1 and NN2 will have some number of
            // deletions queued up for the DNs.
            Banner("computing invalidation on nn1");
            BlockManagerTestUtil.ComputeInvalidationWork(nn1.GetNamesystem().GetBlockManager());
            Banner("computing invalidation on nn2");
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());

            // Dump some info for debugging purposes.
            Banner("Metadata immediately before failover");
            DoMetasave(nn2);

            // Transition nn2 to active even though nn1 still thinks it's active
            Banner("Failing to NN2 but let NN1 continue to think it's active");
            NameNodeAdapter.AbortEditLogs(nn1);
            NameNodeAdapter.EnterSafeMode(nn1, false);
            cluster.TransitionToActive(1);

            // Check that the standby picked up the replication change.
            NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

            // Dump some info for debugging purposes.
            Banner("Metadata immediately after failover");
            DoMetasave(nn2);

            Banner("Triggering heartbeats and block reports so that fencing is completed");
            cluster.TriggerHeartbeats();
            cluster.TriggerBlockReports();

            Banner("Metadata after nodes have all block-reported");
            DoMetasave(nn2);

            // Force a rescan of postponedMisreplicatedBlocks.
            BlockManager nn2BlockManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.CheckHeartbeat(nn2BlockManager);
            BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(nn2BlockManager);

            // The block should no longer be postponed.
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

            // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
            BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
            HATestUtil.WaitForNNToIssueDeletions(nn2);
            cluster.TriggerHeartbeats();
            HATestUtil.WaitForDNDeletions(cluster);
            cluster.TriggerDeletionReports();
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());

            Banner("Making sure the file is still readable");
            FileSystem nn2FileSystem = cluster.GetFileSystem(1);
            DFSTestUtil.ReadFile(nn2FileSystem, TestFilePath);
        }
        /// <summary>
        /// Verifies the secret manager invariant: it must be running if and only
        /// if the NameNode is active and not in safe mode.
        /// </summary>
        /// <remarks>
        /// The four NameNode states and the transitions exercised by this test
        /// are shown below. The secret manager is expected to be running only
        /// in state 4 and stopped in every other state.
        /// <pre>
        ///            SafeMode       Not SafeMode
        /// Standby       1 &lt;------&gt; 2
        ///               ^            ^
        ///               |            |
        ///               v            v
        /// Active        3 &lt;------&gt; 4
        /// </pre>
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestSecretManagerState()
        {
            Configuration haConf = new Configuration();

            haConf.SetBoolean(DFSConfigKeys.DfsNamenodeDelegationTokenAlwaysUseKey, true);
            haConf.SetInt(DFSConfigKeys.DfsNamenodeDelegationKeyUpdateIntervalKey, 50);
            haConf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);

            MiniDFSCluster haCluster = new MiniDFSCluster.Builder(haConf)
                                       .NnTopology(MiniDFSNNTopology.SimpleHATopology())
                                       .NumDataNodes(1)
                                       .WaitSafeMode(false)
                                       .Build();

            try
            {
                haCluster.TransitionToActive(0);
                DFSTestUtil.CreateFile(haCluster.GetFileSystem(0), TestFilePath, 6000, (short)1, 1L);

                // NOTE(review): the safe mode extension is raised before the restart,
                // presumably so the restarted NameNode is still in safe mode for the
                // checks below -- confirm.
                haCluster.GetConfiguration(0).SetInt(DFSConfigKeys.DfsNamenodeSafemodeExtensionKey, 60000);
                haCluster.RestartNameNode(0);
                NameNode restartedNN = haCluster.GetNameNode(0);

                Banner("Started in state 1.");
                CheckSecretManagerTestState(restartedNN, true, true, false);

                Banner("Transition 1->2. Should not start secret manager");
                NameNodeAdapter.LeaveSafeMode(restartedNN);
                CheckSecretManagerTestState(restartedNN, true, false, false);

                Banner("Transition 2->1. Should not start secret manager.");
                NameNodeAdapter.EnterSafeMode(restartedNN, false);
                CheckSecretManagerTestState(restartedNN, true, true, false);

                Banner("Transition 1->3. Should not start secret manager.");
                restartedNN.GetRpcServer().TransitionToActive(ReqInfo);
                CheckSecretManagerTestState(restartedNN, false, true, false);

                Banner("Transition 3->1. Should not start secret manager.");
                restartedNN.GetRpcServer().TransitionToStandby(ReqInfo);
                CheckSecretManagerTestState(restartedNN, true, true, false);

                Banner("Transition 1->3->4. Should start secret manager.");
                restartedNN.GetRpcServer().TransitionToActive(ReqInfo);
                NameNodeAdapter.LeaveSafeMode(restartedNN);
                CheckSecretManagerTestState(restartedNN, false, false, true);

                Banner("Transition 4->3. Should stop secret manager");
                NameNodeAdapter.EnterSafeMode(restartedNN, false);
                CheckSecretManagerTestState(restartedNN, false, true, false);

                Banner("Transition 3->4. Should start secret manager");
                NameNodeAdapter.LeaveSafeMode(restartedNN);
                CheckSecretManagerTestState(restartedNN, false, false, true);

                // Repeat the final pair of transitions several times to suss out races.
                for (int round = 0; round < 20; ++round)
                {
                    Banner("Transition 4->2. Should stop secret manager.");
                    restartedNN.GetRpcServer().TransitionToStandby(ReqInfo);
                    CheckSecretManagerTestState(restartedNN, true, false, false);

                    Banner("Transition 2->4. Should start secret manager");
                    restartedNN.GetRpcServer().TransitionToActive(ReqInfo);
                    CheckSecretManagerTestState(restartedNN, false, false, true);
                }
            }
            finally
            {
                haCluster.Shutdown();
            }
        }

        /// <summary>
        /// Asserts, in order, the standby flag, the safe mode flag and the
        /// result of <c>IsDTRunning</c> for the given NameNode.
        /// </summary>
        /// <param name="nn">NameNode under inspection.</param>
        /// <param name="expectStandby">Expected value of <c>nn.IsStandbyState()</c>.</param>
        /// <param name="expectSafeMode">Expected value of <c>nn.IsInSafeMode()</c>.</param>
        /// <param name="expectDTRunning">Expected value of <c>IsDTRunning(nn)</c>.</param>
        private void CheckSecretManagerTestState(NameNode nn, bool expectStandby, bool expectSafeMode,
                                                 bool expectDTRunning)
        {
            // IsTrue/IsFalse are chosen explicitly (rather than AreEqual) so that the
            // assertion used for each expectation is the same as in an inline check.
            if (expectStandby)
            {
                NUnit.Framework.Assert.IsTrue(nn.IsStandbyState());
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(nn.IsStandbyState());
            }

            if (expectSafeMode)
            {
                NUnit.Framework.Assert.IsTrue(nn.IsInSafeMode());
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(nn.IsInSafeMode());
            }

            if (expectDTRunning)
            {
                NUnit.Framework.Assert.IsTrue(IsDTRunning(nn));
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(IsDTRunning(nn));
            }
        }