Exemplo n.º 1
0
 public virtual void SetUpCluster()
 {
     conf = new Configuration();
     conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointCheckPeriodKey, 1);
     conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 1);
     conf.SetInt(DFSConfigKeys.DfsNamenodeNumCheckpointsRetainedKey, 10);
     conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
     HAUtil.SetAllowStandbyReads(conf, true);
     if (clusterType == TestFailureToReadEdits.TestType.SharedDirHa)
     {
         MiniDFSNNTopology topology = MiniQJMHACluster.CreateDefaultTopology(10000);
         cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(0).CheckExitOnShutdown
                       (false).Build();
     }
     else
     {
         MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
         builder.GetDfsBuilder().NumDataNodes(0).CheckExitOnShutdown(false);
         miniQjmHaCluster = builder.Build();
         cluster          = miniQjmHaCluster.GetDfsCluster();
     }
     cluster.WaitActive();
     nn0 = cluster.GetNameNode(0);
     nn1 = cluster.GetNameNode(1);
     cluster.TransitionToActive(0);
     fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
 }
Exemplo n.º 2
0
        public virtual void SetupCluster()
        {
            Configuration conf = SetupCommonConfig();

            // Dial down the retention of extra edits and checkpoints. This is to
            // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
            conf.SetInt(DFSConfigKeys.DfsNamenodeNumCheckpointsRetainedKey, 1);
            conf.SetInt(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 0);
            int retryCount = 0;

            while (true)
            {
                try
                {
                    int basePort = 10060 + random.Next(100) * 2;
                    MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(new MiniDFSNNTopology.NSConf
                                                                                            ("ns1").AddNN(new MiniDFSNNTopology.NNConf("nn1").SetHttpPort(basePort)).AddNN(new
                                                                                                                                                                           MiniDFSNNTopology.NNConf("nn2").SetHttpPort(basePort + 1)));
                    cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(1).Build
                                  ();
                    cluster.WaitActive();
                    nn0 = cluster.GetNameNode(0);
                    nn1 = cluster.GetNameNode(1);
                    fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
                    cluster.TransitionToActive(0);
                    ++retryCount;
                    break;
                }
                catch (BindException)
                {
                    Log.Info("Set up MiniDFSCluster failed due to port conflicts, retry " + retryCount
                             + " times");
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Regression test for HDFS-2693: when doing state transitions, we need to
        /// lock the FSNamesystem so that we don't end up doing any writes while it's
        /// "in between" states.
        /// </summary>
        /// <remarks>
        /// Regression test for HDFS-2693: when doing state transitions, we need to
        /// lock the FSNamesystem so that we don't end up doing any writes while it's
        /// "in between" states.
        /// This test case starts up several client threads which do mutation operations
        /// while flipping a NN back and forth from active to standby.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestTransitionSynchronization()
        {
            Configuration  conf    = new Configuration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(0).Build();

            try
            {
                cluster.WaitActive();
                ReentrantReadWriteLock spyLock = NameNodeAdapter.SpyOnFsLock(cluster.GetNameNode(
                                                                                 0).GetNamesystem());
                Org.Mockito.Mockito.DoAnswer(new GenericTestUtils.SleepAnswer(50)).When(spyLock).
                WriteLock();
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext();
                for (int i = 0; i < 50; i++)
                {
                    int finalI = i;
                    ctx.AddThread(new _RepeatingTestThread_256(finalI, fs, ctx));
                }
                ctx.AddThread(new _RepeatingTestThread_266(cluster, ctx));
                ctx.StartThreads();
                ctx.WaitFor(20000);
                ctx.Stop();
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Exemplo n.º 4
0
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.URISyntaxException"/>
        /// <exception cref="System.Exception"/>
        private void AssertCanStartHaNameNodes(string pathSuffix)
        {
            // Now should be able to start both NNs. Pass "false" here so that we don't
            // try to waitActive on all NNs, since the second NN doesn't exist yet.
            cluster.RestartNameNode(0, false);
            cluster.RestartNameNode(1, true);
            // Make sure HA is working.
            cluster.GetNameNode(0).GetRpcServer().TransitionToActive(new HAServiceProtocol.StateChangeRequestInfo
                                                                         (HAServiceProtocol.RequestSource.RequestByUser));
            FileSystem fs = null;

            try
            {
                Path newPath = new Path(TestPath, pathSuffix);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(newPath));
                HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1)
                                                   );
                NUnit.Framework.Assert.IsTrue(NameNodeAdapter.GetFileInfo(cluster.GetNameNode(1),
                                                                          newPath.ToString(), false).IsDir());
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
            }
        }
Exemplo n.º 5
0
        public virtual void EnsureInvalidBlockTokensAreRejected()
        {
            cluster.TransitionToActive(0);
            FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);

            DFSTestUtil.WriteFile(fs, TestPath, TestData);
            NUnit.Framework.Assert.AreEqual(TestData, DFSTestUtil.ReadFile(fs, TestPath));
            DFSClient dfsClient    = DFSClientAdapter.GetDFSClient((DistributedFileSystem)fs);
            DFSClient spyDfsClient = Org.Mockito.Mockito.Spy(dfsClient);

            Org.Mockito.Mockito.DoAnswer(new _Answer_121()).When(spyDfsClient).GetLocatedBlocks
                (Org.Mockito.Mockito.AnyString(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                .AnyLong());
            // This will make the token invalid, since the password
            // won't match anymore
            DFSClientAdapter.SetDFSClient((DistributedFileSystem)fs, spyDfsClient);
            try
            {
                NUnit.Framework.Assert.AreEqual(TestData, DFSTestUtil.ReadFile(fs, TestPath));
                NUnit.Framework.Assert.Fail("Shouldn't have been able to read a file with invalid block tokens"
                                            );
            }
            catch (IOException ioe)
            {
                GenericTestUtils.AssertExceptionContains("Could not obtain block", ioe);
            }
        }
Exemplo n.º 6
0
        public virtual void TestNfsUpgrade()
        {
            MiniDFSCluster cluster = null;
            FileSystem     fs      = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                          ()).NumDataNodes(0).Build();
                FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));
                // No upgrade is in progress at the moment.
                CheckClusterPreviousDirExistence(cluster, false);
                AssertCTimesEqual(cluster);
                CheckPreviousDirExistence(sharedDir, false);
                // Transition NN0 to active and do some FS ops.
                cluster.TransitionToActive(0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
                // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
                // flag.
                cluster.ShutdownNameNode(1);
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                cluster.RestartNameNode(0, false);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, false);
                CheckPreviousDirExistence(sharedDir, true);
                // NN0 should come up in the active state when given the -upgrade option,
                // so no need to transition it to active.
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
                // Restart NN0 without the -upgrade flag, to make sure that works.
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular
                                                          );
                cluster.RestartNameNode(0, false);
                // Make sure we can still do FS ops after upgrading.
                cluster.TransitionToActive(0);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo3")));
                // Now bootstrap the standby with the upgraded info.
                int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration
                                                  (1));
                NUnit.Framework.Assert.AreEqual(0, rc);
                // Now restart NN1 and make sure that we can do ops against that as well.
                cluster.RestartNameNode(1);
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo4")));
                AssertCTimesEqual(cluster);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Exemplo n.º 7
0
        public virtual void Setup()
        {
            conf = new Configuration();
            // Specify the quorum per-nameservice, to ensure that these configs
            // can be nameservice-scoped.
            conf.Set(ZKFailoverController.ZkQuorumKey + ".ns1", hostPort);
            conf.Set(DFSConfigKeys.DfsHaFenceMethodsKey, typeof(TestNodeFencer.AlwaysSucceedFencer
                                                                ).FullName);
            conf.SetBoolean(DFSConfigKeys.DfsHaAutoFailoverEnabledKey, true);
            // Turn off IPC client caching, so that the suite can handle
            // the restart of the daemons between test cases.
            conf.SetInt(CommonConfigurationKeysPublic.IpcClientConnectionMaxidletimeKey, 0);
            conf.SetInt(DFSConfigKeys.DfsHaZkfcPortKey + ".ns1.nn1", 10023);
            conf.SetInt(DFSConfigKeys.DfsHaZkfcPortKey + ".ns1.nn2", 10024);
            MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(new MiniDFSNNTopology.NSConf
                                                                                    ("ns1").AddNN(new MiniDFSNNTopology.NNConf("nn1").SetIpcPort(10021)).AddNN(new MiniDFSNNTopology.NNConf
                                                                                                                                                                   ("nn2").SetIpcPort(10022)));

            cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(0).Build
                          ();
            cluster.WaitActive();
            ctx = new MultithreadedTestUtil.TestContext();
            ctx.AddThread(thr1 = new TestDFSZKFailoverController.ZKFCThread(this, ctx, 0));
            NUnit.Framework.Assert.AreEqual(0, thr1.zkfc.Run(new string[] { "-formatZK" }));
            thr1.Start();
            WaitForHAState(0, HAServiceProtocol.HAServiceState.Active);
            ctx.AddThread(thr2 = new TestDFSZKFailoverController.ZKFCThread(this, ctx, 1));
            thr2.Start();
            // Wait for the ZKFCs to fully start up
            ZKFCTestUtil.WaitForHealthState(thr1.zkfc, HealthMonitor.State.ServiceHealthy, ctx
                                            );
            ZKFCTestUtil.WaitForHealthState(thr2.zkfc, HealthMonitor.State.ServiceHealthy, ctx
                                            );
            fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        }
Exemplo n.º 8
0
        /// <summary>Test manual failover failback for one namespace</summary>
        /// <param name="cluster">single process test cluster</param>
        /// <param name="conf">cluster configuration</param>
        /// <param name="nsIndex">namespace index starting from zero</param>
        /// <exception cref="System.Exception"/>
        private void TestManualFailoverFailback(MiniDFSCluster cluster, Configuration conf
                                                , int nsIndex)
        {
            int nn0 = 2 * nsIndex;
            int nn1 = 2 * nsIndex + 1;

            cluster.TransitionToActive(nn0);
            Log.Info("Starting with NN 0 active in namespace " + nsIndex);
            FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);

            fs.Mkdirs(TestDir);
            Log.Info("Failing over to NN 1 in namespace " + nsIndex);
            cluster.TransitionToStandby(nn0);
            cluster.TransitionToActive(nn1);
            NUnit.Framework.Assert.IsTrue(fs.Exists(TestDir));
            DFSTestUtil.WriteFile(fs, TestFilePath, TestFileData);
            Log.Info("Failing over to NN 0 in namespace " + nsIndex);
            cluster.TransitionToStandby(nn1);
            cluster.TransitionToActive(nn0);
            NUnit.Framework.Assert.IsTrue(fs.Exists(TestDir));
            NUnit.Framework.Assert.AreEqual(TestFileData, DFSTestUtil.ReadFile(fs, TestFilePath
                                                                               ));
            Log.Info("Removing test file");
            fs.Delete(TestDir, true);
            NUnit.Framework.Assert.IsFalse(fs.Exists(TestDir));
            Log.Info("Failing over to NN 1 in namespace " + nsIndex);
            cluster.TransitionToStandby(nn0);
            cluster.TransitionToActive(nn1);
            NUnit.Framework.Assert.IsFalse(fs.Exists(TestDir));
        }
Exemplo n.º 9
0
        public virtual void TestFinalizeWithJournalNodes()
        {
            MiniQJMHACluster qjCluster = null;
            FileSystem       fs        = null;

            try
            {
                MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
                builder.GetDfsBuilder().NumDataNodes(0);
                qjCluster = builder.Build();
                MiniDFSCluster cluster = qjCluster.GetDfsCluster();
                // No upgrade is in progress at the moment.
                CheckJnPreviousDirExistence(qjCluster, false);
                CheckClusterPreviousDirExistence(cluster, false);
                AssertCTimesEqual(cluster);
                // Transition NN0 to active and do some FS ops.
                cluster.TransitionToActive(0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
                long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);
                // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
                // flag.
                cluster.ShutdownNameNode(1);
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                cluster.RestartNameNode(0, false);
                NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade <= GetCommittedTxnIdValue(qjCluster
                                                                                         ));
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, false);
                CheckJnPreviousDirExistence(qjCluster, true);
                // Now bootstrap the standby with the upgraded info.
                int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration
                                                  (1));
                NUnit.Framework.Assert.AreEqual(0, rc);
                cluster.RestartNameNode(1);
                long cidDuringUpgrade = GetCommittedTxnIdValue(qjCluster);
                NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidBeforeUpgrade);
                RunFinalizeCommand(cluster);
                NUnit.Framework.Assert.AreEqual(cidDuringUpgrade, GetCommittedTxnIdValue(qjCluster
                                                                                         ));
                CheckClusterPreviousDirExistence(cluster, false);
                CheckJnPreviousDirExistence(qjCluster, false);
                AssertCTimesEqual(cluster);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (qjCluster != null)
                {
                    qjCluster.Shutdown();
                }
            }
        }
Exemplo n.º 10
0
        /// <exception cref="System.Exception"/>
        private void DoWriteOverFailoverTest(TestPipelinesFailover.TestScenario scenario,
                                             TestPipelinesFailover.MethodToTestIdempotence methodToTest)
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            // Don't check replication periodically.
            conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, 1000);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                int sizeWritten = 0;
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                sizeWritten += BlockAndAHalf;
                // Make sure all of the blocks are written out before failover.
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                scenario.Run(cluster);
                // NOTE: explicitly do *not* make any further metadata calls
                // to the NN here. The next IPC call should be to allocate the next
                // block. Any other call would notice the failover and not test
                // idempotence of the operation (HDFS-3031)
                FSNamesystem ns1 = cluster.GetNameNode(1).GetNamesystem();
                BlockManagerTestUtil.UpdateState(ns1.GetBlockManager());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetPendingReplicationBlocks());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetCorruptReplicaBlocks());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetMissingBlocksCount());
                // If we're testing allocateBlock()'s idempotence, write another
                // block and a half, so we have to allocate a new block.
                // Otherise, don't write anything, so our next RPC will be
                // completeFile() if we're testing idempotence of that operation.
                if (methodToTest == TestPipelinesFailover.MethodToTestIdempotence.AllocateBlock)
                {
                    // write another block and a half
                    AppendTestUtil.Write(stm, sizeWritten, BlockAndAHalf);
                    sizeWritten += BlockAndAHalf;
                }
                stm.Close();
                stm = null;
                AppendTestUtil.Check(fs, TestPath, sizeWritten);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Exemplo n.º 11
0
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.URISyntaxException"/>
        private void WriteUsingBothNameNodes()
        {
            cluster.TransitionToActive(0);
            FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);

            DFSTestUtil.WriteFile(fs, TestPath, TestData);
            cluster.TransitionToStandby(0);
            cluster.TransitionToActive(1);
            fs.Delete(TestPath, false);
            DFSTestUtil.WriteFile(fs, TestPath, TestData);
        }
Exemplo n.º 12
0
        /// <exception cref="System.Exception"/>
        public virtual void TestStandbyIsHot()
        {
            Configuration conf = new Configuration();

            // We read from the standby to watch block locations
            HAUtil.SetAllowStandbyReads(conf, true);
            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                NameNode   nn1 = cluster.GetNameNode(0);
                NameNode   nn2 = cluster.GetNameNode(1);
                FileSystem fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
                Sharpen.Thread.Sleep(1000);
                System.Console.Error.WriteLine("==================================");
                DFSTestUtil.WriteFile(fs, TestFilePath, TestFileData);
                // Have to force an edit log roll so that the standby catches up
                nn1.GetRpcServer().RollEditLog();
                System.Console.Error.WriteLine("==================================");
                // Block locations should show up on standby.
                Log.Info("Waiting for block locations to appear on standby node");
                WaitForBlockLocations(cluster, nn2, TestFile, 3);
                // Trigger immediate heartbeats and block reports so
                // that the active "trusts" all of the DNs
                cluster.TriggerHeartbeats();
                cluster.TriggerBlockReports();
                // Change replication
                Log.Info("Changing replication to 1");
                fs.SetReplication(TestFilePath, (short)1);
                BlockManagerTestUtil.ComputeAllPendingWork(nn1.GetNamesystem().GetBlockManager());
                WaitForBlockLocations(cluster, nn1, TestFile, 1);
                nn1.GetRpcServer().RollEditLog();
                Log.Info("Waiting for lowered replication to show up on standby");
                WaitForBlockLocations(cluster, nn2, TestFile, 1);
                // Change back to 3
                Log.Info("Changing replication to 3");
                fs.SetReplication(TestFilePath, (short)3);
                BlockManagerTestUtil.ComputeAllPendingWork(nn1.GetNamesystem().GetBlockManager());
                nn1.GetRpcServer().RollEditLog();
                Log.Info("Waiting for higher replication to show up on standby");
                WaitForBlockLocations(cluster, nn2, TestFile, 3);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Exemplo n.º 13
0
        public virtual void SetupCluster()
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            HAUtil.SetAllowStandbyReads(conf, true);
            cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                      ()).NumDataNodes(1).WaitSafeMode(false).Build();
            cluster.WaitActive();
            nn0 = cluster.GetNameNode(0);
            nn1 = cluster.GetNameNode(1);
            fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
            cluster.TransitionToActive(0);
        }
Exemplo n.º 14
0
        /// <exception cref="System.Exception"/>
        private void DoTestWriteOverFailoverWithDnFail(TestPipelinesFailover.TestScenario
                                                       scenario)
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(5).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                // Make sure all the blocks are written before failover
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                scenario.Run(cluster);
                NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
                cluster.StopDataNode(0);
                // write another block and a half
                AppendTestUtil.Write(stm, BlockAndAHalf, BlockAndAHalf);
                stm.Hflush();
                Log.Info("Failing back to NN 0");
                cluster.TransitionToStandby(1);
                cluster.TransitionToActive(0);
                cluster.StopDataNode(1);
                AppendTestUtil.Write(stm, BlockAndAHalf * 2, BlockAndAHalf);
                stm.Hflush();
                stm.Close();
                stm = null;
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf * 3);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Exemplo n.º 15
0
        /// <summary>Test for HDFS-2812.</summary>
        /// <remarks>
        /// Test for HDFS-2812. Since lease renewals go from the client
        /// only to the active NN, the SBN will have out-of-date lease
        /// info when it becomes active. We need to make sure we don't
        /// accidentally mark the leases as expired when the failover
        /// proceeds.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestLeasesRenewedOnTransition()
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(1).Build();
            FSDataOutputStream stm = null;
            FileSystem         fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
            NameNode           nn0 = cluster.GetNameNode(0);
            NameNode           nn1 = cluster.GetNameNode(1);

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Log.Info("Starting with NN 0 active");
                stm = fs.Create(TestFilePath);
                long nn0t0 = NameNodeAdapter.GetLeaseRenewalTime(nn0, TestFileStr);
                NUnit.Framework.Assert.IsTrue(nn0t0 > 0);
                long nn1t0 = NameNodeAdapter.GetLeaseRenewalTime(nn1, TestFileStr);
                NUnit.Framework.Assert.AreEqual("Lease should not yet exist on nn1", -1, nn1t0);
                Sharpen.Thread.Sleep(5);
                // make sure time advances!
                HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
                long nn1t1 = NameNodeAdapter.GetLeaseRenewalTime(nn1, TestFileStr);
                NUnit.Framework.Assert.IsTrue("Lease should have been created on standby. Time was: "
                                              + nn1t1, nn1t1 > nn0t0);
                Sharpen.Thread.Sleep(5);
                // make sure time advances!
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                long nn1t2 = NameNodeAdapter.GetLeaseRenewalTime(nn1, TestFileStr);
                NUnit.Framework.Assert.IsTrue("Lease should have been renewed by failover process"
                                              , nn1t2 > nn1t1);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
        public virtual void SetupCluster()
        {
            SecurityUtilTestHelper.SetTokenServiceUseIp(true);
            conf.SetBoolean(DFSConfigKeys.DfsNamenodeDelegationTokenAlwaysUseKey, true);
            conf.Set(CommonConfigurationKeysPublic.HadoopSecurityAuthToLocal, "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//"
                     + "DEFAULT");
            cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                      ()).NumDataNodes(0).Build();
            cluster.WaitActive();
            string logicalName = HATestUtil.GetLogicalHostname(cluster);

            HATestUtil.SetFailoverConfigurations(cluster, conf, logicalName, 0);
            nn0 = cluster.GetNameNode(0);
            nn1 = cluster.GetNameNode(1);
            fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
            dfs = (DistributedFileSystem)fs;
            cluster.TransitionToActive(0);
            dtSecretManager = NameNodeAdapter.GetDtSecretManager(nn0.GetNamesystem());
        }
Exemplo n.º 17
0
        public virtual void Setup()
        {
            Configuration conf = new Configuration();

            // Turn off IPC client caching, so that the suite can handle
            // the restart of the daemons between test cases.
            conf.SetInt(CommonConfigurationKeysPublic.IpcClientConnectionMaxidletimeKey, 0);
            MiniQJMHACluster miniQjmHaCluster = new MiniQJMHACluster.Builder(conf).Build();

            cluster  = miniQjmHaCluster.GetDfsCluster();
            jCluster = miniQjmHaCluster.GetJournalCluster();
            // make nn0 active
            cluster.TransitionToActive(0);
            // do sth to generate in-progress edit log data
            DistributedFileSystem dfs = (DistributedFileSystem)HATestUtil.ConfigureFailoverFs
                                            (cluster, conf);

            dfs.Mkdirs(new Path("/test2"));
            dfs.Close();
        }
Exemplo n.º 18
0
        /// <summary>Tests lease recovery if a client crashes.</summary>
        /// <remarks>
        /// Tests lease recovery if a client crashes. This approximates the
        /// use case of HBase WALs being recovered after a NN failover.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestLeaseRecoveryAfterFailover()
        {
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
                FileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
                LoopRecoverLease(fsOtherUser, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
                // Fail back to ensure that the block locations weren't lost on the
                // original node.
                cluster.TransitionToStandby(1);
                cluster.TransitionToActive(0);
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Exemplo n.º 19
0
        public virtual void TestHarUriWithHaUriWithNoPort()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).NnTopology(MiniDFSNNTopology
                                                                                      .SimpleHATopology()).Build();
                cluster.TransitionToActive(0);
                HATestUtil.SetFailoverConfigurations(cluster, conf);
                CreateEmptyHarArchive(HATestUtil.ConfigureFailoverFs(cluster, conf), TestHarPath);
                URI  failoverUri = FileSystem.GetDefaultUri(conf);
                Path p           = new Path("har://hdfs-" + failoverUri.GetAuthority() + TestHarPath);
                p.GetFileSystem(conf);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidateBlock()
        {
            Configuration conf = new Configuration();

            HAUtil.SetAllowStandbyReads(conf, true);
            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                NameNode   nn1 = cluster.GetNameNode(0);
                NameNode   nn2 = cluster.GetNameNode(1);
                FileSystem fs  = HATestUtil.ConfigureFailoverFs(cluster, conf);
                Sharpen.Thread.Sleep(1000);
                Log.Info("==================================");
                DFSTestUtil.WriteFile(fs, TestFilePath, TestFileData);
                // Have to force an edit log roll so that the standby catches up
                nn1.GetRpcServer().RollEditLog();
                Log.Info("==================================");
                // delete the file
                fs.Delete(TestFilePath, false);
                BlockManagerTestUtil.ComputeAllPendingWork(nn1.GetNamesystem().GetBlockManager());
                nn1.GetRpcServer().RollEditLog();
                // standby nn doesn't need to invalidate blocks.
                NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetBlockManager().GetPendingDeletionBlocksCount
                                                    ());
                cluster.TriggerHeartbeats();
                cluster.TriggerBlockReports();
                // standby nn doesn't need to invalidate blocks.
                NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetBlockManager().GetPendingDeletionBlocksCount
                                                    ());
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Exemplo n.º 21
0
        public virtual void TestHaFsck()
        {
            Configuration conf = new Configuration();
            // need some HTTP ports
            MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(new MiniDFSNNTopology.NSConf
                                                                                    ("ha-nn-uri-0").AddNN(new MiniDFSNNTopology.NNConf("nn1").SetHttpPort(10051)).AddNN
                                                                                    (new MiniDFSNNTopology.NNConf("nn2").SetHttpPort(10052)));
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes
                                         (0).Build();
            FileSystem fs = null;

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                // Make sure conf has the relevant HA configs.
                HATestUtil.SetFailoverConfigurations(cluster, conf, "ha-nn-uri-0", 0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                fs.Mkdirs(new Path("/test1"));
                fs.Mkdirs(new Path("/test2"));
                RunFsck(conf);
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                RunFsck(conf);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Exemplo n.º 22
0
 public virtual void SetupCluster()
 {
     conf = new Configuration();
     conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, SmallBlock);
     // Bump up replication interval so that we only run replication
     // checks explicitly.
     conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, 600);
     // Increase max streams so that we re-replicate quickly.
     conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationMaxStreamsKey, 1000);
     // See RandomDeleterPolicy javadoc.
     conf.SetClass(DFSConfigKeys.DfsBlockReplicatorClassnameKey, typeof(TestDNFencing.RandomDeleterPolicy
                                                                        ), typeof(BlockPlacementPolicy));
     conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                               ()).NumDataNodes(3).Build();
     nn1 = cluster.GetNameNode(0);
     nn2 = cluster.GetNameNode(1);
     cluster.WaitActive();
     cluster.TransitionToActive(0);
     // Trigger block reports so that the first NN trusts all
     // of the DNs, and will issue deletions
     cluster.TriggerBlockReports();
     fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
 }
Exemplo n.º 23
0
        /// <exception cref="System.Exception"/>
        private static void TestFailoverAfterCrashDuringLogRoll(bool writeHeader)
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, int.MaxValue);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(0).Build();
            FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);

            try
            {
                cluster.TransitionToActive(0);
                NameNode nn0 = cluster.GetNameNode(0);
                nn0.GetRpcServer().RollEditLog();
                cluster.ShutdownNameNode(0);
                CreateEmptyInProgressEditLog(cluster, nn0, writeHeader);
                cluster.TransitionToActive(1);
            }
            finally
            {
                IOUtils.Cleanup(Log, fs);
                cluster.Shutdown();
            }
        }
Exemplo n.º 24
0
        public virtual void TestChangedStorageId()
        {
            HdfsConfiguration conf = new HdfsConfiguration();

            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).NnTopology
                                         (MiniDFSNNTopology.SimpleHATopology()).Build();

            try
            {
                cluster.TransitionToActive(0);
                FileSystem   fs   = HATestUtil.ConfigureFailoverFs(cluster, conf);
                OutputStream @out = fs.Create(filePath);
                @out.Write(Sharpen.Runtime.GetBytesForString("foo bar baz"));
                @out.Close();
                HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1)
                                                   );
                // Change the gen stamp of the block on datanode to go back in time (gen
                // stamps start at 1000)
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, filePath);
                NUnit.Framework.Assert.IsTrue(cluster.ChangeGenStampOfBlock(0, block, 900));
                // Stop the DN so the replica with the changed gen stamp will be reported
                // when this DN starts up.
                MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
                // Restart the namenode so that when the DN comes up it will see an initial
                // block report.
                cluster.RestartNameNode(1, false);
                NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(dnProps, true));
                // Wait until the standby NN queues up the corrupt block in the pending DN
                // message queue.
                while (cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount(
                           ) < 1)
                {
                    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
                }
                NUnit.Framework.Assert.AreEqual(1, cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount
                                                    ());
                string oldStorageId = GetRegisteredDatanodeUid(cluster, 1);
                // Reformat/restart the DN.
                NUnit.Framework.Assert.IsTrue(WipeAndRestartDn(cluster, 0));
                // Give the DN time to start up and register, which will cause the
                // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
                string newStorageId = string.Empty;
                do
                {
                    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
                    newStorageId = GetRegisteredDatanodeUid(cluster, 1);
                    System.Console.Out.WriteLine("====> oldStorageId: " + oldStorageId + " newStorageId: "
                                                 + newStorageId);
                }while (newStorageId.Equals(oldStorageId));
                NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount
                                                    ());
                // Now try to fail over.
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Exemplo n.º 25
0
 /// <summary>
 /// Return a filesystem with client-failover configured for the
 /// cluster.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="Sharpen.URISyntaxException"/>
 public virtual FileSystem GetFailoverFs()
 {
     return(HATestUtil.ConfigureFailoverFs(cluster, conf));
 }
Exemplo n.º 26
0
        public virtual void TestRollbackWithNfs()
        {
            MiniDFSCluster cluster = null;
            FileSystem     fs      = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                          ()).NumDataNodes(0).Build();
                FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));
                // No upgrade is in progress at the moment.
                CheckClusterPreviousDirExistence(cluster, false);
                AssertCTimesEqual(cluster);
                CheckPreviousDirExistence(sharedDir, false);
                // Transition NN0 to active and do some FS ops.
                cluster.TransitionToActive(0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
                // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
                // flag.
                cluster.ShutdownNameNode(1);
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                cluster.RestartNameNode(0, false);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, false);
                CheckPreviousDirExistence(sharedDir, true);
                // NN0 should come up in the active state when given the -upgrade option,
                // so no need to transition it to active.
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
                // Now bootstrap the standby with the upgraded info.
                int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration
                                                  (1));
                NUnit.Framework.Assert.AreEqual(0, rc);
                cluster.RestartNameNode(1);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, true);
                CheckPreviousDirExistence(sharedDir, true);
                AssertCTimesEqual(cluster);
                // Now shut down the cluster and do the rollback.
                ICollection <URI> nn1NameDirs = cluster.GetNameDirs(0);
                cluster.Shutdown();
                conf.SetStrings(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(nn1NameDirs
                                                                                         ));
                NameNode.DoRollback(conf, false);
                // The rollback operation should have rolled back the first NN's local
                // dirs, and the shared dir, but not the other NN's dirs. Those have to be
                // done by bootstrapping the standby.
                CheckNnPreviousDirExistence(cluster, 0, false);
                CheckPreviousDirExistence(sharedDir, false);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Exemplo n.º 27
0
        public virtual void TestRollbackWithJournalNodes()
        {
            MiniQJMHACluster qjCluster = null;
            FileSystem       fs        = null;

            try
            {
                MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
                builder.GetDfsBuilder().NumDataNodes(0);
                qjCluster = builder.Build();
                MiniDFSCluster cluster = qjCluster.GetDfsCluster();
                // No upgrade is in progress at the moment.
                CheckClusterPreviousDirExistence(cluster, false);
                AssertCTimesEqual(cluster);
                CheckJnPreviousDirExistence(qjCluster, false);
                // Transition NN0 to active and do some FS ops.
                cluster.TransitionToActive(0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
                long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);
                // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
                // flag.
                cluster.ShutdownNameNode(1);
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                cluster.RestartNameNode(0, false);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, false);
                CheckJnPreviousDirExistence(qjCluster, true);
                // NN0 should come up in the active state when given the -upgrade option,
                // so no need to transition it to active.
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
                long cidDuringUpgrade = GetCommittedTxnIdValue(qjCluster);
                NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidBeforeUpgrade);
                // Now bootstrap the standby with the upgraded info.
                int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration
                                                  (1));
                NUnit.Framework.Assert.AreEqual(0, rc);
                cluster.RestartNameNode(1);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, true);
                CheckJnPreviousDirExistence(qjCluster, true);
                AssertCTimesEqual(cluster);
                // Shut down the NNs, but deliberately leave the JNs up and running.
                ICollection <URI> nn1NameDirs = cluster.GetNameDirs(0);
                cluster.Shutdown();
                conf.SetStrings(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(nn1NameDirs
                                                                                         ));
                NameNode.DoRollback(conf, false);
                long cidAfterRollback = GetCommittedTxnIdValue(qjCluster);
                NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade < cidAfterRollback);
                // make sure the committedTxnId has been reset correctly after rollback
                NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidAfterRollback);
                // The rollback operation should have rolled back the first NN's local
                // dirs, and the shared dir, but not the other NN's dirs. Those have to be
                // done by bootstrapping the standby.
                CheckNnPreviousDirExistence(cluster, 0, false);
                CheckJnPreviousDirExistence(qjCluster, false);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (qjCluster != null)
                {
                    qjCluster.Shutdown();
                }
            }
        }
Exemplo n.º 28
0
        public virtual void TestCannotUpgradeSecondNameNode()
        {
            MiniDFSCluster cluster = null;
            FileSystem     fs      = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                          ()).NumDataNodes(0).Build();
                FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));
                // No upgrade is in progress at the moment.
                CheckClusterPreviousDirExistence(cluster, false);
                AssertCTimesEqual(cluster);
                CheckPreviousDirExistence(sharedDir, false);
                // Transition NN0 to active and do some FS ops.
                cluster.TransitionToActive(0);
                fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
                // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
                // flag.
                cluster.ShutdownNameNode(1);
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                cluster.RestartNameNode(0, false);
                CheckNnPreviousDirExistence(cluster, 0, true);
                CheckNnPreviousDirExistence(cluster, 1, false);
                CheckPreviousDirExistence(sharedDir, true);
                // NN0 should come up in the active state when given the -upgrade option,
                // so no need to transition it to active.
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
                // Restart NN0 without the -upgrade flag, to make sure that works.
                cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular
                                                          );
                cluster.RestartNameNode(0, false);
                // Make sure we can still do FS ops after upgrading.
                cluster.TransitionToActive(0);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo3")));
                // Make sure that starting the second NN with the -upgrade flag fails.
                cluster.GetNameNodeInfos()[1].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade
                                                          );
                try
                {
                    cluster.RestartNameNode(1, false);
                    NUnit.Framework.Assert.Fail("Should not have been able to start second NN with -upgrade"
                                                );
                }
                catch (IOException ioe)
                {
                    GenericTestUtils.AssertExceptionContains("It looks like the shared log is already being upgraded"
                                                             , ioe);
                }
            }
            finally
            {
                if (fs != null)
                {
                    fs.Close();
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Exemplo n.º 29
0
 /// <exception cref="System.Exception"/>
 public FileSystem Run()
 {
     return(HATestUtil.ConfigureFailoverFs(cluster, conf));
 }
Exemplo n.º 30
0
        /// <summary>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed.
        /// </summary>
        /// <remarks>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed. The
        /// DN running the recovery should then fail to commit the synchronization
        /// and a later retry will succeed.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFailoverRightBeforeCommitSynchronization()
        {
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a half block
                AppendTestUtil.Write(stm, 0, BlockSize / 2);
                stm.Hflush();
                // Look into the block manager on the active node for the block
                // under construction.
                NameNode           nn0             = cluster.GetNameNode(0);
                ExtendedBlock      blk             = DFSTestUtil.GetFirstBlock(fs, TestPath);
                DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn0, blk);
                Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
                // Find the corresponding DN daemon, and spy on its connection to the
                // active.
                DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN
                                                                                               , nn0);
                // Delay the commitBlockSynchronization call
                GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(Org.Mockito.Mockito
                                                                                             .Eq(blk), Org.Mockito.Mockito.AnyInt(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                                                                                             .Eq(true), Org.Mockito.Mockito.Eq(false), (DatanodeID[])Org.Mockito.Mockito.AnyObject
                                                                                                 (), (string[])Org.Mockito.Mockito.AnyObject());
                // new genstamp
                // new length
                // close file
                // delete block
                // new targets
                // new target storages
                DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
                NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));
                Log.Info("Waiting for commitBlockSynchronization call from primary");
                delayer.WaitForCall();
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                // Let the commitBlockSynchronization call go through, and check that
                // it failed with the correct exception.
                delayer.Proceed();
                delayer.WaitForResult();
                Exception t = delayer.GetThrown();
                if (t == null)
                {
                    NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby"
                                                );
                }
                GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported"
                                                         , t);
                // Now, if we try again to recover the block, it should succeed on the new
                // active.
                LoopRecoverLease(fsOtherUser, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }