Exemplo n.º 1
0
        /// <summary>
        /// Make sure that clients will receive StandbyExceptions even when a
        /// checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
        /// thread will have FSNS lock.
        /// </summary>
        /// <remarks>
        /// Regression test for HDFS-4591. A <c>DelayAnswer</c> is installed on the
        /// spied FSImage of <c>nn1</c> for <c>SaveNamespace</c>; the RPC check and the
        /// pending-message check are both made after <c>WaitForCall</c> returns and
        /// before <c>Proceed</c> is invoked, i.e. while the answer reports one fired
        /// call and zero results.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestStandbyExceptionThrownDuringCheckpoint()
        {
            // Set it up so that we know when the SBN checkpoint starts and ends.
            FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);

            GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
                Org.Mockito.Mockito.Any <FSNamesystem>(),
                Org.Mockito.Mockito.Eq(NNStorage.NameNodeFile.Image),
                Org.Mockito.Mockito.Any <Canceler>());
            // Perform some edits and wait for a checkpoint to start on the SBN.
            DoEdits(0, 1000);
            nn0.GetRpcServer().RollEditLog();
            answerer.WaitForCall();
            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
                "SBN is not performing checkpoint but it should be.");
            // Make sure that the lock has actually been taken by the checkpointing
            // thread.
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            try
            {
                // Perform an RPC to the SBN and make sure it throws a StandbyException.
                nn1.GetRpcServer().GetFileInfo("/");
                NUnit.Framework.Assert.Fail("Should have thrown StandbyException, but instead succeeded."
                                            );
            }
            catch (StandbyException se)
            {
                GenericTestUtils.AssertExceptionContains("is not supported", se);
            }
            // Make sure new incremental block reports are processed during
            // checkpointing on the SBN: the pending count starts at zero and must
            // be positive one second after DoCreate().
            NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetPendingDataNodeMessageCount
                                                ());
            DoCreate();
            Sharpen.Thread.Sleep(1000);
            NUnit.Framework.Assert.IsTrue(cluster.GetNamesystem(1).GetPendingDataNodeMessageCount
                                              () > 0);
            // Make sure that the checkpoint is still going on, implying that the client
            // RPC to the SBN happened during the checkpoint.
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
                "SBN should have still been checkpointing.");
            // Release the delayed SaveNamespace call and wait for its result.
            answerer.Proceed();
            answerer.WaitForResult();
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
                "SBN should have finished checkpointing.");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Checks that the <c>/jmx</c> page of <c>nn1</c> can be fetched, and contains
        /// "NumLiveDataNodes", while a checkpoint on that NameNode is being held
        /// open by a <c>DelayAnswer</c> on <c>SaveNamespace</c>.
        /// </summary>
        /// <remarks>
        /// While the checkpoint is held, the test also asserts that the FSNS lock has
        /// no queued threads and is not write-locked, and that the checkpoint lock
        /// does have queued threads.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestReadsAllowedDuringCheckpoint()
        {
            // Set it up so that we know when the SBN checkpoint starts and ends.
            FSImage spyImage1 = NameNodeAdapter.SpyOnFsImage(nn1);

            GenericTestUtils.DelayAnswer answerer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(answerer).When(spyImage1).SaveNamespace(
                Org.Mockito.Mockito.Any <FSNamesystem>(),
                Org.Mockito.Mockito.Any <NNStorage.NameNodeFile>(),
                Org.Mockito.Mockito.Any <Canceler>());
            // Perform some edits and wait for a checkpoint to start on the SBN.
            DoEdits(0, 1000);
            nn0.GetRpcServer().RollEditLog();
            answerer.WaitForCall();
            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
                "SBN is not performing checkpoint but it should be.");
            // Make sure that the lock has actually been taken by the checkpointing
            // thread.
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            // Perform an RPC that needs to take the write lock.
            // NOTE(review): the body of _Thread_404 is not visible here; confirm
            // which RPC it issues.
            Sharpen.Thread t = new _Thread_404(this);
            t.Start();
            // Make sure that our thread is waiting for the lock.
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().HasQueuedThreads
                                               ());
            NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().IsWriteLocked
                                               ());
            NUnit.Framework.Assert.IsTrue(nn1.GetNamesystem().GetCpLockForTests().HasQueuedThreads
                                              ());
            // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
            // be taken.
            string pageContents = DFSTestUtil.UrlGet(new Uri("http://" + nn1.GetHttpAddress()
                                                             .GetHostName() + ":" + nn1.GetHttpAddress().Port + "/jmx"));

            NUnit.Framework.Assert.IsTrue(pageContents.Contains("NumLiveDataNodes"));
            // Make sure that the checkpoint is still going on, implying that the client
            // RPC to the SBN happened during the checkpoint.
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 0,
                "SBN should have still been checkpointing.");
            // Release the delayed SaveNamespace call and wait for its result.
            answerer.Proceed();
            answerer.WaitForResult();
            NUnit.Framework.Assert.IsTrue(
                answerer.GetFireCount() == 1 && answerer.GetResultCount() == 1,
                "SBN should have finished checkpointing.");
            // Wait for the background thread started above to finish.
            t.Join();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks that the modification time of a file advances by at least the
        /// pause taken between creating and closing it, and that the value read
        /// after the close is returned unchanged once the NameNode is restarted.
        /// </summary>
        public virtual void TestModTimePersistsAfterRestart()
        {
            // Pause between create and close, in milliseconds.
            long           pauseMillis = 10;
            Configuration  hdfsConf    = new HdfsConfiguration();
            MiniDFSCluster miniCluster = null;
            FileSystem     fileSystem  = null;

            try
            {
                miniCluster = new MiniDFSCluster.Builder(hdfsConf).Build();
                fileSystem  = miniCluster.GetFileSystem();

                // Create the file and record the mod time it has while still open.
                Path         target  = new Path("/test");
                OutputStream stream  = fileSystem.Create(target);
                long         created = fileSystem.GetFileStatus(target).GetModificationTime();
                NUnit.Framework.Assert.IsTrue(created > 0);

                // Close only after the pause; the mod time must have moved forward
                // by at least that much.
                ThreadUtil.SleepAtLeastIgnoreInterrupts(pauseMillis);
                stream.Close();
                long closed = fileSystem.GetFileStatus(target).GetModificationTime();
                NUnit.Framework.Assert.IsTrue(created + pauseMillis <= closed);

                // After a NameNode restart the post-close mod time must be unchanged.
                miniCluster.RestartNameNode();
                long restarted = fileSystem.GetFileStatus(target).GetModificationTime();
                NUnit.Framework.Assert.AreEqual(closed, restarted);
            }
            finally
            {
                // Release the file system first, then the cluster.
                if (fileSystem != null)
                {
                    fileSystem.Close();
                }
                if (miniCluster != null)
                {
                    miniCluster.Shutdown();
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a one-DataNode HA cluster, queues a pending DataNode message on
        /// NameNode 1 by reporting a replica with an older generation stamp, then
        /// wipes and restarts the DataNode and asserts that the pending message
        /// count on NameNode 1 drops to zero once a different DataNode UID is
        /// registered. Finishes by failing over from NameNode 0 to NameNode 1.
        /// </summary>
        public virtual void TestChangedStorageId()
        {
            HdfsConfiguration haConf = new HdfsConfiguration();
            haConf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);

            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(haConf);
            MiniDFSCluster haCluster = builder.NumDataNodes(1)
                                              .NnTopology(MiniDFSNNTopology.SimpleHATopology())
                                              .Build();

            try
            {
                haCluster.TransitionToActive(0);

                // Write a small file through the failover file system.
                FileSystem   failoverFs = HATestUtil.ConfigureFailoverFs(haCluster, haConf);
                byte[]       payload    = Sharpen.Runtime.GetBytesForString("foo bar baz");
                OutputStream stream     = failoverFs.Create(filePath);
                stream.Write(payload);
                stream.Close();
                HATestUtil.WaitForStandbyToCatchUp(haCluster.GetNameNode(0), haCluster.GetNameNode(1));

                // Change the gen stamp of the block on datanode to go back in time (gen
                // stamps start at 1000)
                ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(failoverFs, filePath);
                NUnit.Framework.Assert.IsTrue(haCluster.ChangeGenStampOfBlock(0, firstBlock, 900));

                // Stop the DN so the replica with the changed gen stamp will be reported
                // when this DN starts up.
                MiniDFSCluster.DataNodeProperties stoppedDn = haCluster.StopDataNode(0);

                // Restart the namenode so that when the DN comes up it will see an initial
                // block report.
                haCluster.RestartNameNode(1, false);
                NUnit.Framework.Assert.IsTrue(haCluster.RestartDataNode(stoppedDn, true));

                // Wait until the standby NN queues up the corrupt block in the pending DN
                // message queue.
                while (true)
                {
                    if (haCluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount() >= 1)
                    {
                        break;
                    }
                    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
                }
                NUnit.Framework.Assert.AreEqual(
                    1, haCluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount());

                string previousStorageId = GetRegisteredDatanodeUid(haCluster, 1);

                // Reformat/restart the DN.
                NUnit.Framework.Assert.IsTrue(WipeAndRestartDn(haCluster, 0));

                // Give the DN time to start up and register, which will cause the
                // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
                // Poll once per second until the registered UID differs from the old one.
                while (true)
                {
                    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
                    string currentStorageId = GetRegisteredDatanodeUid(haCluster, 1);
                    System.Console.Out.WriteLine("====> oldStorageId: " + previousStorageId + " newStorageId: "
                                                 + currentStorageId);
                    if (!currentStorageId.Equals(previousStorageId))
                    {
                        break;
                    }
                }
                NUnit.Framework.Assert.AreEqual(
                    0, haCluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount());

                // Now try to fail over.
                haCluster.TransitionToStandby(0);
                haCluster.TransitionToActive(1);
            }
            finally
            {
                haCluster.Shutdown();
            }
        }
        /// <summary>
        /// Writes to a file with replication 3 on a three-DataNode cluster, stops two
        /// DataNodes mid-stream, restarts them, waits longer than the configured
        /// exclude-cache expiry (2500 ms), stops the remaining DataNode, and then
        /// checks that a further write, flush and close complete without throwing.
        /// </summary>
        /// <remarks>
        /// Uses the <c>conf</c> and <c>cluster</c> members declared outside this
        /// method; the cluster is not shut down here.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestExcludedNodesForgiveness()
        {
            // Forgive nodes in under 2.5s for this test case.
            conf.SetLong(DFSConfigKeys.DfsClientWriteExcludeNodesCacheExpiryInterval, 2500);
            // We'll be using a 512 bytes block size just for tests
            // so making sure the checksum bytes too match it.
            conf.SetInt("io.bytes.per.checksum", 512);
            cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
            FileSystem fs       = cluster.GetFileSystem();
            Path       filePath = new Path("/testForgivingExcludedNodes");

            // 256 bytes data chunk for writes, filled with the character '0'.
            byte[] bytes = new byte[256];
            for (int index = 0; index < bytes.Length; index++)
            {
                bytes[index] = (byte)('0');
            }
            // File with a 512 bytes block size
            FSDataOutputStream @out = fs.Create(filePath, true, 4096, (short)3, 512);

            // Write a block to all 3 DNs (2x256bytes).
            @out.Write(bytes);
            @out.Write(bytes);
            @out.Hflush();
            // Remove two DNs, to put them into the exclude list.
            MiniDFSCluster.DataNodeProperties two = cluster.StopDataNode(2);
            MiniDFSCluster.DataNodeProperties one = cluster.StopDataNode(1);
            // Write another block.
            // At this point, we have two nodes already in excluded list.
            @out.Write(bytes);
            @out.Write(bytes);
            @out.Hflush();
            // Bring back the older DNs, since they are gonna be forgiven only
            // afterwards of this previous block write.
            NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(one, true));
            NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(two, true));
            cluster.WaitActive();
            // Sleep for 5s, to let the excluded nodes be expired
            // from the excludes list (i.e. forgiven after the configured wait period).
            // [Sleeping just in case the restart of the DNs completed < 5s cause
            // otherwise, we'll end up quickly excluding those again.]
            ThreadUtil.SleepAtLeastIgnoreInterrupts(5000);
            // Terminate the last good DN, to assert that there's no
            // single-DN-available scenario, caused by not forgiving the other
            // two by now.
            cluster.StopDataNode(0);
            try
            {
                // Attempt writing another block, which should still pass
                // cause the previous two should have been forgiven by now,
                // while the last good DN added to excludes this time.
                @out.Write(bytes);
                @out.Hflush();
                @out.Close();
            }
            catch (Exception e)
            {
                // Any failure here is reported as a test failure carrying the
                // exception's message.
                NUnit.Framework.Assert.Fail("Excluded DataNodes should be forgiven after a while and "
                                            + "not cause file writing exception of: '" + e.Message + "'");
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Calls <c>ThreadUtil.SleepAtLeastIgnoreInterrupts(millisToSleep)</c> and then
 /// sets the identifier of <c>impl1</c> to "renamed-impl1".
 /// </summary>
 public override void Run()
 {
     const string renamedIdentifier = "renamed-impl1";

     // The rename happens only after the sleep call has returned.
     ThreadUtil.SleepAtLeastIgnoreInterrupts(millisToSleep);
     impl1.SetIdentifier(renamedIdentifier);
 }