Ejemplo n.º 1
0
        /// <exception cref="System.Exception"/>
        private void DoWriteOverFailoverTest(TestPipelinesFailover.TestScenario scenario,
                                             TestPipelinesFailover.MethodToTestIdempotence methodToTest)
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            // Don't check replication periodically.
            conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, 1000);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                int sizeWritten = 0;
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                sizeWritten += BlockAndAHalf;
                // Make sure all of the blocks are written out before failover.
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                scenario.Run(cluster);
                // NOTE: explicitly do *not* make any further metadata calls
                // to the NN here. The next IPC call should be to allocate the next
                // block. Any other call would notice the failover and not test
                // idempotence of the operation (HDFS-3031)
                FSNamesystem ns1 = cluster.GetNameNode(1).GetNamesystem();
                BlockManagerTestUtil.UpdateState(ns1.GetBlockManager());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetPendingReplicationBlocks());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetCorruptReplicaBlocks());
                NUnit.Framework.Assert.AreEqual(0, ns1.GetMissingBlocksCount());
                // If we're testing allocateBlock()'s idempotence, write another
                // block and a half, so we have to allocate a new block.
                // Otherise, don't write anything, so our next RPC will be
                // completeFile() if we're testing idempotence of that operation.
                if (methodToTest == TestPipelinesFailover.MethodToTestIdempotence.AllocateBlock)
                {
                    // write another block and a half
                    AppendTestUtil.Write(stm, sizeWritten, BlockAndAHalf);
                    sizeWritten += BlockAndAHalf;
                }
                stm.Close();
                stm = null;
                AppendTestUtil.Check(fs, TestPath, sizeWritten);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Ejemplo n.º 2
0
        /// <exception cref="System.Exception"/>
        private void DoTestWriteOverFailoverWithDnFail(TestPipelinesFailover.TestScenario
                                                       scenario)
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(5).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                // Make sure all the blocks are written before failover
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                scenario.Run(cluster);
                NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
                cluster.StopDataNode(0);
                // write another block and a half
                AppendTestUtil.Write(stm, BlockAndAHalf, BlockAndAHalf);
                stm.Hflush();
                Log.Info("Failing back to NN 0");
                cluster.TransitionToStandby(1);
                cluster.TransitionToActive(0);
                cluster.StopDataNode(1);
                AppendTestUtil.Write(stm, BlockAndAHalf * 2, BlockAndAHalf);
                stm.Hflush();
                stm.Close();
                stm = null;
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf * 3);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Ejemplo n.º 3
0
        /// <summary>Tests lease recovery if a client crashes.</summary>
        /// <remarks>
        /// Tests lease recovery if a client crashes. This approximates the
        /// use case of HBase WALs being recovered after a NN failover.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestLeaseRecoveryAfterFailover()
        {
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a block and a half
                AppendTestUtil.Write(stm, 0, BlockAndAHalf);
                stm.Hflush();
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath));
                FileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
                LoopRecoverLease(fsOtherUser, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
                // Fail back to ensure that the block locations weren't lost on the
                // original node.
                cluster.TransitionToStandby(1);
                cluster.TransitionToActive(0);
                AppendTestUtil.Check(fs, TestPath, BlockAndAHalf);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Ejemplo n.º 4
0
            /// <exception cref="System.Exception"/>
            public override void DoAnAction()
            {
                FSDataOutputStream stm = fs.Create(path, true);

                try
                {
                    AppendTestUtil.Write(stm, 0, 100);
                    stm.Hflush();
                    LoopRecoverLease(fsOtherUser, path);
                    AppendTestUtil.Check(fs, path, 100);
                }
                finally
                {
                    try
                    {
                        stm.Close();
                    }
                    catch (IOException)
                    {
                    }
                }
            }
Ejemplo n.º 5
0
        /// <summary>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed.
        /// </summary>
        /// <remarks>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed. The
        /// DN running the recovery should then fail to commit the synchronization
        /// and a later retry will succeed.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFailoverRightBeforeCommitSynchronization()
        {
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a half block
                AppendTestUtil.Write(stm, 0, BlockSize / 2);
                stm.Hflush();
                // Look into the block manager on the active node for the block
                // under construction.
                NameNode           nn0             = cluster.GetNameNode(0);
                ExtendedBlock      blk             = DFSTestUtil.GetFirstBlock(fs, TestPath);
                DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn0, blk);
                Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
                // Find the corresponding DN daemon, and spy on its connection to the
                // active.
                DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN
                                                                                               , nn0);
                // Delay the commitBlockSynchronization call
                GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(Org.Mockito.Mockito
                                                                                             .Eq(blk), Org.Mockito.Mockito.AnyInt(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                                                                                             .Eq(true), Org.Mockito.Mockito.Eq(false), (DatanodeID[])Org.Mockito.Mockito.AnyObject
                                                                                                 (), (string[])Org.Mockito.Mockito.AnyObject());
                // new genstamp
                // new length
                // close file
                // delete block
                // new targets
                // new target storages
                DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
                NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));
                Log.Info("Waiting for commitBlockSynchronization call from primary");
                delayer.WaitForCall();
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                // Let the commitBlockSynchronization call go through, and check that
                // it failed with the correct exception.
                delayer.Proceed();
                delayer.WaitForResult();
                Exception t = delayer.GetThrown();
                if (t == null)
                {
                    NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby"
                                                );
                }
                GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported"
                                                         , t);
                // Now, if we try again to recover the block, it should succeed on the new
                // active.
                LoopRecoverLease(fsOtherUser, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Ejemplo n.º 6
0
        public virtual void TestQueueingWithAppend()
        {
            int numQueued = 0;
            int numDN     = cluster.GetDataNodes().Count;
            // case 1: create file and call hflush after write
            FSDataOutputStream @out = fs.Create(TestFilePath);

            try
            {
                AppendTestUtil.Write(@out, 0, 10);
                @out.Hflush();
                // Opening the file will report RBW replicas, but will be
                // queued on the StandbyNode.
                // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
                // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
                //
                cluster.TriggerBlockReports();
                numQueued += numDN;
                // RBW messages
                // The cluster.triggerBlockReports() call above does a full
                // block report that incurs 3 extra RBW messages
                numQueued += numDN;
            }
            finally
            {
                // RBW messages
                IOUtils.CloseStream(@out);
                numQueued += numDN;
            }
            // blockReceived messages
            cluster.TriggerBlockReports();
            numQueued += numDN;
            NUnit.Framework.Assert.AreEqual(numQueued, cluster.GetNameNode(1).GetNamesystem()
                                            .GetPendingDataNodeMessageCount());
            // case 2: append to file and call hflush after write
            try
            {
                @out = fs.Append(TestFilePath);
                AppendTestUtil.Write(@out, 10, 10);
                @out.Hflush();
                cluster.TriggerBlockReports();
                numQueued += numDN * 2;
            }
            finally
            {
                // RBW messages, see comments in case 1
                IOUtils.CloseStream(@out);
                numQueued += numDN;
            }
            // blockReceived
            NUnit.Framework.Assert.AreEqual(numQueued, cluster.GetNameNode(1).GetNamesystem()
                                            .GetPendingDataNodeMessageCount());
            // case 3: similar to case 2, except no hflush is called.
            try
            {
                @out = fs.Append(TestFilePath);
                AppendTestUtil.Write(@out, 20, 10);
            }
            finally
            {
                // The write operation in the try block is buffered, thus no RBW message
                // is reported yet until the closeStream call here. When closeStream is
                // called, before HDFS-7217 fix, there would be three RBW messages
                // (blockReceiving), plus three FINALIZED messages (blockReceived)
                // delivered to NN. However, because of HDFS-7217 fix, the reporting of
                // RBW  messages is postponed. In this case, they are even overwritten
                // by the blockReceived messages of the same block when they are waiting
                // to be delivered. All this happens within the closeStream() call.
                // What's delivered to NN is the three blockReceived messages. See
                //    BPServiceActor#addPendingReplicationBlockInfo
                //
                IOUtils.CloseStream(@out);
                numQueued += numDN;
            }
            // blockReceived
            cluster.TriggerBlockReports();
            numQueued += numDN;
            Log.Info("Expect " + numQueued + " and got: " + cluster.GetNameNode(1).GetNamesystem
                         ().GetPendingDataNodeMessageCount());
            NUnit.Framework.Assert.AreEqual(numQueued, cluster.GetNameNode(1).GetNamesystem()
                                            .GetPendingDataNodeMessageCount());
            cluster.TransitionToStandby(0);
            cluster.TransitionToActive(1);
            // Verify that no replicas are marked corrupt, and that the
            // file is readable from the failed-over standby.
            BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
            BlockManagerTestUtil.UpdateState(nn2.GetNamesystem().GetBlockManager());
            NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetCorruptReplicaBlocks());
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetCorruptReplicaBlocks());
            AppendTestUtil.Check(fs, TestFilePath, 30);
        }