Exemplo n.º 1
0
        /// <summary>
        /// Verifies block recovery when the active NN fails over after a block
        /// synchronization request has been issued but before it is committed.
        /// </summary>
        /// <remarks>
        /// The commitBlockSynchronization call made through the primary DN's
        /// connection to NN 0 is held back while NN 0 is moved to standby and NN 1
        /// is made active. Once released, the call is required to have failed with
        /// an "Operation category WRITE is not supported" error; lease recovery is
        /// then retried and the file content is checked.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFailoverRightBeforeCommitSynchronization()
        {
            Configuration conf = new Configuration();
            // Permissions are switched off so that a different user may recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);

            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
                .NnTopology(MiniDFSNNTopology.SimpleHATopology())
                .NumDataNodes(3)
                .Build();
            FSDataOutputStream outStream = null;

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");

                // Create the file and flush half a block so that a block is left
                // under construction.
                int halfBlock = BlockSize / 2;
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                outStream = fs.Create(TestPath);
                AppendTestUtil.Write(outStream, 0, halfBlock);
                outStream.Hflush();

                // Ask the active NN which DN is expected to act as recovery primary
                // for the block under construction.
                NameNode activeNn = cluster.GetNameNode(0);
                ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestPath);
                DatanodeDescriptor expectedPrimary =
                    DFSTestUtil.GetExpectedPrimaryNode(activeNn, firstBlock);
                Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

                // Locate that DN daemon and spy on its connection to the active NN.
                DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                DatanodeProtocolClientSideTranslatorPB spiedNn =
                    DataNodeTestUtils.SpyOnBposToNN(primaryDN, activeNn);

                // Hold back the commitBlockSynchronization call for this block.
                GenericTestUtils.DelayAnswer commitDelay = new GenericTestUtils.DelayAnswer(Log);
                Org.Mockito.Mockito.DoAnswer(commitDelay).When(spiedNn).CommitBlockSynchronization(
                    Org.Mockito.Mockito.Eq(firstBlock),
                    Org.Mockito.Mockito.AnyInt(),                       // new genstamp
                    Org.Mockito.Mockito.AnyLong(),                      // new length
                    Org.Mockito.Mockito.Eq(true),                       // close file
                    Org.Mockito.Mockito.Eq(false),                      // delete block
                    (DatanodeID[])Org.Mockito.Mockito.AnyObject(),      // new targets
                    (string[])Org.Mockito.Mockito.AnyObject());         // new target storages

                // Start lease recovery as another user; it must not report completion yet.
                DistributedFileSystem otherUserFs = CreateFsAsOtherUser(cluster, conf);
                bool recoveredImmediately = otherUserFs.RecoverLease(TestPath);
                NUnit.Framework.Assert.IsFalse(recoveredImmediately);

                Log.Info("Waiting for commitBlockSynchronization call from primary");
                commitDelay.WaitForCall();

                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);

                // Release the delayed call and require that it failed with the
                // expected error.
                commitDelay.Proceed();
                commitDelay.WaitForResult();
                Exception thrown = commitDelay.GetThrown();
                if (thrown == null)
                {
                    NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby");
                }
                GenericTestUtils.AssertExceptionContains(
                    "Operation category WRITE is not supported", thrown);

                // A fresh recovery attempt should now succeed on the new active.
                LoopRecoverLease(otherUserFs, TestPath);
                AppendTestUtil.Check(fs, TestPath, halfBlock);
            }
            finally
            {
                IOUtils.CloseStream(outStream);
                cluster.Shutdown();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Test race between delete operation and commitBlockSynchronization method.
        /// See HDFS-6825.
        /// </summary>
        /// <remarks>
        /// For each test path a half block is written and flushed, lease recovery
        /// is started, and the commitBlockSynchronization call is held back until
        /// the path's top-most non-root ancestor (the file itself when it sits
        /// directly under the root) has been deleted recursively and, for some
        /// paths, recreated as a directory. An exception from the delayed call is
        /// only logged. After all paths are processed the NameNodes are restarted.
        /// </remarks>
        /// <param name="hasSnapshot">
        /// When true, a snapshot of the root directory is created after each file
        /// is written and before it is deleted.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
        {
            Log.Info("Start testing, hasSnapshot: " + hasSnapshot);

            // Each entry pairs a file path with a flag saying whether the deleted
            // ancestor directory is recreated before the delayed call is released.
            AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
                new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
            testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
                "/test-file", false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
                "/test-file1", true));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
                "/testdir/testdir1/test-file", false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>(
                "/testdir/testdir1/test-file1", true));

            Path rootPath = new Path("/");
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm = null;

            // One spy per DataNode, reused when the same DN is primary again.
            IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
                new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
                cluster.WaitActive();
                DistributedFileSystem fs = cluster.GetFileSystem();
                int stId = 0;
                foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
                {
                    string testPath = stest.Key;
                    bool mkSameDir = stest.Value;
                    Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot);
                    Path fPath = new Path(testPath);

                    // Walk up until the parent is the root: this is what gets
                    // deleted recursively below.
                    Path grandestNonRootParent = fPath;
                    while (!grandestNonRootParent.GetParent().Equals(rootPath))
                    {
                        grandestNonRootParent = grandestNonRootParent.GetParent();
                    }

                    // NOTE(review): stm is reassigned on every iteration, so only the
                    // stream of the last path is closed in the finally block - confirm
                    // this is intended.
                    stm = fs.Create(fPath);
                    Log.Info("test on " + testPath + " created " + fPath);
                    // write a half block
                    AppendTestUtil.Write(stm, 0, BlockSize / 2);
                    stm.Hflush();
                    if (hasSnapshot)
                    {
                        SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                        ++stId;
                    }

                    // Look into the block manager on the active node for the block
                    // under construction.
                    NameNode nn = cluster.GetNameNode();
                    ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
                    DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
                    Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

                    // Find the corresponding DN daemon, and spy on its connection to the
                    // active.
                    DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());

                    // The dictionary indexer throws KeyNotFoundException for a key that
                    // is not present, so probe with TryGetValue and install the spy on
                    // first use of each DataNode.
                    DatanodeProtocolClientSideTranslatorPB nnSpy;
                    if (!dnMap.TryGetValue(primaryDN, out nnSpy) || nnSpy == null)
                    {
                        nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                        dnMap[primaryDN] = nnSpy;
                    }

                    // Delay the commitBlockSynchronization call
                    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                    Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                        Org.Mockito.Mockito.Eq(blk),
                        Org.Mockito.Mockito.AnyInt(),                       // new genstamp
                        Org.Mockito.Mockito.AnyLong(),                      // new length
                        Org.Mockito.Mockito.Eq(true),                       // close file
                        Org.Mockito.Mockito.Eq(false),                      // delete block
                        (DatanodeID[])Org.Mockito.Mockito.AnyObject(),      // new targets
                        (string[])Org.Mockito.Mockito.AnyObject());         // new target storages

                    fs.RecoverLease(fPath);
                    Log.Info("Waiting for commitBlockSynchronization call from primary");
                    delayer.WaitForCall();

                    // Delete while the commit call is still being held back.
                    Log.Info("Deleting recursively " + grandestNonRootParent);
                    fs.Delete(grandestNonRootParent, true);
                    if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
                    {
                        // Only recreate when the deleted ancestor was a directory,
                        // i.e. not the test file itself.
                        Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                        fs.Mkdirs(grandestNonRootParent);
                    }

                    delayer.Proceed();
                    Log.Info("Now wait for result");
                    delayer.WaitForResult();
                    Exception t = delayer.GetThrown();
                    if (t != null)
                    {
                        // A failure of the delayed call is logged, not asserted on.
                        Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
                    }
                }
                // end of loop each fPath
                Log.Info("Now check we can restart");
                cluster.RestartNameNodes();
                Log.Info("Restart finished");
            }
            finally
            {
                if (stm != null)
                {
                    IOUtils.CloseStream(stm);
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }