Example #1
 /// <summary>
 /// Test cancellation of ongoing checkpoints when failover happens
 /// mid-checkpoint during image upload from standby to active NN.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestCheckpointCancellationDuringUpload()
 {
     // don't compress, we want a big image
     cluster.GetConfiguration(0).SetBoolean(DFSConfigKeys.DfsImageCompressKey, false);
     cluster.GetConfiguration(1).SetBoolean(DFSConfigKeys.DfsImageCompressKey, false);
     // Throttle SBN upload to make it hang during upload to ANN
     cluster.GetConfiguration(1).SetLong(DFSConfigKeys.DfsImageTransferRateKey, 100);
     cluster.RestartNameNode(0);
     cluster.RestartNameNode(1);
     nn0 = cluster.GetNameNode(0);
     nn1 = cluster.GetNameNode(1);
     cluster.TransitionToActive(0);
     DoEdits(0, 100);
     HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
     HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(104));
     cluster.TransitionToStandby(0);
     cluster.TransitionToActive(1);
     // Wait to make sure background TransferFsImageUpload thread was cancelled.
     // This needs to be done before the next test in the suite starts, so that a
     // file descriptor is not held open during the next cluster init.
     cluster.Shutdown();
     cluster = null;
     GenericTestUtils.WaitFor(new _Supplier_312(), 1000, 30000);
     // Assert that former active did not accept the canceled checkpoint file.
      NUnit.Framework.Assert.AreEqual(0, nn0.GetFSImage().GetMostRecentCheckpointTxId());
 }
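
Note: `_Supplier_312` above is an anonymous `Supplier<bool>` class that the Sharpen conversion hoists out of the method body; its definition is not shown in this excerpt. The sketch below only illustrates the polling semantics of `GenericTestUtils.WaitFor(check, 1000, 30000)` using plain .NET types; `WaitForSketch`, the `Func<bool>` delegate, and the timeout exception are stand-ins introduced here, not the Hadoop test utility's actual API.

using System;
using System.Diagnostics;
using System.Threading;

// Standalone sketch of the wait-and-poll pattern used above: re-evaluate a
// boolean check every checkEveryMillis until it passes or waitForMillis elapses.
// The check delegate stands in for the generated _Supplier_312, whose real body
// (verifying that the cancelled TransferFsImageUpload work has finished) lives
// elsewhere in the converted test file and is not reproduced here.
public static class WaitForSketch
{
    public static void WaitFor(Func<bool> check, int checkEveryMillis, int waitForMillis)
    {
        Stopwatch sw = Stopwatch.StartNew();
        while (!check())
        {
            if (sw.ElapsedMilliseconds >= waitForMillis)
            {
                throw new TimeoutException("Timed out waiting for condition");
            }
            Thread.Sleep(checkEveryMillis);
        }
    }
}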
Example #2
 public virtual void TestFailureToReadEditsOnTransitionToActive()
 {
     NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir1)));
     HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
     // It should also upload it back to the active.
     HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3));
     CauseFailureOnEditLogRead();
     NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir2)));
     NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir3)));
     try
     {
         HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
         NUnit.Framework.Assert.Fail("Standby fully caught up, but should not have been able to"
                                     );
     }
      catch (HATestUtil.CouldNotCatchUpException)
      {
          // Expected. The NN did not exit.
      }
     // Shutdown the active NN.
     cluster.ShutdownNameNode(0);
     try
     {
         // Transition the standby to active.
         cluster.TransitionToActive(1);
         NUnit.Framework.Assert.Fail("Standby transitioned to active, but should not have been able to"
                                     );
     }
     catch (ExitUtil.ExitException ee)
     {
         GenericTestUtils.AssertExceptionContains("Error replaying edit log", ee);
     }
 }
Example #3
        public virtual void TestCheckpointStartingMidEditsFile()
        {
            NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir1)));
            HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
            // Once the standby catches up, it should notice that it needs to
            // do a checkpoint and save one to its local directories.
            HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(0, 3));
            // It should also upload it back to the active.
            HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3));
            CauseFailureOnEditLogRead();
            NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir2)));
            NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir3)));
            try
            {
                HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
                NUnit.Framework.Assert.Fail("Standby fully caught up, but should not have been able to"
                                            );
            }
            catch (HATestUtil.CouldNotCatchUpException)
            {
                // Expected. The NN did not exit.
            }
            // 5 because we should get OP_START_LOG_SEGMENT and one successful OP_MKDIR
            HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(0, 3, 5));
            // It should also upload it back to the active.
            HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3, 5));
            // Restart the active NN
            cluster.RestartNameNode(0);
            HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3, 5));
            FileSystem fs0 = null;

            try
            {
                // Make sure that when the active restarts, it loads all the edits.
                fs0 = FileSystem.Get(NameNode.GetUri(nn0.GetNameNodeAddress()), conf);
                NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir1)));
                NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir2)));
                NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir3)));
            }
            finally
            {
                if (fs0 != null)
                {
                    fs0.Close();
                }
            }
        }
Example #4
        /// <exception cref="System.Exception"/>
        public virtual void TestSBNCheckpoints()
        {
            JournalSet standbyJournalSet = NameNodeAdapter.SpyOnJournalSet(nn1);

            DoEdits(0, 10);
            HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
            // Once the standby catches up, it should notice that it needs to
            // do a checkpoint and save one to its local directories.
            HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(12));
            GenericTestUtils.WaitFor(new _Supplier_147(this), 1000, 60000);
            // It should have saved the oiv image too.
            NUnit.Framework.Assert.AreEqual("One file is expected", 1, tmpOivImgDir.List().Length
                                            );
            // It should also upload it back to the active.
            HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(12));
            // The standby should never try to purge edit logs on shared storage.
            Org.Mockito.Mockito.Verify(standbyJournalSet, Org.Mockito.Mockito.Never()).PurgeLogsOlderThan(Org.Mockito.Mockito.AnyLong());
        }
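
As in example #1, `_Supplier_147(this)` refers to a converter-generated supplier class defined elsewhere in the file. Judging from the assertion that immediately follows the wait ("One file is expected" in `tmpOivImgDir`), the polled condition presumably waits for the OIV image to appear; the sketch below is an assumption about that body, with `ISupplier` and `OivImageWrittenCheck` as hypothetical stand-ins for the generated types rather than the real ones.

using System.IO;

// Hypothetical stand-in for the supplier interface consumed by
// GenericTestUtils.WaitFor in the converted code.
public interface ISupplier<T>
{
    T Get();
}

// Assumed body of the generated _Supplier_147: report true once the standby
// has written at least one OIV (offline image viewer) image into the
// temporary directory, matching the "One file is expected" assertion that
// follows the wait.
public sealed class OivImageWrittenCheck : ISupplier<bool>
{
    private readonly DirectoryInfo tmpOivImgDir;

    public OivImageWrittenCheck(DirectoryInfo tmpOivImgDir)
    {
        this.tmpOivImgDir = tmpOivImgDir;
    }

    public bool Get()
    {
        return tmpOivImgDir.GetFiles().Length > 0;
    }
}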
Example #5
        /// <summary>
        /// Test for the case when both of the NNs in the cluster are
        /// in the standby state, and thus are both creating checkpoints
        /// and uploading them to each other.
        /// </summary>
        /// <remarks>
        /// Test for the case when both of the NNs in the cluster are
        /// in the standby state, and thus are both creating checkpoints
        /// and uploading them to each other.
        /// In this circumstance, they should receive the error from the
        /// other node indicating that the other node already has a
        /// checkpoint for the given txid, but this should not cause
        /// an abort, etc.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestBothNodesInStandbyState()
        {
            DoEdits(0, 10);
            cluster.TransitionToStandby(0);
            // Transitioning to standby closed the edit log on the active,
            // so the standby will catch up. Then, both will be in standby mode
            // with enough uncheckpointed txns to cause a checkpoint, and they
            // will each try to take a checkpoint and upload to each other.
            HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(12));
            HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(12));
            NUnit.Framework.Assert.AreEqual(12, nn0.GetNamesystem().GetFSImage().GetMostRecentCheckpointTxId());
            NUnit.Framework.Assert.AreEqual(12, nn1.GetNamesystem().GetFSImage().GetMostRecentCheckpointTxId());
            IList<FilePath> dirs = Lists.NewArrayList();

            Sharpen.Collections.AddAll(dirs, FSImageTestUtil.GetNameNodeCurrentDirs(cluster, 0));
            Sharpen.Collections.AddAll(dirs, FSImageTestUtil.GetNameNodeCurrentDirs(cluster, 1));
            FSImageTestUtil.AssertParallelFilesAreIdentical(dirs, ImmutableSet.Of<string>());
        }