public virtual void TestPurgeLogs()
        {
            for (int txid = 1; txid <= 5; txid++)
            {
                QJMTestUtil.WriteSegment(cluster, qjm, txid, 1, true);
            }
            FilePath curDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);

            GenericTestUtils.AssertGlobEquals(curDir, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                  (1, 1), NNStorage.GetFinalizedEditsFileName(2, 2), NNStorage.GetFinalizedEditsFileName
                                                  (3, 3), NNStorage.GetFinalizedEditsFileName(4, 4), NNStorage.GetFinalizedEditsFileName
                                                  (5, 5));
            FilePath paxosDir = new FilePath(curDir, "paxos");

            GenericTestUtils.AssertExists(paxosDir);
            // Create new files in the paxos directory, which should get purged too.
            NUnit.Framework.Assert.IsTrue(new FilePath(paxosDir, "1").CreateNewFile());
            NUnit.Framework.Assert.IsTrue(new FilePath(paxosDir, "3").CreateNewFile());
            GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "1", "3");
            // Create some temporary files of the sort that are used during recovery.
            NUnit.Framework.Assert.IsTrue(new FilePath(curDir, "edits_inprogress_0000000000000000001.epoch=140"
                                                       ).CreateNewFile());
            NUnit.Framework.Assert.IsTrue(new FilePath(curDir, "edits_inprogress_0000000000000000002.empty"
                                                       ).CreateNewFile());
            qjm.PurgeLogsOlderThan(3);
            // Log purging is asynchronous, so we have to wait for the calls
            // to be sent and respond before verifying.
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
            // Older edits should be purged
            GenericTestUtils.AssertGlobEquals(curDir, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                  (3, 3), NNStorage.GetFinalizedEditsFileName(4, 4), NNStorage.GetFinalizedEditsFileName
                                                  (5, 5));
            // Older paxos files should be purged
            GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "3");
        }
        public virtual void TestSelectInputStreamsNotOnBoundary()
        {
            int txIdsPerSegment = 10;

            for (int txid = 1; txid <= 5 * txIdsPerSegment; txid += txIdsPerSegment)
            {
                QJMTestUtil.WriteSegment(cluster, qjm, txid, txIdsPerSegment, true);
            }
            FilePath curDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);

            GenericTestUtils.AssertGlobEquals(curDir, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                  (1, 10), NNStorage.GetFinalizedEditsFileName(11, 20), NNStorage.GetFinalizedEditsFileName
                                                  (21, 30), NNStorage.GetFinalizedEditsFileName(31, 40), NNStorage.GetFinalizedEditsFileName
                                                  (41, 50));
            AList <EditLogInputStream> streams = new AList <EditLogInputStream>();

            qjm.SelectInputStreams(streams, 25, false);
            QJMTestUtil.VerifyEdits(streams, 25, 50);
        }
 /// <summary>
 /// Check that the given list of edits files are present in the given storage
 /// dirs.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 private void AssertEditFiles(IEnumerable <URI> dirs, params string[] files)
 {
     foreach (URI u in dirs)
     {
         FilePath editDirRoot = new FilePath(u.GetPath());
         FilePath editDir     = new FilePath(editDirRoot, "current");
         GenericTestUtils.AssertExists(editDir);
         if (files.Length == 0)
         {
             Log.Info("Checking no edit files exist in " + editDir);
         }
         else
         {
             Log.Info("Checking for following edit files in " + editDir + ": " + Joiner.On(","
                                                                                           ).Join(files));
         }
         GenericTestUtils.AssertGlobEquals(editDir, "edits_.*", files);
     }
 }
        /// <summary>
        /// Set up the following tricky edge case state which is used by
        /// multiple tests:
        /// Initial writer:
        /// - Writing to 3 JNs: JN0, JN1, JN2:
        /// - A log segment with txnid 1 through 100 succeeds.
        /// </summary>
        /// <remarks>
        /// Set up the following tricky edge case state which is used by
        /// multiple tests:
        /// Initial writer:
        /// - Writing to 3 JNs: JN0, JN1, JN2:
        /// - A log segment with txnid 1 through 100 succeeds.
        /// - The first transaction in the next segment only goes to JN0
        /// before the writer crashes (eg it is partitioned)
        /// Recovery by another writer:
        /// - The new NN starts recovery and talks to all three. Thus, it sees
        /// that the newest log segment which needs recovery is 101.
        /// - It sends the prepareRecovery(101) call, and decides that the
        /// recovery length for 101 is only the 1 transaction.
        /// - It sends acceptRecovery(101-101) to only JN0, before crashing
        /// This yields the following state:
        /// - JN0: 1-100 finalized, 101_inprogress, accepted recovery: 101-101
        /// - JN1: 1-100 finalized, 101_inprogress.empty
        /// - JN2: 1-100 finalized, 101_inprogress.empty
        /// (the .empty files got moved aside during recovery)
        /// </remarks>
        /// <exception cref="System.Exception"></exception>
        private void SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery()
        {
            // Log segment with txns 1-100 succeeds
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 100, true);
            // startLogSegment only makes it to one of the three nodes
            FailLoggerAtTxn(spies[1], 101);
            FailLoggerAtTxn(spies[2], 101);
            try
            {
                QJMTestUtil.WriteSegment(cluster, qjm, 101, 1, true);
                NUnit.Framework.Assert.Fail("Should have failed");
            }
            catch (QuorumException qe)
            {
                GenericTestUtils.AssertExceptionContains("mock failure", qe);
            }
            finally
            {
                qjm.Close();
            }
            // Recovery 1:
            // make acceptRecovery() only make it to the node which has txid 101
            // this should fail because only 1/3 accepted the recovery
            qjm   = CreateSpyingQJM();
            spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure")).When(spies
                                                                                            [1]).AcceptRecovery(Org.Mockito.Mockito.Any <QJournalProtocolProtos.SegmentStateProto
                                                                                                                                         >(), Org.Mockito.Mockito.Any <Uri>());
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure")).When(spies
                                                                                            [2]).AcceptRecovery(Org.Mockito.Mockito.Any <QJournalProtocolProtos.SegmentStateProto
                                                                                                                                         >(), Org.Mockito.Mockito.Any <Uri>());
            try
            {
                qjm.RecoverUnfinalizedSegments();
                NUnit.Framework.Assert.Fail("Should have failed to recover");
            }
            catch (QuorumException qe)
            {
                GenericTestUtils.AssertExceptionContains("mock failure", qe);
            }
            finally
            {
                qjm.Close();
            }
            // Check that we have entered the expected state as described in the
            // method javadoc.
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "edits_.*"
                                              , NNStorage.GetFinalizedEditsFileName(1, 100), NNStorage.GetInProgressEditsFileName
                                                  (101));
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(1, QJMTestUtil.Jid), "edits_.*"
                                              , NNStorage.GetFinalizedEditsFileName(1, 100), NNStorage.GetInProgressEditsFileName
                                                  (101) + ".empty");
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(2, QJMTestUtil.Jid), "edits_.*"
                                              , NNStorage.GetFinalizedEditsFileName(1, 100), NNStorage.GetInProgressEditsFileName
                                                  (101) + ".empty");
            FilePath paxos0 = new FilePath(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "paxos"
                                           );
            FilePath paxos1 = new FilePath(cluster.GetCurrentDir(1, QJMTestUtil.Jid), "paxos"
                                           );
            FilePath paxos2 = new FilePath(cluster.GetCurrentDir(2, QJMTestUtil.Jid), "paxos"
                                           );

            GenericTestUtils.AssertGlobEquals(paxos0, ".*", "101");
            GenericTestUtils.AssertGlobEquals(paxos1, ".*");
            GenericTestUtils.AssertGlobEquals(paxos2, ".*");
        }
        /// <summary>
        /// Test the case where, at the beginning of a segment, transactions
        /// have been written to one JN but not others.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
        {
            int nodeWithEmptySegment = (nodeWithOneTxn + 1) % 3;
            int nodeMissingSegment   = (nodeWithOneTxn + 2) % 3;

            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
            cluster.GetJournalNode(nodeMissingSegment).StopAndJoin(0);
            // Open segment on 2/3 nodes
            EditLogOutputStream stm = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion
                                                          );

            try
            {
                WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
                // Write transactions to only 1/3 nodes
                FailLoggerAtTxn(spies[nodeWithEmptySegment], 4);
                try
                {
                    QJMTestUtil.WriteTxns(stm, 4, 1);
                    NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
                }
                catch (QuorumException qe)
                {
                    GenericTestUtils.AssertExceptionContains("mock failure", qe);
                }
            }
            finally
            {
                stm.Abort();
            }
            // Bring back the down JN.
            cluster.RestartJournalNode(nodeMissingSegment);
            // Make a new QJM. At this point, the state is as follows:
            // A: nodeWithEmptySegment: 1-3 finalized, 4_inprogress (empty)
            // B: nodeWithOneTxn:       1-3 finalized, 4_inprogress (1 txn)
            // C: nodeMissingSegment:   1-3 finalized
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(nodeWithEmptySegment, QJMTestUtil
                                                                    .Jid), "edits_.*", NNStorage.GetFinalizedEditsFileName(1, 3), NNStorage.GetInProgressEditsFileName
                                                  (4));
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(nodeWithOneTxn, QJMTestUtil
                                                                    .Jid), "edits_.*", NNStorage.GetFinalizedEditsFileName(1, 3), NNStorage.GetInProgressEditsFileName
                                                  (4));
            GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(nodeMissingSegment, QJMTestUtil
                                                                    .Jid), "edits_.*", NNStorage.GetFinalizedEditsFileName(1, 3));
            // Stop one of the nodes. Since we run this test three
            // times, rotating the roles of the nodes, we'll test
            // all the permutations.
            cluster.GetJournalNode(2).StopAndJoin(0);
            qjm = CreateSpyingQJM();
            qjm.RecoverUnfinalizedSegments();
            if (nodeWithOneTxn == 0 || nodeWithOneTxn == 1)
            {
                // If the node that had the transaction committed was one of the nodes
                // that responded during recovery, then we should have recovered txid
                // 4.
                CheckRecovery(cluster, 4, 4);
                QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
            }
            else
            {
                // Otherwise, we should have recovered only 1-3 and should be able to
                // start a segment at 4.
                CheckRecovery(cluster, 1, 3);
                QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
            }
        }
        public virtual void TestPurgingWithNameEditsDirAfterFailure()
        {
            MiniDFSCluster cluster = null;
            Configuration  conf    = new HdfsConfiguration();

            conf.SetLong(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 0);
            FilePath sd0 = new FilePath(TestRootDir, "nn0");
            FilePath sd1 = new FilePath(TestRootDir, "nn1");
            FilePath cd0 = new FilePath(sd0, "current");
            FilePath cd1 = new FilePath(sd1, "current");

            conf.Set(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(sd0, sd1));
            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).ManageNameDfsDirs(false
                                                                                             ).Format(true).Build();
                NameNode nn = cluster.GetNameNode();
                DoSaveNamespace(nn);
                Log.Info("After first save, images 0 and 2 should exist in both dirs");
                GenericTestUtils.AssertGlobEquals(cd0, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (0), NNStorage.GetImageFileName(2));
                GenericTestUtils.AssertGlobEquals(cd1, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (0), NNStorage.GetImageFileName(2));
                GenericTestUtils.AssertGlobEquals(cd0, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (1, 2), NNStorage.GetInProgressEditsFileName(3));
                GenericTestUtils.AssertGlobEquals(cd1, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (1, 2), NNStorage.GetInProgressEditsFileName(3));
                DoSaveNamespace(nn);
                Log.Info("After second save, image 0 should be purged, " + "and image 4 should exist in both."
                         );
                GenericTestUtils.AssertGlobEquals(cd0, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (2), NNStorage.GetImageFileName(4));
                GenericTestUtils.AssertGlobEquals(cd1, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (2), NNStorage.GetImageFileName(4));
                GenericTestUtils.AssertGlobEquals(cd0, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (3, 4), NNStorage.GetInProgressEditsFileName(5));
                GenericTestUtils.AssertGlobEquals(cd1, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (3, 4), NNStorage.GetInProgressEditsFileName(5));
                Log.Info("Failing first storage dir by chmodding it");
                NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(cd0.GetAbsolutePath(), "000"));
                DoSaveNamespace(nn);
                Log.Info("Restoring accessibility of first storage dir");
                NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(cd0.GetAbsolutePath(), "755"));
                Log.Info("nothing should have been purged in first storage dir");
                GenericTestUtils.AssertGlobEquals(cd0, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (2), NNStorage.GetImageFileName(4));
                GenericTestUtils.AssertGlobEquals(cd0, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (3, 4), NNStorage.GetInProgressEditsFileName(5));
                Log.Info("fsimage_2 should be purged in second storage dir");
                GenericTestUtils.AssertGlobEquals(cd1, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (4), NNStorage.GetImageFileName(6));
                GenericTestUtils.AssertGlobEquals(cd1, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (5, 6), NNStorage.GetInProgressEditsFileName(7));
                Log.Info("On next save, we should purge logs from the failed dir," + " but not images, since the image directory is in failed state."
                         );
                DoSaveNamespace(nn);
                GenericTestUtils.AssertGlobEquals(cd1, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (6), NNStorage.GetImageFileName(8));
                GenericTestUtils.AssertGlobEquals(cd1, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                      (7, 8), NNStorage.GetInProgressEditsFileName(9));
                GenericTestUtils.AssertGlobEquals(cd0, "fsimage_\\d*", NNStorage.GetImageFileName
                                                      (2), NNStorage.GetImageFileName(4));
                GenericTestUtils.AssertGlobEquals(cd0, "edits_.*", NNStorage.GetInProgressEditsFileName
                                                      (9));
            }
            finally
            {
                FileUtil.Chmod(cd0.GetAbsolutePath(), "755");
                Log.Info("Shutting down...");
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
        public virtual void TestFailureOfSharedDir()
        {
            Configuration conf = new Configuration();

            conf.SetLong(DFSConfigKeys.DfsNamenodeResourceCheckIntervalKey, 2000);
            // The shared edits dir will automatically be marked required.
            MiniDFSCluster cluster        = null;
            FilePath       sharedEditsDir = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                          ()).NumDataNodes(0).CheckExitOnShutdown(false).Build();
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/test1")));
                // Blow away the shared edits dir.
                URI sharedEditsUri = cluster.GetSharedEditsDir(0, 1);
                sharedEditsDir = new FilePath(sharedEditsUri);
                NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(sharedEditsDir.GetAbsolutePath(
                                                                      ), "-w", true));
                Sharpen.Thread.Sleep(conf.GetLong(DFSConfigKeys.DfsNamenodeResourceCheckIntervalKey
                                                  , DFSConfigKeys.DfsNamenodeResourceCheckIntervalDefault) * 2);
                NameNode nn1 = cluster.GetNameNode(1);
                NUnit.Framework.Assert.IsTrue(nn1.IsStandbyState());
                NUnit.Framework.Assert.IsFalse("StandBy NameNode should not go to SafeMode on resource unavailability"
                                               , nn1.IsInSafeMode());
                NameNode nn0 = cluster.GetNameNode(0);
                try
                {
                    // Make sure that subsequent operations on the NN fail.
                    nn0.GetRpcServer().RollEditLog();
                    NUnit.Framework.Assert.Fail("Succeeded in rolling edit log despite shared dir being deleted"
                                                );
                }
                catch (ExitUtil.ExitException ee)
                {
                    GenericTestUtils.AssertExceptionContains("finalize log segment 1, 3 failed for required journal"
                                                             , ee);
                }
                // Check that none of the edits dirs rolled, since the shared edits
                // dir didn't roll. Regression test for HDFS-2874.
                foreach (URI editsUri in cluster.GetNameEditsDirs(0))
                {
                    if (editsUri.Equals(sharedEditsUri))
                    {
                        continue;
                    }
                    FilePath editsDir = new FilePath(editsUri.GetPath());
                    FilePath curDir   = new FilePath(editsDir, "current");
                    GenericTestUtils.AssertGlobEquals(curDir, "edits_.*", NNStorage.GetInProgressEditsFileName
                                                          (1));
                }
            }
            finally
            {
                if (sharedEditsDir != null)
                {
                    // without this test cleanup will fail
                    FileUtil.Chmod(sharedEditsDir.GetAbsolutePath(), "+w", true);
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Example #8
0
        /// <exception cref="System.Exception"/>
        public virtual void TestCancelSaveNamespace()
        {
            Configuration conf = GetConf();

            NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
            DFSTestUtil.FormatNameNode(conf);
            FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);
            // Replace the FSImage with a spy
            FSImage   image   = fsn.GetFSImage();
            NNStorage storage = image.GetStorage();

            storage.Close();
            // unlock any directories that FSNamesystem's initialization may have locked
            storage.SetStorageDirectories(FSNamesystem.GetNamespaceDirs(conf), FSNamesystem.GetNamespaceEditsDirs
                                              (conf));
            FSNamesystem spyFsn   = Org.Mockito.Mockito.Spy(fsn);
            FSNamesystem finalFsn = spyFsn;

            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            BlockIdManager bid = Org.Mockito.Mockito.Spy(spyFsn.GetBlockIdManager());

            Whitebox.SetInternalState(finalFsn, "blockIdManager", bid);
            Org.Mockito.Mockito.DoAnswer(delayer).When(bid).GetGenerationStampV2();
            ExecutorService pool = Executors.NewFixedThreadPool(2);

            try
            {
                DoAnEdit(fsn, 1);
                Canceler canceler = new Canceler();
                // Save namespace
                fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
                try
                {
                    Future <Void> saverFuture = pool.Submit(new _Callable_561(image, finalFsn, canceler
                                                                              ));
                    // Wait until saveNamespace calls getGenerationStamp
                    delayer.WaitForCall();
                    // then cancel the saveNamespace
                    Future <Void> cancelFuture = pool.Submit(new _Callable_572(canceler));
                    // give the cancel call time to run
                    Sharpen.Thread.Sleep(500);
                    // allow saveNamespace to proceed - it should check the cancel flag after
                    // this point and throw an exception
                    delayer.Proceed();
                    cancelFuture.Get();
                    saverFuture.Get();
                    NUnit.Framework.Assert.Fail("saveNamespace did not fail even though cancelled!");
                }
                catch (Exception t)
                {
                    GenericTestUtils.AssertExceptionContains("SaveNamespaceCancelledException", t);
                }
                Log.Info("Successfully cancelled a saveNamespace");
                // Check that we have only the original image and not any
                // cruft left over from half-finished images
                FSImageTestUtil.LogStorageContents(Log, storage);
                foreach (Storage.StorageDirectory sd in storage.DirIterable(null))
                {
                    FilePath curDir = sd.GetCurrentDir();
                    GenericTestUtils.AssertGlobEquals(curDir, "fsimage_.*", NNStorage.GetImageFileName
                                                          (0), NNStorage.GetImageFileName(0) + MD5FileUtils.Md5Suffix);
                }
            }
            finally
            {
                fsn.Close();
            }
        }