/// <summary>
/// Writes five one-transaction segments, plants extra paxos and
/// recovery-style temporary files, purges everything older than txid 3 and
/// verifies which files remain in the journal's current directory.
/// </summary>
public virtual void TestPurgeLogs()
{
    // Five finalized segments, one transaction each (txids 1 through 5).
    int segmentTxId = 1;
    while (segmentTxId <= 5)
    {
        QJMTestUtil.WriteSegment(cluster, qjm, segmentTxId, 1, true);
        segmentTxId++;
    }

    FilePath journalDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(journalDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 1),
        NNStorage.GetFinalizedEditsFileName(2, 2),
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    FilePath paxosStore = new FilePath(journalDir, "paxos");
    GenericTestUtils.AssertExists(paxosStore);

    // Drop extra files into the paxos directory; the purge is expected to
    // remove the old one as well.
    FilePath paxosEntry1 = new FilePath(paxosStore, "1");
    NUnit.Framework.Assert.IsTrue(paxosEntry1.CreateNewFile());
    FilePath paxosEntry3 = new FilePath(paxosStore, "3");
    NUnit.Framework.Assert.IsTrue(paxosEntry3.CreateNewFile());
    GenericTestUtils.AssertGlobEquals(paxosStore, "\\d+", "1", "3");

    // Temporary files of the kind that recovery leaves behind.
    FilePath epochTemp = new FilePath(journalDir, "edits_inprogress_0000000000000000001.epoch=140");
    NUnit.Framework.Assert.IsTrue(epochTemp.CreateNewFile());
    FilePath emptyTemp = new FilePath(journalDir, "edits_inprogress_0000000000000000002.empty");
    NUnit.Framework.Assert.IsTrue(emptyTemp.CreateNewFile());

    qjm.PurgeLogsOlderThan(3);

    // Purging is asynchronous: wait until every pending call has been sent
    // and answered before looking at the disk.
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Only the segments from txid 3 onwards may remain.
    GenericTestUtils.AssertGlobEquals(journalDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    // Only the newer paxos file may remain.
    GenericTestUtils.AssertGlobEquals(paxosStore, "\\d+", "3");
}
/// <summary>
/// Writes five finalized segments of ten transactions each and then selects
/// input streams starting at txid 25, which lies inside the third segment
/// rather than at a segment start. The returned streams must cover 25..50.
/// </summary>
public virtual void TestSelectInputStreamsNotOnBoundary()
{
    int segmentLength = 10;
    int segmentCount = 5;

    // Segments start at txids 1, 11, 21, 31 and 41.
    for (int segment = 0; segment < segmentCount; segment++)
    {
        int firstTxId = 1 + segment * segmentLength;
        QJMTestUtil.WriteSegment(cluster, qjm, firstTxId, segmentLength, true);
    }

    FilePath journalDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(journalDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 10),
        NNStorage.GetFinalizedEditsFileName(11, 20),
        NNStorage.GetFinalizedEditsFileName(21, 30),
        NNStorage.GetFinalizedEditsFileName(31, 40),
        NNStorage.GetFinalizedEditsFileName(41, 50));

    AList<EditLogInputStream> selected = new AList<EditLogInputStream>();
    qjm.SelectInputStreams(selected, 25, false);
    QJMTestUtil.VerifyEdits(selected, 25, 50);
}
/// <summary>
/// Asserts that the "current" subdirectory of every given storage dir
/// exists and contains exactly the listed edits files (matched against the
/// pattern "edits_.*").
/// </summary>
/// <param name="dirs">URIs of the storage directories to inspect.</param>
/// <param name="files">Expected edits file names; pass none to assert that no edits files exist.</param>
/// <exception cref="System.IO.IOException"/>
private void AssertEditFiles(IEnumerable<URI> dirs, params string[] files)
{
    foreach (URI storageUri in dirs)
    {
        FilePath currentDir = new FilePath(new FilePath(storageUri.GetPath()), "current");
        GenericTestUtils.AssertExists(currentDir);

        if (files.Length != 0)
        {
            Log.Info("Checking for following edit files in " + currentDir + ": " + Joiner.On(",").Join(files));
        }
        else
        {
            Log.Info("Checking no edit files exist in " + currentDir);
        }

        GenericTestUtils.AssertGlobEquals(currentDir, "edits_.*", files);
    }
}
/// <summary>
/// Builds the tricky edge-case state that several tests start from.
/// </summary>
/// <remarks>
/// Initial writer:
/// - Writes to 3 JNs: JN0, JN1, JN2.
/// - A log segment with txnid 1 through 100 succeeds.
/// - The first transaction of the next segment only reaches JN0 before the
/// writer crashes (e.g. it is partitioned).
/// Recovery by another writer:
/// - The new NN starts recovery and talks to all three, so it sees that the
/// newest segment needing recovery is 101.
/// - It sends prepareRecovery(101) and decides the recovery length for 101
/// is just the single transaction.
/// - It sends acceptRecovery(101-101) to JN0 only, then crashes.
/// Resulting state:
/// - JN0: 1-100 finalized, 101_inprogress, accepted recovery: 101-101
/// - JN1: 1-100 finalized, 101_inprogress.empty
/// - JN2: 1-100 finalized, 101_inprogress.empty
/// (the .empty files got moved aside during recovery)
/// </remarks>
/// <exception cref="System.Exception"></exception>
private void SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery()
{
    // The segment holding txns 1-100 is written successfully everywhere.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 100, true);

    // Make loggers 1 and 2 fail at txn 101 so that the next segment only
    // reaches one of the three nodes.
    for (int jn = 1; jn <= 2; jn++)
    {
        FailLoggerAtTxn(spies[jn], 101);
    }

    try
    {
        QJMTestUtil.WriteSegment(cluster, qjm, 101, 1, true);
        NUnit.Framework.Assert.Fail("Should have failed");
    }
    catch (QuorumException expected)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", expected);
    }
    finally
    {
        qjm.Close();
    }

    // Recovery 1:
    // acceptRecovery() is stubbed to fail on loggers 1 and 2, so it only
    // gets through to the node that has txid 101. Recovery must fail
    // because only 1/3 accepted it.
    qjm = CreateSpyingQJM();
    spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
    for (int jn = 1; jn <= 2; jn++)
    {
        TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure"))
            .When(spies[jn])
            .AcceptRecovery(
                Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
                Org.Mockito.Mockito.Any<Uri>());
    }

    try
    {
        qjm.RecoverUnfinalizedSegments();
        NUnit.Framework.Assert.Fail("Should have failed to recover");
    }
    catch (QuorumException expected)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", expected);
    }
    finally
    {
        qjm.Close();
    }

    // Confirm the on-disk state matches the one described in the remarks.
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetInProgressEditsFileName(101));
    for (int jn = 1; jn <= 2; jn++)
    {
        GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(jn, QJMTestUtil.Jid), "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 100),
            NNStorage.GetInProgressEditsFileName(101) + ".empty");
    }

    // Resolve all three paxos directories first, then check them in order.
    FilePath[] paxosDirs = new FilePath[3];
    for (int jn = 0; jn < paxosDirs.Length; jn++)
    {
        paxosDirs[jn] = new FilePath(cluster.GetCurrentDir(jn, QJMTestUtil.Jid), "paxos");
    }
    GenericTestUtils.AssertGlobEquals(paxosDirs[0], ".*", "101");
    for (int jn = 1; jn <= 2; jn++)
    {
        GenericTestUtils.AssertGlobEquals(paxosDirs[jn], ".*");
    }
}
/// <summary>
/// Exercises recovery when, right at the start of a segment, one JN has
/// received a transaction, another has only an empty in-progress segment,
/// and the third never saw the segment at all.
/// </summary>
/// <param name="nodeWithOneTxn">
/// Index of the journal node that ends up holding the single transaction of
/// segment 4; the other two roles are derived from it modulo 3.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
{
    int emptySegmentNode = (nodeWithOneTxn + 1) % 3;
    int missingSegmentNode = (nodeWithOneTxn + 2) % 3;

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
    cluster.GetJournalNode(missingSegmentNode).StopAndJoin(0);

    // With one JN down, the new segment is opened on 2/3 nodes only.
    EditLogOutputStream segmentStream = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

        // Let the write reach only 1/3 nodes.
        FailLoggerAtTxn(spies[emptySegmentNode], 4);
        try
        {
            QJMTestUtil.WriteTxns(segmentStream, 4, 1);
            NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
        }
        catch (QuorumException expected)
        {
            GenericTestUtils.AssertExceptionContains("mock failure", expected);
        }
    }
    finally
    {
        segmentStream.Abort();
    }

    // Bring the stopped JN back.
    cluster.RestartJournalNode(missingSegmentNode);

    // State before a new QJM is created:
    //   A: emptySegmentNode:   1-3 finalized, 4_inprogress (empty)
    //   B: nodeWithOneTxn:     1-3 finalized, 4_inprogress (1 txn)
    //   C: missingSegmentNode: 1-3 finalized
    foreach (int jn in new int[] { emptySegmentNode, nodeWithOneTxn })
    {
        GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(jn, QJMTestUtil.Jid), "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 3),
            NNStorage.GetInProgressEditsFileName(4));
    }
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(missingSegmentNode, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3));

    // Stop one of the nodes. This test runs three times with the node
    // roles rotated, so all the permutations get covered.
    cluster.GetJournalNode(2).StopAndJoin(0);

    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();

    if (nodeWithOneTxn != 0 && nodeWithOneTxn != 1)
    {
        // The node holding the transaction did not take part in recovery,
        // so only 1-3 is recovered and a segment can be started at 4.
        CheckRecovery(cluster, 1, 3);
        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    }
    else
    {
        // The node holding the committed transaction responded during
        // recovery, so txid 4 must have been recovered.
        CheckRecovery(cluster, 4, 4);
        QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
    }
}
/// <summary>
/// Checks image and edit-log purging across two name directories when the
/// first one is made inaccessible (chmod 000) during a namespace save and
/// then made accessible again.
/// </summary>
public virtual void TestPurgingWithNameEditsDirAfterFailure()
{
    MiniDFSCluster dfsCluster = null;
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 0);

    FilePath nameDir0 = new FilePath(TestRootDir, "nn0");
    FilePath nameDir1 = new FilePath(TestRootDir, "nn1");
    FilePath current0 = new FilePath(nameDir0, "current");
    FilePath current1 = new FilePath(nameDir1, "current");
    conf.Set(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(nameDir0, nameDir1));

    try
    {
        dfsCluster = new MiniDFSCluster.Builder(conf)
            .NumDataNodes(0)
            .ManageNameDfsDirs(false)
            .Format(true)
            .Build();
        NameNode nameNode = dfsCluster.GetNameNode();

        DoSaveNamespace(nameNode);
        Log.Info("After first save, images 0 and 2 should exist in both dirs");
        GenericTestUtils.AssertGlobEquals(current0, "fsimage_\\d*",
            NNStorage.GetImageFileName(0), NNStorage.GetImageFileName(2));
        GenericTestUtils.AssertGlobEquals(current1, "fsimage_\\d*",
            NNStorage.GetImageFileName(0), NNStorage.GetImageFileName(2));
        GenericTestUtils.AssertGlobEquals(current0, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 2), NNStorage.GetInProgressEditsFileName(3));
        GenericTestUtils.AssertGlobEquals(current1, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 2), NNStorage.GetInProgressEditsFileName(3));

        DoSaveNamespace(nameNode);
        Log.Info("After second save, image 0 should be purged, " + "and image 4 should exist in both.");
        GenericTestUtils.AssertGlobEquals(current0, "fsimage_\\d*",
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current1, "fsimage_\\d*",
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));
        GenericTestUtils.AssertGlobEquals(current1, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));

        // Save once while the first directory is inaccessible.
        Log.Info("Failing first storage dir by chmodding it");
        NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(current0.GetAbsolutePath(), "000"));
        DoSaveNamespace(nameNode);
        Log.Info("Restoring accessibility of first storage dir");
        NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(current0.GetAbsolutePath(), "755"));

        Log.Info("nothing should have been purged in first storage dir");
        GenericTestUtils.AssertGlobEquals(current0, "fsimage_\\d*",
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));

        Log.Info("fsimage_2 should be purged in second storage dir");
        GenericTestUtils.AssertGlobEquals(current1, "fsimage_\\d*",
            NNStorage.GetImageFileName(4), NNStorage.GetImageFileName(6));
        GenericTestUtils.AssertGlobEquals(current1, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(5, 6), NNStorage.GetInProgressEditsFileName(7));

        Log.Info("On next save, we should purge logs from the failed dir," + " but not images, since the image directory is in failed state.");
        DoSaveNamespace(nameNode);
        GenericTestUtils.AssertGlobEquals(current1, "fsimage_\\d*",
            NNStorage.GetImageFileName(6), NNStorage.GetImageFileName(8));
        GenericTestUtils.AssertGlobEquals(current1, "edits_.*",
            NNStorage.GetFinalizedEditsFileName(7, 8), NNStorage.GetInProgressEditsFileName(9));
        GenericTestUtils.AssertGlobEquals(current0, "fsimage_\\d*",
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, "edits_.*",
            NNStorage.GetInProgressEditsFileName(9));
    }
    finally
    {
        // Always restore permissions, even if the test failed while the
        // first directory was still chmodded to 000.
        FileUtil.Chmod(current0.GetAbsolutePath(), "755");
        Log.Info("Shutting down...");
        if (dfsCluster != null)
        {
            dfsCluster.Shutdown();
        }
    }
}
/// <summary>
/// Makes the shared edits directory of an HA cluster non-writable and
/// checks that the standby NameNode stays out of safe mode, that rolling
/// the edit log on the other NameNode fails, and that none of the
/// non-shared edits directories rolled.
/// </summary>
public virtual void TestFailureOfSharedDir()
{
    Configuration conf = new Configuration();
    conf.SetLong(DFSConfigKeys.DfsNamenodeResourceCheckIntervalKey, 2000);

    // The shared edits dir will automatically be marked required.
    MiniDFSCluster haCluster = null;
    FilePath sharedDir = null;
    try
    {
        haCluster = new MiniDFSCluster.Builder(conf)
            .NnTopology(MiniDFSNNTopology.SimpleHATopology())
            .NumDataNodes(0)
            .CheckExitOnShutdown(false)
            .Build();
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);

        FileSystem fs = HATestUtil.ConfigureFailoverFs(haCluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/test1")));

        // Blow away the shared edits dir.
        URI sharedUri = haCluster.GetSharedEditsDir(0, 1);
        sharedDir = new FilePath(sharedUri);
        NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(sharedDir.GetAbsolutePath(), "-w", true));

        // Sleep for twice the configured resource check interval.
        long checkInterval = conf.GetLong(
            DFSConfigKeys.DfsNamenodeResourceCheckIntervalKey,
            DFSConfigKeys.DfsNamenodeResourceCheckIntervalDefault);
        Sharpen.Thread.Sleep(checkInterval * 2);

        NameNode standby = haCluster.GetNameNode(1);
        NUnit.Framework.Assert.IsTrue(standby.IsStandbyState());
        NUnit.Framework.Assert.IsFalse("StandBy NameNode should not go to SafeMode on resource unavailability", standby.IsInSafeMode());

        NameNode active = haCluster.GetNameNode(0);
        try
        {
            // Subsequent operations on this NN must now fail.
            active.GetRpcServer().RollEditLog();
            NUnit.Framework.Assert.Fail("Succeeded in rolling edit log despite shared dir being deleted");
        }
        catch (ExitUtil.ExitException exitError)
        {
            GenericTestUtils.AssertExceptionContains("finalize log segment 1, 3 failed for required journal", exitError);
        }

        // None of the other edits dirs may have rolled, because the shared
        // edits dir didn't roll. Regression test for HDFS-2874.
        foreach (URI localUri in haCluster.GetNameEditsDirs(0))
        {
            if (!localUri.Equals(sharedUri))
            {
                FilePath localCurrent = new FilePath(new FilePath(localUri.GetPath()), "current");
                GenericTestUtils.AssertGlobEquals(localCurrent, "edits_.*",
                    NNStorage.GetInProgressEditsFileName(1));
            }
        }
    }
    finally
    {
        if (sharedDir != null)
        {
            // without this test cleanup will fail
            FileUtil.Chmod(sharedDir.GetAbsolutePath(), "+w", true);
        }
        if (haCluster != null)
        {
            haCluster.Shutdown();
        }
    }
}
/// <summary>
/// Starts a saveNamespace on a worker thread, holds it inside the stubbed
/// <c>GetGenerationStampV2</c> call, cancels it from a second worker, and
/// then verifies that the save fails with a SaveNamespaceCancelledException
/// and that only the original image (plus its MD5 file) is left in every
/// storage directory.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCancelSaveNamespace()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage image = fsn.GetFSImage();
    NNStorage storage = image.GetStorage();
    // unlock any directories that FSNamesystem's initialization may have locked
    storage.Close();
    storage.SetStorageDirectories(FSNamesystem.GetNamespaceDirs(conf), FSNamesystem.GetNamespaceEditsDirs(conf));

    FSNamesystem spyFsn = Org.Mockito.Mockito.Spy(fsn);
    FSNamesystem finalFsn = spyFsn;
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
    BlockIdManager bid = Org.Mockito.Mockito.Spy(spyFsn.GetBlockIdManager());
    Whitebox.SetInternalState(finalFsn, "blockIdManager", bid);
    // The delayer answers this stubbed call, which lets the test wait for
    // the save to reach it and release it later via Proceed().
    Org.Mockito.Mockito.DoAnswer(delayer).When(bid).GetGenerationStampV2();

    ExecutorService pool = Executors.NewFixedThreadPool(2);
    try
    {
        DoAnEdit(fsn, 1);
        Canceler canceler = new Canceler();

        // Save namespace
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            Future<Void> saverFuture = pool.Submit(new _Callable_561(image, finalFsn, canceler));

            // Wait until saveNamespace calls getGenerationStamp
            delayer.WaitForCall();

            // then cancel the saveNamespace
            Future<Void> cancelFuture = pool.Submit(new _Callable_572(canceler));

            // give the cancel call time to run
            Sharpen.Thread.Sleep(500);

            // allow saveNamespace to proceed - it should check the cancel flag after
            // this point and throw an exception
            delayer.Proceed();

            cancelFuture.Get();
            saverFuture.Get();
            NUnit.Framework.Assert.Fail("saveNamespace did not fail even though cancelled!");
        }
        catch (Exception t)
        {
            GenericTestUtils.AssertExceptionContains("SaveNamespaceCancelledException", t);
        }
        Log.Info("Successfully cancelled a saveNamespace");

        // Check that we have only the original image and not any
        // cruft left over from half-finished images
        FSImageTestUtil.LogStorageContents(Log, storage);
        foreach (Storage.StorageDirectory sd in storage.DirIterable(null))
        {
            FilePath curDir = sd.GetCurrentDir();
            GenericTestUtils.AssertGlobEquals(curDir, "fsimage_.*",
                NNStorage.GetImageFileName(0),
                NNStorage.GetImageFileName(0) + MD5FileUtils.Md5Suffix);
        }
    }
    finally
    {
        try
        {
            // Shut the pool down so its worker threads do not outlive the
            // test; ShutdownNow also interrupts a saver that is still
            // blocked if the test bailed out before calling Proceed().
            pool.ShutdownNow();
        }
        finally
        {
            // The namesystem is closed regardless of how the pool shutdown went.
            fsn.Close();
        }
    }
}