/// <summary>
/// Rolls the edit log on nn0, deletes the resulting finalized segment from
/// the shared edits directory, and checks that bootstrapping the standby
/// then fails with the "logs unavailable" error code and logs a FATAL message.
/// </summary>
public virtual void TestSharedEditsMissingLogs()
{
    RemoveStandbyNameDirs();

    CheckpointSignature rollSignature = nn0.GetRpcServer().RollEditLog();
    NUnit.Framework.Assert.AreEqual(3, rollSignature.GetCurSegmentTxId());

    // The roll should have left edits_1-2 in the shared edits dir.
    URI sharedEditsUri = cluster.GetSharedEditsDir(0, 1);
    FilePath sharedCurrentDir = new FilePath(new FilePath(sharedEditsUri), "current");
    FilePath finalizedSegment = new FilePath(sharedCurrentDir, NNStorage.GetFinalizedEditsFileName(1, 2));
    GenericTestUtils.AssertExists(finalizedSegment);

    // Remove the segment so the shared dir no longer covers txids 1-2.
    bool segmentDeleted = finalizedSegment.Delete();
    NUnit.Framework.Assert.IsTrue(segmentDeleted);

    // With the edit logs gone from the shared dir, bootstrapping the
    // standby is expected to fail.
    GenericTestUtils.LogCapturer capturedLogs = GenericTestUtils.LogCapturer.CaptureLogs(
        LogFactory.GetLog(typeof(BootstrapStandby)));
    try
    {
        string[] bootstrapArgs = new string[] { "-force" };
        int exitCode = BootstrapStandby.Run(bootstrapArgs, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(BootstrapStandby.ErrCodeLogsUnavailable, exitCode);
    }
    finally
    {
        capturedLogs.StopCapturing();
    }

    GenericTestUtils.AssertMatches(capturedLogs.GetOutput(),
        "FATAL.*Unable to read transaction ids 1-3 from the configured shared");
}
/// <summary>
/// Writes five single-transaction segments, adds paxos files and temporary
/// recovery-style files, purges logs older than txid 3, and checks that only
/// the segments and paxos files from txid 3 onward remain.
/// </summary>
public virtual void TestPurgeLogs()
{
    const int segmentCount = 5;
    for (int startTxId = 1; startTxId <= segmentCount; startTxId++)
    {
        QJMTestUtil.WriteSegment(cluster, qjm, startTxId, 1, true);
    }

    FilePath journalCurrentDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(journalCurrentDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 1),
        NNStorage.GetFinalizedEditsFileName(2, 2),
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    FilePath paxosDir = new FilePath(journalCurrentDir, "paxos");
    GenericTestUtils.AssertExists(paxosDir);

    // Files placed in the paxos directory are expected to be purged as well.
    FilePath paxosForTxn1 = new FilePath(paxosDir, "1");
    NUnit.Framework.Assert.IsTrue(paxosForTxn1.CreateNewFile());
    FilePath paxosForTxn3 = new FilePath(paxosDir, "3");
    NUnit.Framework.Assert.IsTrue(paxosForTxn3.CreateNewFile());
    GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "1", "3");

    // Temporary files of the kind used during recovery.
    FilePath epochTemp = new FilePath(journalCurrentDir, "edits_inprogress_0000000000000000001.epoch=140");
    NUnit.Framework.Assert.IsTrue(epochTemp.CreateNewFile());
    FilePath emptyTemp = new FilePath(journalCurrentDir, "edits_inprogress_0000000000000000002.empty");
    NUnit.Framework.Assert.IsTrue(emptyTemp.CreateNewFile());

    qjm.PurgeLogsOlderThan(3);

    // Purging is asynchronous: wait until every pending call has been
    // sent and answered before inspecting the directories.
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Only edits from txid 3 onward should survive.
    GenericTestUtils.AssertGlobEquals(journalCurrentDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    // Likewise for the paxos files.
    GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "3");
}
/// <summary>
/// Starts a mini cluster and transaction workers, then rolls the edit log
/// up to NumRolls times. After each roll it verifies the just-finalized
/// segment and checks that the next in-progress edits file exists.
/// </summary>
/// <remarks>
/// Cleanup always closes the file system and shuts the cluster down, even
/// when a worker error is rethrown, so a failing run does not leak the cluster.
/// </remarks>
public virtual void TestEditLogRolling()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        NamenodeProtocols nn = cluster.GetNameNode().GetRpcServer();
        FSImage fsimage = cluster.GetNamesystem().GetFSImage();
        Storage.StorageDirectory sd = fsimage.GetStorage().GetStorageDir(0);
        StartTransactionWorkers(nn, caughtErr);
        long previousLogTxId = 1;
        // Stop rolling early as soon as a worker has reported an error.
        for (int i = 0; i < NumRolls && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: the sleep only spaces out the rolls.
            }
            Log.Info("Starting roll " + i + ".");
            CheckpointSignature sig = nn.RollEditLog();
            long nextLog = sig.curSegmentTxId;
            string logFileName = NNStorage.GetFinalizedEditsFileName(previousLogTxId, nextLog - 1);
            // NOTE(review): VerifyEditLogs presumably returns the number of
            // transactions verified; the assertion below relies on that.
            previousLogTxId += VerifyEditLogs(cluster.GetNamesystem(), fsimage, logFileName, previousLogTxId);
            NUnit.Framework.Assert.AreEqual(previousLogTxId, nextLog);
            FilePath expectedLog = NNStorage.GetInProgressEditsFile(sd, previousLogTxId);
            NUnit.Framework.Assert.IsTrue("Expect " + expectedLog + " to exist", expectedLog.Exists());
        }
    }
    finally
    {
        try
        {
            StopTransactionWorkers();
            if (caughtErr.Get() != null)
            {
                throw new RuntimeException(caughtErr.Get());
            }
        }
        finally
        {
            // Release resources even when the worker error above is thrown.
            try
            {
                if (fileSys != null)
                {
                    fileSys.Close();
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
    }
}
/// <summary>
/// Accepts a file whose name starts with the finalized-edits file name for
/// the transaction range startGapTxId..endGapTxId (both taken from the
/// enclosing scope).
/// </summary>
/// <param name="dir">Directory containing the file; not used by the filter.</param>
/// <param name="name">File name to test.</param>
/// <returns>true if the name has the expected prefix, false otherwise.</returns>
public bool Accept(FilePath dir, string name)
{
    // File names are machine identifiers, so compare ordinally rather than
    // with the culture-sensitive default of string.StartsWith(string).
    return name.StartsWith(
        NNStorage.GetFinalizedEditsFileName(startGapTxId, endGapTxId),
        System.StringComparison.Ordinal);
}
/// <summary>
/// Downloads a remote edit log segment into the edits directories of
/// <paramref name="dstStorage"/>.
/// </summary>
/// <remarks>
/// If any target file for the segment already exists and is readable, the
/// download is skipped. Otherwise the segment is fetched into temporary
/// files (named from the txid range and a monotonic timestamp) and each
/// temporary file is then renamed to its finalized name. A failed rename
/// is logged as a warning and does not abort the remaining renames.
/// </remarks>
/// <param name="fsName">Address passed through to GetFileClient as the download source.</param>
/// <param name="log">Remote segment to download; start and end txids must be positive.</param>
/// <param name="dstStorage">Storage whose edits directories receive the segment.</param>
/// <exception cref="System.IO.IOException"/>
internal static void DownloadEditsToStorage(Uri fsName, RemoteEditLog log, NNStorage dstStorage)
{
    System.Diagnostics.Debug.Assert(log.GetStartTxId() > 0 && log.GetEndTxId() > 0, "bad log: " + log);
    string fileid = ImageServlet.GetParamStringForLog(log, dstStorage);
    string finalFileName = NNStorage.GetFinalizedEditsFileName(log.GetStartTxId(), log.GetEndTxId());
    IList<FilePath> finalFiles = dstStorage.GetFiles(NNStorage.NameNodeDirType.Edits, finalFileName);
    System.Diagnostics.Debug.Assert(!finalFiles.IsEmpty(), "No checkpoint targets.");
    foreach (FilePath f in finalFiles)
    {
        if (f.Exists() && FileUtil.CanRead(f))
        {
            Log.Info("Skipping download of remote edit log " + log + " since it already is stored locally at " + f);
            return;
        }
        else
        {
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Dest file: " + f);
            }
        }
    }
    // The same timestamp is reused below to locate the temporary files.
    long milliTime = Time.MonotonicNow();
    string tmpFileName = NNStorage.GetTemporaryEditsFileName(log.GetStartTxId(), log.GetEndTxId(), milliTime);
    IList<FilePath> tmpFiles = dstStorage.GetFiles(NNStorage.NameNodeDirType.Edits, tmpFileName);
    GetFileClient(fsName, fileid, tmpFiles, dstStorage, false);
    // Report the size of the file that was actually written: the finalized
    // file does not exist until the rename below.
    Log.Info("Downloaded file " + tmpFiles[0].GetName() + " size " + tmpFiles[0].Length() + " bytes.");
    CheckpointFaultInjector.GetInstance().BeforeEditsRename();
    foreach (Storage.StorageDirectory sd in dstStorage.DirIterable(NNStorage.NameNodeDirType.Edits))
    {
        FilePath tmpFile = NNStorage.GetTemporaryEditsFile(sd, log.GetStartTxId(), log.GetEndTxId(), milliTime);
        FilePath finalizedFile = NNStorage.GetFinalizedEditsFile(sd, log.GetStartTxId(), log.GetEndTxId());
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Renaming " + tmpFile + " to " + finalizedFile);
        }
        bool success = tmpFile.RenameTo(finalizedFile);
        if (!success)
        {
            Log.Warn("Unable to rename edits file from " + tmpFile + " to " + finalizedFile);
        }
    }
}
/// <summary>
/// Locates the finalized edits file covering exactly the given transaction
/// ID range in the current directory of this storage directory.
/// </summary>
/// <param name="startTxId">First transaction ID of the segment.</param>
/// <param name="endTxId">Last transaction ID of the segment.</param>
/// <returns>The existing edits file for the range.</returns>
/// <exception cref="System.IO.IOException">If no file exists for the range.</exception>
internal virtual FilePath FindFinalizedEditsFile(long startTxId, long endTxId)
{
    FilePath currentDir = sd.GetCurrentDir();
    string segmentName = NNStorage.GetFinalizedEditsFileName(startTxId, endTxId);
    FilePath candidate = new FilePath(currentDir, segmentName);
    if (candidate.Exists())
    {
        return candidate;
    }
    throw new IOException("No edits file for range " + startTxId + "-" + endTxId);
}
/// <summary>
/// Asserts that <paramref name="dir"/> contains the finalized segment
/// 1..(discardStartTxId - 1) and a ".trash" copy of the segment
/// discardStartTxId..discardEndTxId.
/// </summary>
private void CheckJNStorage(FilePath dir, long discardStartTxId, long discardEndTxId)
{
    string keptSegmentName = NNStorage.GetFinalizedEditsFileName(1, discardStartTxId - 1);
    FilePath keptSegment = new FilePath(dir, keptSegmentName);
    NUnit.Framework.Assert.IsTrue(keptSegment.Exists());

    string trashedSegmentName = NNStorage.GetFinalizedEditsFileName(discardStartTxId, discardEndTxId) + ".trash";
    FilePath trashedSegment = new FilePath(dir, trashedSegmentName);
    NUnit.Framework.Assert.IsTrue(trashedSegment.Exists());
}
/// <summary>
/// Places a fake in-progress edit log in the shared edits directory of an
/// HA mini cluster, optionally appends a few trailing bytes to it,
/// transitions NN 0 to active, and checks that the log was read and
/// finalized and that a new in-progress segment was started.
/// </summary>
/// <param name="partialTxAtEnd">
/// When true, four extra bytes are appended to the in-progress log before
/// the transition.
/// </param>
/// <exception cref="System.Exception"/>
private void TestFailoverFinalizesAndReadsInProgress(bool partialTxAtEnd)
{
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0)
        .Build();
    try
    {
        // Fabricate an in-progress edit log in the shared directory.
        URI sharedUri = cluster.GetSharedEditsDir(0, 1);
        FilePath sharedCurrentDir = new FilePath(sharedUri.GetPath(), "current");
        FSNamesystem namesystem0 = cluster.GetNamesystem(0);
        FSImageTestUtil.CreateAbortedLogWithMkdirs(sharedCurrentDir, NumDirsInLog, 1,
            namesystem0.GetFSDirectory().GetLastInodeId() + 1);
        AssertEditFiles(Sharpen.Collections.SingletonList(sharedUri), NNStorage.GetInProgressEditsFileName(1));

        if (partialTxAtEnd)
        {
            FileOutputStream appender = null;
            try
            {
                FilePath inProgressLog = new FilePath(sharedCurrentDir, NNStorage.GetInProgressEditsFileName(1));
                appender = new FileOutputStream(inProgressLog, true);
                byte[] trailingBytes = new byte[] { 0x18, 0x00, 0x00, 0x00 };
                appender.Write(trailingBytes);
                Log.Error("editLogFile = " + inProgressLog);
            }
            finally
            {
                IOUtils.Cleanup(Log, appender);
            }
        }

        // Make NN 0 the active node.
        cluster.TransitionToActive(0);

        // Becoming active should have replayed the log, so one of the
        // directories created by the fake log must be visible.
        string lastDirInLog = "/dir" + NumDirsInLog;
        NUnit.Framework.Assert.IsNotNull(
            cluster.GetNameNode(0).GetRpcServer().GetFileInfo(lastDirInLog));

        // The log should also have been finalized in the shared directory,
        // with a new segment opened at the following txid.
        AssertEditFiles(Sharpen.Collections.SingletonList(sharedUri),
            NNStorage.GetFinalizedEditsFileName(1, NumDirsInLog + 1),
            NNStorage.GetInProgressEditsFileName(NumDirsInLog + 2));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Retention scenario with a single image at txid 100 followed by three
/// finalized segments and one in-progress segment; every file is registered
/// with a false flag (i.e. none is marked as expected to be purged).
/// </summary>
public virtual void TestPurgeLessThanRetention()
{
    const string currentDir = "/foo1/current/";
    TestNNStorageRetentionManager.TestCaseDescription scenario =
        new TestNNStorageRetentionManager.TestCaseDescription(this);

    scenario.AddRoot("/foo1", NNStorage.NameNodeDirType.ImageAndEdits);
    scenario.AddImage(currentDir + NNStorage.GetImageFileName(100), false);
    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(101, 200), false);
    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(201, 300), false);
    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(301, 400), false);
    scenario.AddLog(currentDir + NNStorage.GetInProgressEditsFileName(401), false);

    RunTest(scenario);
}
/// <summary>
/// Retention scenario with four images and matching edit segments in one
/// combined directory: the two oldest images and the two oldest segments
/// are flagged true, everything else (including VERSION) is flagged false.
/// </summary>
public virtual void TestPurgeEasyCase()
{
    const string currentDir = "/foo1/current/";
    TestNNStorageRetentionManager.TestCaseDescription scenario =
        new TestNNStorageRetentionManager.TestCaseDescription(this);

    scenario.AddRoot("/foo1", NNStorage.NameNodeDirType.ImageAndEdits);

    scenario.AddImage(currentDir + NNStorage.GetImageFileName(100), true);
    scenario.AddImage(currentDir + NNStorage.GetImageFileName(200), true);
    scenario.AddImage(currentDir + NNStorage.GetImageFileName(300), false);
    scenario.AddImage(currentDir + NNStorage.GetImageFileName(400), false);

    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(101, 200), true);
    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(201, 300), true);
    scenario.AddLog(currentDir + NNStorage.GetFinalizedEditsFileName(301, 400), false);
    scenario.AddLog(currentDir + NNStorage.GetInProgressEditsFileName(401), false);

    // Unrelated files must be left alone.
    scenario.AddLog(currentDir + "VERSION", false);

    RunTest(scenario);
}
/// <summary>
/// Writes five segments of ten transactions each, then selects input
/// streams starting from txid 25 (inside a segment rather than on its
/// boundary) and verifies that edits 25 through 50 are returned.
/// </summary>
public virtual void TestSelectInputStreamsNotOnBoundary()
{
    int txIdsPerSegment = 10;
    int lastSegmentStart = 5 * txIdsPerSegment;
    for (int segmentStart = 1; segmentStart <= lastSegmentStart; segmentStart += txIdsPerSegment)
    {
        QJMTestUtil.WriteSegment(cluster, qjm, segmentStart, txIdsPerSegment, true);
    }

    FilePath journalCurrentDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(journalCurrentDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 10),
        NNStorage.GetFinalizedEditsFileName(11, 20),
        NNStorage.GetFinalizedEditsFileName(21, 30),
        NNStorage.GetFinalizedEditsFileName(31, 40),
        NNStorage.GetFinalizedEditsFileName(41, 50));

    AList<EditLogInputStream> selected = new AList<EditLogInputStream>();
    qjm.SelectInputStreams(selected, 25, false);
    QJMTestUtil.VerifyEdits(selected, 25, 50);
}
/// <summary>
/// Retention scenario with separate image and edits roots and the
/// "extra edits retained" setting at 50: the 201-300 segment is flagged
/// false because of that setting, while 101-200 is flagged true.
/// </summary>
public virtual void TestRetainExtraLogs()
{
    const string imageDir = "/foo1/current/";
    const string editsDir = "/foo2/current/";

    conf.SetLong(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 50);
    TestNNStorageRetentionManager.TestCaseDescription scenario =
        new TestNNStorageRetentionManager.TestCaseDescription(this);

    scenario.AddRoot("/foo1", NNStorage.NameNodeDirType.Image);
    scenario.AddRoot("/foo2", NNStorage.NameNodeDirType.Edits);

    scenario.AddImage(imageDir + NNStorage.GetImageFileName(100), true);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(200), true);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(300), false);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(400), false);

    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(101, 200), true);
    // 50 extra edits are required, so the 201-300 segment *is* retained.
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(201, 300), false);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(301, 400), false);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(401), false);

    RunTest(scenario);
}
/// <summary>
/// Retention scenario with 150 extra edits requested but at most 2 extra
/// segments allowed: segments up to 226-240 (and their stray in-progress
/// files) are flagged true, segments from 241-275 onward are flagged false.
/// </summary>
public virtual void TestRetainExtraLogsLimitedSegments()
{
    const string imageDir = "/foo1/current/";
    const string editsDir = "/foo2/current/";

    conf.SetLong(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 150);
    conf.SetLong(DFSConfigKeys.DfsNamenodeMaxExtraEditsSegmentsRetainedKey, 2);
    TestNNStorageRetentionManager.TestCaseDescription scenario =
        new TestNNStorageRetentionManager.TestCaseDescription(this);

    scenario.AddRoot("/foo1", NNStorage.NameNodeDirType.Image);
    scenario.AddRoot("/foo2", NNStorage.NameNodeDirType.Edits);

    scenario.AddImage(imageDir + NNStorage.GetImageFileName(100), true);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(200), true);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(300), false);
    scenario.AddImage(imageDir + NNStorage.GetImageFileName(400), false);

    // Segments holding txns up to txId 250 count as extra and get purged.
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(1, 100), true);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(101, 175), true);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(176) + ".empty", true);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(176, 200), true);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(201, 225), true);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(226) + ".corrupt", true);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(226, 240), true);

    // Just 2 extra segments are kept. 301-350 and 351-400 are treated as
    // required rather than extra.
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(241, 275), false);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(276, 300), false);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(301) + ".empty", false);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(301, 350), false);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(351) + ".corrupt", false);
    scenario.AddLog(editsDir + NNStorage.GetFinalizedEditsFileName(351, 400), false);
    scenario.AddLog(editsDir + NNStorage.GetInProgressEditsFileName(401), false);

    RunTest(scenario);
}
/// <summary>
/// Asserts the expected NameNode storage layout: a finalized segment
/// 1..imageTxId, an in-progress segment starting at imageTxId + 1, an
/// optional ".trash" segment, and an image file for imageTxId.
/// </summary>
/// <param name="storage">Storage to inspect.</param>
/// <param name="imageTxId">Transaction ID of the expected image.</param>
/// <param name="trashEndTxId">
/// When positive, the end txid of the expected trashed segment, and a
/// regular image is expected; otherwise a rollback image is expected.
/// </param>
private void CheckNNStorage(NNStorage storage, long imageTxId, long trashEndTxId)
{
    bool expectTrash = trashEndTxId > 0;

    string finalizedName = NNStorage.GetFinalizedEditsFileName(1, imageTxId);
    IList<FilePath> finalizedFiles = storage.GetFiles(NNStorage.NameNodeDirType.Edits, finalizedName);
    NUnit.Framework.Assert.IsTrue(FileExists(finalizedFiles));

    // In the rollback case there is an in-progress file for future transactions.
    string inProgressName = NNStorage.GetInProgressEditsFileName(imageTxId + 1);
    IList<FilePath> inProgressFiles = storage.GetFiles(NNStorage.NameNodeDirType.Edits, inProgressName);
    NUnit.Framework.Assert.IsTrue(FileExists(inProgressFiles));

    if (expectTrash)
    {
        string trashName = NNStorage.GetFinalizedEditsFileName(imageTxId + 1, trashEndTxId) + ".trash";
        IList<FilePath> trashFiles = storage.GetFiles(NNStorage.NameNodeDirType.Edits, trashName);
        NUnit.Framework.Assert.IsTrue(FileExists(trashFiles));
    }

    string imageName;
    if (expectTrash)
    {
        imageName = NNStorage.GetImageFileName(imageTxId);
    }
    else
    {
        imageName = NNStorage.GetRollbackImageFileName(imageTxId);
    }
    IList<FilePath> images = storage.GetFiles(NNStorage.NameNodeDirType.Image, imageName);
    NUnit.Framework.Assert.IsTrue(FileExists(images));
}
/// <summary>
/// Builds an edge-case journal state shared by several tests, in which only
/// one of three JournalNodes holds a segment with an accepted recovery.
/// </summary>
/// <remarks>
/// Initial writer:
/// - Writing to 3 JNs: JN0, JN1, JN2:
/// - A log segment with txnid 1 through 100 succeeds.
/// - The first transaction in the next segment only goes to JN0
/// before the writer crashes (eg it is partitioned)
/// Recovery by another writer:
/// - The new NN starts recovery and talks to all three. Thus, it sees
/// that the newest log segment which needs recovery is 101.
/// - It sends the prepareRecovery(101) call, and decides that the
/// recovery length for 101 is only the 1 transaction.
/// - It sends acceptRecovery(101-101) to only JN0, before crashing
/// Resulting state:
/// - JN0: 1-100 finalized, 101_inprogress, accepted recovery: 101-101
/// - JN1: 1-100 finalized, 101_inprogress.empty
/// - JN2: 1-100 finalized, 101_inprogress.empty
/// (the .empty files got moved aside during recovery)
/// </remarks>
/// <exception cref="System.Exception"></exception>
private void SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery()
{
    int[] failingNodes = new int[] { 1, 2 };

    // The segment covering txns 1-100 is written successfully.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 100, true);

    // startLogSegment reaches only one of the three nodes.
    foreach (int node in failingNodes)
    {
        FailLoggerAtTxn(spies[node], 101);
    }
    try
    {
        QJMTestUtil.WriteSegment(cluster, qjm, 101, 1, true);
        NUnit.Framework.Assert.Fail("Should have failed");
    }
    catch (QuorumException writeFailure)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", writeFailure);
    }
    finally
    {
        qjm.Close();
    }

    // Recovery 1:
    // acceptRecovery() reaches only the node that has txid 101, so the
    // recovery fails because just 1/3 accepted it.
    qjm = CreateSpyingQJM();
    spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
    foreach (int node in failingNodes)
    {
        TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure"))
            .When(spies[node])
            .AcceptRecovery(
                Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
                Org.Mockito.Mockito.Any<Uri>());
    }
    try
    {
        qjm.RecoverUnfinalizedSegments();
        NUnit.Framework.Assert.Fail("Should have failed to recover");
    }
    catch (QuorumException recoveryFailure)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", recoveryFailure);
    }
    finally
    {
        qjm.Close();
    }

    // Confirm the on-disk state matches the one described in the remarks.
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetInProgressEditsFileName(101));
    foreach (int node in failingNodes)
    {
        GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(node, QJMTestUtil.Jid), "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 100),
            NNStorage.GetInProgressEditsFileName(101) + ".empty");
    }

    FilePath acceptedPaxosDir = new FilePath(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "paxos");
    GenericTestUtils.AssertGlobEquals(acceptedPaxosDir, ".*", "101");
    foreach (int node in failingNodes)
    {
        FilePath emptyPaxosDir = new FilePath(cluster.GetCurrentDir(node, QJMTestUtil.Jid), "paxos");
        GenericTestUtils.AssertGlobEquals(emptyPaxosDir, ".*");
    }
}
/// <summary>
/// Exercises recovery when, at the start of a segment, a transaction has
/// reached one JournalNode but not the others.
/// </summary>
/// <param name="nodeWithOneTxn">
/// Index of the JournalNode that receives the single transaction; the
/// other two roles are derived from it modulo 3.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
{
    int emptySegmentNode = (nodeWithOneTxn + 1) % 3;
    int absentSegmentNode = (nodeWithOneTxn + 2) % 3;

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
    cluster.GetJournalNode(absentSegmentNode).StopAndJoin(0);

    // The segment gets opened on 2/3 nodes.
    EditLogOutputStream segmentStream = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

        // Transactions reach only 1/3 nodes.
        FailLoggerAtTxn(spies[emptySegmentNode], 4);
        try
        {
            QJMTestUtil.WriteTxns(segmentStream, 4, 1);
            NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
        }
        catch (QuorumException expected)
        {
            GenericTestUtils.AssertExceptionContains("mock failure", expected);
        }
    }
    finally
    {
        segmentStream.Abort();
    }

    // Restart the JN that was down.
    cluster.RestartJournalNode(absentSegmentNode);

    // State before creating a new QJM:
    // A: emptySegmentNode: 1-3 finalized, 4_inprogress (empty)
    // B: nodeWithOneTxn: 1-3 finalized, 4_inprogress (1 txn)
    // C: absentSegmentNode: 1-3 finalized
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(emptySegmentNode, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(nodeWithOneTxn, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(absentSegmentNode, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3));

    // One node is stopped. The test runs three times with the node roles
    // rotated, which covers all the permutations.
    cluster.GetJournalNode(2).StopAndJoin(0);
    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();

    bool txnHolderResponded = nodeWithOneTxn == 0 || nodeWithOneTxn == 1;
    if (txnHolderResponded)
    {
        // The node holding the transaction took part in recovery, so
        // txid 4 should have been recovered.
        CheckRecovery(cluster, 4, 4);
        QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
    }
    else
    {
        // Only 1-3 should have been recovered, and a new segment can be
        // started at 4.
        CheckRecovery(cluster, 1, 3);
        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    }
}
/// <summary>
/// Blocks an edit thread just before logSync (via a spied edit log), then
/// enters safe mode and saves the namespace from the main thread, and
/// verifies the resulting finalized and in-progress edit logs.
/// </summary>
public virtual void TestSaveRightBeforeSync()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);
    try
    {
        FSImage image = namesystem.GetFSImage();
        FSEditLog spiedEditLog = Org.Mockito.Mockito.Spy(image.GetEditLog());
        DFSTestUtil.SetEditLogForTesting(namesystem, spiedEditLog);

        AtomicReference<Exception> editFailure = new AtomicReference<Exception>();
        CountDownLatch aboutToSync = new CountDownLatch(1);
        Sharpen.Thread editor = new _Thread_467(namesystem, editFailure, aboutToSync);
        Answer<Void> syncStub = new _Answer_484(editor, aboutToSync);
        Org.Mockito.Mockito.DoAnswer(syncStub).When(spiedEditLog).LogSync();
        editor.Start();

        Log.Info("Main thread: waiting to just before logSync...");
        aboutToSync.Await();
        NUnit.Framework.Assert.IsNull(editFailure.Get());
        Log.Info("Main thread: detected that logSync about to be called.");
        Log.Info("Trying to enter safe mode.");
        Log.Info("This should block for " + BlockTime + "sec, since we have pending edits");

        long enterStart = Time.Now();
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        long enterEnd = Time.Now();
        Log.Info("Entered safe mode");

        // Entering safe mode must really have waited for the flush.
        NUnit.Framework.Assert.IsTrue(enterEnd - enterStart > (BlockTime - 1) * 1000);

        // Now in safe mode: save the namespace.
        namesystem.SaveNamespace();
        Log.Info("Joining on edit thread...");
        editor.Join();
        NUnit.Framework.Assert.IsNull(editFailure.Get());

        // Three edits were made: begin, txn, and end.
        string finalizedName = NNStorage.GetFinalizedEditsFileName(1, 3);
        NUnit.Framework.Assert.AreEqual(3, VerifyEditLogs(namesystem, image, finalizedName, 1));

        // After the save there is only the single "begin".
        string inProgressName = NNStorage.GetInProgressEditsFileName(4);
        NUnit.Framework.Assert.AreEqual(1, VerifyEditLogs(namesystem, image, inProgressName, 4));
    }
    finally
    {
        Log.Info("Closing nn");
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Blocks an edit thread inside the flush of a spied edit log output
/// stream, then enters safe mode and saves the namespace from the main
/// thread, and verifies the resulting finalized and in-progress edit logs.
/// </summary>
public virtual void TestSaveImageWhileSyncInProgress()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);
    try
    {
        FSImage image = namesystem.GetFSImage();
        FSEditLog editLog = image.GetEditLog();
        JournalSet.JournalAndStream firstJournal = editLog.GetJournals()[0];
        EditLogFileOutputStream spiedStream = Org.Mockito.Mockito.Spy(
            (EditLogFileOutputStream)firstJournal.GetCurrentStream());
        firstJournal.SetCurrentStreamForTests(spiedStream);

        AtomicReference<Exception> editFailure = new AtomicReference<Exception>();
        CountDownLatch insideFlush = new CountDownLatch(1);
        Sharpen.Thread editor = new _Thread_371(namesystem, editFailure, insideFlush);
        Answer<Void> flushStub = new _Answer_388(editor, insideFlush);

        // The stub signals the main thread once the edit thread is in the racy section.
        Org.Mockito.Mockito.DoAnswer(flushStub).When(spiedStream).Flush();
        editor.Start();

        // Block until the edit thread reaches the unsynchronized part of logSync.
        Log.Info("Main thread: waiting to enter flush...");
        insideFlush.Await();
        NUnit.Framework.Assert.IsNull(editFailure.Get());
        Log.Info("Main thread: detected that logSync is in unsynchronized section.");
        Log.Info("Trying to enter safe mode.");
        Log.Info("This should block for " + BlockTime + "sec, since flush will sleep that long");

        long enterStart = Time.Now();
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        long enterEnd = Time.Now();
        Log.Info("Entered safe mode");

        // Entering safe mode must really have waited for the flush.
        NUnit.Framework.Assert.IsTrue(enterEnd - enterStart > (BlockTime - 1) * 1000);

        // Now in safe mode: save the namespace.
        namesystem.SaveNamespace();
        Log.Info("Joining on edit thread...");
        editor.Join();
        NUnit.Framework.Assert.IsNull(editFailure.Get());

        // Three edits were made: begin, txn, and end.
        string finalizedName = NNStorage.GetFinalizedEditsFileName(1, 3);
        NUnit.Framework.Assert.AreEqual(3, VerifyEditLogs(namesystem, image, finalizedName, 1));

        // After the save there is only the single "begin".
        string inProgressName = NNStorage.GetInProgressEditsFileName(4);
        NUnit.Framework.Assert.AreEqual(1, VerifyEditLogs(namesystem, image, inProgressName, 4));
    }
    finally
    {
        Log.Info("Closing nn");
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Starts a mini cluster and transaction workers, then up to NumSaveImage
/// times enters safe mode, verifies the in-progress edit log, saves the
/// namespace, verifies the finalized edit log, and leaves safe mode.
/// </summary>
/// <remarks>
/// Cleanup always closes the file system and shuts the cluster down, even
/// when a worker error is rethrown, so a failing run does not leak the cluster.
/// </remarks>
public virtual void TestSaveNamespace()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        NamenodeProtocols nn = cluster.GetNameNodeRpc();
        FSImage fsimage = namesystem.GetFSImage();
        FSEditLog editLog = fsimage.GetEditLog();
        StartTransactionWorkers(nn, caughtErr);
        // Stop saving early as soon as a worker has reported an error.
        for (int i = 0; i < NumSaveImage && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: the sleep only spaces out the saves.
            }
            Log.Info("Save " + i + ": entering safe mode");
            namesystem.EnterSafeMode(false);
            // Verify edit logs before the save
            // They should start with the first edit after the checkpoint
            long logStartTxId = fsimage.GetStorage().GetMostRecentCheckpointTxId() + 1;
            VerifyEditLogs(namesystem, fsimage, NNStorage.GetInProgressEditsFileName(logStartTxId), logStartTxId);
            Log.Info("Save " + i + ": saving namespace");
            namesystem.SaveNamespace();
            Log.Info("Save " + i + ": leaving safemode");
            long savedImageTxId = fsimage.GetStorage().GetMostRecentCheckpointTxId();
            // Verify that edit logs post save got finalized and aren't corrupt
            VerifyEditLogs(namesystem, fsimage, NNStorage.GetFinalizedEditsFileName(logStartTxId, savedImageTxId), logStartTxId);
            // The checkpoint id should be 1 less than the last written ID, since
            // the log roll writes the "BEGIN" transaction to the new log.
            NUnit.Framework.Assert.AreEqual(fsimage.GetStorage().GetMostRecentCheckpointTxId(), editLog.GetLastWrittenTxId() - 1);
            namesystem.LeaveSafeMode();
            Log.Info("Save " + i + ": complete");
        }
    }
    finally
    {
        try
        {
            StopTransactionWorkers();
            if (caughtErr.Get() != null)
            {
                throw new RuntimeException(caughtErr.Get());
            }
        }
        finally
        {
            // Release resources even when the worker error above is thrown.
            try
            {
                if (fileSys != null)
                {
                    fileSys.Close();
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
    }
}
/// <summary>
/// Builds a mocked edits directory containing finalized segments 1-100,
/// 101-200 and 1001-1100 plus an in-progress segment at 201, and checks
/// the log listing returned for several starting transaction IDs.
/// </summary>
public virtual void TestGetRemoteEditLog()
{
    Storage.StorageDirectory mockedDir = FSImageTestUtil.MockStorageDirectory(
        NNStorage.NameNodeDirType.Edits,
        false,
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetFinalizedEditsFileName(101, 200),
        NNStorage.GetInProgressEditsFileName(201),
        NNStorage.GetFinalizedEditsFileName(1001, 1100));

    // NNStorage is passed as null because this unit test does not use it.
    FileJournalManager journal = new FileJournalManager(conf, mockedDir, null);

    NUnit.Framework.Assert.AreEqual("[1,100],[101,200],[1001,1100]", GetLogsAsString(journal, 1));
    NUnit.Framework.Assert.AreEqual("[101,200],[1001,1100]", GetLogsAsString(journal, 101));
    NUnit.Framework.Assert.AreEqual("[101,200],[1001,1100]", GetLogsAsString(journal, 150));
    NUnit.Framework.Assert.AreEqual("[1001,1100]", GetLogsAsString(journal, 201));
    NUnit.Framework.Assert.AreEqual(
        "Asking for a newer log than exists should return empty list",
        string.Empty,
        GetLogsAsString(journal, 9999));
}
/// <summary>
/// Runs a NameNode with two name directories, saves the namespace
/// repeatedly, makes the first directory inaccessible for one save, and
/// checks which images and edit logs remain in each directory afterwards.
/// </summary>
public virtual void TestPurgingWithNameEditsDirAfterFailure()
{
    const string imageGlob = "fsimage_\\d*";
    const string editsGlob = "edits_.*";

    MiniDFSCluster cluster = null;
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 0);
    FilePath storageDir0 = new FilePath(TestRootDir, "nn0");
    FilePath storageDir1 = new FilePath(TestRootDir, "nn1");
    FilePath current0 = new FilePath(storageDir0, "current");
    FilePath current1 = new FilePath(storageDir1, "current");
    conf.Set(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(storageDir0, storageDir1));
    try
    {
        cluster = new MiniDFSCluster.Builder(conf)
            .NumDataNodes(0)
            .ManageNameDfsDirs(false)
            .Format(true)
            .Build();
        NameNode nameNode = cluster.GetNameNode();

        DoSaveNamespace(nameNode);
        Log.Info("After first save, images 0 and 2 should exist in both dirs");
        GenericTestUtils.AssertGlobEquals(current0, imageGlob,
            NNStorage.GetImageFileName(0), NNStorage.GetImageFileName(2));
        GenericTestUtils.AssertGlobEquals(current1, imageGlob,
            NNStorage.GetImageFileName(0), NNStorage.GetImageFileName(2));
        GenericTestUtils.AssertGlobEquals(current0, editsGlob,
            NNStorage.GetFinalizedEditsFileName(1, 2), NNStorage.GetInProgressEditsFileName(3));
        GenericTestUtils.AssertGlobEquals(current1, editsGlob,
            NNStorage.GetFinalizedEditsFileName(1, 2), NNStorage.GetInProgressEditsFileName(3));

        DoSaveNamespace(nameNode);
        Log.Info("After second save, image 0 should be purged, " + "and image 4 should exist in both.");
        GenericTestUtils.AssertGlobEquals(current0, imageGlob,
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current1, imageGlob,
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, editsGlob,
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));
        GenericTestUtils.AssertGlobEquals(current1, editsGlob,
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));

        Log.Info("Failing first storage dir by chmodding it");
        NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(current0.GetAbsolutePath(), "000"));
        DoSaveNamespace(nameNode);
        Log.Info("Restoring accessibility of first storage dir");
        NUnit.Framework.Assert.AreEqual(0, FileUtil.Chmod(current0.GetAbsolutePath(), "755"));

        Log.Info("nothing should have been purged in first storage dir");
        GenericTestUtils.AssertGlobEquals(current0, imageGlob,
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, editsGlob,
            NNStorage.GetFinalizedEditsFileName(3, 4), NNStorage.GetInProgressEditsFileName(5));

        Log.Info("fsimage_2 should be purged in second storage dir");
        GenericTestUtils.AssertGlobEquals(current1, imageGlob,
            NNStorage.GetImageFileName(4), NNStorage.GetImageFileName(6));
        GenericTestUtils.AssertGlobEquals(current1, editsGlob,
            NNStorage.GetFinalizedEditsFileName(5, 6), NNStorage.GetInProgressEditsFileName(7));

        Log.Info("On next save, we should purge logs from the failed dir," + " but not images, since the image directory is in failed state.");
        DoSaveNamespace(nameNode);
        GenericTestUtils.AssertGlobEquals(current1, imageGlob,
            NNStorage.GetImageFileName(6), NNStorage.GetImageFileName(8));
        GenericTestUtils.AssertGlobEquals(current1, editsGlob,
            NNStorage.GetFinalizedEditsFileName(7, 8), NNStorage.GetInProgressEditsFileName(9));
        GenericTestUtils.AssertGlobEquals(current0, imageGlob,
            NNStorage.GetImageFileName(2), NNStorage.GetImageFileName(4));
        GenericTestUtils.AssertGlobEquals(current0, editsGlob,
            NNStorage.GetInProgressEditsFileName(9));
    }
    finally
    {
        // Restore permissions so the directory can be cleaned up afterwards.
        FileUtil.Chmod(current0.GetAbsolutePath(), "755");
        Log.Info("Shutting down...");
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Exercises storage-directory failure and restoration on a running NameNode:
/// invalidates the storage directories at <c>path2</c> and <c>path3</c>, makes
/// further edits, runs a checkpoint through the SecondaryNameNode, and then
/// verifies which image and edit-log files exist (and match) in each directory.
/// </summary>
/// <remarks>
/// The cluster and the secondary are shut down explicitly before the final
/// assertion (finalized logs are only expected after a clean shutdown). The
/// <c>finally</c> block additionally guarantees they are released when an
/// earlier assertion or call fails, so a failing run does not leak them.
/// </remarks>
public virtual void TestStorageRestore()
{
    int numDatanodes = 0;
    cluster = new MiniDFSCluster.Builder(config).NumDataNodes(numDatanodes).ManageNameDfsDirs(false).Build();

    SecondaryNameNode secondary = null;
    // Set just before the explicit shutdown calls so the finally block never
    // shuts the same component down a second time.
    bool secondaryStopped = false;
    bool clusterStopped = false;
    try
    {
        cluster.WaitActive();
        secondary = new SecondaryNameNode(config);
        System.Console.Out.WriteLine("****testStorageRestore: Cluster and SNN started");
        PrintStorages(cluster.GetNameNode().GetFSImage());

        FileSystem fs = cluster.GetFileSystem();
        Path path = new Path("/", "test");
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(path));
        System.Console.Out.WriteLine("****testStorageRestore: dir 'test' created, invalidating storage...");

        InvalidateStorage(cluster.GetNameNode().GetFSImage(), ImmutableSet.Of(path2, path3));
        PrintStorages(cluster.GetNameNode().GetFSImage());
        System.Console.Out.WriteLine("****testStorageRestore: storage invalidated");

        path = new Path("/", "test1");
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(path));
        System.Console.Out.WriteLine("****testStorageRestore: dir 'test1' created");

        // We did another edit, so the still-active directory at 'path1'
        // should now differ from the others.
        string inProgress1 = "current/" + NNStorage.GetInProgressEditsFileName(1);
        FSImageTestUtil.AssertFileContentsDifferent(
            2,
            new FilePath(path1, inProgress1),
            new FilePath(path2, inProgress1),
            new FilePath(path3, inProgress1));
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path2, inProgress1),
            new FilePath(path3, inProgress1));
        System.Console.Out.WriteLine("****testStorageRestore: checkfiles(false) run");

        // The checkpoint should re-enable the invalidated storage directories.
        secondary.DoCheckpoint();

        // We should have a checkpoint through txid 4 in the two image dirs
        // (txid=4 for BEGIN, mkdir, mkdir, END).
        string image4 = "current/" + NNStorage.GetImageFileName(4);
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path1, image4),
            new FilePath(path2, image4));
        // NUnit takes the condition first and the failure message second.
        NUnit.Framework.Assert.IsFalse(
            new FilePath(path3, image4).Exists(),
            "Should not have any image in an edits-only directory");

        string finalized1To4 = "current/" + NNStorage.GetFinalizedEditsFileName(1, 4);
        NUnit.Framework.Assert.IsTrue(
            new FilePath(path1, finalized1To4).Exists(),
            "Should have finalized logs in the directory that didn't fail");
        NUnit.Framework.Assert.IsFalse(
            new FilePath(path2, finalized1To4).Exists(),
            "Should not have finalized logs in the failed directories");
        NUnit.Framework.Assert.IsFalse(
            new FilePath(path3, finalized1To4).Exists(),
            "Should not have finalized logs in the failed directories");

        // The new log segment should be in all of the directories.
        string inProgress5 = "current/" + NNStorage.GetInProgressEditsFileName(5);
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path1, inProgress5),
            new FilePath(path2, inProgress5),
            new FilePath(path3, inProgress5));
        string md5BeforeEdit = FSImageTestUtil.GetFileMD5(new FilePath(path1, inProgress5));

        // The original image should still be present in the previously failed
        // image directory after it got restored, since it's still useful for
        // a recovery!
        string image0 = "current/" + NNStorage.GetImageFileName(0);
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path1, image0),
            new FilePath(path2, image0));

        // Do another edit to verify that all the logs are active.
        path = new Path("/", "test2");
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(path));

        // Logs should be changed by the edit.
        string md5AfterEdit = FSImageTestUtil.GetFileMD5(new FilePath(path1, inProgress5));
        NUnit.Framework.Assert.IsFalse(md5BeforeEdit.Equals(md5AfterEdit));

        // And all logs should be changed.
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path1, inProgress5),
            new FilePath(path2, inProgress5),
            new FilePath(path3, inProgress5));

        secondaryStopped = true;
        secondary.Shutdown();
        clusterStopped = true;
        cluster.Shutdown();

        // All logs should be finalized by clean shutdown.
        string finalized5To7 = "current/" + NNStorage.GetFinalizedEditsFileName(5, 7);
        FSImageTestUtil.AssertFileContentsSame(
            new FilePath(path1, finalized5To7),
            new FilePath(path2, finalized5To7),
            new FilePath(path3, finalized5To7));
    }
    finally
    {
        // Only reached with work to do when the test bailed out before the
        // explicit shutdown above; release whatever is still running.
        try
        {
            if (secondary != null && !secondaryStopped)
            {
                secondary.Shutdown();
            }
        }
        finally
        {
            if (!clusterStopped)
            {
                cluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Feeds a mocked storage directory containing two images (txids 123 and 456),
/// one finalized edits segment (123-456) and one in-progress edits segment (457)
/// to an <c>FSImageTransactionalStorageInspector</c>, then checks that both
/// images are found and that the first latest image is the one at txid 456.
/// </summary>
public virtual void TestCurrentStorageInspector()
{
    FSImageTransactionalStorageInspector storageInspector = new FSImageTransactionalStorageInspector();

    // File names reported by the mocked directory, in the order they are listed.
    string currentPrefix = "/foo/current/";
    string olderImagePath = currentPrefix + NNStorage.GetImageFileName(123);
    string finalizedEditsPath = currentPrefix + NNStorage.GetFinalizedEditsFileName(123, 456);
    string newerImagePath = currentPrefix + NNStorage.GetImageFileName(456);
    string inProgressEditsPath = currentPrefix + NNStorage.GetInProgressEditsFileName(457);

    Storage.StorageDirectory storageDir = FSImageTestUtil.MockStorageDirectory(
        NNStorage.NameNodeDirType.ImageAndEdits,
        false,
        olderImagePath,
        finalizedEditsPath,
        newerImagePath,
        inProgressEditsPath);

    storageInspector.InspectDirectory(storageDir);

    // Both image files must have been picked up.
    NUnit.Framework.Assert.AreEqual(2, storageInspector.foundImages.Count);

    // The first entry of the latest images must be the txid-456 image from our directory.
    FSImageStorageInspector.FSImageFile newestImage = storageInspector.GetLatestImages()[0];
    NUnit.Framework.Assert.AreEqual(456, newestImage.txId);
    NUnit.Framework.Assert.AreSame(storageDir, newestImage.sd);
    NUnit.Framework.Assert.IsTrue(storageInspector.IsUpgradeFinalized());
    NUnit.Framework.Assert.AreEqual(new FilePath(newerImagePath), newestImage.GetFile());
}