/// <summary>
/// Simulates a storage failure: reports errors on every storage directory whose
/// root is contained in <paramref name="filesToInvalidate"/>, then makes writes
/// fail on the file-based journals rooted at path2 or path3.
/// </summary>
/// <param name="fi">image whose storage directories and journals are affected</param>
/// <param name="filesToInvalidate">roots of the storage directories to report as failed</param>
/// <exception cref="System.IO.IOException"/>
public virtual void InvalidateStorage(FSImage fi, ICollection<FilePath> filesToInvalidate)
{
    AList<Storage.StorageDirectory> failedDirs = new AList<Storage.StorageDirectory>(2);
    for (IEnumerator<Storage.StorageDirectory> dirs = fi.GetStorage().DirIterator(); dirs.HasNext();)
    {
        Storage.StorageDirectory dir = dirs.Next();
        if (!filesToInvalidate.Contains(dir.GetRoot()))
        {
            continue;
        }
        Log.Info("causing IO error on " + dir.GetRoot());
        failedDirs.AddItem(dir);
    }

    // simulate an error
    fi.GetStorage().ReportErrorsOnDirectories(failedDirs);

    foreach (JournalSet.JournalAndStream journal in fi.GetEditLog().GetJournals())
    {
        // Only file-based journals are candidates for fault injection.
        if (!(journal.GetManager() is FileJournalManager))
        {
            continue;
        }
        FileJournalManager fileManager = (FileJournalManager)journal.GetManager();
        bool targeted = fileManager.GetStorageDirectory().GetRoot().Equals(path2)
            || fileManager.GetStorageDirectory().GetRoot().Equals(path3);
        if (!targeted)
        {
            continue;
        }

        // Replace the live stream with a spy whose Write always throws.
        EditLogOutputStream faultyStream = Org.Mockito.Mockito.Spy(journal.GetCurrentStream());
        journal.SetCurrentStreamForTests(faultyStream);
        Org.Mockito.Mockito.DoThrow(new IOException("Injected fault: write")).When(faultyStream)
            .Write(Org.Mockito.Mockito.AnyObject<FSEditLogOp>());
    }
}
/// <summary>
/// Creates a signature from the given image: the base storage info comes from the
/// image's storage, and the block pool id, most recent checkpoint txid and current
/// edit-log segment txid are read from the image.
/// </summary>
/// <param name="fsImage">image to take the signature fields from</param>
internal CheckpointSignature(FSImage fsImage)
    : base(fsImage.GetStorage())
{
    this.blockpoolID = fsImage.GetBlockPoolID();

    NNStorage storage = fsImage.GetStorage();
    this.mostRecentCheckpointTxId = storage.GetMostRecentCheckpointTxId();

    FSEditLog editLog = fsImage.GetEditLog();
    this.curSegmentTxId = editLog.GetCurSegmentTxId();
}
/// <summary>
/// Verifies that the edits log and the fsimage are in different storage
/// directories and that each file has the expected length.
/// </summary>
/// <remarks>
/// Every storage directory must be of type Image or of type Edits; any other
/// directory fails the test.
/// </remarks>
/// <param name="img">image whose storage directories are inspected</param>
/// <param name="expectedImgSize">expected length of the image file</param>
/// <param name="expectedEditsSize">expected length of the edits file</param>
private void VerifyDifferentDirs(FSImage img, long expectedImgSize, long expectedEditsSize)
{
    for (IEnumerator<Storage.StorageDirectory> it = img.GetStorage().DirIterator(); it.HasNext();)
    {
        Storage.StorageDirectory sd = it.Next();
        if (sd.GetStorageDirType().IsOfType(NNStorage.NameNodeDirType.Image))
        {
            FilePath imf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Image, 0);
            Log.Info("--image file " + imf.GetAbsolutePath() + "; len = " + imf.Length() + "; expected = "
                + expectedImgSize);
            NUnit.Framework.Assert.AreEqual(expectedImgSize, imf.Length());
        }
        else if (sd.GetStorageDirType().IsOfType(NNStorage.NameNodeDirType.Edits))
        {
            FilePath edf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Edits, 0);
            Log.Info("-- edits file " + edf.GetAbsolutePath() + "; len = " + edf.Length() + "; expected = "
                + expectedEditsSize);
            NUnit.Framework.Assert.AreEqual(expectedEditsSize, edf.Length());
        }
        else
        {
            NUnit.Framework.Assert.Fail("Image/Edits directories are not different");
        }
    }
}
/// <summary>
/// Downloads the most recent checkpoint image reported by <paramref name="proxy"/>
/// into <paramref name="storage"/>.
/// </summary>
/// <param name="storage">storage whose info is applied to the temporary image and which receives the download</param>
/// <param name="proxy">source of the checkpoint txid and current transaction id</param>
/// <returns>
/// <c>ErrCodeLogsUnavailable</c> when the shared-edits check is enabled and
/// <c>CheckLogsAvailableForRead</c> fails; otherwise 0.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private int DownloadImage(NNStorage storage, NamenodeProtocol proxy)
{
    // Load the newly formatted image, using all of the directories
    // (including shared edits)
    long imageTxId = proxy.GetMostRecentCheckpointTxId();
    long curTxId = proxy.GetTransactionID();
    FSImage image = new FSImage(conf);
    try
    {
        image.GetStorage().SetStorageInfo(storage);
        image.InitEditLog(HdfsServerConstants.StartupOption.Regular);
        System.Diagnostics.Debug.Assert(image.GetEditLog().IsOpenForRead(),
            "Expected edit log to be open for read");

        // Ensure that we have enough edits already in the shared directory to
        // start up from the last checkpoint on the active.
        if (!skipSharedEditsCheck && !CheckLogsAvailableForRead(image, imageTxId, curTxId))
        {
            return ErrCodeLogsUnavailable;
        }
        image.GetStorage().WriteTransactionIdFileToStorage(curTxId);

        // Download that checkpoint into our storage directories.
        MD5Hash hash = TransferFsImage.DownloadImageToStorage(otherHttpAddr, imageTxId, storage, true);
        image.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, imageTxId, hash);
    }
    finally
    {
        // The temporary image is local to this method, so release it on every
        // exit path: early return, success and any exception.
        image.Close();
    }
    return 0;
}
/// <summary>
/// Checks this signature against the given image and throws when the cluster
/// identity or the storage version does not match.
/// </summary>
/// <param name="si">image to validate against</param>
/// <exception cref="System.IO.IOException">
/// thrown when <c>IsSameCluster</c> or <c>StorageVersionMatches</c> reports a mismatch
/// </exception>
internal virtual void ValidateStorageInfo(FSImage si)
{
    if (IsSameCluster(si) && StorageVersionMatches(si.GetStorage()))
    {
        return;
    }

    // Fields held by this signature.
    string actual = "LV = " + layoutVersion + " namespaceID = " + namespaceID + " cTime = " + cTime
        + " ; clusterId = " + clusterID + " ; blockpoolId = " + blockpoolID;
    // Matching fields of the image, in the same order.
    string expected = si.GetStorage().layoutVersion + "; " + si.GetStorage().namespaceID + "; "
        + si.GetStorage().cTime + "; " + si.GetClusterID() + "; " + si.GetBlockPoolID();

    throw new IOException("Inconsistent checkpoint fields.\n" + actual + ".\nExpecting respectively: "
        + expected + ".");
}
//////////////////////////////////////////////////////
/// <summary>
/// Decides whether a checkpoint is needed at startup. In the Checkpoint role a
/// checkpoint is needed only when the first storage directory has no version
/// file; in any other role the answer is always true.
/// </summary>
/// <returns>true when a checkpoint should be taken at startup</returns>
internal virtual bool ShouldCheckpointAtStartup()
{
    FSImage image = GetFSImage();
    if (!IsRole(HdfsServerConstants.NamenodeRole.Checkpoint))
    {
        // BN always checkpoints on startup in order to get in sync with namespace
        return true;
    }

    System.Diagnostics.Debug.Assert(image.GetStorage().GetNumStorageDirs() > 0);
    bool versionFileExists = image.GetStorage().GetStorageDir(0).GetVersionFile().Exists();
    return !versionFileExists;
}
/// <summary>
/// Returns the difference between the image's last applied-or-written txid and
/// the txid of its most recent checkpoint.
/// </summary>
private long CountUncheckpointedTxns()
{
    FSImage image = namesystem.GetFSImage();
    long latestTxId = image.GetLastAppliedOrWrittenTxId();
    long checkpointedTxId = image.GetStorage().GetMostRecentCheckpointTxId();
    return latestTxId - checkpointedTxId;
}
/// <summary>
/// Merges a checkpoint into <paramref name="dstImage"/>: optionally reloads the
/// image file named by the signature, applies the edit logs in the manifest, then
/// saves the image in all directories and writes the storage.
/// </summary>
/// <param name="sig">signature whose storage info and checkpoint txid are used</param>
/// <param name="manifest">edit logs to roll forward</param>
/// <param name="loadImage">when true, the image file is reloaded before the logs are applied</param>
/// <param name="dstImage">image being updated</param>
/// <param name="dstNamesystem">namesystem the image and edits are loaded into</param>
/// <exception cref="System.IO.IOException">
/// thrown when <paramref name="loadImage"/> is set and no image file exists for the
/// signature's checkpoint txid
/// </exception>
internal static void DoMerge(CheckpointSignature sig, RemoteEditLogManifest manifest, bool loadImage,
    FSImage dstImage, FSNamesystem dstNamesystem)
{
    NNStorage targetStorage = dstImage.GetStorage();
    targetStorage.SetStorageInfo(sig);

    if (loadImage)
    {
        FilePath imageFile = targetStorage.FindImageFile(NNStorage.NameNodeFile.Image,
            sig.mostRecentCheckpointTxId);
        if (imageFile == null)
        {
            throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId
                + " even though it should have " + "just been downloaded");
        }

        // The reload happens under the namesystem write lock.
        dstNamesystem.WriteLock();
        try
        {
            dstImage.ReloadFromImageFile(imageFile, dstNamesystem);
        }
        finally
        {
            dstNamesystem.WriteUnlock();
        }
        dstNamesystem.ImageLoadComplete();
    }

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().DuringMerge();

    Checkpointer.RollForwardByApplyingLogs(manifest, dstImage, dstNamesystem);

    // The following has the side effect of purging old fsimages/edit logs.
    long lastAppliedTxId = dstImage.GetLastAppliedTxId();
    dstImage.SaveFSImageInAllDirs(dstNamesystem, lastAppliedTxId);
    targetStorage.WriteAll();
}
/// <summary>
/// Rejects a transfer request when security is enabled and the requestor is not
/// valid, or when the caller supplied storage info that differs from this
/// namenode's. In both cases a 403 is sent, a warning is logged and an
/// IOException is thrown.
/// </summary>
/// <param name="context">servlet context passed to the requestor check</param>
/// <param name="conf">configuration passed to the requestor check</param>
/// <param name="request">incoming request</param>
/// <param name="response">response used to send the error</param>
/// <param name="nnImage">image whose storage info is compared</param>
/// <param name="theirStorageInfoString">storage info sent by the caller; null skips the comparison</param>
/// <exception cref="System.IO.IOException">thrown when the request is rejected</exception>
private void ValidateRequest(ServletContext context, Configuration conf, HttpServletRequest request,
    HttpServletResponse response, FSImage nnImage, string theirStorageInfoString)
{
    if (UserGroupInformation.IsSecurityEnabled())
    {
        if (!IsValidRequestor(context, request.GetUserPrincipal().GetName(), conf))
        {
            string errorMsg = "Only Namenode, Secondary Namenode, and administrators may access "
                + "this servlet";
            response.SendError(HttpServletResponse.ScForbidden, errorMsg);
            Log.Warn("Received non-NN/SNN/administrator request for image or edits from "
                + request.GetUserPrincipal().GetName() + " at " + request.GetRemoteHost());
            throw new IOException(errorMsg);
        }
    }

    string myStorageInfoString = nnImage.GetStorage().ToColonSeparatedString();
    if (theirStorageInfoString == null || myStorageInfoString.Equals(theirStorageInfoString))
    {
        return;
    }

    string mismatchMsg = "This namenode has storage info " + myStorageInfoString
        + " but the secondary expected " + theirStorageInfoString;
    response.SendError(HttpServletResponse.ScForbidden, mismatchMsg);
    Log.Warn("Received an invalid request file transfer request " + "from a secondary with storage info "
        + theirStorageInfoString);
    throw new IOException(mismatchMsg);
}
/// <summary>
/// Rolls the edit log NumRolls times while the workers started by
/// <c>StartTransactionWorkers</c> are running, and after each roll verifies the
/// finalized segment and checks that the new in-progress edits file exists.
/// </summary>
public virtual void TestEditLogRolling()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        NamenodeProtocols nn = cluster.GetNameNode().GetRpcServer();
        FSImage fsimage = cluster.GetNamesystem().GetFSImage();
        Storage.StorageDirectory sd = fsimage.GetStorage().GetStorageDir(0);
        StartTransactionWorkers(nn, caughtErr);
        long previousLogTxId = 1;
        for (int i = 0; i < NumRolls && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: the pause between rolls is best effort.
            }
            Log.Info("Starting roll " + i + ".");
            CheckpointSignature sig = nn.RollEditLog();
            long nextLog = sig.curSegmentTxId;
            string logFileName = NNStorage.GetFinalizedEditsFileName(previousLogTxId, nextLog - 1);
            previousLogTxId += VerifyEditLogs(cluster.GetNamesystem(), fsimage, logFileName, previousLogTxId);
            NUnit.Framework.Assert.AreEqual(previousLogTxId, nextLog);
            FilePath expectedLog = NNStorage.GetInProgressEditsFile(sd, previousLogTxId);
            NUnit.Framework.Assert.IsTrue("Expect " + expectedLog + " to exist", expectedLog.Exists());
        }
    }
    finally
    {
        StopTransactionWorkers();

        // Release the file system and the cluster BEFORE surfacing a worker
        // error; throwing first would leave both running.
        try
        {
            if (fileSys != null)
            {
                fileSys.Close();
            }
        }
        finally
        {
            if (cluster != null)
            {
                cluster.Shutdown();
            }
        }

        if (caughtErr.Get() != null)
        {
            throw new RuntimeException(caughtErr.Get());
        }
    }
}
/// <summary>
/// Runs the "-restoreFailedStorage" admin command with false, true and check,
/// and verifies after each run the value returned by the storage's
/// <c>GetRestoreFailedStorage</c>.
/// </summary>
public virtual void TestDfsAdminCmd()
{
    cluster = new MiniDFSCluster.Builder(config).NumDataNodes(2).ManageNameDfsDirs(false).Build();
    try
    {
        // Inside the try so the cluster is shut down even if it never becomes active.
        cluster.WaitActive();
        FSImage fsi = cluster.GetNameNode().GetFSImage();

        // it is started with dfs.namenode.name.dir.restore set to true (in SetUp())
        bool restore = fsi.GetStorage().GetRestoreFailedStorage();
        Log.Info("Restore is " + restore);
        NUnit.Framework.Assert.AreEqual(true, restore);

        // now run DFSAdmnin command
        string cmd = "-fs NAMENODE -restoreFailedStorage false";
        string namenode = config.Get(DFSConfigKeys.FsDefaultNameKey, "file:///");
        CommandExecutor executor = new CLITestCmdDFS(cmd, new CLICommandDFSAdmin()).GetExecutor(namenode);
        executor.ExecuteCommand(cmd);
        restore = fsi.GetStorage().GetRestoreFailedStorage();
        NUnit.Framework.Assert.IsFalse("After set false call restore is " + restore, restore);

        // run one more time - to set it to true again
        cmd = "-fs NAMENODE -restoreFailedStorage true";
        executor.ExecuteCommand(cmd);
        restore = fsi.GetStorage().GetRestoreFailedStorage();
        NUnit.Framework.Assert.IsTrue("After set true call restore is " + restore, restore);

        // run one more time - no change in value
        cmd = "-fs NAMENODE -restoreFailedStorage check";
        CommandExecutor.Result cmdResult = executor.ExecuteCommand(cmd);
        restore = fsi.GetStorage().GetRestoreFailedStorage();
        NUnit.Framework.Assert.IsTrue("After check call restore is " + restore, restore);

        // Strings are immutable: Trim() returns the trimmed copy, so keep it.
        string commandOutput = cmdResult.GetCommandOutput().Trim();
        NUnit.Framework.Assert.IsTrue(commandOutput.Contains("restoreFailedStorage is set to true"));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Returns the absolute path of the finalized edits file that starts at txid 1
/// and ends just before the signature's current segment txid.
/// </summary>
/// <param name="sig">signature supplying the current segment txid</param>
/// <returns>edits file name for cluster</returns>
/// <exception cref="System.IO.IOException"/>
private string GetEditsFilename(CheckpointSignature sig)
{
    NNStorage storage = cluster.GetNameNode().GetFSImage().GetStorage();

    // it was set up to only have ONE StorageDirectory
    Storage.StorageDirectory editsDir = storage.DirIterator(NNStorage.NameNodeDirType.Edits).Next();

    long lastTxId = sig.curSegmentTxId - 1;
    FilePath editsFile = NNStorage.GetFinalizedEditsFile(editsDir, 1, lastTxId);
    System.Diagnostics.Debug.Assert(editsFile.Exists(), "expected " + editsFile + " exists");
    return editsFile.GetAbsolutePath();
}
/// <summary>
/// Overwrites part of the tail of the latest edits file with delete opcodes and
/// checks that restarting the cluster fails with an error message that lists the
/// recent opcode offsets.
/// </summary>
public virtual void TestDisplayRecentEditLogOpCodes()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes)
        .EnableManagedDfsDirsRedundancy(false).Build();
    Storage.StorageDirectory sd;
    try
    {
        cluster.WaitActive();
        FileSystem fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        FSImage fsimage = namesystem.GetFSImage();
        for (int i = 0; i < 20; i++)
        {
            fileSys.Mkdirs(new Path("/tmp/tmp" + i));
        }
        sd = fsimage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Edits).Next();
    }
    finally
    {
        // Shut down even when the setup above fails, so no cluster is left running.
        cluster.Shutdown();
    }

    FilePath editFile = FSImageTestUtil.FindLatestEditsLog(sd).GetFile();
    NUnit.Framework.Assert.IsTrue("Should exist: " + editFile, editFile.Exists());

    // Corrupt the edits file.
    long fileLen = editFile.Length();
    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
    try
    {
        rwf.Seek(fileLen - 40);
        for (int written = 0; written < 20; written++)
        {
            rwf.Write(FSEditLogOpCodes.OpDelete.GetOpCode());
        }
    }
    finally
    {
        // Release the file handle even if seeking or writing fails.
        rwf.Close();
    }

    StringBuilder bld = new StringBuilder();
    bld.Append("^Error replaying edit log at offset \\d+. ");
    bld.Append("Expected transaction ID was \\d+\n");
    bld.Append("Recent opcode offsets: (\\d+\\s*){4}$");

    // Reset so the finally block only sees a cluster created by the restart below.
    cluster = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes)
            .EnableManagedDfsDirsRedundancy(false).Format(false).Build();
        NUnit.Framework.Assert.Fail("should not be able to start");
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.IsTrue("error message contains opcodes message",
            e.Message.Matches(bld.ToString()));
    }
    finally
    {
        // If the restart unexpectedly succeeded, do not leak the running cluster.
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Reads the "clusterID" property from the version file of the first image
/// storage directory configured in <paramref name="config"/>.
/// </summary>
/// <param name="config">configuration naming the namespace and edits directories</param>
/// <returns>the value of the "clusterID" property</returns>
/// <exception cref="System.IO.IOException"/>
private string GetClusterId(Configuration config)
{
    // see if cluster id not empty.
    ICollection<URI> imageDirs = FSNamesystem.GetNamespaceDirs(config);
    IList<URI> editsDirs = FSNamesystem.GetNamespaceEditsDirs(config);
    FSImage fsImage = new FSImage(config, imageDirs, editsDirs);

    Storage.StorageDirectory firstImageDir =
        fsImage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Image).Next();
    Properties versionProps = Storage.ReadPropertiesFile(firstImageDir.GetVersionFile());
    string clusterId = versionProps.GetProperty("clusterID");

    Log.Info("successfully formated : sd=" + firstImageDir.GetCurrentDir() + ";cid=" + clusterId);
    return clusterId;
}
/// <summary>
/// Loads into <paramref name="dstImage"/> every edit log in the manifest whose
/// end txid is greater than the image's last applied txid.
/// </summary>
/// <param name="manifest">edit logs that may need to be applied</param>
/// <param name="dstImage">image whose storage holds the finalized edits files</param>
/// <param name="dstNamesystem">namesystem the edits are loaded into</param>
/// <exception cref="System.IO.IOException"/>
internal static void RollForwardByApplyingLogs(RemoteEditLogManifest manifest, FSImage dstImage,
    FSNamesystem dstNamesystem)
{
    NNStorage storage = dstImage.GetStorage();
    IList<EditLogInputStream> streams = Lists.NewArrayList();

    foreach (RemoteEditLog segment in manifest.GetLogs())
    {
        // Skip segments that end at or before the last applied txid.
        if (segment.GetEndTxId() <= dstImage.GetLastAppliedTxId())
        {
            continue;
        }
        FilePath segmentFile = storage.FindFinalizedEditsFile(segment.GetStartTxId(), segment.GetEndTxId());
        streams.AddItem(new EditLogFileInputStream(segmentFile, segment.GetStartTxId(),
            segment.GetEndTxId(), true));
    }

    Log.Info("Checkpointer about to load edits from " + streams.Count + " stream(s).");
    dstImage.LoadEdits(streams, dstNamesystem);
}
/// <summary>
/// Loads the named edit log from every edits storage directory and asserts that
/// all copies contain the same number of edits.
/// </summary>
/// <param name="namesystem">namesystem handed to the edit log loader</param>
/// <param name="fsimage">image whose edits directories are scanned</param>
/// <param name="logFileName">file name looked up in each directory's current dir</param>
/// <param name="startTxId">txid passed to the loader as the starting point</param>
/// <returns>the number of edits loaded from each copy</returns>
/// <exception cref="System.IO.IOException"/>
private long VerifyEditLogs(FSNamesystem namesystem, FSImage fsimage, string logFileName, long startTxId)
{
    // -1 means "no directory inspected yet".
    long editCount = -1;

    // Verify that we can read in all the transactions that we have written.
    // If there were any corruptions, it is likely that the reading in
    // of these transactions will throw an exception.
    foreach (Storage.StorageDirectory editsDir in
        fsimage.GetStorage().DirIterable(NNStorage.NameNodeDirType.Edits))
    {
        FilePath logFile = new FilePath(editsDir.GetCurrentDir(), logFileName);
        System.Console.Out.WriteLine("Verifying file: " + logFile);

        FSEditLogLoader loader = new FSEditLogLoader(namesystem, startTxId);
        long loaded = loader.LoadFSEdits(new EditLogFileInputStream(logFile), startTxId);
        System.Console.Out.WriteLine("Number of edits: " + loaded);

        bool consistent = editCount == -1 || loaded == editCount;
        NUnit.Framework.Assert.IsTrue(consistent);
        editCount = loaded;
    }

    NUnit.Framework.Assert.IsTrue(editCount != -1);
    return editCount;
}
/// <summary>
/// Saves the namespace NumSaveImage times while the workers started by
/// <c>StartTransactionWorkers</c> are running, verifying the edit logs before and
/// after each save.
/// </summary>
public virtual void TestSaveNamespace()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        NamenodeProtocols nn = cluster.GetNameNodeRpc();
        FSImage fsimage = namesystem.GetFSImage();
        FSEditLog editLog = fsimage.GetEditLog();
        StartTransactionWorkers(nn, caughtErr);
        for (int i = 0; i < NumSaveImage && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: the pause between saves is best effort.
            }
            Log.Info("Save " + i + ": entering safe mode");
            namesystem.EnterSafeMode(false);

            // Verify edit logs before the save
            // They should start with the first edit after the checkpoint
            long logStartTxId = fsimage.GetStorage().GetMostRecentCheckpointTxId() + 1;
            VerifyEditLogs(namesystem, fsimage, NNStorage.GetInProgressEditsFileName(logStartTxId),
                logStartTxId);

            Log.Info("Save " + i + ": saving namespace");
            namesystem.SaveNamespace();
            Log.Info("Save " + i + ": leaving safemode");
            long savedImageTxId = fsimage.GetStorage().GetMostRecentCheckpointTxId();

            // Verify that edit logs post save got finalized and aren't corrupt
            VerifyEditLogs(namesystem, fsimage,
                NNStorage.GetFinalizedEditsFileName(logStartTxId, savedImageTxId), logStartTxId);

            // The checkpoint id should be 1 less than the last written ID, since
            // the log roll writes the "BEGIN" transaction to the new log.
            NUnit.Framework.Assert.AreEqual(fsimage.GetStorage().GetMostRecentCheckpointTxId(),
                editLog.GetLastWrittenTxId() - 1);
            namesystem.LeaveSafeMode();
            Log.Info("Save " + i + ": complete");
        }
    }
    finally
    {
        StopTransactionWorkers();

        // Release the file system and the cluster BEFORE surfacing a worker
        // error; throwing first would leave both running.
        try
        {
            if (fileSys != null)
            {
                fileSys.Close();
            }
        }
        finally
        {
            if (cluster != null)
            {
                cluster.Shutdown();
            }
        }

        if (caughtErr.Get() != null)
        {
            throw new RuntimeException(caughtErr.Get());
        }
    }
}
/// <summary>
/// Starts a saveNamespace on one pool thread, cancels it from another while the
/// save is held at <c>GetGenerationStampV2</c>, and checks that the save fails with
/// a SaveNamespaceCancelledException and leaves only the original image files.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCancelSaveNamespace()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage image = fsn.GetFSImage();
    NNStorage storage = image.GetStorage();
    storage.Close();
    // unlock any directories that FSNamesystem's initialization may have locked
    storage.SetStorageDirectories(FSNamesystem.GetNamespaceDirs(conf),
        FSNamesystem.GetNamespaceEditsDirs(conf));
    FSNamesystem spyFsn = Org.Mockito.Mockito.Spy(fsn);
    FSNamesystem finalFsn = spyFsn;

    // Hold the saver at GetGenerationStampV2 until the test calls Proceed().
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
    BlockIdManager bid = Org.Mockito.Mockito.Spy(spyFsn.GetBlockIdManager());
    Whitebox.SetInternalState(finalFsn, "blockIdManager", bid);
    Org.Mockito.Mockito.DoAnswer(delayer).When(bid).GetGenerationStampV2();

    ExecutorService pool = Executors.NewFixedThreadPool(2);
    try
    {
        DoAnEdit(fsn, 1);
        Canceler canceler = new Canceler();

        // Save namespace
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            Future<Void> saverFuture = pool.Submit(new _Callable_561(image, finalFsn, canceler));

            // Wait until saveNamespace calls getGenerationStamp
            delayer.WaitForCall();

            // then cancel the saveNamespace
            Future<Void> cancelFuture = pool.Submit(new _Callable_572(canceler));

            // give the cancel call time to run
            Sharpen.Thread.Sleep(500);

            // allow saveNamespace to proceed - it should check the cancel flag after
            // this point and throw an exception
            delayer.Proceed();
            cancelFuture.Get();
            saverFuture.Get();
            NUnit.Framework.Assert.Fail("saveNamespace did not fail even though cancelled!");
        }
        catch (Exception t)
        {
            GenericTestUtils.AssertExceptionContains("SaveNamespaceCancelledException", t);
        }
        Log.Info("Successfully cancelled a saveNamespace");

        // Check that we have only the original image and not any
        // cruft left over from half-finished images
        FSImageTestUtil.LogStorageContents(Log, storage);
        foreach (Storage.StorageDirectory sd in storage.DirIterable(null))
        {
            FilePath curDir = sd.GetCurrentDir();
            GenericTestUtils.AssertGlobEquals(curDir, "fsimage_.*", NNStorage.GetImageFileName(0),
                NNStorage.GetImageFileName(0) + MD5FileUtils.Md5Suffix);
        }
    }
    finally
    {
        // Shut the pool down so its worker threads do not outlive the test;
        // the namesystem is closed regardless of how the shutdown goes.
        try
        {
            pool.Shutdown();
        }
        finally
        {
            fsn.Close();
        }
    }
}
/// <summary>
/// Returns true when the namespace id, cluster id and block pool id of this
/// object all equal those of the given image.
/// </summary>
/// <param name="si">image to compare against</param>
internal virtual bool IsSameCluster(FSImage si)
{
    if (namespaceID != si.GetStorage().namespaceID)
    {
        return false;
    }
    if (!clusterID.Equals(si.GetClusterID()))
    {
        return false;
    }
    return blockpoolID.Equals(si.GetBlockPoolID());
}
/// <summary>
/// Saves the namespace with the requested fault injected through Mockito spies,
/// then restarts the namesystem and checks that edits 1 and 2 are still present.
/// </summary>
/// <param name="fault">which failure to inject into the spied image or storage</param>
/// <exception cref="System.Exception"/>
private void SaveNamespaceWithInjectedFault(TestSaveNamespace.Fault fault)
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage (and its storage) with spies
    FSImage originalImage = fsn.GetFSImage();
    NNStorage realStorage = originalImage.GetStorage();
    NNStorage spyStorage = Org.Mockito.Mockito.Spy(realStorage);
    originalImage.storage = spyStorage;
    FSImage spyImage = Org.Mockito.Mockito.Spy(originalImage);
    Whitebox.SetInternalState(fsn, "fsImage", spyImage);

    // Whether the save operation is expected to fail for this fault.
    // NOTE: the Mockito matchers below must stay inline in the stubbed call.
    bool expectFailure = false;
    if (fault == TestSaveNamespace.Fault.SaveSecondFsimageRte)
    {
        // The spy throws a RuntimeException when writing to the second directory
        Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(true)).When(spyImage)
            .SaveFSImage((SaveNamespaceContext)Matchers.AnyObject(),
                (Storage.StorageDirectory)Matchers.AnyObject(),
                (NNStorage.NameNodeFile)Matchers.AnyObject());
        expectFailure = false;
    }
    else if (fault == TestSaveNamespace.Fault.SaveSecondFsimageIoe)
    {
        // The spy throws an IOException when writing to the second directory
        Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(false)).When(spyImage)
            .SaveFSImage((SaveNamespaceContext)Matchers.AnyObject(),
                (Storage.StorageDirectory)Matchers.AnyObject(),
                (NNStorage.NameNodeFile)Matchers.AnyObject());
        expectFailure = false;
    }
    else if (fault == TestSaveNamespace.Fault.SaveAllFsimages)
    {
        // The spy throws a RuntimeException on every SaveFSImage call
        Org.Mockito.Mockito.DoThrow(new RuntimeException("Injected")).When(spyImage)
            .SaveFSImage((SaveNamespaceContext)Matchers.AnyObject(),
                (Storage.StorageDirectory)Matchers.AnyObject(),
                (NNStorage.NameNodeFile)Matchers.AnyObject());
        expectFailure = true;
    }
    else if (fault == TestSaveNamespace.Fault.WriteStorageAll)
    {
        // The spy throws an exception before writing any VERSION files
        Org.Mockito.Mockito.DoThrow(new RuntimeException("Injected")).When(spyStorage).WriteAll();
        expectFailure = true;
    }
    else if (fault == TestSaveNamespace.Fault.WriteStorageOne)
    {
        // The spy throws on exception on one particular storage directory
        Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(true)).When(spyStorage)
            .WriteProperties((Storage.StorageDirectory)Matchers.AnyObject());
        // TODO: unfortunately this fails -- should be improved.
        // See HDFS-2173.
        expectFailure = true;
    }

    try
    {
        DoAnEdit(fsn, 1);

        // Save namespace - this may fail, depending on fault injected
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            fsn.SaveNamespace();
            if (expectFailure)
            {
                NUnit.Framework.Assert.Fail("Did not fail!");
            }
        }
        catch (Exception e)
        {
            if (expectFailure)
            {
                Log.Info("Test caught expected exception", e);
            }
            else
            {
                throw;
            }
        }
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave);

        // Should still be able to perform edits
        DoAnEdit(fsn, 2);

        // Now shut down and restart the namesystem
        originalImage.Close();
        fsn.Close();
        fsn = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        fsn = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edits.
        CheckEditExists(fsn, 1);
        CheckEditExists(fsn, 2);
    }
    finally
    {
        if (fsn != null)
        {
            fsn.Close();
        }
    }
}
/// <summary>
/// Corrupts the latest edit log with the given corruptor, then runs three
/// startups in turn: a normal start (expected to fail when recovery is needed),
/// a start with the recovery option, and a normal start after recovery.
/// </summary>
/// <param name="corruptor">damages the edit log and says whether recovery is then needed</param>
/// <param name="finalize">when false, EndCurrentLogSegment(true) is stubbed out on the edit log</param>
/// <exception cref="System.IO.IOException"/>
internal static void TestNameNodeRecoveryImpl(TestNameNodeRecovery.Corruptor corruptor, bool finalize)
{
    string firstDir = "/test/path/dir";
    string secondDir = "/second/dir";
    bool needRecovery = corruptor.NeedRecovery(finalize);

    // start a cluster
    Configuration conf = new HdfsConfiguration();
    SetupRecoveryTestConf(conf);
    MiniDFSCluster cluster = null;
    Storage.StorageDirectory editsDir = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).ManageNameDfsDirs(false).Build();
        cluster.WaitActive();
        if (!finalize)
        {
            // Normally, the in-progress edit log would be finalized by
            // FSEditLog#endCurrentLogSegment. For testing purposes, we
            // disable that here.
            FSEditLog spyLog = Org.Mockito.Mockito.Spy(cluster.GetNameNode().GetFSImage().GetEditLog());
            Org.Mockito.Mockito.DoNothing().When(spyLog).EndCurrentLogSegment(true);
            DFSTestUtil.SetEditLogForTesting(cluster.GetNamesystem(), spyLog);
        }
        FileSystem fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        FSImage fsimage = namesystem.GetFSImage();
        fileSys.Mkdirs(new Path(firstDir));
        fileSys.Mkdirs(new Path(secondDir));
        editsDir = fsimage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Edits).Next();
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }

    FilePath editFile = FSImageTestUtil.FindLatestEditsLog(editsDir).GetFile();
    NUnit.Framework.Assert.IsTrue("Should exist: " + editFile, editFile.Exists());

    // Corrupt the edit log
    Log.Info("corrupting edit log file '" + editFile + "'");
    corruptor.Corrupt(editFile);

    // If needRecovery == true, make sure that we can't start the
    // cluster normally before recovery
    cluster = null;
    try
    {
        Log.Debug("trying to start normally (this should fail)...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).EnableManagedDfsDirsRedundancy(false)
            .Format(false).Build();
        cluster.WaitActive();
        cluster.Shutdown();
        if (needRecovery)
        {
            NUnit.Framework.Assert.Fail("expected the corrupted edit log to prevent normal startup");
        }
    }
    catch (IOException e)
    {
        if (!needRecovery)
        {
            Log.Error("Got unexpected failure with " + corruptor.GetName() + corruptor, e);
            NUnit.Framework.Assert.Fail("got unexpected exception " + e.Message);
        }
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }

    // Perform NameNode recovery.
    // Even if there was nothing wrong previously (needRecovery == false),
    // this should still work fine.
    cluster = null;
    try
    {
        Log.Debug("running recovery...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).EnableManagedDfsDirsRedundancy(false)
            .Format(false).StartupOption(recoverStartOpt).Build();
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.Fail("caught IOException while trying to recover. " + "message was "
            + e.Message + "\nstack trace\n" + StringUtils.StringifyException(e));
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }

    // Make sure that we can start the cluster normally after recovery
    cluster = null;
    try
    {
        Log.Debug("starting cluster normally after recovery...");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).EnableManagedDfsDirsRedundancy(false)
            .Format(false).Build();
        Log.Debug("successfully recovered the " + corruptor.GetName() + " corrupted edit log");
        cluster.WaitActive();
        NUnit.Framework.Assert.IsTrue(cluster.GetFileSystem().Exists(new Path(firstDir)));
    }
    catch (IOException e)
    {
        NUnit.Framework.Assert.Fail("failed to recover. Error message: " + e.Message);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Returns true when this object's namespace id equals the namespace id of the
/// given image's storage.
/// </summary>
/// <param name="si">image to compare against</param>
internal virtual bool NamespaceIdMatches(FSImage si)
{
    NNStorage theirStorage = si.GetStorage();
    return theirStorage.namespaceID == namespaceID;
}
/// <summary>
/// Saves a checkpoint of the namespace under the checkpoint lock, then uploads it
/// on a separate thread and waits for the upload to finish.
/// </summary>
/// <remarks>
/// Returns without saving or uploading when no transactions have been applied
/// since the previous checkpoint.
/// </remarks>
/// <exception cref="System.Exception">
/// rethrown, after cancelling the upload, when waiting for the upload fails for any
/// reason other than the upload task itself failing
/// </exception>
/// <exception cref="System.IO.IOException">thrown when the upload task fails</exception>
private void DoCheckpoint()
{
    System.Diagnostics.Debug.Assert(canceler != null);
    long txid;
    NNStorage.NameNodeFile imageType;

    // Acquire cpLock to make sure no one is modifying the name system.
    // It does not need the full namesystem write lock, since the only thing
    // that modifies namesystem on standby node is edit log replaying.
    namesystem.CpLockInterruptibly();
    try
    {
        System.Diagnostics.Debug.Assert(namesystem.GetEditLog().IsOpenForRead(),
            "Standby Checkpointer should only attempt a checkpoint when "
            + "NN is in standby mode, but the edit logs are in an unexpected state");
        FSImage img = namesystem.GetFSImage();
        long prevCheckpointTxId = img.GetStorage().GetMostRecentCheckpointTxId();
        long thisCheckpointTxId = img.GetLastAppliedOrWrittenTxId();
        System.Diagnostics.Debug.Assert(thisCheckpointTxId >= prevCheckpointTxId);
        if (thisCheckpointTxId == prevCheckpointTxId)
        {
            Log.Info("A checkpoint was triggered but the Standby Node has not "
                + "received any transactions since the last checkpoint at txid " + thisCheckpointTxId
                + ". Skipping...");
            return;
        }

        if (namesystem.IsRollingUpgrade() && !namesystem.GetFSImage().HasRollbackFSImage())
        {
            // if we will do rolling upgrade but have not created the rollback image
            // yet, name this checkpoint as fsimage_rollback
            imageType = NNStorage.NameNodeFile.ImageRollback;
        }
        else
        {
            imageType = NNStorage.NameNodeFile.Image;
        }
        img.SaveNamespace(namesystem, imageType, canceler);
        txid = img.GetStorage().GetMostRecentCheckpointTxId();
        System.Diagnostics.Debug.Assert(txid == thisCheckpointTxId,
            "expected to save checkpoint at txid=" + thisCheckpointTxId + " but instead saved at txid="
            + txid);

        // Save the legacy OIV image, if the output dir is defined.
        string outputDir = checkpointConf.GetLegacyOivImageDir();
        if (outputDir != null && !outputDir.IsEmpty())
        {
            img.SaveLegacyOIVImage(namesystem, outputDir, canceler);
        }
    }
    finally
    {
        namesystem.CpUnlock();
    }

    // Upload the saved checkpoint back to the active
    // Do this in a separate thread to avoid blocking transition to active
    // See HDFS-4816
    ExecutorService executor = Executors.NewSingleThreadExecutor(uploadThreadFactory);
    Future<Void> upload = executor.Submit(new _Callable_204(this, imageType, txid));
    executor.Shutdown();
    try
    {
        upload.Get();
    }
    catch (ExecutionException e)
    {
        // Must come before the general handler below; otherwise this clause is
        // unreachable and upload failures are never wrapped in an IOException.
        throw new IOException("Exception during image upload: " + e.Message, e.InnerException);
    }
    catch (Exception)
    {
        // The background thread may be blocked waiting in the throttler, so
        // interrupt it.
        upload.Cancel(true);
        throw;
    }
}
/// <summary>
/// Starts a namenode and a secondary namenode, runs one checkpoint, and verifies
/// that the secondary's image and edits files are in different directories and
/// have the same lengths as the namenode's files.
/// </summary>
public virtual void TestSNNStartup()
{
    //setUpConfig();
    Log.Info("--starting SecondNN startup test");

    // name and edits dirs both point at the same "name" directory
    config.Set(DFSConfigKeys.DfsNamenodeNameDirKey,
        Org.Apache.Hadoop.Hdfs.Server.Common.Util.FileAsURI(new FilePath(hdfsDir, "name")).ToString());
    config.Set(DFSConfigKeys.DfsNamenodeEditsDirKey,
        Org.Apache.Hadoop.Hdfs.Server.Common.Util.FileAsURI(new FilePath(hdfsDir, "name")).ToString());

    // checkpoint edits and checkpoint image go to two separate directories
    config.Set(DFSConfigKeys.DfsNamenodeCheckpointEditsDirKey,
        Org.Apache.Hadoop.Hdfs.Server.Common.Util.FileAsURI(new FilePath(hdfsDir, "chkpt_edits")).ToString());
    config.Set(DFSConfigKeys.DfsNamenodeCheckpointDirKey,
        Org.Apache.Hadoop.Hdfs.Server.Common.Util.FileAsURI(new FilePath(hdfsDir, "chkpt")).ToString());

    Log.Info("--starting NN ");
    MiniDFSCluster cluster = null;
    SecondaryNameNode sn = null;
    NameNode nn = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(config).ManageDataDfsDirs(false).ManageNameDfsDirs(false).Build();
        cluster.WaitActive();
        nn = cluster.GetNameNode();
        NUnit.Framework.Assert.IsNotNull(nn);

        // start secondary node
        Log.Info("--starting SecondNN");
        sn = new SecondaryNameNode(config);
        NUnit.Framework.Assert.IsNotNull(sn);
        Log.Info("--doing checkpoint");
        sn.DoCheckpoint();
        // this shouldn't fail
        Log.Info("--done checkpoint");

        // now verify that image and edits are created in the different directories
        FSImage image = nn.GetFSImage();
        Storage.StorageDirectory sd = image.GetStorage().GetStorageDir(0);
        //only one
        NUnit.Framework.Assert.AreEqual(NNStorage.NameNodeDirType.ImageAndEdits, sd.GetStorageDirType());
        FilePath imf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Image, 0);
        FilePath edf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Edits, 0);
        Log.Info("--image file " + imf.GetAbsolutePath() + "; len = " + imf.Length());
        Log.Info("--edits file " + edf.GetAbsolutePath() + "; len = " + edf.Length());
        FSImage chkpImage = sn.GetFSImage();
        VerifyDifferentDirs(chkpImage, imf.Length(), edf.Length());
    }
    catch (IOException e)
    {
        // Write the diagnostic first: Assert.Fail throws, so nothing placed
        // after it would ever run.
        System.Console.Error.WriteLine("checkpoint failed");
        NUnit.Framework.Assert.Fail(StringUtils.StringifyException(e));
    }
    finally
    {
        if (sn != null)
        {
            sn.Shutdown();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Makes every SaveFSImage call on a spied FSImage throw, checks that saveNamespace is
/// rejected, and then confirms that a namesystem freshly loaded from disk still contains
/// the edit made before the failure.
/// </summary>
/// <param name="restoreStorageAfterFailure">
/// when true, the injected fault is cleared and a second saveNamespace is attempted
/// before the namesystem is restarted
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestFailedSaveNamespace(bool restoreStorageAfterFailure)
{
    Configuration configuration = GetConf();
    NameNode.InitMetrics(configuration, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(configuration);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(configuration);

    // Swap the namesystem's FSImage (and that image's storage) for Mockito spies.
    FSImage realImage = namesystem.GetFSImage();
    NNStorage realStorage = realImage.GetStorage();
    // unlock any directories that FSNamesystem's initialization may have locked
    realStorage.Close();
    NNStorage storageSpy = Org.Mockito.Mockito.Spy(realStorage);
    realImage.storage = storageSpy;
    FSImage imageSpy = Org.Mockito.Mockito.Spy(realImage);
    Whitebox.SetInternalState(namesystem, "fsImage", imageSpy);
    imageSpy.storage.SetStorageDirectories(
        FSNamesystem.GetNamespaceDirs(configuration),
        FSNamesystem.GetNamespaceEditsDirs(configuration));

    // Every attempt to write an image file now fails with the injected exception.
    IOException injectedFault = new IOException("Injected fault: saveFSImage");
    Org.Mockito.Mockito.DoThrow(injectedFault).When(imageSpy).SaveFSImage(
        (SaveNamespaceContext)Matchers.AnyObject(),
        (Storage.StorageDirectory)Matchers.AnyObject(),
        (NNStorage.NameNodeFile)Matchers.AnyObject());

    try
    {
        DoAnEdit(namesystem, 1);

        // Saving requires safe mode; the save itself must be rejected.
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        bool saveRejected = false;
        try
        {
            namesystem.SaveNamespace();
        }
        catch (IOException ioe)
        {
            saveRejected = true;
            Log.Info("Got expected exception", ioe);
        }
        if (!saveRejected)
        {
            NUnit.Framework.Assert.Fail("saveNamespace did not fail even when all directories failed!");
        }

        // Once the fault is cleared and restore is enabled, saving must work again.
        if (restoreStorageAfterFailure)
        {
            Org.Mockito.Mockito.Reset(imageSpy);
            storageSpy.SetRestoreFailedStorage(true);
            namesystem.SaveNamespace();
            CheckEditExists(namesystem, 1);
        }

        // Shut the current incarnation down ...
        realImage.Close();
        namesystem.Close();
        namesystem = null;

        // ... and load a new one, which should recover the namespace including the edit.
        namesystem = FSNamesystem.LoadFromDisk(configuration);
        CheckEditExists(namesystem, 1);
    }
    finally
    {
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Hands the storage of the given image to
/// <c>FSImageTestUtil.LogStorageContents</c> together with this test's logger.
/// </summary>
/// <param name="image">image whose storage is passed on for logging</param>
private void PrintStorages(FSImage image)
{
    FSImageTestUtil.LogStorageContents(
        Log,
        image.GetStorage());
}
/// <summary>
/// Verify that a saveNamespace command brings faulty directories
/// in fs.name.dir and fs.edit.dir back online.
/// </summary>
/// <remarks>
/// The first storage directory's "current" subdirectory has all permissions removed
/// before the first save and restored before the second. The test asserts that the
/// number of removed storage directories goes to one and then back to zero, and that a
/// namesystem reloaded from disk still contains the edit.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestReinsertnamedirsInSavenamespace()
{
    // create a configuration with the key to restore error
    // directories in fs.name.dir
    Configuration conf = GetConf();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeNameDirRestoreKey, true);
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage originalImage = fsn.GetFSImage();
    NNStorage storage = originalImage.GetStorage();
    FSImage spyImage = Org.Mockito.Mockito.Spy(originalImage);
    Whitebox.SetInternalState(fsn, "fsImage", spyImage);

    // Make <first storage dir>/current inaccessible through the local file system.
    FileSystem fs = FileSystem.GetLocal(conf);
    FilePath rootDir = storage.GetStorageDir(0).GetRoot();
    Path rootPath = new Path(rootDir.GetPath(), "current");
    FsPermission permissionNone = new FsPermission((short)0);
    FsPermission permissionAll = new FsPermission(FsAction.All, FsAction.ReadExecute, FsAction.ReadExecute);
    fs.SetPermission(rootPath, permissionNone);
    try
    {
        DoAnEdit(fsn, 1);
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);

        // Save namespace - should mark the first storage dir as faulty
        // since it's not traversable.
        Log.Info("Doing the first savenamespace.");
        fsn.SaveNamespace();
        Log.Info("First savenamespace sucessful.");
        NUnit.Framework.Assert.IsTrue("Savenamespace should have marked one directory as bad."
            + " But found " + storage.GetRemovedStorageDirs().Count + " bad directories.",
            storage.GetRemovedStorageDirs().Count == 1);

        fs.SetPermission(rootPath, permissionAll);

        // The next call to savenamespace should try inserting the
        // erroneous directory back to fs.name.dir. This command should
        // be successful.
        Log.Info("Doing the second savenamespace.");
        fsn.SaveNamespace();
        // Success is reported at Info, the same level as the first save above.
        Log.Info("Second savenamespace sucessful.");
        NUnit.Framework.Assert.IsTrue("Savenamespace should have been successful in removing "
            + " bad directories from Image." + " But found " + storage.GetRemovedStorageDirs().Count
            + " bad directories.", storage.GetRemovedStorageDirs().Count == 0);

        // Now shut down and restart the namesystem
        Log.Info("Shutting down fsimage.");
        originalImage.Close();
        fsn.Close();
        fsn = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        Log.Info("Loading new FSmage from disk.");
        fsn = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edit.
        Log.Info("Checking reloaded image.");
        CheckEditExists(fsn, 1);
        Log.Info("Reloaded image is good.");
    }
    finally
    {
        // Always hand the directory back with its permissions restored, even on failure.
        if (rootDir.Exists())
        {
            fs.SetPermission(rootPath, permissionAll);
        }
        if (fsn != null)
        {
            try
            {
                fsn.Close();
            }
            catch (Exception t)
            {
                // Best effort: a failed shutdown is logged, not rethrown from finally.
                Log.Fatal("Failed to shut down", t);
            }
        }
    }
}
/// <summary>
/// Runs <c>NumThreads</c> concurrent <c>Transactions</c> workers against the namesystem
/// of a MiniDFSCluster, closes the edit log, and checks that the finalized edits file in
/// every edits storage directory loads exactly the expected number of transactions.
/// </summary>
public virtual void TestEditLog()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    try
    {
        conf.SetBoolean(DFSConfigKeys.DfsNamenodeDelegationTokenAlwaysUseKey, true);
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();

        // Print the name directories of the first NameNode for diagnostics.
        foreach (URI nameDirUri in cluster.GetNameDirs(0))
        {
            FilePath dir = new FilePath(nameDirUri.GetPath());
            System.Console.Out.WriteLine(dir);
        }

        FSImage fsimage = namesystem.GetFSImage();
        FSEditLog editLog = fsimage.GetEditLog();

        // set small size of flush buffer
        editLog.SetOutputBufferCapacity(2048);

        // Create threads and make them run transactions concurrently.
        Sharpen.Thread[] workers = new Sharpen.Thread[NumThreads];
        for (int i = 0; i < NumThreads; i++)
        {
            TestSecurityTokenEditLog.Transactions trans =
                new TestSecurityTokenEditLog.Transactions(namesystem, NumTransactions);
            workers[i] = new Sharpen.Thread(trans, "TransactionThread-" + i);
            workers[i].Start();
        }

        // Wait for all transactions to get over. Only an interruption of the wait is
        // retried; any other failure from Join propagates instead of looping forever.
        foreach (Sharpen.Thread worker in workers)
        {
            bool joined = false;
            while (!joined)
            {
                try
                {
                    worker.Join();
                    joined = true;
                }
                catch (System.Threading.ThreadInterruptedException)
                {
                    // interrupted while waiting: retry the join on the same thread
                }
            }
        }

        editLog.Close();

        // Verify that we can read in all the transactions that we have written.
        // If there were any corruptions, it is likely that the reading in
        // of these transactions will throw an exception.
        //
        // The secret manager's threads are stopped before its key count is read
        // (presumably so the count cannot change during verification - confirm).
        namesystem.GetDelegationTokenSecretManager().StopThreads();
        int numKeys = namesystem.GetDelegationTokenSecretManager().GetNumberOfKeys();
        // + 2 for BEGIN and END txns
        int expectedTransactions = NumThreads * opsPerTrans * NumTransactions + numKeys + 2;

        // The finalized segment is looked up by its first and last transaction ids.
        const int firstTxId = 1;
        int lastTxId = firstTxId + expectedTransactions - 1;
        foreach (Storage.StorageDirectory sd in fsimage.GetStorage().DirIterable(NNStorage.NameNodeDirType.Edits))
        {
            FilePath editFile = NNStorage.GetFinalizedEditsFile(sd, firstTxId, lastTxId);
            System.Console.Out.WriteLine("Verifying file: " + editFile);
            FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
            long numEdits = loader.LoadFSEdits(new EditLogFileInputStream(editFile), firstTxId);
            NUnit.Framework.Assert.AreEqual("Verification for " + editFile, expectedTransactions, numEdits);
        }
    }
    finally
    {
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}