/// <summary>
/// Downloads the checkpoint identified by
/// <c>proxy.GetMostRecentCheckpointTxId()</c> into <paramref name="storage"/>
/// and saves its digest through a temporary <c>FSImage</c>.
/// </summary>
/// <remarks>
/// The temporary image is closed on every exit path (normal return, early
/// return and exception), not only when an IOException is thrown.
/// </remarks>
/// <param name="storage">
/// storage whose info is copied into the temporary image and which receives
/// the downloaded checkpoint
/// </param>
/// <param name="proxy">
/// supplies the most recent checkpoint transaction id and the current
/// transaction id
/// </param>
/// <returns>
/// <c>ErrCodeLogsUnavailable</c> when the shared-edits check is enabled and
/// <c>CheckLogsAvailableForRead</c> returns false; otherwise 0
/// </returns>
/// <exception cref="System.IO.IOException"/>
private int DownloadImage(NNStorage storage, NamenodeProtocol proxy)
{
    // Load the newly formatted image, using all of the directories
    // (including shared edits)
    long imageTxId = proxy.GetMostRecentCheckpointTxId();
    long curTxId = proxy.GetTransactionID();
    FSImage image = new FSImage(conf);
    try
    {
        image.GetStorage().SetStorageInfo(storage);
        image.InitEditLog(HdfsServerConstants.StartupOption.Regular);
        System.Diagnostics.Debug.Assert(image.GetEditLog().IsOpenForRead(),
            "Expected edit log to be open for read");

        // Ensure that we have enough edits already in the shared directory to
        // start up from the last checkpoint on the active.
        if (!skipSharedEditsCheck && !CheckLogsAvailableForRead(image, imageTxId, curTxId))
        {
            return ErrCodeLogsUnavailable;
        }

        image.GetStorage().WriteTransactionIdFileToStorage(curTxId);

        // Download that checkpoint into our storage directories.
        MD5Hash hash = TransferFsImage.DownloadImageToStorage(otherHttpAddr, imageTxId,
            storage, true);
        image.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, imageTxId,
            hash);
        return 0;
    }
    finally
    {
        // The image is local to this method, so release it regardless of how
        // the try block exits; exceptions still propagate to the caller.
        image.Close();
    }
}
/// <summary>
/// Stubs a spied FSImage so that every SaveFSImage call throws an
/// IOException, checks that SaveNamespace then fails, and finally verifies
/// that a namesystem reloaded from disk still contains the edit made before
/// the failure.
/// </summary>
/// <param name="restoreStorageAfterFailure">
/// if true, the spy is reset and the namespace is saved a second time
/// before the namesystem is shut down and reloaded
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestFailedSaveNamespace(bool restoreStorageAfterFailure)
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);

    // Swap the namesystem's image and its storage for Mockito spies.
    FSImage realImage = namesystem.GetFSImage();
    NNStorage realStorage = realImage.GetStorage();
    // unlock any directories that FSNamesystem's initialization may have locked
    realStorage.Close();
    NNStorage storageSpy = Org.Mockito.Mockito.Spy(realStorage);
    realImage.storage = storageSpy;
    FSImage imageSpy = Org.Mockito.Mockito.Spy(realImage);
    Whitebox.SetInternalState(namesystem, "fsImage", imageSpy);
    imageSpy.storage.SetStorageDirectories(
        FSNamesystem.GetNamespaceDirs(conf),
        FSNamesystem.GetNamespaceEditsDirs(conf));

    // Fault injection: any attempt to write an image fails.
    Org.Mockito.Mockito.DoThrow(new IOException("Injected fault: saveFSImage"))
        .When(imageSpy)
        .SaveFSImage(
            (SaveNamespaceContext)Matchers.AnyObject(),
            (Storage.StorageDirectory)Matchers.AnyObject(),
            (NNStorage.NameNodeFile)Matchers.AnyObject());

    try
    {
        DoAnEdit(namesystem, 1);

        // Save namespace
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        try
        {
            namesystem.SaveNamespace();
            NUnit.Framework.Assert.Fail("saveNamespace did not fail even when all directories failed!");
        }
        catch (IOException expected)
        {
            Log.Info("Got expected exception", expected);
        }

        // Ensure that, if storage dirs come back online, things work again.
        if (restoreStorageAfterFailure)
        {
            Org.Mockito.Mockito.Reset(imageSpy);
            storageSpy.SetRestoreFailedStorage(true);
            namesystem.SaveNamespace();
            CheckEditExists(namesystem, 1);
        }

        // Now shut down and restart the NN
        realImage.Close();
        namesystem.Close();
        namesystem = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        namesystem = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edits.
        CheckEditExists(namesystem, 1);
    }
    finally
    {
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Saves the namespace while a fault is injected into a spied FSImage or
/// NNStorage, then restarts the namesystem and checks that both edits made
/// by this test are still present.
/// </summary>
/// <remarks>
/// The "expected failure did not happen" assertion is made outside the
/// try/catch around SaveNamespace. Inside it, the assertion's own exception
/// would be caught by <c>catch (Exception)</c> and logged as the expected
/// failure, so the test could never report a missing failure.
/// </remarks>
/// <param name="fault">which fault to inject before saving the namespace</param>
/// <exception cref="System.Exception"/>
private void SaveNamespaceWithInjectedFault(TestSaveNamespace.Fault fault)
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage originalImage = fsn.GetFSImage();
    NNStorage storage = originalImage.GetStorage();
    NNStorage spyStorage = Org.Mockito.Mockito.Spy(storage);
    originalImage.storage = spyStorage;
    FSImage spyImage = Org.Mockito.Mockito.Spy(originalImage);
    Whitebox.SetInternalState(fsn, "fsImage", spyImage);

    // should we expect the save operation to fail
    bool shouldFail = false;

    // inject fault
    switch (fault)
    {
        case TestSaveNamespace.Fault.SaveSecondFsimageRte:
        {
            // The spy throws a RuntimeException when writing to the second directory
            Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(true))
                .When(spyImage)
                .SaveFSImage(
                    (SaveNamespaceContext)Matchers.AnyObject(),
                    (Storage.StorageDirectory)Matchers.AnyObject(),
                    (NNStorage.NameNodeFile)Matchers.AnyObject());
            shouldFail = false;
            break;
        }

        case TestSaveNamespace.Fault.SaveSecondFsimageIoe:
        {
            // The spy throws an IOException when writing to the second directory
            Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(false))
                .When(spyImage)
                .SaveFSImage(
                    (SaveNamespaceContext)Matchers.AnyObject(),
                    (Storage.StorageDirectory)Matchers.AnyObject(),
                    (NNStorage.NameNodeFile)Matchers.AnyObject());
            shouldFail = false;
            break;
        }

        case TestSaveNamespace.Fault.SaveAllFsimages:
        {
            // The spy throws a RuntimeException for every SaveFSImage call
            Org.Mockito.Mockito.DoThrow(new RuntimeException("Injected"))
                .When(spyImage)
                .SaveFSImage(
                    (SaveNamespaceContext)Matchers.AnyObject(),
                    (Storage.StorageDirectory)Matchers.AnyObject(),
                    (NNStorage.NameNodeFile)Matchers.AnyObject());
            shouldFail = true;
            break;
        }

        case TestSaveNamespace.Fault.WriteStorageAll:
        {
            // The spy throws an exception before writing any VERSION files
            Org.Mockito.Mockito.DoThrow(new RuntimeException("Injected"))
                .When(spyStorage)
                .WriteAll();
            shouldFail = true;
            break;
        }

        case TestSaveNamespace.Fault.WriteStorageOne:
        {
            // The spy throws on exception on one particular storage directory
            Org.Mockito.Mockito.DoAnswer(new TestSaveNamespace.FaultySaveImage(true))
                .When(spyStorage)
                .WriteProperties((Storage.StorageDirectory)Matchers.AnyObject());
            // TODO: unfortunately this fails -- should be improved.
            // See HDFS-2173.
            shouldFail = true;
            break;
        }
    }

    try
    {
        DoAnEdit(fsn, 1);

        // Save namespace - this may fail, depending on fault injected
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        bool saveFailed = false;
        try
        {
            fsn.SaveNamespace();
        }
        catch (Exception e)
        {
            if (!shouldFail)
            {
                throw;
            }
            saveFailed = true;
            Log.Info("Test caught expected exception", e);
        }

        // Checked here rather than inside the try block so the assertion
        // failure cannot be caught and logged as the "expected" exception.
        if (shouldFail && !saveFailed)
        {
            NUnit.Framework.Assert.Fail("Did not fail!");
        }

        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave);

        // Should still be able to perform edits
        DoAnEdit(fsn, 2);

        // Now shut down and restart the namesystem
        originalImage.Close();
        fsn.Close();
        fsn = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        fsn = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edits.
        CheckEditExists(fsn, 1);
        CheckEditExists(fsn, 2);
    }
    finally
    {
        if (fsn != null)
        {
            fsn.Close();
        }
    }
}
/// <summary>
/// Verify that a saveNamespace command brings faulty directories
/// in fs.name.dir and fs.edit.dir back online.
/// </summary>
/// <remarks>
/// The test removes all permissions from the first storage directory's
/// "current" path, saves the namespace and expects one removed storage
/// directory, restores the permissions, saves again and expects none, then
/// reloads the namesystem from disk and checks that the edit survived.
/// Cleanup always attempts to shut the namesystem down, even if restoring
/// the directory permissions throws.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestReinsertnamedirsInSavenamespace()
{
    // create a configuration with the key to restore error
    // directories in fs.name.dir
    Configuration conf = GetConf();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeNameDirRestoreKey, true);
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.LoadFromDisk(conf);

    // Replace the FSImage with a spy
    FSImage originalImage = fsn.GetFSImage();
    NNStorage storage = originalImage.GetStorage();
    FSImage spyImage = Org.Mockito.Mockito.Spy(originalImage);
    Whitebox.SetInternalState(fsn, "fsImage", spyImage);

    FileSystem fs = FileSystem.GetLocal(conf);
    FilePath rootDir = storage.GetStorageDir(0).GetRoot();
    Path rootPath = new Path(rootDir.GetPath(), "current");
    FsPermission permissionNone = new FsPermission((short)0);
    FsPermission permissionAll = new FsPermission(FsAction.All, FsAction.ReadExecute,
        FsAction.ReadExecute);
    fs.SetPermission(rootPath, permissionNone);

    try
    {
        DoAnEdit(fsn, 1);
        fsn.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);

        // Save namespace - should mark the first storage dir as faulty
        // since it's not traversable.
        Log.Info("Doing the first savenamespace.");
        fsn.SaveNamespace();
        Log.Info("First savenamespace sucessful.");
        NUnit.Framework.Assert.IsTrue("Savenamespace should have marked one directory as bad."
            + " But found " + storage.GetRemovedStorageDirs().Count + " bad directories.",
            storage.GetRemovedStorageDirs().Count == 1);

        fs.SetPermission(rootPath, permissionAll);

        // The next call to savenamespace should try inserting the
        // erroneous directory back to fs.name.dir. This command should
        // be successful.
        Log.Info("Doing the second savenamespace.");
        fsn.SaveNamespace();
        // Progress message, logged at the same level as the other steps.
        Log.Info("Second savenamespace sucessful.");
        NUnit.Framework.Assert.IsTrue("Savenamespace should have been successful in removing "
            + " bad directories from Image." + " But found " + storage.GetRemovedStorageDirs
            ().Count + " bad directories.", storage.GetRemovedStorageDirs().Count == 0);

        // Now shut down and restart the namesystem
        Log.Info("Shutting down fsimage.");
        originalImage.Close();
        fsn.Close();
        fsn = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        Log.Info("Loading new FSmage from disk.");
        fsn = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edit.
        Log.Info("Checking reloaded image.");
        CheckEditExists(fsn, 1);
        Log.Info("Reloaded image is good.");
    }
    finally
    {
        try
        {
            // Restore permissions first, as in the original ordering.
            if (rootDir.Exists())
            {
                fs.SetPermission(rootPath, permissionAll);
            }
        }
        finally
        {
            // Runs even if restoring the permissions threw, so the
            // namesystem is not left open.
            if (fsn != null)
            {
                try
                {
                    fsn.Close();
                }
                catch (Exception t)
                {
                    Log.Fatal("Failed to shut down", t);
                }
            }
        }
    }
}