/// <summary>
/// Returns the distance, in transaction ids, between the last transaction the
/// namesystem's image applied or wrote and its most recent checkpoint.
/// </summary>
/// <returns>
/// <c>GetLastAppliedOrWrittenTxId()</c> minus the storage's
/// <c>GetMostRecentCheckpointTxId()</c>.
/// </returns>
private long CountUncheckpointedTxns()
{
    FSImage image = namesystem.GetFSImage();
    long lastTxId = image.GetLastAppliedOrWrittenTxId();
    long checkpointTxId = image.GetStorage().GetMostRecentCheckpointTxId();
    return lastTxId - checkpointTxId;
}
/// <summary>
/// Downloads the checkpoint image identified by the proxy's most recent
/// checkpoint txid into <paramref name="storage"/>.
/// </summary>
/// <param name="storage">
/// Storage whose info is copied onto a temporary image and which receives the
/// downloaded checkpoint.
/// </param>
/// <param name="proxy">Supplies the most recent checkpoint txid and the current txid.</param>
/// <returns>
/// 0 on success, or <c>ErrCodeLogsUnavailable</c> when the shared-edits check is
/// enabled and <c>CheckLogsAvailableForRead</c> returns false.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private int DownloadImage(NNStorage storage, NamenodeProtocol proxy)
{
    long imageTxId = proxy.GetMostRecentCheckpointTxId();
    long curTxId = proxy.GetTransactionID();

    // Load the newly formatted image, using all of the directories
    // (including shared edits)
    FSImage image = new FSImage(conf);
    try
    {
        image.GetStorage().SetStorageInfo(storage);
        image.InitEditLog(HdfsServerConstants.StartupOption.Regular);
        System.Diagnostics.Debug.Assert(image.GetEditLog().IsOpenForRead(),
            "Expected edit log to be open for read");

        // Ensure that we have enough edits already in the shared directory to
        // start up from the last checkpoint on the active.
        if (!skipSharedEditsCheck && !CheckLogsAvailableForRead(image, imageTxId, curTxId))
        {
            return ErrCodeLogsUnavailable;
        }

        image.GetStorage().WriteTransactionIdFileToStorage(curTxId);

        // Download that checkpoint into our storage directories.
        MD5Hash hash = TransferFsImage.DownloadImageToStorage(otherHttpAddr, imageTxId, storage, true);
        image.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, imageTxId, hash);
        return 0;
    }
    finally
    {
        // The image is a local, temporary object: release it on every exit path
        // (success, early return, or any exception), not only on IOException.
        image.Close();
    }
}
/// <summary>
/// Rejects a transfer request whose requestor is not authorized or whose
/// storage info differs from this namenode's.
/// </summary>
/// <remarks>
/// The requestor check runs only when security is enabled. The storage-info
/// check is skipped when <paramref name="theirStorageInfoString"/> is null.
/// On rejection the method sends <c>ScForbidden</c> on
/// <paramref name="response"/>, logs a warning, and throws.
/// </remarks>
/// <exception cref="System.IO.IOException">The request was rejected.</exception>
private void ValidateRequest(ServletContext context, Configuration conf, HttpServletRequest request, HttpServletResponse response, FSImage nnImage, string theirStorageInfoString)
{
    if (UserGroupInformation.IsSecurityEnabled())
    {
        string requestor = request.GetUserPrincipal().GetName();
        if (!IsValidRequestor(context, requestor, conf))
        {
            string denied = "Only Namenode, Secondary Namenode, and administrators may access " + "this servlet";
            response.SendError(HttpServletResponse.ScForbidden, denied);
            Log.Warn("Received non-NN/SNN/administrator request for image or edits from " + requestor + " at " + request.GetRemoteHost());
            throw new IOException(denied);
        }
    }

    string myStorageInfoString = nnImage.GetStorage().ToColonSeparatedString();
    bool storageAccepted = theirStorageInfoString == null
        || myStorageInfoString.Equals(theirStorageInfoString);
    if (storageAccepted)
    {
        return;
    }

    string mismatch = "This namenode has storage info " + myStorageInfoString + " but the secondary expected " + theirStorageInfoString;
    response.SendError(HttpServletResponse.ScForbidden, mismatch);
    Log.Warn("Received an invalid request file transfer request " + "from a secondary with storage info " + theirStorageInfoString);
    throw new IOException(mismatch);
}
/// <summary>
/// Handles an image upload (HTTP PUT): parses the request parameters, validates
/// the requestor and storage info, then runs the upload as the current user.
/// </summary>
/// <remarks>
/// Any failure is reported to the client with <c>ScGone</c> and rethrown as an
/// <see cref="System.IO.IOException"/> that carries the original exception as
/// its inner exception.
/// </remarks>
/// <exception cref="Javax.Servlet.ServletException"/>
/// <exception cref="System.IO.IOException"/>
protected override void DoPut(HttpServletRequest request, HttpServletResponse response)
{
    try
    {
        ServletContext context = GetServletContext();
        FSImage nnImage = NameNodeHttpServer.GetFsImageFromContext(context);
        Configuration conf = (Configuration)context.GetAttribute(JspHelper.CurrentConf);
        ImageServlet.PutImageParams parsedParams = new ImageServlet.PutImageParams(request, response, conf);
        NameNodeMetrics metrics = NameNode.GetNameNodeMetrics();

        ValidateRequest(context, conf, request, response, nnImage, parsedParams.GetStorageInfoString());

        // NOTE(review): the converted source carried two stranded comments here
        // ("Metrics non-null only when used inside name node" and "Now that we
        // have a new checkpoint, we might be able to remove some old ones");
        // they presumably describe the body of _PrivilegedExceptionAction_458.
        UserGroupInformation.GetCurrentUser().DoAs(
            new _PrivilegedExceptionAction_458(parsedParams, nnImage, response, request, conf, metrics));
    }
    catch (Exception t)
    {
        string errMsg = "PutImage failed. " + StringUtils.StringifyException(t);
        response.SendError(HttpServletResponse.ScGone, errMsg);
        // Keep the original failure as the inner exception so its stack trace
        // is not lost to callers and logs.
        throw new IOException(errMsg, t);
    }
}
/// <summary>
/// Returns the journal at position <paramref name="index"/> in the edit log
/// of the cluster's namesystem image.
/// </summary>
/// <param name="index">Position within the list returned by <c>GetJournals()</c>.</param>
private JournalSet.JournalAndStream GetJournalAndStream(int index)
{
    FSEditLog log = cluster.GetNamesystem().GetFSImage().GetEditLog();
    return log.GetJournals()[index];
}
/// <summary>
/// Download <code>fsimage</code> and <code>edits</code>
/// files from the name-node.
/// </summary>
/// <param name="nnHostPort">Address of the name-node to download from.</param>
/// <param name="dstImage">Image that receives the downloaded files.</param>
/// <param name="sig">Checkpoint signature; its <c>mostRecentCheckpointTxId</c> fixes the expected first edit txid.</param>
/// <param name="manifest">Edit logs available on the name-node; must be non-empty and start at the expected txid.</param>
/// <returns>true if a new image has been downloaded and needs to be loaded</returns>
/// <exception cref="System.IO.IOException">
/// The manifest is empty or does not start at the expected txid, or the
/// privileged download action failed with an I/O error.
/// </exception>
internal static bool DownloadCheckpointFiles(Uri nnHostPort, FSImage dstImage, CheckpointSignature sig, RemoteEditLogManifest manifest)
{
    // Sanity check manifest - these could happen if, eg, someone on the
    // NN side accidentally rmed the storage directories
    if (manifest.GetLogs().IsEmpty())
    {
        throw new IOException("Found no edit logs to download on NN since txid " + sig.mostRecentCheckpointTxId);
    }

    long expectedTxId = sig.mostRecentCheckpointTxId + 1;
    if (manifest.GetLogs()[0].GetStartTxId() != expectedTxId)
    {
        // The closing parenthesis was missing from this message.
        throw new IOException("Bad edit log manifest (expected txid = " + expectedTxId + "): " + manifest);
    }

    try
    {
        // NOTE(review): the action presumably fetches the fsimage and the edits
        // files and reports whether the downloaded image still has to be
        // loaded; its body is not visible here.
        bool needsReload = UserGroupInformation.GetCurrentUser().DoAs(
            new _PrivilegedExceptionAction_444(dstImage, sig, nnHostPort, manifest));
        return needsReload;
    }
    catch (IOException)
    {
        // I/O failures are part of this method's documented contract; do not
        // bury them inside a RuntimeException.
        throw;
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }
}
/// <summary>
/// Starts a cluster with the Import startup option and verifies that the image
/// and edits files live in separate directories and have the recorded lengths.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void CheckNameNodeFiles()
{
    Log.Info("-- about to start DFS cluster");
    MiniDFSCluster cluster = null;
    try
    {
        // Start the namenode with the import option on unmanaged, unformatted dirs.
        MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(config)
            .Format(false)
            .ManageDataDfsDirs(false)
            .ManageNameDfsDirs(false)
            .StartupOption(HdfsServerConstants.StartupOption.Import);
        cluster = builder.Build();
        cluster.WaitActive();
        Log.Info("--NN started with checkpoint option");

        NameNode nameNode = cluster.GetNameNode();
        NUnit.Framework.Assert.IsNotNull(nameNode);

        // Verify that image file sizes did not change.
        VerifyDifferentDirs(nameNode.GetFSImage(), this.fsimageLength, this.editsLength);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Verifies that every storage directory of <paramref name="img"/> is either an
/// image directory or an edits directory, and that the file it holds has the
/// expected length.
/// </summary>
/// <param name="img">Image whose storage directories are inspected.</param>
/// <param name="expectedImgSize">Expected length of the image file (txid 0) in each image directory.</param>
/// <param name="expectedEditsSize">Expected length of the edits file (txid 0) in each edits directory.</param>
private void VerifyDifferentDirs(FSImage img, long expectedImgSize, long expectedEditsSize)
{
    IEnumerator<Storage.StorageDirectory> it = img.GetStorage().DirIterator();
    while (it.HasNext())
    {
        Storage.StorageDirectory sd = it.Next();
        if (sd.GetStorageDirType().IsOfType(NNStorage.NameNodeDirType.Image))
        {
            FilePath imf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Image, 0);
            Log.Info("--image file " + imf.GetAbsolutePath() + "; len = " + imf.Length() + "; expected = " + expectedImgSize);
            NUnit.Framework.Assert.AreEqual(expectedImgSize, imf.Length());
        }
        else if (sd.GetStorageDirType().IsOfType(NNStorage.NameNodeDirType.Edits))
        {
            FilePath edf = NNStorage.GetStorageFile(sd, NNStorage.NameNodeFile.Edits, 0);
            Log.Info("-- edits file " + edf.GetAbsolutePath() + "; len = " + edf.Length() + "; expected = " + expectedEditsSize);
            NUnit.Framework.Assert.AreEqual(expectedEditsSize, edf.Length());
        }
        else
        {
            // A directory that is neither image-typed nor edits-typed fails the test.
            NUnit.Framework.Assert.Fail("Image/Edits directories are not different");
        }
    }
}
/// <summary>
/// Builds a signature from an image: the base is initialised from the image's
/// storage, and the block pool id, most recent checkpoint txid and current
/// edit-log segment txid are copied from the image.
/// </summary>
/// <param name="fsImage">Image to take the signature values from.</param>
internal CheckpointSignature(FSImage fsImage)
    : base(fsImage.GetStorage())
{
    this.blockpoolID = fsImage.GetBlockPoolID();
    this.mostRecentCheckpointTxId = fsImage.GetStorage().GetMostRecentCheckpointTxId();
    this.curSegmentTxId = fsImage.GetEditLog().GetCurSegmentTxId();
}
/// <summary>
/// Checks that replication queues are not populated during the first safemode,
/// are populated after leaving it, and stay populated when safemode is entered
/// a second time.
/// </summary>
public virtual void TestReplQueuesActiveAfterStartupSafemode()
{
    Configuration conf = new Configuration();

    // A namesystem built on a mocked image whose edit log is also a mock.
    FSEditLog mockEditLog = Org.Mockito.Mockito.Mock<FSEditLog>();
    FSImage mockImage = Org.Mockito.Mockito.Mock<FSImage>();
    Org.Mockito.Mockito.When(mockImage.GetEditLog()).ThenReturn(mockEditLog);
    FSNamesystem fsn = Org.Mockito.Mockito.Spy(new FSNamesystem(conf, mockImage));

    // Make ShouldPopulateReplQueues return true
    HAContext haContext = Org.Mockito.Mockito.Mock<HAContext>();
    HAState haState = Org.Mockito.Mockito.Mock<HAState>();
    Org.Mockito.Mockito.When(haContext.GetState()).ThenReturn(haState);
    Org.Mockito.Mockito.When(haState.ShouldPopulateReplQueues()).ThenReturn(true);
    Whitebox.SetInternalState(fsn, "haContext", haContext);

    // Make NameNode.GetNameNodeMetrics() not return null
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);

    // First safemode: queues must stay idle.
    fsn.EnterSafeMode(false);
    NUnit.Framework.Assert.IsTrue("FSNamesystem didn't enter safemode", fsn.IsInSafeMode());
    NUnit.Framework.Assert.IsTrue("Replication queues were being populated during very first " + "safemode", !fsn.IsPopulatingReplQueues());

    // Leaving safemode starts queue population.
    fsn.LeaveSafeMode();
    NUnit.Framework.Assert.IsTrue("FSNamesystem didn't leave safemode", !fsn.IsInSafeMode());
    NUnit.Framework.Assert.IsTrue("Replication queues weren't being populated even after leaving " + "safemode", fsn.IsPopulatingReplQueues());

    // Second safemode: queues keep being populated.
    fsn.EnterSafeMode(false);
    NUnit.Framework.Assert.IsTrue("FSNamesystem didn't enter safemode", fsn.IsInSafeMode());
    NUnit.Framework.Assert.IsTrue("Replication queues weren't being populated after entering " + "safemode 2nd time", fsn.IsPopulatingReplQueues());
}
/// <summary>
/// Invalidates storage: reports errors on every storage directory whose root is
/// in <paramref name="filesToInvalidate"/>, and makes writes fail on the file
/// journals rooted at <c>path2</c> or <c>path3</c>.
/// </summary>
/// <param name="fi">Image whose storage and edit log are tampered with.</param>
/// <param name="filesToInvalidate">Roots of the storage directories to mark as failed.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void InvalidateStorage(FSImage fi, ICollection<FilePath> filesToInvalidate)
{
    AList<Storage.StorageDirectory> failedDirs = new AList<Storage.StorageDirectory>(2);
    for (IEnumerator<Storage.StorageDirectory> dirs = fi.GetStorage().DirIterator(); dirs.HasNext();)
    {
        Storage.StorageDirectory dir = dirs.Next();
        if (!filesToInvalidate.Contains(dir.GetRoot()))
        {
            continue;
        }
        Log.Info("causing IO error on " + dir.GetRoot());
        failedDirs.AddItem(dir);
    }

    // simulate an error
    fi.GetStorage().ReportErrorsOnDirectories(failedDirs);

    foreach (JournalSet.JournalAndStream journal in fi.GetEditLog().GetJournals())
    {
        FileJournalManager fileManager = journal.GetManager() as FileJournalManager;
        if (fileManager == null)
        {
            continue;
        }

        FilePath root = fileManager.GetStorageDirectory().GetRoot();
        if (!(root.Equals(path2) || root.Equals(path3)))
        {
            continue;
        }

        // Swap in a spy stream whose Write always throws.
        EditLogOutputStream failingStream = Org.Mockito.Mockito.Spy(journal.GetCurrentStream());
        journal.SetCurrentStreamForTests(failingStream);
        Org.Mockito.Mockito.DoThrow(new IOException("Injected fault: write")).When(failingStream).Write(Org.Mockito.Mockito.AnyObject<FSEditLogOp>());
    }
}
/// <summary>
/// Merges a checkpoint into <paramref name="dstImage"/>: optionally reloads the
/// image file for the signature's checkpoint txid, rolls forward through the
/// edit logs in <paramref name="manifest"/>, then saves the image to all
/// directories and writes the storage.
/// </summary>
/// <param name="sig">Signature whose storage info is applied to the destination storage.</param>
/// <param name="manifest">Edit logs to apply on top of the image.</param>
/// <param name="loadImage">When true, the image file is reloaded under the namesystem write lock first.</param>
/// <param name="dstImage">Image being merged into.</param>
/// <param name="dstNamesystem">Namesystem the image is loaded into and saved from.</param>
/// <exception cref="System.IO.IOException">
/// <paramref name="loadImage"/> is true but no image file exists for the
/// checkpoint txid.
/// </exception>
internal static void DoMerge(CheckpointSignature sig, RemoteEditLogManifest manifest, bool loadImage, FSImage dstImage, FSNamesystem dstNamesystem)
{
    NNStorage storage = dstImage.GetStorage();
    storage.SetStorageInfo(sig);

    if (loadImage)
    {
        FilePath imageFile = storage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId);
        if (imageFile == null)
        {
            throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId + " even though it should have " + "just been downloaded");
        }

        dstNamesystem.WriteLock();
        try
        {
            dstImage.ReloadFromImageFile(imageFile, dstNamesystem);
        }
        finally
        {
            dstNamesystem.WriteUnlock();
        }
        dstNamesystem.ImageLoadComplete();
    }

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().DuringMerge();

    Checkpointer.RollForwardByApplyingLogs(manifest, dstImage, dstNamesystem);

    // The following has the side effect of purging old fsimages/edit logs.
    long lastAppliedTxId = dstImage.GetLastAppliedTxId();
    dstImage.SaveFSImageInAllDirs(dstNamesystem, lastAppliedTxId);
    storage.WriteAll();
}
/// <summary>
/// Enters safe mode and saves the namespace while an edit thread is held inside
/// a blocking <c>Flush()</c> on the first journal's stream, then checks the
/// resulting finalized and in-progress edit logs.
/// </summary>
public virtual void TestSaveImageWhileSyncInProgress()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);
    try
    {
        FSImage image = namesystem.GetFSImage();
        FSEditLog log = image.GetEditLog();

        // Replace the first journal's stream with a spy so Flush can be intercepted.
        JournalSet.JournalAndStream firstJournal = log.GetJournals()[0];
        EditLogFileOutputStream spiedStream = Org.Mockito.Mockito.Spy((EditLogFileOutputStream)firstJournal.GetCurrentStream());
        firstJournal.SetCurrentStreamForTests(spiedStream);

        AtomicReference<Exception> editThreadError = new AtomicReference<Exception>();
        CountDownLatch flushEntered = new CountDownLatch(1);
        Sharpen.Thread editThread = new _Thread_371(namesystem, editThreadError, flushEntered);

        // Signal to main thread that the edit thread is in the racy section
        Answer<Void> blockingFlush = new _Answer_388(editThread, flushEntered);
        Org.Mockito.Mockito.DoAnswer(blockingFlush).When(spiedStream).Flush();
        editThread.Start();

        // Wait for the edit thread to get to the logsync unsynchronized section
        Log.Info("Main thread: waiting to enter flush...");
        flushEntered.Await();
        NUnit.Framework.Assert.IsNull(editThreadError.Get());
        Log.Info("Main thread: detected that logSync is in unsynchronized section.");
        Log.Info("Trying to enter safe mode.");
        Log.Info("This should block for " + BlockTime + "sec, since flush will sleep that long");

        long startMillis = Time.Now();
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        long endMillis = Time.Now();
        Log.Info("Entered safe mode");

        // Make sure we really waited for the flush to complete!
        NUnit.Framework.Assert.IsTrue(endMillis - startMillis > (BlockTime - 1) * 1000);

        // Once we're in safe mode, save namespace.
        namesystem.SaveNamespace();
        Log.Info("Joining on edit thread...");
        editThread.Join();
        NUnit.Framework.Assert.IsNull(editThreadError.Get());

        // We did 3 edits: begin, txn, and end
        string finalizedName = NNStorage.GetFinalizedEditsFileName(1, 3);
        NUnit.Framework.Assert.AreEqual(3, VerifyEditLogs(namesystem, image, finalizedName, 1));

        // after the save, just the one "begin"
        string inProgressName = NNStorage.GetInProgressEditsFileName(4);
        NUnit.Framework.Assert.AreEqual(1, VerifyEditLogs(namesystem, image, inProgressName, 4));
    }
    finally
    {
        Log.Info("Closing nn");
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Captures the values the privileged action works with.
/// </summary>
/// <param name="dstImage">Stored in the <c>dstImage</c> field.</param>
/// <param name="sig">Stored in the <c>sig</c> field.</param>
/// <param name="nnHostPort">Stored in the <c>nnHostPort</c> field.</param>
/// <param name="manifest">Stored in the <c>manifest</c> field.</param>
public _PrivilegedExceptionAction_444(FSImage dstImage, CheckpointSignature sig, Uri nnHostPort, RemoteEditLogManifest manifest)
{
    this.nnHostPort = nnHostPort;
    this.sig = sig;
    this.manifest = manifest;
    this.dstImage = dstImage;
}
/// <summary>
/// Wraps the namenode's current image in a Mockito spy and installs the spy
/// into the namesystem's <c>fsImage</c> field.
/// </summary>
/// <param name="nn1">Namenode whose namesystem image is replaced.</param>
/// <returns>The spy that now stands in for the image.</returns>
public static FSImage SpyOnFsImage(NameNode nn1)
{
    FSNamesystem namesystem = nn1.GetNamesystem();
    FSImage realImage = namesystem.GetFSImage();
    FSImage spiedImage = Org.Mockito.Mockito.Spy(realImage);
    Whitebox.SetInternalState(namesystem, "fsImage", spiedImage);
    return spiedImage;
}
/// <summary>This is called when using bootstrapStandby for HA upgrade.</summary>
/// <remarks>
/// The SBN should also create the previous directory so that later, when it
/// starts, it understands that the cluster is in the upgrade state. Storage
/// directories are recovered first; if that fails with an inconsistent state,
/// or any directory is not formatted, the storage is formatted. Each storage
/// directory then has its current directory moved aside through
/// <c>NNUpgradeUtil.RenameCurToTmp</c>.
/// </remarks>
/// <param name="storage">Storage to prepare.</param>
/// <param name="nsInfo">Namespace info applied to the storage once the rename succeeds.</param>
/// <returns>false if formatting was needed and <c>Format</c> returned false; true otherwise.</returns>
/// <exception cref="System.IO.IOException"/>
private bool DoPreUpgrade(NNStorage storage, NamespaceInfo nsInfo)
{
    bool formatted = false;
    IDictionary<Storage.StorageDirectory, Storage.StorageState> dirStates = new Dictionary<Storage.StorageDirectory, Storage.StorageState>();
    try
    {
        formatted = FSImage.RecoverStorageDirs(HdfsServerConstants.StartupOption.Upgrade, storage, dirStates);
        if (dirStates.Values.Contains(Storage.StorageState.NotFormatted))
        {
            // recoverStorageDirs returns true if there is a formatted directory
            formatted = false;
            System.Console.Error.WriteLine("The original storage directory is not formatted.");
        }
    }
    catch (InconsistentFSStateException inconsistent)
    {
        // The storage is in a bad state; "formatted" stays false so it is
        // formatted below.
        Log.Warn("The storage directory is in an inconsistent state", inconsistent);
    }
    finally
    {
        storage.UnlockAll();
    }

    // if there is InconsistentFSStateException or the storage is not formatted,
    // format the storage. Although this format is done through the new
    // software, since in HA setup the SBN is rolled back through
    // "-bootstrapStandby", we should still be fine.
    if (!formatted)
    {
        bool formatSucceeded = Format(storage, nsInfo);
        if (!formatSucceeded)
        {
            return false;
        }
    }

    // make sure there is no previous directory
    FSImage.CheckUpgrade(storage);

    // Do preUpgrade for each directory
    IEnumerator<Storage.StorageDirectory> dirs = storage.DirIterator(false);
    while (dirs.HasNext())
    {
        Storage.StorageDirectory dir = dirs.Next();
        try
        {
            NNUpgradeUtil.RenameCurToTmp(dir);
        }
        catch (IOException renameFailure)
        {
            Log.Error("Failed to move aside pre-upgrade storage " + "in image directory " + dir.GetRoot(), renameFailure);
            throw;
        }
    }

    storage.SetStorageInfo(nsInfo);
    storage.SetBlockPoolID(nsInfo.GetBlockPoolID());
    return true;
}
/// <summary>
/// Rolls the edit log <c>NumRolls</c> times while transaction workers run, and
/// after each roll verifies the finalized segment and that the next in-progress
/// segment file exists.
/// </summary>
/// <remarks>
/// An error captured by a worker is rethrown as a <c>RuntimeException</c>, but
/// only after the file system and the cluster have been released.
/// </remarks>
public virtual void TestEditLogRolling()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        NamenodeProtocols nn = cluster.GetNameNode().GetRpcServer();
        FSImage fsimage = cluster.GetNamesystem().GetFSImage();
        Storage.StorageDirectory sd = fsimage.GetStorage().GetStorageDir(0);

        StartTransactionWorkers(nn, caughtErr);

        long previousLogTxId = 1;
        for (int i = 0; i < NumRolls && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: an interrupted pause between rolls does
                // not affect what the test verifies.
            }

            Log.Info("Starting roll " + i + ".");
            CheckpointSignature sig = nn.RollEditLog();
            long nextLog = sig.curSegmentTxId;
            string logFileName = NNStorage.GetFinalizedEditsFileName(previousLogTxId, nextLog - 1);
            previousLogTxId += VerifyEditLogs(cluster.GetNamesystem(), fsimage, logFileName, previousLogTxId);
            NUnit.Framework.Assert.AreEqual(previousLogTxId, nextLog);

            FilePath expectedLog = NNStorage.GetInProgressEditsFile(sd, previousLogTxId);
            NUnit.Framework.Assert.IsTrue("Expect " + expectedLog + " to exist", expectedLog.Exists());
        }
    }
    finally
    {
        StopTransactionWorkers();

        // Release resources before surfacing a worker error; throwing first
        // would skip the cleanup and leak the file system and the cluster.
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
        if (caughtErr.Get() != null)
        {
            throw new RuntimeException(caughtErr.Get());
        }
    }
}
/// <summary>
/// Creates an <c>FSNamesystem</c> on top of a mocked image whose
/// <c>GetEditLog()</c> returns a mocked edit log, and marks it image-loaded
/// according to <c>fsIsReady</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private static FSNamesystem GetMockNamesystem()
{
    FSImage mockImage = Org.Mockito.Mockito.Mock<FSImage>();
    FSEditLog mockEditLog = Org.Mockito.Mockito.Mock<FSEditLog>();
    Org.Mockito.Mockito.DoReturn(mockEditLog).When(mockImage).GetEditLog();

    FSNamesystem namesystem = new FSNamesystem(conf, mockImage);
    namesystem.SetImageLoaded(fsIsReady);
    return namesystem;
}
/// <summary>
/// Enters safe mode and saves the namespace while an edit thread is held just
/// before <c>LogSync()</c> on a spied edit log, then checks the resulting
/// finalized and in-progress edit logs.
/// </summary>
public virtual void TestSaveRightBeforeSync()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);
    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);
    try
    {
        FSImage image = namesystem.GetFSImage();

        // Install a spy on the edit log so LogSync can be intercepted.
        FSEditLog spiedLog = Org.Mockito.Mockito.Spy(image.GetEditLog());
        DFSTestUtil.SetEditLogForTesting(namesystem, spiedLog);

        AtomicReference<Exception> editThreadError = new AtomicReference<Exception>();
        CountDownLatch syncReached = new CountDownLatch(1);
        Sharpen.Thread editThread = new _Thread_467(namesystem, editThreadError, syncReached);
        Answer<Void> blockingSync = new _Answer_484(editThread, syncReached);
        Org.Mockito.Mockito.DoAnswer(blockingSync).When(spiedLog).LogSync();
        editThread.Start();

        Log.Info("Main thread: waiting to just before logSync...");
        syncReached.Await();
        NUnit.Framework.Assert.IsNull(editThreadError.Get());
        Log.Info("Main thread: detected that logSync about to be called.");
        Log.Info("Trying to enter safe mode.");
        Log.Info("This should block for " + BlockTime + "sec, since we have pending edits");

        long startMillis = Time.Now();
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        long endMillis = Time.Now();
        Log.Info("Entered safe mode");

        // Make sure we really waited for the flush to complete!
        NUnit.Framework.Assert.IsTrue(endMillis - startMillis > (BlockTime - 1) * 1000);

        // Once we're in safe mode, save namespace.
        namesystem.SaveNamespace();
        Log.Info("Joining on edit thread...");
        editThread.Join();
        NUnit.Framework.Assert.IsNull(editThreadError.Get());

        // We did 3 edits: begin, txn, and end
        string finalizedName = NNStorage.GetFinalizedEditsFileName(1, 3);
        NUnit.Framework.Assert.AreEqual(3, VerifyEditLogs(namesystem, image, finalizedName, 1));

        // after the save, just the one "begin"
        string inProgressName = NNStorage.GetInProgressEditsFileName(4);
        NUnit.Framework.Assert.AreEqual(1, VerifyEditLogs(namesystem, image, inProgressName, 4));
    }
    finally
    {
        Log.Info("Closing nn");
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Checks that <paramref name="si"/> belongs to the same cluster as this
/// object and that its storage version matches.
/// </summary>
/// <param name="si">Image to compare against this object's fields.</param>
/// <exception cref="System.IO.IOException">
/// Either check failed; the message lists this object's fields followed by the
/// values read from <paramref name="si"/>.
/// </exception>
internal virtual void ValidateStorageInfo(FSImage si)
{
    if (IsSameCluster(si) && StorageVersionMatches(si.GetStorage()))
    {
        return;
    }

    string actual = "LV = " + layoutVersion + " namespaceID = " + namespaceID + " cTime = " + cTime + " ; clusterId = " + clusterID + " ; blockpoolId = " + blockpoolID;
    string expected = si.GetStorage().layoutVersion + "; " + si.GetStorage().namespaceID + "; " + si.GetStorage().cTime + "; " + si.GetClusterID() + "; " + si.GetBlockPoolID();
    throw new IOException("Inconsistent checkpoint fields.\n" + actual + ".\nExpecting respectively: " + expected + ".");
}
//////////////////////////////////////////////////////
/// <summary>
/// Decides whether a checkpoint should be taken at startup.
/// </summary>
/// <returns>
/// In the Checkpoint role: true only when the version file of storage
/// directory 0 does not exist. In any other role: always true.
/// </returns>
internal virtual bool ShouldCheckpointAtStartup()
{
    FSImage fsImage = GetFSImage();
    if (!IsRole(HdfsServerConstants.NamenodeRole.Checkpoint))
    {
        // BN always checkpoints on startup in order to get in sync with namespace
        return true;
    }

    System.Diagnostics.Debug.Assert(fsImage.GetStorage().GetNumStorageDirs() > 0);
    bool versionFileExists = fsImage.GetStorage().GetStorageDir(0).GetVersionFile().Exists();
    return !versionFileExists;
}
/// <summary>
/// Returns the absolute path of the finalized edits file covering txids 1 up to
/// <c>sig.curSegmentTxId - 1</c> in the first edits storage directory.
/// </summary>
/// <param name="sig">Signature whose current segment txid bounds the finalized file.</param>
/// <returns>edits file name for cluster</returns>
/// <exception cref="System.IO.IOException"/>
private string GetEditsFilename(CheckpointSignature sig)
{
    NNStorage storage = cluster.GetNameNode().GetFSImage().GetStorage();

    // it was set up to only have ONE StorageDirectory
    IEnumerator<Storage.StorageDirectory> editsDirs = storage.DirIterator(NNStorage.NameNodeDirType.Edits);
    Storage.StorageDirectory editsDir = editsDirs.Next();

    long lastFinalizedTxId = sig.curSegmentTxId - 1;
    FilePath editsFile = NNStorage.GetFinalizedEditsFile(editsDir, 1, lastFinalizedTxId);
    System.Diagnostics.Debug.Assert(editsFile.Exists(), "expected " + editsFile + " exists");
    return editsFile.GetAbsolutePath();
}
/// <summary>
/// Corrupts the tail of the latest edits file and checks that restarting the
/// cluster fails with an <see cref="System.IO.IOException"/> whose message
/// matches the expected "recent opcode offsets" pattern.
/// </summary>
/// <remarks>
/// The file handle used for the corruption is closed in a finally block, and
/// any cluster still running when the test exits is shut down.
/// </remarks>
public virtual void TestDisplayRecentEditLogOpCodes()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    try
    {
        // start a cluster
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).EnableManagedDfsDirsRedundancy(false).Build();
        cluster.WaitActive();
        FileSystem fileSys = cluster.GetFileSystem();
        FSNamesystem namesystem = cluster.GetNamesystem();
        FSImage fsimage = namesystem.GetFSImage();
        for (int i = 0; i < 20; i++)
        {
            fileSys.Mkdirs(new Path("/tmp/tmp" + i));
        }
        Storage.StorageDirectory sd = fsimage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Edits).Next();
        cluster.Shutdown();
        // Already shut down: clear the reference so the finally block below
        // only ever shuts down a cluster that is still running.
        cluster = null;

        FilePath editFile = FSImageTestUtil.FindLatestEditsLog(sd).GetFile();
        NUnit.Framework.Assert.IsTrue("Should exist: " + editFile, editFile.Exists());

        // Corrupt the edits file.
        long fileLen = editFile.Length();
        RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
        try
        {
            rwf.Seek(fileLen - 40);
            for (int i = 0; i < 20; i++)
            {
                rwf.Write(FSEditLogOpCodes.OpDelete.GetOpCode());
            }
        }
        finally
        {
            rwf.Close();
        }

        StringBuilder bld = new StringBuilder();
        bld.Append("^Error replaying edit log at offset \\d+. ");
        bld.Append("Expected transaction ID was \\d+\n");
        bld.Append("Recent opcode offsets: (\\d+\\s*){4}$");
        try
        {
            cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).EnableManagedDfsDirsRedundancy(false).Format(false).Build();
            NUnit.Framework.Assert.Fail("should not be able to start");
        }
        catch (IOException e)
        {
            NUnit.Framework.Assert.IsTrue("error message contains opcodes message", e.Message.Matches(bld.ToString()));
        }
    }
    finally
    {
        // Reached with a live cluster if setup failed part-way, or if the
        // second start unexpectedly succeeded.
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Puts NN0 into the un-finalized upgrade state, prepares NN1's storage
/// according to <paramref name="state"/>, runs BootstrapStandby on NN1 and
/// checks that NN1 restarts with the upgrade not finalized.
/// </summary>
/// <param name="state">
/// Recover: NN1's current directory is renamed to previous.tmp. Format: it is
/// renamed to a sibling named "wrong". Any other value: storage is left as is.
/// </param>
/// <exception cref="System.Exception"/>
private void TestUpgrade(TestBootstrapStandbyWithQJM.UpgradeState state)
{
    cluster.TransitionToActive(0);
    Configuration confNN1 = cluster.GetConfiguration(1);
    Storage.StorageDirectory nn1Dir = cluster.GetNameNode(1).GetFSImage().GetStorage().GetStorageDir(0);
    FilePath current = nn1Dir.GetCurrentDir();
    FilePath tmp = cluster.GetNameNode(1).GetFSImage().GetStorage().GetStorageDir(0).GetPreviousTmp();

    // shut down nn1
    cluster.ShutdownNameNode(1);

    // make NN0 in upgrade state
    FSImage fsImage0 = cluster.GetNameNode(0).GetNamesystem().GetFSImage();
    Whitebox.SetInternalState(fsImage0, "isUpgradeFinalized", false);

    if (state == TestBootstrapStandbyWithQJM.UpgradeState.Recover)
    {
        // rename the current directory to previous.tmp in nn1
        NNStorage.Rename(current, tmp);
    }
    else if (state == TestBootstrapStandbyWithQJM.UpgradeState.Format)
    {
        // rename the current directory to a random name so it's not formatted
        FilePath wrongPath = new FilePath(current.GetParentFile(), "wrong");
        NNStorage.Rename(current, wrongPath);
    }

    int exitCode = BootstrapStandby.Run(new string[] { "-force" }, confNN1);
    NUnit.Framework.Assert.AreEqual(0, exitCode);

    // Should have copied over the namespace from the standby
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of(0));
    FSImageTestUtil.AssertNNFilesMatch(cluster);

    // make sure the NN1 is in upgrade state, i.e., the previous directory has
    // been successfully created
    cluster.RestartNameNode(1);
    bool upgradeFinalized = cluster.GetNameNode(1).GetNamesystem().IsUpgradeFinalized();
    NUnit.Framework.Assert.IsFalse(upgradeFinalized);
}
/// <summary>
/// Checks that the namesystem lock's fairness follows the
/// <c>dfs.namenode.fslock.fair</c> configuration value.
/// </summary>
public virtual void TestFsLockFairness()
{
    const string FairLockKey = "dfs.namenode.fslock.fair";

    Configuration conf = new Configuration();
    FSEditLog mockEditLog = Org.Mockito.Mockito.Mock<FSEditLog>();
    FSImage mockImage = Org.Mockito.Mockito.Mock<FSImage>();
    Org.Mockito.Mockito.When(mockImage.GetEditLog()).ThenReturn(mockEditLog);

    // Fair lock requested.
    conf.SetBoolean(FairLockKey, true);
    FSNamesystem fairNamesystem = new FSNamesystem(conf, mockImage);
    NUnit.Framework.Assert.IsTrue(fairNamesystem.GetFsLockForTests().IsFair());

    // Unfair lock requested.
    conf.SetBoolean(FairLockKey, false);
    FSNamesystem unfairNamesystem = new FSNamesystem(conf, mockImage);
    NUnit.Framework.Assert.IsFalse(unfairNamesystem.GetFsLockForTests().IsFair());
}
/// <summary>
/// Reads the <c>clusterID</c> property from the version file of the first
/// image storage directory configured in <paramref name="config"/>.
/// </summary>
/// <param name="config">Configuration naming the namespace and edits directories.</param>
/// <returns>The value of the <c>clusterID</c> property.</returns>
/// <exception cref="System.IO.IOException"/>
private string GetClusterId(Configuration config)
{
    // see if cluster id not empty.
    ICollection<URI> imageDirs = FSNamesystem.GetNamespaceDirs(config);
    IList<URI> editsDirs = FSNamesystem.GetNamespaceEditsDirs(config);
    FSImage image = new FSImage(config, imageDirs, editsDirs);

    IEnumerator<Storage.StorageDirectory> dirs = image.GetStorage().DirIterator(NNStorage.NameNodeDirType.Image);
    Storage.StorageDirectory firstDir = dirs.Next();

    Properties versionProps = Storage.ReadPropertiesFile(firstDir.GetVersionFile());
    string clusterId = versionProps.GetProperty("clusterID");
    Log.Info("successfully formated : sd=" + firstDir.GetCurrentDir() + ";cid=" + clusterId);
    return clusterId;
}
/// <summary>
/// Takes one name directory offline, rolls the edit log as an unfinished
/// checkpoint would, lets a secondary do a full checkpoint, and checks that a
/// directory created beforehand still exists before and after a namenode
/// restart.
/// </summary>
public virtual void TestMultipleSecondaryCheckpoint()
{
    SecondaryNameNode secondary = null;
    try
    {
        MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(config).NumDataNodes(1).ManageNameDfsDirs(false);
        cluster = builder.Build();
        cluster.WaitActive();
        secondary = new SecondaryNameNode(config);

        FSImage image = cluster.GetNameNode().GetFSImage();
        PrintStorages(image);

        FileSystem fs = cluster.GetFileSystem();
        Path testPath = new Path("/", "test");
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(testPath));
        PrintStorages(image);

        // Take name1 offline
        InvalidateStorage(image, ImmutableSet.Of(path1));

        // Simulate a 2NN beginning a checkpoint, but not finishing. This will
        // cause name1 to be restored.
        cluster.GetNameNodeRpc().RollEditLog();
        PrintStorages(image);

        // Now another 2NN comes along to do a full checkpoint.
        secondary.DoCheckpoint();
        PrintStorages(image);

        // The created file should still exist in the in-memory FS state after the
        // checkpoint.
        bool existsBeforeRestart = fs.Exists(testPath);
        NUnit.Framework.Assert.IsTrue("path exists before restart", existsBeforeRestart);
        secondary.Shutdown();

        // Restart the NN so it reloads the edits from on-disk.
        cluster.RestartNameNode();

        // The created file should still exist after the restart.
        bool existsAfterRestart = fs.Exists(testPath);
        NUnit.Framework.Assert.IsTrue("path should still exist after restart", existsAfterRestart);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
        if (secondary != null)
        {
            secondary.Shutdown();
        }
    }
}
/// <summary>
/// Loads the fsimage from a temp file and then updates the quota counts from
/// the root inode, holding the namesystem and directory write locks throughout.
/// </summary>
/// <param name="imageFile">Image file handed to the loader.</param>
/// <exception cref="System.IO.IOException"/>
private void LoadFSImageFromTempFile(FilePath imageFile)
{
    const string RootPath = "/";
    FSImageFormat.LoaderDelegator imageLoader = FSImageFormat.NewLoader(conf, fsn);

    // Lock order: namesystem first, then directory; released in reverse.
    fsn.WriteLock();
    fsn.GetFSDirectory().WriteLock();
    try
    {
        imageLoader.Load(imageFile, false);
        FSImage.UpdateCountForQuota(
            fsn.GetBlockManager().GetStoragePolicySuite(),
            INodeDirectory.ValueOf(fsn.GetFSDirectory().GetINode(RootPath), RootPath));
    }
    finally
    {
        fsn.GetFSDirectory().WriteUnlock();
        fsn.WriteUnlock();
    }
}
/// <summary>
/// Make sure that clients will receive StandbyExceptions even when a
/// checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
/// thread will have FSNS lock.
/// </summary>
/// <remarks>
/// Regression test for HDFS-4591. The checkpoint is held open by a delaying
/// answer on the spied image's <c>SaveNamespace</c>; while it is held, an RPC
/// to the SBN must fail with a StandbyException and the pending datanode
/// message count must grow.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestStandbyExceptionThrownDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spiedImage = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer delay = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(delay).When(spiedImage).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Eq(NNStorage.NameNodeFile.Image),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    delay.WaitForCall();
    NUnit.Framework.Assert.IsTrue("SBN is not performing checkpoint but it should be.", delay.GetFireCount() == 1 && delay.GetResultCount() == 0);

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    try
    {
        // Perform an RPC to the SBN and make sure it throws a StandbyException.
        nn1.GetRpcServer().GetFileInfo("/");
        NUnit.Framework.Assert.Fail("Should have thrown StandbyException, but instead succeeded.");
    }
    catch (StandbyException expected)
    {
        GenericTestUtils.AssertExceptionContains("is not supported", expected);
    }

    // Make sure new incremental block reports are processed during
    // checkpointing on the SBN.
    NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetPendingDataNodeMessageCount());
    DoCreate();
    Sharpen.Thread.Sleep(1000);
    NUnit.Framework.Assert.IsTrue(cluster.GetNamesystem(1).GetPendingDataNodeMessageCount() > 0);

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue("SBN should have still been checkpointing.", delay.GetFireCount() == 1 && delay.GetResultCount() == 0);

    // Release the checkpoint and wait for it to finish.
    delay.Proceed();
    delay.WaitForResult();
    NUnit.Framework.Assert.IsTrue("SBN should have finished checkpointing.", delay.GetFireCount() == 1 && delay.GetResultCount() == 1);
}
/// <summary>
/// While a checkpoint on the SBN is held open by a delaying answer on the
/// spied image's <c>SaveNamespace</c>, checks the lock state after starting a
/// separate thread and verifies that the standby's /jmx page can still be
/// fetched.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReadsAllowedDuringCheckpoint()
{
    // Set it up so that we know when the SBN checkpoint starts and ends.
    FSImage spiedImage = NameNodeAdapter.SpyOnFsImage(nn1);
    GenericTestUtils.DelayAnswer delay = new GenericTestUtils.DelayAnswer(Log);
    Org.Mockito.Mockito.DoAnswer(delay).When(spiedImage).SaveNamespace(
        Org.Mockito.Mockito.Any<FSNamesystem>(),
        Org.Mockito.Mockito.Any<NNStorage.NameNodeFile>(),
        Org.Mockito.Mockito.Any<Canceler>());

    // Perform some edits and wait for a checkpoint to start on the SBN.
    DoEdits(0, 1000);
    nn0.GetRpcServer().RollEditLog();
    delay.WaitForCall();
    NUnit.Framework.Assert.IsTrue("SBN is not performing checkpoint but it should be.", delay.GetFireCount() == 1 && delay.GetResultCount() == 0);

    // Make sure that the lock has actually been taken by the checkpointing
    // thread.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);

    // Perform an RPC that needs to take the write lock.
    Sharpen.Thread rpcThread = new _Thread_404(this);
    rpcThread.Start();

    // Make sure that our thread is waiting for the lock.
    ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().HasQueuedThreads());
    NUnit.Framework.Assert.IsFalse(nn1.GetNamesystem().GetFsLockForTests().IsWriteLocked());
    NUnit.Framework.Assert.IsTrue(nn1.GetNamesystem().GetCpLockForTests().HasQueuedThreads());

    // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
    // be taken.
    string jmxUrl = "http://" + nn1.GetHttpAddress().GetHostName() + ":" + nn1.GetHttpAddress().Port + "/jmx";
    string pageContents = DFSTestUtil.UrlGet(new Uri(jmxUrl));
    NUnit.Framework.Assert.IsTrue(pageContents.Contains("NumLiveDataNodes"));

    // Make sure that the checkpoint is still going on, implying that the client
    // RPC to the SBN happened during the checkpoint.
    NUnit.Framework.Assert.IsTrue("SBN should have still been checkpointing.", delay.GetFireCount() == 1 && delay.GetResultCount() == 0);

    // Release the checkpoint, wait for it to finish, then collect the thread.
    delay.Proceed();
    delay.WaitForResult();
    NUnit.Framework.Assert.IsTrue("SBN should have finished checkpointing.", delay.GetFireCount() == 1 && delay.GetResultCount() == 1);
    rpcThread.Join();
}