/// <summary>
/// Merges a checkpoint into <paramref name="dstImage"/>: optionally reloads the
/// image file for the signature's most recent checkpoint, applies the edit logs
/// listed in <paramref name="manifest"/>, then saves the image in all
/// directories and writes out the storage.
/// </summary>
/// <param name="sig">checkpoint signature; its storage info is set on the destination storage</param>
/// <param name="manifest">edit logs to roll forward through</param>
/// <param name="loadImage">when true, the image file at <c>sig.mostRecentCheckpointTxId</c> is reloaded first</param>
/// <param name="dstImage">image that receives the merge</param>
/// <param name="dstNamesystem">namesystem the image and edits are loaded into</param>
/// <exception cref="System.IO.IOException">e.g. when the image file that should be reloaded cannot be found</exception>
internal static void DoMerge(CheckpointSignature sig, RemoteEditLogManifest manifest, bool loadImage, FSImage dstImage, FSNamesystem dstNamesystem)
{
    NNStorage storage = dstImage.GetStorage();
    storage.SetStorageInfo(sig);

    if (loadImage)
    {
        FilePath imageFile = storage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId);
        if (imageFile == null)
        {
            throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId + " even though it should have " + "just been downloaded");
        }

        // The reload happens under the namesystem write lock, which is
        // released even if the reload throws.
        dstNamesystem.WriteLock();
        try
        {
            dstImage.ReloadFromImageFile(imageFile, dstNamesystem);
        }
        finally
        {
            dstNamesystem.WriteUnlock();
        }

        dstNamesystem.ImageLoadComplete();
    }

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().DuringMerge();

    Checkpointer.RollForwardByApplyingLogs(manifest, dstImage, dstNamesystem);

    // The following has the side effect of purging old fsimages/edit logs.
    long lastAppliedTxId = dstImage.GetLastAppliedTxId();
    dstImage.SaveFSImageInAllDirs(dstNamesystem, lastAppliedTxId);
    storage.WriteAll();
}
/// <summary>
/// Captures the values the action works with; each argument is stored in the
/// field of the same name.
/// </summary>
/// <param name="dstImage">stored in <c>this.dstImage</c></param>
/// <param name="sig">stored in <c>this.sig</c></param>
/// <param name="nnHostPort">stored in <c>this.nnHostPort</c></param>
/// <param name="manifest">stored in <c>this.manifest</c></param>
public _PrivilegedExceptionAction_444(FSImage dstImage, CheckpointSignature sig, Uri nnHostPort, RemoteEditLogManifest manifest)
{
    this.manifest = manifest;
    this.nnHostPort = nnHostPort;
    this.sig = sig;
    this.dstImage = dstImage;
}
/// <summary>
/// Rolls the edit log <c>NumRolls</c> times while transaction workers are
/// running. After every roll the finalized segment's file name is handed to
/// <c>VerifyEditLogs</c>, the running txid is checked against the new segment's
/// start txid, and the next in-progress edits file is asserted to exist.
/// </summary>
/// <remarks>
/// The loop stops early once a worker has recorded an error in
/// <c>caughtErr</c>; that error is rethrown (wrapped in a RuntimeException)
/// after the workers, the file system and the cluster have been cleaned up.
/// </remarks>
public virtual void TestEditLogRolling()
{
    // start a cluster
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Exception> caughtErr = new AtomicReference<Exception>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
        cluster.WaitActive();
        fileSys = cluster.GetFileSystem();
        NamenodeProtocols nn = cluster.GetNameNode().GetRpcServer();
        FSImage fsimage = cluster.GetNamesystem().GetFSImage();
        Storage.StorageDirectory sd = fsimage.GetStorage().GetStorageDir(0);
        StartTransactionWorkers(nn, caughtErr);
        long previousLogTxId = 1;
        for (int i = 0; i < NumRolls && caughtErr.Get() == null; i++)
        {
            try
            {
                Sharpen.Thread.Sleep(20);
            }
            catch (Exception)
            {
                // Deliberately ignored: the sleep only spaces the rolls out,
                // so a failed or interrupted sleep must not fail the test.
            }
            Log.Info("Starting roll " + i + ".");
            CheckpointSignature sig = nn.RollEditLog();
            long nextLog = sig.curSegmentTxId;
            string logFileName = NNStorage.GetFinalizedEditsFileName(previousLogTxId, nextLog - 1);
            previousLogTxId += VerifyEditLogs(cluster.GetNamesystem(), fsimage, logFileName, previousLogTxId);
            NUnit.Framework.Assert.AreEqual(previousLogTxId, nextLog);
            FilePath expectedLog = NNStorage.GetInProgressEditsFile(sd, previousLogTxId);
            NUnit.Framework.Assert.IsTrue("Expect " + expectedLog + " to exist", expectedLog.Exists());
        }
    }
    finally
    {
        // Release every resource BEFORE surfacing a worker error. Throwing
        // first (as this block used to) skipped Close()/Shutdown() and leaked
        // the mini cluster whenever a worker had failed.
        try
        {
            StopTransactionWorkers();
        }
        finally
        {
            try
            {
                if (fileSys != null)
                {
                    fileSys.Close();
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
        if (caughtErr.Get() != null)
        {
            throw new RuntimeException(caughtErr.Get());
        }
    }
}
/// <summary>
/// Resolves the finalized edits file that spans txid 1 through the transaction
/// just before the signature's current segment.
/// </summary>
/// <param name="sig">signature whose <c>curSegmentTxId</c> bounds the finalized segment</param>
/// <returns>absolute path of the edits file for the cluster</returns>
/// <exception cref="System.IO.IOException"/>
private string GetEditsFilename(CheckpointSignature sig)
{
    NNStorage storage = cluster.GetNameNode().GetFSImage().GetStorage();

    // it was set up to only have ONE StorageDirectory
    IEnumerator<Storage.StorageDirectory> editsDirs = storage.DirIterator(NNStorage.NameNodeDirType.Edits);
    Storage.StorageDirectory editsDir = editsDirs.Next();

    FilePath editsFile = NNStorage.GetFinalizedEditsFile(editsDir, 1, sig.curSegmentTxId - 1);
    System.Diagnostics.Debug.Assert(editsFile.Exists(), "expected " + editsFile + " exists");
    return editsFile.GetAbsolutePath();
}
/// <summary>
/// Makes an edit, rolls the edit log, makes a second edit, saves the namespace
/// in safe mode, then reloads the namesystem from disk and checks that both
/// edits are present.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSaveWhileEditsRolled()
{
    Configuration conf = GetConf();
    NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
    DFSTestUtil.FormatNameNode(conf);

    FSNamesystem namesystem = FSNamesystem.LoadFromDisk(conf);
    try
    {
        DoAnEdit(namesystem, 1);
        CheckpointSignature rollSignature = namesystem.RollEditLog();
        Log.Warn("Checkpoint signature: " + rollSignature);

        // Do another edit
        DoAnEdit(namesystem, 2);

        // Save namespace
        namesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
        namesystem.SaveNamespace();

        // Now shut down and restart the NN. The reference is cleared so the
        // finally block does not close the old instance a second time if the
        // reload below throws.
        namesystem.Close();
        namesystem = null;

        // Start a new namesystem, which should be able to recover
        // the namespace from the previous incarnation.
        namesystem = FSNamesystem.LoadFromDisk(conf);

        // Make sure the image loaded including our edits.
        CheckEditExists(namesystem, 1);
        CheckEditExists(namesystem, 2);
    }
    finally
    {
        if (namesystem != null)
        {
            namesystem.Close();
        }
    }
}
/// <summary>
/// Runs the operations that produce the edits and returns the name of the
/// resulting edits file.
/// </summary>
/// <returns>the edits filename resolved from the signature returned by <c>RunOperations</c></returns>
/// <exception cref="System.IO.IOException"/>
public virtual string GenerateEdits()
{
    return GetEditsFilename(RunOperations());
}
/// <summary>Create a new checkpoint</summary>
/// <remarks>
/// Steps, in order: freeze the namespace at the next roll, ask the remote
/// name-node to start a checkpoint, validate the returned signature, fetch the
/// edit log manifest, download the image first if the logs alone do not reach
/// back to the last applied txid, download and apply the edits, save the image
/// in all directories under the namesystem write lock, upload the image if the
/// command asks for it, and finally end the checkpoint on the remote name-node.
/// </remarks>
/// <exception cref="System.IO.IOException">
/// if the name-node requests shutdown or returns an unsupported command, if the
/// available logs cannot be used to roll forward, or if the downloaded image
/// file cannot be found
/// </exception>
internal virtual void DoCheckpoint()
{
    BackupImage bnImage = GetFSImage();
    NNStorage bnStorage = bnImage.GetStorage();
    long startTime = Time.MonotonicNow();
    bnImage.FreezeNamespaceAtNextRoll();
    NamenodeCommand cmd = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration());
    CheckpointCommand cpCmd = null;
    switch (cmd.GetAction())
    {
        case NamenodeProtocol.ActShutdown:
        {
            Shutdown();
            throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown.");
        }

        case NamenodeProtocol.ActCheckpoint:
        {
            cpCmd = (CheckpointCommand)cmd;
            break;
        }

        default:
        {
            throw new IOException("Unsupported NamenodeCommand: " + cmd.GetAction());
        }
    }
    bnImage.WaitUntilNamespaceFrozen();
    CheckpointSignature sig = cpCmd.GetSignature();
    // Make sure we're talking to the same NN!
    sig.ValidateStorageInfo(bnImage);
    long lastApplied = bnImage.GetLastAppliedTxId();
    Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
    RemoteEditLogManifest manifest = GetRemoteNamenodeProxy().GetEditLogManifest(bnImage.GetLastAppliedTxId() + 1);
    bool needReloadImage = false;
    if (!manifest.GetLogs().IsEmpty())
    {
        RemoteEditLog firstRemoteLog = manifest.GetLogs()[0];
        // we don't have enough logs to roll forward using only logs. Need
        // to download and load the image.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            // The message is kept on one line: a regular string literal
            // cannot contain a raw line break.
            Log.Info("Unable to roll forward using only logs. Downloading " + "image with txid " + sig.mostRecentCheckpointTxId);
            MD5Hash downloadedHash = TransferFsImage.DownloadImageToStorage(backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId, bnStorage, true);
            bnImage.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId, downloadedHash);
            lastApplied = sig.mostRecentCheckpointTxId;
            needReloadImage = true;
        }
        // Re-checked after the possible image download: even starting from
        // the downloaded image there must be no gap before the first log.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            throw new IOException("No logs to roll forward from " + lastApplied);
        }
        // get edits files
        foreach (RemoteEditLog log in manifest.GetLogs())
        {
            TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, log, bnStorage);
        }
        if (needReloadImage)
        {
            Log.Info("Loading image with txid " + sig.mostRecentCheckpointTxId);
            FilePath file = bnStorage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId);
            // Fail with a descriptive IOException instead of handing a null
            // file to the image loader.
            if (file == null)
            {
                throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId + " even though it should have just been downloaded");
            }
            bnImage.ReloadFromImageFile(file, backupNode.GetNamesystem());
        }
        RollForwardByApplyingLogs(manifest, bnImage, backupNode.GetNamesystem());
    }
    long txid = bnImage.GetLastAppliedTxId();
    backupNode.namesystem.WriteLock();
    try
    {
        backupNode.namesystem.SetImageLoaded();
        if (backupNode.namesystem.GetBlocksTotal() > 0)
        {
            backupNode.namesystem.SetBlockTotal();
        }
        bnImage.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
        bnStorage.WriteAll();
    }
    finally
    {
        backupNode.namesystem.WriteUnlock();
    }
    if (cpCmd.NeedToReturnImage())
    {
        TransferFsImage.UploadImageFromStorage(backupNode.nnHttpAddress, conf, bnStorage, NNStorage.NameNodeFile.Image, txid);
    }
    GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), sig);
    if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
    {
        bnImage.ConvergeJournalSpool();
    }
    backupNode.SetRegistration();
    // keep registration up to date
    long imageSize = bnImage.GetStorage().GetFsImageName(txid).Length();
    Log.Info("Checkpoint completed in " + (Time.MonotonicNow() - startTime) / 1000 + " seconds." + " New Image Size: " + imageSize);
}
/// <summary>
/// Runs one checkpoint: rolls the name-node's edit log, downloads the image
/// and edits it needs, merges them, and uploads the merged image back.
/// </summary>
/// <returns>the final value of the load-image flag, i.e. whether the image was (re)loaded during this checkpoint</returns>
public virtual bool DoCheckpoint()
{
    checkpointImage.EnsureCurrentDirExists();
    NNStorage dstStorage = checkpointImage.GetStorage();

    // Tell the namenode to start logging transactions in a new edit file
    // Returns a token that would be used to upload the merged image.
    CheckpointSignature signature = namenode.RollEditLog();

    bool loadImage = false;
    bool freshCheckpointer = checkpointImage.GetNamespaceID() == 0;
    bool sameCluster =
        (dstStorage.VersionSupportsFederation(NameNodeLayoutVersion.Features) && signature.IsSameCluster(checkpointImage))
        || (!dstStorage.VersionSupportsFederation(NameNodeLayoutVersion.Features) && signature.NamespaceIdMatches(checkpointImage));

    bool adoptServerStorageInfo = freshCheckpointer
        || (sameCluster && !signature.StorageVersionMatches(checkpointImage.GetStorage()));
    if (adoptServerStorageInfo)
    {
        // if we're a fresh 2NN, or if we're on the same cluster and our storage
        // needs an upgrade, just take the storage info from the server.
        dstStorage.SetStorageInfo(signature);
        dstStorage.SetClusterID(signature.GetClusterID());
        dstStorage.SetBlockPoolID(signature.GetBlockpoolID());
        loadImage = true;
    }
    signature.ValidateStorageInfo(checkpointImage);

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().AfterSecondaryCallsRollEditLog();

    RemoteEditLogManifest manifest = namenode.GetEditLogManifest(signature.mostRecentCheckpointTxId + 1);

    // Fetch fsimage and edits. Reload the image if previous merge failed.
    // Both operands are always evaluated (no short-circuit).
    bool newImageDownloaded = DownloadCheckpointFiles(fsName, checkpointImage, signature, manifest);
    bool previousMergeFailed = checkpointImage.HasMergeError();
    loadImage = loadImage | newImageDownloaded | previousMergeFailed;

    try
    {
        DoMerge(signature, manifest, loadImage, checkpointImage, namesystem);
    }
    catch (IOException)
    {
        // A merge error occurred. The in-memory file system state may be
        // inconsistent, so the image and edits need to be reloaded.
        checkpointImage.SetMergeError();
        throw;
    }

    // Clear any error since merge was successful.
    checkpointImage.ClearMergeError();

    //
    // Upload the new image into the NameNode. Then tell the Namenode
    // to make this new uploaded image as the most current image.
    //
    long txid = checkpointImage.GetLastAppliedTxId();
    TransferFsImage.UploadImageFromStorage(fsName, conf, dstStorage, NNStorage.NameNodeFile.Image, txid);

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().AfterSecondaryUploadsNewImage();

    Log.Warn("Checkpoint done. New Image Size: " + dstStorage.GetFsImageName(txid).Length());

    if (legacyOivImageDir != null && !legacyOivImageDir.IsEmpty())
    {
        try
        {
            checkpointImage.SaveLegacyOIVImage(namesystem, legacyOivImageDir, new Canceler());
        }
        catch (IOException e)
        {
            // Failing to write the legacy OIV image is logged, not rethrown.
            Log.Warn("Failed to write legacy OIV image: ", e);
        }
    }
    return loadImage;
}
/// <summary>
/// Download <code>fsimage</code> and <code>edits</code>
/// files from the name-node.
/// </summary>
/// <param name="nnHostPort">passed through to the privileged download action</param>
/// <param name="dstImage">passed through to the privileged download action</param>
/// <param name="sig">checkpoint signature; its <c>mostRecentCheckpointTxId</c> fixes the txid the first edit log must start after</param>
/// <param name="manifest">edit logs available for download; must be non-empty and start at <c>sig.mostRecentCheckpointTxId + 1</c></param>
/// <returns>true if a new image has been downloaded and needs to be loaded</returns>
/// <exception cref="System.IO.IOException">
/// if the manifest is empty or does not start at the expected txid, or if the
/// download itself fails with an I/O error
/// </exception>
internal static bool DownloadCheckpointFiles(Uri nnHostPort, FSImage dstImage, CheckpointSignature sig, RemoteEditLogManifest manifest)
{
    // Sanity check manifest - these could happen if, eg, someone on the
    // NN side accidentally rmed the storage directories
    if (manifest.GetLogs().IsEmpty())
    {
        throw new IOException("Found no edit logs to download on NN since txid " + sig.mostRecentCheckpointTxId);
    }
    long expectedTxId = sig.mostRecentCheckpointTxId + 1;
    if (manifest.GetLogs()[0].GetStartTxId() != expectedTxId)
    {
        throw new IOException("Bad edit log manifest (expected txid = " + expectedTxId + ": " + manifest);
    }
    try
    {
        // The privileged action gets the fsimage and the edits files; its
        // result is true if we haven't loaded all the transactions
        // represented by the downloaded fsimage.
        bool b = UserGroupInformation.GetCurrentUser().DoAs(new _PrivilegedExceptionAction_444(dstImage, sig, nnHostPort, manifest));
        return b;
    }
    catch (IOException)
    {
        // I/O failures are this method's declared failure mode, so they are
        // propagated as-is rather than hidden inside a RuntimeException.
        throw;
    }
    catch (Exception e)
    {
        // Anything else is not part of the declared contract and is surfaced
        // as an unchecked failure.
        throw new RuntimeException(e);
    }
}