/// <summary>Create a new checkpoint.</summary>
/// <remarks>
/// Sequence performed by this method:
/// <list type="number">
/// <item><description>Asks the backup image to freeze its namespace at the next roll and
/// calls <c>StartCheckpoint</c> on the remote name-node proxy.</description></item>
/// <item><description>Validates the returned checkpoint signature against the local image.</description></item>
/// <item><description>Fetches the edit log manifest starting after the last applied transaction;
/// if the first remote log starts beyond that point, downloads the image named by the
/// signature and reloads from it before applying logs.</description></item>
/// <item><description>Saves the image in all directories under the namesystem write lock.</description></item>
/// <item><description>Uploads the image when the checkpoint command asks for it, calls
/// <c>EndCheckpoint</c>, and refreshes the registration.</description></item>
/// </list>
/// </remarks>
/// <exception cref="System.IO.IOException">
/// Thrown directly when the name-node requests shutdown, when it returns an unsupported
/// command, or when the available logs do not allow rolling forward from the last applied
/// transaction.
/// </exception>
internal virtual void DoCheckpoint()
{
    BackupImage bnImage = GetFSImage();
    NNStorage bnStorage = bnImage.GetStorage();
    long startTime = Time.MonotonicNow();
    bnImage.FreezeNamespaceAtNextRoll();
    NamenodeCommand cmd = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration());
    CheckpointCommand cpCmd = null;
    switch (cmd.GetAction())
    {
        case NamenodeProtocol.ActShutdown:
        {
            Shutdown();
            throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown.");
        }

        case NamenodeProtocol.ActCheckpoint:
        {
            cpCmd = (CheckpointCommand)cmd;
            break;
        }

        default:
        {
            throw new IOException("Unsupported NamenodeCommand: " + cmd.GetAction());
        }
    }
    bnImage.WaitUntilNamespaceFrozen();
    CheckpointSignature sig = cpCmd.GetSignature();
    // Make sure we're talking to the same NN!
    sig.ValidateStorageInfo(bnImage);
    long lastApplied = bnImage.GetLastAppliedTxId();
    Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
    RemoteEditLogManifest manifest = GetRemoteNamenodeProxy().GetEditLogManifest(
        bnImage.GetLastAppliedTxId() + 1);
    bool needReloadImage = false;
    if (!manifest.GetLogs().IsEmpty())
    {
        RemoteEditLog firstRemoteLog = manifest.GetLogs()[0];
        // We don't have enough logs to roll forward using only logs. Need
        // to download and load the image.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            // The literal is kept on a single physical line: a regular C# string
            // literal may not contain a raw line break.
            Log.Info("Unable to roll forward using only logs. Downloading " + "image with txid "
                + sig.mostRecentCheckpointTxId);
            MD5Hash downloadedHash = TransferFsImage.DownloadImageToStorage(
                backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId, bnStorage, true);
            bnImage.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image,
                sig.mostRecentCheckpointTxId, downloadedHash);
            lastApplied = sig.mostRecentCheckpointTxId;
            needReloadImage = true;
        }
        // Even after switching to the downloaded image's txid there may still be a gap
        // before the first remote log; in that case rolling forward is impossible.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            throw new IOException("No logs to roll forward from " + lastApplied);
        }
        // get edits files
        foreach (RemoteEditLog log in manifest.GetLogs())
        {
            TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, log, bnStorage);
        }
        if (needReloadImage)
        {
            Log.Info("Loading image with txid " + sig.mostRecentCheckpointTxId);
            FilePath file = bnStorage.FindImageFile(NNStorage.NameNodeFile.Image,
                sig.mostRecentCheckpointTxId);
            bnImage.ReloadFromImageFile(file, backupNode.GetNamesystem());
        }
        RollForwardByApplyingLogs(manifest, bnImage, backupNode.GetNamesystem());
    }
    long txid = bnImage.GetLastAppliedTxId();
    backupNode.namesystem.WriteLock();
    try
    {
        backupNode.namesystem.SetImageLoaded();
        if (backupNode.namesystem.GetBlocksTotal() > 0)
        {
            backupNode.namesystem.SetBlockTotal();
        }
        bnImage.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
        bnStorage.WriteAll();
    }
    finally
    {
        // Always release the write lock, even if saving the image fails.
        backupNode.namesystem.WriteUnlock();
    }
    if (cpCmd.NeedToReturnImage())
    {
        TransferFsImage.UploadImageFromStorage(backupNode.nnHttpAddress, conf, bnStorage,
            NNStorage.NameNodeFile.Image, txid);
    }
    GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), sig);
    if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
    {
        bnImage.ConvergeJournalSpool();
    }
    backupNode.SetRegistration();
    // keep registration up to date
    long imageSize = bnImage.GetStorage().GetFsImageName(txid).Length();
    Log.Info("Checkpoint completed in " + (Time.MonotonicNow() - startTime) / 1000 + " seconds."
        + " New Image Size: " + imageSize);
}
/// <summary>
/// Runs one checkpoint cycle: rolls the name-node edit log, fetches the image and edit
/// files, merges them, and uploads the resulting image to the name-node.
/// </summary>
/// <returns>
/// <c>true</c> when the storage info was taken from the checkpoint signature, or
/// <c>DownloadCheckpointFiles</c> returned <c>true</c>, or the checkpoint image reported a
/// merge error before the merge started; otherwise <c>false</c>.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Rethrown from <c>DoMerge</c> after the checkpoint image has been flagged with a merge error.
/// </exception>
public virtual bool DoCheckpoint()
{
    checkpointImage.EnsureCurrentDirExists();
    NNStorage targetStorage = checkpointImage.GetStorage();

    // Ask the namenode to start logging transactions in a new edit file.
    // The returned signature is the token later used to upload the merged image.
    CheckpointSignature signature = namenode.RollEditLog();

    bool freshCheckpointer = checkpointImage.GetNamespaceID() == 0;
    bool sameCluster =
        (targetStorage.VersionSupportsFederation(NameNodeLayoutVersion.Features)
            && signature.IsSameCluster(checkpointImage))
        || (!targetStorage.VersionSupportsFederation(NameNodeLayoutVersion.Features)
            && signature.NamespaceIdMatches(checkpointImage));

    // A fresh 2NN, or one on the same cluster whose storage needs an upgrade,
    // simply takes the storage info from the server.
    bool adoptServerStorageInfo = freshCheckpointer
        || (sameCluster && !signature.StorageVersionMatches(checkpointImage.GetStorage()));
    bool mustReloadImage = adoptServerStorageInfo;
    if (adoptServerStorageInfo)
    {
        targetStorage.SetStorageInfo(signature);
        targetStorage.SetClusterID(signature.GetClusterID());
        targetStorage.SetBlockPoolID(signature.GetBlockpoolID());
    }
    signature.ValidateStorageInfo(checkpointImage);

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().AfterSecondaryCallsRollEditLog();

    RemoteEditLogManifest editManifest =
        namenode.GetEditLogManifest(signature.mostRecentCheckpointTxId + 1);

    // Fetch fsimage and edits. Reload the image if the previous merge failed.
    // Both calls are always made; neither is skipped based on the other's result.
    bool downloadResult = DownloadCheckpointFiles(fsName, checkpointImage, signature, editManifest);
    bool previousMergeFailed = checkpointImage.HasMergeError();
    mustReloadImage = mustReloadImage | downloadResult | previousMergeFailed;

    try
    {
        DoMerge(signature, editManifest, mustReloadImage, checkpointImage, namesystem);
    }
    catch (IOException)
    {
        // A merge error occurred. The in-memory file system state may be
        // inconsistent, so the image and edits need to be reloaded.
        checkpointImage.SetMergeError();
        throw;
    }

    // The merge succeeded, so drop any recorded error.
    checkpointImage.ClearMergeError();

    // Upload the new image into the NameNode. Then tell the Namenode
    // to make this new uploaded image as the most current image.
    long mergedTxId = checkpointImage.GetLastAppliedTxId();
    TransferFsImage.UploadImageFromStorage(fsName, conf, targetStorage,
        NNStorage.NameNodeFile.Image, mergedTxId);

    // error simulation code for junit test
    CheckpointFaultInjector.GetInstance().AfterSecondaryUploadsNewImage();

    Log.Warn("Checkpoint done. New Image Size: " + targetStorage.GetFsImageName(mergedTxId).Length());

    if (legacyOivImageDir == null || legacyOivImageDir.IsEmpty())
    {
        return mustReloadImage;
    }

    try
    {
        checkpointImage.SaveLegacyOIVImage(namesystem, legacyOivImageDir, new Canceler());
    }
    catch (IOException oivFailure)
    {
        // Writing the legacy OIV image is best-effort: log and carry on.
        Log.Warn("Failed to write legacy OIV image: ", oivFailure);
    }
    return mustReloadImage;
}