/// <summary>
/// Download <code>fsimage</code> and <code>edits</code>
/// files from the name-node.
/// </summary>
/// <param name="nnHostPort">name-node address handed to the privileged download action</param>
/// <param name="dstImage">image object handed to the privileged download action</param>
/// <param name="sig">
/// checkpoint signature; its <c>mostRecentCheckpointTxId</c> determines the
/// transaction id the first edit log in the manifest must start at
/// </param>
/// <param name="manifest">edit log manifest describing the logs to download</param>
/// <returns>true if a new image has been downloaded and needs to be loaded</returns>
/// <exception cref="System.IO.IOException">
/// if the manifest contains no logs, if its first log does not start at
/// <c>sig.mostRecentCheckpointTxId + 1</c>, or if the privileged action
/// itself fails with an I/O error
/// </exception>
internal static bool DownloadCheckpointFiles(Uri nnHostPort, FSImage dstImage, CheckpointSignature sig, RemoteEditLogManifest manifest)
{
    // Sanity check manifest - these could happen if, eg, someone on the
    // NN side accidentally rmed the storage directories
    if (manifest.GetLogs().IsEmpty())
    {
        throw new IOException("Found no edit logs to download on NN since txid " + sig.mostRecentCheckpointTxId);
    }

    long expectedTxId = sig.mostRecentCheckpointTxId + 1;
    if (manifest.GetLogs()[0].GetStartTxId() != expectedTxId)
    {
        throw new IOException("Bad edit log manifest (expected txid = " + expectedTxId + "): " + manifest);
    }

    try
    {
        // The action class is defined elsewhere. Per the notes left at this
        // call site it gets the fsimage and the edits file, and yields true
        // if we haven't loaded all the transactions represented by the
        // downloaded fsimage.
        bool b = UserGroupInformation.GetCurrentUser().DoAs(new _PrivilegedExceptionAction_444(dstImage, sig, nnHostPort, manifest));
        return b;
    }
    catch (IOException)
    {
        // I/O failures are part of this method's documented contract; let
        // them reach the caller unwrapped with their stack trace intact.
        throw;
    }
    catch (Exception e)
    {
        // Anything else is unexpected here and is surfaced as an unchecked error.
        throw new RuntimeException(e);
    }
}
/// <summary>
/// Asks the loggers for their edit log manifests starting at
/// <paramref name="fromTxnId"/>, opens one input stream per remote log
/// segment reported in the responses, and passes the collected streams,
/// together with <paramref name="streams"/>, to
/// <c>JournalSet.ChainAndMakeRedundantStreams</c>.
/// </summary>
/// <param name="streams">collection handed to <c>JournalSet.ChainAndMakeRedundantStreams</c></param>
/// <param name="fromTxnId">transaction id the manifests are requested from</param>
/// <param name="inProgressOk">forwarded unchanged to the manifest request</param>
/// <exception cref="System.IO.IOException"/>
public virtual void SelectInputStreams(ICollection<EditLogInputStream> streams, long fromTxnId, bool inProgressOk)
{
    QuorumCall<AsyncLogger, RemoteEditLogManifest> manifestCall =
        loggers.GetEditLogManifest(fromTxnId, inProgressOk);
    IDictionary<AsyncLogger, RemoteEditLogManifest> responses =
        loggers.WaitForWriteQuorum(manifestCall, selectInputStreamsTimeoutMs, "selectInputStreams");

    Log.Debug("selectInputStream manifests:\n"
        + Joiner.On("\n").WithKeyValueSeparator(": ").Join(responses));

    // Candidate streams are kept ordered by the JournalSet comparator.
    PriorityQueue<EditLogInputStream> candidates =
        new PriorityQueue<EditLogInputStream>(64, JournalSet.EditLogInputStreamComparator);

    foreach (KeyValuePair<AsyncLogger, RemoteEditLogManifest> response in responses)
    {
        foreach (RemoteEditLog segment in response.Value.GetLogs())
        {
            // Each segment is fetched from the logger that reported it.
            Uri fetchUrl = response.Key.BuildURLToFetchLogs(segment.GetStartTxId());
            EditLogInputStream segmentStream = EditLogFileInputStream.FromUrl(
                connectionFactory,
                fetchUrl,
                segment.GetStartTxId(),
                segment.GetEndTxId(),
                segment.IsInProgress());
            candidates.AddItem(segmentStream);
        }
    }

    JournalSet.ChainAndMakeRedundantStreams(streams, candidates, fromTxnId);
}
/// <summary>
/// Converts a manifest holding two edit logs (1-10 and 11-20) to its
/// protobuf form and back via <c>PBHelper.Convert</c>, then checks that the
/// log count matches and compares the logs pairwise with <c>Compare</c>.
/// </summary>
public virtual void TestConvertRemoteEditLogManifest()
{
    IList<RemoteEditLog> expectedLogs = new AList<RemoteEditLog>();
    expectedLogs.AddItem(new RemoteEditLog(1, 10));
    expectedLogs.AddItem(new RemoteEditLog(11, 20));

    // Manifest -> proto -> manifest.
    HdfsProtos.RemoteEditLogManifestProto asProto =
        PBHelper.Convert(new RemoteEditLogManifest(expectedLogs));
    IList<RemoteEditLog> roundTripped = PBHelper.Convert(asProto).GetLogs();

    NUnit.Framework.Assert.AreEqual(expectedLogs.Count, roundTripped.Count);

    int position = 0;
    while (position < expectedLogs.Count)
    {
        Compare(expectedLogs[position], roundTripped[position]);
        position++;
    }
}
/// <summary>
/// Opens every finalized edits file listed in <paramref name="manifest"/>
/// whose end transaction id is beyond the last transaction applied to
/// <paramref name="dstImage"/>, and loads those edits into the image.
/// </summary>
/// <param name="manifest">manifest whose logs are considered for loading</param>
/// <param name="dstImage">image supplying the storage and receiving the edits</param>
/// <param name="dstNamesystem">namesystem passed through to <c>LoadEdits</c></param>
/// <exception cref="System.IO.IOException"/>
internal static void RollForwardByApplyingLogs(RemoteEditLogManifest manifest, FSImage dstImage, FSNamesystem dstNamesystem)
{
    NNStorage storage = dstImage.GetStorage();
    IList<EditLogInputStream> pendingStreams = Lists.NewArrayList();

    foreach (RemoteEditLog segment in manifest.GetLogs())
    {
        // Skip segments that end at or before the last applied transaction.
        if (segment.GetEndTxId() <= dstImage.GetLastAppliedTxId())
        {
            continue;
        }

        FilePath editsFile = storage.FindFinalizedEditsFile(segment.GetStartTxId(), segment.GetEndTxId());
        EditLogFileInputStream segmentStream =
            new EditLogFileInputStream(editsFile, segment.GetStartTxId(), segment.GetEndTxId(), true);
        pendingStreams.AddItem(segmentStream);
    }

    Log.Info("Checkpointer about to load edits from " + pendingStreams.Count + " stream(s).");
    dstImage.LoadEdits(pendingStreams, dstNamesystem);
}
/// <summary>Create a new checkpoint</summary>
/// <remarks>
/// Sequence, as implemented below:
/// freeze the local namespace at the next roll and ask the remote name-node
/// to start a checkpoint; once the namespace is frozen, validate the returned
/// signature against the local image; fetch the edit log manifest starting
/// after the last applied transaction; if the logs alone cannot bridge the
/// gap, download the image named by the signature; download all edits in the
/// manifest, reload the image if one was downloaded, and roll forward; save
/// the image under the namesystem write lock; upload it if the command asks
/// for it; finally end the checkpoint on the remote name-node.
/// </remarks>
/// <exception cref="System.IO.IOException">
/// if the name-node requests shutdown, returns an unsupported command, or
/// the available logs do not connect to the last applied transaction
/// </exception>
internal virtual void DoCheckpoint()
{
    BackupImage bnImage = GetFSImage();
    NNStorage bnStorage = bnImage.GetStorage();
    long startTime = Time.MonotonicNow();
    bnImage.FreezeNamespaceAtNextRoll();
    NamenodeCommand cmd = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration());
    CheckpointCommand cpCmd = null;
    switch (cmd.GetAction())
    {
        case NamenodeProtocol.ActShutdown:
        {
            Shutdown();
            throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown.");
        }

        case NamenodeProtocol.ActCheckpoint:
        {
            cpCmd = (CheckpointCommand)cmd;
            break;
        }

        default:
        {
            throw new IOException("Unsupported NamenodeCommand: " + cmd.GetAction());
        }
    }

    bnImage.WaitUntilNamespaceFrozen();
    CheckpointSignature sig = cpCmd.GetSignature();

    // Make sure we're talking to the same NN!
    sig.ValidateStorageInfo(bnImage);

    long lastApplied = bnImage.GetLastAppliedTxId();
    Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
    RemoteEditLogManifest manifest = GetRemoteNamenodeProxy().GetEditLogManifest(bnImage.GetLastAppliedTxId() + 1);
    bool needReloadImage = false;
    if (!manifest.GetLogs().IsEmpty())
    {
        RemoteEditLog firstRemoteLog = manifest.GetLogs()[0];

        // we don't have enough logs to roll forward using only logs. Need
        // to download and load the image.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            Log.Info("Unable to roll forward using only logs. Downloading image with txid " + sig.mostRecentCheckpointTxId);
            MD5Hash downloadedHash = TransferFsImage.DownloadImageToStorage(backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId, bnStorage, true);
            bnImage.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId, downloadedHash);
            lastApplied = sig.mostRecentCheckpointTxId;
            needReloadImage = true;
        }

        // Re-check with the (possibly advanced) lastApplied: even the
        // downloaded image may not connect to the first available log.
        if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
        {
            throw new IOException("No logs to roll forward from " + lastApplied);
        }

        // get edits files
        foreach (RemoteEditLog log in manifest.GetLogs())
        {
            TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, log, bnStorage);
        }

        if (needReloadImage)
        {
            Log.Info("Loading image with txid " + sig.mostRecentCheckpointTxId);
            FilePath file = bnStorage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId);
            bnImage.ReloadFromImageFile(file, backupNode.GetNamesystem());
        }

        RollForwardByApplyingLogs(manifest, bnImage, backupNode.GetNamesystem());
    }

    long txid = bnImage.GetLastAppliedTxId();

    // The image is saved while holding the namesystem write lock; the lock
    // is always released, even if saving fails.
    backupNode.namesystem.WriteLock();
    try
    {
        backupNode.namesystem.SetImageLoaded();
        if (backupNode.namesystem.GetBlocksTotal() > 0)
        {
            backupNode.namesystem.SetBlockTotal();
        }
        bnImage.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
        bnStorage.WriteAll();
    }
    finally
    {
        backupNode.namesystem.WriteUnlock();
    }

    if (cpCmd.NeedToReturnImage())
    {
        TransferFsImage.UploadImageFromStorage(backupNode.nnHttpAddress, conf, bnStorage, NNStorage.NameNodeFile.Image, txid);
    }

    GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), sig);
    if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
    {
        bnImage.ConvergeJournalSpool();
    }

    // keep registration up to date
    backupNode.SetRegistration();

    long imageSize = bnImage.GetStorage().GetFsImageName(txid).Length();
    Log.Info("Checkpoint completed in " + (Time.MonotonicNow() - startTime) / 1000 + " seconds." + " New Image Size: " + imageSize);
}