예제 #1
0
        /// <summary>
        /// Merges a checkpoint into <paramref name="dstImage"/>: applies the storage info
        /// from the signature, optionally reloads the image file for the signature's most
        /// recent checkpoint txid, rolls forward through the edit logs in the manifest,
        /// and finally saves the image and writes out the storage.
        /// </summary>
        /// <param name="sig">checkpoint signature supplying storage info and the checkpoint txid</param>
        /// <param name="manifest">edit logs to apply on top of the image</param>
        /// <param name="loadImage">when true, the image file is reloaded before the logs are applied</param>
        /// <param name="dstImage">image that is merged into and saved</param>
        /// <param name="dstNamesystem">namesystem the image and edits are loaded into</param>
        /// <exception cref="System.IO.IOException">
        /// if <paramref name="loadImage"/> is set and no image file exists for the checkpoint txid
        /// </exception>
        internal static void DoMerge(CheckpointSignature sig, RemoteEditLogManifest manifest,
                                     bool loadImage, FSImage dstImage, FSNamesystem dstNamesystem)
        {
            NNStorage storage = dstImage.GetStorage();
            storage.SetStorageInfo(sig);

            if (loadImage)
            {
                FilePath imageFile = storage.FindImageFile(NNStorage.NameNodeFile.Image,
                                                           sig.mostRecentCheckpointTxId);
                if (imageFile == null)
                {
                    throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId
                                          + " even though it should have " + "just been downloaded");
                }

                // The reload runs under the namesystem write lock; the lock is
                // released even if the reload throws.
                dstNamesystem.WriteLock();
                try
                {
                    dstImage.ReloadFromImageFile(imageFile, dstNamesystem);
                }
                finally
                {
                    dstNamesystem.WriteUnlock();
                }
                dstNamesystem.ImageLoadComplete();
            }

            // Fault-injection hook used by the unit tests.
            CheckpointFaultInjector.GetInstance().DuringMerge();

            Checkpointer.RollForwardByApplyingLogs(manifest, dstImage, dstNamesystem);

            // Saving the image has the side effect of purging old fsimages/edit logs.
            long lastAppliedTxId = dstImage.GetLastAppliedTxId();
            dstImage.SaveFSImageInAllDirs(dstNamesystem, lastAppliedTxId);
            storage.WriteAll();
        }
예제 #2
0
 /// <summary>
 /// Stores the supplied arguments in the corresponding instance fields.
 /// </summary>
 /// <param name="dstImage">value assigned to the <c>dstImage</c> field</param>
 /// <param name="sig">value assigned to the <c>sig</c> field</param>
 /// <param name="nnHostPort">value assigned to the <c>nnHostPort</c> field</param>
 /// <param name="manifest">value assigned to the <c>manifest</c> field</param>
 public _PrivilegedExceptionAction_444(FSImage dstImage, CheckpointSignature sig,
                                       Uri nnHostPort, RemoteEditLogManifest manifest)
 {
     // Plain capture only: nothing is validated or copied here.
     this.manifest = manifest;
     this.nnHostPort = nnHostPort;
     this.sig = sig;
     this.dstImage = dstImage;
 }
예제 #3
0
        /// <summary>
        /// Starts a mini cluster with transaction workers running, rolls the edit log up
        /// to <c>NumRolls</c> times, and after each roll passes the finalized segment to
        /// <c>VerifyEditLogs</c> and asserts that the next in-progress edits file exists.
        /// The loop stops early once a worker has recorded an error.
        /// </summary>
        /// <remarks>
        /// Cleanup closes the file system and shuts the cluster down before an error
        /// captured from the workers is rethrown, so a failing worker does not leak the
        /// cluster.
        /// </remarks>
        public virtual void TestEditLogRolling()
        {
            // start a cluster
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = null;
            FileSystem     fileSys = null;
            AtomicReference <Exception> caughtErr = new AtomicReference <Exception>();

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDataNodes).Build();
                cluster.WaitActive();
                fileSys = cluster.GetFileSystem();
                NamenodeProtocols        nn      = cluster.GetNameNode().GetRpcServer();
                FSImage                  fsimage = cluster.GetNamesystem().GetFSImage();
                Storage.StorageDirectory sd      = fsimage.GetStorage().GetStorageDir(0);
                StartTransactionWorkers(nn, caughtErr);
                long previousLogTxId = 1;
                for (int i = 0; i < NumRolls && caughtErr.Get() == null; i++)
                {
                    try
                    {
                        Sharpen.Thread.Sleep(20);
                    }
                    catch (Exception)
                    {
                        // Deliberately ignored: the sleep is only a best-effort pause
                        // between rolls.
                    }
                    Log.Info("Starting roll " + i + ".");
                    CheckpointSignature sig = nn.RollEditLog();
                    long   nextLog          = sig.curSegmentTxId;
                    string logFileName      = NNStorage.GetFinalizedEditsFileName(previousLogTxId, nextLog
                                                                                  - 1);
                    // VerifyEditLogs' return value is added to the running txid, which
                    // must then line up with the start of the new segment.
                    previousLogTxId += VerifyEditLogs(cluster.GetNamesystem(), fsimage, logFileName,
                                                      previousLogTxId);
                    NUnit.Framework.Assert.AreEqual(previousLogTxId, nextLog);
                    FilePath expectedLog = NNStorage.GetInProgressEditsFile(sd, previousLogTxId);
                    NUnit.Framework.Assert.IsTrue("Expect " + expectedLog + " to exist", expectedLog.
                                                  Exists());
                }
            }
            finally
            {
                StopTransactionWorkers();
                try
                {
                    if (fileSys != null)
                    {
                        fileSys.Close();
                    }
                }
                finally
                {
                    // Shut the cluster down even if closing the file system failed.
                    if (cluster != null)
                    {
                        cluster.Shutdown();
                    }
                }
                // Surface a worker failure only after all resources are released.
                if (caughtErr.Get() != null)
                {
                    throw new RuntimeException(caughtErr.Get());
                }
            }
        }
        /// <summary>
        /// Resolves the finalized edits file that spans txid 1 through
        /// <c>sig.curSegmentTxId - 1</c> in the first edits storage directory.
        /// </summary>
        /// <param name="sig">checkpoint signature whose current segment txid bounds the file</param>
        /// <returns>absolute path of that edits file</returns>
        /// <exception cref="System.IO.IOException"/>
        private string GetEditsFilename(CheckpointSignature sig)
        {
            FSImage fsImage = cluster.GetNameNode().GetFSImage();

            // Only the first edits directory is used; the cluster was set up to
            // have ONE StorageDirectory.
            IEnumerator <Storage.StorageDirectory> editsDirs =
                fsImage.GetStorage().DirIterator(NNStorage.NameNodeDirType.Edits);
            Storage.StorageDirectory firstEditsDir = editsDirs.Next();

            FilePath editsFile = NNStorage.GetFinalizedEditsFile(firstEditsDir, 1, sig.curSegmentTxId - 1);
            System.Diagnostics.Debug.Assert(editsFile.Exists(), "expected " + editsFile + " exists");

            return editsFile.GetAbsolutePath();
        }
예제 #5
0
        /// <summary>
        /// Makes one edit, rolls the edit log, makes a second edit, saves the namespace
        /// in safe mode, then reloads the namesystem from disk and checks that both
        /// edits are present.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestSaveWhileEditsRolled()
        {
            Configuration conf = GetConf();
            NameNode.InitMetrics(conf, HdfsServerConstants.NamenodeRole.Namenode);
            DFSTestUtil.FormatNameNode(conf);

            FSNamesystem fsNamesystem = FSNamesystem.LoadFromDisk(conf);
            try
            {
                // First edit lands before the roll, second edit after it.
                DoAnEdit(fsNamesystem, 1);
                CheckpointSignature rollSignature = fsNamesystem.RollEditLog();
                Log.Warn("Checkpoint signature: " + rollSignature);
                DoAnEdit(fsNamesystem, 2);

                // Persist the namespace while in safe mode.
                fsNamesystem.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
                fsNamesystem.SaveNamespace();

                // Shut down; the reference is cleared so the finally block does not
                // close this instance a second time if the reload below fails.
                fsNamesystem.Close();
                fsNamesystem = null;

                // A fresh namesystem should recover the namespace written by the
                // previous incarnation, including both edits.
                fsNamesystem = FSNamesystem.LoadFromDisk(conf);
                CheckEditExists(fsNamesystem, 1);
                CheckEditExists(fsNamesystem, 2);
            }
            finally
            {
                if (fsNamesystem != null)
                {
                    fsNamesystem.Close();
                }
            }
        }
        /// <summary>
        /// Runs the edit-generating operations and returns the name of the resulting
        /// edits file.
        /// </summary>
        /// <returns>the result of <c>GetEditsFilename</c> for the signature returned by <c>RunOperations</c></returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual string GenerateEdits()
        {
            return GetEditsFilename(RunOperations());
        }
예제 #7
0
        /// <summary>
        /// Creates a new checkpoint: asks the remote name-node to start one, brings the
        /// local image up to date (downloading the image and/or edit logs when needed),
        /// saves the image in all storage directories, optionally uploads it back, and
        /// ends the checkpoint on the remote name-node.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// if the name-node requests shutdown or returns an unsupported command, if
        /// the available logs do not reach back to the last applied txid, or if the
        /// image that was just downloaded cannot be found in storage
        /// </exception>
        internal virtual void DoCheckpoint()
        {
            BackupImage bnImage   = GetFSImage();
            NNStorage   bnStorage = bnImage.GetStorage();
            long        startTime = Time.MonotonicNow();

            bnImage.FreezeNamespaceAtNextRoll();
            NamenodeCommand cmd = GetRemoteNamenodeProxy().StartCheckpoint(backupNode.GetRegistration
                                                                               ());
            CheckpointCommand cpCmd = null;

            // Only a checkpoint command lets execution continue past this switch.
            switch (cmd.GetAction())
            {
            case NamenodeProtocol.ActShutdown:
            {
                Shutdown();
                throw new IOException("Name-node " + backupNode.nnRpcAddress + " requested shutdown."
                                      );
            }

            case NamenodeProtocol.ActCheckpoint:
            {
                cpCmd = (CheckpointCommand)cmd;
                break;
            }

            default:
            {
                throw new IOException("Unsupported NamenodeCommand: " + cmd.GetAction());
            }
            }
            bnImage.WaitUntilNamespaceFrozen();
            CheckpointSignature sig = cpCmd.GetSignature();

            // Make sure we're talking to the same NN!
            sig.ValidateStorageInfo(bnImage);
            long lastApplied = bnImage.GetLastAppliedTxId();

            Log.Debug("Doing checkpoint. Last applied: " + lastApplied);
            RemoteEditLogManifest manifest = GetRemoteNamenodeProxy().GetEditLogManifest(bnImage
                                                                                         .GetLastAppliedTxId() + 1);
            bool needReloadImage = false;

            if (!manifest.GetLogs().IsEmpty())
            {
                RemoteEditLog firstRemoteLog = manifest.GetLogs()[0];
                // we don't have enough logs to roll forward using only logs. Need
                // to download and load the image.
                if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
                {
                    Log.Info("Unable to roll forward using only logs. Downloading " + "image with txid "
                             + sig.mostRecentCheckpointTxId);
                    MD5Hash downloadedHash = TransferFsImage.DownloadImageToStorage(backupNode.nnHttpAddress
                                                                                    , sig.mostRecentCheckpointTxId, bnStorage, true);
                    bnImage.SaveDigestAndRenameCheckpointImage(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId
                                                               , downloadedHash);
                    lastApplied     = sig.mostRecentCheckpointTxId;
                    needReloadImage = true;
                }
                // Even starting from the downloaded image there is still a gap
                // before the first available log.
                if (firstRemoteLog.GetStartTxId() > lastApplied + 1)
                {
                    throw new IOException("No logs to roll forward from " + lastApplied);
                }
                // get edits files
                foreach (RemoteEditLog log in manifest.GetLogs())
                {
                    TransferFsImage.DownloadEditsToStorage(backupNode.nnHttpAddress, log, bnStorage);
                }
                if (needReloadImage)
                {
                    Log.Info("Loading image with txid " + sig.mostRecentCheckpointTxId);
                    FilePath file = bnStorage.FindImageFile(NNStorage.NameNodeFile.Image, sig.mostRecentCheckpointTxId
                                                            );
                    // Same guard as DoMerge: fail with a descriptive IOException
                    // instead of handing a null file to the image loader.
                    if (file == null)
                    {
                        throw new IOException("Couldn't find image file at txid " + sig.mostRecentCheckpointTxId
                                              + " even though it should have just been downloaded");
                    }
                    bnImage.ReloadFromImageFile(file, backupNode.GetNamesystem());
                }
                RollForwardByApplyingLogs(manifest, bnImage, backupNode.GetNamesystem());
            }
            long txid = bnImage.GetLastAppliedTxId();

            // The image is saved under the namesystem write lock, which is released
            // even if saving fails.
            backupNode.namesystem.WriteLock();
            try
            {
                backupNode.namesystem.SetImageLoaded();
                if (backupNode.namesystem.GetBlocksTotal() > 0)
                {
                    backupNode.namesystem.SetBlockTotal();
                }
                bnImage.SaveFSImageInAllDirs(backupNode.GetNamesystem(), txid);
                bnStorage.WriteAll();
            }
            finally
            {
                backupNode.namesystem.WriteUnlock();
            }
            if (cpCmd.NeedToReturnImage())
            {
                TransferFsImage.UploadImageFromStorage(backupNode.nnHttpAddress, conf, bnStorage,
                                                       NNStorage.NameNodeFile.Image, txid);
            }
            GetRemoteNamenodeProxy().EndCheckpoint(backupNode.GetRegistration(), sig);
            if (backupNode.GetRole() == HdfsServerConstants.NamenodeRole.Backup)
            {
                bnImage.ConvergeJournalSpool();
            }
            // keep registration up to date
            backupNode.SetRegistration();
            long imageSize = bnImage.GetStorage().GetFsImageName(txid).Length();

            Log.Info("Checkpoint completed in " + (Time.MonotonicNow() - startTime) / 1000 +
                     " seconds." + " New Image Size: " + imageSize);
        }
예제 #8
0
        /// <summary>
        /// Performs one checkpoint: rolls the name-node's edit log, downloads the
        /// checkpoint files, merges them into the local checkpoint image, and uploads
        /// the merged image back to the name-node.
        /// </summary>
        /// <returns>true if the image was (re)loaded as part of this checkpoint</returns>
        public virtual bool DoCheckpoint()
        {
            checkpointImage.EnsureCurrentDirExists();
            NNStorage storage = checkpointImage.GetStorage();

            // Rolling the edit log makes the namenode start a new edit file; the
            // returned signature is the token later used to upload the merged image.
            CheckpointSignature sig = namenode.RollEditLog();

            bool loadImage = false;
            bool freshCheckpointer = checkpointImage.GetNamespaceID() == 0;

            // With federation support the cluster identity is compared, otherwise
            // only the namespace id.
            bool sameCluster;
            if (storage.VersionSupportsFederation(NameNodeLayoutVersion.Features) &&
                sig.IsSameCluster(checkpointImage))
            {
                sameCluster = true;
            }
            else
            {
                sameCluster = !storage.VersionSupportsFederation(NameNodeLayoutVersion.Features) &&
                              sig.NamespaceIdMatches(checkpointImage);
            }

            if (freshCheckpointer ||
                (sameCluster && !sig.StorageVersionMatches(checkpointImage.GetStorage())))
            {
                // A fresh 2NN, or one on the same cluster whose storage needs an
                // upgrade, simply adopts the storage info from the server.
                storage.SetStorageInfo(sig);
                storage.SetClusterID(sig.GetClusterID());
                storage.SetBlockPoolID(sig.GetBlockpoolID());
                loadImage = true;
            }
            sig.ValidateStorageInfo(checkpointImage);

            // Fault-injection hook used by the unit tests.
            CheckpointFaultInjector.GetInstance().AfterSecondaryCallsRollEditLog();

            RemoteEditLogManifest manifest =
                namenode.GetEditLogManifest(sig.mostRecentCheckpointTxId + 1);

            // Fetch fsimage and edits. The image is also reloaded when a previous
            // merge failed; both conditions are always evaluated.
            bool downloadedNewImage = DownloadCheckpointFiles(fsName, checkpointImage, sig, manifest);
            bool previousMergeFailed = checkpointImage.HasMergeError();
            loadImage = loadImage || downloadedNewImage || previousMergeFailed;

            try
            {
                DoMerge(sig, manifest, loadImage, checkpointImage, namesystem);
            }
            catch (IOException)
            {
                // After a failed merge the in-memory file system state may be
                // inconsistent, so flag that image and edits must be reloaded.
                checkpointImage.SetMergeError();
                throw;
            }
            // The merge succeeded, so any earlier error flag is cleared.
            checkpointImage.ClearMergeError();

            // Upload the new image into the NameNode, which then makes the
            // uploaded image the most current one.
            long txid = checkpointImage.GetLastAppliedTxId();
            TransferFsImage.UploadImageFromStorage(fsName, conf, storage,
                                                   NNStorage.NameNodeFile.Image, txid);

            // Fault-injection hook used by the unit tests.
            CheckpointFaultInjector.GetInstance().AfterSecondaryUploadsNewImage();

            Log.Warn("Checkpoint done. New Image Size: " + storage.GetFsImageName(txid).Length());

            bool writeLegacyOiv = legacyOivImageDir != null && !legacyOivImageDir.IsEmpty();
            if (writeLegacyOiv)
            {
                try
                {
                    checkpointImage.SaveLegacyOIVImage(namesystem, legacyOivImageDir, new Canceler());
                }
                catch (IOException e)
                {
                    // Failing to write the legacy OIV image is logged, not rethrown.
                    Log.Warn("Failed to write legacy OIV image: ", e);
                }
            }
            return loadImage;
        }
예제 #9
0
        /// <summary>
        /// Download <code>fsimage</code> and <code>edits</code>
        /// files from the name-node.
        /// </summary>
        /// <remarks>
        /// The manifest is sanity-checked first; the transfer itself is delegated to a
        /// privileged action executed as the current user.
        /// </remarks>
        /// <param name="nnHostPort">address of the name-node, handed to the privileged action</param>
        /// <param name="dstImage">image handed to the privileged action</param>
        /// <param name="sig">checkpoint signature; its most recent checkpoint txid determines the expected first log</param>
        /// <param name="manifest">edit logs available on the name-node</param>
        /// <returns>true if a new image has been downloaded and needs to be loaded</returns>
        /// <exception cref="System.IO.IOException">
        /// if the manifest is empty, does not start at the expected txid, or the
        /// privileged action itself fails with an <c>IOException</c>
        /// </exception>
        internal static bool DownloadCheckpointFiles(Uri nnHostPort, FSImage dstImage, CheckpointSignature
                                                     sig, RemoteEditLogManifest manifest)
        {
            // Sanity check manifest - these could happen if, eg, someone on the
            // NN side accidentally rmed the storage directories
            if (manifest.GetLogs().IsEmpty())
            {
                throw new IOException("Found no edit logs to download on NN since txid " + sig.mostRecentCheckpointTxId
                                      );
            }
            long expectedTxId = sig.mostRecentCheckpointTxId + 1;

            if (manifest.GetLogs()[0].GetStartTxId() != expectedTxId)
            {
                throw new IOException("Bad edit log manifest (expected txid = " + expectedTxId +
                                      ": " + manifest);
            }
            try
            {
                bool b = UserGroupInformation.GetCurrentUser().DoAs(new _PrivilegedExceptionAction_444
                                                                        (dstImage, sig, nnHostPort, manifest));
                return(b);
            }
            catch (IOException)
            {
                // I/O failures are part of this method's contract; let them through
                // unchanged rather than hiding them inside a RuntimeException.
                throw;
            }
            catch (Exception e)
            {
                throw new RuntimeException(e);
            }
        }