Example #1
0
        /// <summary>
        /// Checks that lease recovery completes for a file whose block meta file was
        /// shortened on disk after the writer had been aborted.
        /// </summary>
        /// <remarks>
        /// Writes 2 MB through a stream that is then aborted, cuts 20 bytes off the block's
        /// meta file, restarts the single DataNode and finally polls <c>RecoverLease</c>
        /// from a new file system instance (at most nine polls, one second apart).
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestBlockRecoveryWithLessMetafile()
        {
            Configuration conf = new Configuration();

            conf.Set(DFSConfigKeys.DfsBlockLocalPathAccessUserKey, UserGroupInformation.GetCurrentUser
                         ().GetShortUserName());
            cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
            Path file = new Path("/testRecoveryFile");
            DistributedFileSystem dfs  = cluster.GetFileSystem();
            FSDataOutputStream    @out = dfs.Create(file);
            int count = 0;

            // Each WriteBytes call is accounted as 4 bytes; stop once 2 MB were written.
            while (count < 2 * 1024 * 1024)
            {
                @out.WriteBytes("Data");
                count += 4;
            }
            @out.Hsync();
            // abort the original stream
            ((DFSOutputStream)@out.GetWrappedStream()).Abort();
            LocatedBlocks locations = cluster.GetNameNodeRpc().GetBlockLocations(file.ToString
                                                                                     (), 0, count);
            ExtendedBlock      block         = locations.Get(0).GetBlock();
            DataNode           dn            = cluster.GetDataNodes()[0];
            BlockLocalPathInfo localPathInfo = dn.GetBlockLocalPathInfo(block, null);
            FilePath           metafile      = new FilePath(localPathInfo.GetMetaPath());

            NUnit.Framework.Assert.IsTrue(metafile.Exists());
            // reduce the block meta file size
            RandomAccessFile raf = new RandomAccessFile(metafile, "rw");

            try
            {
                raf.SetLength(metafile.Length() - 20);
            }
            finally
            {
                // Release the file handle even when truncation fails.
                raf.Close();
            }
            // restart DN to make replica to RWR
            MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
            cluster.RestartDataNode(dnProp, true);
            // try to recover the lease
            DistributedFileSystem newdfs = (DistributedFileSystem)FileSystem.NewInstance(cluster
                                                                                         .GetConfiguration(0));

            // count is reused here as the poll counter.
            count = 0;
            while (++count < 10 && !newdfs.RecoverLease(file))
            {
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsTrue("File should be closed", newdfs.RecoverLease(file));
        }
Example #2
0
        /// <summary>
        /// Covers the case where the NameNode and the writing client both go away right
        /// after a block has been allocated: the file must still open and its lease must
        /// be recoverable.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestOpenFileWhenNNAndClientCrashAfterAddBlock()
        {
            string payload = "testData";

            cluster.GetConfiguration(0).Set(
                DFSConfigKeys.DfsNamenodeSafemodeThresholdPctKey, "1.0f");
            // Checksum chunk size equals the payload size, to make sure the full block
            // is written before the dummy block is created at the NN.
            cluster.GetConfiguration(0).SetInt("io.bytes.per.checksum", payload.Length);
            cluster.RestartNameNode(0);
            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                cluster.TransitionToStandby(1);

                DistributedFileSystem dfs = cluster.GetFileSystem(0);
                string name   = "/tmp1.txt";
                Path   target = new Path(name);
                FSDataOutputStream writer = dfs.Create(
                    target, FsPermission.GetDefault(), true, 1024, (short)3, payload.Length, null);

                writer.Write(Sharpen.Runtime.GetBytesForString(payload));
                writer.Hflush();

                long id = ((DFSOutputStream)writer.GetWrappedStream()).GetFileId();
                // The status itself is not inspected; only the lookup is performed.
                dfs.GetFileStatus(target);
                DFSClient dfsClient = DFSClientAdapter.GetClient(dfs);

                // Register one more block at the NN without writing it to any DataNode.
                ExtendedBlock lastBlock = DFSClientAdapter.GetPreviousBlock(dfsClient, id);
                DFSClientAdapter.GetNamenode(dfsClient).AddBlock(
                    name,
                    dfsClient.GetClientName(),
                    new ExtendedBlock(lastBlock),
                    new DatanodeInfo[0],
                    DFSClientAdapter.GetFileId((DFSOutputStream)writer.GetWrappedStream()),
                    null);

                cluster.RestartNameNode(0, true);
                cluster.RestartDataNode(0);
                cluster.TransitionToActive(0);
                // Give the block reports time to be processed.
                Sharpen.Thread.Sleep(2000);

                FSDataInputStream reader = dfs.Open(target);
                reader.Close();

                // First call initiates recovery; the second one is expected to report success.
                dfs.RecoverLease(target);
                bool recovered = dfs.RecoverLease(target);
                NUnit.Framework.Assert.IsTrue("Recovery also should be success", recovered);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
 /// <summary>
 /// Calls <c>RecoverLease</c> on the given path over and over, pausing five seconds
 /// between attempts, until the call returns true.
 /// </summary>
 /// <param name="filepath">Path whose lease is to be recovered.</param>
 /// <param name="dfs">
 /// File system to use; when null, the result of <c>GetFSAsAnotherUser(conf)</c> is used.
 /// </param>
 /// <exception cref="System.Exception"/>
 private void RecoverLease(Path filepath, DistributedFileSystem dfs)
 {
     const int pauseMs = 5000;

     DistributedFileSystem target = dfs;
     if (target == null)
     {
         target = (DistributedFileSystem)GetFSAsAnotherUser(conf);
     }
     for (;;)
     {
         if (target.RecoverLease(filepath))
         {
             return;
         }
         AppendTestUtil.Log.Info("sleep " + pauseMs + "ms");
         Sharpen.Thread.Sleep(pauseMs);
     }
 }
Example #4
0
        /// <summary>
        /// An append that exceeds the storage quota must leave the file closed (not under
        /// construction), without a lease, and with the directory's space usage unchanged.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppendOverStorageQuota()
        {
            Path quotaDir = new Path("/TestAppendOverQuota");
            Path target   = new Path(quotaDir, "file");

            // Start from a file that holds half a block.
            dfs.Mkdirs(quotaDir);
            DFSTestUtil.CreateFile(dfs, target, Blocksize / 2, Replication, seed);

            // Shrink the quota so that appending to the partial block raises an exception.
            dfs.SetQuota(quotaDir, long.MaxValue - 1, 1);
            INodeDirectory quotaNode = fsdir.GetINode4Write(quotaDir.ToString()).AsDirectory();
            long usedBefore = quotaNode.GetDirectoryWithQuotaFeature()
                                       .GetSpaceConsumed()
                                       .GetStorageSpace();

            bool rejected = false;
            try
            {
                DFSTestUtil.AppendFile(dfs, target, Blocksize);
            }
            catch (DSQuotaExceededException)
            {
                // This is the expected outcome.
                rejected = true;
            }
            if (!rejected)
            {
                NUnit.Framework.Assert.Fail("append didn't fail");
            }

            // The file must exist, must not be UC, and must not have a dangling lease.
            string targetName = target.ToString();
            INodeFile inode = fsdir.GetINode(targetName).AsFile();

            NUnit.Framework.Assert.IsNotNull(inode);
            NUnit.Framework.Assert.IsFalse("should not be UC", inode.IsUnderConstruction());
            NUnit.Framework.Assert.IsNull(
                "should not have a lease",
                cluster.GetNamesystem().GetLeaseManager().GetLeaseByPath(targetName));

            // Quota usage must be the same as before the failed append.
            long usedAfter = quotaNode.GetDirectoryWithQuotaFeature()
                                      .GetSpaceConsumed()
                                      .GetStorageSpace();

            NUnit.Framework.Assert.AreEqual(usedBefore, usedAfter);

            // Make sure the edits are not corrupted: recover, then restart the NameNodes.
            dfs.RecoverLease(target);
            cluster.RestartNameNodes();
        }
Example #5
0
        /// <summary>
        /// Appending to a file whose lease belongs to another client must fail until the
        /// lease has been recovered; afterwards the append must go through.
        /// </summary>
        public virtual void TestLeaseRecoveryAndAppend()
        {
            Configuration conf = new Configuration();

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
                Path target = new Path("/testLeaseRecovery");
                DistributedFileSystem dfs = cluster.GetFileSystem();

                // Create an empty file, flush it, then abort the stream that created it.
                FSDataOutputStream creator = dfs.Create(target);
                creator.Hflush();
                creator.Hsync();
                ((DFSOutputStream)creator.GetWrappedStream()).Abort();

                DistributedFileSystem otherClient =
                    (DistributedFileSystem)FileSystem.NewInstance(cluster.GetConfiguration(0));

                // While the lease is held by the first client, an append must be refused.
                RemoteException refusal = null;
                try
                {
                    otherClient.Append(target);
                }
                catch (RemoteException e)
                {
                    refusal = e;
                }
                if (refusal == null)
                {
                    NUnit.Framework.Assert.Fail(
                        "Append to a file(lease is held by another client) should fail");
                }
                NUnit.Framework.Assert.IsTrue(
                    refusal.Message.Contains("file lease is currently owned"));

                // Lease recovery is expected to succeed on the very first attempt.
                NUnit.Framework.Assert.IsTrue(otherClient.RecoverLease(target));

                FSDataOutputStream appender = otherClient.Append(target);
                appender.Write(Sharpen.Runtime.GetBytesForString("test"));
                appender.Close();
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                    cluster = null;
                }
            }
        }
        /// <summary>
        /// Exercises a NameNode failover that takes place after a block synchronization
        /// request was issued but before it is committed.
        /// </summary>
        /// <remarks>
        /// The primary DataNode's commitBlockSynchronization call is held back while the
        /// active role moves from NN 0 to NN 1. Once released, the call is expected to fail,
        /// and a later lease recovery attempt is expected to succeed.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFailoverRightBeforeCommitSynchronization()
        {
            Configuration conf = new Configuration();

            // Permissions are switched off so that a different user may recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream writer = null;
            MiniDFSCluster haCluster = new MiniDFSCluster.Builder(conf)
                                       .NnTopology(MiniDFSNNTopology.SimpleHATopology())
                                       .NumDataNodes(3)
                                       .Build();

            try
            {
                haCluster.WaitActive();
                haCluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");

                FileSystem fs = HATestUtil.ConfigureFailoverFs(haCluster, conf);
                writer = fs.Create(TestPath);
                // Half a block of data is enough.
                AppendTestUtil.Write(writer, 0, BlockSize / 2);
                writer.Hflush();

                // Ask the active node which DataNode is expected to drive recovery of the
                // block under construction.
                NameNode           activeNn = haCluster.GetNameNode(0);
                ExtendedBlock      ucBlock  = DFSTestUtil.GetFirstBlock(fs, TestPath);
                DatanodeDescriptor primary  = DFSTestUtil.GetExpectedPrimaryNode(activeNn, ucBlock);
                Log.Info("Expecting block recovery to be triggered on DN " + primary);

                // Locate that DataNode daemon and spy on its connection to the active NN.
                DataNode primaryDn = haCluster.GetDataNode(primary.GetIpcPort());
                DatanodeProtocolClientSideTranslatorPB spy =
                    DataNodeTestUtils.SpyOnBposToNN(primaryDn, activeNn);

                // Hold back the commitBlockSynchronization call.
                GenericTestUtils.DelayAnswer hold = new GenericTestUtils.DelayAnswer(Log);
                Org.Mockito.Mockito.DoAnswer(hold).When(spy).CommitBlockSynchronization(
                    Org.Mockito.Mockito.Eq(ucBlock),
                    Org.Mockito.Mockito.AnyInt(),                       // new genstamp
                    Org.Mockito.Mockito.AnyLong(),                      // new length
                    Org.Mockito.Mockito.Eq(true),                       // close file
                    Org.Mockito.Mockito.Eq(false),                      // delete block
                    (DatanodeID[])Org.Mockito.Mockito.AnyObject(),      // new targets
                    (string[])Org.Mockito.Mockito.AnyObject());         // new target storages

                DistributedFileSystem otherUserFs = CreateFsAsOtherUser(haCluster, conf);
                NUnit.Framework.Assert.IsFalse(otherUserFs.RecoverLease(TestPath));

                Log.Info("Waiting for commitBlockSynchronization call from primary");
                hold.WaitForCall();

                Log.Info("Failing over to NN 1");
                haCluster.TransitionToStandby(0);
                haCluster.TransitionToActive(1);

                // Release the held call and verify that it failed with the expected exception.
                hold.Proceed();
                hold.WaitForResult();
                Exception thrown = hold.GetThrown();
                if (thrown == null)
                {
                    NUnit.Framework.Assert.Fail(
                        "commitBlockSynchronization call did not fail on standby");
                }
                GenericTestUtils.AssertExceptionContains(
                    "Operation category WRITE is not supported", thrown);

                // A fresh recovery attempt should now succeed on the new active.
                LoopRecoverLease(otherUserFs, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
            }
            finally
            {
                IOUtils.CloseStream(writer);
                haCluster.Shutdown();
            }
        }
Example #7
0
        /// <summary>
        /// Test race between delete operation and commitBlockSynchronization method.
        /// </summary>
        /// <remarks>
        /// See HDFS-6825. For every test path a half-block file is written, lease recovery
        /// is started, and the primary DataNode's commitBlockSynchronization call is held
        /// back while the file's top-level parent (or the file itself) is deleted and,
        /// for some cases, re-created as a directory. After all paths were processed the
        /// NameNodes are restarted.
        /// </remarks>
        /// <param name="hasSnapshot">
        /// When true, a snapshot of the root is created after each file has been written.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
        {
            Log.Info("Start testing, hasSnapshot: " + hasSnapshot);
            // Each entry: (file path, whether to re-create the deleted parent directory).
            AList <AbstractMap.SimpleImmutableEntry <string, bool> > testList = new AList <AbstractMap.SimpleImmutableEntry
                                                                                           <string, bool> >();

            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/test-file",
                                                                                 false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/test-file1"
                                                                                 , true));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/testdir/testdir1/test-file"
                                                                                 , false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/testdir/testdir1/test-file1"
                                                                                 , true));
            Path          rootPath = new Path("/");
            Configuration conf     = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm = null;
            // One spy per DataNode, created on first use and reused for later paths.
            IDictionary <DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap = new Dictionary
                                                                                   <DataNode, DatanodeProtocolClientSideTranslatorPB>();

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
                cluster.WaitActive();
                DistributedFileSystem fs = cluster.GetFileSystem();
                int stId = 0;
                foreach (AbstractMap.SimpleImmutableEntry <string, bool> stest in testList)
                {
                    string testPath  = stest.Key;
                    bool   mkSameDir = stest.Value;
                    Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot
                             );
                    Path fPath = new Path(testPath);
                    //find grandest non-root parent
                    Path grandestNonRootParent = fPath;
                    while (!grandestNonRootParent.GetParent().Equals(rootPath))
                    {
                        grandestNonRootParent = grandestNonRootParent.GetParent();
                    }
                    stm = fs.Create(fPath);
                    Log.Info("test on " + testPath + " created " + fPath);
                    // write a half block
                    AppendTestUtil.Write(stm, 0, BlockSize / 2);
                    stm.Hflush();
                    if (hasSnapshot)
                    {
                        SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                        ++stId;
                    }
                    // Look into the block manager on the active node for the block
                    // under construction.
                    NameNode           nn              = cluster.GetNameNode();
                    ExtendedBlock      blk             = DFSTestUtil.GetFirstBlock(fs, fPath);
                    DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
                    Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
                    // Find the corresponding DN daemon, and spy on its connection to the
                    // active.
                    DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                    // The Dictionary indexer throws KeyNotFoundException for a missing key,
                    // so the lookup has to go through TryGetValue to reach the create branch.
                    DatanodeProtocolClientSideTranslatorPB nnSpy;
                    if (!dnMap.TryGetValue(primaryDN, out nnSpy) || nnSpy == null)
                    {
                        nnSpy            = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                        dnMap[primaryDN] = nnSpy;
                    }
                    // Delay the commitBlockSynchronization call
                    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                    Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(Org.Mockito.Mockito
                                                                                                 .Eq(blk), Org.Mockito.Mockito.AnyInt(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                                                                                                 .Eq(true), Org.Mockito.Mockito.Eq(false), (DatanodeID[])Org.Mockito.Mockito.AnyObject
                                                                                                     (), (string[])Org.Mockito.Mockito.AnyObject());
                    // Matcher arguments after the block, in order:
                    // new genstamp
                    // new length
                    // close file
                    // delete block
                    // new targets
                    // new target storages
                    fs.RecoverLease(fPath);
                    Log.Info("Waiting for commitBlockSynchronization call from primary");
                    delayer.WaitForCall();
                    Log.Info("Deleting recursively " + grandestNonRootParent);
                    fs.Delete(grandestNonRootParent, true);
                    // Re-create the directory only when what was deleted is a parent
                    // directory, not the test file itself.
                    if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
                    {
                        Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                        fs.Mkdirs(grandestNonRootParent);
                    }
                    delayer.Proceed();
                    Log.Info("Now wait for result");
                    delayer.WaitForResult();
                    // An exception from the delayed call is only logged, not asserted on.
                    Exception t = delayer.GetThrown();
                    if (t != null)
                    {
                        Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
                    }
                }
                // end of loop each fPath
                Log.Info("Now check we can restart");
                cluster.RestartNameNodes();
                Log.Info("Restart finished");
            }
            finally
            {
                if (stm != null)
                {
                    IOUtils.CloseStream(stm);
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Example #8
0
            /// <summary>
            /// Runs the recoverLease command: reads the <c>-path</c> and <c>-retries</c>
            /// options, then calls <c>RecoverLease</c> on the path until it returns true or
            /// the retry budget is exhausted.
            /// </summary>
            /// <param name="args">
            /// Command arguments, handed to <c>StringUtils.PopOptionWithArgument</c>.
            /// </param>
            /// <returns>
            /// 0 when the lease was recovered; 1 for missing or invalid arguments, a file
            /// system that cannot be obtained, or after giving up.
            /// </returns>
            /// <exception cref="System.IO.IOException"/>
            internal override int Run(IList <string> args)
            {
                if (args.Count == 0)
                {
                    System.Console.Out.WriteLine(this.usageText);
                    System.Console.Out.WriteLine(this.helpText + "\n");
                    return(1);
                }
                string pathStr    = StringUtils.PopOptionWithArgument("-path", args);
                string retriesStr = StringUtils.PopOptionWithArgument("-retries", args);

                if (pathStr == null)
                {
                    System.Console.Error.WriteLine("You must supply a -path argument to " + "recoverLease."
                                                   );
                    return(1);
                }
                // A single attempt is made unless -retries says otherwise.
                int maxRetries = 1;

                if (retriesStr != null)
                {
                    try
                    {
                        maxRetries = System.Convert.ToInt32(retriesStr);
                    }
                    catch (FormatException e)
                    {
                        System.Console.Error.WriteLine("Failed to parse the argument to -retries: " + StringUtils
                                                       .StringifyException(e));
                        return(1);
                    }
                    catch (OverflowException e)
                    {
                        // Convert.ToInt32 reports out-of-range numbers with OverflowException
                        // rather than FormatException; treat both as a parse failure.
                        System.Console.Error.WriteLine("Failed to parse the argument to -retries: " + StringUtils
                                                       .StringifyException(e));
                        return(1);
                    }
                }
                FileSystem fs;

                try
                {
                    fs = FileSystem.NewInstance(new URI(pathStr), this._enclosing.GetConf(), null);
                }
                catch (URISyntaxException e)
                {
                    System.Console.Error.WriteLine("URISyntaxException for " + pathStr + ":" + StringUtils
                                                   .StringifyException(e));
                    return(1);
                }
                // NOTE(review): this handler catches every remaining exception type although
                // its message only mentions InterruptedException — confirm the intended type.
                catch (Exception e)
                {
                    System.Console.Error.WriteLine("InterruptedException for " + pathStr + ":" + StringUtils
                                                   .StringifyException(e));
                    return(1);
                }
                DistributedFileSystem dfs = null;

                try
                {
                    dfs = (DistributedFileSystem)fs;
                }
                catch (InvalidCastException)
                {
                    System.Console.Error.WriteLine("Invalid filesystem for path " + pathStr + ": " +
                                                   "needed scheme hdfs, but got: " + fs.GetScheme());
                    return(1);
                }
                // The loop always performs at least one attempt; it exits by returning on
                // success or by breaking once retry reaches maxRetries.
                for (int retry = 0; true;)
                {
                    bool        recovered = false;
                    IOException ioe       = null;
                    try
                    {
                        recovered = dfs.RecoverLease(new Path(pathStr));
                    }
                    catch (IOException e)
                    {
                        // Remember the failure so it is reported and counted as an attempt.
                        ioe = e;
                    }
                    if (recovered)
                    {
                        System.Console.Out.WriteLine("recoverLease SUCCEEDED on " + pathStr);
                        return(0);
                    }
                    if (ioe != null)
                    {
                        System.Console.Error.WriteLine("recoverLease got exception: ");
                        Sharpen.Runtime.PrintStackTrace(ioe);
                    }
                    else
                    {
                        System.Console.Error.WriteLine("recoverLease returned false.");
                    }
                    retry++;
                    if (retry >= maxRetries)
                    {
                        break;
                    }
                    System.Console.Error.WriteLine("Retrying in " + DebugAdmin.RecoverLeaseCommand.TimeoutMs
                                                   + " ms...");
                    Uninterruptibles.SleepUninterruptibly(DebugAdmin.RecoverLeaseCommand.TimeoutMs, TimeUnit
                                                          .Milliseconds);
                    System.Console.Error.WriteLine("Retry #" + retry);
                }
                System.Console.Error.WriteLine("Giving up on recoverLease for " + pathStr + " after "
                                               + maxRetries + (maxRetries == 1 ? " try." : " tries."));
                return(1);
            }