public virtual void TestBlockRecoveryWithLessMetafile()
{
    Configuration conf = new Configuration();
    conf.Set(DFSConfigKeys.DfsBlockLocalPathAccessUserKey,
        UserGroupInformation.GetCurrentUser().GetShortUserName());
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    Path file = new Path("/testRecoveryFile");
    DistributedFileSystem dfs = cluster.GetFileSystem();
    FSDataOutputStream @out = dfs.Create(file);
    int count = 0;
    while (count < 2 * 1024 * 1024)
    {
        @out.WriteBytes("Data");
        count += 4;
    }
    @out.Hsync();
    // abort the original stream
    ((DFSOutputStream)@out.GetWrappedStream()).Abort();
    LocatedBlocks locations = cluster.GetNameNodeRpc().GetBlockLocations(file.ToString(), 0, count);
    ExtendedBlock block = locations.Get(0).GetBlock();
    DataNode dn = cluster.GetDataNodes()[0];
    BlockLocalPathInfo localPathInfo = dn.GetBlockLocalPathInfo(block, null);
    FilePath metafile = new FilePath(localPathInfo.GetMetaPath());
    NUnit.Framework.Assert.IsTrue(metafile.Exists());
    // reduce the block meta file size
    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
    raf.SetLength(metafile.Length() - 20);
    raf.Close();
    // restart DN to make replica to RWR
    MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
    cluster.RestartDataNode(dnProp, true);
    // try to recover the lease
    DistributedFileSystem newdfs =
        (DistributedFileSystem)FileSystem.NewInstance(cluster.GetConfiguration(0));
    count = 0;
    while (++count < 10 && !newdfs.RecoverLease(file))
    {
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsTrue("File should be closed", newdfs.RecoverLease(file));
}
/// <summary>Test NN crash and client crash/stuck immediately after block allocation</summary>
/// <exception cref="System.Exception"/>
public virtual void TestOpenFileWhenNNAndClientCrashAfterAddBlock()
{
    cluster.GetConfiguration(0).Set(DFSConfigKeys.DfsNamenodeSafemodeThresholdPctKey, "1.0f");
    string testData = "testData";
    // to make sure we write the full block before creating dummy block at NN.
    cluster.GetConfiguration(0).SetInt("io.bytes.per.checksum", testData.Length);
    cluster.RestartNameNode(0);
    try
    {
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        cluster.TransitionToStandby(1);
        DistributedFileSystem dfs = cluster.GetFileSystem(0);
        string pathString = "/tmp1.txt";
        Path filePath = new Path(pathString);
        FSDataOutputStream create = dfs.Create(filePath, FsPermission.GetDefault(), true,
            1024, (short)3, testData.Length, null);
        create.Write(Sharpen.Runtime.GetBytesForString(testData));
        create.Hflush();
        long fileId = ((DFSOutputStream)create.GetWrappedStream()).GetFileId();
        FileStatus fileStatus = dfs.GetFileStatus(filePath);
        DFSClient client = DFSClientAdapter.GetClient(dfs);
        // add one dummy block at NN, but not write to DataNode
        ExtendedBlock previousBlock = DFSClientAdapter.GetPreviousBlock(client, fileId);
        DFSClientAdapter.GetNamenode(client).AddBlock(pathString, client.GetClientName(),
            new ExtendedBlock(previousBlock), new DatanodeInfo[0],
            DFSClientAdapter.GetFileId((DFSOutputStream)create.GetWrappedStream()), null);
        cluster.RestartNameNode(0, true);
        cluster.RestartDataNode(0);
        cluster.TransitionToActive(0);
        // let the block reports be processed.
        Sharpen.Thread.Sleep(2000);
        FSDataInputStream @is = dfs.Open(filePath);
        @is.Close();
        dfs.RecoverLease(filePath); // initiate recovery
        NUnit.Framework.Assert.IsTrue("Recovery also should be success", dfs.RecoverLease(filePath));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <exception cref="System.Exception"/>
private void RecoverLease(Path filepath, DistributedFileSystem dfs)
{
    if (dfs == null)
    {
        dfs = (DistributedFileSystem)GetFSAsAnotherUser(conf);
    }
    while (!dfs.RecoverLease(filepath))
    {
        AppendTestUtil.Log.Info("sleep " + 5000 + "ms");
        Sharpen.Thread.Sleep(5000);
    }
}
/// <summary>Test append over storage quota does not mark file as UC or create lease</summary>
/// <exception cref="System.Exception"/>
public virtual void TestAppendOverStorageQuota()
{
    Path dir = new Path("/TestAppendOverQuota");
    Path file = new Path(dir, "file");
    // create partial block file
    dfs.Mkdirs(dir);
    DFSTestUtil.CreateFile(dfs, file, Blocksize / 2, Replication, seed);
    // lower quota to cause exception when appending to partial block
    dfs.SetQuota(dir, long.MaxValue - 1, 1);
    INodeDirectory dirNode = fsdir.GetINode4Write(dir.ToString()).AsDirectory();
    long spaceUsed = dirNode.GetDirectoryWithQuotaFeature().GetSpaceConsumed().GetStorageSpace();
    try
    {
        DFSTestUtil.AppendFile(dfs, file, Blocksize);
        NUnit.Framework.Assert.Fail("append didn't fail");
    }
    catch (DSQuotaExceededException)
    {
        // ignore
    }
    // check that the file exists, isn't UC, and has no dangling lease
    INodeFile inode = fsdir.GetINode(file.ToString()).AsFile();
    NUnit.Framework.Assert.IsNotNull(inode);
    NUnit.Framework.Assert.IsFalse("should not be UC", inode.IsUnderConstruction());
    NUnit.Framework.Assert.IsNull("should not have a lease",
        cluster.GetNamesystem().GetLeaseManager().GetLeaseByPath(file.ToString()));
    // make sure the quota usage is unchanged
    long newSpaceUsed = dirNode.GetDirectoryWithQuotaFeature().GetSpaceConsumed().GetStorageSpace();
    NUnit.Framework.Assert.AreEqual(spaceUsed, newSpaceUsed);
    // make sure edits aren't corrupted
    dfs.RecoverLease(file);
    cluster.RestartNameNodes();
}
public virtual void TestLeaseRecoveryAndAppend()
{
    Configuration conf = new Configuration();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        Path file = new Path("/testLeaseRecovery");
        DistributedFileSystem dfs = cluster.GetFileSystem();
        // create a file with 0 bytes
        FSDataOutputStream @out = dfs.Create(file);
        @out.Hflush();
        @out.Hsync();
        // abort the original stream
        ((DFSOutputStream)@out.GetWrappedStream()).Abort();
        DistributedFileSystem newdfs =
            (DistributedFileSystem)FileSystem.NewInstance(cluster.GetConfiguration(0));
        // Appending to a file whose lease is held by another client should fail
        try
        {
            newdfs.Append(file);
            NUnit.Framework.Assert.Fail("Append to a file(lease is held by another client) should fail");
        }
        catch (RemoteException e)
        {
            NUnit.Framework.Assert.IsTrue(e.Message.Contains("file lease is currently owned"));
        }
        // Lease recovery on first try should be successful
        bool recoverLease = newdfs.RecoverLease(file);
        NUnit.Framework.Assert.IsTrue(recoverLease);
        FSDataOutputStream append = newdfs.Append(file);
        append.Write(Sharpen.Runtime.GetBytesForString("test"));
        append.Close();
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
            cluster = null;
        }
    }
}
/// <summary>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed.
/// </summary>
/// <remarks>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed. The
/// DN running the recovery should then fail to commit the synchronization
/// and a later retry will succeed.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFailoverRightBeforeCommitSynchronization()
{
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology()).NumDataNodes(3).Build();
    try
    {
        cluster.WaitActive();
        cluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        stm = fs.Create(TestPath);
        // write a half block
        AppendTestUtil.Write(stm, 0, BlockSize / 2);
        stm.Hflush();
        // Look into the block manager on the active node for the block
        // under construction.
        NameNode nn0 = cluster.GetNameNode(0);
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, TestPath);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn0, blk);
        Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
        // Find the corresponding DN daemon, and spy on its connection to the
        // active.
        DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn0);
        // Delay the commitBlockSynchronization call
        GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
            Org.Mockito.Mockito.Eq(blk),
            Org.Mockito.Mockito.AnyInt(),   // new genstamp
            Org.Mockito.Mockito.AnyLong(),  // new length
            Org.Mockito.Mockito.Eq(true),   // close file
            Org.Mockito.Mockito.Eq(false),  // delete block
            (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
            (string[])Org.Mockito.Mockito.AnyObject());     // new target storages
        DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
        NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));
        Log.Info("Waiting for commitBlockSynchronization call from primary");
        delayer.WaitForCall();
        Log.Info("Failing over to NN 1");
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        // Let the commitBlockSynchronization call go through, and check that
        // it failed with the correct exception.
        delayer.Proceed();
        delayer.WaitForResult();
        Exception t = delayer.GetThrown();
        if (t == null)
        {
            NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported", t);
        // Now, if we try again to recover the block, it should succeed on the new
        // active.
        LoopRecoverLease(fsOtherUser, TestPath);
        AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
    }
    finally
    {
        IOUtils.CloseStream(stm);
        cluster.Shutdown();
    }
}
/// <summary>Test race between delete operation and commitBlockSynchronization method.</summary>
/// <remarks>
/// Test race between delete operation and commitBlockSynchronization method.
/// See HDFS-6825.
/// </remarks>
/// <param name="hasSnapshot"/>
/// <exception cref="System.Exception"/>
private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
{
    Log.Info("Start testing, hasSnapshot: " + hasSnapshot);
    AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
        new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file1", true));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file1", true));
    Path rootPath = new Path("/");
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
        new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        int stId = 0;
        foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
        {
            string testPath = stest.Key;
            bool mkSameDir = stest.Value;
            Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot);
            Path fPath = new Path(testPath);
            // find grandest non-root parent
            Path grandestNonRootParent = fPath;
            while (!grandestNonRootParent.GetParent().Equals(rootPath))
            {
                grandestNonRootParent = grandestNonRootParent.GetParent();
            }
            stm = fs.Create(fPath);
            Log.Info("test on " + testPath + " created " + fPath);
            // write a half block
            AppendTestUtil.Write(stm, 0, BlockSize / 2);
            stm.Hflush();
            if (hasSnapshot)
            {
                SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                ++stId;
            }
            // Look into the block manager on the active node for the block
            // under construction.
            NameNode nn = cluster.GetNameNode();
            ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
            DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
            Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
            // Find the corresponding DN daemon, and spy on its connection to the
            // active.
            DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
            DatanodeProtocolClientSideTranslatorPB nnSpy = dnMap[primaryDN];
            if (nnSpy == null)
            {
                nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                dnMap[primaryDN] = nnSpy;
            }
            // Delay the commitBlockSynchronization call
            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                Org.Mockito.Mockito.Eq(blk),
                Org.Mockito.Mockito.AnyInt(),   // new genstamp
                Org.Mockito.Mockito.AnyLong(),  // new length
                Org.Mockito.Mockito.Eq(true),   // close file
                Org.Mockito.Mockito.Eq(false),  // delete block
                (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
                (string[])Org.Mockito.Mockito.AnyObject());     // new target storages
            fs.RecoverLease(fPath);
            Log.Info("Waiting for commitBlockSynchronization call from primary");
            delayer.WaitForCall();
            Log.Info("Deleting recursively " + grandestNonRootParent);
            fs.Delete(grandestNonRootParent, true);
            if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
            {
                Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                fs.Mkdirs(grandestNonRootParent);
            }
            delayer.Proceed();
            Log.Info("Now wait for result");
            delayer.WaitForResult();
            Exception t = delayer.GetThrown();
            if (t != null)
            {
                Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
            }
        } // end of loop each fPath
        Log.Info("Now check we can restart");
        cluster.RestartNameNodes();
        Log.Info("Restart finished");
    }
    finally
    {
        if (stm != null)
        {
            IOUtils.CloseStream(stm);
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <exception cref="System.IO.IOException"/>
internal override int Run(IList<string> args)
{
    if (args.Count == 0)
    {
        System.Console.Out.WriteLine(this.usageText);
        System.Console.Out.WriteLine(this.helpText + "\n");
        return 1;
    }
    string pathStr = StringUtils.PopOptionWithArgument("-path", args);
    string retriesStr = StringUtils.PopOptionWithArgument("-retries", args);
    if (pathStr == null)
    {
        System.Console.Error.WriteLine("You must supply a -path argument to recoverLease.");
        return 1;
    }
    int maxRetries = 1;
    if (retriesStr != null)
    {
        try
        {
            maxRetries = System.Convert.ToInt32(retriesStr);
        }
        catch (FormatException e)
        {
            System.Console.Error.WriteLine("Failed to parse the argument to -retries: " +
                StringUtils.StringifyException(e));
            return 1;
        }
    }
    FileSystem fs;
    try
    {
        fs = FileSystem.NewInstance(new URI(pathStr), this._enclosing.GetConf(), null);
    }
    catch (URISyntaxException e)
    {
        System.Console.Error.WriteLine("URISyntaxException for " + pathStr + ":" +
            StringUtils.StringifyException(e));
        return 1;
    }
    catch (Exception e)
    {
        System.Console.Error.WriteLine("InterruptedException for " + pathStr + ":" +
            StringUtils.StringifyException(e));
        return 1;
    }
    DistributedFileSystem dfs = null;
    try
    {
        dfs = (DistributedFileSystem)fs;
    }
    catch (InvalidCastException)
    {
        System.Console.Error.WriteLine("Invalid filesystem for path " + pathStr + ": " +
            "needed scheme hdfs, but got: " + fs.GetScheme());
        return 1;
    }
    for (int retry = 0; true; )
    {
        bool recovered = false;
        IOException ioe = null;
        try
        {
            recovered = dfs.RecoverLease(new Path(pathStr));
        }
        catch (IOException e)
        {
            ioe = e;
        }
        if (recovered)
        {
            System.Console.Out.WriteLine("recoverLease SUCCEEDED on " + pathStr);
            return 0;
        }
        if (ioe != null)
        {
            System.Console.Error.WriteLine("recoverLease got exception: ");
            Sharpen.Runtime.PrintStackTrace(ioe);
        }
        else
        {
            System.Console.Error.WriteLine("recoverLease returned false.");
        }
        retry++;
        if (retry >= maxRetries)
        {
            break;
        }
        System.Console.Error.WriteLine("Retrying in " + DebugAdmin.RecoverLeaseCommand.TimeoutMs + " ms...");
        Uninterruptibles.SleepUninterruptibly(DebugAdmin.RecoverLeaseCommand.TimeoutMs, TimeUnit.Milliseconds);
        System.Console.Error.WriteLine("Retry #" + retry);
    }
    System.Console.Error.WriteLine("Giving up on recoverLease for " + pathStr + " after " +
        maxRetries + (maxRetries == 1 ? " try." : " tries."));
    return 1;
}
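// Usage sketch for the command implemented by Run above. This method backs the
// "hdfs debug recoverLease" subcommand; the path "/tmp1.txt" and the retry count
// of 3 below are illustrative values, not taken from this source:
//
//   hdfs debug recoverLease -path /tmp1.txt -retries 3
//
// On success it prints "recoverLease SUCCEEDED on <path>" and returns 0; otherwise
// it sleeps TimeoutMs between attempts and returns 1 once maxRetries attempts
// have been exhausted.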