/// <summary>Create a file with one block and corrupt some/all of the block replicas.</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.Security.AccessControlException"/>
/// <exception cref="System.IO.FileNotFoundException"/>
/// <exception cref="Org.Apache.Hadoop.FS.UnresolvedLinkException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="Sharpen.TimeoutException"/>
private void CreateAFileWithCorruptedBlockReplicas(Path filePath, short repl, int corruptBlockCount)
{
    DFSTestUtil.CreateFile(dfs, filePath, BlockSize, repl, 0);
    DFSTestUtil.WaitReplication(dfs, filePath, repl);
    // Locate the file blocks by asking the name node
    LocatedBlocks locatedblocks = dfs.dfs.GetNamenode().GetBlockLocations(filePath.ToString(), 0L, BlockSize);
    NUnit.Framework.Assert.AreEqual(repl, locatedblocks.Get(0).GetLocations().Length);
    // The file only has one block
    LocatedBlock lblock = locatedblocks.Get(0);
    DatanodeInfo[] datanodeinfos = lblock.GetLocations();
    ExtendedBlock block = lblock.GetBlock();
    // corrupt some/all of the block replicas
    for (int i = 0; i < corruptBlockCount; i++)
    {
        DatanodeInfo dninfo = datanodeinfos[i];
        DataNode dn = cluster.GetDataNode(dninfo.GetIpcPort());
        CorruptBlock(block, dn);
        Log.Debug("Corrupted block " + block.GetBlockName() + " on data node " + dninfo);
    }
}
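// The CorruptBlock(ExtendedBlock, DataNode) helper called above is not shown in
// this section. A minimal sketch follows, assuming corruption is simulated by
// overwriting the replica's on-disk block file located via
// DataNodeTestUtils.GetBlockFile (the same API used in TestTC7 below); the
// actual helper may corrupt the replica differently.
private static void CorruptBlock(ExtendedBlock block, DataNode dn)
{
    // locate the block file of this replica on the given datanode
    FilePath blockFile = DataNodeTestUtils.GetBlockFile(dn, block.GetBlockPoolId(), block.GetLocalBlock());
    // overwrite the start of the file so the stored checksum no longer matches
    RandomAccessFile raf = new RandomAccessFile(blockFile, "rw");
    raf.Seek(0);
    raf.WriteBytes("corrupt replica");
    raf.Close();
}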
/// <summary>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// </summary>
/// <remarks>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// - Write several blocks to the FS with replication 1.
/// - Shutdown the DN.
/// - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
/// - Restart the DN.
/// In the bug, the standby node would only very slowly notice the blocks returning
/// to the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDatanodeRestarts()
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);
    // We read from the standby to watch block locations
    HAUtil.SetAllowStandbyReads(conf, true);
    conf.SetLong(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 0);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology()).NumDataNodes(1).Build();
    try
    {
        NameNode nn0 = cluster.GetNameNode(0);
        NameNode nn1 = cluster.GetNameNode(1);
        cluster.TransitionToActive(0);
        // Create 5 blocks.
        DFSTestUtil.CreateFile(cluster.GetFileSystem(0), TestFilePath, 5 * 1024, (short)1, 1L);
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        // Stop the DN.
        DataNode dn = cluster.GetDataNodes()[0];
        string dnName = dn.GetDatanodeId().GetXferAddr();
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
        // Make sure both NNs register it as dead.
        BlockManagerTestUtil.NoticeDeadDatanode(nn0, dnName);
        BlockManagerTestUtil.NoticeDeadDatanode(nn1, dnName);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(5, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        // The SBN will not have any blocks in its neededReplication queue
        // since the SBN doesn't process replication.
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        LocatedBlocks locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has no replicas", 0, locs.Get(0).GetLocations().Length);
        cluster.RestartDataNode(dnProps);
        // Wait for both NNs to re-register the DN.
        cluster.WaitActive(0);
        cluster.WaitActive(1);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(0, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has replicas again", 1, locs.Get(0).GetLocations().Length);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <exception cref="System.Exception"/> private void TestBadBlockReportOnTransfer(bool corruptBlockByDeletingBlockFile) { Configuration conf = new HdfsConfiguration(); FileSystem fs = null; DFSClient dfsClient = null; LocatedBlocks blocks = null; int replicaCount = 0; short replFactor = 1; MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build(); cluster.WaitActive(); fs = cluster.GetFileSystem(); dfsClient = new DFSClient(new IPEndPoint("localhost", cluster.GetNameNodePort()), conf); // Create file with replication factor of 1 Path file1 = new Path("/tmp/testBadBlockReportOnTransfer/file1"); DFSTestUtil.CreateFile(fs, file1, 1024, replFactor, 0); DFSTestUtil.WaitReplication(fs, file1, replFactor); // Corrupt the block belonging to the created file ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, file1); int blockFilesCorrupted = corruptBlockByDeletingBlockFile ? cluster.CorruptBlockOnDataNodesByDeletingBlockFile (block) : cluster.CorruptBlockOnDataNodes(block); NUnit.Framework.Assert.AreEqual("Corrupted too few blocks", replFactor, blockFilesCorrupted ); // Increase replication factor, this should invoke transfer request // Receiving datanode fails on checksum and reports it to namenode replFactor = 2; fs.SetReplication(file1, replFactor); // Now get block details and check if the block is corrupt blocks = dfsClient.GetNamenode().GetBlockLocations(file1.ToString(), 0, long.MaxValue ); while (blocks.Get(0).IsCorrupt() != true) { try { Log.Info("Waiting until block is marked as corrupt..."); Sharpen.Thread.Sleep(1000); } catch (Exception) { } blocks = dfsClient.GetNamenode().GetBlockLocations(file1.ToString(), 0, long.MaxValue ); } replicaCount = blocks.Get(0).GetLocations().Length; NUnit.Framework.Assert.IsTrue(replicaCount == 1); cluster.Shutdown(); }
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> /// <exception cref="Sharpen.TimeoutException"/> private void ChangeBlockLen(MiniDFSCluster cluster, int lenDelta) { Path fileName = new Path("/file1"); short ReplicationFactor = (short)1; FileSystem fs = cluster.GetFileSystem(); int fileLen = fs.GetConf().GetInt(DFSConfigKeys.DfsBytesPerChecksumKey, 512); DFSTestUtil.CreateFile(fs, fileName, fileLen, ReplicationFactor, 0); DFSTestUtil.WaitReplication(fs, fileName, ReplicationFactor); ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName); // Change the length of a replica for (int i = 0; i < cluster.GetDataNodes().Count; i++) { if (DFSTestUtil.ChangeReplicaLength(cluster, block, i, lenDelta)) { break; } } // increase the file's replication factor fs.SetReplication(fileName, (short)(ReplicationFactor + 1)); // block replication triggers corrupt block detection DFSClient dfsClient = new DFSClient(new IPEndPoint("localhost", cluster.GetNameNodePort ()), fs.GetConf()); LocatedBlocks blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString (), 0, fileLen); if (lenDelta < 0) { // replica truncated while (!blocks.Get(0).IsCorrupt() || ReplicationFactor != blocks.Get(0).GetLocations ().Length) { Sharpen.Thread.Sleep(100); blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString(), 0, fileLen ); } } else { // no corruption detected; block replicated while (ReplicationFactor + 1 != blocks.Get(0).GetLocations().Length) { Sharpen.Thread.Sleep(100); blocks = dfsClient.GetNamenode().GetBlockLocations(fileName.ToString(), 0, fileLen ); } } fs.Delete(fileName, true); }
/// <summary>Verify whether the first block of the file is corrupted (for all of its replicas).</summary>
/// <exception cref="Org.Apache.Hadoop.Security.AccessControlException"/>
/// <exception cref="System.IO.FileNotFoundException"/>
/// <exception cref="Org.Apache.Hadoop.FS.UnresolvedLinkException"/>
/// <exception cref="System.IO.IOException"/>
private void VerifyFirstBlockCorrupted(Path filePath, bool isCorrupted)
{
    LocatedBlocks locatedBlocks = dfs.dfs.GetNamenode().GetBlockLocations(filePath.ToUri().GetPath(), 0, long.MaxValue);
    LocatedBlock firstLocatedBlock = locatedBlocks.Get(0);
    NUnit.Framework.Assert.AreEqual(isCorrupted, firstLocatedBlock.IsCorrupt());
}
public virtual void TestRetryAddBlockWhileInChooseTarget()
{
    string src = "/testRetryAddBlockWhileInChooseTarget";
    FSNamesystem ns = cluster.GetNamesystem();
    NamenodeProtocols nn = cluster.GetNameNodeRpc();
    // create the file
    nn.Create(src, FsPermission.GetFileDefault(), "clientName", new EnumSetWritable<CreateFlag>(EnumSet.Of(CreateFlag.Create)), true, (short)3, 1024, null);
    // start the first addBlock()
    Log.Info("Starting first addBlock for " + src);
    LocatedBlock[] onRetryBlock = new LocatedBlock[1];
    DatanodeStorageInfo[] targets = ns.GetNewBlockTargets(src, INodeId.GrandfatherInodeId, "clientName", null, null, null, onRetryBlock);
    NUnit.Framework.Assert.IsNotNull("Targets must be generated", targets);
    // run the second addBlock()
    Log.Info("Starting second addBlock for " + src);
    nn.AddBlock(src, "clientName", null, null, INodeId.GrandfatherInodeId, null);
    NUnit.Framework.Assert.IsTrue("Penultimate block must be complete", CheckFileProgress(src, false));
    LocatedBlocks lbs = nn.GetBlockLocations(src, 0, long.MaxValue);
    NUnit.Framework.Assert.AreEqual("Must be one block", 1, lbs.GetLocatedBlocks().Count);
    LocatedBlock lb2 = lbs.Get(0);
    NUnit.Framework.Assert.AreEqual("Wrong replication", Replication, lb2.GetLocations().Length);
    // continue the first addBlock()
    LocatedBlock newBlock = ns.StoreAllocatedBlock(src, INodeId.GrandfatherInodeId, "clientName", null, targets);
    NUnit.Framework.Assert.AreEqual("Blocks are not equal", lb2.GetBlock(), newBlock.GetBlock());
    // check the locations
    lbs = nn.GetBlockLocations(src, 0, long.MaxValue);
    NUnit.Framework.Assert.AreEqual("Must be one block", 1, lbs.GetLocatedBlocks().Count);
    LocatedBlock lb1 = lbs.Get(0);
    NUnit.Framework.Assert.AreEqual("Wrong replication", Replication, lb1.GetLocations().Length);
    NUnit.Framework.Assert.AreEqual("Blocks are not equal", lb1.GetBlock(), lb2.GetBlock());
}
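// CheckFileProgress is defined elsewhere in this test class. A hedged sketch,
// assuming it delegates to FSNamesystem.CheckFileProgress under the namesystem
// read lock to ask whether the last block (or all blocks) of the file has
// reached minimal replication; the field and method names below are assumptions
// and the real helper may differ.
private bool CheckFileProgress(string src, bool checkall)
{
    FSNamesystem ns = cluster.GetNamesystem();
    ns.ReadLock();
    try
    {
        // ask the namesystem directly, as an RPC client cannot observe this state
        return ns.CheckFileProgress(src, ns.dir.GetINode(src).AsFile(), checkall);
    }
    finally
    {
        ns.ReadUnlock();
    }
}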
public virtual void TestBlockMoveAcrossStorageInSameNode()
{
    Configuration conf = new HdfsConfiguration();
    // create only one datanode in the cluster to verify movement within the datanode
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).StorageTypes(new StorageType[] { StorageType.Disk, StorageType.Archive }).Build();
    try
    {
        cluster.WaitActive();
        DistributedFileSystem dfs = cluster.GetFileSystem();
        Path file = new Path("/testBlockMoveAcrossStorageInSameNode/file");
        DFSTestUtil.CreateFile(dfs, file, 1024, (short)1, 1024);
        LocatedBlocks locatedBlocks = dfs.GetClient().GetLocatedBlocks(file.ToString(), 0);
        // get the current block locations
        LocatedBlock locatedBlock = locatedBlocks.Get(0);
        ExtendedBlock block = locatedBlock.GetBlock();
        DatanodeInfo[] locations = locatedBlock.GetLocations();
        NUnit.Framework.Assert.AreEqual(1, locations.Length);
        StorageType[] storageTypes = locatedBlock.GetStorageTypes();
        // the current block should be written to DISK
        NUnit.Framework.Assert.IsTrue(storageTypes[0] == StorageType.Disk);
        DatanodeInfo source = locations[0];
        // Move the block to ARCHIVE by using the same DatanodeInfo for source,
        // proxy and destination so that the movement happens within the datanode.
        NUnit.Framework.Assert.IsTrue(ReplaceBlock(block, source, source, source, StorageType.Archive));
        // wait till the namenode is notified
        Sharpen.Thread.Sleep(3000);
        locatedBlocks = dfs.GetClient().GetLocatedBlocks(file.ToString(), 0);
        // re-fetch the current block locations
        locatedBlock = locatedBlocks.Get(0);
        NUnit.Framework.Assert.AreEqual("Storage should be only one", 1, locatedBlock.GetLocations().Length);
        NUnit.Framework.Assert.IsTrue("Block should be moved to ARCHIVE", locatedBlock.GetStorageTypes()[0] == StorageType.Archive);
    }
    finally
    {
        cluster.Shutdown();
    }
}
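// The ReplaceBlock helper used above is not shown here. A rough sketch follows,
// assuming it issues a DataTransferProtocol replaceBlock request to the
// destination datanode through Sender, much as TestReplicationError below
// writes a block over a raw socket; the protobuf response handling, socket
// options and timeouts are simplified and may differ from the actual helper.
private static bool ReplaceBlock(ExtendedBlock block, DatanodeInfo source, DatanodeInfo sourceProxy, DatanodeInfo destination, StorageType targetStorageType)
{
    Socket sock = Sharpen.Extensions.CreateSocket(destination.GetIpAddr(), destination.GetXferPort());
    try
    {
        // ask the destination to copy the replica from the proxy, hinting that
        // the source's replica may be deleted afterwards
        DataOutputStream @out = new DataOutputStream(sock.GetOutputStream());
        new Sender(@out).ReplaceBlock(block, targetStorageType, BlockTokenSecretManager.DummyToken, source.GetDatanodeUuid(), sourceProxy);
        @out.Flush();
        // a SUCCESS status in the reply means the movement was accepted
        DataInputStream reply = new DataInputStream(sock.GetInputStream());
        DataTransferProtos.BlockOpResponseProto proto = DataTransferProtos.BlockOpResponseProto.ParseDelimitedFrom(reply);
        return proto.GetStatus() == DataTransferProtos.Status.Success;
    }
    finally
    {
        sock.Close();
    }
}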
/// <summary>
/// Verify the number of corrupted block replicas by fetching the block
/// locations from the name node.
/// </summary>
/// <exception cref="Org.Apache.Hadoop.Security.AccessControlException"/>
/// <exception cref="System.IO.FileNotFoundException"/>
/// <exception cref="Org.Apache.Hadoop.FS.UnresolvedLinkException"/>
/// <exception cref="System.IO.IOException"/>
private void VerifyCorruptedBlockCount(Path filePath, int expectedReplicas)
{
    LocatedBlocks lBlocks = dfs.dfs.GetNamenode().GetBlockLocations(filePath.ToUri().GetPath(), 0, long.MaxValue);
    // we expect only the first block of the file to be used for this test
    LocatedBlock firstLocatedBlock = lBlocks.Get(0);
    NUnit.Framework.Assert.AreEqual(expectedReplicas, firstLocatedBlock.GetLocations().Length);
}
public virtual void TestReplicationError()
{
    // create a file with a replication factor of 1
    Path fileName = new Path("/test.txt");
    int fileLen = 1;
    DFSTestUtil.CreateFile(fs, fileName, 1, (short)1, 1L);
    DFSTestUtil.WaitReplication(fs, fileName, (short)1);
    // get the block belonging to the created file
    LocatedBlocks blocks = NameNodeAdapter.GetBlockLocations(cluster.GetNameNode(), fileName.ToString(), 0, (long)fileLen);
    NUnit.Framework.Assert.AreEqual("Should only find 1 block", 1, blocks.LocatedBlockCount());
    LocatedBlock block = blocks.Get(0);
    // bring up a second datanode
    cluster.StartDataNodes(conf, 1, true, null, null);
    cluster.WaitActive();
    int sndNode = 1;
    DataNode datanode = cluster.GetDataNodes()[sndNode];
    // replicate the block to the second datanode
    IPEndPoint target = datanode.GetXferAddress();
    Socket s = Sharpen.Extensions.CreateSocket(target.Address, target.Port);
    // write the header
    DataOutputStream @out = new DataOutputStream(s.GetOutputStream());
    DataChecksum checksum = DataChecksum.NewDataChecksum(DataChecksum.Type.Crc32, 512);
    new Sender(@out).WriteBlock(block.GetBlock(), StorageType.Default, BlockTokenSecretManager.DummyToken, string.Empty, new DatanodeInfo[0], new StorageType[0], null, BlockConstructionStage.PipelineSetupCreate, 1, 0L, 0L, 0L, checksum, CachingStrategy.NewDefaultStrategy(), false, false, null);
    @out.Flush();
    // close the connection before sending the content of the block
    @out.Close();
    // the temporary block & meta files should be deleted
    string bpid = cluster.GetNamesystem().GetBlockPoolId();
    FilePath storageDir = cluster.GetInstanceStorageDir(sndNode, 0);
    FilePath dir1 = MiniDFSCluster.GetRbwDir(storageDir, bpid);
    storageDir = cluster.GetInstanceStorageDir(sndNode, 1);
    FilePath dir2 = MiniDFSCluster.GetRbwDir(storageDir, bpid);
    while (dir1.ListFiles().Length != 0 || dir2.ListFiles().Length != 0)
    {
        Sharpen.Thread.Sleep(100);
    }
    // then increase the file's replication factor
    fs.SetReplication(fileName, (short)2);
    // replication should succeed
    DFSTestUtil.WaitReplication(fs, fileName, (short)2);
    // clean up the file
    fs.Delete(fileName, false);
}
/// <exception cref="System.IO.IOException"/> public virtual void TestDeprecatedGetBlockLocalPathInfoRpc() { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Format( true).Build(); cluster.WaitActive(); FileSystem fs = cluster.GetFileSystem(); try { DFSTestUtil.CreateFile(fs, new Path("/tmp/x"), 16, (short)1, 23); LocatedBlocks lb = cluster.GetNameNode().GetRpcServer().GetBlockLocations("/tmp/x" , 0, 16); // Create a new block object, because the block inside LocatedBlock at // namenode is of type BlockInfo. ExtendedBlock blk = new ExtendedBlock(lb.Get(0).GetBlock()); Org.Apache.Hadoop.Security.Token.Token <BlockTokenIdentifier> token = lb.Get(0).GetBlockToken (); DatanodeInfo dnInfo = lb.Get(0).GetLocations()[0]; ClientDatanodeProtocol proxy = DFSUtil.CreateClientDatanodeProtocolProxy(dnInfo, conf, 60000, false); try { proxy.GetBlockLocalPathInfo(blk, token); NUnit.Framework.Assert.Fail("The call should have failed as this user " + " is not allowed to call getBlockLocalPathInfo" ); } catch (IOException ex) { NUnit.Framework.Assert.IsTrue(ex.Message.Contains("not allowed to call getBlockLocalPathInfo" )); } } finally { fs.Close(); cluster.Shutdown(); } }
/// <summary>TC11: Racing rename</summary>
/// <exception cref="System.Exception"/>
private void TestTC11(bool appendToNewBlock)
{
    Path p = new Path("/TC11/foo" + (appendToNewBlock ? "0" : "1"));
    System.Console.Out.WriteLine("p=" + p);
    // a. Create a file and write one block of data. Close the file.
    int len1 = (int)BlockSize;
    {
        FSDataOutputStream @out = fs.Create(p, false, buffersize, Replication, BlockSize);
        AppendTestUtil.Write(@out, 0, len1);
        @out.Close();
    }
    // b. Reopen the file in "append" mode. Append half a block of data.
    FSDataOutputStream out_1 = appendToNewBlock ? fs.Append(p, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null) : fs.Append(p);
    int len2 = (int)BlockSize / 2;
    AppendTestUtil.Write(out_1, len1, len2);
    out_1.Hflush();
    // c. Rename the file to file.new.
    Path pnew = new Path(p + ".new");
    NUnit.Framework.Assert.IsTrue(fs.Rename(p, pnew));
    // d. Close the file handle that was opened in (b).
    out_1.Close();
    // check the block sizes
    long len = fs.GetFileStatus(pnew).GetLen();
    LocatedBlocks locatedblocks = fs.dfs.GetNamenode().GetBlockLocations(pnew.ToString(), 0L, len);
    int numblock = locatedblocks.LocatedBlockCount();
    for (int i = 0; i < numblock; i++)
    {
        LocatedBlock lb = locatedblocks.Get(i);
        ExtendedBlock blk = lb.GetBlock();
        long size = lb.GetBlockSize();
        if (i < numblock - 1)
        {
            NUnit.Framework.Assert.AreEqual(BlockSize, size);
        }
        foreach (DatanodeInfo datanodeinfo in lb.GetLocations())
        {
            DataNode dn = cluster.GetDataNode(datanodeinfo.GetIpcPort());
            Block metainfo = DataNodeTestUtils.GetFSDataset(dn).GetStoredBlock(blk.GetBlockPoolId(), blk.GetBlockId());
            NUnit.Framework.Assert.AreEqual(size, metainfo.GetNumBytes());
        }
    }
}
/// <summary>TC7: Corrupted replicas are present.</summary>
/// <exception cref="System.IO.IOException">an exception might be thrown</exception>
/// <exception cref="System.Exception"/>
private void TestTC7(bool appendToNewBlock)
{
    short repl = 2;
    Path p = new Path("/TC7/foo" + (appendToNewBlock ? "0" : "1"));
    System.Console.Out.WriteLine("p=" + p);
    // a. Create a file with a replication factor of 2. Write half a block of data. Close the file.
    int len1 = (int)(BlockSize / 2);
    {
        FSDataOutputStream @out = fs.Create(p, false, buffersize, repl, BlockSize);
        AppendTestUtil.Write(@out, 0, len1);
        @out.Close();
    }
    DFSTestUtil.WaitReplication(fs, p, repl);
    // b. Log into one datanode that has one replica of this block.
    //    Find the block file on this datanode and truncate it to zero size.
    LocatedBlocks locatedblocks = fs.dfs.GetNamenode().GetBlockLocations(p.ToString(), 0L, len1);
    NUnit.Framework.Assert.AreEqual(1, locatedblocks.LocatedBlockCount());
    LocatedBlock lb = locatedblocks.Get(0);
    ExtendedBlock blk = lb.GetBlock();
    NUnit.Framework.Assert.AreEqual(len1, lb.GetBlockSize());
    DatanodeInfo[] datanodeinfos = lb.GetLocations();
    NUnit.Framework.Assert.AreEqual(repl, datanodeinfos.Length);
    DataNode dn = cluster.GetDataNode(datanodeinfos[0].GetIpcPort());
    FilePath f = DataNodeTestUtils.GetBlockFile(dn, blk.GetBlockPoolId(), blk.GetLocalBlock());
    RandomAccessFile raf = new RandomAccessFile(f, "rw");
    AppendTestUtil.Log.Info("dn=" + dn + ", blk=" + blk + " (length=" + blk.GetNumBytes() + ")");
    NUnit.Framework.Assert.AreEqual(len1, raf.Length());
    raf.SetLength(0);
    raf.Close();
    // c. Open the file in "append" mode. Append a new block worth of data. Close the file.
    int len2 = (int)BlockSize;
    {
        FSDataOutputStream @out = appendToNewBlock ? fs.Append(p, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null) : fs.Append(p);
        AppendTestUtil.Write(@out, len1, len2);
        @out.Close();
    }
    // d. Reopen the file and read two blocks worth of data.
    AppendTestUtil.Check(fs, p, len1 + len2);
}
public virtual void TestBlockRecoveryWithLessMetafile()
{
    Configuration conf = new Configuration();
    conf.Set(DFSConfigKeys.DfsBlockLocalPathAccessUserKey, UserGroupInformation.GetCurrentUser().GetShortUserName());
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    Path file = new Path("/testRecoveryFile");
    DistributedFileSystem dfs = cluster.GetFileSystem();
    FSDataOutputStream @out = dfs.Create(file);
    int count = 0;
    while (count < 2 * 1024 * 1024)
    {
        @out.WriteBytes("Data");
        count += 4;
    }
    @out.Hsync();
    // abort the original stream
    ((DFSOutputStream)@out.GetWrappedStream()).Abort();
    LocatedBlocks locations = cluster.GetNameNodeRpc().GetBlockLocations(file.ToString(), 0, count);
    ExtendedBlock block = locations.Get(0).GetBlock();
    DataNode dn = cluster.GetDataNodes()[0];
    BlockLocalPathInfo localPathInfo = dn.GetBlockLocalPathInfo(block, null);
    FilePath metafile = new FilePath(localPathInfo.GetMetaPath());
    NUnit.Framework.Assert.IsTrue(metafile.Exists());
    // reduce the size of the block meta file
    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
    raf.SetLength(metafile.Length() - 20);
    raf.Close();
    // restart the DN to move the replica into the RWR state
    MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
    cluster.RestartDataNode(dnProp, true);
    // try to recover the lease
    DistributedFileSystem newdfs = (DistributedFileSystem)FileSystem.NewInstance(cluster.GetConfiguration(0));
    count = 0;
    while (++count < 10 && !newdfs.RecoverLease(file))
    {
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsTrue("File should be closed", newdfs.RecoverLease(file));
}
public virtual void TestCorruptTwoOutOfThreeReplicas()
{
    short repl = 3;
    int corruptBlockReplicas = 2;
    for (int i = 0; i < 2; i++)
    {
        string fileName = "/tmp/testClientReportBadBlock/CorruptTwoOutOfThreeReplicas" + i;
        Path filePath = new Path(fileName);
        CreateAFileWithCorruptedBlockReplicas(filePath, repl, corruptBlockReplicas);
        int replicaCount = 0;
        /*
         * The order of data nodes in the LocatedBlock returned by the name node
         * is sorted by NetworkTopology#pseudoSortByDistance. In the current
         * MiniDFSCluster the sorting of a returned LocatedBlock is based on a
         * random order; that is, the DFS client and the simulated data nodes in
         * the mini DFS cluster are considered to be neither on the same host nor
         * on the same rack. Therefore, even though we corrupted the first two
         * block replicas in order, there is no guarantee which replicas (good or
         * bad) the DFSClient will read first. So we re-read the file until the
         * expected number of replicas is returned.
         */
        while (replicaCount != repl - corruptBlockReplicas)
        {
            if (i == 0)
            {
                DfsClientReadFile(filePath);
            }
            else
            {
                DfsClientReadFileFromPosition(filePath);
            }
            LocatedBlocks blocks = dfs.dfs.GetNamenode().GetBlockLocations(filePath.ToString(), 0, long.MaxValue);
            replicaCount = blocks.Get(0).GetLocations().Length;
        }
        VerifyFirstBlockCorrupted(filePath, false);
        int expectedReplicaCount = repl - corruptBlockReplicas;
        VerifyCorruptedBlockCount(filePath, expectedReplicaCount);
        VerifyFsckHealth("Target Replicas is 3 but found 1 replica");
        TestFsckListCorruptFilesBlocks(filePath, 0);
    }
}
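// DfsClientReadFile is defined elsewhere in this test class. A minimal sketch,
// assuming it simply streams through the whole file with a DFSInputStream so
// that the client hits the corrupt replicas and reports them to the name node;
// the buffer size and error handling here are illustrative and may differ.
private void DfsClientReadFile(Path corruptedFile)
{
    DFSInputStream @in = dfs.dfs.Open(corruptedFile.ToUri().GetPath());
    byte[] buf = new byte[1024];
    int nRead = 0;
    try
    {
        do
        {
            // keep reading; reading a bad replica triggers the bad-block report
            nRead = @in.Read(buf, 0, buf.Length);
        }
        while (nRead > 0);
    }
    catch (ChecksumException)
    {
        // expected when a corrupt replica is chosen; the caller simply retries
    }
    finally
    {
        @in.Close();
    }
}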
/// <exception cref="System.Exception"/> private void WaitForAllReplicas(int expectedReplicaNum, Path file, DistributedFileSystem dfs) { for (int i = 0; i < 5; i++) { LocatedBlocks lbs = dfs.GetClient().GetLocatedBlocks(file.ToString(), 0, BlockSize ); LocatedBlock lb = lbs.Get(0); if (lb.GetLocations().Length >= expectedReplicaNum) { return; } else { Sharpen.Thread.Sleep(1000); } } }
// Waits for all of the blocks to have the expected replication.
/// <exception cref="System.IO.IOException"/>
private void WaitForBlockReplication(string filename, ClientProtocol namenode, int expected, long maxWaitSec)
{
    long start = Time.MonotonicNow();
    // wait for all the blocks to be replicated
    Log.Info("Checking for block replication for " + filename);
    LocatedBlocks blocks = namenode.GetBlockLocations(filename, 0, long.MaxValue);
    NUnit.Framework.Assert.AreEqual(numBlocks, blocks.LocatedBlockCount());
    for (int i = 0; i < numBlocks; ++i)
    {
        Log.Info("Checking for block: " + (i + 1));
        while (true)
        {
            // Loop to check for block i (usually when block 0 is done, all are done)
            blocks = namenode.GetBlockLocations(filename, 0, long.MaxValue);
            NUnit.Framework.Assert.AreEqual(numBlocks, blocks.LocatedBlockCount());
            LocatedBlock block = blocks.Get(i);
            int actual = block.GetLocations().Length;
            if (actual == expected)
            {
                Log.Info("Got enough replicas for " + (i + 1) + "th block " + block.GetBlock() + ", got " + actual + ".");
                break;
            }
            Log.Info("Not enough replicas for " + (i + 1) + "th block " + block.GetBlock() + " yet. Expecting " + expected + ", got " + actual + ".");
            if (maxWaitSec > 0 && (Time.MonotonicNow() - start) > (maxWaitSec * 1000))
            {
                throw new IOException("Timed out while waiting for all blocks to be replicated for " + filename);
            }
            try
            {
                Sharpen.Thread.Sleep(500);
            }
            catch (Exception)
            {
            }
        }
    }
}
public virtual void TestBlockMissingException()
{
    Log.Info("Test testBlockMissingException started.");
    long blockSize = 1024L;
    int numBlocks = 4;
    conf = new HdfsConfiguration();
    // Set short retry timeouts so this test runs faster
    conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10);
    try
    {
        dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDatanodes).Build();
        dfs.WaitActive();
        fileSys = dfs.GetFileSystem();
        Path file1 = new Path("/user/dhruba/raidtest/file1");
        CreateOldFile(fileSys, file1, 1, numBlocks, blockSize);
        // extract block locations from the file system; wait till the file is closed
        LocatedBlocks locations = fileSys.dfs.GetNamenode().GetBlockLocations(file1.ToString(), 0, numBlocks * blockSize);
        // remove the first block of the file
        Log.Info("Remove first block of file");
        CorruptBlock(file1, locations.Get(0).GetBlock());
        // validate that the system throws BlockMissingException
        ValidateFile(fileSys, file1);
    }
    finally
    {
        if (fileSys != null)
        {
            fileSys.Close();
        }
        if (dfs != null)
        {
            dfs.Shutdown();
        }
    }
    Log.Info("Test testBlockMissingException completed.");
}
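// The CorruptBlock overload used above takes the file and the block. A short
// sketch follows, assuming "removing" the block means deleting all of its
// replica files through MiniDFSCluster.GetAllBlockFiles (dfs is the cluster in
// this test class); the actual helper may differ.
private void CorruptBlock(Path file, ExtendedBlock blk)
{
    // deliberately delete the data block files so a read hits BlockMissingException
    FilePath[] blockFiles = dfs.GetAllBlockFiles(blk);
    foreach (FilePath f in blockFiles)
    {
        f.Delete();
        Log.Info("Deleted block " + f);
    }
}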
/* check if there are at least two nodes on the same rack */
/// <exception cref="System.IO.IOException"/>
private void CheckFile(FileSystem fileSys, Path name, int repl)
{
    Configuration conf = fileSys.GetConf();
    ClientProtocol namenode = NameNodeProxies.CreateProxy<ClientProtocol>(conf, fileSys.GetUri()).GetProxy();
    WaitForBlockReplication(name.ToString(), namenode, Math.Min(numDatanodes, repl), -1);
    LocatedBlocks locations = namenode.GetBlockLocations(name.ToString(), 0, long.MaxValue);
    FileStatus stat = fileSys.GetFileStatus(name);
    BlockLocation[] blockLocations = fileSys.GetFileBlockLocations(stat, 0L, long.MaxValue);
    // verify that the rack locations match
    NUnit.Framework.Assert.IsTrue(blockLocations.Length == locations.LocatedBlockCount());
    for (int i = 0; i < blockLocations.Length; i++)
    {
        LocatedBlock blk = locations.Get(i);
        DatanodeInfo[] datanodes = blk.GetLocations();
        string[] topologyPaths = blockLocations[i].GetTopologyPaths();
        NUnit.Framework.Assert.IsTrue(topologyPaths.Length == datanodes.Length);
        for (int j = 0; j < topologyPaths.Length; j++)
        {
            bool found = false;
            for (int k = 0; k < racks.Length; k++)
            {
                if (topologyPaths[j].StartsWith(racks[k]))
                {
                    found = true;
                    break;
                }
            }
            NUnit.Framework.Assert.IsTrue(found);
        }
    }
    bool isOnSameRack = true;
    bool isNotOnSameRack = true;
    foreach (LocatedBlock blk_1 in locations.GetLocatedBlocks())
    {
        DatanodeInfo[] datanodes = blk_1.GetLocations();
        if (datanodes.Length <= 1)
        {
            break;
        }
        if (datanodes.Length == 2)
        {
            isNotOnSameRack = !(datanodes[0].GetNetworkLocation().Equals(datanodes[1].GetNetworkLocation()));
            break;
        }
        isOnSameRack = false;
        isNotOnSameRack = false;
        for (int i_1 = 0; i_1 < datanodes.Length - 1; i_1++)
        {
            Log.Info("datanode " + i_1 + ": " + datanodes[i_1]);
            bool onRack = false;
            for (int j = i_1 + 1; j < datanodes.Length; j++)
            {
                if (datanodes[i_1].GetNetworkLocation().Equals(datanodes[j].GetNetworkLocation()))
                {
                    onRack = true;
                }
            }
            if (onRack)
            {
                isOnSameRack = true;
            }
            if (!onRack)
            {
                isNotOnSameRack = true;
            }
            if (isOnSameRack && isNotOnSameRack)
            {
                break;
            }
        }
        if (!isOnSameRack || !isNotOnSameRack)
        {
            break;
        }
    }
    NUnit.Framework.Assert.IsTrue(isOnSameRack);
    NUnit.Framework.Assert.IsTrue(isNotOnSameRack);
}
public virtual void TestReadSelectNonStaleDatanode()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
    long staleInterval = 30 * 1000 * 60;
    conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes).Racks(racks).Build();
    cluster.WaitActive();
    IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    IList<DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager().GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Live);
    NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList.Count);
    FileSystem fileSys = cluster.GetFileSystem();
    FSDataOutputStream stm = null;
    try
    {
        // Do the writing but do not close the FSDataOutputStream,
        // in order to mimic an ongoing write.
        Path fileName = new Path("/file1");
        stm = fileSys.Create(fileName, true, fileSys.GetConf().GetInt(CommonConfigurationKeys.IoFileBufferSizeKey, 4096), (short)3, blockSize);
        stm.Write(new byte[(blockSize * 3) / 2]);
        // We do not close the stream so that the writing seems to be still ongoing
        stm.Hflush();
        LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);
        DataNode staleNode = null;
        DatanodeDescriptor staleNodeInfo = null;
        // stop the heartbeat of the first node
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // mark the first node as stale
        staleNodeInfo = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));
        LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(), nodesAfterStale[2].GetHostName());
        // restart the staleNode's heartbeat
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
        // reset the first node as non-stale, so as to avoid two stale nodes
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);
        LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue).GetLastLocatedBlock();
        nodes = lastBlock.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);
        // stop the heartbeat of the first node for the last block
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // mark the node as stale
        DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));
        LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue).GetLastLocatedBlock();
        nodesAfterStale = lastBlockAfterStale.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(), nodesAfterStale[2].GetHostName());
    }
    finally
    {
        if (stm != null)
        {
            stm.Close();
        }
        client.Close();
        cluster.Shutdown();
    }
}
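// StopDataNodeHeartbeat is defined elsewhere in this test class. A minimal
// sketch, assuming it finds the datanode by host name and disables its
// heartbeat through DataNodeTestUtils (the same API used above to re-enable
// heartbeats); the actual helper may differ.
private DataNode StopDataNodeHeartbeat(MiniDFSCluster cluster, string hostName)
{
    foreach (DataNode dn in cluster.GetDataNodes())
    {
        if (dn.GetDatanodeId().GetHostName().Equals(hostName))
        {
            // stop sending heartbeats so the namenode can later mark this node stale
            DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
            return dn;
        }
    }
    return null;
}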