/// <exception cref="System.IO.IOException"/> private void CorruptBlock(MiniDFSCluster cluster, FileSystem fs, Path fileName, int dnIndex, ExtendedBlock block) { // corrupt the block on datanode dnIndex // the indexes change once the nodes are restarted. // But the datadirectory will not change NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(dnIndex, block)); MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0); // Each datanode has multiple data dirs, check each for (int dirIndex = 0; dirIndex < 2; dirIndex++) { string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath storageDir = cluster.GetStorageDir(dnIndex, dirIndex); FilePath dataDir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); FilePath scanLogFile = new FilePath(dataDir, "dncp_block_verification.log.curr"); if (scanLogFile.Exists()) { // wait for one minute for deletion to succeed; for (int i = 0; !scanLogFile.Delete(); i++) { NUnit.Framework.Assert.IsTrue("Could not delete log file in one minute", i < 60); try { Sharpen.Thread.Sleep(1000); } catch (Exception) { } } } } // restart the detained so the corrupt replica will be detected cluster.RestartDataNode(dnProps); }
public virtual void TestArrayOutOfBoundsException() { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build(); cluster.WaitActive(); FileSystem fs = cluster.GetFileSystem(); Path filePath = new Path("/tmp.txt"); long fileLen = 1L; DFSTestUtil.CreateFile(fs, filePath, fileLen, (short)2, 1L); // get the block string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath storageDir = cluster.GetInstanceStorageDir(0, 0); FilePath dataDir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); NUnit.Framework.Assert.IsTrue("Data directory does not exist", dataDir.Exists()); ExtendedBlock blk = GetBlock(bpid, dataDir); if (blk == null) { storageDir = cluster.GetInstanceStorageDir(0, 1); dataDir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); blk = GetBlock(bpid, dataDir); } NUnit.Framework.Assert.IsFalse("Data directory does not contain any blocks or there was an " + "IO error", blk == null); // start a third datanode cluster.StartDataNodes(conf, 1, true, null, null); AList<DataNode> datanodes = cluster.GetDataNodes(); NUnit.Framework.Assert.AreEqual(3, datanodes.Count); DataNode dataNode = datanodes[2]; // report the corrupted block from the third datanode DatanodeRegistration dnR = DataNodeTestUtils.GetDNRegistrationForBP(dataNode, blk.GetBlockPoolId()); FSNamesystem ns = cluster.GetNamesystem(); ns.WriteLock(); try { cluster.GetNamesystem().GetBlockManager().FindAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), "TEST", "STORAGE_ID"); } finally { ns.WriteUnlock(); } // open the file fs.Open(filePath); // clean up fs.Delete(filePath, false); } finally { if (cluster != null) { cluster.Shutdown(); } } }
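FindAndMarkBlockAsCorrupt mutates namenode state and therefore runs under the namesystem write lock, hence the WriteLock()/WriteUnlock() try/finally above. That locking pattern factored into a helper, using only calls shown in this test; the helper name is hypothetical:

// Hypothetical helper: report a replica as corrupt while holding the
// FSNamesystem write lock, mirroring the try/finally in the test above.
private static void MarkCorrupt(FSNamesystem ns, ExtendedBlock blk, DatanodeInfo node)
{
    ns.WriteLock();
    try
    {
        // "TEST" is the reason string and "STORAGE_ID" the storage id, as in the test.
        ns.GetBlockManager().FindAndMarkBlockAsCorrupt(blk, node, "TEST", "STORAGE_ID");
    }
    finally
    {
        ns.WriteUnlock();
    }
}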
/// <summary> /// look for real blocks /// by counting *.meta files in all the storage dirs /// </summary> /// <param name="map"/> /// <returns/> private int CountRealBlocks(IDictionary <string, TestDataNodeVolumeFailure.BlockLocs > map) { int total = 0; string bpid = cluster.GetNamesystem().GetBlockPoolId(); for (int i = 0; i < dn_num; i++) { for (int j = 0; j <= 1; j++) { FilePath storageDir = cluster.GetInstanceStorageDir(i, j); FilePath dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); if (dir == null) { System.Console.Out.WriteLine("dir is null for dn=" + i + " and data_dir=" + j); continue; } IList <FilePath> res = MiniDFSCluster.GetAllBlockMetadataFiles(dir); if (res == null) { System.Console.Out.WriteLine("res is null for dir = " + dir + " i=" + i + " and j=" + j); continue; } //System.out.println("for dn" + i + "." + j + ": " + dir + "=" + res.length+ " files"); //int ii = 0; foreach (FilePath f in res) { string s = f.GetName(); // cut off "blk_-" at the beginning and ".meta" at the end NUnit.Framework.Assert.IsNotNull("Block file name should not be null", s); string bid = Sharpen.Runtime.Substring(s, s.IndexOf("_") + 1, s.LastIndexOf("_")); //System.out.println(ii++ + ". block " + s + "; id=" + bid); TestDataNodeVolumeFailure.BlockLocs val = map[bid]; if (val == null) { val = new TestDataNodeVolumeFailure.BlockLocs(this); } val.num_files++; // one more file for the block map[bid] = val; } //System.out.println("dir1="+dir.getPath() + "blocks=" + res.length); //System.out.println("dir2="+dir2.getPath() + "blocks=" + res2.length); total += res.Count; } } return(total); }
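CountRealBlocks relies on the meta file naming convention blk_<blockId>_<genStamp>.meta: the block id sits between the first and last underscore, which also handles negative ids ("blk_-123_1001.meta"). That extraction in isolation, assuming the same convention; ParseBlockId is a hypothetical helper:

// Hypothetical helper: pull the block id out of a meta file name of the form
// "blk_<blockId>_<genStamp>.meta", mirroring the substring logic above.
private static string ParseBlockId(string metaFileName)
{
    int first = metaFileName.IndexOf("_");     // underscore after the "blk" prefix
    int last = metaFileName.LastIndexOf("_");  // underscore before "<genStamp>.meta"
    return Sharpen.Runtime.Substring(metaFileName, first + 1, last);
}
// Example: ParseBlockId("blk_1073741825_1001.meta") returns "1073741825".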
public virtual void TestFileCorruption() { MiniDFSCluster cluster = null; DFSTestUtil util = new DFSTestUtil.Builder().SetName("TestFileCorruption").SetNumFiles (20).Build(); try { Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build(); FileSystem fs = cluster.GetFileSystem(); util.CreateFiles(fs, "/srcdat"); // Now deliberately remove the blocks FilePath storageDir = cluster.GetInstanceStorageDir(2, 0); string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists()); FilePath[] blocks = data_dir.ListFiles(); NUnit.Framework.Assert.IsTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.Length > 0)); for (int idx = 0; idx < blocks.Length; idx++) { if (!blocks[idx].GetName().StartsWith(Block.BlockFilePrefix)) { continue; } System.Console.Out.WriteLine("Deliberately removing file " + blocks[idx].GetName( )); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blocks[idx].Delete()); } NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly.", util.CheckFiles (fs, "/srcdat")); util.Cleanup(fs, "/srcdat"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
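Tests in this section distinguish block files from their companions by name: block files start with Block.BlockFilePrefix ("blk_"), and meta files additionally end in ".meta". A sketch of that filter as a helper, using the FilePath and AList types from this file; the helper name is hypothetical:

// Hypothetical helper: list only the data block files in a finalized dir,
// skipping ".meta" companions and any scanner bookkeeping files.
private static IList<FilePath> ListBlockFiles(FilePath dataDir)
{
    IList<FilePath> result = new AList<FilePath>();
    FilePath[] files = dataDir.ListFiles();
    if (files == null)
    {
        return result; // dataDir is missing or not a directory
    }
    foreach (FilePath f in files)
    {
        if (f.GetName().StartsWith(Block.BlockFilePrefix) && !f.GetName().EndsWith(".meta"))
        {
            result.AddItem(f);
        }
    }
    return result;
}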
/// <summary> /// For datanode, for a block pool, verify that the current and previous /// directories exist. /// </summary> /// <remarks> /// For datanode, for a block pool, verify that the current and previous /// directories exist. Verify that previous hasn't been modified by comparing /// the checksum of all its files with their original checksum. It /// is assumed that the server has recovered and upgraded. /// </remarks> /// <exception cref="System.IO.IOException"/> internal virtual void CheckDataNode(string[] baseDirs, string bpid) { for (int i = 0; i < baseDirs.Length; i++) { FilePath current = new FilePath(baseDirs[i], "current/" + bpid + "/current"); NUnit.Framework.Assert.AreEqual(UpgradeUtilities.ChecksumContents(HdfsServerConstants.NodeType .DataNode, current, false), UpgradeUtilities.ChecksumMasterDataNodeContents()); // block files are placed under <sd>/current/<bpid>/current/finalized FilePath currentFinalized = MiniDFSCluster.GetFinalizedDir(new FilePath(baseDirs[ i]), bpid); NUnit.Framework.Assert.AreEqual(UpgradeUtilities.ChecksumContents(HdfsServerConstants.NodeType .DataNode, currentFinalized, true), UpgradeUtilities.ChecksumMasterBlockPoolFinalizedContents ()); FilePath previous = new FilePath(baseDirs[i], "current/" + bpid + "/previous"); NUnit.Framework.Assert.IsTrue(previous.IsDirectory()); NUnit.Framework.Assert.AreEqual(UpgradeUtilities.ChecksumContents(HdfsServerConstants.NodeType .DataNode, previous, false), UpgradeUtilities.ChecksumMasterDataNodeContents()); FilePath previousFinalized = new FilePath(baseDirs[i], "current/" + bpid + "/previous" + "/finalized"); NUnit.Framework.Assert.AreEqual(UpgradeUtilities.ChecksumContents(HdfsServerConstants.NodeType .DataNode, previousFinalized, true), UpgradeUtilities.ChecksumMasterBlockPoolFinalizedContents ()); } }
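CheckDataNode walks a fixed per-block-pool layout under each storage directory. A sketch that just materializes those four paths for one storage dir, assuming the layout described in the comments above; the helper name is hypothetical:

// Hypothetical illustration of the directory layout CheckDataNode verifies:
//   <sd>/current/<bpid>/current            - active block pool state
//   <sd>/current/<bpid>/current/finalized  - finalized block files
//   <sd>/current/<bpid>/previous           - pre-upgrade snapshot
//   <sd>/current/<bpid>/previous/finalized - pre-upgrade finalized blocks
private static FilePath[] BlockPoolDirs(string baseDir, string bpid)
{
    return new FilePath[]
    {
        new FilePath(baseDir, "current/" + bpid + "/current"),
        new FilePath(baseDir, "current/" + bpid + "/current/finalized"),
        new FilePath(baseDir, "current/" + bpid + "/previous"),
        new FilePath(baseDir, "current/" + bpid + "/previous/finalized")
    };
}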
/// <summary> /// Test that blocks get replicated when there are corrupted blocks and the /// number of good replicas is at least the minimum replication. /// Simulate rbw blocks by creating dummy copies, then restart the DNs so /// those corrupted blocks are detected asap. /// </summary> /// <exception cref="System.Exception"/> public virtual void TestReplicationWhenBlockCorruption() { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); conf.SetLong(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, 1); cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build(); FileSystem fs = cluster.GetFileSystem(); FSDataOutputStream create = fs.Create(new Path("/test")); fs.SetReplication(new Path("/test"), (short)1); create.Write(new byte[1024]); create.Close(); IList<FilePath> nonParticipatedNodeDirs = new AList<FilePath>(); FilePath participatedNodeDirs = null; for (int i = 0; i < cluster.GetDataNodes().Count; i++) { FilePath storageDir = cluster.GetInstanceStorageDir(i, 0); string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); if (data_dir.ListFiles().Length == 0) { nonParticipatedNodeDirs.AddItem(data_dir); } else { participatedNodeDirs = data_dir; } } string blockFile = null; FilePath[] listFiles = participatedNodeDirs.ListFiles(); foreach (FilePath file in listFiles) { if (file.GetName().StartsWith(Block.BlockFilePrefix) && !file.GetName().EndsWith("meta")) { blockFile = file.GetName(); foreach (FilePath file1 in nonParticipatedNodeDirs) { file1.Mkdirs(); new FilePath(file1, blockFile).CreateNewFile(); new FilePath(file1, blockFile + "_1000.meta").CreateNewFile(); } break; } } fs.SetReplication(new Path("/test"), (short)3); cluster.RestartDataNodes(); // restart so all DNs detect the dummy copied blocks cluster.WaitActive(); cluster.TriggerBlockReports(); DFSTestUtil.WaitReplication(fs, new Path("/test"), (short)3); } finally { if (cluster != null) { cluster.Shutdown(); } } }
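The dummy-replica trick above plants an empty block file plus an empty meta file with a fabricated genstamp of 1000 into finalized dirs that held no replica; after the restart, those DNs report the fakes, the namenode marks them corrupt, and re-replication proceeds from the good copy. The planting step as a standalone sketch (helper name hypothetical, file naming as in the test):

// Hypothetical helper mirroring the inner loop above: fake a replica by
// creating an empty block file and an empty "<block>_1000.meta" companion.
private static void PlantDummyReplica(FilePath finalizedDir, string blockFileName)
{
    finalizedDir.Mkdirs();
    new FilePath(finalizedDir, blockFileName).CreateNewFile();
    new FilePath(finalizedDir, blockFileName + "_1000.meta").CreateNewFile();
}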
/// <summary>check if DFS can handle corrupted CRC blocks</summary> /// <exception cref="System.Exception"/> private void Thistest(Configuration conf, DFSTestUtil util) { MiniDFSCluster cluster = null; int numDataNodes = 2; short replFactor = 2; Random random = new Random(); // Set short retry timeouts so this test runs faster conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10); try { cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDataNodes).Build(); cluster.WaitActive(); FileSystem fs = cluster.GetFileSystem(); util.CreateFiles(fs, "/srcdat", replFactor); util.WaitReplication(fs, "/srcdat", (short)2); // Now deliberately remove/truncate meta blocks from the first // directory of the first datanode. The complete absence of a meta // file prevents this datanode from sending data to another datanode. // However, a client is allowed access to this block. // FilePath storageDir = cluster.GetInstanceStorageDir(0, 1); string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists()); FilePath[] blocks = data_dir.ListFiles(); NUnit.Framework.Assert.IsTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.Length > 0)); int num = 0; for (int idx = 0; idx < blocks.Length; idx++) { if (blocks[idx].GetName().StartsWith(Block.BlockFilePrefix) && blocks[idx].GetName().EndsWith(".meta")) { num++; if (num % 3 == 0) { // // remove .meta file // System.Console.Out.WriteLine("Deliberately removing file " + blocks[idx].GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blocks[idx].Delete()); } else { if (num % 3 == 1) { // // shorten .meta file // RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw"); FileChannel channel = file.GetChannel(); int newsize = random.Next((int)channel.Size() / 2); System.Console.Out.WriteLine("Deliberately truncating file " + blocks[idx].GetName() + " to size " + newsize + " bytes."); channel.Truncate(newsize); file.Close(); } else { // // corrupt a few bytes of the metafile // RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw"); FileChannel channel = file.GetChannel(); long position = 0; // // The very first time, corrupt the meta header at offset 0 // if (num != 2) { position = (long)random.Next((int)channel.Size()); } int length = random.Next((int)(channel.Size() - position + 1)); byte[] buffer = new byte[length]; random.NextBytes(buffer); channel.Write(ByteBuffer.Wrap(buffer), position); System.Console.Out.WriteLine("Deliberately corrupting file " + blocks[idx].GetName() + " at offset " + position + " length " + length); file.Close(); } } } } // // Now deliberately corrupt all meta blocks from the second // directory of the first datanode // storageDir = cluster.GetInstanceStorageDir(0, 1); data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists()); blocks = data_dir.ListFiles(); NUnit.Framework.Assert.IsTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.Length > 0)); int count = 0; FilePath previous = null; for (int idx_1 = 0; idx_1 < blocks.Length; idx_1++) { if (blocks[idx_1].GetName().StartsWith("blk_") && blocks[idx_1].GetName().EndsWith(".meta")) { // // Move the previous metafile into the current one. 
// count++; if (count % 2 == 0) { System.Console.Out.WriteLine("Deliberately inserting bad crc into files " + blocks[idx_1].GetName() + " " + previous.GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blocks[idx_1].Delete()); NUnit.Framework.Assert.IsTrue("Cannot corrupt meta file.", previous.RenameTo(blocks[idx_1])); NUnit.Framework.Assert.IsTrue("Cannot recreate empty meta file.", previous.CreateNewFile()); previous = null; } else { previous = blocks[idx_1]; } } } // // Only one replica is possibly corrupted. The other replica should still // be good. Verify. // NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly.", util.CheckFiles(fs, "/srcdat")); System.Console.Out.WriteLine("All files still have a valid replica"); // // set replication factor back to 1. This causes only one replica of // each block to remain in HDFS. The check is to make sure that // the corrupted replica generated above is the one that gets deleted. // This test is currently disabled until HADOOP-1557 is solved. // util.SetReplication(fs, "/srcdat", (short)1); //util.waitReplication(fs, "/srcdat", (short)1); //System.out.println("All Files done with removing replicas"); //assertTrue("Excess replicas deleted. Corrupted replicas found.", // util.checkFiles(fs, "/srcdat")); System.Console.Out.WriteLine("The excess-corrupted-replica test is disabled pending HADOOP-1557"); util.Cleanup(fs, "/srcdat"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
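Thistest cycles through three corruption modes per selected meta file: delete it, truncate it to a random smaller size, or overwrite a random span with random bytes. The overwrite step isolated into a sketch, using the same RandomAccessFile/FileChannel calls as above; the helper name is hypothetical:

// Hypothetical helper: overwrite "length" bytes at "position" with random
// data, the same FileChannel write Thistest uses to corrupt meta files.
private static void CorruptBytes(FilePath metaFile, long position, int length, Random random)
{
    RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
    FileChannel channel = file.GetChannel();
    byte[] buffer = new byte[length];
    random.NextBytes(buffer);
    channel.Write(ByteBuffer.Wrap(buffer), position);
    file.Close();
}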
public virtual void TestProcesOverReplicateBlock() { Configuration conf = new HdfsConfiguration(); conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L); conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L); conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, Sharpen.Extensions.ToString (2)); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build(); FileSystem fs = cluster.GetFileSystem(); try { Path fileName = new Path("/foo1"); DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L); DFSTestUtil.WaitReplication(fs, fileName, (short)3); // corrupt the block on datanode 0 ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName); NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block)); MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0); // remove block scanner log to trigger block scanning FilePath scanCursor = new FilePath(new FilePath(MiniDFSCluster.GetFinalizedDir(cluster .GetInstanceStorageDir(0, 0), cluster.GetNamesystem().GetBlockPoolId()).GetParent ()).GetParent(), "scanner.cursor"); //wait for one minute for deletion to succeed; for (int i = 0; !scanCursor.Delete(); i++) { NUnit.Framework.Assert.IsTrue("Could not delete " + scanCursor.GetAbsolutePath() + " in one minute", i < 60); try { Sharpen.Thread.Sleep(1000); } catch (Exception) { } } // restart the datanode so the corrupt replica will be detected cluster.RestartDataNode(dnProps); DFSTestUtil.WaitReplication(fs, fileName, (short)2); string blockPoolId = cluster.GetNamesystem().GetBlockPoolId(); DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes ()[2], blockPoolId); FSNamesystem namesystem = cluster.GetNamesystem(); BlockManager bm = namesystem.GetBlockManager(); HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager(); try { namesystem.WriteLock(); lock (hm) { // set live datanode's remaining space to be 0 // so they will be chosen to be deleted when over-replication occurs string corruptMachineName = corruptDataNode.GetXferAddr(); foreach (DatanodeDescriptor datanode in hm.GetDatanodes()) { if (!corruptMachineName.Equals(datanode.GetXferAddr())) { datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L); datanode.UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(datanode ), 0L, 0L, 0, 0, null); } } // decrease the replication factor to 1; NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1); // corrupt one won't be chosen to be excess one // without 4910 the number of live replicas would be 0: block gets lost NUnit.Framework.Assert.AreEqual(1, bm.CountNodes(block.GetLocalBlock()).LiveReplicas ()); } } finally { namesystem.WriteUnlock(); } } finally { cluster.Shutdown(); } }
/// <summary>Test if NN.listCorruptFiles() returns the right number of results.</summary> /// <remarks> /// Test if NN.listCorruptFiles() returns the right number of results. /// The corrupt blocks are detected by the BlockPoolSliceScanner. /// Also, test that DFS.listCorruptFileBlocks can make multiple successive /// calls. /// </remarks> /// <exception cref="System.Exception"/> public virtual void TestMaxCorruptFiles() { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); conf.SetInt(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 3 * 1000); // datanode sends block reports cluster = new MiniDFSCluster.Builder(conf).Build(); FileSystem fs = cluster.GetFileSystem(); int maxCorruptFileBlocks = FSNamesystem.DefaultMaxCorruptFileblocksReturned; // create 3 * maxCorruptFileBlocks files with one block each DFSTestUtil util = new DFSTestUtil.Builder().SetName("testMaxCorruptFiles").SetNumFiles(maxCorruptFileBlocks * 3).SetMaxLevels(1).SetMaxSize(512).Build(); util.CreateFiles(fs, "/srcdat2", (short)1); util.WaitReplication(fs, "/srcdat2", (short)1); // verify that there are no bad blocks. NameNode namenode = cluster.GetNameNode(); ICollection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.GetNamesystem().ListCorruptFileBlocks("/srcdat2", null); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " corrupt files. Expecting none.", badFiles.Count == 0); // Now deliberately remove blocks from all files string bpid = cluster.GetNamesystem().GetBlockPoolId(); for (int i = 0; i < 4; i++) { for (int j = 0; j <= 1; j++) { FilePath storageDir = cluster.GetInstanceStorageDir(i, j); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); Log.Info("Removing files from " + data_dir); IList<FilePath> metadataFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir); if (metadataFiles == null) { continue; } foreach (FilePath metadataFile in metadataFiles) { FilePath blockFile = Block.MetaToBlockFile(metadataFile); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blockFile.Delete()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", metadataFile.Delete()); } } } // Occasionally the BlockPoolSliceScanner can run before we have removed // the blocks. Restart the Datanode to trigger the scanner into running // once more. Log.Info("Restarting Datanode to trigger BlockPoolSliceScanner"); cluster.RestartDataNodes(); cluster.WaitActive(); badFiles = namenode.GetNamesystem().ListCorruptFileBlocks("/srcdat2", null); while (badFiles.Count < maxCorruptFileBlocks) { Log.Info("# of corrupt files is: " + badFiles.Count); Sharpen.Thread.Sleep(10000); badFiles = namenode.GetNamesystem().ListCorruptFileBlocks("/srcdat2", null); } badFiles = namenode.GetNamesystem().ListCorruptFileBlocks("/srcdat2", null); Log.Info("Namenode has bad files. " + badFiles.Count); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting " + maxCorruptFileBlocks + ".", badFiles.Count == maxCorruptFileBlocks); CorruptFileBlockIterator iter = (CorruptFileBlockIterator)fs.ListCorruptFileBlocks(new Path("/srcdat2")); int corruptPaths = CountPaths(iter); NUnit.Framework.Assert.IsTrue("Expected more than " + maxCorruptFileBlocks + " corrupt file blocks but got " + corruptPaths, corruptPaths > maxCorruptFileBlocks); NUnit.Framework.Assert.IsTrue("Iterator should have made more than 1 call but made " + iter.GetCallsMade(), iter.GetCallsMade() > 1); util.Cleanup(fs, "/srcdat2"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
/// <summary>test listCorruptFileBlocks in DistributedFileSystem</summary> /// <exception cref="System.Exception"/> public virtual void TestlistCorruptFileBlocksDFS() { Configuration conf = new Configuration(); conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000); conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanIntervalKey, 1); // datanode scans directories FileSystem fs = null; MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).Build(); cluster.WaitActive(); fs = cluster.GetFileSystem(); DistributedFileSystem dfs = (DistributedFileSystem)fs; DFSTestUtil util = new DFSTestUtil.Builder().SetName("testGetCorruptFiles").SetNumFiles(3).SetMaxLevels(1).SetMaxSize(1024).Build(); util.CreateFiles(fs, "/corruptData"); RemoteIterator<Path> corruptFileBlocks = dfs.ListCorruptFileBlocks(new Path("/corruptData")); int numCorrupt = CountPaths(corruptFileBlocks); NUnit.Framework.Assert.IsTrue(numCorrupt == 0); // delete the blocks string bpid = cluster.GetNamesystem().GetBlockPoolId(); // loop through the two data directories of datanode 0 for (int i = 0; i < 2; i++) { FilePath storageDir = cluster.GetInstanceStorageDir(0, i); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); IList<FilePath> metadataFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir); if (metadataFiles == null) { continue; } // assertTrue("Blocks do not exist in data-dir", (blocks != null) && // (blocks.length > 0)); foreach (FilePath metadataFile in metadataFiles) { FilePath blockFile = Block.MetaToBlockFile(metadataFile); Log.Info("Deliberately removing file " + blockFile.GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blockFile.Delete()); Log.Info("Deliberately removing file " + metadataFile.GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", metadataFile.Delete()); } } // break; int count = 0; corruptFileBlocks = dfs.ListCorruptFileBlocks(new Path("/corruptData")); numCorrupt = CountPaths(corruptFileBlocks); while (numCorrupt < 3) { Sharpen.Thread.Sleep(1000); corruptFileBlocks = dfs.ListCorruptFileBlocks(new Path("/corruptData")); numCorrupt = CountPaths(corruptFileBlocks); count++; if (count > 30) { break; } } // Validate we get all the corrupt files Log.Info("Namenode has bad files. " + numCorrupt); NUnit.Framework.Assert.IsTrue(numCorrupt == 3); util.Cleanup(fs, "/corruptData"); util.Cleanup(fs, "/goodData"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
/// <summary>check if nn.getCorruptFiles() returns a file that has corrupted blocks</summary> /// <exception cref="System.Exception"/> public virtual void TestListCorruptFilesCorruptedBlock() { MiniDFSCluster cluster = null; Random random = new Random(); try { Configuration conf = new HdfsConfiguration(); conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanIntervalKey, 1); // datanode scans directories conf.SetInt(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 3 * 1000); // datanode sends block reports // Set short retry timeouts so this test runs faster conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10); cluster = new MiniDFSCluster.Builder(conf).Build(); FileSystem fs = cluster.GetFileSystem(); // create two files with one block each DFSTestUtil util = new DFSTestUtil.Builder().SetName("testCorruptFilesCorruptedBlock" ).SetNumFiles(2).SetMaxLevels(1).SetMaxSize(512).Build(); util.CreateFiles(fs, "/srcdat10"); // fetch bad file list from namenode. There should be none. NameNode namenode = cluster.GetNameNode(); ICollection <FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.GetNamesystem( ).ListCorruptFileBlocks("/", null); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " corrupt files. Expecting None." , badFiles.Count == 0); // Now deliberately corrupt one block string bpid = cluster.GetNamesystem().GetBlockPoolId(); FilePath storageDir = cluster.GetInstanceStorageDir(0, 1); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists()); IList <FilePath> metaFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir); NUnit.Framework.Assert.IsTrue("Data directory does not contain any blocks or there was an " + "IO error", metaFiles != null && !metaFiles.IsEmpty()); FilePath metaFile = metaFiles[0]; RandomAccessFile file = new RandomAccessFile(metaFile, "rw"); FileChannel channel = file.GetChannel(); long position = channel.Size() - 2; int length = 2; byte[] buffer = new byte[length]; random.NextBytes(buffer); channel.Write(ByteBuffer.Wrap(buffer), position); file.Close(); Log.Info("Deliberately corrupting file " + metaFile.GetName() + " at offset " + position + " length " + length); // read all files to trigger detection of corrupted replica try { util.CheckFiles(fs, "/srcdat10"); } catch (BlockMissingException) { System.Console.Out.WriteLine("Received BlockMissingException as expected."); } catch (IOException e) { NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " + " but received IOException " + e, false); } // fetch bad file list from namenode. There should be one file. badFiles = namenode.GetNamesystem().ListCorruptFileBlocks("/", null); Log.Info("Namenode has bad files. " + badFiles.Count); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting 1." , badFiles.Count == 1); util.Cleanup(fs, "/srcdat10"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
// deliberately remove blocks from a file and validate the list-corrupt-file-blocks API /// <exception cref="System.Exception"/> public virtual void TestlistCorruptFileBlocks() { Configuration conf = new Configuration(); conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000); conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanIntervalKey, 1); // datanode scans // directories FileSystem fs = null; MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).Build(); cluster.WaitActive(); fs = cluster.GetFileSystem(); DFSTestUtil util = new DFSTestUtil.Builder().SetName("testGetCorruptFiles").SetNumFiles (3).SetMaxLevels(1).SetMaxSize(1024).Build(); util.CreateFiles(fs, "/corruptData"); NameNode namenode = cluster.GetNameNode(); ICollection <FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode.GetNamesystem ().ListCorruptFileBlocks("/corruptData", null); int numCorrupt = corruptFileBlocks.Count; NUnit.Framework.Assert.IsTrue(numCorrupt == 0); // delete the blocks string bpid = cluster.GetNamesystem().GetBlockPoolId(); for (int i = 0; i < 4; i++) { for (int j = 0; j <= 1; j++) { FilePath storageDir = cluster.GetInstanceStorageDir(i, j); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid); IList <FilePath> metadataFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir); if (metadataFiles == null) { continue; } // assertTrue("Blocks do not exist in data-dir", (blocks != null) && // (blocks.length > 0)); foreach (FilePath metadataFile in metadataFiles) { FilePath blockFile = Block.MetaToBlockFile(metadataFile); Log.Info("Deliberately removing file " + blockFile.GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", blockFile.Delete()); Log.Info("Deliberately removing file " + metadataFile.GetName()); NUnit.Framework.Assert.IsTrue("Cannot remove file.", metadataFile.Delete()); } } } // break; int count = 0; corruptFileBlocks = namenode.GetNamesystem().ListCorruptFileBlocks("/corruptData" , null); numCorrupt = corruptFileBlocks.Count; while (numCorrupt < 3) { Sharpen.Thread.Sleep(1000); corruptFileBlocks = namenode.GetNamesystem().ListCorruptFileBlocks("/corruptData" , null); numCorrupt = corruptFileBlocks.Count; count++; if (count > 30) { break; } } // Validate we get all the corrupt files Log.Info("Namenode has bad files. 
" + numCorrupt); NUnit.Framework.Assert.IsTrue(numCorrupt == 3); // test the paging here FSNamesystem.CorruptFileBlockInfo[] cfb = Sharpen.Collections.ToArray(corruptFileBlocks , new FSNamesystem.CorruptFileBlockInfo[0]); // now get the 2nd and 3rd file that is corrupt string[] cookie = new string[] { "1" }; ICollection <FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks = namenode.GetNamesystem ().ListCorruptFileBlocks("/corruptData", cookie); FSNamesystem.CorruptFileBlockInfo[] ncfb = Sharpen.Collections.ToArray(nextCorruptFileBlocks , new FSNamesystem.CorruptFileBlockInfo[0]); numCorrupt = nextCorruptFileBlocks.Count; NUnit.Framework.Assert.IsTrue(numCorrupt == 2); NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(ncfb[0].block.GetBlockName (), cfb[1].block.GetBlockName())); corruptFileBlocks = namenode.GetNamesystem().ListCorruptFileBlocks("/corruptData" , cookie); numCorrupt = corruptFileBlocks.Count; NUnit.Framework.Assert.IsTrue(numCorrupt == 0); // Do a listing on a dir which doesn't have any corrupt blocks and // validate util.CreateFiles(fs, "/goodData"); corruptFileBlocks = namenode.GetNamesystem().ListCorruptFileBlocks("/goodData", null ); numCorrupt = corruptFileBlocks.Count; NUnit.Framework.Assert.IsTrue(numCorrupt == 0); util.Cleanup(fs, "/corruptData"); util.Cleanup(fs, "/goodData"); } finally { if (cluster != null) { cluster.Shutdown(); } } }
/// <summary>Check that listCorruptFileBlocks works while the namenode is still in safemode. /// </summary> /// <exception cref="System.Exception"/> public virtual void TestListCorruptFileBlocksInSafeMode() { MiniDFSCluster cluster = null; Random random = new Random(); try { Configuration conf = new HdfsConfiguration(); // datanode scans directories conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanIntervalKey, 1); // datanode sends block reports conf.SetInt(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 3 * 1000); // never leave safemode automatically conf.SetFloat(DFSConfigKeys.DfsNamenodeSafemodeThresholdPctKey, 1.5f); // start populating repl queues immediately conf.SetFloat(DFSConfigKeys.DfsNamenodeReplQueueThresholdPctKey, 0f); // Set short retry timeouts so this test runs faster conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10); cluster = new MiniDFSCluster.Builder(conf).WaitSafeMode(false).Build(); cluster.GetNameNodeRpc().SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave, false); FileSystem fs = cluster.GetFileSystem(); // create two files with one block each DFSTestUtil util = new DFSTestUtil.Builder().SetName("testListCorruptFileBlocksInSafeMode" ).SetNumFiles(2).SetMaxLevels(1).SetMaxSize(512).Build(); util.CreateFiles(fs, "/srcdat10"); // fetch bad file list from namenode. There should be none. ICollection <FSNamesystem.CorruptFileBlockInfo> badFiles = cluster.GetNameNode().GetNamesystem ().ListCorruptFileBlocks("/", null); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " corrupt files. Expecting None." , badFiles.Count == 0); // Now deliberately corrupt one block FilePath storageDir = cluster.GetInstanceStorageDir(0, 0); FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, cluster.GetNamesystem ().GetBlockPoolId()); NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists()); IList <FilePath> metaFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir); NUnit.Framework.Assert.IsTrue("Data directory does not contain any blocks or there was an " + "IO error", metaFiles != null && !metaFiles.IsEmpty()); FilePath metaFile = metaFiles[0]; RandomAccessFile file = new RandomAccessFile(metaFile, "rw"); FileChannel channel = file.GetChannel(); long position = channel.Size() - 2; int length = 2; byte[] buffer = new byte[length]; random.NextBytes(buffer); channel.Write(ByteBuffer.Wrap(buffer), position); file.Close(); Log.Info("Deliberately corrupting file " + metaFile.GetName() + " at offset " + position + " length " + length); // read all files to trigger detection of corrupted replica try { util.CheckFiles(fs, "/srcdat10"); } catch (BlockMissingException) { System.Console.Out.WriteLine("Received BlockMissingException as expected."); } catch (IOException e) { NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException " + " but received IOException " + e, false); } // fetch bad file list from namenode. There should be one file. badFiles = cluster.GetNameNode().GetNamesystem().ListCorruptFileBlocks("/", null); Log.Info("Namenode has bad files. " + badFiles.Count); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting 1." 
, badFiles.Count == 1); // restart namenode cluster.RestartNameNode(0); fs = cluster.GetFileSystem(); // wait until replication queues have been initialized while (!cluster.GetNameNode().namesystem.IsPopulatingReplQueues()) { try { Log.Info("waiting for replication queues"); Sharpen.Thread.Sleep(1000); } catch (Exception) { } } // read all files to trigger detection of corrupted replica try { util.CheckFiles(fs, "/srcdat10"); } catch (BlockMissingException) { System.Console.Out.WriteLine("Received BlockMissingException as expected."); } catch (IOException e) { NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException " + " but received IOException " + e, false); } // fetch bad file list from namenode. There should be one file. badFiles = cluster.GetNameNode().GetNamesystem().ListCorruptFileBlocks("/", null); Log.Info("Namenode has bad files. " + badFiles.Count); NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting 1." , badFiles.Count == 1); // check that we are still in safe mode NUnit.Framework.Assert.IsTrue("Namenode is not in safe mode", cluster.GetNameNode ().IsInSafeMode()); // now leave safe mode so that we can clean up cluster.GetNameNodeRpc().SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave, false); util.Cleanup(fs, "/srcdat10"); } catch (Exception e) { Log.Error(StringUtils.StringifyException(e)); throw; } finally { if (cluster != null) { cluster.Shutdown(); } } }
public virtual void TestDeleteBlockPool() { // Start a cluster with 2 NNs and 2 DNs Configuration conf = new Configuration(); MiniDFSCluster cluster = null; try { conf.Set(DFSConfigKeys.DfsNameservices, "namesServerId1,namesServerId2"); cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleFederatedTopology(conf.Get(DFSConfigKeys.DfsNameservices))).NumDataNodes(2).Build(); cluster.WaitActive(); FileSystem fs1 = cluster.GetFileSystem(0); FileSystem fs2 = cluster.GetFileSystem(1); DFSTestUtil.CreateFile(fs1, new Path("/alpha"), 1024, (short)2, 54); DFSTestUtil.CreateFile(fs2, new Path("/beta"), 1024, (short)2, 54); DataNode dn1 = cluster.GetDataNodes()[0]; DataNode dn2 = cluster.GetDataNodes()[1]; string bpid1 = cluster.GetNamesystem(0).GetBlockPoolId(); string bpid2 = cluster.GetNamesystem(1).GetBlockPoolId(); FilePath dn1StorageDir1 = cluster.GetInstanceStorageDir(0, 0); FilePath dn1StorageDir2 = cluster.GetInstanceStorageDir(0, 1); FilePath dn2StorageDir1 = cluster.GetInstanceStorageDir(1, 0); FilePath dn2StorageDir2 = cluster.GetInstanceStorageDir(1, 1); // The block pool is still being served by dn1, so deleting it must fail try { dn1.DeleteBlockPool(bpid1, true); NUnit.Framework.Assert.Fail("Must not delete a running block pool"); } catch (IOException) { } Configuration nn1Conf = cluster.GetConfiguration(1); nn1Conf.Set(DFSConfigKeys.DfsNameservices, "namesServerId2"); dn1.RefreshNamenodes(nn1Conf); NUnit.Framework.Assert.AreEqual(1, dn1.GetAllBpOs().Length); try { dn1.DeleteBlockPool(bpid1, false); NUnit.Framework.Assert.Fail("Must not delete if any block files exist unless " + "force is true"); } catch (IOException) { } VerifyBlockPoolDirectories(true, dn1StorageDir1, bpid1); VerifyBlockPoolDirectories(true, dn1StorageDir2, bpid1); dn1.DeleteBlockPool(bpid1, true); VerifyBlockPoolDirectories(false, dn1StorageDir1, bpid1); VerifyBlockPoolDirectories(false, dn1StorageDir2, bpid1); fs1.Delete(new Path("/alpha"), true); // Wait till all blocks are deleted from dn2 for bpid1 (check both storage dirs). FilePath finalDir1 = MiniDFSCluster.GetFinalizedDir(dn2StorageDir1, bpid1); FilePath finalDir2 = MiniDFSCluster.GetFinalizedDir(dn2StorageDir2, bpid1); while ((!DatanodeUtil.DirNoFilesRecursive(finalDir1)) || (!DatanodeUtil.DirNoFilesRecursive(finalDir2))) { try { Sharpen.Thread.Sleep(3000); } catch (Exception) { } } cluster.ShutdownNameNode(0); // Although the namenode is shut down, the bp offerservice is still running // on dn2 try { dn2.DeleteBlockPool(bpid1, true); NUnit.Framework.Assert.Fail("Must not delete a running block pool"); } catch (IOException) { } dn2.RefreshNamenodes(nn1Conf); NUnit.Framework.Assert.AreEqual(1, dn2.GetAllBpOs().Length); VerifyBlockPoolDirectories(true, dn2StorageDir1, bpid1); VerifyBlockPoolDirectories(true, dn2StorageDir2, bpid1); // Now deleteBlockPool must succeed with force as false, because no // blocks exist for bpid1 and bpOfferService is also stopped for bpid1. 
dn2.DeleteBlockPool(bpid1, false); VerifyBlockPoolDirectories(false, dn2StorageDir1, bpid1); VerifyBlockPoolDirectories(false, dn2StorageDir2, bpid1); //bpid2 must not be impacted VerifyBlockPoolDirectories(true, dn1StorageDir1, bpid2); VerifyBlockPoolDirectories(true, dn1StorageDir2, bpid2); VerifyBlockPoolDirectories(true, dn2StorageDir1, bpid2); VerifyBlockPoolDirectories(true, dn2StorageDir2, bpid2); //make sure second block pool is running all fine Path gammaFile = new Path("/gamma"); DFSTestUtil.CreateFile(fs2, gammaFile, 1024, (short)1, 55); fs2.SetReplication(gammaFile, (short)2); DFSTestUtil.WaitReplication(fs2, gammaFile, (short)2); } finally { if (cluster != null) { cluster.Shutdown(); } } }
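The deletion wait in TestDeleteBlockPool polls DatanodeUtil.DirNoFilesRecursive every three seconds with no upper bound, so a regression would hang the test. A bounded variant of that poll, as a sketch; the helper name and attempt cap are hypothetical:

// Hypothetical helper: wait until a directory tree contains no files,
// polling every 3 seconds and failing the test after maxAttempts polls.
private static void WaitForEmptyDir(FilePath dir, int maxAttempts)
{
    for (int i = 0; !DatanodeUtil.DirNoFilesRecursive(dir); i++)
    {
        NUnit.Framework.Assert.IsTrue("Blocks were not deleted from " + dir, i < maxAttempts);
        try
        {
            Sharpen.Thread.Sleep(3000);
        }
        catch (Exception)
        {
        }
    }
}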
public virtual void TestVolumeFailure() { System.Console.Out.WriteLine("Data dir: is " + dataDir.GetPath()); // Data dir structure is dataDir/data[1-4]/[current,tmp...] // data1,2 is for datanode 1, data3,4 for datanode 2 string filename = "/test.txt"; Path filePath = new Path(filename); // we use only a small number of blocks to avoid creating subdirs in the data dir. int filesize = block_size * blocks_num; DFSTestUtil.CreateFile(fs, filePath, filesize, repl, 1L); DFSTestUtil.WaitReplication(fs, filePath, repl); System.Console.Out.WriteLine("file " + filename + " (size " + filesize + ") is created and replicated"); // fail the volume // delete/make non-writable one of the directories (failed volume) data_fail = new FilePath(dataDir, "data3"); failedDir = MiniDFSCluster.GetFinalizedDir(dataDir, cluster.GetNamesystem().GetBlockPoolId()); if (failedDir.Exists() && !DeteteBlocks(failedDir)) { //!FileUtil.fullyDelete(failedDir) throw new IOException("Could not delete hdfs directory '" + failedDir + "'"); } data_fail.SetReadOnly(); failedDir.SetReadOnly(); System.Console.Out.WriteLine("Deleting " + failedDir.GetPath() + "; exist=" + failedDir.Exists()); // access all the blocks on the "failed" DataNode, // we need to make sure that the "failed" volume is being accessed - // and that will cause failure, blocks removal, "emergency" block report TriggerFailure(filename, filesize); // make sure a block report is sent DataNode dn = cluster.GetDataNodes()[1]; // corresponds to dir data3 string bpid = cluster.GetNamesystem().GetBlockPoolId(); DatanodeRegistration dnR = dn.GetDNRegistrationForBP(bpid); IDictionary<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists = dn.GetFSDataset().GetBlockReports(bpid); // Send block report StorageBlockReport[] reports = new StorageBlockReport[perVolumeBlockLists.Count]; int reportIndex = 0; foreach (KeyValuePair<DatanodeStorage, BlockListAsLongs> kvPair in perVolumeBlockLists) { DatanodeStorage dnStorage = kvPair.Key; BlockListAsLongs blockList = kvPair.Value; reports[reportIndex++] = new StorageBlockReport(dnStorage, blockList); } cluster.GetNameNodeRpc().BlockReport(dnR, bpid, reports, null); // verify number of blocks and files... Verify(filename, filesize); // create another file (with one volume failed). System.Console.Out.WriteLine("creating file test1.txt"); Path fileName1 = new Path("/test1.txt"); DFSTestUtil.CreateFile(fs, fileName1, filesize, repl, 1L); // should be able to replicate to both nodes (2 DN, repl=2) DFSTestUtil.WaitReplication(fs, fileName1, repl); System.Console.Out.WriteLine("file " + fileName1.GetName() + " is created and replicated"); }
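The block-report assembly at the end of TestVolumeFailure converts the per-volume map returned by GetBlockReports into the StorageBlockReport[] that BlockReport expects, one entry per storage. The same conversion as a small helper (the name is hypothetical):

// Hypothetical helper: build the StorageBlockReport array from the
// per-volume block lists, exactly as the loop in TestVolumeFailure does.
private static StorageBlockReport[] BuildReports(IDictionary<DatanodeStorage, BlockListAsLongs> perVolume)
{
    StorageBlockReport[] reports = new StorageBlockReport[perVolume.Count];
    int reportIndex = 0;
    foreach (KeyValuePair<DatanodeStorage, BlockListAsLongs> kvPair in perVolume)
    {
        reports[reportIndex++] = new StorageBlockReport(kvPair.Key, kvPair.Value);
    }
    return reports;
}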