/// <summary>
/// Check that listCorruptFileBlocks works while the namenode is still in safemode.
/// </summary>
/// <remarks>
/// Steps performed by this test:
/// 1. start a mini cluster, create two small files and assert that no
///    corrupt file blocks are reported;
/// 2. overwrite the last two bytes of one block metadata file with random
///    data, read the files back and assert that exactly one corrupt file
///    is reported;
/// 3. restart the namenode, wait for the replication queues, read the
///    files again and assert that one corrupt file is still reported and
///    that the namenode is in safe mode;
/// 4. leave safe mode and clean up the test files.
/// The cluster is always shut down in the finally block.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestListCorruptFileBlocksInSafeMode()
{
    MiniDFSCluster cluster = null;
    try
    {
        Configuration conf = new HdfsConfiguration();
        // datanode scans directories
        conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanIntervalKey, 1);
        // datanode sends block reports
        conf.SetInt(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 3 * 1000);
        // never leave safemode automatically
        conf.SetFloat(DFSConfigKeys.DfsNamenodeSafemodeThresholdPctKey, 1.5f);
        // start populating repl queues immediately
        conf.SetFloat(DFSConfigKeys.DfsNamenodeReplQueueThresholdPctKey, 0f);
        // Set short retry timeouts so this test runs faster
        conf.SetInt(DFSConfigKeys.DfsClientRetryWindowBase, 10);
        cluster = new MiniDFSCluster.Builder(conf).WaitSafeMode(false).Build();
        // explicitly leave safe mode so that the test files can be created
        cluster.GetNameNodeRpc().SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave, false);
        FileSystem fs = cluster.GetFileSystem();

        // create two files with one block each
        DFSTestUtil util = new DFSTestUtil.Builder().SetName("testListCorruptFileBlocksInSafeMode"
            ).SetNumFiles(2).SetMaxLevels(1).SetMaxSize(512).Build();
        util.CreateFiles(fs, "/srcdat10");

        // fetch bad file list from namenode. There should be none.
        ICollection<FSNamesystem.CorruptFileBlockInfo> badFiles = cluster.GetNameNode().GetNamesystem
            ().ListCorruptFileBlocks("/", null);
        NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " corrupt files. Expecting None."
            , badFiles.Count == 0);

        // Now deliberately corrupt one block
        FilePath storageDir = cluster.GetInstanceStorageDir(0, 0);
        FilePath data_dir = MiniDFSCluster.GetFinalizedDir(storageDir, cluster.GetNamesystem
            ().GetBlockPoolId());
        NUnit.Framework.Assert.IsTrue("data directory does not exist", data_dir.Exists());
        IList<FilePath> metaFiles = MiniDFSCluster.GetAllBlockMetadataFiles(data_dir);
        NUnit.Framework.Assert.IsTrue("Data directory does not contain any blocks or there was an "
             + "IO error", metaFiles != null && !metaFiles.IsEmpty());
        FilePath metaFile = metaFiles[0];
        Random random = new Random();
        int length = 2;
        long position;
        RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
        try
        {
            // overwrite the last two bytes of the metadata file with random data
            FileChannel channel = file.GetChannel();
            position = channel.Size() - 2;
            byte[] buffer = new byte[length];
            random.NextBytes(buffer);
            channel.Write(ByteBuffer.Wrap(buffer), position);
        }
        finally
        {
            // always release the file handle, even when the write fails
            file.Close();
        }
        Log.Info("Deliberately corrupting file " + metaFile.GetName() + " at offset " + position
             + " length " + length);

        // read all files to trigger detection of corrupted replica
        try
        {
            util.CheckFiles(fs, "/srcdat10");
        }
        catch (BlockMissingException)
        {
            System.Console.Out.WriteLine("Received BlockMissingException as expected.");
        }
        catch (IOException e)
        {
            NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException "
                 + " but received IOException " + e, false);
        }

        // fetch bad file list from namenode. There should be one file.
        badFiles = cluster.GetNameNode().GetNamesystem().ListCorruptFileBlocks("/", null);
        Log.Info("Namenode has bad files. " + badFiles.Count);
        NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting 1."
            , badFiles.Count == 1);

        // restart namenode
        cluster.RestartNameNode(0);
        fs = cluster.GetFileSystem();

        // wait until replication queues have been initialized
        while (!cluster.GetNameNode().namesystem.IsPopulatingReplQueues())
        {
            try
            {
                Log.Info("waiting for replication queues");
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception)
            {
                // Deliberately ignored: the loop simply polls again.
                // NOTE(review): presumably only an interrupted sleep is expected here - confirm.
            }
        }

        // read all files to trigger detection of corrupted replica
        try
        {
            util.CheckFiles(fs, "/srcdat10");
        }
        catch (BlockMissingException)
        {
            System.Console.Out.WriteLine("Received BlockMissingException as expected.");
        }
        catch (IOException e)
        {
            NUnit.Framework.Assert.IsTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException "
                 + " but received IOException " + e, false);
        }

        // fetch bad file list from namenode. There should be one file.
        badFiles = cluster.GetNameNode().GetNamesystem().ListCorruptFileBlocks("/", null);
        Log.Info("Namenode has bad files. " + badFiles.Count);
        NUnit.Framework.Assert.IsTrue("Namenode has " + badFiles.Count + " bad files. Expecting 1."
            , badFiles.Count == 1);

        // check that we are still in safe mode
        NUnit.Framework.Assert.IsTrue("Namenode is not in safe mode", cluster.GetNameNode
            ().IsInSafeMode());

        // now leave safe mode so that we can clean up
        cluster.GetNameNodeRpc().SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave, false);
        util.Cleanup(fs, "/srcdat10");
    }
    catch (Exception e)
    {
        // log the full failure before letting it propagate to the test runner
        Log.Error(StringUtils.StringifyException(e));
        throw;
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}