/// <summary>Test that a full block report is sent after hot swapping volumes.</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.Conf.ReconfigurationException"/>
public virtual void TestFullBlockReportAfterRemovingVolumes()
{
    Configuration conf = new Configuration();
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    // Similar to TestTriggerBlockReport, set really long values for
    // dfs.blockreport.intervalMsec and dfs.heartbeat.interval, so that
    // incremental block reports and heartbeats won't be sent during this
    // test unless they're triggered manually.
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 10800000L);
    conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1080L);
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build();
    cluster.WaitActive();
    DataNode dn = cluster.GetDataNodes()[0];
    DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(dn, cluster.GetNameNode());
    // Remove a data dir from the datanode by reconfiguring it with only the
    // directory to keep.
    FilePath dataDirToKeep = new FilePath(cluster.GetDataDirectory(), "data1");
    dn.ReconfigurePropertyImpl(DFSConfigKeys.DfsDatanodeDataDirKey, dataDirToKeep.ToString());
    // We should get 1 full report.
    Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(1)).BlockReport(
        Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
        Matchers.Any<StorageBlockReport[]>(), Matchers.Any<BlockReportContext>());
}
public virtual void TestStorageReportHasStorageTypeAndState()
{
    // Make sure we are not testing with the default type; that would not
    // be a very good test.
    NUnit.Framework.Assert.AreNotSame(storageType, StorageType.Default);
    NameNode nn = cluster.GetNameNode();
    DataNode dn = cluster.GetDataNodes()[0];
    // Insert a spy object for the NN RPC.
    DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
    // Trigger a heartbeat so there is an interaction with the spy object.
    DataNodeTestUtils.TriggerHeartbeat(dn);
    // Verify that the callback was passed the expected parameters.
    ArgumentCaptor<StorageReport[]> captor = ArgumentCaptor.ForClass<StorageReport[]>();
    Org.Mockito.Mockito.Verify(nnSpy).SendHeartbeat(
        Matchers.Any<DatanodeRegistration>(), captor.Capture(),
        Matchers.AnyLong(), Matchers.AnyLong(), Matchers.AnyInt(),
        Matchers.AnyInt(), Matchers.AnyInt(),
        Org.Mockito.Mockito.Any<VolumeFailureSummary>());
    StorageReport[] reports = captor.GetValue();
    foreach (StorageReport report in reports)
    {
        Assert.AssertThat(report.GetStorage().GetStorageType(), IS.Is(storageType));
        Assert.AssertThat(report.GetStorage().GetState(), IS.Is(DatanodeStorage.State.Normal));
    }
}
/// <summary>
/// Verify that
/// <see cref="DataNode#checkDiskErrors()"/>
/// removes all metadata of a failed volume from the DataNode, so that we can
/// run reconfig on the same configuration to reload a new volume on the same
/// directory as the failed one.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.TimeoutException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="Org.Apache.Hadoop.Conf.ReconfigurationException"/>
public virtual void TestDirectlyReloadAfterCheckDiskError()
{
    StartDFSCluster(1, 2);
    CreateFile(new Path("/test"), 32, (short)2);
    DataNode dn = cluster.GetDataNodes()[0];
    string oldDataDir = dn.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey);
    FilePath dirToFail = new FilePath(cluster.GetDataDirectory(), "data1");
    FsVolumeImpl failedVolume = GetVolume(dn, dirToFail);
    NUnit.Framework.Assert.IsTrue("No FsVolume was found for " + dirToFail,
        failedVolume != null);
    long used = failedVolume.GetDfsUsed();
    DataNodeTestUtils.InjectDataDirFailure(dirToFail);
    // Trigger an async disk check and wait for the DataNode to detect the
    // disk failure.
    long lastDiskErrorCheck = dn.GetLastDiskErrorCheck();
    dn.CheckDiskErrorAsync();
    while (dn.GetLastDiskErrorCheck() == lastDiskErrorCheck)
    {
        Sharpen.Thread.Sleep(100);
    }
    CreateFile(new Path("/test1"), 32, (short)2);
    NUnit.Framework.Assert.AreEqual(used, failedVolume.GetDfsUsed());
    DataNodeTestUtils.RestoreDataDirFromFailure(dirToFail);
    dn.ReconfigurePropertyImpl(DFSConfigKeys.DfsDatanodeDataDirKey, oldDataDir);
    CreateFile(new Path("/test2"), 32, (short)2);
    FsVolumeImpl restoredVolume = GetVolume(dn, dirToFail);
    NUnit.Framework.Assert.IsTrue(restoredVolume != null);
    NUnit.Framework.Assert.IsTrue(restoredVolume != failedVolume);
    // More data has been written to this volume since it was restored.
    NUnit.Framework.Assert.IsTrue(restoredVolume.GetDfsUsed() > used);
}
public virtual void TestUnderReplicationAfterVolFailure()
{
    // This test relies on denying access to data volumes to simulate data
    // volume failure. This doesn't work on Windows, because an owner of an
    // object always has the ability to read and change permissions on the
    // object.
    Assume.AssumeTrue(!Path.Windows);
    // Bring up one more datanode.
    cluster.StartDataNodes(conf, 1, true, null, null);
    cluster.WaitActive();
    BlockManager bm = cluster.GetNamesystem().GetBlockManager();
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)3);
    // Fail the first volume on both datanodes.
    FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
    DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);
    Path file2 = new Path("/test2");
    DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file2, (short)3);
    // The under-replicated blocks are due to the failed volumes.
    int underReplicatedBlocks = BlockManagerTestUtil.CheckHeartbeatAndGetUnderReplicatedBlocksCount(
        cluster.GetNamesystem(), bm);
    NUnit.Framework.Assert.IsTrue(
        "There is no under replicated block after volume failure",
        underReplicatedBlocks > 0);
}
public virtual void TestConfigureMinValidVolumes()
{
    Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));
    // Bring up two additional datanodes that need both of their volumes
    // functioning in order to stay up.
    conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 0);
    cluster.StartDataNodes(conf, 2, true, null, null);
    cluster.WaitActive();
    DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    // Fail a volume on the 2nd DN.
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
    DataNodeTestUtils.InjectDataDirFailure(dn2Vol1);
    // Should only get two replicas (the first DN and the 3rd).
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)2);
    // Check that this single failure caused a DN to die.
    DFSTestUtil.WaitForDatanodeStatus(dm, 2, 1, 0, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    // If we restore the volume we should still only be able to get
    // two replicas since the DN is still considered dead.
    DataNodeTestUtils.RestoreDataDirFromFailure(dn2Vol1);
    Path file2 = new Path("/test2");
    DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file2, (short)2);
}
/// <summary>Ensure that a delayed IBR is generated for a block deleted on the DN.</summary>
/// <exception cref="System.Exception"/>
/// <exception cref="System.IO.IOException"/>
public virtual void TestReportBlockDeleted()
{
    try
    {
        // Trigger a block report to reset the IBR timer.
        DataNodeTestUtils.TriggerBlockReport(singletonDn);
        // Spy on calls from the DN to the NN.
        DatanodeProtocolClientSideTranslatorPB nnSpy = SpyOnDnCallsToNn();
        InjectBlockDeleted();
        // Sleep for a very short time since the IBR is generated
        // asynchronously.
        Sharpen.Thread.Sleep(2000);
        // Ensure that no block report was generated immediately.
        // Deleted blocks are reported when the IBR timer elapses.
        Org.Mockito.Mockito.Verify(nnSpy, Org.Mockito.Mockito.Times(0)).BlockReceivedAndDeleted(
            Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
            Matchers.Any<StorageReceivedDeletedBlocks[]>());
        // Trigger a block report; this also triggers an IBR.
        DataNodeTestUtils.TriggerBlockReport(singletonDn);
        Sharpen.Thread.Sleep(2000);
        // Ensure that the deleted block is reported.
        Org.Mockito.Mockito.Verify(nnSpy, Org.Mockito.Mockito.Times(1)).BlockReceivedAndDeleted(
            Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
            Matchers.Any<StorageReceivedDeletedBlocks[]>());
    }
    finally
    {
        cluster.Shutdown();
        cluster = null;
    }
}
/// <summary>
/// Test for the case where one of the DNs in the pipeline is in the
/// process of doing a block report exactly when the block is closed.
/// </summary>
/// <remarks>
/// Test for the case where one of the DNs in the pipeline is in the
/// process of doing a block report exactly when the block is closed.
/// In this case, the block report becomes delayed until after the
/// block is marked completed on the NN, and hence it reports an RBW
/// replica for a COMPLETE block. Such a report should not be marked
/// corrupt.
/// This is a regression test for HDFS-2791.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestOneReplicaRbwReportArrivesAfterBlockCompleted()
{
    CountDownLatch brFinished = new CountDownLatch(1);
    // The delayer counts down brFinished to inform the test that our block
    // report went through.
    GenericTestUtils.DelayAnswer delayer = new _DelayAnswer_579(brFinished, Log);
    string MethodName = GenericTestUtils.GetMethodName();
    Path filePath = new Path("/" + MethodName + ".dat");
    // Start a second DN for this test -- we're checking
    // what happens when one of the DNs is slowed for some reason.
    ReplFactor = 2;
    StartDNandWait(null, false);
    NameNode nn = cluster.GetNameNode();
    FSDataOutputStream @out = fs.Create(filePath, ReplFactor);
    try
    {
        AppendTestUtil.Write(@out, 0, 10);
        @out.Hflush();
        // Set up a spy so that we can delay the block report coming
        // from this node.
        DataNode dn = cluster.GetDataNodes()[0];
        DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
        Org.Mockito.Mockito.DoAnswer(delayer).When(spy).BlockReport(
            Org.Mockito.Mockito.AnyObject<DatanodeRegistration>(),
            Org.Mockito.Mockito.AnyString(),
            Org.Mockito.Mockito.AnyObject<StorageBlockReport[]>(),
            Org.Mockito.Mockito.AnyObject<BlockReportContext>());
        // Force a block report to be generated. The block report will have
        // an RBW replica in it. Wait for the RPC to be sent, but block
        // it before it gets to the NN.
        dn.ScheduleAllBlockReport(0);
        delayer.WaitForCall();
    }
    finally
    {
        IOUtils.CloseStream(@out);
    }
    // Now that the stream is closed, the NN will have the block in COMPLETE
    // state.
    delayer.Proceed();
    brFinished.Await();
    // Verify that no replicas are marked corrupt, and that the
    // file is still readable.
    BlockManagerTestUtil.UpdateState(nn.GetNamesystem().GetBlockManager());
    NUnit.Framework.Assert.AreEqual(0, nn.GetNamesystem().GetCorruptReplicaBlocks());
    DFSTestUtil.ReadFile(fs, filePath);
    // Ensure that the file is readable even from the DN that we futzed with.
    cluster.StopDataNode(1);
    DFSTestUtil.ReadFile(fs, filePath);
}
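// For readers unfamiliar with the pattern above: _DelayAnswer_579 is
// Sharpen's generated name for an anonymous GenericTestUtils.DelayAnswer
// subclass that gates the spied RPC on two latches. Below is a minimal,
// hedged sketch of just those latch mechanics in plain C# -- illustrative
// only, with no Mockito or Hadoop dependencies; the type and member names
// are assumptions, not the real API.
using System.Threading;

internal sealed class DelayGate
{
    private readonly CountdownEvent fired = new CountdownEvent(1);
    private readonly ManualResetEventSlim proceed = new ManualResetEventSlim(false);

    // Called on the intercepted RPC thread: signal arrival, then block
    // until the test thread calls Proceed().
    public void OnCall()
    {
        fired.Signal();
        proceed.Wait();
    }

    // Test thread: block until the RPC has been captured.
    public void WaitForCall()
    {
        fired.Wait();
    }

    // Test thread: release the blocked RPC so it reaches its target.
    public void Proceed()
    {
        proceed.Set();
    }
}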
public virtual void TestMultipleVolFailuresOnNode()
{
    // Reinitialize the cluster, configured with 4 storage locations per
    // DataNode and tolerating up to 2 failures.
    TearDown();
    InitCluster(3, 4, 2);
    // Calculate the total capacity of all the datanodes. Sleep for three
    // seconds to be sure the datanodes have had a chance to heartbeat their
    // capacities.
    Sharpen.Thread.Sleep(WaitForHeartbeats);
    DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    FilePath dn1Vol1 = new FilePath(dataDir, "data" + (4 * 0 + 1));
    FilePath dn1Vol2 = new FilePath(dataDir, "data" + (4 * 0 + 2));
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (4 * 1 + 1));
    FilePath dn2Vol2 = new FilePath(dataDir, "data" + (4 * 1 + 2));
    // Make the first two volume directories on the first two datanodes
    // non-accessible.
    DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn1Vol2, dn2Vol1, dn2Vol2);
    // Create file1 and wait for 3 replicas (i.e., all DNs can still store a
    // block). Then assert that all DNs are up, despite the volume failures.
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)3);
    AList<DataNode> dns = cluster.GetDataNodes();
    NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());
    CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[2], 0, true);
    // Ensure we wait a sufficient amount of time.
    System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
    // Eventually the NN should report four volume failures.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 4, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 4);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[2], true);
}
public virtual void SetUp()
{
    conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsNamenodePathBasedCacheRefreshIntervalMs, 100);
    conf.SetLong(DFSConfigKeys.DfsCachereportIntervalMsecKey, 500);
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    conf.SetLong(DFSConfigKeys.DfsDatanodeMaxLockedMemoryKey, CacheCapacity);
    conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1);
    prevCacheManipulator = NativeIO.POSIX.GetCacheManipulator();
    NativeIO.POSIX.SetCacheManipulator(new NativeIO.POSIX.NoMlockCacheManipulator());
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    cluster.WaitActive();
    fs = cluster.GetFileSystem();
    nn = cluster.GetNameNode();
    fsImage = nn.GetFSImage();
    dn = cluster.GetDataNodes()[0];
    fsd = dn.GetFSDataset();
    spyNN = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
}
public virtual void TestValidVolumesAtStartup()
{
    Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));
    // Make sure no DNs are running.
    cluster.ShutdownDataNodes();
    // Bring up a datanode with two default data dirs, but with one bad one.
    conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 1);
    // We use subdirectories 0 and 1 in order to have only a single
    // data dir's parent inject a failure.
    FilePath tld = new FilePath(MiniDFSCluster.GetBaseDirectory(), "badData");
    FilePath dataDir1 = new FilePath(tld, "data1");
    FilePath dataDir1Actual = new FilePath(dataDir1, "1");
    dataDir1Actual.Mkdirs();
    // Force an IOE to occur on one of the dfs.data.dir.
    FilePath dataDir2 = new FilePath(tld, "data2");
    PrepareDirToFail(dataDir2);
    FilePath dataDir2Actual = new FilePath(dataDir2, "2");
    // Start one DN, with a manually managed DN dir.
    conf.Set(DFSConfigKeys.DfsDatanodeDataDirKey,
        dataDir1Actual.GetPath() + "," + dataDir2Actual.GetPath());
    cluster.StartDataNodes(conf, 1, false, null, null);
    cluster.WaitActive();
    try
    {
        NUnit.Framework.Assert.IsTrue("The DN should have started up fine.",
            cluster.IsDataNodeUp());
        DataNode dn = cluster.GetDataNodes()[0];
        string si = DataNodeTestUtils.GetFSDataset(dn).GetStorageInfo();
        NUnit.Framework.Assert.IsTrue("The DN should have started with this directory",
            si.Contains(dataDir1Actual.GetPath()));
        NUnit.Framework.Assert.IsFalse("The DN shouldn't have a bad directory.",
            si.Contains(dataDir2Actual.GetPath()));
    }
    finally
    {
        cluster.ShutdownDataNodes();
        FileUtil.Chmod(dataDir2.ToString(), "755");
    }
}
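// PrepareDirToFail and DataNodeTestUtils.InjectDataDirFailure simulate a bad
// disk by making the directory inaccessible (note the chmod back to "755" in
// the finally block above), which is also why these tests skip Windows: an
// object's owner there can always re-grant itself access. A minimal, hedged
// sketch of the underlying idea in plain C#, shelling out to POSIX chmod --
// the class and method names are illustrative, not the real Hadoop helpers.
using System.Diagnostics;

internal static class DirFailureSim
{
    // Revoke all permissions so reads and writes fail, simulating a dead
    // volume (POSIX only).
    public static void MakeInaccessible(string dir)
    {
        Process.Start("chmod", "000 " + dir).WaitForExit();
    }

    // Restore default permissions, simulating a repaired or replaced volume.
    public static void Restore(string dir)
    {
        Process.Start("chmod", "755 " + dir).WaitForExit();
    }
}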
/// <summary>
/// Verify that the DataNode sends a single incremental block report for all
/// storages.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestDataNodeDoesNotSplitReports()
{
    LocatedBlocks blocks = CreateFileGetBlocks(GenericTestUtils.GetMethodName());
    Assert.AssertThat(cluster.GetDataNodes().Count, IS.Is(1));
    // Remove all blocks from the DataNode.
    foreach (LocatedBlock block in blocks.GetLocatedBlocks())
    {
        dn0.NotifyNamenodeDeletedBlock(block.GetBlock(), block.GetStorageIDs()[0]);
    }
    Log.Info("Triggering report after deleting blocks");
    long ops = MetricsAsserts.GetLongCounter("BlockReceivedAndDeletedOps",
        MetricsAsserts.GetMetrics(NnMetrics));
    // Trigger a report to the NameNode and give it a few seconds.
    DataNodeTestUtils.TriggerBlockReport(dn0);
    Sharpen.Thread.Sleep(5000);
    // Ensure that NameNodeRpcServer.blockReceivedAndDeleted is invoked
    // exactly once after we triggered the report.
    MetricsAsserts.AssertCounter("BlockReceivedAndDeletedOps", ops + 1,
        MetricsAsserts.GetMetrics(NnMetrics));
}
/// <exception cref="System.Exception"/> public virtual void TestDeleteBlockOnTransientStorage() { cluster = new MiniDFSCluster.Builder(Conf).StorageTypes(new StorageType[] { StorageType .RamDisk, StorageType.Default }).NumDataNodes(1).Build(); try { cluster.WaitActive(); bpid = cluster.GetNamesystem().GetBlockPoolId(); DataNode dataNode = cluster.GetDataNodes()[0]; fds = DataNodeTestUtils.GetFSDataset(cluster.GetDataNodes()[0]); client = cluster.GetFileSystem().GetClient(); scanner = new DirectoryScanner(dataNode, fds, Conf); scanner.SetRetainDiffs(true); FsDatasetTestUtil.StopLazyWriter(cluster.GetDataNodes()[0]); // Create a file file on RAM_DISK IList <LocatedBlock> blocks = CreateFile(GenericTestUtils.GetMethodName(), BlockLength , true); // Ensure no difference between volumeMap and disk. Scan(1, 0, 0, 0, 0, 0); // Make a copy of the block on DEFAULT storage and ensure that it is // picked up by the scanner. DuplicateBlock(blocks[0].GetBlock().GetBlockId()); Scan(2, 1, 0, 0, 0, 0, 1); // Ensure that the copy on RAM_DISK was deleted. VerifyStorageType(blocks[0].GetBlock().GetBlockId(), false); Scan(1, 0, 0, 0, 0, 0); } finally { if (scanner != null) { scanner.Shutdown(); scanner = null; } cluster.Shutdown(); cluster = null; } }
/// <summary>
/// Test that if splitThreshold is zero, then we always get a separate
/// call per storage.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestAlwaysSplit()
{
    StartUpCluster(0);
    NameNode nn = cluster.GetNameNode();
    DataNode dn = cluster.GetDataNodes()[0];
    // Create a file with a few blocks.
    CreateFile(GenericTestUtils.GetMethodName(), BlocksInFile);
    // Insert a spy object for the NN RPC.
    DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
    // Trigger a block report so there is an interaction with the spy object.
    DataNodeTestUtils.TriggerBlockReport(dn);
    ArgumentCaptor<StorageBlockReport[]> captor = ArgumentCaptor.ForClass<StorageBlockReport[]>();
    Org.Mockito.Mockito.Verify(nnSpy, Org.Mockito.Mockito.Times(cluster.GetStoragesPerDatanode()))
        .BlockReport(Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
            captor.Capture(), Org.Mockito.Mockito.AnyObject<BlockReportContext>());
    VerifyCapturedArguments(captor, 1, BlocksInFile);
}
public virtual void TestVolFailureStatsPreservedOnNNRestart()
{
    // Bring up two more datanodes that can tolerate 1 failure.
    cluster.StartDataNodes(conf, 2, true, null, null);
    cluster.WaitActive();
    DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    // Fail the first volume on both datanodes (we have to keep the
    // third healthy so one node in the pipeline will not fail).
    FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
    DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)2);
    AList<DataNode> dns = cluster.GetDataNodes();
    // The NN reports two volume failures.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 2);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
    // After restarting the NN, it should still see the two failures.
    cluster.RestartNameNode(0);
    cluster.WaitActive();
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 2);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
}
/// <summary>Spy on calls from the DN to the NN.</summary>
/// <returns>spy object that can be used for Mockito verification.</returns>
internal virtual DatanodeProtocolClientSideTranslatorPB SpyOnDnCallsToNn()
{
    return DataNodeTestUtils.SpyOnBposToNN(singletonDn, singletonNn);
}
/// <exception cref="System.IO.IOException"/> private void WaitForTempReplica(Block bl, int DnN1) { bool tooLongWait = false; int Timeout = 40000; if (Log.IsDebugEnabled()) { Log.Debug("Wait for datanode " + DnN1 + " to appear"); } while (cluster.GetDataNodes().Count <= DnN1) { WaitTil(20); } if (Log.IsDebugEnabled()) { Log.Debug("Total number of DNs " + cluster.GetDataNodes().Count); } cluster.WaitActive(); // Look about specified DN for the replica of the block from 1st DN DataNode dn1 = cluster.GetDataNodes()[DnN1]; string bpid = cluster.GetNamesystem().GetBlockPoolId(); Replica r = DataNodeTestUtils.FetchReplicaInfo(dn1, bpid, bl.GetBlockId()); long start = Time.MonotonicNow(); int count = 0; while (r == null) { WaitTil(5); r = DataNodeTestUtils.FetchReplicaInfo(dn1, bpid, bl.GetBlockId()); long waiting_period = Time.MonotonicNow() - start; if (count++ % 100 == 0) { if (Log.IsDebugEnabled()) { Log.Debug("Has been waiting for " + waiting_period + " ms."); } } if (waiting_period > Timeout) { NUnit.Framework.Assert.IsTrue("Was waiting too long to get ReplicaInfo from a datanode" , tooLongWait); } } HdfsServerConstants.ReplicaState state = r.GetState(); if (Log.IsDebugEnabled()) { Log.Debug("Replica state before the loop " + state.GetValue()); } start = Time.MonotonicNow(); while (state != HdfsServerConstants.ReplicaState.Temporary) { WaitTil(5); state = r.GetState(); if (Log.IsDebugEnabled()) { Log.Debug("Keep waiting for " + bl.GetBlockName() + " is in state " + state.GetValue ()); } if (Time.MonotonicNow() - start > Timeout) { NUnit.Framework.Assert.IsTrue("Was waiting too long for a replica to become TEMPORARY" , tooLongWait); } } if (Log.IsDebugEnabled()) { Log.Debug("Replica state after the loop " + state.GetValue()); } }
/// <summary>
/// Test that DataStorage and BlockPoolSliceStorage remove the failed volume
/// after failure.
/// </summary>
/// <exception cref="System.Exception"/>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.TimeoutException"/>
public virtual void TestFailedVolumeBeingRemovedFromDataNode()
{
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)2);
    FilePath dn0Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
    DataNodeTestUtils.InjectDataDirFailure(dn0Vol1);
    DataNode dn0 = cluster.GetDataNodes()[0];
    long lastDiskErrorCheck = dn0.GetLastDiskErrorCheck();
    dn0.CheckDiskErrorAsync();
    // Wait for the checkDiskError thread to finish discovering the volume
    // failure.
    while (dn0.GetLastDiskErrorCheck() == lastDiskErrorCheck)
    {
        Sharpen.Thread.Sleep(100);
    }
    // Verify dn0Vol1 has been completely removed from DN0.
    // 1. dn0Vol1 is removed from DataStorage.
    DataStorage storage = dn0.GetStorage();
    NUnit.Framework.Assert.AreEqual(1, storage.GetNumStorageDirs());
    for (int i = 0; i < storage.GetNumStorageDirs(); i++)
    {
        Storage.StorageDirectory sd = storage.GetStorageDir(i);
        NUnit.Framework.Assert.IsFalse(
            sd.GetRoot().GetAbsolutePath().StartsWith(dn0Vol1.GetAbsolutePath()));
    }
    string bpid = cluster.GetNamesystem().GetBlockPoolId();
    BlockPoolSliceStorage bpsStorage = storage.GetBPStorage(bpid);
    NUnit.Framework.Assert.AreEqual(1, bpsStorage.GetNumStorageDirs());
    for (int i = 0; i < bpsStorage.GetNumStorageDirs(); i++)
    {
        Storage.StorageDirectory sd = bpsStorage.GetStorageDir(i);
        NUnit.Framework.Assert.IsFalse(
            sd.GetRoot().GetAbsolutePath().StartsWith(dn0Vol1.GetAbsolutePath()));
    }
    // 2. dn0Vol1 is removed from FsDataset.
    FsDatasetSpi<FsVolumeSpi> data = dn0.GetFSDataset();
    foreach (FsVolumeSpi volume in data.GetVolumes())
    {
        Assert.AssertNotEquals(new FilePath(volume.GetBasePath()).GetAbsoluteFile(),
            dn0Vol1.GetAbsoluteFile());
    }
    // 3. All blocks on dn0Vol1 have been removed.
    foreach (ReplicaInfo replica in FsDatasetTestUtil.GetReplicas(data, bpid))
    {
        NUnit.Framework.Assert.IsNotNull(replica.GetVolume());
        Assert.AssertNotEquals(
            new FilePath(replica.GetVolume().GetBasePath()).GetAbsoluteFile(),
            dn0Vol1.GetAbsoluteFile());
    }
    // 4. dn0Vol1 is not in DN0's configuration or dataDirs anymore.
    string[] dataDirStrs = dn0.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey).Split(",");
    NUnit.Framework.Assert.AreEqual(1, dataDirStrs.Length);
    NUnit.Framework.Assert.IsFalse(dataDirStrs[0].Contains(dn0Vol1.GetAbsolutePath()));
}
public virtual void TestSuccessiveVolumeFailures()
{
    // Bring up two more datanodes.
    cluster.StartDataNodes(conf, 2, true, null, null);
    cluster.WaitActive();
    /*
     * Calculate the total capacity of all the datanodes. Sleep for
     * three seconds to be sure the datanodes have had a chance to
     * heartbeat their capacities.
     */
    Sharpen.Thread.Sleep(WaitForHeartbeats);
    DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
    FilePath dn3Vol1 = new FilePath(dataDir, "data" + (2 * 2 + 1));
    FilePath dn3Vol2 = new FilePath(dataDir, "data" + (2 * 2 + 2));
    /*
     * Make the 1st volume directories on the first two datanodes
     * non-accessible. We don't make all three 1st volume directories
     * readonly since that would cause the entire pipeline to
     * fail. The client does not retry failed nodes even though
     * perhaps they could succeed because just a single volume failed.
     */
    DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);
    /*
     * Create file1 and wait for 3 replicas (i.e., all DNs can still
     * store a block). Then assert that all DNs are up, despite the
     * volume failures.
     */
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)3);
    AList<DataNode> dns = cluster.GetDataNodes();
    NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());
    /*
     * The metrics should confirm the volume failures.
     */
    CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[2], 0, true);
    // Ensure we wait a sufficient amount of time.
    System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
    // Eventually the NN should report two volume failures.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 2);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[2], true);
    /*
     * Now fail a volume on the third datanode. We should be able to get
     * three replicas since we've already identified the other failures.
     */
    DataNodeTestUtils.InjectDataDirFailure(dn3Vol1);
    Path file2 = new Path("/test2");
    DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file2, (short)3);
    NUnit.Framework.Assert.IsTrue("DN3 should still be up", dns[2].IsDatanodeUp());
    CheckFailuresAtDataNode(dns[2], 1, true, dn3Vol1.GetAbsolutePath());
    DataNodeTestUtils.TriggerHeartbeat(dns[2]);
    CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());
    /*
     * Once the datanodes have a chance to heartbeat their new capacity the
     * total capacity should be down by three volumes (assuming the host
     * did not grow or shrink the data volume while the test was running).
     */
    dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 3, origCapacity - (3 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 3);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());
    /*
     * Now fail the 2nd volume on the 3rd datanode. All its volumes
     * are now failed and so it should report two volume failures
     * and that it's no longer up. Only wait for two replicas since
     * we'll never get a third.
     */
    DataNodeTestUtils.InjectDataDirFailure(dn3Vol2);
    Path file3 = new Path("/test3");
    DFSTestUtil.CreateFile(fs, file3, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file3, (short)2);
    // The DN should consider itself dead.
    DFSTestUtil.WaitForDatanodeDeath(dns[2]);
    // And report two failed volumes.
    CheckFailuresAtDataNode(dns[2], 2, true, dn3Vol1.GetAbsolutePath(),
        dn3Vol2.GetAbsolutePath());
    // The NN considers the DN dead.
    DFSTestUtil.WaitForDatanodeStatus(dm, 2, 1, 2, origCapacity - (4 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 2);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
    /*
     * The datanode never tries to restore the failed volume, even if
     * it's subsequently repaired, but it should see this volume on
     * restart, so file creation should be able to succeed after
     * restoring the data directories and restarting the datanodes.
     */
    DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1, dn3Vol1, dn3Vol2);
    cluster.RestartDataNodes();
    cluster.WaitActive();
    Path file4 = new Path("/test4");
    DFSTestUtil.CreateFile(fs, file4, 1024, (short)3, 1L);
    DFSTestUtil.WaitReplication(fs, file4, (short)3);
    /*
     * Eventually the capacity should be restored to its original value,
     * and the volume failure count should be reported as zero by
     * both the metrics and the NN.
     */
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 0);
    dns = cluster.GetDataNodes();
    CheckFailuresAtNameNode(dm, dns[0], true);
    CheckFailuresAtNameNode(dm, dns[1], true);
    CheckFailuresAtNameNode(dm, dns[2], true);
}
public virtual void TestDataNodeReconfigureWithVolumeFailures()
{
    // Bring up two more datanodes.
    cluster.StartDataNodes(conf, 2, true, null, null);
    cluster.WaitActive();
    DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
    // Fail the first volume on both datanodes (we have to keep the
    // third healthy so one node in the pipeline will not fail).
    FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
    FilePath dn1Vol2 = new FilePath(dataDir, "data" + (2 * 0 + 2));
    FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
    FilePath dn2Vol2 = new FilePath(dataDir, "data" + (2 * 1 + 2));
    DataNodeTestUtils.InjectDataDirFailure(dn1Vol1);
    DataNodeTestUtils.InjectDataDirFailure(dn2Vol1);
    Path file1 = new Path("/test1");
    DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
    DFSTestUtil.WaitReplication(fs, file1, (short)2);
    AList<DataNode> dns = cluster.GetDataNodes();
    NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
    NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());
    CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[2], 0, true);
    // Ensure we wait a sufficient amount of time.
    System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
    // The NN reports two volume failures.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 2);
    CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
    // Reconfigure again to try to add back the failed volumes.
    ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
    ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
    DataNodeTestUtils.TriggerHeartbeat(dns[0]);
    DataNodeTestUtils.TriggerHeartbeat(dns[1]);
    CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
    // Ensure we wait a sufficient amount of time.
    System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
    // The NN reports two volume failures again.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(false, 2);
    CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
    // Reconfigure a third time with the failed volumes. Afterwards, we expect
    // the same volume failures to be reported. (No double-counting.)
    ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
    ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
    DataNodeTestUtils.TriggerHeartbeat(dns[0]);
    DataNodeTestUtils.TriggerHeartbeat(dns[1]);
    CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
    // Ensure we wait a sufficient amount of time.
    System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
    // The NN reports two volume failures again.
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity),
        WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(false, 2);
    CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
    CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
    // Replace the failed volumes with healthy ones and reconfigure the
    // DataNodes. The failed volume information should be cleared.
    DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1);
    ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
    ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
    DataNodeTestUtils.TriggerHeartbeat(dns[0]);
    DataNodeTestUtils.TriggerHeartbeat(dns[1]);
    CheckFailuresAtDataNode(dns[0], 1, true);
    CheckFailuresAtDataNode(dns[1], 1, true);
    DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
    CheckAggregateFailuresAtNameNode(true, 0);
    CheckFailuresAtNameNode(dm, dns[0], true);
    CheckFailuresAtNameNode(dm, dns[1], true);
}
/// <summary>Force the DataNode to report missing blocks immediately.</summary>
/// <exception cref="System.IO.IOException"/>
private static void TriggerDeleteReport(DataNode datanode)
{
    datanode.ScheduleAllBlockReport(0);
    DataNodeTestUtils.TriggerDeletionReport(datanode);
}
/// <summary>Tests writing a file, verifying it, and closing it.</summary>
/// <remarks>
/// Tests writing a file, verifying it, and closing it. Then a couple of
/// random blocks are removed and a BlockReport is forced; the FSNamesystem is
/// pushed to recalculate the required DN activities such as replication and
/// so on. The number of missing and under-replicated blocks should be the
/// same in the case of a single-DN cluster.
/// </remarks>
/// <exception cref="System.IO.IOException">in case of errors</exception>
public virtual void BlockReport_02()
{
    string MethodName = GenericTestUtils.GetMethodName();
    Log.Info("Running test " + MethodName);
    Path filePath = new Path("/" + MethodName + ".dat");
    DFSTestUtil.CreateFile(fs, filePath, FileSize, ReplFactor, rand.NextLong());
    // Tamper with the newly created blocks and delete some of them.
    FilePath dataDir = new FilePath(cluster.GetDataDirectory());
    NUnit.Framework.Assert.IsTrue(dataDir.IsDirectory());
    IList<ExtendedBlock> blocks2Remove = new AList<ExtendedBlock>();
    IList<int> removedIndex = new AList<int>();
    IList<LocatedBlock> lBlocks = cluster.GetNameNodeRpc().GetBlockLocations(
        filePath.ToString(), FileStart, FileSize).GetLocatedBlocks();
    while (removedIndex.Count != 2)
    {
        int newRemoveIndex = rand.Next(lBlocks.Count);
        if (!removedIndex.Contains(newRemoveIndex))
        {
            removedIndex.AddItem(newRemoveIndex);
        }
    }
    foreach (int aRemovedIndex in removedIndex)
    {
        blocks2Remove.AddItem(lBlocks[aRemovedIndex].GetBlock());
    }
    if (Log.IsDebugEnabled())
    {
        Log.Debug("Number of blocks allocated " + lBlocks.Count);
    }
    DataNode dn0 = cluster.GetDataNodes()[DnN0];
    foreach (ExtendedBlock b in blocks2Remove)
    {
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Removing the block " + b.GetBlockName());
        }
        foreach (FilePath f in FindAllFiles(dataDir,
            new BlockReportTestBase.MyFileFilter(this, b.GetBlockName(), true)))
        {
            DataNodeTestUtils.GetFSDataset(dn0).UnfinalizeBlock(b);
            if (!f.Delete())
            {
                Log.Warn("Couldn't delete " + b.GetBlockName());
            }
            else
            {
                Log.Debug("Deleted file " + f.ToString());
            }
        }
    }
    WaitTil(TimeUnit.Seconds.ToMillis(DnRescanExtraWait));
    // All blocks belong to the same file, hence the same BP.
    string poolId = cluster.GetNamesystem().GetBlockPoolId();
    DatanodeRegistration dnR = dn0.GetDNRegistrationForBP(poolId);
    StorageBlockReport[] reports = GetBlockReports(dn0, poolId, false, false);
    SendBlockReports(dnR, poolId, reports);
    BlockManagerTestUtil.GetComputedDatanodeWork(cluster.GetNamesystem().GetBlockManager());
    PrintStats();
    NUnit.Framework.Assert.AreEqual("Wrong number of MissingBlocks found",
        blocks2Remove.Count, cluster.GetNamesystem().GetMissingBlocksCount());
    NUnit.Framework.Assert.AreEqual("Wrong number of UnderReplicatedBlocks found",
        blocks2Remove.Count, cluster.GetNamesystem().GetUnderReplicatedBlocks());
}
/// <exception cref="System.Exception"/> private void TestTriggerBlockReport(bool incremental) { Configuration conf = new HdfsConfiguration(); // Set a really long value for dfs.blockreport.intervalMsec and // dfs.heartbeat.interval, so that incremental block reports and heartbeats // won't be sent during this test unless they're triggered // manually. conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 10800000L); conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1080L); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build(); cluster.WaitActive(); FileSystem fs = cluster.GetFileSystem(); DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(cluster .GetDataNodes()[0], cluster.GetNameNode()); DFSTestUtil.CreateFile(fs, new Path("/abc"), 16, (short)1, 1L); // We should get 1 incremental block report. Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(1)).BlockReceivedAndDeleted (Matchers.Any <DatanodeRegistration>(), Matchers.AnyString(), Matchers.Any <StorageReceivedDeletedBlocks []>()); // We should not receive any more incremental or incremental block reports, // since the interval we configured is so long. for (int i = 0; i < 3; i++) { Sharpen.Thread.Sleep(10); Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(0)).BlockReport(Matchers.Any <DatanodeRegistration>(), Matchers.AnyString(), Matchers.Any <StorageBlockReport[] >(), Org.Mockito.Mockito.AnyObject <BlockReportContext>()); Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(1)).BlockReceivedAndDeleted (Matchers.Any <DatanodeRegistration>(), Matchers.AnyString(), Matchers.Any <StorageReceivedDeletedBlocks []>()); } // Create a fake block deletion notification on the DataNode. // This will be sent with the next incremental block report. ReceivedDeletedBlockInfo rdbi = new ReceivedDeletedBlockInfo(new Block(5678, 512, 1000), ReceivedDeletedBlockInfo.BlockStatus.DeletedBlock, null); DataNode datanode = cluster.GetDataNodes()[0]; BPServiceActor actor = datanode.GetAllBpOs()[0].GetBPServiceActors()[0]; string storageUuid = datanode.GetFSDataset().GetVolumes()[0].GetStorageID(); actor.NotifyNamenodeDeletedBlock(rdbi, storageUuid); // Manually trigger a block report. datanode.TriggerBlockReport(new BlockReportOptions.Factory().SetIncremental(incremental ).Build()); // triggerBlockReport returns before the block report is // actually sent. Wait for it to be sent here. if (incremental) { Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(2)).BlockReceivedAndDeleted (Matchers.Any <DatanodeRegistration>(), Matchers.AnyString(), Matchers.Any <StorageReceivedDeletedBlocks []>()); } else { Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000)).BlockReport(Matchers.Any <DatanodeRegistration>(), Matchers.AnyString(), Matchers.Any <StorageBlockReport[] >(), Org.Mockito.Mockito.AnyObject <BlockReportContext>()); } cluster.Shutdown(); }
public virtual void TestDeletedBlockWhenAddBlockIsInEdit()
{
    Configuration conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology()).NumDataNodes(1).Build();
    DFSClient client = null;
    try
    {
        cluster.WaitActive();
        NUnit.Framework.Assert.AreEqual("Number of namenodes is not 2", 2,
            cluster.GetNumNameNodes());
        // Transition namenode 0 to active.
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue("Namenode 0 should be in active state",
            cluster.GetNameNode(0).IsActiveState());
        NUnit.Framework.Assert.IsTrue("Namenode 1 should be in standby state",
            cluster.GetNameNode(1).IsStandbyState());
        // Trigger a heartbeat to mark DatanodeStorageInfo#heartbeatedSinceFailover
        // as true.
        DataNodeTestUtils.TriggerHeartbeat(cluster.GetDataNodes()[0]);
        FileSystem fs = cluster.GetFileSystem(0);
        // Trigger a blockReport to mark DatanodeStorageInfo#blockContentsStale
        // as false.
        cluster.GetDataNodes()[0].TriggerBlockReport(
            new BlockReportOptions.Factory().SetIncremental(false).Build());
        Path fileName = new Path("/tmp.txt");
        // Create a file with one block.
        DFSTestUtil.CreateFile(fs, fileName, 10L, (short)1, 1234L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)1);
        client = new DFSClient(cluster.GetFileSystem(0).GetUri(), conf);
        IList<LocatedBlock> locatedBlocks = client.GetNamenode().GetBlockLocations(
            "/tmp.txt", 0, 10L).GetLocatedBlocks();
        NUnit.Framework.Assert.IsTrue(locatedBlocks.Count == 1);
        NUnit.Framework.Assert.IsTrue(locatedBlocks[0].GetLocations().Length == 1);
        // Add a second datanode to the cluster.
        cluster.StartDataNodes(conf, 1, true, null, null, null, null);
        NUnit.Framework.Assert.AreEqual("Number of datanodes should be 2", 2,
            cluster.GetDataNodes().Count);
        DataNode dn0 = cluster.GetDataNodes()[0];
        DataNode dn1 = cluster.GetDataNodes()[1];
        string activeNNBPId = cluster.GetNamesystem(0).GetBlockPoolId();
        DatanodeDescriptor sourceDnDesc = NameNodeAdapter.GetDatanode(
            cluster.GetNamesystem(0), dn0.GetDNRegistrationForBP(activeNNBPId));
        DatanodeDescriptor destDnDesc = NameNodeAdapter.GetDatanode(
            cluster.GetNamesystem(0), dn1.GetDNRegistrationForBP(activeNNBPId));
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        Log.Info("replaceBlock: " + ReplaceBlock(block, (DatanodeInfo)sourceDnDesc,
            (DatanodeInfo)sourceDnDesc, (DatanodeInfo)destDnDesc));
        // Wait for the FsDatasetAsyncDiskService to delete the block.
        Sharpen.Thread.Sleep(3000);
        // Trigger an incremental block report to report the deleted block to
        // the namenode.
        cluster.GetDataNodes()[0].TriggerBlockReport(
            new BlockReportOptions.Factory().SetIncremental(true).Build());
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue("Namenode 1 should be in active state",
            cluster.GetNameNode(1).IsActiveState());
        NUnit.Framework.Assert.IsTrue("Namenode 0 should be in standby state",
            cluster.GetNameNode(0).IsStandbyState());
        client.Close();
        // Open a new client for the new active namenode.
        client = new DFSClient(cluster.GetFileSystem(1).GetUri(), conf);
        IList<LocatedBlock> locatedBlocks1 = client.GetNamenode().GetBlockLocations(
            "/tmp.txt", 0, 10L).GetLocatedBlocks();
        NUnit.Framework.Assert.AreEqual(1, locatedBlocks1.Count);
        NUnit.Framework.Assert.AreEqual("The block should be only on 1 datanode", 1,
            locatedBlocks1[0].GetLocations().Length);
    }
    finally
    {
        IOUtils.Cleanup(null, client);
        cluster.Shutdown();
    }
}
/// <exception cref="System.Exception"/> public virtual void RunTest(int parallelism) { cluster = new MiniDFSCluster.Builder(Conf).Build(); try { cluster.WaitActive(); bpid = cluster.GetNamesystem().GetBlockPoolId(); fds = DataNodeTestUtils.GetFSDataset(cluster.GetDataNodes()[0]); client = cluster.GetFileSystem().GetClient(); Conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanThreadsKey, parallelism); DataNode dataNode = cluster.GetDataNodes()[0]; scanner = new DirectoryScanner(dataNode, fds, Conf); scanner.SetRetainDiffs(true); // Add files with 100 blocks CreateFile(GenericTestUtils.GetMethodName(), BlockLength * 100, false); long totalBlocks = 100; // Test1: No difference between volumeMap and disk Scan(100, 0, 0, 0, 0, 0); // Test2: block metafile is missing long blockId = DeleteMetaFile(); Scan(totalBlocks, 1, 1, 0, 0, 1); VerifyGenStamp(blockId, GenerationStamp.GrandfatherGenerationStamp); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test3: block file is missing blockId = DeleteBlockFile(); Scan(totalBlocks, 1, 0, 1, 0, 0); totalBlocks--; VerifyDeletion(blockId); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test4: A block file exists for which there is no metafile and // a block in memory blockId = CreateBlockFile(); totalBlocks++; Scan(totalBlocks, 1, 1, 0, 1, 0); VerifyAddition(blockId, GenerationStamp.GrandfatherGenerationStamp, 0); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test5: A metafile exists for which there is no block file and // a block in memory blockId = CreateMetaFile(); Scan(totalBlocks + 1, 1, 0, 1, 1, 0); FilePath metafile = new FilePath(GetMetaFile(blockId)); NUnit.Framework.Assert.IsTrue(!metafile.Exists()); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test6: A block file and metafile exists for which there is no block in // memory blockId = CreateBlockMetaFile(); totalBlocks++; Scan(totalBlocks, 1, 0, 0, 1, 0); VerifyAddition(blockId, DefaultGenStamp, 0); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test7: Delete bunch of metafiles for (int i = 0; i < 10; i++) { blockId = DeleteMetaFile(); } Scan(totalBlocks, 10, 10, 0, 0, 10); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test8: Delete bunch of block files for (int i_1 = 0; i_1 < 10; i_1++) { blockId = DeleteBlockFile(); } Scan(totalBlocks, 10, 0, 10, 0, 0); totalBlocks -= 10; Scan(totalBlocks, 0, 0, 0, 0, 0); // Test9: create a bunch of blocks files for (int i_2 = 0; i_2 < 10; i_2++) { blockId = CreateBlockFile(); } totalBlocks += 10; Scan(totalBlocks, 10, 10, 0, 10, 0); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test10: create a bunch of metafiles for (int i_3 = 0; i_3 < 10; i_3++) { blockId = CreateMetaFile(); } Scan(totalBlocks + 10, 10, 0, 10, 10, 0); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test11: create a bunch block files and meta files for (int i_4 = 0; i_4 < 10; i_4++) { blockId = CreateBlockMetaFile(); } totalBlocks += 10; Scan(totalBlocks, 10, 0, 0, 10, 0); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test12: truncate block files to test block length mismatch for (int i_5 = 0; i_5 < 10; i_5++) { TruncateBlockFile(); } Scan(totalBlocks, 10, 0, 0, 0, 10); Scan(totalBlocks, 0, 0, 0, 0, 0); // Test13: all the conditions combined CreateMetaFile(); CreateBlockFile(); CreateBlockMetaFile(); DeleteMetaFile(); DeleteBlockFile(); TruncateBlockFile(); Scan(totalBlocks + 3, 6, 2, 2, 3, 2); Scan(totalBlocks + 1, 0, 0, 0, 0, 0); // Test14: validate clean shutdown of DirectoryScanner ////assertTrue(scanner.getRunStatus()); //assumes "real" FSDataset, not sim scanner.Shutdown(); NUnit.Framework.Assert.IsFalse(scanner.GetRunStatus()); } finally { if (scanner != null) { 
scanner.Shutdown(); scanner = null; } cluster.Shutdown(); } }