/// <summary>
/// Regression test for HDFS-4799: after a namenode restart, when both a
/// stale-genstamp RWR replica and a current replica exist for a block, the
/// NN must invalidate the stale copy — never the good one.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRWRInvalidation()
{
	Configuration conf = new HdfsConfiguration();
	// Use a randomized deletion policy instead of the default disk-space
	// based one: free space is not controllable inside a test, and random
	// choice better models real clusters where nodes have arbitrary
	// amounts of free space.
	conf.SetClass(DFSConfigKeys.DfsBlockReplicatorClassnameKey,
		typeof(TestDNFencing.RandomDeleterPolicy), typeof(BlockPlacementPolicy));
	// Faster heartbeats keep the test quick.
	conf.SetInt(DFSConfigKeys.DfsHeartbeatIntervalKey, 1);
	// Exercise several independent files so that a lucky random deletion
	// on a single file cannot mask a real bug.
	IList<Path> paths = Lists.NewArrayList();
	for (int n = 0; n < 10; n++)
	{
		paths.AddItem(new Path("/test" + n));
	}
	MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build();
	try
	{
		IList<FSDataOutputStream> streams = Lists.NewArrayList();
		try
		{
			// Create every test file and flush a first line of data to it.
			foreach (Path p in paths)
			{
				FSDataOutputStream @out = cluster.GetFileSystem().Create(p, (short)2);
				streams.AddItem(@out);
				@out.WriteBytes("old gs data\n");
				@out.Hflush();
			}
			// Take one node out of the pipeline; its replicas keep the old
			// generation stamp from here on.
			MiniDFSCluster.DataNodeProperties staleNode = cluster.StopDataNode(0);
			// Append a second line and flush again — only the surviving
			// node's (newer-genstamp) replicas will contain this data.
			for (int idx = 0; idx < streams.Count; idx++)
			{
				FSDataOutputStream @out = streams[idx];
				@out.WriteBytes("new gs data\n");
				@out.Hflush();
				// Drop replication to one so a single replica satisfies the
				// block, then close the file.
				cluster.GetFileSystem().SetReplication(paths[idx], (short)1);
				@out.Close();
			}
			// After restart each block has two replicas: one stale-genstamp
			// copy and one current copy. The stale one must be the replica
			// that gets deleted.
			Log.Info("=========================== restarting cluster");
			MiniDFSCluster.DataNodeProperties freshNode = cluster.StopDataNode(0);
			cluster.RestartNameNode();
			// Bring the node holding the corrupt (stale-genstamp) replica
			// back first, then the node with the good replica.
			cluster.RestartDataNode(staleNode);
			cluster.WaitActive();
			cluster.RestartDataNode(freshNode);
			cluster.WaitActive();
			// Compute and push invalidations, then wait until the datanodes
			// have fully processed the resulting deletions.
			cluster.GetNameNode().GetNamesystem().GetBlockManager().ComputeInvalidateWork(2);
			cluster.TriggerHeartbeats();
			HATestUtil.WaitForDNDeletions(cluster);
			cluster.TriggerDeletionReports();
			// Both written lines must still be readable from every file.
			foreach (Path p in paths)
			{
				string ret = DFSTestUtil.ReadFile(cluster.GetFileSystem(), p);
				NUnit.Framework.Assert.AreEqual("old gs data\n" + "new gs data\n", ret);
			}
		}
		finally
		{
			IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(streams, new IDisposable[0]));
		}
	}
	finally
	{
		cluster.Shutdown();
	}
}