public virtual void TestReplDueToNodeFailRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short ReplicationFactor = 3;
    Path filePath = new Path("/testFile");
    // Last two datanodes are on a different rack
    string[] racks = new string[] { "/rack1", "/rack1", "/rack1", "/rack2", "/rack2" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length)
        .Racks(racks).Build();
    FSNamesystem ns = cluster.GetNameNode().GetNamesystem();
    DatanodeManager dm = ns.GetBlockManager().GetDatanodeManager();
    try
    {
        // Create a file with one block with a replication factor of 3
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
        ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
        // Make the last datanode look like it failed to heartbeat by
        // calling removeDatanode and stopping it.
        AList<DataNode> datanodes = cluster.GetDataNodes();
        int idx = datanodes.Count - 1;
        DataNode dataNode = datanodes[idx];
        DatanodeID dnId = dataNode.GetDatanodeId();
        cluster.StopDataNode(idx);
        dm.RemoveDatanode(dnId);
        // The block should still have sufficient # replicas, across racks.
        // The last node may not have contained a replica, but if it did
        // it should have been replicated within the same rack.
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
        // Fail the last datanode again; it's also on rack2 so there is
        // only 1 rack for all the replicas
        datanodes = cluster.GetDataNodes();
        idx = datanodes.Count - 1;
        dataNode = datanodes[idx];
        dnId = dataNode.GetDatanodeId();
        cluster.StopDataNode(idx);
        dm.RemoveDatanode(dnId);
        // Make sure we have enough live replicas even though we are
        // short one rack and therefore need one more replica
        DFSTestUtil.WaitForReplication(cluster, b, 1, ReplicationFactor, 1);
    }
    finally
    {
        cluster.Shutdown();
    }
}
public virtual void TestReduceReplFactorDueToRejoinRespectsRackPolicy()
{
    Configuration conf = GetConf();
    short ReplicationFactor = 2;
    Path filePath = new Path("/testFile");
    // Last datanode is on a different rack
    string[] racks = new string[] { "/rack1", "/rack1", "/rack2" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length)
        .Racks(racks).Build();
    FSNamesystem ns = cluster.GetNameNode().GetNamesystem();
    DatanodeManager dm = ns.GetBlockManager().GetDatanodeManager();
    try
    {
        // Create a file with one block
        FileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
        ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
        // Make the last (cross rack) datanode look like it failed
        // to heartbeat by stopping it and calling removeDatanode.
        AList<DataNode> datanodes = cluster.GetDataNodes();
        NUnit.Framework.Assert.AreEqual(3, datanodes.Count);
        DataNode dataNode = datanodes[2];
        DatanodeID dnId = dataNode.GetDatanodeId();
        cluster.StopDataNode(2);
        dm.RemoveDatanode(dnId);
        // The block gets re-replicated to another datanode so it has a
        // sufficient # replicas, but not across racks, so there should
        // be 1 rack, and 1 needed replica (even though there are 2 hosts
        // available and only 2 replicas required).
        DFSTestUtil.WaitForReplication(cluster, b, 1, ReplicationFactor, 1);
        // Start the "failed" datanode, which has a replica so the block is
        // now over-replicated and therefore a replica should be removed but
        // not on the restarted datanode as that would violate the rack policy.
        string[] rack2 = new string[] { "/rack2" };
        cluster.StartDataNodes(conf, 1, true, null, rack2);
        cluster.WaitActive();
        // The block now has sufficient # replicas, across racks
        DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
    }
    finally
    {
        cluster.Shutdown();
    }
}
public virtual void TestNumVersionsReportedCorrect()
{
    // Create the DatanodeManager which will be tested
    FSNamesystem fsn = Org.Mockito.Mockito.Mock<FSNamesystem>();
    Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
    DatanodeManager dm = new DatanodeManager(Org.Mockito.Mockito.Mock<BlockManager>(),
        fsn, new Configuration());
    // Seed the RNG with a known value so test failures are easier to reproduce
    Random rng = new Random();
    int seed = rng.Next();
    rng = new Random(seed);
    Log.Info("Using seed " + seed + " for testing");
    // A map of the Storage IDs to the DN registration it was registered with
    Dictionary<string, DatanodeRegistration> sIdToDnReg = new Dictionary<string, DatanodeRegistration>();
    for (int i = 0; i < NumIterations; ++i)
    {
        // If true, remove a node for every 3rd time (if there's one)
        if (rng.NextBoolean() && i % 3 == 0 && sIdToDnReg.Count != 0)
        {
            // Pick a random node.
            int randomIndex = rng.Next() % sIdToDnReg.Count;
            // Iterate to that random position
            IEnumerator<KeyValuePair<string, DatanodeRegistration>> it = sIdToDnReg.GetEnumerator();
            for (int j = 0; j < randomIndex - 1; ++j)
            {
                it.Next();
            }
            DatanodeRegistration toRemove = it.Next().Value;
            Log.Info("Removing node " + toRemove.GetDatanodeUuid() + " ip " + toRemove.GetXferAddr()
                + " version : " + toRemove.GetSoftwareVersion());
            // Remove that random node
            dm.RemoveDatanode(toRemove);
            it.Remove();
        }
        else
        {
            // Otherwise register a node. This node may be a new / an old one
            // Pick a random storageID to register.
            string storageID = "someStorageID" + rng.Next(5000);
            DatanodeRegistration dr = Org.Mockito.Mockito.Mock<DatanodeRegistration>();
            Org.Mockito.Mockito.When(dr.GetDatanodeUuid()).ThenReturn(storageID);
            // If this storageID had already been registered before
            if (sIdToDnReg.Contains(storageID))
            {
                dr = sIdToDnReg[storageID];
                // Half of the times, change the IP address
                if (rng.NextBoolean())
                {
                    dr.SetIpAddr(dr.GetIpAddr() + "newIP");
                }
            }
            else
            {
                // This storageID has never been registered;
                // ensure IP address is unique to storageID
                string ip = "someIP" + storageID;
                Org.Mockito.Mockito.When(dr.GetIpAddr()).ThenReturn(ip);
                Org.Mockito.Mockito.When(dr.GetXferAddr()).ThenReturn(ip + ":9000");
                Org.Mockito.Mockito.When(dr.GetXferPort()).ThenReturn(9000);
            }
            // Pick a random version to register with
            Org.Mockito.Mockito.When(dr.GetSoftwareVersion()).ThenReturn("version" + rng.Next(5));
            Log.Info("Registering node storageID: " + dr.GetDatanodeUuid() + ", version: "
                + dr.GetSoftwareVersion() + ", IP address: " + dr.GetXferAddr());
            // Register this random node
            dm.RegisterDatanode(dr);
            sIdToDnReg[storageID] = dr;
        }
        // Verify DatanodeManager still has the right count
        IDictionary<string, int> mapToCheck = dm.GetDatanodesSoftwareVersions();
        // Remove counts from versions and make sure that after removing all nodes
        // mapToCheck is empty
        foreach (KeyValuePair<string, DatanodeRegistration> it_1 in sIdToDnReg)
        {
            string ver = it_1.Value.GetSoftwareVersion();
            if (!mapToCheck.Contains(ver))
            {
                throw new Exception("The correct number of datanodes of a " +
                    "version was not found on iteration " + i);
            }
            mapToCheck[ver] = mapToCheck[ver] - 1;
            if (mapToCheck[ver] == 0)
            {
                Sharpen.Collections.Remove(mapToCheck, ver);
            }
        }
        foreach (KeyValuePair<string, int> entry in mapToCheck)
        {
            Log.Info("Still in map: " + entry.Key + " has " + entry.Value);
        }
        NUnit.Framework.Assert.AreEqual("The map of version counts returned by DatanodeManager was"
            + " not what it was expected to be on iteration " + i, 0, mapToCheck.Count);
    }
}