Пример #1
0
        /// <summary>
        /// Verify that the disk check started by
        /// <see cref="DataNode.CheckDiskErrorAsync"/>
        /// removes all metadata in
        /// DataNode upon a volume failure. Thus we can run reconfig on the same
        /// configuration to reload the new volume on the same directory as the failed one.
        /// </summary>
        /// <remarks>
        /// The test records the DFS usage of the volume backed by <c>data1</c>, fails that
        /// directory, checks that a subsequent write does not change the failed volume's
        /// usage, then restores the directory, reapplies the original data-dir setting and
        /// checks that a different volume object now serves the directory and has grown.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="Org.Apache.Hadoop.Conf.ReconfigurationException"/>
        public virtual void TestDirectlyReloadAfterCheckDiskError()
        {
            StartDFSCluster(1, 2);
            CreateFile(new Path("/test"), 32, (short)2);
            DataNode     dn           = cluster.GetDataNodes()[0];
            // Remember the configured data directories so the very same value can be
            // reapplied once the failed directory has been restored.
            string       oldDataDir   = dn.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey);
            FilePath     dirToFail    = new FilePath(cluster.GetDataDirectory(), "data1");
            FsVolumeImpl failedVolume = GetVolume(dn, dirToFail);

            // NUnit expects the asserted value first and the failure message second.
            NUnit.Framework.Assert.IsNotNull(failedVolume, "No FsVolume was found for " + dirToFail);
            // Baseline usage: the failed volume must not receive any later writes.
            long used = failedVolume.GetDfsUsed();

            DataNodeTestUtils.InjectDataDirFailure(dirToFail);
            // Call and wait DataNode to detect disk failure.
            long lastDiskErrorCheck = dn.GetLastDiskErrorCheck();

            dn.CheckDiskErrorAsync();
            // Poll until the last-check value differs from the one sampled above.
            // NOTE(review): this wait has no upper bound; a check that never completes
            // will hang the test here.
            while (dn.GetLastDiskErrorCheck() == lastDiskErrorCheck)
            {
                Sharpen.Thread.Sleep(100);
            }
            CreateFile(new Path("/test1"), 32, (short)2);
            NUnit.Framework.Assert.AreEqual(used, failedVolume.GetDfsUsed());
            DataNodeTestUtils.RestoreDataDirFromFailure(dirToFail);
            dn.ReconfigurePropertyImpl(DFSConfigKeys.DfsDatanodeDataDirKey, oldDataDir);
            CreateFile(new Path("/test2"), 32, (short)2);
            FsVolumeImpl restoredVolume = GetVolume(dn, dirToFail);

            NUnit.Framework.Assert.IsNotNull(restoredVolume);
            // The directory must be served by a new volume object, not the failed one.
            NUnit.Framework.Assert.IsTrue(restoredVolume != failedVolume);
            // More data has been written to this volume.
            NUnit.Framework.Assert.IsTrue(restoredVolume.GetDfsUsed() > used);
        }
Пример #2
0
        /// <summary>
        /// Checks that the NameNode reports under-replicated blocks after data volumes
        /// have failed.
        /// </summary>
        /// <remarks>
        /// One extra datanode is started and a file with replication 3 is written. A
        /// failure is then injected into one data directory of each of the first two
        /// datanodes, a second file with replication 3 is written, and the
        /// under-replicated block count obtained after a heartbeat check must be positive.
        /// The test is skipped on Windows.
        /// </remarks>
        public virtual void TestUnderReplicationAfterVolFailure()
        {
            // This test relies on denying access to data volumes to simulate data volume
            // failure.  This doesn't work on Windows, because an owner of an object
            // always has the ability to read and change permissions on the object.
            Assume.AssumeTrue(!Path.Windows);
            // Bring up one more datanode
            cluster.StartDataNodes(conf, 1, true, null, null);
            cluster.WaitActive();
            BlockManager bm    = cluster.GetNamesystem().GetBlockManager();
            Path         file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)3);
            // Fail the first volume on both datanodes
            FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));

            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);
            Path file2 = new Path("/test2");

            DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file2, (short)3);
            // underReplicatedBlocks are due to failed volumes
            int underReplicatedBlocks = BlockManagerTestUtil.CheckHeartbeatAndGetUnderReplicatedBlocksCount(
                cluster.GetNamesystem(), bm);

            // NUnit expects the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(underReplicatedBlocks > 0,
                                          "There is no under replicated block after volume failure");
        }
        /// <summary>
        /// Exercises datanodes started with the failed-volumes-tolerated setting at zero.
        /// </summary>
        /// <remarks>
        /// Two extra datanodes are started with that setting, a failure is injected into
        /// one data directory, and a file requested with replication 3 is expected to
        /// reach only 2 replicas. The same expectation is checked again after the
        /// directory has been restored. The test is skipped when the <c>os.name</c>
        /// property starts with "Windows".
        /// </remarks>
        public virtual void TestConfigureMinValidVolumes()
        {
            bool runningOnWindows = Runtime.GetProperty("os.name").StartsWith("Windows");
            Assume.AssumeTrue(!runningOnWindows);

            // The two additional datanodes need both of their volumes functioning
            // in order to stay up.
            conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 0);
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            DatanodeManager datanodeManager = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long initialCapacity  = DFSTestUtil.GetLiveDatanodeCapacity(datanodeManager);
            long singleDnCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeManager, 0);

            // Break one volume that belongs to the 2nd DN.
            FilePath brokenVolume = new FilePath(dataDir, "data" + (2 * 1 + 1));
            DataNodeTestUtils.InjectDataDirFailure(brokenVolume);

            // Only the 1st and 3rd DN are expected to take a replica.
            Path fileDuringFailure = new Path("/test1");
            DFSTestUtil.CreateFile(fs, fileDuringFailure, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, fileDuringFailure, (short)2);

            // That single volume failure should have taken a whole DN down, so the
            // expected capacity is the initial one minus one datanode's worth.
            DFSTestUtil.WaitForDatanodeStatus(
                datanodeManager, 2, 1, 0, initialCapacity - singleDnCapacity, WaitForHeartbeats);

            // Restoring the volume does not bring the DN back: it is still considered
            // dead, so a new file again ends up with two replicas.
            DataNodeTestUtils.RestoreDataDirFromFailure(brokenVolume);
            Path fileAfterRestore = new Path("/test2");
            DFSTestUtil.CreateFile(fs, fileAfterRestore, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, fileAfterRestore, (short)2);
        }
Пример #4
0
        /// <summary>
        /// Checks that datanodes stay up, and that both the datanodes and the NameNode
        /// report the failures, when two volumes fail on each of two datanodes.
        /// </summary>
        /// <remarks>
        /// The cluster is torn down and reinitialized via <c>InitCluster(3, 4, 2)</c>
        /// before any failure is injected.
        /// </remarks>
        public virtual void TestMultipleVolFailuresOnNode()
        {
            // Reinitialize the cluster, configured with 4 storage locations per DataNode
            // and tolerating up to 2 failures.
            TearDown();
            InitCluster(3, 4, 2);
            // Calculate the total capacity of all the datanodes. Sleep for three seconds
            // to be sure the datanodes have had a chance to heartbeat their capacities.
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long     origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long     dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            FilePath dn1Vol1      = new FilePath(dataDir, "data" + (4 * 0 + 1));
            FilePath dn1Vol2      = new FilePath(dataDir, "data" + (4 * 0 + 2));
            FilePath dn2Vol1      = new FilePath(dataDir, "data" + (4 * 1 + 1));
            FilePath dn2Vol2      = new FilePath(dataDir, "data" + (4 * 1 + 2));

            // Make the first two volume directories on the first two datanodes
            // non-accessible.
            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn1Vol2, dn2Vol1, dn2Vol2);
            // Create file1 and wait for 3 replicas (ie all DNs can still store a block).
            // Then assert that all DNs are up, despite the volume failures.
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)3);
            AList <DataNode> dns = cluster.GetDataNodes();

            // NUnit expects the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(dns[0].IsDatanodeUp(), "DN1 should be up");
            NUnit.Framework.Assert.IsTrue(dns[1].IsDatanodeUp(), "DN2 should be up");
            NUnit.Framework.Assert.IsTrue(dns[2].IsDatanodeUp(), "DN3 should be up");
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // Eventually the NN should report four volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 4, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 4);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true);
        }
Пример #5
0
        /// <summary>
        /// Checks that the volume failures reported at the NameNode are the same before
        /// and after the NameNode is restarted.
        /// </summary>
        /// <remarks>
        /// One data directory is failed on each of the first two datanodes. The same
        /// NameNode-side checks are then run twice: once straight away and once after
        /// <c>cluster.RestartNameNode(0)</c>.
        /// </remarks>
        public virtual void TestVolFailureStatsPreservedOnNNRestart()
        {
            // Two extra datanodes, each able to tolerate one failure.
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            DatanodeManager datanodeManager = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long initialCapacity  = DFSTestUtil.GetLiveDatanodeCapacity(datanodeManager);
            long singleDnCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeManager, 0);

            // Only the first volume of the first two datanodes is failed; the third
            // datanode stays healthy so one node in the pipeline will not fail.
            FilePath firstDnVolume  = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath secondDnVolume = new FilePath(dataDir, "data" + (2 * 1 + 1));
            DataNodeTestUtils.InjectDataDirFailure(firstDnVolume, secondDnVolume);

            Path replicatedFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, replicatedFile, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, replicatedFile, (short)2);

            AList<DataNode> datanodes = cluster.GetDataNodes();

            // Pass 0 checks the NN right after the failures; pass 1 restarts the NN
            // first and expects it to still see the same two failures.
            for (int pass = 0; pass < 2; pass++)
            {
                if (pass == 1)
                {
                    cluster.RestartNameNode(0);
                    cluster.WaitActive();
                }

                DFSTestUtil.WaitForDatanodeStatus(
                    datanodeManager, 3, 0, 2, initialCapacity - (1 * singleDnCapacity), WaitForHeartbeats);
                CheckAggregateFailuresAtNameNode(true, 2);
                CheckFailuresAtNameNode(datanodeManager, datanodes[0], true, firstDnVolume.GetAbsolutePath());
                CheckFailuresAtNameNode(datanodeManager, datanodes[1], true, secondDnVolume.GetAbsolutePath());
            }
        }
Пример #6
0
        /// <summary>
        /// Fails volumes on the datanodes one step at a time and checks, after each step,
        /// what the datanodes and the NameNode report.
        /// </summary>
        /// <remarks>
        /// The steps are: fail one volume on each of the first two datanodes, fail the
        /// first volume of the third datanode, fail its second volume (after which that
        /// datanode is expected to be dead), and finally restore every directory and
        /// restart the datanodes, after which capacity and failure counts are expected
        /// to return to their original values.
        /// </remarks>
        public virtual void TestSuccessiveVolumeFailures()
        {
            // Bring up two more datanodes
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            /*
             * Calculate the total capacity of all the datanodes. Sleep for
             * three seconds to be sure the datanodes have had a chance to
             * heartbeat their capacities.
             */
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long     origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long     dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            FilePath dn1Vol1      = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn2Vol1      = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath dn3Vol1      = new FilePath(dataDir, "data" + (2 * 2 + 1));
            FilePath dn3Vol2      = new FilePath(dataDir, "data" + (2 * 2 + 2));

            /*
             * Make the 1st volume directories on the first two datanodes
             * non-accessible.  We don't make all three 1st volume directories
             * readonly since that would cause the entire pipeline to
             * fail. The client does not retry failed nodes even though
             * perhaps they could succeed because just a single volume failed.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);

            /*
             * Create file1 and wait for 3 replicas (ie all DNs can still
             * store a block).  Then assert that all DNs are up, despite the
             * volume failures.
             */
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)3);
            AList <DataNode> dns = cluster.GetDataNodes();

            // NUnit expects the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(dns[0].IsDatanodeUp(), "DN1 should be up");
            NUnit.Framework.Assert.IsTrue(dns[1].IsDatanodeUp(), "DN2 should be up");
            NUnit.Framework.Assert.IsTrue(dns[2].IsDatanodeUp(), "DN3 should be up");

            /*
             * The metrics should confirm the volume failures.
             */
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // Eventually the NN should report two volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true);

            /*
             * Now fail a volume on the third datanode. We should be able to get
             * three replicas since we've already identified the other failures.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn3Vol1);
            Path file2 = new Path("/test2");

            DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file2, (short)3);
            NUnit.Framework.Assert.IsTrue(dns[2].IsDatanodeUp(), "DN3 should still be up");
            CheckFailuresAtDataNode(dns[2], 1, true, dn3Vol1.GetAbsolutePath());
            DataNodeTestUtils.TriggerHeartbeat(dns[2]);
            CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());

            /*
             * Once the datanodes have a chance to heartbeat their new capacity the
             * total capacity should be down by three volumes (assuming the host
             * did not grow or shrink the data volume while the test was running).
             */
            dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 3, origCapacity - (3 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 3);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());

            /*
             * Now fail the 2nd volume on the 3rd datanode. All its volumes
             * are now failed and so it should report two volume failures
             * and that it's no longer up. Only wait for two replicas since
             * we'll never get a third.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn3Vol2);
            Path file3 = new Path("/test3");

            DFSTestUtil.CreateFile(fs, file3, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file3, (short)2);
            // The DN should consider itself dead
            DFSTestUtil.WaitForDatanodeDeath(dns[2]);
            // And report two failed volumes
            CheckFailuresAtDataNode(dns[2], 2, true, dn3Vol1.GetAbsolutePath(), dn3Vol2.GetAbsolutePath());
            // The NN considers the DN dead
            DFSTestUtil.WaitForDatanodeStatus(dm, 2, 1, 2, origCapacity - (4 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());

            /*
             * The datanode never tries to restore the failed volume, even if
             * it's subsequently repaired, but it should see this volume on
             * restart, so file creation should be able to succeed after
             * restoring the data directories and restarting the datanodes.
             */
            DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1, dn3Vol1, dn3Vol2);
            cluster.RestartDataNodes();
            cluster.WaitActive();
            Path file4 = new Path("/test4");

            DFSTestUtil.CreateFile(fs, file4, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file4, (short)3);

            /*
             * Eventually the capacity should be restored to its original value,
             * and that the volume failure count should be reported as zero by
             * both the metrics and the NN.
             */
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            // Fetch the datanode list again now that the datanodes have been restarted.
            dns = cluster.GetDataNodes();
            CheckFailuresAtNameNode(dm, dns[0], true);
            CheckFailuresAtNameNode(dm, dns[1], true);
            CheckFailuresAtNameNode(dm, dns[2], true);
        }
Пример #7
0
        /// <summary>
        /// Checks how reported volume failures behave when datanodes are reconfigured
        /// while a volume is failed, and after the failed volume has been restored.
        /// </summary>
        /// <remarks>
        /// With one volume failed on each of the first two datanodes, both are
        /// reconfigured twice with the same volumes and the same failures are expected
        /// each time. The volumes are then restored and the datanodes reconfigured once
        /// more, after which the NameNode is expected to report no failures.
        /// </remarks>
        public virtual void TestDataNodeReconfigureWithVolumeFailures()
        {
            // Bring up two more datanodes
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            // Fail the first volume on both datanodes (we have to keep the
            // third healthy so one node in the pipeline will not fail).
            FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn1Vol2 = new FilePath(dataDir, "data" + (2 * 0 + 2));
            FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath dn2Vol2 = new FilePath(dataDir, "data" + (2 * 1 + 2));

            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1);
            DataNodeTestUtils.InjectDataDirFailure(dn2Vol1);
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)2);
            AList <DataNode> dns = cluster.GetDataNodes();

            // NUnit expects the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(dns[0].IsDatanodeUp(), "DN1 should be up");
            NUnit.Framework.Assert.IsTrue(dns[1].IsDatanodeUp(), "DN2 should be up");
            NUnit.Framework.Assert.IsTrue(dns[2].IsDatanodeUp(), "DN3 should be up");
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            // Reconfigure again to try to add back the failed volumes.
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures again.
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(false, 2);
            CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
            // Reconfigure a third time with the failed volumes.  Afterwards, we expect
            // the same volume failures to be reported.  (No double-counting.)
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures again.
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(false, 2);
            CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
            // Replace failed volume with healthy volume and run reconfigure DataNode.
            // The failed volume information should be cleared.
            DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1);
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            // The numeric argument stays at 1 while no failed locations are listed.
            // NOTE(review): presumably the datanode-side failure counter is cumulative
            // and is not reset by the reconfigure - confirm against CheckFailuresAtDataNode.
            CheckFailuresAtDataNode(dns[0], 1, true);
            CheckFailuresAtDataNode(dns[1], 1, true);
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            CheckFailuresAtNameNode(dm, dns[0], true);
            CheckFailuresAtNameNode(dm, dns[1], true);
        }
Пример #8
0
        /// <summary>
        /// Test that DataStorage and BlockPoolSliceStorage remove the failed volume
        /// after failure.
        /// </summary>
        /// <remarks>
        /// After the failure has been detected the test checks four places for any trace
        /// of the failed directory: the storage directories of DataStorage and of the
        /// block pool's BlockPoolSliceStorage, the volumes of the dataset, the volume of
        /// every replica in the block pool, and the datanode's configured data
        /// directories.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        public virtual void TestFailedVolumeBeingRemovedFromDataNode()
        {
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)2);
            FilePath dn0Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));

            DataNodeTestUtils.InjectDataDirFailure(dn0Vol1);
            DataNode dn0 = cluster.GetDataNodes()[0];
            long     lastDiskErrorCheck = dn0.GetLastDiskErrorCheck();

            dn0.CheckDiskErrorAsync();
            // Wait checkDiskError thread finish to discover volume failure.
            // NOTE(review): this wait has no upper bound; a check that never completes
            // will hang the test here.
            while (dn0.GetLastDiskErrorCheck() == lastDiskErrorCheck)
            {
                Sharpen.Thread.Sleep(100);
            }
            // Verify dn0Vol1 has been completely removed from DN0.
            // 1. dn0Vol1 is removed from DataStorage.
            DataStorage storage = dn0.GetStorage();

            NUnit.Framework.Assert.AreEqual(1, storage.GetNumStorageDirs());
            for (int i = 0; i < storage.GetNumStorageDirs(); i++)
            {
                Storage.StorageDirectory sd = storage.GetStorageDir(i);
                NUnit.Framework.Assert.IsFalse(
                    sd.GetRoot().GetAbsolutePath().StartsWith(dn0Vol1.GetAbsolutePath()));
            }
            // The same must hold for the block pool's own storage directories.
            string bpid = cluster.GetNamesystem().GetBlockPoolId();
            BlockPoolSliceStorage bpsStorage = storage.GetBPStorage(bpid);

            NUnit.Framework.Assert.AreEqual(1, bpsStorage.GetNumStorageDirs());
            for (int i = 0; i < bpsStorage.GetNumStorageDirs(); i++)
            {
                Storage.StorageDirectory sd = bpsStorage.GetStorageDir(i);
                NUnit.Framework.Assert.IsFalse(
                    sd.GetRoot().GetAbsolutePath().StartsWith(dn0Vol1.GetAbsolutePath()));
            }
            // 2. dn0Vol1 is removed from FsDataset
            FsDatasetSpi <FsVolumeSpi> data = dn0.GetFSDataset();

            foreach (FsVolumeSpi volume in data.GetVolumes())
            {
                // Fully qualified NUnit assertion, consistent with the other checks here.
                NUnit.Framework.Assert.AreNotEqual(
                    new FilePath(volume.GetBasePath()).GetAbsoluteFile(), dn0Vol1.GetAbsoluteFile());
            }
            // 3. all blocks on dn0Vol1 have been removed.
            foreach (ReplicaInfo replica in FsDatasetTestUtil.GetReplicas(data, bpid))
            {
                NUnit.Framework.Assert.IsNotNull(replica.GetVolume());
                NUnit.Framework.Assert.AreNotEqual(
                    new FilePath(replica.GetVolume().GetBasePath()).GetAbsoluteFile(), dn0Vol1.GetAbsoluteFile());
            }
            // 4. dn0Vol1 is not in DN0's configuration and dataDirs anymore.
            string[] dataDirStrs = dn0.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey).Split(",");
            NUnit.Framework.Assert.AreEqual(1, dataDirStrs.Length);
            NUnit.Framework.Assert.IsFalse(dataDirStrs[0].Contains(dn0Vol1.GetAbsolutePath()));
        }