Exemple #1
0
        /// <summary>
        /// Checks that the DataNode sends exactly one full block report after its
        /// data-directory setting is reconfigured down to a single directory.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.Conf.ReconfigurationException"/>
        public virtual void TestFullBlockReportAfterRemovingVolumes()
        {
            Configuration testConf = new Configuration();
            testConf.SetLong(DFSConfigKeys.DfsBlockSizeKey, BlockSize);

            // As in TestTriggerBlockReport, use very long block-report and
            // heartbeat intervals so that nothing is sent to the NameNode during
            // the test unless the test itself provokes it.
            testConf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 10800000L);
            testConf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1080L);

            cluster = new MiniDFSCluster.Builder(testConf).NumDataNodes(2).Build();
            cluster.WaitActive();

            // Intercept the RPCs that the first DataNode sends to the NameNode.
            DataNode firstDataNode = cluster.GetDataNodes()[0];
            DatanodeProtocolClientSideTranslatorPB namenodeSpy =
                DataNodeTestUtils.SpyOnBposToNN(firstDataNode, cluster.GetNameNode());

            // Reconfigure the DataNode so that "data1" is its only data dir,
            // which drops every other data dir it was using.
            FilePath remainingDataDir = new FilePath(cluster.GetDataDirectory(), "data1");
            dn_ReconfigureGuard:
            firstDataNode.ReconfigurePropertyImpl(
                DFSConfigKeys.DfsDatanodeDataDirKey,
                remainingDataDir.ToString());

            // Exactly one full block report must arrive within the 60000 timeout.
            Org.Mockito.Mockito.Verify(namenodeSpy, Org.Mockito.Mockito.Timeout(60000).Times(1))
                .BlockReport(
                    Matchers.Any<DatanodeRegistration>(),
                    Matchers.AnyString(),
                    Matchers.Any<StorageBlockReport[]>(),
                    Matchers.Any<BlockReportContext>());
        }
Exemple #2
0
        /// <summary>
        /// Verifies that every storage report carried by a DataNode heartbeat has
        /// the storage type under test and the Normal state.
        /// </summary>
        public virtual void TestStorageReportHasStorageTypeAndState()
        {
            // Make sure we are not testing with the default type, that would not
            // be a very good test.
            // AreNotEqual is used rather than AreNotSame: AreNotSame compares
            // references, so if StorageType is a value-type enum both arguments are
            // boxed into distinct objects and the check could never fail.
            NUnit.Framework.Assert.AreNotEqual(StorageType.Default, storageType);
            NameNode nn = cluster.GetNameNode();
            DataNode dn = cluster.GetDataNodes()[0];
            // Insert a spy object for the NN RPC.
            DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(dn, nn);

            // Trigger a heartbeat so there is an interaction with the spy
            // object.
            DataNodeTestUtils.TriggerHeartbeat(dn);

            // Capture the storage reports passed to the heartbeat RPC; every other
            // argument is matched loosely.
            ArgumentCaptor<StorageReport[]> captor = ArgumentCaptor.ForClass<StorageReport[]>();

            Org.Mockito.Mockito.Verify(nnSpy).SendHeartbeat(
                Matchers.Any<DatanodeRegistration>(),
                captor.Capture(),
                Matchers.AnyLong(),
                Matchers.AnyLong(),
                Matchers.AnyInt(),
                Matchers.AnyInt(),
                Matchers.AnyInt(),
                Org.Mockito.Mockito.Any<VolumeFailureSummary>());

            // Each captured report must describe a storage of the expected type
            // that is in the Normal state.
            StorageReport[] reports = captor.GetValue();
            foreach (StorageReport report in reports)
            {
                Assert.AssertThat(report.GetStorage().GetStorageType(), IS.Is(storageType));
                Assert.AssertThat(report.GetStorage().GetState(), IS.Is(DatanodeStorage.State.Normal));
            }
        }
Exemple #3
0
        /// <summary>
        /// Checks that, once a disk check has run against a failed volume,
        /// reconfiguring the DataNode with its unchanged data-dir setting loads a
        /// fresh volume on the same directory as the failed one.
        /// </summary>
        /// <remarks>
        /// The disk check is started with <see cref="DataNode.CheckDiskErrorAsync()"/>.
        /// The stated intent is that the check drops all DataNode metadata for the
        /// failed volume, which is what allows the same configuration to be
        /// re-applied.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="Org.Apache.Hadoop.Conf.ReconfigurationException"/>
        public virtual void TestDirectlyReloadAfterCheckDiskError()
        {
            StartDFSCluster(1, 2);
            CreateFile(new Path("/test"), 32, (short)2);

            DataNode dataNode = cluster.GetDataNodes()[0];
            string originalDataDirs = dataNode.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey);
            FilePath failingDir = new FilePath(cluster.GetDataDirectory(), "data1");

            // Remember the volume object and its usage before the failure.
            FsVolumeImpl volumeBeforeFailure = GetVolume(dataNode, failingDir);
            NUnit.Framework.Assert.IsTrue(
                "No FsVolume was found for " + failingDir,
                volumeBeforeFailure != null);
            long usedBeforeFailure = volumeBeforeFailure.GetDfsUsed();

            DataNodeTestUtils.InjectDataDirFailure(failingDir);

            // Start a disk check and poll until its timestamp moves, which shows
            // that the DataNode has completed a check after the failure.
            long previousCheckTime = dataNode.GetLastDiskErrorCheck();
            dataNode.CheckDiskErrorAsync();
            while (dataNode.GetLastDiskErrorCheck() == previousCheckTime)
            {
                Sharpen.Thread.Sleep(100);
            }

            // A file written now must not change the failed volume's usage.
            CreateFile(new Path("/test1"), 32, (short)2);
            NUnit.Framework.Assert.AreEqual(usedBeforeFailure, volumeBeforeFailure.GetDfsUsed());

            // Repair the directory and re-apply the original data-dir setting.
            DataNodeTestUtils.RestoreDataDirFromFailure(failingDir);
            dataNode.ReconfigurePropertyImpl(DFSConfigKeys.DfsDatanodeDataDirKey, originalDataDirs);
            CreateFile(new Path("/test2"), 32, (short)2);

            // The directory must now be served by a different volume object, and
            // more data must have been written to it.
            FsVolumeImpl reloadedVolume = GetVolume(dataNode, failingDir);
            NUnit.Framework.Assert.IsTrue(reloadedVolume != null);
            NUnit.Framework.Assert.IsTrue(reloadedVolume != volumeBeforeFailure);
            NUnit.Framework.Assert.IsTrue(reloadedVolume.GetDfsUsed() > usedBeforeFailure);
        }
Exemple #4
0
        /// <summary>
        /// Checks that the NameNode counts under-replicated blocks after data
        /// volumes have been failed on two datanodes.
        /// </summary>
        public virtual void TestUnderReplicationAfterVolFailure()
        {
            // Volume failure is simulated by denying access to the data
            // directories. That is ineffective on Windows, where the owner of an
            // object can always read and change its permissions, so skip there.
            Assume.AssumeTrue(!Path.Windows);

            // Add one more datanode to the cluster.
            cluster.StartDataNodes(conf, 1, true, null, null);
            cluster.WaitActive();
            BlockManager blockManager = cluster.GetNamesystem().GetBlockManager();

            // Write a first file and wait until it has three replicas.
            Path firstFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, firstFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, firstFile, (short)3);

            // Fail the first volume of the first and of the second datanode.
            FilePath firstDnVolume = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath secondDnVolume = new FilePath(dataDir, "data" + (2 * 1 + 1));
            DataNodeTestUtils.InjectDataDirFailure(firstDnVolume, secondDnVolume);

            // Write a second file with the volumes failed.
            Path secondFile = new Path("/test2");
            DFSTestUtil.CreateFile(fs, secondFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, secondFile, (short)3);

            // The failed volumes are expected to leave blocks under-replicated.
            int underReplicatedCount =
                BlockManagerTestUtil.CheckHeartbeatAndGetUnderReplicatedBlocksCount(
                    cluster.GetNamesystem(), blockManager);
            NUnit.Framework.Assert.IsTrue(
                "There is no under replicated block after volume failure",
                underReplicatedCount > 0);
        }
        /// <summary>
        /// Checks that a datanode started with zero tolerated volume failures stops
        /// serving after one volume fails, and that restoring the volume does not
        /// bring the replica count back up.
        /// </summary>
        public virtual void TestConfigureMinValidVolumes()
        {
            // Skipped on Windows.
            Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));

            // Start two more datanodes that tolerate no failed volumes, i.e. both
            // of their volumes must work for them to stay up.
            conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 0);
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            DatanodeManager datanodeManager =
                cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long totalCapacity = DFSTestUtil.GetLiveDatanodeCapacity(datanodeManager);
            long singleDnCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeManager, 0);

            // Fail one volume on the second datanode.
            FilePath secondDnVolume = new FilePath(dataDir, "data" + (2 * 1 + 1));
            DataNodeTestUtils.InjectDataDirFailure(secondDnVolume);

            // Only two replicas are expected (on the first and third datanodes).
            Path firstFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, firstFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, firstFile, (short)2);

            // The single failure should have taken one datanode out, reducing the
            // live capacity by one datanode's worth.
            // NOTE(review): the 2, 1, 0 arguments presumably mean live, dead and
            // failed-volume counts — confirm against WaitForDatanodeStatus.
            DFSTestUtil.WaitForDatanodeStatus(
                datanodeManager, 2, 1, 0,
                totalCapacity - (1 * singleDnCapacity),
                WaitForHeartbeats);

            // Restoring the volume must not help: the datanode is still treated as
            // dead, so a new file again reaches only two replicas.
            DataNodeTestUtils.RestoreDataDirFromFailure(secondDnVolume);
            Path secondFile = new Path("/test2");
            DFSTestUtil.CreateFile(fs, secondFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, secondFile, (short)2);
        }
 /// <summary>
 /// Checks that a block deletion on the DataNode is not reported to the
 /// NameNode immediately, and is reported once after a block report is
 /// triggered.
 /// </summary>
 /// <exception cref="System.Exception"/>
 /// <exception cref="System.IO.IOException"/>
 public virtual void TestReportBlockDeleted()
 {
     try
     {
         // A triggered block report resets the incremental-block-report timer.
         DataNodeTestUtils.TriggerBlockReport(singletonDn);

         // Watch the DataNode-to-NameNode calls from here on.
         DatanodeProtocolClientSideTranslatorPB namenodeSpy = SpyOnDnCallsToNn();
         InjectBlockDeleted();

         // IBRs are produced asynchronously, so give one a chance to appear.
         Sharpen.Thread.Sleep(2000);

         // Nothing may have been sent yet: deleted blocks are only reported
         // when the IBR timer elapses.
         Org.Mockito.Mockito.Verify(namenodeSpy, Org.Mockito.Mockito.Times(0))
             .BlockReceivedAndDeleted(
                 Matchers.Any<DatanodeRegistration>(),
                 Matchers.AnyString(),
                 Matchers.Any<StorageReceivedDeletedBlocks[]>());

         // Triggering a block report also triggers an IBR.
         DataNodeTestUtils.TriggerBlockReport(singletonDn);
         Sharpen.Thread.Sleep(2000);

         // Now the deletion must have been reported exactly once.
         Org.Mockito.Mockito.Verify(namenodeSpy, Org.Mockito.Mockito.Times(1))
             .BlockReceivedAndDeleted(
                 Matchers.Any<DatanodeRegistration>(),
                 Matchers.AnyString(),
                 Matchers.Any<StorageReceivedDeletedBlocks[]>());
     }
     finally
     {
         // Always tear the cluster down and clear the field.
         cluster.Shutdown();
         cluster = null;
     }
 }
Exemple #7
0
        // return the initial state of the configuration
        /// <summary>
        /// Covers the case where one DataNode in the write pipeline is in the
        /// middle of a block report at the moment the block is closed.
        /// </summary>
        /// <remarks>
        /// The block report is held back until the block has been marked
        /// completed on the NameNode, so it reports an RBW replica for a COMPLETE
        /// block. Such a report must not cause the replica to be marked corrupt.
        /// This is a regression test for HDFS-2791.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestOneReplicaRbwReportArrivesAfterBlockCompleted()
        {
            CountDownLatch reportFinished = new CountDownLatch(1);

            // NOTE(review): _DelayAnswer_579 is declared elsewhere; presumably it
            // counts the latch down once the delayed block report has gone
            // through — confirm against its definition.
            GenericTestUtils.DelayAnswer reportDelayer = new _DelayAnswer_579(reportFinished, Log);

            string testName = GenericTestUtils.GetMethodName();
            Path testFile = new Path("/" + testName + ".dat");

            // A second DataNode is needed: the test checks what happens when one
            // of the DataNodes is slowed down.
            ReplFactor = 2;
            StartDNandWait(null, false);

            NameNode nameNode = cluster.GetNameNode();
            FSDataOutputStream stream = fs.Create(testFile, ReplFactor);
            try
            {
                AppendTestUtil.Write(stream, 0, 10);
                stream.Hflush();

                // Spy on the first DataNode so its block report can be delayed.
                DataNode delayedDn = cluster.GetDataNodes()[0];
                DatanodeProtocolClientSideTranslatorPB namenodeSpy =
                    DataNodeTestUtils.SpyOnBposToNN(delayedDn, nameNode);
                Org.Mockito.Mockito.DoAnswer(reportDelayer).When(namenodeSpy).BlockReport(
                    Org.Mockito.Mockito.AnyObject<DatanodeRegistration>(),
                    Org.Mockito.Mockito.AnyString(),
                    Org.Mockito.Mockito.AnyObject<StorageBlockReport[]>(),
                    Org.Mockito.Mockito.AnyObject<BlockReportContext>());

                // Force a block report, which will contain an RBW replica. Wait
                // until the RPC has been issued; the delayer holds it before it
                // reaches the NameNode.
                delayedDn.ScheduleAllBlockReport(0);
                reportDelayer.WaitForCall();
            }
            finally
            {
                IOUtils.CloseStream(stream);
            }

            // With the stream closed the NameNode has the block in COMPLETE
            // state; now release the held block report and wait for it to finish.
            reportDelayer.Proceed();
            reportFinished.Await();

            // No replica may be marked corrupt and the file must be readable.
            BlockManagerTestUtil.UpdateState(nameNode.GetNamesystem().GetBlockManager());
            NUnit.Framework.Assert.AreEqual(0, nameNode.GetNamesystem().GetCorruptReplicaBlocks());
            DFSTestUtil.ReadFile(fs, testFile);

            // Stop the other DataNode so the read has to be served by the one
            // whose block report was delayed.
            cluster.StopDataNode(1);
            DFSTestUtil.ReadFile(fs, testFile);
        }
Exemple #8
0
        /// <summary>
        /// Checks that datanodes with four storage locations stay up when two
        /// volumes fail on each of two nodes, and that both the DataNodes and the
        /// NameNode report those failures.
        /// </summary>
        public virtual void TestMultipleVolFailuresOnNode()
        {
            // Rebuild the cluster with 4 storage locations per DataNode and a
            // tolerance of up to 2 failed volumes.
            TearDown();
            InitCluster(3, 4, 2);

            // Let the datanodes heartbeat their capacities before totalling them.
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager datanodeManager =
                cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long totalCapacity = DFSTestUtil.GetLiveDatanodeCapacity(datanodeManager);
            long singleDnCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeManager, 0);

            // First two volume directories of the first two datanodes.
            FilePath firstDnVolA = new FilePath(dataDir, "data" + (4 * 0 + 1));
            FilePath firstDnVolB = new FilePath(dataDir, "data" + (4 * 0 + 2));
            FilePath secondDnVolA = new FilePath(dataDir, "data" + (4 * 1 + 1));
            FilePath secondDnVolB = new FilePath(dataDir, "data" + (4 * 1 + 2));

            // Make those four directories inaccessible.
            DataNodeTestUtils.InjectDataDirFailure(firstDnVolA, firstDnVolB, secondDnVolA, secondDnVolB);

            // A file with 3 replicas must still be storable, i.e. every DataNode
            // can still accept a block.
            Path testFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, testFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, testFile, (short)3);

            // All three DataNodes must still be running despite the failures.
            AList<DataNode> dataNodes = cluster.GetDataNodes();
            NUnit.Framework.Assert.IsTrue("DN1 should be up", dataNodes[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dataNodes[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dataNodes[2].IsDatanodeUp());

            // Failure state as seen by each DataNode.
            CheckFailuresAtDataNode(
                dataNodes[0], 1, true,
                firstDnVolA.GetAbsolutePath(), firstDnVolB.GetAbsolutePath());
            CheckFailuresAtDataNode(
                dataNodes[1], 1, true,
                secondDnVolA.GetAbsolutePath(), secondDnVolB.GetAbsolutePath());
            CheckFailuresAtDataNode(dataNodes[2], 0, true);

            // Sanity check that the wait below is long enough.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);

            // The NameNode should eventually report four volume failures.
            DFSTestUtil.WaitForDatanodeStatus(
                datanodeManager, 3, 0, 4,
                totalCapacity - (1 * singleDnCapacity),
                WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 4);

            // Failure state per DataNode as seen by the NameNode.
            CheckFailuresAtNameNode(
                datanodeManager, dataNodes[0], true,
                firstDnVolA.GetAbsolutePath(), firstDnVolB.GetAbsolutePath());
            CheckFailuresAtNameNode(
                datanodeManager, dataNodes[1], true,
                secondDnVolA.GetAbsolutePath(), secondDnVolB.GetAbsolutePath());
            CheckFailuresAtNameNode(datanodeManager, dataNodes[2], true);
        }
 /// <summary>
 /// Builds the test fixture: a configured single-DataNode cluster, handles to
 /// its NameNode and DataNode, and a spy on the DataNode-to-NameNode RPCs.
 /// </summary>
 public virtual void SetUp()
 {
     // Configuration for the cluster under test.
     conf = new HdfsConfiguration();
     conf.SetLong(DFSConfigKeys.DfsNamenodePathBasedCacheRefreshIntervalMs, 100);
     conf.SetLong(DFSConfigKeys.DfsCachereportIntervalMsecKey, 500);
     conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
     conf.SetLong(DFSConfigKeys.DfsDatanodeMaxLockedMemoryKey, CacheCapacity);
     conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1);

     // Remember the current cache manipulator before installing the
     // NoMlock one for the test.
     prevCacheManipulator = NativeIO.POSIX.GetCacheManipulator();
     NativeIO.POSIX.SetCacheManipulator(new NativeIO.POSIX.NoMlockCacheManipulator());

     // Start the cluster with one DataNode and wait until it is active.
     cluster = new MiniDFSCluster.Builder(conf)
         .NumDataNodes(1)
         .Build();
     cluster.WaitActive();

     // Cache the handles used by the tests.
     fs = cluster.GetFileSystem();
     nn = cluster.GetNameNode();
     fsImage = nn.GetFSImage();
     dn = cluster.GetDataNodes()[0];
     fsd = dn.GetFSDataset();
     spyNN = DataNodeTestUtils.SpyOnBposToNN(dn, nn);
 }
        /// <summary>
        /// Checks that a DataNode configured with one good and one bad data
        /// directory, and a tolerance of one failed volume, starts up using only
        /// the good directory.
        /// </summary>
        public virtual void TestValidVolumesAtStartup()
        {
            // Skipped on Windows.
            Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));

            // Start from a state with no DataNodes running.
            cluster.ShutdownDataNodes();

            // The DataNode will get two data dirs, one of them bad, and may
            // tolerate one failed volume.
            conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 1);

            // The real data dirs are the subdirectories "1" and "2", so that the
            // failure is injected into the parent of only one of them.
            FilePath badDataRoot = new FilePath(MiniDFSCluster.GetBaseDirectory(), "badData");
            FilePath goodParent = new FilePath(badDataRoot, "data1");
            FilePath goodDataDir = new FilePath(goodParent, "1");
            goodDataDir.Mkdirs();

            // Prepare the second parent so that using its data dir causes an IOE.
            FilePath badParent = new FilePath(badDataRoot, "data2");
            PrepareDirToFail(badParent);
            FilePath badDataDir = new FilePath(badParent, "2");

            // Start a single DataNode on the manually managed directories.
            conf.Set(
                DFSConfigKeys.DfsDatanodeDataDirKey,
                goodDataDir.GetPath() + "," + badDataDir.GetPath());
            cluster.StartDataNodes(conf, 1, false, null, null);
            cluster.WaitActive();

            try
            {
                NUnit.Framework.Assert.IsTrue(
                    "The DN should have started up fine.",
                    cluster.IsDataNodeUp());

                // The storage info must mention the good dir and not the bad one.
                DataNode startedDn = cluster.GetDataNodes()[0];
                string storageInfo = DataNodeTestUtils.GetFSDataset(startedDn).GetStorageInfo();
                NUnit.Framework.Assert.IsTrue(
                    "The DN should have started with this directory",
                    storageInfo.Contains(goodDataDir.GetPath()));
                NUnit.Framework.Assert.IsFalse(
                    "The DN shouldn't have a bad directory.",
                    storageInfo.Contains(badDataDir.GetPath()));
            }
            finally
            {
                // Stop the DataNode and reset the bad parent's mode to 755.
                cluster.ShutdownDataNodes();
                FileUtil.Chmod(badParent.ToString(), "755");
            }
        }
        /// <summary>
        /// Checks that the DataNode reports deletions across all of its storages
        /// to the NameNode in one incremental block report, not one per storage.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestDataNodeDoesNotSplitReports()
        {
            LocatedBlocks createdBlocks = CreateFileGetBlocks(GenericTestUtils.GetMethodName());

            // The test relies on there being exactly one DataNode.
            Assert.AssertThat(cluster.GetDataNodes().Count, IS.Is(1));

            // Tell the DataNode that every block of the file has been deleted.
            foreach (LocatedBlock located in createdBlocks.GetLocatedBlocks())
            {
                dn0.NotifyNamenodeDeletedBlock(located.GetBlock(), located.GetStorageIDs()[0]);
            }

            Log.Info("Triggering report after deleting blocks");

            // Counter value before the report is triggered.
            long opsBefore = MetricsAsserts.GetLongCounter(
                "BlockReceivedAndDeletedOps",
                MetricsAsserts.GetMetrics(NnMetrics));

            // Trigger the report and allow a few seconds for it to arrive.
            DataNodeTestUtils.TriggerBlockReport(dn0);
            Sharpen.Thread.Sleep(5000);

            // The counter must have advanced by exactly one since the trigger.
            MetricsAsserts.AssertCounter(
                "BlockReceivedAndDeletedOps",
                opsBefore + 1,
                MetricsAsserts.GetMetrics(NnMetrics));
        }
Exemple #12
0
 /// <summary>
 /// Runs the directory scanner against a DataNode with RAM_DISK and DEFAULT
 /// storages, before and after a block created on RAM_DISK is duplicated onto
 /// DEFAULT storage.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestDeleteBlockOnTransientStorage()
 {
     StorageType[] storageTypes = new StorageType[] { StorageType.RamDisk, StorageType.Default };
     cluster = new MiniDFSCluster.Builder(Conf)
         .StorageTypes(storageTypes)
         .NumDataNodes(1)
         .Build();
     try
     {
         cluster.WaitActive();
         bpid = cluster.GetNamesystem().GetBlockPoolId();

         // Set up the scanner on the only DataNode and keep its diffs.
         DataNode scannedDn = cluster.GetDataNodes()[0];
         fds = DataNodeTestUtils.GetFSDataset(cluster.GetDataNodes()[0]);
         client = cluster.GetFileSystem().GetClient();
         scanner = new DirectoryScanner(scannedDn, fds, Conf);
         scanner.SetRetainDiffs(true);
         FsDatasetTestUtil.StopLazyWriter(cluster.GetDataNodes()[0]);

         // Create a file on RAM_DISK.
         IList<LocatedBlock> ramDiskBlocks = CreateFile(
             GenericTestUtils.GetMethodName(), BlockLength, true);

         // The volume map and the disk must agree at this point.
         Scan(1, 0, 0, 0, 0, 0);

         // Copy the block onto DEFAULT storage; the scanner must notice it.
         DuplicateBlock(ramDiskBlocks[0].GetBlock().GetBlockId());
         Scan(2, 1, 0, 0, 0, 0, 1);

         // The RAM_DISK copy must have been deleted.
         VerifyStorageType(ramDiskBlocks[0].GetBlock().GetBlockId(), false);
         Scan(1, 0, 0, 0, 0, 0);
     }
     finally
     {
         // Stop the scanner if one was created, then the cluster.
         if (scanner != null)
         {
             scanner.Shutdown();
             scanner = null;
         }
         cluster.Shutdown();
         cluster = null;
     }
 }
        /// <summary>
        /// With a split threshold of zero, the DataNode must send one block
        /// report call per storage.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestAlwaysSplit()
        {
            StartUpCluster(0);
            NameNode nameNode = cluster.GetNameNode();
            DataNode dataNode = cluster.GetDataNodes()[0];

            // Write a file consisting of a few blocks.
            CreateFile(GenericTestUtils.GetMethodName(), BlocksInFile);

            // Intercept the RPCs the DataNode sends to the NameNode.
            DatanodeProtocolClientSideTranslatorPB namenodeSpy =
                DataNodeTestUtils.SpyOnBposToNN(dataNode, nameNode);

            // Trigger a block report so that the spy records an interaction.
            DataNodeTestUtils.TriggerBlockReport(dataNode);

            // Expect as many BlockReport calls as there are storages per
            // datanode, capturing the reports passed to each call.
            ArgumentCaptor<StorageBlockReport[]> reportCaptor =
                ArgumentCaptor.ForClass<StorageBlockReport[]>();
            Org.Mockito.Mockito.Verify(
                    namenodeSpy,
                    Org.Mockito.Mockito.Times(cluster.GetStoragesPerDatanode()))
                .BlockReport(
                    Matchers.Any<DatanodeRegistration>(),
                    Matchers.AnyString(),
                    reportCaptor.Capture(),
                    Org.Mockito.Mockito.AnyObject<BlockReportContext>());

            VerifyCapturedArguments(reportCaptor, 1, BlocksInFile);
        }
Exemple #14
0
        /// <summary>
        /// Checks that the volume failures reported by the NameNode are the same
        /// before and after the NameNode is restarted.
        /// </summary>
        public virtual void TestVolFailureStatsPreservedOnNNRestart()
        {
            // Start two more datanodes; they can tolerate 1 volume failure.
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            DatanodeManager datanodeManager =
                cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long totalCapacity = DFSTestUtil.GetLiveDatanodeCapacity(datanodeManager);
            long singleDnCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeManager, 0);

            // Fail the first volume on the first two datanodes. The third stays
            // healthy so that one node in the pipeline will not fail.
            FilePath firstDnVolume = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath secondDnVolume = new FilePath(dataDir, "data" + (2 * 1 + 1));
            DataNodeTestUtils.InjectDataDirFailure(firstDnVolume, secondDnVolume);

            Path testFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, testFile, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, testFile, (short)2);
            AList<DataNode> dataNodes = cluster.GetDataNodes();

            // Before the restart: the NameNode reports two volume failures.
            DFSTestUtil.WaitForDatanodeStatus(
                datanodeManager, 3, 0, 2,
                totalCapacity - (1 * singleDnCapacity),
                WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(datanodeManager, dataNodes[0], true, firstDnVolume.GetAbsolutePath());
            CheckFailuresAtNameNode(datanodeManager, dataNodes[1], true, secondDnVolume.GetAbsolutePath());

            // After the restart: the same two failures must still be reported.
            cluster.RestartNameNode(0);
            cluster.WaitActive();
            DFSTestUtil.WaitForDatanodeStatus(
                datanodeManager, 3, 0, 2,
                totalCapacity - (1 * singleDnCapacity),
                WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(datanodeManager, dataNodes[0], true, firstDnVolume.GetAbsolutePath());
            CheckFailuresAtNameNode(datanodeManager, dataNodes[1], true, secondDnVolume.GetAbsolutePath());
        }
 /// <summary>
 /// Installs a spy on the calls that <c>singletonDn</c> makes to
 /// <c>singletonNn</c>.
 /// </summary>
 /// <returns>The spy object, usable for Mockito verification.</returns>
 internal virtual DatanodeProtocolClientSideTranslatorPB SpyOnDnCallsToNn()
 {
     DatanodeProtocolClientSideTranslatorPB namenodeSpy =
         DataNodeTestUtils.SpyOnBposToNN(singletonDn, singletonNn);
     return namenodeSpy;
 }
Exemple #16
0
        /// <summary>
        /// Blocks until the datanode at index <paramref name="DnN1"/> exists, holds
        /// a replica of <paramref name="bl"/>, and that replica is in the
        /// Temporary state.
        /// </summary>
        /// <remarks>
        /// Fails an assertion when finding the replica, or waiting for it to become
        /// Temporary, takes longer than 40000 ms. The initial wait for the datanode
        /// to appear has no time limit.
        /// </remarks>
        /// <param name="bl">Block whose replica is awaited.</param>
        /// <param name="DnN1">Index of the datanode in the cluster's datanode list.</param>
        /// <exception cref="System.IO.IOException"/>
        private void WaitForTempReplica(Block bl, int DnN1)
        {
            // Always false: passing it to Assert.IsTrue is how the timeouts fail.
            bool alwaysFalse = false;
            int timeoutMs = 40000;

            // Wait for the requested datanode to be present in the cluster.
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Wait for datanode " + DnN1 + " to appear");
            }
            while (cluster.GetDataNodes().Count <= DnN1)
            {
                WaitTil(20);
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Total number of DNs " + cluster.GetDataNodes().Count);
            }
            cluster.WaitActive();

            // Poll that datanode until it knows a replica of the block.
            DataNode targetDn = cluster.GetDataNodes()[DnN1];
            string blockPoolId = cluster.GetNamesystem().GetBlockPoolId();
            Replica replica = DataNodeTestUtils.FetchReplicaInfo(targetDn, blockPoolId, bl.GetBlockId());
            long waitStart = Time.MonotonicNow();
            int polls = 0;
            while (replica == null)
            {
                WaitTil(5);
                replica = DataNodeTestUtils.FetchReplicaInfo(targetDn, blockPoolId, bl.GetBlockId());
                long elapsed = Time.MonotonicNow() - waitStart;

                // Log progress on every 100th poll only.
                if (polls++ % 100 == 0 && Log.IsDebugEnabled())
                {
                    Log.Debug("Has been waiting for " + elapsed + " ms.");
                }
                if (elapsed > timeoutMs)
                {
                    NUnit.Framework.Assert.IsTrue(
                        "Was waiting too long to get ReplicaInfo from a datanode",
                        alwaysFalse);
                }
            }

            HdfsServerConstants.ReplicaState replicaState = replica.GetState();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Replica state before the loop " + replicaState.GetValue());
            }

            // Poll the replica until it reaches the Temporary state.
            waitStart = Time.MonotonicNow();
            while (replicaState != HdfsServerConstants.ReplicaState.Temporary)
            {
                WaitTil(5);
                replicaState = replica.GetState();
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Keep waiting for " + bl.GetBlockName() + " is in state " + replicaState.GetValue());
                }
                if (Time.MonotonicNow() - waitStart > timeoutMs)
                {
                    NUnit.Framework.Assert.IsTrue(
                        "Was waiting too long for a replica to become TEMPORARY",
                        alwaysFalse);
                }
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Replica state after the loop " + replicaState.GetValue());
            }
        }
Exemple #17
0
        /// <summary>
        /// Fails the first volume of the first DataNode and checks that every
        /// trace of it is gone afterwards: DataStorage, BlockPoolSliceStorage,
        /// the FsDataset volume list, the replica list and the DataNode's
        /// data-dir configuration.
        /// </summary>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        public virtual void TestFailedVolumeBeingRemovedFromDataNode()
        {
            Path testFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, testFile, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, testFile, (short)2);

            // Volume 1 of DataNode 0.
            FilePath failedVol = new FilePath(dataDir, "data" + (2 * 0 + 1));
            DataNodeTestUtils.InjectDataDirFailure(failedVol);

            DataNode firstDn = cluster.GetDataNodes()[0];
            long checkStampBefore = firstDn.GetLastDiskErrorCheck();
            firstDn.CheckDiskErrorAsync();
            // Poll until the last-disk-error-check value changes, i.e. the
            // asynchronous check started above has run and seen the failure.
            while (firstDn.GetLastDiskErrorCheck() == checkStampBefore)
            {
                Sharpen.Thread.Sleep(100);
            }

            string   failedVolPath = failedVol.GetAbsolutePath();
            FilePath failedVolFile = failedVol.GetAbsoluteFile();

            // 1. The failed volume is no longer part of DataStorage ...
            DataStorage dnStorage = firstDn.GetStorage();
            NUnit.Framework.Assert.AreEqual(1, dnStorage.GetNumStorageDirs());
            for (int idx = 0; idx < dnStorage.GetNumStorageDirs(); idx++)
            {
                Storage.StorageDirectory dir = dnStorage.GetStorageDir(idx);
                NUnit.Framework.Assert.IsFalse(dir.GetRoot().GetAbsolutePath().StartsWith(failedVolPath));
            }

            // ... nor of the block pool slice storage.
            string blockPoolId = cluster.GetNamesystem().GetBlockPoolId();
            BlockPoolSliceStorage bpStorage = dnStorage.GetBPStorage(blockPoolId);
            NUnit.Framework.Assert.AreEqual(1, bpStorage.GetNumStorageDirs());
            for (int idx = 0; idx < bpStorage.GetNumStorageDirs(); idx++)
            {
                Storage.StorageDirectory dir = bpStorage.GetStorageDir(idx);
                NUnit.Framework.Assert.IsFalse(dir.GetRoot().GetAbsolutePath().StartsWith(failedVolPath));
            }

            // 2. The failed volume is not listed by the FsDataset.
            FsDatasetSpi<FsVolumeSpi> dataset = firstDn.GetFSDataset();
            foreach (FsVolumeSpi vol in dataset.GetVolumes())
            {
                Assert.AssertNotEquals(new FilePath(vol.GetBasePath()).GetAbsoluteFile(), failedVolFile);
            }

            // 3. No remaining replica lives on the failed volume.
            foreach (ReplicaInfo info in FsDatasetTestUtil.GetReplicas(dataset, blockPoolId))
            {
                NUnit.Framework.Assert.IsNotNull(info.GetVolume());
                Assert.AssertNotEquals(new FilePath(info.GetVolume().GetBasePath()).GetAbsoluteFile(),
                                       failedVolFile);
            }

            // 4. The DataNode's configured data dirs no longer mention it.
            string configuredDirs = firstDn.GetConf().Get(DFSConfigKeys.DfsDatanodeDataDirKey);
            string[] dirList = configuredDirs.Split(",");
            NUnit.Framework.Assert.AreEqual(1, dirList.Length);
            NUnit.Framework.Assert.IsFalse(dirList[0].Contains(failedVolPath));
        }
Exemple #18
0
        /// <summary>
        /// Fails volumes one after another across three DataNodes and checks,
        /// after each step, the failure information seen at the DataNodes and
        /// at the NameNode, then restores the directories and checks that the
        /// cluster returns to its original capacity with zero failures.
        /// </summary>
        public virtual void TestSuccessiveVolumeFailures()
        {
            // Grow the cluster by two datanodes.
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            // Give the datanodes time to heartbeat their capacities before
            // the baseline totals are sampled.
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager datanodeMgr = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long baselineCapacity = DFSTestUtil.GetLiveDatanodeCapacity(datanodeMgr);
            long perNodeCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeMgr, 0);

            // Datanode i (0-based) owns data(2i+1) and data(2i+2).
            FilePath node1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath node2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath node3Vol1 = new FilePath(dataDir, "data" + (2 * 2 + 1));
            FilePath node3Vol2 = new FilePath(dataDir, "data" + (2 * 2 + 2));
            string node1Vol1Path = node1Vol1.GetAbsolutePath();
            string node2Vol1Path = node2Vol1.GetAbsolutePath();
            string node3Vol1Path = node3Vol1.GetAbsolutePath();
            string node3Vol2Path = node3Vol2.GetAbsolutePath();

            // Break the first volume on the first two datanodes only.
            // Breaking it on all three would fail the whole pipeline, since
            // the client does not retry failed nodes even though a single
            // failed volume might leave them usable.
            DataNodeTestUtils.InjectDataDirFailure(node1Vol1, node2Vol1);

            // All DNs can still store a block, so three replicas must be
            // reachable and every DN must still be up.
            Path firstFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, firstFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, firstFile, (short)3);
            AList<DataNode> dataNodes = cluster.GetDataNodes();
            NUnit.Framework.Assert.IsTrue("DN1 should be up", dataNodes[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dataNodes[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dataNodes[2].IsDatanodeUp());

            // The DataNode-side metrics should confirm the volume failures.
            CheckFailuresAtDataNode(dataNodes[0], 1, true, node1Vol1Path);
            CheckFailuresAtDataNode(dataNodes[1], 1, true, node2Vol1Path);
            CheckFailuresAtDataNode(dataNodes[2], 0, true);

            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);

            // Eventually the NN should report two volume failures.
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 2,
                                              baselineCapacity - (1 * perNodeCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true, node1Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true, node2Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[2], true);

            // Fail one volume on the third datanode. Three replicas are still
            // expected because the earlier failures are already known.
            DataNodeTestUtils.InjectDataDirFailure(node3Vol1);
            Path secondFile = new Path("/test2");
            DFSTestUtil.CreateFile(fs, secondFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, secondFile, (short)3);
            NUnit.Framework.Assert.IsTrue("DN3 should still be up", dataNodes[2].IsDatanodeUp());
            CheckFailuresAtDataNode(dataNodes[2], 1, true, node3Vol1Path);
            DataNodeTestUtils.TriggerHeartbeat(dataNodes[2]);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[2], true, node3Vol1Path);

            // After the next heartbeats the total should be down by three
            // volumes (assuming the host did not grow or shrink the data
            // volume while the test was running). The per-node capacity is
            // re-sampled here, after the first node lost a volume.
            perNodeCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeMgr, 0);
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 3,
                                              baselineCapacity - (3 * perNodeCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 3);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true, node1Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true, node2Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[2], true, node3Vol1Path);

            // Fail the second volume on the third datanode as well. With all
            // of its volumes failed it should report two failures and stop
            // being up, so only two replicas can ever be reached.
            DataNodeTestUtils.InjectDataDirFailure(node3Vol2);
            Path thirdFile = new Path("/test3");
            DFSTestUtil.CreateFile(fs, thirdFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, thirdFile, (short)2);

            // The DN should consider itself dead ...
            DFSTestUtil.WaitForDatanodeDeath(dataNodes[2]);
            // ... and report both failed volumes.
            CheckFailuresAtDataNode(dataNodes[2], 2, true, node3Vol1Path, node3Vol2Path);

            // The NN considers the DN dead.
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 2, 1, 2,
                                              baselineCapacity - (4 * perNodeCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true, node1Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true, node2Vol1Path);

            // A datanode never tries to restore a failed volume by itself,
            // even if it is repaired, but it should see the volume again on
            // restart. So restore the directories, restart, and expect file
            // creation to succeed.
            DataNodeTestUtils.RestoreDataDirFromFailure(node1Vol1, node2Vol1, node3Vol1, node3Vol2);
            cluster.RestartDataNodes();
            cluster.WaitActive();
            Path fourthFile = new Path("/test4");
            DFSTestUtil.CreateFile(fs, fourthFile, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, fourthFile, (short)3);

            // Capacity should return to the baseline, and both the metrics
            // and the NN should report zero volume failures.
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 0, baselineCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            dataNodes = cluster.GetDataNodes();
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[2], true);
        }
Exemple #19
0
        /// <summary>
        /// Fails one volume on each of the first two DataNodes, reconfigures
        /// those DataNodes twice while the volumes are still broken (the
        /// reported failures must stay the same, with no double-counting),
        /// then restores the volumes, reconfigures once more and expects the
        /// NameNode to report no failed volumes.
        /// </summary>
        public virtual void TestDataNodeReconfigureWithVolumeFailures()
        {
            // Grow the cluster by two datanodes.
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();
            DatanodeManager datanodeMgr = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long baselineCapacity = DFSTestUtil.GetLiveDatanodeCapacity(datanodeMgr);
            long perNodeCapacity = DFSTestUtil.GetDatanodeCapacity(datanodeMgr, 0);

            // Only the first two datanodes get a failed volume; the third is
            // kept healthy so one node in the pipeline will not fail.
            FilePath node1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath node1Vol2 = new FilePath(dataDir, "data" + (2 * 0 + 2));
            FilePath node2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath node2Vol2 = new FilePath(dataDir, "data" + (2 * 1 + 2));
            string node1Vol1Path = node1Vol1.GetAbsolutePath();
            string node2Vol1Path = node2Vol1.GetAbsolutePath();

            DataNodeTestUtils.InjectDataDirFailure(node1Vol1);
            DataNodeTestUtils.InjectDataDirFailure(node2Vol1);

            Path probeFile = new Path("/test1");
            DFSTestUtil.CreateFile(fs, probeFile, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, probeFile, (short)2);

            AList<DataNode> dataNodes = cluster.GetDataNodes();
            NUnit.Framework.Assert.IsTrue("DN1 should be up", dataNodes[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dataNodes[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dataNodes[2].IsDatanodeUp());
            CheckFailuresAtDataNode(dataNodes[0], 1, true, node1Vol1Path);
            CheckFailuresAtDataNode(dataNodes[1], 1, true, node2Vol1Path);
            CheckFailuresAtDataNode(dataNodes[2], 0, true);

            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);

            // The NN reports two volume failures.
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 2,
                                              baselineCapacity - (1 * perNodeCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true, node1Vol1Path);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true, node2Vol1Path);

            // Two identical rounds: reconfigure with the still-failed volumes
            // and expect exactly the same failures to be reported each time
            // (no double-counting).
            for (int round = 0; round < 2; round++)
            {
                ReconfigureDataNode(dataNodes[0], node1Vol1, node1Vol2);
                ReconfigureDataNode(dataNodes[1], node2Vol1, node2Vol2);
                DataNodeTestUtils.TriggerHeartbeat(dataNodes[0]);
                DataNodeTestUtils.TriggerHeartbeat(dataNodes[1]);
                CheckFailuresAtDataNode(dataNodes[0], 1, false, node1Vol1Path);
                CheckFailuresAtDataNode(dataNodes[1], 1, false, node2Vol1Path);

                // Ensure we wait a sufficient amount of time.
                System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);

                // The NN reports two volume failures again.
                DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 2,
                                                  baselineCapacity - (1 * perNodeCapacity), WaitForHeartbeats);
                CheckAggregateFailuresAtNameNode(false, 2);
                CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], false, node1Vol1Path);
                CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], false, node2Vol1Path);
            }

            // Make the failed volumes healthy again and reconfigure; the
            // failed volume information should be cleared.
            DataNodeTestUtils.RestoreDataDirFromFailure(node1Vol1, node2Vol1);
            ReconfigureDataNode(dataNodes[0], node1Vol1, node1Vol2);
            ReconfigureDataNode(dataNodes[1], node2Vol1, node2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dataNodes[0]);
            DataNodeTestUtils.TriggerHeartbeat(dataNodes[1]);
            CheckFailuresAtDataNode(dataNodes[0], 1, true);
            CheckFailuresAtDataNode(dataNodes[1], 1, true);
            DFSTestUtil.WaitForDatanodeStatus(datanodeMgr, 3, 0, 0, baselineCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[0], true);
            CheckFailuresAtNameNode(datanodeMgr, dataNodes[1], true);
        }
Exemple #20
0
 /// <summary>Force the DataNode to report missing blocks immediately.</summary>
 /// <remarks>
 /// Schedules a block report on the DataNode and then asks
 /// <c>DataNodeTestUtils</c> to trigger a deletion report for it.
 /// </remarks>
 /// <param name="datanode">the DataNode whose reports are triggered</param>
 /// <exception cref="System.IO.IOException"/>
 private static void TriggerDeleteReport(DataNode datanode)
 {
     // NOTE(review): the 0 is presumably a delay, i.e. "as soon as possible" - confirm
     datanode.ScheduleAllBlockReport(0);
     DataNodeTestUtils.TriggerDeletionReport(datanode);
 }
Exemple #21
0
        /// <summary>
        /// Writes a file, deletes two of its blocks on disk, forces a block
        /// report and checks the NameNode's missing and under-replicated counts.
        /// </summary>
        /// <remarks>
        /// A file is created and two randomly chosen, distinct blocks of it are
        /// unfinalized and their files deleted from the data directory. A block
        /// report is then sent and the FSNamesystem is pushed to recalculate the
        /// required DN activities such as replications. The number of missing
        /// and under-replicated blocks should both equal the number of removed
        /// blocks in case of a single-DN cluster.
        /// </remarks>
        /// <exception cref="System.IO.IOException">in case of errors</exception>
        public virtual void BlockReport_02()
        {
            string methodName = GenericTestUtils.GetMethodName();

            Log.Info("Running test " + methodName);
            Path filePath = new Path("/" + methodName + ".dat");

            DFSTestUtil.CreateFile(fs, filePath, FileSize, ReplFactor, rand.NextLong());
            // mock around with newly created blocks and delete some
            FilePath dataDir = new FilePath(cluster.GetDataDirectory());

            NUnit.Framework.Assert.IsTrue(dataDir.IsDirectory());
            IList<ExtendedBlock> blocks2Remove = new AList<ExtendedBlock>();
            IList<int>           removedIndex  = new AList<int>();
            IList<LocatedBlock>  lBlocks       = cluster.GetNameNodeRpc().GetBlockLocations(
                filePath.ToString(), FileStart, FileSize).GetLocatedBlocks();

            // Two distinct indices are drawn at random below. With fewer than
            // two blocks that loop could never terminate, so fail fast here
            // instead of hanging the test run.
            NUnit.Framework.Assert.IsTrue("Need at least 2 blocks to pick from, found " + lBlocks.Count,
                                          lBlocks.Count >= 2);
            while (removedIndex.Count != 2)
            {
                int newRemoveIndex = rand.Next(lBlocks.Count);
                if (!removedIndex.Contains(newRemoveIndex))
                {
                    removedIndex.AddItem(newRemoveIndex);
                }
            }
            foreach (int aRemovedIndex in removedIndex)
            {
                blocks2Remove.AddItem(lBlocks[aRemovedIndex].GetBlock());
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Number of blocks allocated " + lBlocks.Count);
            }
            DataNode dn0 = cluster.GetDataNodes()[DnN0];

            foreach (ExtendedBlock b in blocks2Remove)
            {
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Removing the block " + b.GetBlockName());
                }
                // Every on-disk file matching the block name is removed; the
                // block is unfinalized in the dataset before each deletion.
                foreach (FilePath f in FindAllFiles(dataDir, new BlockReportTestBase.MyFileFilter(
                                                        this, b.GetBlockName(), true)))
                {
                    DataNodeTestUtils.GetFSDataset(dn0).UnfinalizeBlock(b);
                    if (!f.Delete())
                    {
                        Log.Warn("Couldn't delete " + b.GetBlockName());
                    }
                    else if (Log.IsDebugEnabled())
                    {
                        Log.Debug("Deleted file " + f.ToString());
                    }
                }
            }
            WaitTil(TimeUnit.Seconds.ToMillis(DnRescanExtraWait));
            // all blocks belong to the same file, hence same BP
            string poolId            = cluster.GetNamesystem().GetBlockPoolId();
            DatanodeRegistration dnR = dn0.GetDNRegistrationForBP(poolId);

            StorageBlockReport[] reports = GetBlockReports(dn0, poolId, false, false);
            SendBlockReports(dnR, poolId, reports);
            BlockManagerTestUtil.GetComputedDatanodeWork(cluster.GetNamesystem().GetBlockManager());
            PrintStats();
            NUnit.Framework.Assert.AreEqual("Wrong number of MissingBlocks is found",
                                            blocks2Remove.Count, cluster.GetNamesystem().GetMissingBlocksCount());
            NUnit.Framework.Assert.AreEqual("Wrong number of UnderReplicatedBlocks is found",
                                            blocks2Remove.Count, cluster.GetNamesystem().GetUnderReplicatedBlocks());
        }
Exemple #22
0
        /// <summary>
        /// Starts a one-DataNode mini cluster with very long report and
        /// heartbeat intervals, then checks that a manually triggered block
        /// report (incremental or full) is actually sent to the NameNode.
        /// </summary>
        /// <param name="incremental">
        /// true to trigger and expect an incremental block report, false to
        /// trigger and expect a full block report
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestTriggerBlockReport(bool incremental)
        {
            Configuration conf = new HdfsConfiguration();

            // Set a really long value for dfs.blockreport.intervalMsec and
            // dfs.heartbeat.interval, so that incremental block reports and heartbeats
            // won't be sent during this test unless they're triggered
            // manually.
            conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 10800000L);
            conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1080L);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();

            try
            {
                cluster.WaitActive();
                FileSystem fs = cluster.GetFileSystem();
                DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.SpyOnBposToNN(
                    cluster.GetDataNodes()[0], cluster.GetNameNode());

                DFSTestUtil.CreateFile(fs, new Path("/abc"), 16, (short)1, 1L);
                // We should get 1 incremental block report.
                Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(1)).BlockReceivedAndDeleted(
                    Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
                    Matchers.Any<StorageReceivedDeletedBlocks[]>());
                // We should not receive any more full or incremental block reports,
                // since the interval we configured is so long.
                for (int i = 0; i < 3; i++)
                {
                    Sharpen.Thread.Sleep(10);
                    Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(0)).BlockReport(
                        Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
                        Matchers.Any<StorageBlockReport[]>(), Org.Mockito.Mockito.AnyObject<BlockReportContext>());
                    Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(1)).BlockReceivedAndDeleted(
                        Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
                        Matchers.Any<StorageReceivedDeletedBlocks[]>());
                }
                // Create a fake block deletion notification on the DataNode.
                // This will be sent with the next incremental block report.
                ReceivedDeletedBlockInfo rdbi = new ReceivedDeletedBlockInfo(
                    new Block(5678, 512, 1000), ReceivedDeletedBlockInfo.BlockStatus.DeletedBlock, null);
                DataNode       datanode    = cluster.GetDataNodes()[0];
                BPServiceActor actor       = datanode.GetAllBpOs()[0].GetBPServiceActors()[0];
                string         storageUuid = datanode.GetFSDataset().GetVolumes()[0].GetStorageID();

                actor.NotifyNamenodeDeletedBlock(rdbi, storageUuid);
                // Manually trigger a block report.
                datanode.TriggerBlockReport(
                    new BlockReportOptions.Factory().SetIncremental(incremental).Build());
                // triggerBlockReport returns before the block report is
                // actually sent.  Wait for it to be sent here.
                if (incremental)
                {
                    Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(2)).BlockReceivedAndDeleted(
                        Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
                        Matchers.Any<StorageReceivedDeletedBlocks[]>());
                }
                else
                {
                    Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000)).BlockReport(
                        Matchers.Any<DatanodeRegistration>(), Matchers.AnyString(),
                        Matchers.Any<StorageBlockReport[]>(), Org.Mockito.Mockito.AnyObject<BlockReportContext>());
                }
            }
            finally
            {
                // Shut the mini cluster down even when a verification above
                // throws; otherwise a failed run leaves the cluster running.
                cluster.Shutdown();
            }
        }
Exemple #23
0
        /// <summary>
        /// On a two-NameNode HA cluster: writes a one-block file, moves the
        /// block to a newly added second DataNode via ReplaceBlock, sends an
        /// incremental block report, fails over to the other NameNode and
        /// checks that the block is still reported on exactly one DataNode.
        /// </summary>
        public virtual void TestDeletedBlockWhenAddBlockIsInEdit()
        {
            Configuration conf = new HdfsConfiguration();
            MiniDFSNNTopology haTopology = MiniDFSNNTopology.SimpleHATopology();

            cluster = new MiniDFSCluster.Builder(conf).NnTopology(haTopology).NumDataNodes(1).Build();
            DFSClient client = null;

            try
            {
                cluster.WaitActive();
                NUnit.Framework.Assert.AreEqual("Number of namenodes is not 2", 2,
                                                cluster.GetNumNameNodes());

                // Make namenode 0 the active one.
                cluster.TransitionToActive(0);
                NUnit.Framework.Assert.IsTrue("Namenode 0 should be in active state",
                                              cluster.GetNameNode(0).IsActiveState());
                NUnit.Framework.Assert.IsTrue("Namenode 1 should be in standby state",
                                              cluster.GetNameNode(1).IsStandbyState());

                // A heartbeat marks DatanodeStorageInfo#heartbeatedSinceFailover
                // as true.
                DataNodeTestUtils.TriggerHeartbeat(cluster.GetDataNodes()[0]);
                FileSystem activeFs = cluster.GetFileSystem(0);

                // A full block report marks DatanodeStorageInfo#blockContentsStale
                // as false.
                DataNode onlyDn = cluster.GetDataNodes()[0];
                onlyDn.TriggerBlockReport(new BlockReportOptions.Factory().SetIncremental(false).Build());

                // Create a file with one block.
                Path filePath = new Path("/tmp.txt");
                DFSTestUtil.CreateFile(activeFs, filePath, 10L, (short)1, 1234L);
                DFSTestUtil.WaitReplication(activeFs, filePath, (short)1);
                client = new DFSClient(cluster.GetFileSystem(0).GetUri(), conf);
                IList<LocatedBlock> blocksBefore =
                    client.GetNamenode().GetBlockLocations("/tmp.txt", 0, 10L).GetLocatedBlocks();
                NUnit.Framework.Assert.IsTrue(blocksBefore.Count == 1);
                NUnit.Framework.Assert.IsTrue(blocksBefore[0].GetLocations().Length == 1);

                // Add a second datanode to the cluster.
                cluster.StartDataNodes(conf, 1, true, null, null, null, null);
                NUnit.Framework.Assert.AreEqual("Number of datanodes should be 2", 2,
                                                cluster.GetDataNodes().Count);

                DataNode sourceDn = cluster.GetDataNodes()[0];
                DataNode targetDn = cluster.GetDataNodes()[1];
                string   activePoolId = cluster.GetNamesystem(0).GetBlockPoolId();
                DatanodeDescriptor sourceDesc = NameNodeAdapter.GetDatanode(
                    cluster.GetNamesystem(0), sourceDn.GetDNRegistrationForBP(activePoolId));
                DatanodeDescriptor targetDesc = NameNodeAdapter.GetDatanode(
                    cluster.GetNamesystem(0), targetDn.GetDNRegistrationForBP(activePoolId));
                ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(activeFs, filePath);
                Log.Info("replaceBlock:  " + ReplaceBlock(firstBlock, (DatanodeInfo)sourceDesc,
                                                          (DatanodeInfo)sourceDesc, (DatanodeInfo)targetDesc));

                // Give the FsDatasetAsyncDiskService time to delete the block.
                Sharpen.Thread.Sleep(3000);

                // Send an incremental block report so the deleted block is
                // reported to the namenode.
                cluster.GetDataNodes()[0].TriggerBlockReport(
                    new BlockReportOptions.Factory().SetIncremental(true).Build());

                // Fail over: namenode 1 becomes active, namenode 0 standby.
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue("Namenode 1 should be in active state",
                                              cluster.GetNameNode(1).IsActiveState());
                NUnit.Framework.Assert.IsTrue("Namenode 0 should be in standby state",
                                              cluster.GetNameNode(0).IsStandbyState());
                client.Close();

                // Open a new client against the new active namenode.
                client = new DFSClient(cluster.GetFileSystem(1).GetUri(), conf);
                IList<LocatedBlock> blocksAfter =
                    client.GetNamenode().GetBlockLocations("/tmp.txt", 0, 10L).GetLocatedBlocks();
                NUnit.Framework.Assert.AreEqual(1, blocksAfter.Count);
                NUnit.Framework.Assert.AreEqual("The block should be only on 1 datanode ", 1,
                                                blocksAfter[0].GetLocations().Length);
            }
            finally
            {
                IOUtils.Cleanup(null, client);
                cluster.Shutdown();
            }
        }
Exemple #24
0
 /// <exception cref="System.Exception"/>
 /// <param name="parallelism">
 /// Value written to <c>DFSConfigKeys.DfsDatanodeDirectoryscanThreadsKey</c> in the shared
 /// configuration before the <c>DirectoryScanner</c> under test is constructed.
 /// </param>
 /// <remarks>
 /// Starts a mini cluster, builds a <c>DirectoryScanner</c> for its first datanode and walks
 /// through a fixed sequence of scenarios. Each scenario disturbs block and/or meta files through
 /// the sibling helper methods, calls <c>Scan</c> with the expected counts, and then calls
 /// <c>Scan</c> again expecting all-zero counts. The scanner and the cluster are shut down on
 /// every exit path.
 /// </remarks>
 public virtual void RunTest(int parallelism)
 {
     // Number of blocks written by the initial CreateFile call.
     const int InitialBlockCount = 100;

     cluster = new MiniDFSCluster.Builder(Conf).Build();
     try
     {
         cluster.WaitActive();
         bpid   = cluster.GetNamesystem().GetBlockPoolId();
         fds    = DataNodeTestUtils.GetFSDataset(cluster.GetDataNodes()[0]);
         client = cluster.GetFileSystem().GetClient();
         Conf.SetInt(DFSConfigKeys.DfsDatanodeDirectoryscanThreadsKey, parallelism);
         DataNode dataNode = cluster.GetDataNodes()[0];
         scanner = new DirectoryScanner(dataNode, fds, Conf);
         scanner.SetRetainDiffs(true);

         // Add files with 100 blocks
         CreateFile(GenericTestUtils.GetMethodName(), BlockLength * InitialBlockCount, false);
         long totalBlocks = InitialBlockCount;

         // Test1: No difference between volumeMap and disk
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test2: block metafile is missing
         long blockId = DeleteMetaFile();
         Scan(totalBlocks, 1, 1, 0, 0, 1);
         VerifyGenStamp(blockId, GenerationStamp.GrandfatherGenerationStamp);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test3: block file is missing
         blockId = DeleteBlockFile();
         Scan(totalBlocks, 1, 0, 1, 0, 0);
         totalBlocks--;
         VerifyDeletion(blockId);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test4: A block file exists for which there is no metafile and
         // a block in memory
         blockId = CreateBlockFile();
         totalBlocks++;
         Scan(totalBlocks, 1, 1, 0, 1, 0);
         VerifyAddition(blockId, GenerationStamp.GrandfatherGenerationStamp, 0);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test5: A metafile exists for which there is no block file and
         // a block in memory
         blockId = CreateMetaFile();
         // totalBlocks is intentionally not incremented here: the first scan is
         // given one extra block, and the follow-up scan goes back to totalBlocks.
         Scan(totalBlocks + 1, 1, 0, 1, 1, 0);
         FilePath metafile = new FilePath(GetMetaFile(blockId));
         // After the scan the orphan metafile must no longer be on disk.
         NUnit.Framework.Assert.IsFalse(metafile.Exists());
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test6: A block file and metafile exists for which there is no block in
         // memory
         blockId = CreateBlockMetaFile();
         totalBlocks++;
         Scan(totalBlocks, 1, 0, 0, 1, 0);
         VerifyAddition(blockId, DefaultGenStamp, 0);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // The bulk scenarios below only check the counts passed to Scan; the ids
         // returned by the helpers are not needed, so they are discarded.

         // Test7: Delete bunch of metafiles
         for (int i = 0; i < 10; i++)
         {
             DeleteMetaFile();
         }
         Scan(totalBlocks, 10, 10, 0, 0, 10);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test8: Delete bunch of block files
         for (int i = 0; i < 10; i++)
         {
             DeleteBlockFile();
         }
         Scan(totalBlocks, 10, 0, 10, 0, 0);
         totalBlocks -= 10;
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test9: create a bunch of blocks files
         for (int i = 0; i < 10; i++)
         {
             CreateBlockFile();
         }
         totalBlocks += 10;
         Scan(totalBlocks, 10, 10, 0, 10, 0);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test10: create a bunch of metafiles
         for (int i = 0; i < 10; i++)
         {
             CreateMetaFile();
         }
         Scan(totalBlocks + 10, 10, 0, 10, 10, 0);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test11: create a bunch block files and meta files
         for (int i = 0; i < 10; i++)
         {
             CreateBlockMetaFile();
         }
         totalBlocks += 10;
         Scan(totalBlocks, 10, 0, 0, 10, 0);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test12: truncate block files to test block length mismatch
         for (int i = 0; i < 10; i++)
         {
             TruncateBlockFile();
         }
         Scan(totalBlocks, 10, 0, 0, 0, 10);
         Scan(totalBlocks, 0, 0, 0, 0, 0);

         // Test13: all the conditions combined
         CreateMetaFile();
         CreateBlockFile();
         CreateBlockMetaFile();
         DeleteMetaFile();
         DeleteBlockFile();
         TruncateBlockFile();
         Scan(totalBlocks + 3, 6, 2, 2, 3, 2);
         Scan(totalBlocks + 1, 0, 0, 0, 0, 0);

         // Test14: validate clean shutdown of DirectoryScanner
         // The run status is not asserted to be true before the shutdown: that
         // check assumes a "real" FSDataset, not a simulated one.
         scanner.Shutdown();
         NUnit.Framework.Assert.IsFalse(scanner.GetRunStatus());
     }
     finally
     {
         // Nested try/finally so that a failure while stopping the scanner can
         // never prevent the mini cluster from being shut down.
         try
         {
             if (scanner != null)
             {
                 scanner.Shutdown();
                 scanner = null;
             }
         }
         finally
         {
             cluster.Shutdown();
         }
     }
 }