/// <summary>
/// Invoked once a BPServiceActor has registered with its NN, to confirm that
/// the NN it reached agrees with any registration already recorded for this
/// block pool, and then to record the new registration.
/// </summary>
/// <remarks>
/// Runs entirely under the write lock. When a previous registration exists,
/// the namespace ID and cluster ID of both registrations are passed to
/// CheckNSEquality before the stored registration is replaced.
/// </remarks>
/// <param name="bpServiceActor">the actor that registered (not read by this method's body)</param>
/// <param name="reg">the registration obtained from the NN</param>
/// <exception cref="System.IO.IOException"/>
internal virtual void RegistrationSucceeded(BPServiceActor bpServiceActor, DatanodeRegistration reg)
{
    WriteLock();
    try
    {
        if (bpRegistration != null)
        {
            var knownNamespaceId = bpRegistration.GetStorageInfo().GetNamespaceID();
            var offeredNamespaceId = reg.GetStorageInfo().GetNamespaceID();
            CheckNSEquality(knownNamespaceId, offeredNamespaceId, "namespace ID");

            var knownClusterId = bpRegistration.GetStorageInfo().GetClusterID();
            var offeredClusterId = reg.GetStorageInfo().GetClusterID();
            CheckNSEquality(knownClusterId, offeredClusterId, "cluster ID");
        }

        bpRegistration = reg;
        dn.BpRegistrationSucceeded(bpRegistration, GetBlockPoolId());

        // Seed the DN's secret manager with the initial block token keys,
        // but only when block tokens are switched on.
        if (!dn.isBlockTokenEnabled)
        {
            return;
        }
        dn.blockPoolTokenSecretManager.AddKeys(GetBlockPoolId(), reg.GetExportedKeys());
    }
    finally
    {
        WriteUnlock();
    }
}
/// <summary>
/// Insert a Mockito spy object between the given DataNode and
/// the given NameNode.
/// </summary>
/// <remarks>
/// Looks up the DataNode's BPOfferService whose block pool ID equals the
/// NameNode's, then the service actor whose NN socket address equals the
/// NameNode's service RPC address, and replaces that actor's NameNode proxy
/// with a spy wrapping the original. This can be used to delay or wait for
/// RPC calls on the datanode->NN path.
/// </remarks>
/// <param name="dn">the DataNode whose actor should be spied on</param>
/// <param name="nn">the NameNode the actor talks to</param>
/// <returns>the spy that was installed on the matching actor</returns>
public static DatanodeProtocolClientSideTranslatorPB SpyOnBposToNN(DataNode dn, NameNode nn)
{
    string targetPoolId = nn.GetNamesystem().GetBlockPoolId();

    // Locate the offer service responsible for the NameNode's block pool.
    BPOfferService matchingBpos = null;
    foreach (BPOfferService candidate in dn.GetAllBpOs())
    {
        if (!candidate.GetBlockPoolId().Equals(targetPoolId))
        {
            continue;
        }
        matchingBpos = candidate;
        break;
    }
    Preconditions.CheckArgument(matchingBpos != null, "No such bpid: %s", targetPoolId);

    // Within that offer service, locate the actor connected to this NameNode.
    BPServiceActor matchingActor = null;
    foreach (BPServiceActor candidate in matchingBpos.GetBPServiceActors())
    {
        if (!candidate.GetNNSocketAddress().Equals(nn.GetServiceRpcAddress()))
        {
            continue;
        }
        matchingActor = candidate;
        break;
    }
    Preconditions.CheckArgument(matchingActor != null, "No service actor to NN at %s", nn.GetServiceRpcAddress());

    // Wrap the real proxy and hand the wrapper back to the actor.
    DatanodeProtocolClientSideTranslatorPB realProxy = matchingActor.GetNameNodeProxy();
    DatanodeProtocolClientSideTranslatorPB spiedProxy = Org.Mockito.Mockito.Spy(realProxy);
    matchingActor.SetNameNode(spiedProxy);
    return spiedProxy;
}
/// <summary>
/// Builds the BPServiceActor under test on top of Mockito mocks: a DataNode
/// that reports it should run, a DNConf whose minimum NameNode version is the
/// current version, and a NameNode proxy whose VersionRequest returns a mocked
/// NamespaceInfo.
/// </summary>
public virtual void SetUp()
{
    // DataNode configuration: minimum NN version equals the running version.
    mockDnConf = Org.Mockito.Mockito.Mock<DNConf>();
    Org.Mockito.Mockito.DoReturn(VersionInfo.GetVersion()).When(mockDnConf).GetMinimumNameNodeVersion();

    // DataNode that is running and exposes the configuration above.
    DataNode dataNodeMock = Org.Mockito.Mockito.Mock<DataNode>();
    Org.Mockito.Mockito.DoReturn(true).When(dataNodeMock).ShouldRun();
    Org.Mockito.Mockito.DoReturn(mockDnConf).When(dataNodeMock).GetDnConf();

    // Offer service that hands out the mocked DataNode.
    BPOfferService offerServiceMock = Org.Mockito.Mockito.Mock<BPOfferService>();
    Org.Mockito.Mockito.DoReturn(dataNodeMock).When(offerServiceMock).GetDataNode();

    actor = new BPServiceActor(InvalidAddr, offerServiceMock);

    fakeNsInfo = Org.Mockito.Mockito.Mock<NamespaceInfo>();
    // Report a good software version.
    Org.Mockito.Mockito.DoReturn(VersionInfo.GetVersion()).When(fakeNsInfo).GetSoftwareVersion();
    // Report a good layout version for now.
    Org.Mockito.Mockito.DoReturn(HdfsConstants.NamenodeLayoutVersion).When(fakeNsInfo).GetLayoutVersion();

    // NameNode proxy that answers version requests with the fake namespace info.
    DatanodeProtocolClientSideTranslatorPB nameNodeProxyMock =
        Org.Mockito.Mockito.Mock<DatanodeProtocolClientSideTranslatorPB>();
    Org.Mockito.Mockito.When(nameNodeProxyMock.VersionRequest()).ThenReturn(fakeNsInfo);
    actor.SetNameNode(nameNodeProxyMock);
}
/// <summary>
/// Update the BPOS's view of which NN is active, based on a heartbeat
/// response from one of the actors.
/// </summary>
/// <remarks>
/// Runs under the write lock. A claim of ACTIVE from an actor that is not the
/// current active one is accepted only when its txid is strictly greater than
/// the last recorded active claim; otherwise it is logged and ignored. An
/// actor currently considered active that no longer claims ACTIVE is cleared.
/// Whenever the given actor ends up as the active one, its txid is recorded
/// as the latest active claim.
/// </remarks>
/// <param name="actor">the actor which received the heartbeat</param>
/// <param name="nnHaState">the HA-related heartbeat contents</param>
internal virtual void UpdateActorStatesFromHeartbeat(BPServiceActor actor, NNHAStatusHeartbeat nnHaState)
{
    WriteLock();
    try
    {
        long claimTxId = nnHaState.GetTxId();
        bool claimsActive = nnHaState.GetState() == HAServiceProtocol.HAServiceState.Active;
        bool alreadyActive = bpServiceToActive == actor;
        bool supersedesLastClaim = claimTxId > lastActiveClaimTxId;

        if (claimsActive && !alreadyActive)
        {
            Log.Info("Namenode " + actor + " trying to claim ACTIVE state with txid=" + claimTxId);

            if (!supersedesLastClaim)
            {
                // Split-brain scenario - an NN is trying to claim active
                // state when a different NN has already claimed it with a higher
                // txid.
                Log.Warn("NN " + actor + " tried to claim ACTIVE state at txid=" + claimTxId
                    + " but there was already a more recent claim at txid=" + lastActiveClaimTxId);
                return;
            }

            if (bpServiceToActive == null)
            {
                Log.Info("Acknowledging ACTIVE Namenode " + actor);
            }
            else
            {
                Log.Info("Namenode " + actor + " taking over ACTIVE state from " + bpServiceToActive
                    + " at higher txid=" + claimTxId);
            }
            bpServiceToActive = actor;
        }
        else if (!claimsActive && alreadyActive)
        {
            Log.Info("Namenode " + actor + " relinquishing ACTIVE state with txid=" + nnHaState.GetTxId());
            bpServiceToActive = null;
        }

        // Record the txid of the claim whenever this actor is the active one.
        if (bpServiceToActive == actor)
        {
            System.Diagnostics.Debug.Assert(claimTxId >= lastActiveClaimTxId);
            lastActiveClaimTxId = claimTxId;
        }
    }
    finally
    {
        WriteUnlock();
    }
}
/// <summary>
/// Builds a MiniDFSCluster with DnCount datanodes and caches the pieces the
/// tests use: the file system, the NameNode, the first DataNode, that
/// DataNode's first BPOfferService and its first service actor, and the
/// storage ID of the DataNode's first volume.
/// </summary>
public virtual void StartCluster()
{
    conf = new HdfsConfiguration();

    var clusterBuilder = new MiniDFSCluster.Builder(conf);
    cluster = clusterBuilder.NumDataNodes(DnCount).Build();

    fs = cluster.GetFileSystem();
    singletonNn = cluster.GetNameNode();

    var dataNodes = cluster.GetDataNodes();
    singletonDn = dataNodes[0];

    var offerServices = singletonDn.GetAllBpOs();
    bpos = offerServices[0];

    var serviceActors = bpos.GetBPServiceActors();
    actor = serviceActors[0];

    var firstVolume = singletonDn.GetFSDataset().GetVolumes()[0];
    storageUuid = firstVolume.GetStorageID();
}
/// <summary>Called when an actor shuts down.</summary>
/// <remarks>
/// Under the write lock: forgets the actor as the active one if it was,
/// removes it from the set of service actors and, when no actors remain,
/// asks the DN to shut down this block pool.
/// </remarks>
/// <param name="actor">the actor that is shutting down</param>
internal virtual void ShutdownActor(BPServiceActor actor)
{
    WriteLock();
    try
    {
        bool wasActive = bpServiceToActive == actor;
        if (wasActive)
        {
            bpServiceToActive = null;
        }

        bpServices.Remove(actor);

        // The last actor going away takes the whole block pool with it.
        bool noActorsLeft = bpServices.IsEmpty();
        if (noActorsLeft)
        {
            dn.ShutdownBlockPool(this);
        }
    }
    finally
    {
        WriteUnlock();
    }
}
/// <summary>
/// Entry point for a command received by one of this block pool's actors.
/// </summary>
/// <remarks>
/// A null command is a no-op. DNA_REGISTER is handled without taking the
/// lock by asking the actor to re-register. Every other command is handled
/// under the write lock, by the active-NN handler when the actor is the
/// current active one and by the standby handler otherwise.
/// </remarks>
/// <param name="cmd">the command to process; may be null</param>
/// <param name="actor">the actor that received the command</param>
/// <returns>
/// true for a null command, false after a DNA_REGISTER, otherwise whatever
/// the active or standby handler returns
/// </returns>
/// <exception cref="System.IO.IOException"/>
internal virtual bool ProcessCommandFromActor(DatanodeCommand cmd, BPServiceActor actor)
{
    System.Diagnostics.Debug.Assert(bpServices.Contains(actor));

    if (cmd == null)
    {
        return true;
    }

    /*
     * Datanode Registration can be done asynchronously here. No need to hold
     * the lock. for more info refer HDFS-5014
     */
    bool isRegisterRequest = cmd.GetAction() == DatanodeProtocol.DnaRegister;
    if (isRegisterRequest)
    {
        // namenode requested a registration - at start or if NN lost contact
        // Just logging the claiming state is OK here instead of checking the
        // actor state by obtaining the lock
        Log.Info("DatanodeCommand action : DNA_REGISTER from " + actor.nnAddr + " with " + actor.state + " state");
        actor.ReRegister();
        return false;
    }

    WriteLock();
    try
    {
        return actor == bpServiceToActive
            ? ProcessCommandFromActive(cmd, actor)
            : ProcessCommandFromStandby(cmd, actor);
    }
    finally
    {
        WriteUnlock();
    }
}
/// <summary>
/// This method should handle commands from Standby namenode except
/// DNA_REGISTER which should be handled earlier itself.
/// </summary>
/// <remarks>
/// Only DNA_ACCESSKEYUPDATE has an effect: when block tokens are enabled the
/// exported keys are added to the DN's secret manager. The other known
/// actions are logged and ignored, and unknown actions are logged as such.
/// </remarks>
/// <param name="cmd">the command received from the standby NN</param>
/// <param name="actor">the actor that received the command (not read by this method's body)</param>
/// <returns>always true</returns>
/// <exception cref="System.IO.IOException"/>
private bool ProcessCommandFromStandby(DatanodeCommand cmd, BPServiceActor actor)
{
    switch (cmd.GetAction())
    {
        case DatanodeProtocol.DnaAccesskeyupdate:
        {
            Log.Info("DatanodeCommand action from standby: DNA_ACCESSKEYUPDATE");
            if (!dn.isBlockTokenEnabled)
            {
                return true;
            }
            var secretManager = dn.blockPoolTokenSecretManager;
            var poolId = GetBlockPoolId();
            var exportedKeys = ((KeyUpdateCommand)cmd).GetExportedKeys();
            secretManager.AddKeys(poolId, exportedKeys);
            return true;
        }

        // Known actions that a standby NN is not allowed to drive.
        case DatanodeProtocol.DnaTransfer:
        case DatanodeProtocol.DnaInvalidate:
        case DatanodeProtocol.DnaShutdown:
        case DatanodeProtocol.DnaFinalize:
        case DatanodeProtocol.DnaRecoverblock:
        case DatanodeProtocol.DnaBalancerbandwidthupdate:
        case DatanodeProtocol.DnaCache:
        case DatanodeProtocol.DnaUncache:
        {
            Log.Warn("Got a command from standby NN - ignoring command:" + cmd.GetAction());
            return true;
        }

        default:
        {
            Log.Warn("Unknown DatanodeCommand action: " + cmd.GetAction());
            return true;
        }
    }
}
/// <summary>
/// Starts a one-datanode MiniDFSCluster with long report intervals, checks
/// that exactly one incremental block report follows a file creation, then
/// manually triggers a block report and waits for it to reach the NameNode
/// spy.
/// </summary>
/// <param name="incremental">
/// true to trigger (and expect) an incremental block report, false to trigger
/// (and expect) a full block report
/// </param>
/// <exception cref="System.Exception"/>
private void TestTriggerBlockReport(bool incremental)
{
    Configuration conf = new HdfsConfiguration();
    // Set a really long value for dfs.blockreport.intervalMsec and
    // dfs.heartbeat.interval, so that incremental block reports and heartbeats
    // won't be sent during this test unless they're triggered
    // manually.
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 10800000L);
    conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, 1080L);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    try
    {
        cluster.WaitActive();
        FileSystem fs = cluster.GetFileSystem();
        DataNode datanode = cluster.GetDataNodes()[0];
        DatanodeProtocolClientSideTranslatorPB spy =
            DataNodeTestUtils.SpyOnBposToNN(datanode, cluster.GetNameNode());
        DFSTestUtil.CreateFile(fs, new Path("/abc"), 16, (short)1, 1L);

        // We should get 1 incremental block report.
        Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(1)).BlockReceivedAndDeleted(
            Matchers.Any<DatanodeRegistration>(),
            Matchers.AnyString(),
            Matchers.Any<StorageReceivedDeletedBlocks[]>());

        // We should not receive any more full or incremental block reports,
        // since the interval we configured is so long.
        for (int i = 0; i < 3; i++)
        {
            Sharpen.Thread.Sleep(10);
            Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(0)).BlockReport(
                Matchers.Any<DatanodeRegistration>(),
                Matchers.AnyString(),
                Matchers.Any<StorageBlockReport[]>(),
                Org.Mockito.Mockito.AnyObject<BlockReportContext>());
            Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Times(1)).BlockReceivedAndDeleted(
                Matchers.Any<DatanodeRegistration>(),
                Matchers.AnyString(),
                Matchers.Any<StorageReceivedDeletedBlocks[]>());
        }

        // Create a fake block deletion notification on the DataNode.
        // This will be sent with the next incremental block report.
        ReceivedDeletedBlockInfo rdbi = new ReceivedDeletedBlockInfo(
            new Block(5678, 512, 1000),
            ReceivedDeletedBlockInfo.BlockStatus.DeletedBlock,
            null);
        BPServiceActor actor = datanode.GetAllBpOs()[0].GetBPServiceActors()[0];
        string storageUuid = datanode.GetFSDataset().GetVolumes()[0].GetStorageID();
        actor.NotifyNamenodeDeletedBlock(rdbi, storageUuid);

        // Manually trigger a block report.
        datanode.TriggerBlockReport(
            new BlockReportOptions.Factory().SetIncremental(incremental).Build());

        // triggerBlockReport returns before the block report is
        // actually sent. Wait for it to be sent here.
        if (incremental)
        {
            Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000).Times(2)).BlockReceivedAndDeleted(
                Matchers.Any<DatanodeRegistration>(),
                Matchers.AnyString(),
                Matchers.Any<StorageReceivedDeletedBlocks[]>());
        }
        else
        {
            Org.Mockito.Mockito.Verify(spy, Org.Mockito.Mockito.Timeout(60000)).BlockReport(
                Matchers.Any<DatanodeRegistration>(),
                Matchers.AnyString(),
                Matchers.Any<StorageBlockReport[]>(),
                Org.Mockito.Mockito.AnyObject<BlockReportContext>());
        }
    }
    finally
    {
        // Always tear the cluster down, even when a verification above fails,
        // so a failing run does not leave a live cluster behind.
        cluster.Shutdown();
    }
}
/// <summary>
/// This method should handle all commands from Active namenode except
/// DNA_REGISTER which should be handled earlier itself.
/// </summary>
/// <remarks>
/// Dispatches on the command's action. DNA_SHUTDOWN is not implemented and
/// raises NotSupportedException; unknown actions are logged and ignored.
/// </remarks>
/// <param name="cmd">the command received from the active NN</param>
/// <param name="actor">
/// the actor that received the command; only its NN socket address is read,
/// to describe the requester of a block recovery
/// </param>
/// <returns>true if further processing may be required or false otherwise.</returns>
/// <exception cref="System.IO.IOException"/>
private bool ProcessCommandFromActive(DatanodeCommand cmd, BPServiceActor actor)
{
    // Null unless the command is of the corresponding type; the cases below
    // dereference whichever one their action is expected to carry.
    BlockCommand bcmd = cmd as BlockCommand;
    BlockIdCommand blockIdCmd = cmd as BlockIdCommand;

    switch (cmd.GetAction())
    {
        case DatanodeProtocol.DnaTransfer:
        {
            // Send a copy of a block to another datanode
            dn.TransferBlocks(bcmd.GetBlockPoolId(), bcmd.GetBlocks(), bcmd.GetTargets(),
                bcmd.GetTargetStorageTypes());
            dn.metrics.IncrBlocksReplicated(bcmd.GetBlocks().Length);
            break;
        }

        case DatanodeProtocol.DnaInvalidate:
        {
            //
            // Some local block(s) are obsolete and can be
            // safely garbage-collected.
            //
            Block[] toDelete = bcmd.GetBlocks();
            // using global fsdataset
            // Exceptions raised here are not expected to be disk-related, so
            // they are simply left to propagate to the caller.
            dn.GetFSDataset().Invalidate(bcmd.GetBlockPoolId(), toDelete);
            dn.metrics.IncrBlocksRemoved(toDelete.Length);
            break;
        }

        case DatanodeProtocol.DnaCache:
        {
            Log.Info("DatanodeCommand action: DNA_CACHE for " + blockIdCmd.GetBlockPoolId()
                + " of [" + BlockIdArrayToString(blockIdCmd.GetBlockIds()) + "]");
            dn.GetFSDataset().Cache(blockIdCmd.GetBlockPoolId(), blockIdCmd.GetBlockIds());
            break;
        }

        case DatanodeProtocol.DnaUncache:
        {
            Log.Info("DatanodeCommand action: DNA_UNCACHE for " + blockIdCmd.GetBlockPoolId()
                + " of [" + BlockIdArrayToString(blockIdCmd.GetBlockIds()) + "]");
            dn.GetFSDataset().Uncache(blockIdCmd.GetBlockPoolId(), blockIdCmd.GetBlockIds());
            break;
        }

        case DatanodeProtocol.DnaShutdown:
        {
            // TODO: DNA_SHUTDOWN appears to be unused - the NN never sends this command
            // See HDFS-2987.
            throw new NotSupportedException("Received unimplemented DNA_SHUTDOWN");
        }

        case DatanodeProtocol.DnaFinalize:
        {
            string bp = ((FinalizeCommand)cmd).GetBlockPoolId();
            Log.Info("Got finalize command for block pool " + bp);
            System.Diagnostics.Debug.Assert(GetBlockPoolId().Equals(bp),
                "BP " + GetBlockPoolId() + " received DNA_FINALIZE " + "for other block pool " + bp);
            dn.FinalizeUpgradeForPool(bp);
            break;
        }

        case DatanodeProtocol.DnaRecoverblock:
        {
            string who = "NameNode at " + actor.GetNNSocketAddress();
            dn.RecoverBlocks(who, ((BlockRecoveryCommand)cmd).GetRecoveringBlocks());
            break;
        }

        case DatanodeProtocol.DnaAccesskeyupdate:
        {
            Log.Info("DatanodeCommand action: DNA_ACCESSKEYUPDATE");
            if (dn.isBlockTokenEnabled)
            {
                dn.blockPoolTokenSecretManager.AddKeys(GetBlockPoolId(),
                    ((KeyUpdateCommand)cmd).GetExportedKeys());
            }
            break;
        }

        case DatanodeProtocol.DnaBalancerbandwidthupdate:
        {
            Log.Info("DatanodeCommand action: DNA_BALANCERBANDWIDTHUPDATE");
            long bandwidth = ((BalancerBandwidthCommand)cmd).GetBalancerBandwidthValue();
            // Only a positive value updates the throttler.
            if (bandwidth > 0)
            {
                DataXceiverServer dxcs = (DataXceiverServer)dn.dataXceiverServer.GetRunnable();
                Log.Info("Updating balance throttler bandwidth from " + dxcs.balanceThrottler.GetBandwidth()
                    + " bytes/s " + "to: " + bandwidth + " bytes/s.");
                dxcs.balanceThrottler.SetBandwidth(bandwidth);
            }
            break;
        }

        default:
        {
            Log.Warn("Unknown DatanodeCommand action: " + cmd.GetAction());
            break;
        }
    }

    return true;
}