/// <summary>
/// Runs a heartbeat and an unknown-machines query through the replicated cluster state hash,
/// applies the results to <paramref name="clusterState"/>, and validates the local machine mappings.
/// </summary>
/// <param name="context">Operation context used for the remote call and for tracing.</param>
/// <param name="clusterState">Cluster state that receives the unknown machines and the inactive/closed machine sets.</param>
/// <param name="machineState">Machine state forwarded to the heartbeat call.</param>
/// <returns>
/// <see cref="BoolResult.Success"/>, or an error result when a local machine location cannot be resolved
/// or a local machine id index is greater than the max machine id returned by the query.
/// </returns>
private async Task<BoolResult> UpdateClusterStateCoreAsync(OperationContext context, ClusterState clusterState, MachineState machineState)
{
    var (inactiveMachineIdSet, closedMachineIdSet, clusterUpdates) = await _clusterStateKey.UseNonConcurrentReplicatedHashAsync(
        context,
        _configuration.RetryWindow,
        RedisOperation.UpdateClusterState,
        async (batch, key) =>
        {
            // Both operations are started before either one is awaited.
            var pendingHeartbeat = CallHeartbeatAsync(context, clusterState, batch, key, machineState);
            var pendingUnknownMachines = batch.GetUnknownMachinesAsync(key, clusterState.MaxMachineId);

            await Task.WhenAll(pendingHeartbeat, pendingUnknownMachines);

            var heartbeat = await pendingHeartbeat;
            var unknownMachinesResponse = await pendingUnknownMachines;

            return (heartbeat.inactiveMachineIdSet, heartbeat.closedMachineIdSet, unknownMachinesResponse);
        },
        timeout: _configuration.ClusterRedisOperationTimeout).ThrowIfFailureAsync();

    Contract.Assert(inactiveMachineIdSet != null, "inactiveMachineIdSet != null");
    Contract.Assert(closedMachineIdSet != null, "closedMachineIdSet != null");

    var reportedMaxMachineId = clusterUpdates.maxMachineId;

    // Only trace the mappings when the query reported a max id different from the one already known.
    if (reportedMaxMachineId != clusterState.MaxMachineId)
    {
        Tracer.Debug(context, $"Retrieved unknown machines from ({clusterState.MaxMachineId}, {reportedMaxMachineId}]");

        foreach (var entry in clusterUpdates.unknownMachines)
        {
            context.LogMachineMapping(Tracer, entry.Key, entry.Value);
        }
    }

    clusterState.AddUnknownMachines(reportedMaxMachineId, clusterUpdates.unknownMachines);
    clusterState.SetMachineStates(inactiveMachineIdSet, closedMachineIdSet).ThrowIfFailure();

    Tracer.Debug(context, $"Inactive machines: Count={inactiveMachineIdSet.Count}, [{string.Join(", ", inactiveMachineIdSet)}]");
    Tracer.TrackMetric(context, "InactiveMachineCount", inactiveMachineIdSet.Count);

    foreach (var mapping in clusterState.LocalMachineMappings)
    {
        if (!clusterState.TryResolveMachineId(mapping.Location, out var resolvedId))
        {
            return new BoolResult($"Invalid redis cluster state on machine {mapping}. (Missing location {mapping.Location})");
        }

        if (resolvedId != mapping.Id)
        {
            Tracer.Warning(context, $"Machine id mismatch for location {mapping.Location}. Registered id: {mapping.Id}. Cluster state id: {resolvedId}. Updating registered id with cluster state id.");
            mapping.Id = resolvedId;
        }

        // Checked after the id may have been replaced above.
        if (reportedMaxMachineId < mapping.Id.Index)
        {
            return new BoolResult($"Invalid redis cluster state on machine {mapping} (redis max machine id={reportedMaxMachineId})");
        }
    }

    return BoolResult.Success;
}
/// <summary>
/// Sends a heartbeat with <see cref="MachineState.Active"/> and queries unknown machines through the
/// replicated cluster state hash, then applies the results to <paramref name="clusterState"/>.
/// </summary>
/// <param name="context">Operation context used for the remote call and for tracing.</param>
/// <param name="clusterState">Cluster state that receives the unknown machines and the inactive machine set.</param>
/// <returns>
/// <see cref="BoolResult.Success"/>, or an error result when the returned max machine id is lower than
/// the index of <c>LocalMachineId</c>; in that case <paramref name="clusterState"/> is not modified.
/// </returns>
private async Task<BoolResult> UpdateLocalClusterStateAsync(OperationContext context, ClusterState clusterState)
{
    var (heartbeat, clusterUpdates) = await _clusterStateKey.UseReplicatedHashAsync(
        context,
        _configuration.RetryWindow,
        RedisOperation.UpdateClusterState,
        async (batch, key) =>
        {
            // Both operations are started before either one is awaited.
            var pendingHeartbeat = CallHeartbeatAsync(context, batch, key, MachineState.Active);
            var pendingUnknownMachines = batch.GetUnknownMachinesAsync(key, clusterState.MaxMachineId);

            await Task.WhenAll(pendingHeartbeat, pendingUnknownMachines);

            var heartbeatResponse = await pendingHeartbeat;
            var unknownMachinesResponse = await pendingUnknownMachines;

            return (heartbeatResponse, unknownMachinesResponse);
        }).ThrowIfFailureAsync();

    var reportedMaxMachineId = clusterUpdates.maxMachineId;

    if (reportedMaxMachineId < LocalMachineId.Index)
    {
        return new BoolResult($"Invalid redis cluster state on machine {LocalMachineId} (redis max machine id={reportedMaxMachineId})");
    }

    // Record the current time when the heartbeat reports the prior state as Unavailable or Expired.
    var priorState = heartbeat.priorState;
    var wasInactive = priorState == MachineState.Unavailable || priorState == MachineState.Expired;
    if (wasInactive)
    {
        clusterState.LastInactiveTime = _clock.UtcNow;
    }

    if (reportedMaxMachineId != clusterState.MaxMachineId)
    {
        Tracer.Debug(context, $"Retrieved unknown machines from ({clusterState.MaxMachineId}, {reportedMaxMachineId}]");

        foreach (var entry in clusterUpdates.unknownMachines)
        {
            context.LogMachineMapping(Tracer, entry.Key, entry.Value);
        }
    }

    clusterState.AddUnknownMachines(reportedMaxMachineId, clusterUpdates.unknownMachines);
    clusterState.SetInactiveMachines(heartbeat.inactiveMachineIdSet);

    Tracer.Debug(context, $"Inactive machines: Count={heartbeat.inactiveMachineIdSet.Count}, [{string.Join(", ", heartbeat.inactiveMachineIdSet)}]");
    Tracer.TrackMetric(context, "InactiveMachineCount", heartbeat.inactiveMachineIdSet.Count);

    return BoolResult.Success;
}
// UpdateClusterStateCore: reconciles the in-memory cluster state with the machine mappings kept in
// _keyValueStore under the ClusterState column.
//  - Reads the stored MaxMachineId; when it is missing or does not parse, logs that and, if `write`
//    is false, returns without changing anything.
//  - When clusterState.MaxMachineId is greater than the stored value and `write` is true, stores the
//    location path of every machine id in (stored max, clusterState.MaxMachineId] and then the new
//    MaxMachineId. An id that cannot be resolved throws the exception built by Contract.AssertFailure.
//  - When the stored value is greater, loads the location of every machine id in
//    (clusterState.MaxMachineId, stored max] from the store and passes them to
//    clusterState.AddUnknownMachines. A missing id throws the exception built by Contract.AssertFailure.
// NOTE(review): int.TryParse sets its out argument to 0 when parsing fails, so in that case
// maxMachineId is 0 rather than ClusterState.InvalidMachineId — confirm this is intended.
// NOTE(review): the end of the store callback and of this method is not visible in this excerpt —
// TODO confirm nothing else runs after AddUnknownMachines.
/// <inheritdoc /> protected override void UpdateClusterStateCore(OperationContext context, ClusterState clusterState, bool write) { _keyValueStore.Use( store => { int maxMachineId = ClusterState.InvalidMachineId; if (!store.TryGetValue(nameof(ClusterStateKeys.MaxMachineId), out var maxMachinesString, nameof(Columns.ClusterState)) || !int.TryParse(maxMachinesString, out maxMachineId)) { Tracer.OperationDebug(context, $"Unable to load cluster state from db. MaxMachineId='{maxMachinesString}'"); if (!write) { // No machine state in db. Return if we are not updating the db. return; } } void logSynchronize() { Tracer.OperationDebug(context, $"Synchronizing cluster state: MaxMachineId={clusterState.MaxMachineId}, Database.MaxMachineId={maxMachineId}]"); } if (clusterState.MaxMachineId > maxMachineId && write) { logSynchronize(); // Update db with values from cluster state for (int machineIndex = maxMachineId + 1; machineIndex <= clusterState.MaxMachineId; machineIndex++) { if (clusterState.TryResolve(new MachineId(machineIndex), out var machineLocation)) { Tracer.OperationDebug(context, $"Storing machine mapping ({machineIndex}={machineLocation})"); store.Put(machineIndex.ToString(), machineLocation.Path, nameof(Columns.ClusterState)); } else { throw Contract.AssertFailure($"Unabled to resolve machine location for machine id={machineIndex}"); } } store.Put(nameof(ClusterStateKeys.MaxMachineId), clusterState.MaxMachineId.ToString(), nameof(Columns.ClusterState)); } else if (maxMachineId > clusterState.MaxMachineId) { logSynchronize(); // Update cluster state with values from db var unknownMachines = new Dictionary <MachineId, MachineLocation>(); for (int machineIndex = clusterState.MaxMachineId + 1; machineIndex <= maxMachineId; machineIndex++) { if (store.TryGetValue(machineIndex.ToString(), out var machineLocationData, nameof(Columns.ClusterState))) { var machineId = new MachineId(machineIndex); var machineLocation = new MachineLocation(machineLocationData); 
context.LogMachineMapping(Tracer, machineId, machineLocation); unknownMachines[machineId] = machineLocation; } else { throw Contract.AssertFailure($"Unabled to find machine location for machine id={machineIndex}"); } } clusterState.AddUnknownMachines(maxMachineId, unknownMachines); }
/// <summary>
/// Executes one batch against <paramref name="redisDb"/> that sends a heartbeat with
/// <see cref="MachineState.Active"/>, queries unknown machines and, when mirroring is due, also reads
/// the whole cluster state hash. The heartbeat and query results are applied to
/// <paramref name="clusterState"/>.
/// </summary>
/// <param name="context">Operation context used for the batch and for tracing.</param>
/// <param name="clusterState">Cluster state that receives the unknown machines and the inactive machine set.</param>
/// <param name="redisDb">Database the batch is executed against.</param>
/// <returns>
/// The hash entries read for mirroring (an empty array when no dump was requested or the dump task
/// yielded null), or an error result when the returned max machine id is lower than the index of
/// <c>LocalMachineId</c>.
/// </returns>
private Task<Result<HashEntry[]>> UpdateLocalClusterStateAsync(OperationContext context, ClusterState clusterState, RedisDatabaseAdapter redisDb)
{
    return redisDb.ExecuteBatchAsync(
        context,
        async batch =>
        {
            var heartbeatResultTask = CallHeartbeatAsync(context, batch, MachineState.Active);
            var getUnknownMachinesTask = batch.GetUnknownMachinesAsync(_clusterStateKey, clusterState.MaxMachineId);

            // Only master should mirror cluster state
            bool shouldMirrorClusterState = _role == Role.Master
                && HasSecondary
                && _configuration.MirrorClusterState
                // Only mirror after a long interval, but not long enough to allow machines to appear expired
                && !_lastClusterStateMirrorTime.IsRecent(_clock.UtcNow, _configuration.ClusterStateMirrorInterval)
                // Only mirror from primary to secondary, so no need to dump cluster state if this is the secondary
                && IsPrimary(redisDb);

            Task<HashEntry[]> dumpClusterStateBlobTask = shouldMirrorClusterState
                ? batch.AddOperation(_clusterStateKey, b => b.HashGetAllAsync(_clusterStateKey))
                : _emptyClusterStateDump;

            await Task.WhenAll(heartbeatResultTask, getUnknownMachinesTask, dumpClusterStateBlobTask);

            // All three tasks have completed here. The dump is read once and reused as the return
            // value instead of awaiting the same task a second time.
            var clusterStateBlob = await dumpClusterStateBlobTask ?? CollectionUtilities.EmptyArray<HashEntry>();
            var heartbeatResult = await heartbeatResultTask;
            var getUnknownMachinesResult = await getUnknownMachinesTask;

            // The mirror time is recorded before the validity check below, so it is updated even
            // when this call ends up returning an error.
            if (shouldMirrorClusterState)
            {
                _lastClusterStateMirrorTime = _clock.UtcNow;
            }

            if (getUnknownMachinesResult.maxMachineId < LocalMachineId.Index)
            {
                return Result.FromErrorMessage<HashEntry[]>($"Invalid {GetDbName(redisDb)} redis cluster state on machine {LocalMachineId} (max machine id={getUnknownMachinesResult.maxMachineId})");
            }

            if (heartbeatResult.priorState == MachineState.Unavailable || heartbeatResult.priorState == MachineState.Expired)
            {
                clusterState.LastInactiveTime = _clock.UtcNow;
            }

            if (getUnknownMachinesResult.maxMachineId != clusterState.MaxMachineId)
            {
                Tracer.Debug(context, $"Retrieved unknown machines from ({clusterState.MaxMachineId}, {getUnknownMachinesResult.maxMachineId}]");

                foreach (var item in getUnknownMachinesResult.unknownMachines)
                {
                    context.LogMachineMapping(Tracer, item.Key, item.Value);
                }
            }

            clusterState.AddUnknownMachines(getUnknownMachinesResult.maxMachineId, getUnknownMachinesResult.unknownMachines);
            clusterState.SetInactiveMachines(heartbeatResult.inactiveMachineIdSet);

            Tracer.Debug(context, $"Inactive machines: Count={heartbeatResult.inactiveMachineIdSet.Count}, [{string.Join(", ", heartbeatResult.inactiveMachineIdSet)}]");
            Tracer.TrackMetric(context, "InactiveMachineCount", heartbeatResult.inactiveMachineIdSet.Count);

            return Result.Success(clusterStateBlob);
        },
        RedisOperation.UpdateClusterState);
}
/// <summary>
/// Sends a heartbeat, fetches cluster updates from <c>_storage</c>, applies both to
/// <paramref name="clusterState"/>, and validates the local machine mappings unless the configuration
/// is marked as distributed-content-consumer-only.
/// </summary>
/// <param name="context">Operation context used for the remote calls and for tracing.</param>
/// <param name="clusterState">Cluster state that receives the unknown machines and the inactive/closed machine sets.</param>
/// <param name="machineState">Machine state forwarded to the heartbeat call.</param>
/// <returns>
/// The prior state reported by the heartbeat, or an error result when a local machine location cannot
/// be resolved or a local machine id index is greater than the max machine id in the updates.
/// </returns>
private async Task<Result<MachineState>> UpdateClusterStateCoreAsync(
    OperationContext context,
    ClusterState clusterState,
    MachineState machineState)
{
    // The heartbeat completes before the cluster updates are requested.
    var heartbeat = await CallHeartbeatAsync(context, clusterState, machineState);

    var request = new GetClusterUpdatesRequest() { MaxMachineId = clusterState.MaxMachineId };
    var updates = await _storage.GetClusterUpdatesAsync(context, request).ThrowIfFailureAsync();

    BitMachineIdSet inactiveMachineIdSet = heartbeat.InactiveMachines;
    BitMachineIdSet closedMachineIdSet = heartbeat.ClosedMachines;

    Contract.Assert(inactiveMachineIdSet != null, "inactiveMachineIdSet != null");
    Contract.Assert(closedMachineIdSet != null, "closedMachineIdSet != null");

    var unknownMachines = updates.UnknownMachines;

    if (updates.MaxMachineId != clusterState.MaxMachineId)
    {
        Tracer.Debug(context, $"Retrieved unknown machines from ({clusterState.MaxMachineId}, {updates.MaxMachineId}]");

        if (unknownMachines != null)
        {
            foreach (var entry in unknownMachines)
            {
                context.LogMachineMapping(Tracer, entry.Key, entry.Value);
            }
        }
    }

    // Unknown machines are added only when the updates carry a non-null collection.
    if (unknownMachines != null)
    {
        clusterState.AddUnknownMachines(updates.MaxMachineId, unknownMachines);
    }

    clusterState.SetMachineStates(inactiveMachineIdSet, closedMachineIdSet).ThrowIfFailure();

    Tracer.Debug(context, $"Inactive machines: Count={inactiveMachineIdSet.Count}, [{string.Join(", ", inactiveMachineIdSet)}]");
    Tracer.TrackMetric(context, "InactiveMachineCount", inactiveMachineIdSet.Count);

    // The local machine mappings are not validated in consumer-only mode.
    if (_configuration.DistributedContentConsumerOnly)
    {
        return heartbeat.PriorState;
    }

    foreach (var mapping in clusterState.LocalMachineMappings)
    {
        if (!clusterState.TryResolveMachineId(mapping.Location, out var resolvedId))
        {
            return Result.FromErrorMessage<MachineState>($"Invalid cluster state on machine {mapping}. (Missing location {mapping.Location})");
        }

        if (resolvedId != mapping.Id)
        {
            Tracer.Warning(context, $"Machine id mismatch for location {mapping.Location}. Registered id: {mapping.Id}. Cluster state id: {resolvedId}. Updating registered id with cluster state id.");
            mapping.Id = resolvedId;
        }

        // Checked after the id may have been replaced above.
        if (updates.MaxMachineId < mapping.Id.Index)
        {
            return Result.FromErrorMessage<MachineState>($"Invalid cluster state on machine {mapping} (max machine id={updates.MaxMachineId})");
        }
    }

    return heartbeat.PriorState;
}