/// <summary>
/// Endless loop in which this node, for every shard it is primary for, compares its own
/// write-operation count with the latest position reported by each in-sync replica and
/// submits a command moving every replica whose position differs from in-sync to stale.
/// </summary>
/// <returns>A task for the loop; the loop itself has no exit condition.</returns>
public async Task CheckAllReplicas()
{
    while (true)
    {
        try
        {
            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Checking all replicas");

            // Only shards whose primary is this node are inspected.
            foreach (var shard in _stateMachine.GetAllPrimaryShards(_nodeStateService.Id))
            {
                // Snapshot the local write-operation count first ...
                var primaryPosition = _shardRepository.GetTotalShardWriteOperationsCount(shard.Id);

                // ... then pause for five times the configured latency tolerance
                // before asking the replicas where they are.
                await Task.Delay(_clusterOptions.LatencyToleranceMs * 5);

                var outOfSync = new ConcurrentBag<Guid>();

                // Asks one replica for its latest position and records it when that
                // position is either behind or ahead of the primary's snapshot.
                async Task ProbeReplica(Guid replicaId)
                {
                    var reply = await _clusterClient.Send(replicaId, new RequestShardWriteOperations()
                    {
                        From = 0,
                        To = 0,
                        ShardId = shard.Id,
                        Type = shard.Type
                    });

                    var isBehind = reply.LatestPosition < primaryPosition;
                    if (isBehind || reply.LatestPosition > primaryPosition)
                    {
                        outOfSync.Add(replicaId);
                    }
                }

                var probes = shard.InsyncAllocations
                    .Where(replicaId => replicaId != _nodeStateService.Id)
                    .Select(replicaId => ProbeReplica(replicaId));

                await Task.WhenAll(probes);

                if (outOfSync.IsEmpty)
                {
                    continue;
                }

                _logger.LogDebug(_nodeStateService.GetNodeLogId() + " primary detected stale nodes");

                // The same node ids are removed from the in-sync set and added to the stale set.
                var reallocation = new UpdateShardMetadataAllocations()
                {
                    InsyncAllocationsToRemove = outOfSync.ToHashSet<Guid>(),
                    StaleAllocationsToAdd = outOfSync.ToHashSet<Guid>(),
                    ShardId = shard.Id,
                    Type = shard.Type
                };

                await _clusterClient.Send(new ExecuteCommands()
                {
                    Commands = new List<BaseCommand>() { reallocation },
                    WaitForCommits = true
                });
            }
        }
        catch (Exception e)
        {
            // Any failure abandons the current pass; the next pass starts after the pause below.
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to check all replicas with exception " + e.Message + Environment.NewLine + e.StackTrace);
        }

        // Fixed one-second pause between passes.
        await Task.Delay(1000);
    }
}
/// <summary>
/// Looks up an object of the given type. When <paramref name="shardId"/> is supplied the
/// object is read from the local data router. Otherwise every primary shard of the type is
/// queried concurrently (the local router for shards whose primary is this node, the
/// cluster client for the rest) and the first successful result is returned.
/// </summary>
/// <param name="objectId">Id of the object to fetch.</param>
/// <param name="type">Shard type used to select the shards and passed to the lookups.</param>
/// <param name="timeoutMs">Maximum time, in milliseconds, to wait for the fan-out lookup.</param>
/// <param name="shardId">Optional shard id; when non-null the fan-out is skipped.</param>
/// <returns>
/// The object data, or null when every shard responded and none of them had the object.
/// </returns>
/// <exception cref="ClusterOperationTimeoutException">
/// Thrown when no result was found and not every shard responded within
/// <paramref name="timeoutMs"/>.
/// </exception>
public async Task<ShardData> GetData(Guid objectId, string type, int timeoutMs, Guid? shardId = null)
{
    // A caller that names a shard is served straight from the local data router.
    if (shardId != null)
    {
        return await _dataRouter.GetDataAsync(type, objectId);
    }

    // UTC so the elapsed-time check is not affected by local clock shifts (e.g. DST).
    var startedAt = DateTime.UtcNow;
    var shards = _stateMachine.GetAllPrimaryShards(type);
    bool foundResult = false;
    ShardData finalObject = null;
    var totalRespondedShards = 0;

    // ToList() forces the lazy Select to run, which starts every lookup. The tasks are
    // deliberately not awaited: the polling loop below returns as soon as any of them
    // reports a hit. Each lookup catches its own exceptions, so none faults unobserved.
    var lookups = shards.Select(async shard =>
    {
        try
        {
            ShardData candidate;
            bool hit;

            if (shard.Value != _nodeStateService.Id)
            {
                // Primary lives on another node: ask that node.
                var result = await _clusterClient.Send(shard.Value, new RequestDataShard()
                {
                    ObjectId = objectId,
                    ShardId = shard.Key,
                    Type = type
                });
                hit = result.IsSuccessful;
                candidate = result.Data;
            }
            else
            {
                // Primary is this node: read locally.
                candidate = await _dataRouter.GetDataAsync(type, objectId);
                hit = candidate != null;
            }

            if (hit)
            {
                // Publish the payload BEFORE raising the flag, and only on a hit, so a
                // miss on one shard can never overwrite a hit from another and the
                // poller never observes the flag without the data.
                finalObject = candidate;
                Volatile.Write(ref foundResult, true);
            }

            Interlocked.Increment(ref totalRespondedShards);
        }
        catch (Exception e)
        {
            // A failed lookup is logged and not counted as a response, so unless another
            // shard produces a hit the call ends with the timeout exception below.
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Error thrown while getting " + e.Message);
        }
    }).ToList();

    // Poll until a shard reports a hit, every shard has responded, or the timeout elapses.
    while (!Volatile.Read(ref foundResult) && Volatile.Read(ref totalRespondedShards) < shards.Count)
    {
        if ((DateTime.UtcNow - startedAt).TotalMilliseconds > timeoutMs)
        {
            throw new ClusterOperationTimeoutException("Get data request for object " + objectId + " from shard " + shardId + " timed out.");
        }

        await Task.Delay(10);
    }

    return finalObject;
}