/// <summary>
/// Lists the contactable nodes that hold neither an in-sync nor a stale allocation of the given shard.
/// </summary>
/// <param name="shardId">Id of the shard to look up in the state machine.</param>
/// <param name="type">Shard type used for the state machine lookup.</param>
/// <returns>One candidate per qualifying node, carrying the node id and the shard's type.</returns>
public List<AllocationCandidate> GetAllocationCandidates(Guid shardId, string type)
{
    // Only nodes flagged as contactable are considered.
    var contactableNodes = _stateMachine.GetNodes().Where(candidate => candidate.IsContactable);
    ShardAllocationMetadata shardMetadata = _stateMachine.GetShard(type, shardId);

    // A node qualifies when it appears in neither allocation set.
    return contactableNodes
        .Where(candidate => !(shardMetadata.InsyncAllocations.Contains(candidate.Id)
                              || shardMetadata.StaleAllocations.Contains(candidate.Id)))
        .Select(candidate => new AllocationCandidate
        {
            NodeId = candidate.Id,
            Type = shardMetadata.Type
        })
        .ToList();
}
/// <summary>
/// Appends a write operation to the shard's operation log, applies it to the datastore and
/// replicates it to every in-sync allocation of the shard other than this node.
/// </summary>
/// <param name="data">Payload to write; <c>data.ShardId</c> must be set because its <c>Value</c> is read.</param>
/// <param name="operationType">Kind of operation recorded on the write operation.</param>
/// <param name="operationId">Id of the operation; also fed into the shard hash.</param>
/// <param name="transactionDate">Transaction date stored on the write operation.</param>
/// <returns>
/// A successful response with the operation's position and shard hash, or an unsuccessful
/// response when the repository did not accept the operation.
/// </returns>
public async Task<WriteShardDataResponse> WriteShardData(ShardData data, ShardOperationOptions operationType, string operationId, DateTime transactionDate)
{
    ShardWriteOperation operation = new ShardWriteOperation()
    {
        Data = data,
        Id = operationId,
        Operation = operationType,
        TransactionDate = transactionDate
    };

    ShardWriteOperation lastOperation = await GetOrPopulateOperationCache(operation.Data.ShardId.Value);

    // Positions start at 1; the hash chains the previous operation's hash with this operation's id.
    operation.Pos = lastOperation == null ? 1 : lastOperation.Pos + 1;
    var hash = lastOperation == null ? "" : lastOperation.ShardHash;
    operation.ShardHash = ObjectUtility.HashStrings(hash, operation.Id);
    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "writing new operation " + operationId + " with data " + Environment.NewLine + JsonConvert.SerializeObject(data, Formatting.Indented));

    // Write the operation to the local log; nothing else happens if the repository rejects it.
    var writeOperation = await _shardRepository.AddShardWriteOperationAsync(operation);
    if (!writeOperation)
    {
        return new WriteShardDataResponse() { IsSuccessful = false };
    }

    ApplyOperationToDatastore(operation);
    var shardMetadata = _stateMachine.GetShard(operation.Data.ShardType, operation.Data.ShardId.Value);
    // Mark operation as applied
    await _shardRepository.MarkShardWriteOperationAppliedAsync(operation.Id);
    // Update the cache
    UpdateOperationCache(operation.Data.ShardId.Value, operation);

    // Allocations that could not be replicated to; filled concurrently by the tasks below.
    ConcurrentBag<Guid> invalidNodes = new ConcurrentBag<Guid>();

    // All allocations except for your own
    var tasks = shardMetadata.InsyncAllocations.Where(id => id != _nodeStateService.Id).Select(async allocation =>
    {
        try
        {
            var result = await _clusterClient.Send(allocation, new ReplicateShardWriteOperation() { Operation = operation });
            if (result.IsSuccessful)
            {
                _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Successfully replicated all " + shardMetadata.Id + "shards.");
            }
            else
            {
                // An unsuccessful response is handled directly instead of throwing an exception
                // only to catch it a few lines below.
                var failure = "Failed to replicate data to shard " + shardMetadata.Id + " to node " + allocation + " for operation " + operation.ToString() + Environment.NewLine + JsonConvert.SerializeObject(operation, Formatting.Indented);
                _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " for operation " + operation.Pos + " with error " + failure + ", marking shard as not insync...");
                invalidNodes.Add(allocation);
            }
        }
        catch (TaskCanceledException)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " on node " + allocation + " for operation " + operation.Pos + " as request timed out, marking shard as not insync...");
            invalidNodes.Add(allocation);
        }
        catch (Exception e)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " for operation " + operation.Pos + " with error " + e.Message + ", marking shard as not insync..." + Environment.NewLine + e.StackTrace);
            invalidNodes.Add(allocation);
        }
    });

    await Task.WhenAll(tasks);

    if (!invalidNodes.IsEmpty)
    {
        // Move every failed allocation from the in-sync set to the stale set.
        await _clusterClient.Send(new ExecuteCommands()
        {
            Commands = new List<BaseCommand>()
            {
                new UpdateShardMetadataAllocations()
                {
                    ShardId = data.ShardId.Value,
                    Type = data.ShardType,
                    StaleAllocationsToAdd = invalidNodes.ToHashSet(),
                    InsyncAllocationsToRemove = invalidNodes.ToHashSet()
                }
            },
            WaitForCommits = true
        });
        _logger.LogInformation(_nodeStateService.GetNodeLogId() + " had stale virtual machines.");
    }

    return new WriteShardDataResponse()
    {
        Pos = operation.Pos,
        ShardHash = operation.ShardHash,
        IsSuccessful = true
    };
}
/// <summary>
/// Handles a shard write request: makes sure the index for the shard type exists, resolves the
/// target shard, then either enqueues the write locally (when this node is the shard's primary
/// allocation) or forwards the request to the primary node.
/// </summary>
/// <param name="request">
/// The write request. When <c>request.Data.ShardId</c> is null a shard of the requested type is
/// picked at random and its id is written back onto <c>request.Data</c>.
/// </param>
/// <returns>
/// The primary node's response when the request was forwarded; otherwise a response carrying the
/// new operation id, the success flag and whether the lock was removed.
/// </returns>
/// <exception cref="IndexCreationFailedException">
/// Thrown when waiting for index creation, or for the queued operation to complete, exceeds
/// <c>_clusterOptions.DataTransferTimeoutMs</c>.
/// </exception>
public async Task<AddShardWriteOperationResponse> AddShardWriteOperationHandler(AddShardWriteOperation request)
{
    var startDate = DateTime.Now;
    var checkpoint = 1;
    var totalOperation = _writeCache.OperationQueue.Count();
    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Received write request for object " + request.Data.Id + " for request " + request.Data.ShardId);
    AddShardWriteOperationResponse finalResult = new AddShardWriteOperationResponse();

    // Check if index exists, if not - create one
    if (!_stateMachine.IndexExists(request.Data.ShardType))
    {
        if (IndexCreationQueue.Any(icq => icq == request.Data.ShardType))
        {
            // Creation is already queued: poll until the index shows up.
            while (!_stateMachine.IndexExists(request.Data.ShardType))
            {
                // TotalMilliseconds is the whole elapsed time; Milliseconds is only the 0-999
                // component and would never exceed a timeout of one second or more.
                if ((DateTime.Now - startDate).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
                {
                    throw new IndexCreationFailedException("Index creation for shard " + request.Data.ShardType + " is already queued.");
                }
                await Task.Delay(10);
            }
        }
        else
        {
            // Ask the current leader to create the index, then poll until it exists.
            await _clusterClient.Send(_nodeStateService.CurrentLeader.Value, new RequestCreateIndex() { Type = request.Data.ShardType });
            DateTime startIndexCreation = DateTime.Now;
            while (!_stateMachine.IndexExists(request.Data.ShardType))
            {
                if ((DateTime.Now - startIndexCreation).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
                {
                    throw new IndexCreationFailedException("Index creation for shard " + request.Data.ShardType + " timed out.");
                }
                await Task.Delay(100);
            }
        }
    }

    if (_nodeOptions.EnablePerformanceLogging)
    {
        PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "index allocation");
    }

    ShardAllocationMetadata shardMetadata;
    if (request.Data.ShardId == null)
    {
        // No shard requested: pick one of the type's shards at random.
        var allocations = _stateMachine.GetShards(request.Data.ShardType);
        Random rand = new Random();
        var selectedNodeIndex = rand.Next(0, allocations.Length);
        request.Data.ShardId = allocations[selectedNodeIndex].Id;
        shardMetadata = allocations[selectedNodeIndex];
    }
    else
    {
        shardMetadata = _stateMachine.GetShard(request.Data.ShardType, request.Data.ShardId.Value);
    }

    if (_nodeOptions.EnablePerformanceLogging)
    {
        PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "shard Allocation");
    }

    // The shard belongs to another node: forward the request to its primary allocation.
    if (shardMetadata.PrimaryAllocation != _nodeStateService.Id)
    {
        try
        {
            return await _clusterClient.Send(shardMetadata.PrimaryAllocation, request);
        }
        catch (Exception e)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to write " + request.Operation.ToString() + " request to primary node " + _stateMachine.CurrentState.Nodes[shardMetadata.PrimaryAllocation].TransportAddress + " for object " + request.Data.Id + " shard " + request.Data.ShardId + "|" + e.StackTrace);
            // Rethrow without resetting the original stack trace.
            throw;
        }
    }

    // The shard is assigned to this node: enqueue the write and wait for it to complete.
    var operationId = Guid.NewGuid().ToString();
    finalResult.OperationId = operationId;
    await _writeCache.EnqueueOperationAsync(new ShardWriteOperation()
    {
        Data = request.Data,
        Id = operationId,
        Operation = request.Operation,
        TransactionDate = DateTime.Now
    });
    finalResult.IsSuccessful = true;

    if (_nodeOptions.EnablePerformanceLogging)
    {
        PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "enqueue");
    }

    // NOTE(review): startDate is passed by ref to PrintCheckPoint above, so it may have been
    // changed when performance logging is enabled - confirm the timeout below is measured from
    // the intended instant.
    while (!_writeCache.IsOperationComplete(operationId))
    {
        if ((DateTime.Now - startDate).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
        {
            throw new IndexCreationFailedException("Queue clearance for transaction " + operationId + request.Data.ShardType + " timed out.");
        }
        // The poll interval is the queue length (in ms) observed when the request arrived.
        await Task.Delay(totalOperation);
    }

    if (request.RemoveLock)
    {
        var result = await _clusterClient.Send(new ExecuteCommands()
        {
            Commands = new List<BaseCommand>()
            {
                new RemoveLock()
                {
                    Name = request.Data.GetLockName(),
                    LockId = request.LockId
                }
            },
            WaitForCommits = true
        });

        if (result.IsSuccessful)
        {
            finalResult.LockRemoved = true;
        }
    }

    Interlocked.Increment(ref totalRequests);

    if (_nodeOptions.EnablePerformanceLogging)
    {
        PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "removeLock");
    }

    return finalResult;
}
/// <summary>
/// Brings the local copy of a shard in line with the shard's primary allocation: local operations
/// the primary does not have (or whose hash differs) are reversed, then the missing operations
/// are fetched from the primary in batches and replicated locally.
/// </summary>
/// <param name="shardId">Id of the shard to synchronise.</param>
/// <param name="type">Shard type used for the state machine lookup and for reversing transactions.</param>
/// <returns><c>true</c> once the local operation count equals the primary's latest position.</returns>
/// <exception cref="Exception">Thrown when the first request to the primary is not successful.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the primary returns an empty batch while the local shard is still behind.
/// </exception>
public async Task<bool> SyncShard(Guid shardId, string type)
{
    var lastOperation = await _shardRepository.GetShardWriteOperationAsync(shardId, _shardRepository.GetTotalShardWriteOperationsCount(shardId));
    int lastOperationPos = lastOperation == null ? 0 : lastOperation.Pos;
    var shardMetadata = _stateMachine.GetShard(type, shardId);

    var result = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
    {
        ShardId = shardId,
        From = lastOperationPos,
        To = lastOperationPos + 1
    });

    if (!result.IsSuccessful)
    {
        throw new Exception("Failed to fetch shard from primary.");
    }

    var totalPositions = _shardRepository.GetTotalShardWriteOperationsCount(shardId);

    // If the primary has less operations, reverse local operations down to its latest position.
    if (result.LatestPosition < lastOperationPos)
    {
        while (totalPositions != result.LatestPosition)
        {
            _writer.ReverseLocalTransaction(shardId, type, totalPositions);
            totalPositions--;
        }
    }
    else
    {
        // Check whether the hash is equal, if not equal roll back each transaction
        var currentPosition = lastOperationPos;
        if (lastOperationPos != 0 && !(result.Operations[lastOperationPos].ShardHash == lastOperation.ShardHash))
        {
            // Walk backwards while the local hash differs from the primary's hash at the same
            // position. Stop at 0: there is no local operation left to compare or reverse there.
            while (currentPosition > 0)
            {
                var primaryOperations = (await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
                {
                    ShardId = shardId,
                    From = currentPosition,
                    To = currentPosition
                })).Operations;
                ShardWriteOperation currentOperation = await _shardRepository.GetShardWriteOperationAsync(shardId, currentPosition);

                if (primaryOperations[currentPosition].ShardHash == currentOperation.ShardHash)
                {
                    // Histories agree from here down; nothing more to reverse.
                    break;
                }

                _logger.LogInformation("Reverting transaction " + currentOperation.Pos + " on shard " + shardId);
                _writer.ReverseLocalTransaction(shardId, type, currentOperation.Pos);
                currentPosition--;
            }
        }

        // Fetch the next batch after the last position known to match.
        result = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
        {
            ShardId = shardId,
            From = currentPosition + 1,
            To = currentPosition + 50
        });

        var totalShards = _shardRepository.GetTotalShardWriteOperationsCount(shardId);
        // If you have more operations
        if (result.LatestPosition < totalShards)
        {
            _logger.LogWarning("Detected more nodes locally then primary, revering to latest position");
            while (totalShards != result.LatestPosition)
            {
                _writer.ReverseLocalTransaction(shardId, type, totalShards);
                totalShards--;
            }
        }

        // Replicate batches until the local count reaches the primary's latest position.
        while (result.LatestPosition != _shardRepository.GetTotalShardWriteOperationsCount(shardId))
        {
            if (!result.Operations.Any())
            {
                // Without this guard Last() below fails with an opaque "sequence contains no elements".
                throw new InvalidOperationException("Primary returned no operations for shard " + shardId + " after position " + currentPosition + " while reporting latest position " + result.LatestPosition + ".");
            }

            foreach (var operation in result.Operations)
            {
                _logger.LogDebug(_nodeStateService.Id + "Replicated operation " + operation.Key + " for shard " + shardId);
                await _writer.ReplicateShardWriteOperationAsync(operation.Value);
            }

            currentPosition = result.Operations.Last().Key;
            result = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
            {
                ShardId = shardId,
                From = currentPosition + 1,
                To = currentPosition + 50
            });
        }
    }

    _logger.LogInformation("Successfully recovered data on shard " + shardId);
    return true;
}