Beispiel #1
0
        /// <summary>
        /// Builds the list of nodes a shard could be allocated to: every contactable node that is
        /// currently listed in neither the shard's in-sync allocations nor its stale allocations.
        /// </summary>
        /// <param name="shardId">Identifier of the shard to look up in the state machine.</param>
        /// <param name="type">Shard type used, together with <paramref name="shardId"/>, to look up the shard.</param>
        /// <returns>One <see cref="AllocationCandidate"/> per eligible node; empty when no node qualifies.</returns>
        public List<AllocationCandidate> GetAllocationCandidates(Guid shardId, string type)
        {
            // Only nodes flagged as contactable are considered at all.
            var contactableNodes = _stateMachine.GetNodes().Where(candidate => candidate.IsContactable);

            ShardAllocationMetadata shardMetadata = _stateMachine.GetShard(type, shardId);

            return contactableNodes
                   // Skip any node that already appears as an in-sync or a stale allocation.
                   .Where(candidate => !(shardMetadata.InsyncAllocations.Contains(candidate.Id) ||
                                         shardMetadata.StaleAllocations.Contains(candidate.Id)))
                   .Select(candidate => new AllocationCandidate()
                   {
                       NodeId = candidate.Id,
                       Type   = shardMetadata.Type
                   })
                   .ToList();
        }
Beispiel #2
0
        /// <summary>
        /// Records a new write operation for a shard, applies it to the local datastore and replicates it
        /// to every other in-sync allocation. Allocations that fail to replicate are reported to the
        /// cluster as stale and removed from the in-sync set.
        /// </summary>
        /// <param name="data">The shard data being written; its <c>ShardId</c> must have a value.</param>
        /// <param name="operationType">The kind of operation stored on the write operation.</param>
        /// <param name="operationId">Identifier of the operation; also fed into the shard hash.</param>
        /// <param name="transactionDate">Timestamp stored on the write operation.</param>
        /// <returns>
        /// A response with <c>IsSuccessful = true</c> plus the assigned position and shard hash when the
        /// operation was persisted; a response with <c>IsSuccessful = false</c> when the repository rejected it.
        /// </returns>
        public async Task<WriteShardDataResponse> WriteShardData(ShardData data, ShardOperationOptions operationType, string operationId, DateTime transactionDate)
        {
            var operation = new ShardWriteOperation()
            {
                Data            = data,
                Id              = operationId,
                Operation       = operationType,
                TransactionDate = transactionDate
            };

            ShardWriteOperation previousOperation = await GetOrPopulateOperationCache(operation.Data.ShardId.Value);

            // Positions start at 1; every later operation follows directly on the previous one.
            if (previousOperation == null)
            {
                operation.Pos = 1;
            }
            else
            {
                operation.Pos = previousOperation.Pos + 1;
            }

            // The hash chains the previous operation's hash with this operation's id.
            var previousHash = previousOperation == null ? "" : previousOperation.ShardHash;
            operation.ShardHash = ObjectUtility.HashStrings(previousHash, operation.Id);

            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "writing new operation " + operationId + " with data " + Environment.NewLine + JsonConvert.SerializeObject(data, Formatting.Indented));

            // Persist the operation; nothing else happens if the repository refuses it.
            var persisted = await _shardRepository.AddShardWriteOperationAsync(operation);
            if (!persisted)
            {
                return new WriteShardDataResponse()
                {
                    IsSuccessful = false
                };
            }

            ApplyOperationToDatastore(operation);
            var shardMetadata = _stateMachine.GetShard(operation.Data.ShardType, operation.Data.ShardId.Value);

            // Flag the operation as applied, then refresh the cached "last operation" for the shard.
            await _shardRepository.MarkShardWriteOperationAppliedAsync(operation.Id);
            UpdateOperationCache(operation.Data.ShardId.Value, operation);

            var failedAllocations = new ConcurrentBag<Guid>();

            // Sends the operation to a single allocation; any failure is logged and the allocation recorded.
            async Task ReplicateToAllocationAsync(Guid allocation)
            {
                try
                {
                    var replicationResult = await _clusterClient.Send(allocation, new ReplicateShardWriteOperation()
                    {
                        Operation = operation
                    });

                    if (!replicationResult.IsSuccessful)
                    {
                        throw new Exception("Failed to replicate data to shard " + shardMetadata.Id + " to node " + allocation + " for operation " + operation.ToString() + Environment.NewLine + JsonConvert.SerializeObject(operation, Formatting.Indented));
                    }

                    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Successfully replicated all " + shardMetadata.Id + "shards.");
                }
                catch (TaskCanceledException)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " on node " + allocation + " for operation " + operation.Pos + " as request timed out, marking shard as not insync...");
                    failedAllocations.Add(allocation);
                }
                catch (Exception e)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " for operation " + operation.Pos + " with error " + e.Message + ", marking shard as not insync..." + Environment.NewLine + e.StackTrace);
                    failedAllocations.Add(allocation);
                }
            }

            // Replicate to every in-sync allocation other than this node, all in flight at once.
            var replications = new List<Task>();
            foreach (var allocation in shardMetadata.InsyncAllocations.Where(id => id != _nodeStateService.Id))
            {
                replications.Add(ReplicateToAllocationAsync(allocation));
            }

            await Task.WhenAll(replications);

            if (!failedAllocations.IsEmpty)
            {
                // Move every failed allocation from the in-sync set to the stale set.
                var demoteFailedAllocations = new UpdateShardMetadataAllocations()
                {
                    ShardId = data.ShardId.Value,
                    Type    = data.ShardType,
                    StaleAllocationsToAdd     = failedAllocations.ToHashSet(),
                    InsyncAllocationsToRemove = failedAllocations.ToHashSet()
                };

                await _clusterClient.Send(new ExecuteCommands()
                {
                    Commands = new List<BaseCommand>()
                    {
                        demoteFailedAllocations
                    },
                    WaitForCommits = true
                });

                _logger.LogInformation(_nodeStateService.GetNodeLogId() + " had stale virtual machines.");
            }

            return new WriteShardDataResponse()
            {
                Pos = operation.Pos,
                ShardHash = operation.ShardHash,
                IsSuccessful = true
            };
        }
Beispiel #3
0
        /// <summary>
        /// Handles a shard write request. Ensures the index for the request's shard type exists (requesting
        /// its creation from the current leader if needed), resolves the target shard, and then either
        /// enqueues the write locally when this node is the shard's primary allocation or forwards the
        /// request to the primary node. When <c>request.RemoveLock</c> is set, the request's lock is removed
        /// after a local write completes.
        /// </summary>
        /// <param name="request">
        /// The write request. When <c>request.Data.ShardId</c> is null it is overwritten with the id of a
        /// randomly selected shard of the request's shard type.
        /// </param>
        /// <returns>
        /// The locally built response (operation id, success flag, lock-removal flag), or the primary
        /// node's response when the request was forwarded.
        /// </returns>
        /// <exception cref="IndexCreationFailedException">
        /// Thrown when waiting for the index to exist, or for the enqueued operation to complete, takes
        /// longer than <c>_clusterOptions.DataTransferTimeoutMs</c> milliseconds.
        /// </exception>
        public async Task<AddShardWriteOperationResponse> AddShardWriteOperationHandler(AddShardWriteOperation request)
        {
            var startDate      = DateTime.Now;
            var checkpoint     = 1;
            // Snapshot of the queue length; reused below as the polling interval (in ms) while waiting for completion.
            var totalOperation = _writeCache.OperationQueue.Count();

            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Received write request for object " + request.Data.Id + " for request " + request.Data.ShardId);

            AddShardWriteOperationResponse finalResult = new AddShardWriteOperationResponse();

            //Check if index exists, if not - create one
            if (!_stateMachine.IndexExists(request.Data.ShardType))
            {
                if (IndexCreationQueue.Any(icq => icq == request.Data.ShardType))
                {
                    // Creation is already queued: just poll until the index shows up.
                    while (!_stateMachine.IndexExists(request.Data.ShardType))
                    {
                        // TotalMilliseconds is the whole elapsed time; Milliseconds is only the 0-999 component
                        // and would never exceed a timeout of one second or more.
                        if ((DateTime.Now - startDate).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
                        {
                            throw new IndexCreationFailedException("Index creation for shard " + request.Data.ShardType + " is already queued.");
                        }
                        await Task.Delay(10);
                    }
                }
                else
                {
                    // Ask the current leader to create the index, then poll until it exists.
                    await _clusterClient.Send(_nodeStateService.CurrentLeader.Value, new RequestCreateIndex()
                    {
                        Type = request.Data.ShardType
                    });

                    DateTime startIndexCreation = DateTime.Now;
                    while (!_stateMachine.IndexExists(request.Data.ShardType))
                    {
                        if ((DateTime.Now - startIndexCreation).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
                        {
                            throw new IndexCreationFailedException("Index creation for shard " + request.Data.ShardType + " timed out.");
                        }
                        await Task.Delay(100);
                    }
                }
            }

            if (_nodeOptions.EnablePerformanceLogging)
            {
                PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "index allocation");
            }

            ShardAllocationMetadata shardMetadata;

            if (request.Data.ShardId == null)
            {
                // No shard requested: pick one of the type's shards at random and record it on the request.
                var    allocations       = _stateMachine.GetShards(request.Data.ShardType);
                Random rand              = new Random();
                var    selectedNodeIndex = rand.Next(0, allocations.Length);
                request.Data.ShardId = allocations[selectedNodeIndex].Id;
                shardMetadata        = allocations[selectedNodeIndex];
            }
            else
            {
                shardMetadata = _stateMachine.GetShard(request.Data.ShardType, request.Data.ShardId.Value);
            }

            if (_nodeOptions.EnablePerformanceLogging)
            {
                PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "shard Allocation");
            }

            //If the shard is assigned to you
            if (shardMetadata.PrimaryAllocation == _nodeStateService.Id)
            {
                var operationId = Guid.NewGuid().ToString();
                finalResult.OperationId = operationId;
                await _writeCache.EnqueueOperationAsync(new ShardWriteOperation()
                {
                    Data            = request.Data,
                    Id              = operationId,
                    Operation       = request.Operation,
                    TransactionDate = DateTime.Now
                });

                finalResult.IsSuccessful = true;

                if (_nodeOptions.EnablePerformanceLogging)
                {
                    PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "enqueue");
                }

                // Wait until the write cache reports the operation as complete.
                // NOTE(review): startDate is passed by ref to PrintCheckPoint above, which may update it;
                // confirm whether this timeout is meant to run from request start or from the last checkpoint.
                while (!_writeCache.IsOperationComplete(operationId))
                {
                    if ((DateTime.Now - startDate).TotalMilliseconds > _clusterOptions.DataTransferTimeoutMs)
                    {
                        throw new IndexCreationFailedException("Queue clearance for transaction " + operationId + request.Data.ShardType + " timed out.");
                    }
                    await Task.Delay(totalOperation);
                }
            }
            else
            {
                // Another node is primary for this shard: forward the request and return its answer as-is.
                try
                {
                    return await _clusterClient.Send(shardMetadata.PrimaryAllocation, request);
                }
                catch (Exception e)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to write " + request.Operation.ToString() + " request to primary node " + _stateMachine.CurrentState.Nodes[shardMetadata.PrimaryAllocation].TransportAddress + " for object " + request.Data.Id + " shard " + request.Data.ShardId + "|" + e.StackTrace);
                    // Rethrow without resetting the original stack trace.
                    throw;
                }
            }

            if (request.RemoveLock)
            {
                var result = await _clusterClient.Send(new ExecuteCommands()
                {
                    Commands = new List<BaseCommand>()
                    {
                        new RemoveLock()
                        {
                            Name   = request.Data.GetLockName(),
                            LockId = request.LockId
                        }
                    },
                    WaitForCommits = true
                });

                if (result.IsSuccessful)
                {
                    finalResult.LockRemoved = true;
                }
            }

            Interlocked.Increment(ref totalRequests);

            if (_nodeOptions.EnablePerformanceLogging)
            {
                PerformanceMetricUtility.PrintCheckPoint(ref totalLock, ref totals, ref startDate, ref checkpoint, "removeLock");
            }

            return finalResult;
        }
Beispiel #4
0
        /// <summary>
        /// Brings the local copy of a shard in line with the shard's primary allocation. Local operations
        /// the primary does not have (or whose hash differs from the primary's) are reversed, then missing
        /// operations are fetched from the primary in batches and replicated locally.
        /// </summary>
        /// <param name="shardId">Identifier of the shard to synchronise.</param>
        /// <param name="type">Shard type, used to look up the shard metadata and when reversing operations.</param>
        /// <returns><c>true</c> once the local operation count matches the primary's latest position.</returns>
        /// <exception cref="Exception">Thrown when the initial request to the primary is not successful.</exception>
        public async Task<bool> SyncShard(Guid shardId, string type)
        {
            // The newest local operation is the one whose position equals the local operation count.
            var lastOperation = await _shardRepository.GetShardWriteOperationAsync(shardId, _shardRepository.GetTotalShardWriteOperationsCount(shardId));

            int lastOperationPos = lastOperation == null ? 0 : lastOperation.Pos;

            var shardMetadata = _stateMachine.GetShard(type, shardId);

            var result = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
            {
                ShardId = shardId,
                From    = lastOperationPos,
                To      = lastOperationPos + 1
            });

            if (!result.IsSuccessful)
            {
                throw new Exception("Failed to fetch shard from primary.");
            }

            var totalPositions = _shardRepository.GetTotalShardWriteOperationsCount(shardId);

            //If the primary has less operations
            if (result.LatestPosition < lastOperationPos)
            {
                // Reverse local operations from the newest down until the counts agree.
                while (totalPositions != result.LatestPosition)
                {
                    _writer.ReverseLocalTransaction(shardId, type, totalPositions);
                    totalPositions--;
                }
            }
            else
            {
                //Check whether the hash is equal, if not equal roll back each transaction
                var currentPosition = lastOperationPos;
                if (lastOperationPos != 0 && result.Operations[lastOperationPos].ShardHash != lastOperation.ShardHash)
                {
                    // Walk backwards while the local hash does NOT match the primary's hash at the same
                    // position, reversing each divergent local operation. The previous condition was
                    // inverted (it looped while the hashes were equal), so the rollback never ran.
                    // Positions start at 1, so stop once everything has been reversed.
                    while (currentPosition > 0)
                    {
                        var primaryReply = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
                        {
                            ShardId = shardId,
                            From = currentPosition,
                            To = currentPosition
                        });
                        ShardWriteOperation localOperation = await _shardRepository.GetShardWriteOperationAsync(shardId, currentPosition);

                        if (primaryReply.Operations[currentPosition].ShardHash == localOperation.ShardHash)
                        {
                            // Histories agree from here back; everything after this position gets re-fetched.
                            break;
                        }

                        _logger.LogInformation("Reverting transaction " + localOperation.Pos + " on shard " + shardId);
                        _writer.ReverseLocalTransaction(shardId, type, localOperation.Pos);
                        currentPosition--;
                    }
                }

                // Fetch the first batch of operations following the last position known to match.
                result = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
                {
                    ShardId = shardId,
                    From    = currentPosition + 1,
                    To      = currentPosition + 50
                });

                var totalShards = _shardRepository.GetTotalShardWriteOperationsCount(shardId);
                //If you have more operations
                if (result.LatestPosition < totalShards)
                {
                    _logger.LogWarning("Detected more nodes locally then primary, revering to latest position");
                    while (totalShards != result.LatestPosition)
                    {
                        _writer.ReverseLocalTransaction(shardId, type, totalShards);
                        totalShards--;
                    }
                }

                // Replicate batch after batch until the local count reaches the primary's latest position.
                while (result.LatestPosition != _shardRepository.GetTotalShardWriteOperationsCount(shardId))
                {
                    foreach (var operation in result.Operations)
                    {
                        _logger.LogDebug(_nodeStateService.Id + "Replicated operation " + operation.Key + " for shard " + shardId);
                        await _writer.ReplicateShardWriteOperationAsync(operation.Value);
                    }

                    // Continue from the last position contained in the batch just applied.
                    currentPosition = result.Operations.Last().Key;
                    result          = await _clusterClient.Send(shardMetadata.PrimaryAllocation, new RequestShardWriteOperations()
                    {
                        ShardId = shardId,
                        From    = currentPosition + 1,
                        To      = currentPosition + 50
                    });
                }
            }

            _logger.LogInformation("Successfully recovered data on shard " + shardId);
            return true;
        }