/// <summary>
/// Rejects <see cref="DataLossMode.Invalid"/>; any other mode lets the method return normally.
/// </summary>
/// <param name="dataLossMode">Mode supplied by the caller.</param>
private static void ThrowIfDataLossModeInvalid(DataLossMode dataLossMode)
{
    if (dataLossMode != DataLossMode.Invalid)
    {
        return;
    }

    // The exception object is produced by the shared utility from E_INVALIDARG and the
    // Error_UnsupportedDataLossMode resource string.
    throw FaultAnalysisServiceUtility.CreateException(
        TraceType,
        Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG,
        Strings.StringResources.Error_UnsupportedDataLossMode);
}
        /// <summary>
        /// Replaces <c>Info</c> with a freshly constructed <see cref="InvokeDataLossInfo"/> that carries over only the
        /// partition selector and data loss mode of the current one.
        /// </summary>
        public override void ClearInfo()
        {
            var current = this.Info;

            this.Info = new InvokeDataLossInfo(current.PartitionSelector, current.DataLossMode);
        }
        /// <summary>
        /// Creates the info record for a data loss command from the caller-supplied selector and mode.
        /// </summary>
        /// <param name="partitionSelector">Stored in <c>PartitionSelector</c>.</param>
        /// <param name="dataLossMode">Stored in <c>DataLossMode</c>.</param>
        public InvokeDataLossInfo(PartitionSelector partitionSelector, DataLossMode dataLossMode)
        {
            // Placeholder only: it will be overwritten when the command executes and is not used during the command.
            this.NodeName = "UNKNOWNNODE";
            this.UnreliableTransportInfo = new List<Tuple<string, string>>();

            this.DataLossMode = dataLossMode;
            this.PartitionSelector = partitionSelector;
        }
 /// <summary>
 /// Creates a description of a data loss request.
 /// </summary>
 /// <param name="operationId">Identifier of the operation; checked through <c>Requires.Argument</c> before anything is stored.</param>
 /// <param name="partitionSelector">Stored in <c>PartitionSelector</c>.</param>
 /// <param name="dataLossMode">Stored in <c>DataLossMode</c>.</param>
 public InvokeDataLossDescription(Guid operationId, PartitionSelector partitionSelector, DataLossMode dataLossMode)
 {
     // Validation runs first so that no property is assigned if it throws.
     Requires.Argument<Guid>("operationId", operationId).NotNull();

     this.DataLossMode = dataLossMode;
     this.PartitionSelector = partitionSelector;
     this.OperationId = operationId;
 }
        /// <summary>
        /// Creates the data loss action. The state manager, partition, state and both timeouts are handed to the base
        /// constructor; the remaining arguments are stored on this instance.
        /// </summary>
        /// <param name="stateManager">Forwarded to the base constructor.</param>
        /// <param name="partition">Forwarded to the base constructor.</param>
        /// <param name="state">Forwarded to the base constructor.</param>
        /// <param name="partitionSelector">Must not be null; stored in <c>PartitionSelector</c>.</param>
        /// <param name="dataLossMode">Stored in <c>DataLossMode</c>.</param>
        /// <param name="dataLossCheckWaitDurationInSeconds">Stored in <c>DataLossCheckWaitDurationInSeconds</c>.</param>
        /// <param name="dataLossCheckPollIntervalInSeconds">Stored in <c>DataLossCheckPollIntervalInSeconds</c>.</param>
        /// <param name="replicaDropWaitDurationInSeconds">Stored in <c>ReplicaDropWaitDurationInSeconds</c>.</param>
        /// <param name="requestTimeout">Forwarded to the base constructor.</param>
        /// <param name="operationTimeout">Forwarded to the base constructor.</param>
        public InvokeDataLossAction(
            IReliableStateManager stateManager,
            IStatefulServicePartition partition,
            InvokeDataLossState state,
            PartitionSelector partitionSelector,
            DataLossMode dataLossMode,
            int dataLossCheckWaitDurationInSeconds,
            int dataLossCheckPollIntervalInSeconds,
            int replicaDropWaitDurationInSeconds,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout)
            : base(stateManager, partition, state, requestTimeout, operationTimeout)
        {
            // The null check comes before any assignment so a failed check leaves nothing half-initialized here.
            ThrowIf.Null(partitionSelector, "partitionSelector");

            // Timing settings, in seconds.
            this.ReplicaDropWaitDurationInSeconds = replicaDropWaitDurationInSeconds;
            this.DataLossCheckPollIntervalInSeconds = dataLossCheckPollIntervalInSeconds;
            this.DataLossCheckWaitDurationInSeconds = dataLossCheckWaitDurationInSeconds;

            // Target of the command.
            this.DataLossMode = dataLossMode;
            this.PartitionSelector = partitionSelector;
        }
示例#6
0
 /// <summary>
 /// Decorates EventDocument-derived classes, providing event routing info: Namespace and Queue atoms
 /// </summary>
 /// <param name="ns">Namespace name; passed through <c>NonBlank</c> and then encoded with <c>Atom.Encode</c>.</param>
 /// <param name="queue">Queue name; passed through <c>NonBlank</c> and then encoded with <c>Atom.Encode</c>.</param>
 /// <param name="mode">Stored as-is in <c>LossMode</c>.</param>
 public EventAttribute(string ns, string queue, DataLossMode mode)
 {
   // The namespace is fully validated and encoded before the queue is looked at,
   // so a failure on the namespace always surfaces first.
   var checkedNamespace = ns.NonBlank(nameof(ns));
   Namespace = Atom.Encode(checkedNamespace);

   var checkedQueue = queue.NonBlank(nameof(queue));
   Queue = Atom.Encode(checkedQueue);

   LossMode = mode;
 }
 /// <summary>
 /// Creates the state for a data loss command. The operation id, the <c>ActionType.InvokeDataLoss</c> action type and
 /// the fault info go to the base constructor; selector and mode are wrapped in a new <see cref="InvokeDataLossInfo"/>.
 /// </summary>
 /// <param name="operationId">Forwarded to the base constructor.</param>
 /// <param name="serviceInternalFaultInfo">Forwarded to the base constructor.</param>
 /// <param name="partitionSelector">Passed to the <see cref="InvokeDataLossInfo"/> constructor.</param>
 /// <param name="dataLossMode">Passed to the <see cref="InvokeDataLossInfo"/> constructor.</param>
 public InvokeDataLossState(
     Guid operationId,
     ServiceInternalFaultInfo serviceInternalFaultInfo,
     PartitionSelector partitionSelector,
     DataLossMode dataLossMode)
     : base(operationId, ActionType.InvokeDataLoss, serviceInternalFaultInfo)
 {
     InvokeDataLossInfo initialInfo = new InvokeDataLossInfo(partitionSelector, dataLossMode);
     this.Info = initialInfo;
 }
        /// <summary>
        /// Interactive console menu. Creates one proxy per partition key (-1 and 1), then loops: shows the menu, reads a
        /// key and runs the chosen operation against the currently selected proxy. Any key other than 0-7 ends the loop.
        /// </summary>
        /// <returns>A task that completes when the user leaves the menu.</returns>
        private static async Task MainAsync()
        {
            Console.WriteLine("Waiting for services....");

            var proxyPartitionOne = await CreateProxyAsync(-1L);

            var proxyPartitionTwo = await CreateProxyAsync(1L);

            // Partition one is the default target until the user picks another with option 0.
            var proxy = proxyPartitionOne;

            Console.WriteLine("Waited for services..");


            while (true)
            {
                Console.WriteLine("Press any key to continue");
                Console.ReadKey(true);
                Console.Clear();

                Console.WriteLine("Press 0 to select target partition");
                Console.WriteLine("Press 1 to get state");
                Console.WriteLine("Press 2 to set state");
                Console.WriteLine("Press 3 to create a backup");
                Console.WriteLine("Press 4 to restore a backup");
                Console.WriteLine("Press 5 to list all central backups");
                Console.WriteLine("Press 6 to list the current Service Partition Ids");
                Console.WriteLine("Press 7 to invoke full dataloss on one of the current Service's Partitions");
                Console.WriteLine("Other key to exit");

                var    key = Console.ReadKey(true);
                string input;

                switch (key.Key)
                {
                case ConsoleKey.D0:
                    Console.WriteLine("Type 1 for partition one, or 2 for partition two");
                    key = Console.ReadKey(true);
                    if (ConsoleKey.D2 == key.Key)
                    {
                        proxy = proxyPartitionTwo;
                        Console.WriteLine("Using partition two.");
                    }
                    else
                    {
                        // Anything other than '2' falls back to partition one.
                        proxy = proxyPartitionOne;
                        Console.WriteLine("Using partition one.");
                    }
                    break;

                case ConsoleKey.D1:
                    string state = await proxy.GetState();

                    Console.WriteLine($"State: '{state}'");
                    break;

                case ConsoleKey.D2:
                    Console.WriteLine("Enter string to store as state:");
                    input = Console.ReadLine();
                    await proxy.SetState(input ?? "");

                    Console.WriteLine($"State saved: '{input}'");
                    break;

                case ConsoleKey.D3:
                    Console.WriteLine("Type 1 for full backup or 2 for incremental backup (incremental requires full backup to exist)");
                    key = Console.ReadKey(true);
                    if (ConsoleKey.D1 == key.Key)
                    {
                        Console.WriteLine("Creating a full backup asynchronously...");
                        await proxy.BeginCreateBackup(BackupOption.Full);
                    }
                    else
                    {
                        // Anything other than '1' is treated as a request for an incremental backup.
                        Console.WriteLine("Creating an incremental backup asynchronously...");
                        await proxy.BeginCreateBackup(BackupOption.Incremental);
                    }

                    break;

                case ConsoleKey.D4:
                    Console.WriteLine("Starting the restore of a backup");
                    Console.WriteLine("Enter central backup id (guid):");
                    input = Console.ReadLine();

                    // Validate the input before calling the service, and tell the user when it is unusable.
                    Guid index;
                    if (!Guid.TryParse(input, out index))
                    {
                        Console.WriteLine($"'{input}' is not a valid backup id (guid).");
                        break;
                    }

                    // FirstOrDefault instead of Single: an unknown id must not throw and terminate the menu loop.
                    var backup = (await proxy.ListAllBackups()).FirstOrDefault(b => b.BackupId == index);
                    if (backup == null)
                    {
                        Console.WriteLine($"No central backup found with id '{index}'.");
                        break;
                    }

                    DataLossMode lossMode = DataLossMode.FullDataLoss;
                    Console.WriteLine("Type 1 for full data loss or 2 for partial data loss.");

                    key = Console.ReadKey(true);
                    if (ConsoleKey.D1 == key.Key)
                    {
                        Console.WriteLine("Restoring backup with full data loss asynchronously...");
                    }
                    else
                    {
                        // Anything other than '1' selects partial data loss.
                        Console.WriteLine("Restoring backup with partial data loss asynchronously...");
                        lossMode = DataLossMode.PartialDataLoss;
                    }

                    await proxy.BeginRestoreBackup(backup, lossMode);

                    Console.WriteLine("Restore is active. This will take some time. Check progress in SF explorer.");
                    break;

                case ConsoleKey.D5:
                    Console.WriteLine("List all central backups");
                    var list = await proxy.ListAllBackups();

                    Console.WriteLine("Original partition\t\t\tBackup Id\t\t\t\tBackup Type\tTimestamp UTC");
                    Console.WriteLine(string.Join(Environment.NewLine, list.Select(data => $"{data.OriginalServicePartitionId}\t{data.BackupId}\t{data.BackupOption}\t\t{data.TimeStampUtc}")));
                    break;

                case ConsoleKey.D6:
                    var resolver = ServicePartitionResolver.GetDefault();
                    var resolved = await resolver.ResolveAsync(ServiceUri, new ServicePartitionKey(-1L), CancellationToken.None);

                    Console.WriteLine($"Partition key -1L resolves to partition {resolved.Info.Id}");
                    resolved = await resolver.ResolveAsync(ServiceUri, new ServicePartitionKey(1L), CancellationToken.None);

                    Console.WriteLine($"Partition key 1L resolves to partition {resolved.Info.Id}");

                    if (proxy == proxyPartitionOne)
                    {
                        Console.WriteLine("Using partition one (-1L)");
                    }
                    else
                    {
                        Console.WriteLine("Using partition two (1L)");
                    }
                    break;

                case ConsoleKey.D7:
                    Console.WriteLine("Enter partitionID");
                    string partitionString = Console.ReadLine();
                    if (Guid.TryParse(partitionString, out Guid partitionID))
                    {
                        var partitionSelector = PartitionSelector.PartitionIdOf(ServiceUri, partitionID);

                        // FabricClient is disposable; release it as soon as the data loss operation has been started.
                        using (var fabricClient = new FabricClient(FabricClientRole.Admin))
                        {
                            await fabricClient.TestManager.StartPartitionDataLossAsync(Guid.NewGuid(), partitionSelector, DataLossMode.FullDataLoss);
                        }
                    }
                    else
                    {
                        Console.WriteLine($"'{partitionString}' is not a valid partition id (guid).");
                    }
                    break;

                default:
                    return;
                }
            }
        }
        /// <summary>
        /// Creates a REST request that starts a partition data loss operation. The fabric client and timeout are handed
        /// to the base constructor; the remaining arguments are stored on the request.
        /// </summary>
        /// <param name="fabricClient">Forwarded to the base constructor.</param>
        /// <param name="operationId">Stored in <c>OperationId</c>.</param>
        /// <param name="servicename">Stored in <c>ServiceName</c>.</param>
        /// <param name="partitionId">Stored in <c>PartitionId</c>.</param>
        /// <param name="dataLossMode">Stored in <c>DataLossMode</c>.</param>
        /// <param name="timeout">Forwarded to the base constructor.</param>
        public StartPartitionDataLossRestRequest(
            IFabricClient fabricClient,
            Guid operationId,
            Uri servicename,
            Guid partitionId,
            DataLossMode dataLossMode,
            TimeSpan timeout)
            : base(fabricClient, timeout)
        {
            this.DataLossMode = dataLossMode;
            this.PartitionId = partitionId;
            this.ServiceName = servicename;
            this.OperationId = operationId;

            // Error codes registered as retryable for this request.
            var retryCodes = this.RetryErrorCodes;
            retryCodes.Add((uint)NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NOT_READY);
            retryCodes.Add((uint)NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_RECONFIGURATION_PENDING);

            // "Operation id already exists" is registered in the success list rather than the retry list.
            var succeedCodes = this.SucceedErrorCodes;
            succeedCodes.Add((uint)NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_TEST_COMMAND_OPERATION_ID_ALREADY_EXISTS);
        }
示例#10
0
            /// <summary>
            /// Runs the data loss step. In order, it: adds an unreliable transport behavior filtered to the partition on
            /// the node named in the persisted state; queries the partition's replicas; picks target replicas according
            /// to the requested <see cref="DataLossMode"/>; removes each target replica; waits for the targets to be
            /// dropped; removes the transport behavior; and finally polls the partition's data loss number until it
            /// differs from the value recorded in the state.
            /// </summary>
            /// <param name="cancellationToken">Token handed to every fabric call, delay and wait made by this step.</param>
            /// <param name="serviceInternalFaultInfo">Passed through to <c>ActionTest.PerformInternalServiceFaultIfRequested</c>.</param>
            /// <returns>The converted state object, after <c>StepStateNames.CompletedSuccessfully</c> has been pushed onto its <c>StateProgress</c>.</returns>
            /// <exception cref="FabricTransientException">
            /// Thrown when target selection returned null, or when the data loss number did not change before the poll deadline.
            /// </exception>
            /// <exception cref="FabricException">Thrown when the partition id is not present in the partition list query result.</exception>
            /// <remarks>
            /// A data loss mode other than FullDataLoss or PartialDataLoss results in the exception built by
            /// <c>FaultAnalysisServiceUtility.CreateException</c> with E_INVALIDARG.
            /// </remarks>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeDataLossState state = Convert(this.State);

                // Snapshot everything this step needs from the persisted state.
                PartitionSelector partitionSelector   = state.Info.PartitionSelector;
                DataLossMode      dataLossMode        = state.Info.DataLossMode;
                long   preDataLossNumber              = state.Info.DataLossNumber;
                string failoverManagerPrimaryNodeName = state.Info.NodeName;
                Guid   partitionId          = state.Info.PartitionId;
                // First() throws if UnreliableTransportInfo is empty.
                // NOTE(review): presumably an earlier step populates it before this one runs — confirm.
                string behaviorName         = state.Info.UnreliableTransportInfo.First().Item2;
                // NOTE(review): targetReplicaSetSize is read here but not used anywhere in this method.
                int    targetReplicaSetSize = state.Info.TargetReplicaSetSize;

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying UT, partitionId={1}", this.State.OperationId, partitionId);
                // NOTE(review): presumably "*" / "DoReconfiguration" matches reconfiguration messages to any destination — confirm.
                System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "DoReconfiguration");
                behavior.AddFilterForPartitionId(partitionId);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        failoverManagerPrimaryNodeName,
                        behaviorName,
                        behavior,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                // NOTE(review): the literal 0 is presumably the "no replica id filter" value — confirm against the query API.
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                List <StatefulServiceReplica> replicaList = new List <StatefulServiceReplica>();

                // Every replica is expected to be stateful; the assert fires on the first one that is not.
                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");
                    replicaList.Add(statefulReplica);
                }

                // Select target replicas based on the DataLossMode
                List <StatefulServiceReplica> targets = null;

                if (dataLossMode == DataLossMode.FullDataLoss)
                {
                    targets = GetReplicasForFullDataLoss(replicaList);
                }
                else if (dataLossMode == DataLossMode.PartialDataLoss)
                {
                    targets = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, replicaList);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedDataLossMode);
                }

                if (targets == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                // Remove the selected replicas one at a time, tracing each removal first.
                foreach (var replica in targets)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Removing replica {1} in partition {2} with role {3} and status {4} to induce data loss",
                        this.State.OperationId,
                        replica.Id,
                        partitionId,
                        replica.ReplicaRole,
                        replica.ReplicaStatus);

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.ServiceManager.RemoveReplicaAsync(
                            replica.NodeName,
                            partitionId,
                            replica.Id,
                            this.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveReplicaErrors.Value,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                await this.WaitForAllTargetReplicasToGetDroppedAsync(partitionId, targets, cancellationToken).ConfigureAwait(false);

                // The transport behavior added at the start is removed only after the targets have been dropped.
                await RemoveUnreliableTransportAsync(this.FabricClient, failoverManagerPrimaryNodeName, behaviorName, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                bool          dataLossWasSuccessful = false;
                // NOTE(review): the poll deadline is a fixed 30 seconds, while the failure message below reports
                // dataLossCheckWaitDurationInSeconds — confirm which duration is intended.
                TimeoutHelper timeoutHelper         = new TimeoutHelper(TimeSpan.FromSeconds(30));

                // do/while: the data loss number is checked at least once, even if the deadline has already passed.
                do
                {
                    // NOTE(review): this query uses the this.partitionSelector field rather than the local
                    // partitionSelector read from the state above — confirm both refer to the same selector.
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetPartitionListAsync(
                            this.partitionSelector.ServiceName,
                            null,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    // Locate this partition in the result and read the data loss number of its primary epoch.
                    bool partitionFound     = false;
                    long postDataLossNumber = 0;
                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            postDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                            partitionFound     = true;
                            break;
                        }
                    }

                    if (!partitionFound)
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                    }

                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Checking data loss numbers for partition {1} with remaining time {2}. Current numbers {3}:{4}",
                        this.State.OperationId,
                        partitionId,
                        timeoutHelper.GetRemainingTime(),
                        preDataLossNumber,
                        postDataLossNumber);

                    // Any change in the data loss number relative to the recorded value counts as success.
                    if (postDataLossNumber != preDataLossNumber)
                    {
                        dataLossWasSuccessful = true;
                        break;
                    }

                    await System.Fabric.Common.AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(this.dataLossCheckPollIntervalInSeconds), cancellationToken).ConfigureAwait(false);
                }while (timeoutHelper.GetRemainingTime() > TimeSpan.Zero);

                if (!dataLossWasSuccessful)
                {
                    // This is only viewable internally for debug.  This will cause a retry of the whole flow.
                    string error = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} - Service could not induce data loss for service '{1}' partition '{2}' in '{3}' Please retry",
                        this.State.OperationId,
                        partitionSelector.ServiceName,
                        partitionId,
                        this.dataLossCheckWaitDurationInSeconds);
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
 /// <summary>
 /// Creates a data loss action for the given partition selector and data loss mode.
 /// </summary>
 /// <param name="partitionSelector">Stored in <c>PartitionSelector</c>; not validated here.</param>
 /// <param name="dataLossMode">Stored in <c>DataLossMode</c>; not validated here.</param>
 public InvokeDataLossAction(PartitionSelector partitionSelector, DataLossMode dataLossMode)
 {
     this.DataLossMode = dataLossMode;
     this.PartitionSelector = partitionSelector;
 }
        /// <summary>
        /// Asynchronously starts a restore operation using the state indicated by <paramref name="backupMetadata"/>.
        /// The backup is retrieved from the central store.
        /// </summary>
        /// <param name="service">Service whose partition is restored; supplies the log callback, context and central backup store.</param>
        /// <param name="backupMetadata">Metadata of the backup to restore; its <c>BackupId</c> is scheduled with the central store.</param>
        /// <param name="dataLossMode">Data loss mode passed to <c>StartPartitionDataLossAsync</c>.</param>
        /// <returns>A task that completes once the partition data loss operation has been started.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="service"/> or <paramref name="backupMetadata"/> is null.
        /// </exception>
        public static async Task BeginRestoreBackup(this IBackupRestoreServiceInternal service, BackupMetadata backupMetadata, DataLossMode dataLossMode)
        {
            // Validate before the first log line: that line dereferences backupMetadata, so checking afterwards
            // would surface a NullReferenceException instead of the intended ArgumentNullException.
            if (service == null)
            {
                throw new ArgumentNullException(nameof(service));
            }

            if (backupMetadata == null)
            {
                throw new ArgumentNullException(nameof(backupMetadata));
            }

            service.LogCallback?.Invoke($"BackupRestoreService - Beginning restore backup {backupMetadata.BackupId} for partition {service.Context.PartitionId}.");

            await service.CentralBackupStore.ScheduleBackupAsync(service.Context.PartitionId, backupMetadata.BackupId);

            var partitionSelector = PartitionSelector.PartitionIdOf(service.Context.ServiceName, service.Context.PartitionId);

            var operationId = Guid.NewGuid();

            // FabricClient is disposable; release it once the data loss operation has been started.
            using (var fabricClient = new FabricClient(FabricClientRole.Admin))
            {
                await fabricClient.TestManager.StartPartitionDataLossAsync(operationId, partitionSelector, dataLossMode);
                //Causes OnDataLossAsync to be called.
            }

            service.LogCallback?.Invoke($"BackupRestoreService - Begun restore backup {backupMetadata.BackupId} for partition {service.Context.PartitionId}.");
        }
        /// <summary>
        /// Validates the data loss mode, builds an <see cref="InvokeDataLossState"/> for the request, persists it
        /// through the action store and then enqueues it.
        /// </summary>
        /// <remarks>
        /// Use this method signature for now until the actual client interface is decided
        /// </remarks>
        /// <param name="operationId">Identifier given to the new state and used in the warning trace.</param>
        /// <param name="partitionSelector">Passed to the new state.</param>
        /// <param name="dataLossMode">Checked by <c>ThrowIfDataLossModeInvalid</c>, then passed to the new state.</param>
        /// <param name="timeout">Timeout handed to <c>InitializeNewActionAsync</c>.</param>
        /// <param name="serviceInternalFaultInfo">Passed to the new state.</param>
        public async Task ProcessDataLossCommandAsync(
            Guid operationId,
            PartitionSelector partitionSelector,
            DataLossMode dataLossMode,
            TimeSpan timeout,
            ServiceInternalFaultInfo serviceInternalFaultInfo)
        {
            ThrowIfDataLossModeInvalid(dataLossMode);

            ActionStateBase pendingAction = new InvokeDataLossState(
                operationId,
                serviceInternalFaultInfo,
                partitionSelector,
                dataLossMode);

            try
            {
                // Once this await returns, the intent has been persisted; only then is the action queued.
                await this.actionStore.InitializeNewActionAsync(pendingAction, timeout);

                this.Enqueue(pendingAction);
            }
            catch (Exception ex)
            {
                // Trace and rethrow unchanged so the caller still sees the original exception.
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception {1}", operationId, ex);
                throw;
            }
        }
示例#14
0
        /// <summary>
        /// Fetches raw events from the consumer and, for each one, attempts to deserialize its content into an
        /// EventDocument-derived instance. The result is materialized into an array of (raw, doc, err) tuples.
        /// </summary>
        /// <param name="consumer">Event consumer implementation</param>
        /// <param name="route">Queue designator</param>
        /// <param name="partition">Logical partition to fetch from <see cref="IEventConsumer.PartitionCount"/></param>
        /// <param name="checkpoint">A point in time as of which to fetch</param>
        /// <param name="skip">Number of events to skip in the beginning</param>
        /// <param name="count">Number of events to fetch</param>
        /// <param name="lossMode">Data loss tolerance</param>
        /// <returns>
        ///  One tuple per fetched event: the `raw` event, its converted EventDocument-derived instance `doc`, and the
        ///  exception `err` caught while deserializing (if any). `doc` stays null when `err` is set; both are null for
        ///  events that are not JSON documents or have no content.
        /// </returns>
        public static async Task <IEnumerable <(Event raw, EventDocument doc, Exception err)> > FetchEventDocsAsync(this IEventConsumer consumer,
                                                                                                                    Route route,
                                                                                                                    int partition,
                                                                                                                    ulong checkpoint,
                                                                                                                    int skip,
                                                                                                                    int count,
                                                                                                                    DataLossMode lossMode = DataLossMode.Default)
        {
            var fetched = await consumer.NonNull(nameof(consumer))
                          .FetchAsync(route, partition, checkpoint, skip, count, lossMode);

            // A single stream instance is re-bound to each event's content buffer in turn.
            using (var stream = new IO.BufferSegmentReadingStream())
            {
                // ToArray() forces the projection to run while the stream is still alive.
                var converted = fetched.Select(evt =>
                {
                    EventDocument parsed = null;
                    Exception failure = null;

                    try
                    {
                        var hasJsonDoc = evt.ContentType == CONTENT_TYPE_JSON_DOC && evt.Content != null;
                        if (hasJsonDoc)
                        {
                            stream.UnsafeBindBuffer(evt.Content, 0, evt.Content.Length);
                            var map = JsonReader.DeserializeDataObject(stream, EVENT_JSON_ENCODING, true) as JsonDataMap;
                            parsed = JsonReader.ToDoc <EventDocument>(map, fromUI: false);
                        }
                    }
                    catch (Exception caught)
                    {
                        // A deserialization failure is reported per event rather than aborting the whole fetch.
                        failure = caught;
                    }

                    return (raw: evt, doc: parsed, err: failure);
                }).ToArray();

                return converted;
            }
        }
 /// <inheritdoc />
 public Task BeginRestoreBackup(BackupMetadata backupMetadata, DataLossMode dataLossMode) =>
     // Deliberately called in static form: this instance method has the same name, so writing
     // this.BeginRestoreBackup(...) would bind to this method itself instead of the helper.
     BackupRestoreServiceInternalExtensions.BeginRestoreBackup(this, backupMetadata, dataLossMode);
 /// <inheritdoc />
 public Task BeginRestoreBackup(BackupMetadata backupMetadata, DataLossMode dataLossMode) =>
     // Deliberately called in static form: this instance method has the same name, so writing
     // this.BeginRestoreBackup(...) would bind to this method itself instead of the helper.
     BackupRestoreServiceOperations.BeginRestoreBackup(this, backupMetadata, dataLossMode);
示例#17
0
        /// <summary>
        /// Asynchronously starts a restore operation using the state indicated by <paramref name="backupMetadata"/>.
        /// The backup is retrieved from the central store.
        /// This method completes and returns before the backup restore process is completely done.
        /// </summary>
        /// <param name="service">Service whose partition is restored; supplies the log callback, context and central backup store.</param>
        /// <param name="backupMetadata">Metadata of the backup to restore; its <c>BackupId</c> is scheduled with the central store.</param>
        /// <param name="dataLossMode">Data loss mode passed to <c>StartPartitionDataLossAsync</c>.</param>
        /// <returns>A task that completes once the partition data loss operation has been started.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="service"/> or <paramref name="backupMetadata"/> is null.
        /// </exception>
        /// <exception cref="Exception">
        /// Thrown when scheduling the restore or starting the data loss operation fails; the original error is the inner exception.
        /// </exception>
        public static async Task BeginRestoreBackup(this IBackupRestoreServiceOperations service, BackupMetadata backupMetadata, DataLossMode dataLossMode)
        {
            // Validate before the first log line: that line dereferences backupMetadata, so a null check placed
            // after it (as it was, inside the try block) could never be reached with a null argument.
            if (service == null)
            {
                throw new ArgumentNullException(nameof(service));
            }

            if (backupMetadata == null)
            {
                throw new ArgumentNullException(nameof(backupMetadata));
            }

            service.LogCallback?.Invoke($"BackupRestoreService - Beginning restore backup {backupMetadata.BackupId} for partition {service.Context.PartitionId}.");

            try
            {
                await service.CentralBackupStore.ScheduleBackupRestoreAsync(service.Context.PartitionId, backupMetadata.BackupId);

                var partitionSelector = PartitionSelector.PartitionIdOf(service.Context.ServiceName, service.Context.PartitionId);

                var operationId = Guid.NewGuid();

                // FabricClient is disposable; release it once the data loss operation has been started.
                using (var fabricClient = new FabricClient(FabricClientRole.Admin))
                {
                    await fabricClient.TestManager.StartPartitionDataLossAsync(operationId, partitionSelector, dataLossMode);
                    //Causes OnDataLossAsync to be called later on.
                }
            }
            catch (Exception ex)
            {
                // Log, then wrap: the exception type is kept as-is so existing callers see the same failure shape.
                string message = $"Failed to restore backup for partition {service.Context.PartitionId}";
                service.LogCallback?.Invoke($"{nameof(BackupRestoreServiceOperations)} - {nameof(BeginRestoreBackup)} failed for partition: {service.Context.PartitionId}. Message:{message} - Error: {ex.Message}");
                throw new Exception(message, ex);
            }
            service.LogCallback?.Invoke($"{nameof(BackupRestoreServiceOperations)} - {nameof(BeginRestoreBackup)} succeeded {backupMetadata.BackupId} for partition {service.Context.PartitionId}.");
        }