/// <summary>
/// Computes the reward for taking <paramref name="action"/> from <paramref name="stateFrom"/>.
/// </summary>
/// <param name="stateFrom">The state the action is taken from.</param>
/// <param name="action">The action being evaluated.</param>
/// <returns>
/// 100 when <see cref="HasReachedAGoalCondition"/> reports true for the pair; otherwise -1.
/// </returns>
public async Task<double> RewardFunction(IState<TData> stateFrom, IAgentAction<TData> action)
{
    var goalReached = await HasReachedAGoalCondition(stateFrom, action);

    // Reaching the goal yields the positive reward; every other step is penalised.
    return goalReached ? 100 : -1;
}
/// <summary>
/// Looks up the reward for moving from <paramref name="state"/> via <paramref name="action"/>
/// in the <c>R</c> table, indexed first by the source state's data and then by the
/// data of the action's target state.
/// </summary>
/// <param name="state">The state the action is taken from.</param>
/// <param name="action">The action being evaluated; must be a <c>TestAction</c>.</param>
/// <returns>The value stored in <c>R</c> for the transition.</returns>
/// <exception cref="InvalidCastException">
/// <paramref name="action"/> is not a <c>TestAction</c>.
/// </exception>
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
public async Task<double> RewardFunction(IState<int> state, IAgentAction<int> action)
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
{
    // Guard first: only TestAction exposes the target state needed for the lookup.
    if (!(action is TestAction transition))
    {
        throw new InvalidCastException();
    }

    var fromIndex = state.Data;
    var toIndex = transition.ToState.Data;
    return R[fromIndex][toIndex];
}
/// <summary>
/// Updates the quality-matrix entry for the transition
/// (<paramref name="currentState"/>, <paramref name="nextAction"/>, <paramref name="nextState"/>).
/// </summary>
/// <remarks>
/// Applies <c>Q = [(1-a) * Q] + [a * (R + (g * maxQ))]</c> where <c>a</c> is
/// <c>options.LearningRate</c>, <c>g</c> is <c>options.DiscountRate</c>, <c>R</c> is the value
/// returned by <c>options.TrainGoal.RewardFunction</c>, and <c>maxQ</c> is the largest stored
/// quality among the actions the environment reports as possible from
/// <paramref name="nextState"/>. Pairs that have no stored quality count as 0, and when no
/// actions are possible at all <c>maxQ</c> is 0 instead of raising an exception.
/// </remarks>
/// <param name="currentState">The state the action was taken from.</param>
/// <param name="nextAction">The action that was taken.</param>
/// <param name="nextState">The state reached after the action.</param>
private async Task ApplyQMatrixLogic(IState<TData> currentState, IAgentAction<TData> nextAction, IState<TData> nextState)
{
    var nextNextActions = await options.Environment.GetPossibleActions(nextState);
    var qualityMatrix = options.Policy.QualityMatrix;

    // DefaultIfEmpty keeps Max from throwing when nextState offers no actions
    // (nothing further to gain, so the future value is 0).
    var maxQ = nextNextActions
        .Select(x => qualityMatrix.TryGetValue(new StateAndActionPair<TData>(nextState, x), out var knownQ) ? knownQ : 0D)
        .DefaultIfEmpty(0D)
        .Max();

    var selectedPair = new StateAndActionPairWithResultState<TData>(currentState, nextAction, nextState);

    // A missing entry leaves currentQ at its default of 0, matching a freshly seeded cell.
    qualityMatrix.TryGetValue(selectedPair, out var currentQ);

    var reward = await options.TrainGoal.RewardFunction(currentState, nextAction);

    // Q = [(1-a) * Q] + [a * (R + (g * maxQ))]
    qualityMatrix[selectedPair] =
        ((1 - options.LearningRate) * currentQ)
        + (options.LearningRate * (reward + (options.DiscountRate * maxQ)));
}
/// <summary>
/// Executes one step of the algorithm with a predetermined state and action.
/// </summary>
/// <remarks>
/// After executing the action, the environment is polled while it reports the resulting state
/// as intermediate, up to <paramref name="maximumWaitForStabilization"/> waits. The quality
/// matrix is updated only when the final state is no longer intermediate; if the wait budget
/// runs out while the state is still intermediate, the update is skipped.
/// </remarks>
/// <param name="currentState">The state the algorithm will start from</param>
/// <param name="nextAction">The action that will try to apply</param>
/// <param name="maximumWaitForStabilization">
/// The maximum number of stabilization waits to perform while the resulting state is intermediate
/// </param>
/// <returns>
/// The new state after the action has been resolved, together with the number of
/// stabilization waits that were performed
/// </returns>
public async Task<(IState<TData>, int)> Step(IState<TData> currentState, IAgentAction<TData> nextAction, int maximumWaitForStabilization = 1000)
{
    var nextState = await nextAction.ExecuteAction(options.Environment, currentState);
    var currentStabilizationCounter = 0;

    // Track the intermediate flag explicitly so that a state which settles on the very last
    // permitted wait (or is already settled when the budget is 0) is not mistaken for a timeout.
    var isIntermediate = await options.Environment.IsIntermediateState(nextState);
    while (isIntermediate && currentStabilizationCounter < maximumWaitForStabilization)
    {
        await options.Environment.WaitForPostActionIntermediateStabilization();
        nextState = await options.Environment.GetCurrentState();
        ++currentStabilizationCounter;
        isIntermediate = await options.Environment.IsIntermediateState(nextState);
    }

    if (isIntermediate)
    {
        // Wait budget exhausted without reaching a stable state: do not learn from it.
        return (nextState, currentStabilizationCounter);
    }

    await ApplyQMatrixLogic(currentState, nextAction, nextState);
    return (nextState, currentStabilizationCounter);
}
/// <summary>
/// Resolves the agent action registered for <paramref name="module"/> and runs its
/// collection routine against the given file.
/// </summary>
/// <param name="module">Identifier passed to the factory to select the agent action.</param>
/// <param name="filePath">Path forwarded to the agent action's <c>CollectExcute</c>.</param>
/// <param name="encoding">Encoding name forwarded to the agent action's <c>CollectExcute</c>.</param>
/// <returns>The messages returned by the agent action's <c>CollectExcute</c> call.</returns>
public UserControl.Messages Excute(string module, string filePath, string encoding)
{
    AgentActionFactory factory = new AgentActionFactory(_domainDataProvider);
    IAgentAction collector = factory.GetAgentAction(module);

    UserControl.Messages messages = collector.CollectExcute(filePath, encoding);
    return messages;
}
/// <summary>
/// Evaluates the configured goal predicate for the given state and action.
/// </summary>
/// <param name="state">The state to evaluate.</param>
/// <param name="action">The action to evaluate.</param>
/// <returns>The awaited result of <c>hasReachedAGoalConditionPredicate</c>.</returns>
public async Task<bool> HasReachedAGoalCondition(IState<TData> state, IAgentAction<TData> action)
{
    var goalReached = await hasReachedAGoalConditionPredicate.Invoke(state, action);
    return goalReached;
}
/// <summary>
/// Reports whether <paramref name="state"/> equals the goal state <c>TestState(11)</c>.
/// </summary>
/// <param name="state">The state to compare against the goal state.</param>
/// <param name="action">Not used by this implementation.</param>
/// <returns>The result of <c>Equals</c> on a new <c>TestState(11)</c> with <paramref name="state"/>.</returns>
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
public async Task<bool> HasReachedAGoalCondition(IState<int> state, IAgentAction<int> action)
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
{
    var goalState = new TestState(11);
    return goalState.Equals(state);
}