Example #1
        internal void RemoveState(TStateKey stateKey)
        {
            var predecessorQueue = new NativeQueue <TStateKey>(Allocator.Temp);

            // State Info
            StateInfoLookup.Remove(stateKey);

            // Actions
            if (ActionLookup.TryGetFirstValue(stateKey, out var actionKey, out var actionIterator))
            {
                do
                {
                    var stateActionPair = new StateActionPair <TStateKey, TActionKey>(stateKey, actionKey);
                    // Action Info
                    ActionInfoLookup.Remove(stateActionPair);

                    // Results
                    if (ResultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var resultIterator))
                    {
                        do
                        {
                            // Remove Predecessor Link
                            if (PredecessorGraph.TryGetFirstValue(resultingStateKey, out var predecessorKey, out var predecessorIterator))
                            {
                                predecessorQueue.Clear();

                                do
                                {
                                    if (!stateKey.Equals(predecessorKey))
                                    {
                                        predecessorQueue.Enqueue(predecessorKey);
                                    }
                                } while (PredecessorGraph.TryGetNextValue(out predecessorKey, ref predecessorIterator));

                                // Reset Predecessors
                                PredecessorGraph.Remove(resultingStateKey);

                                // Requeue Predecessors
                                while (predecessorQueue.TryDequeue(out var queuedPredecessorKey))
                                {
                                    PredecessorGraph.Add(resultingStateKey, queuedPredecessorKey);
                                }
                            }

                            // Action Result Info
                            StateTransitionInfoLookup.Remove(new StateTransition <TStateKey, TActionKey>(stateKey, stateActionPair.ActionKey, resultingStateKey));
                        } while (ResultingStateLookup.TryGetNextValue(out resultingStateKey, ref resultIterator));

                        ResultingStateLookup.Remove(stateActionPair);
                    }
                } while (ActionLookup.TryGetNextValue(out actionKey, ref actionIterator));

                ActionLookup.Remove(stateKey);
            }

            // Predecessors
            PredecessorGraph.Remove(stateKey);

            predecessorQueue.Dispose();
        }
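RemoveState relies on the multi-value iteration idiom from Unity.Collections that recurs throughout these examples: TryGetFirstValue obtains the first value and an iterator for a key, and TryGetNextValue advances it. A minimal, self-contained sketch of that idiom, using hypothetical int keys and values (not taken from the example above):

        // Hypothetical data; assumes Unity.Collections is available.
        var lookup = new NativeMultiHashMap<int, int>(16, Allocator.Temp);
        lookup.Add(1, 10);
        lookup.Add(1, 20);

        if (lookup.TryGetFirstValue(1, out var value, out var iterator))
        {
            do
            {
                // process each value stored under key 1
            } while (lookup.TryGetNextValue(out value, ref iterator));
        }

        lookup.Dispose(); // Temp allocations should still be disposed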
Example #2
        bool UpdateStateValue(TStateKey stateKey, NativeMultiHashMap <TStateKey, TActionKey> actionLookup,
                              NativeHashMap <TStateKey, StateInfo> stateInfoLookup,
                              NativeHashMap <StateActionPair <TStateKey, TActionKey>, ActionInfo> actionInfoLookup)
        {
            var stateInfo = stateInfoLookup[stateKey];

            // Handle case of no valid actions (mark complete)
            if (!actionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator))
            {
                if (!stateInfo.SubplanIsComplete)
                {
                    // The state was not marked terminal, so the value should be reset so as not to use the estimated value.
                    stateInfo.CumulativeRewardEstimate = new BoundedValue(0, 0, 0);
                    stateInfo.SubplanIsComplete        = true;
                    stateInfoLookup[stateKey]          = stateInfo;
                    return(true);
                }

                // Terminal state. No update required.
                return(false);
            }

            var originalValue          = stateInfo.CumulativeRewardEstimate;
            var originalCompleteStatus = stateInfo.SubplanIsComplete;

            stateInfo.CumulativeRewardEstimate = new BoundedValue(float.MinValue, float.MinValue, float.MinValue);
            stateInfo.SubplanIsComplete        = true;
            var maxLowerBound = float.MinValue;

            // Pick max action; find max lower bound
            do
            {
                var stateActionPair = new StateActionPair <TStateKey, TActionKey>(stateKey, actionKey);
                var actionInfo      = actionInfoLookup[stateActionPair];

                stateInfo.CumulativeRewardEstimate = stateInfo.CumulativeRewardEstimate.Average < actionInfo.CumulativeRewardEstimate.Average ?
                                                     actionInfo.CumulativeRewardEstimate :
                                                     stateInfo.CumulativeRewardEstimate;

                maxLowerBound = math.max(maxLowerBound, actionInfo.CumulativeRewardEstimate.LowerBound);
            } while (actionLookup.TryGetNextValue(out actionKey, ref iterator));

            // Update complete status (ignore pruned actions)
            actionLookup.TryGetFirstValue(stateKey, out actionKey, out iterator);
            do
            {
                var stateActionPair = new StateActionPair <TStateKey, TActionKey>(stateKey, actionKey);
                var actionInfo      = actionInfoLookup[stateActionPair];

                if (actionInfo.CumulativeRewardEstimate.UpperBound >= maxLowerBound)
                {
                    stateInfo.SubplanIsComplete &= actionInfo.SubplanIsComplete;
                }
            } while (actionLookup.TryGetNextValue(out actionKey, ref iterator));

            // Reassign
            stateInfoLookup[stateKey] = stateInfo;

            return(!originalValue.Approximately(stateInfo.CumulativeRewardEstimate) || originalCompleteStatus != stateInfo.SubplanIsComplete);
        }
Example #3
        public bool TryGetOptimalAction(TStateKey stateKey, out TActionKey action)
        {
            action = default;

            bool actionsFound = ActionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator);

            if (!actionsFound)
            {
                return(false);
            }

            var maxCumulativeReward = float.MinValue;

            do
            {
                var stateActionPair = new StateActionPair <TStateKey, TActionKey>(stateKey, actionKey);
                ActionInfoLookup.TryGetValue(stateActionPair, out var actionInfo);
                if (actionInfo.CumulativeRewardEstimate.Average > maxCumulativeReward)
                {
                    action = actionKey;
                    maxCumulativeReward = actionInfo.CumulativeRewardEstimate.Average;
                }
            } while (ActionLookup.TryGetNextValue(out actionKey, ref iterator));

            return(true);
        }
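A hedged usage sketch for TryGetOptimalAction; the receiver name, state key, and ExecuteAction helper below are assumptions, not part of the source:

        if (plan.TryGetOptimalAction(currentStateKey, out var optimalAction))
        {
            ExecuteAction(currentStateKey, optimalAction); // hypothetical dispatch helper
        }
        else
        {
            // no expanded actions for this state yet; fall back or expand further
        }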
Example #4
        bool UpdateCumulativeReward(StateActionPair <TStateKey, TActionKey> stateActionPair,
                                    NativeMultiHashMap <StateActionPair <TStateKey, TActionKey>, TStateKey> resultingStateLookup,
                                    NativeHashMap <TStateKey, StateInfo> stateInfoLookup,
                                    NativeHashMap <StateActionPair <TStateKey, TActionKey>, ActionInfo> actionInfoLookup,
                                    NativeHashMap <StateTransition <TStateKey, TActionKey>, StateTransitionInfo> stateTransitionInfoLookup)
        {
            var actionInfo             = actionInfoLookup[stateActionPair];
            var originalValue          = actionInfo.CumulativeRewardEstimate;
            var originalCompleteStatus = actionInfo.SubplanIsComplete;

            actionInfo.CumulativeRewardEstimate = default;
            actionInfo.SubplanIsComplete        = true;

            resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var iterator);
            do
            {
                var stateTransitionInfo = stateTransitionInfoLookup[new StateTransition <TStateKey, TActionKey>(stateActionPair, resultingStateKey)];
                var resultingStateInfo  = stateInfoLookup[resultingStateKey];

                actionInfo.SubplanIsComplete        &= resultingStateInfo.SubplanIsComplete;
                actionInfo.CumulativeRewardEstimate += stateTransitionInfo.Probability *
                                                       (stateTransitionInfo.TransitionUtilityValue + DiscountFactor * resultingStateInfo.CumulativeRewardEstimate);
            } while (resultingStateLookup.TryGetNextValue(out resultingStateKey, ref iterator));

            actionInfoLookup[stateActionPair] = actionInfo;

            return(!originalValue.Approximately(actionInfo.CumulativeRewardEstimate) || originalCompleteStatus != actionInfo.SubplanIsComplete);
        }
Example #5
 /// <summary>
 /// Returns the Q-value.
 /// </summary>
 /// <returns>The Q-value.</returns>
 /// <param name="saPair">The <see cref="T:StateActionPair"/> whose Q-value is returned.</param>
 public float getQValue(StateActionPair saPair)
 {
     if (!_qTable.ContainsKey(saPair))
     {
         setQValue(saPair, initialValue);
     }
     return(_qTable[saPair]);
 }
Example #6
 /// <summary>
 /// Returns the eligibility value of the given <see cref="T:StateActionPair"/>.
 /// </summary>
 /// <returns>The eligibility value.</returns>
 /// <param name="saPair">The <see cref="T:StateActionPair"/> whose eligibility value is returned.</param>
 public float GetEligibilityValue(StateActionPair saPair)
 {
     if (!eligibilityTable.ContainsKey(saPair))
     {
         SetEligibilityValue(saPair, initialValue);
     }
     return(eligibilityTable[saPair]);
 }
Example #7
    public void UpdateQValue(State fromState, State toState, AgentAction action, float reward)
    {
        StateActionPair destPair     = new StateActionPair(toState, action);
        float           learningRate = 1f; //1 / (numVisited[new StateActionPair(fromState, action)]++ + 1);

        QFunctionDic[fromState][destPair] = (1 - learningRate) * QFunctionDic[fromState][destPair] + learningRate * (reward + discountFactor * QMax(toState));
        Debug.Log("(FROM: " + fromState.position[0] + ", " + fromState.position[2] + " TO: " + toState.position[0] + ", " + toState.position[2] + "): " + QFunctionDic[fromState][destPair]);
    }
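With learningRate fixed at 1, the assignment above reduces to Q(s,a) = reward + discountFactor * QMax(toState); the commented-out expression would instead decay the rate with the visit count. For reference, a minimal sketch of the general tabular Q-learning rule the code instantiates (names assumed, not from the source):

    // Standard update: Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
    static float QLearningUpdate(float oldQ, float reward, float maxNextQ, float alpha, float gamma)
    {
        return (1 - alpha) * oldQ + alpha * (reward + gamma * maxNextQ);
    }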
Example #8
        void WriteEdgeToState(TStateKey precedingStateKey, TActionKey actionKey, StateTransitionInfo stateTransitionInfo, TStateKey resultingStateKey)
        {
            var stateActionPair = new StateActionPair <TStateKey, TActionKey>(precedingStateKey, actionKey);

            ActionLookup.Add(precedingStateKey, actionKey);
            ActionInfoLookup.TryAdd(stateActionPair, default);
            ResultingStateLookup.Add(stateActionPair, resultingStateKey);
            StateTransitionInfoLookup.TryAdd(new StateTransition <TStateKey, TActionKey>(stateActionPair, resultingStateKey), stateTransitionInfo);
            PredecessorGraph.Add(resultingStateKey, precedingStateKey);
        }
Example #9
            public override bool Equals(object obj)
            {
                if (obj is StateActionPair)
                {
                    StateActionPair sec = (StateActionPair)obj;
                    return(this.m_actionIndex == sec.m_actionIndex && this.m_stateString == sec.m_stateString);
                }

                return(false);
            }
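A type that overrides Equals this way should also override GetHashCode so that hash-based containers agree with the equality above; a minimal sketch using the same two fields (assumed to be the full equality state):

            public override int GetHashCode()
            {
                unchecked
                {
                    int hash = m_actionIndex.GetHashCode();
                    hash = hash * 31 + (m_stateString != null ? m_stateString.GetHashCode() : 0);
                    return hash;
                }
            }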
Example #10
        /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
        /// <include file="documentation.xml" path="/AI/Search/ISearch/SearchCommon/param[@name='limiter']"/>
        /// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/param[@name='initialState']"/>
        public SearchResult IterativeDeepeningSearch(StateType initialState, SearchLimiter limiter,
                                                     out StateActionPair <StateType, ActionType> solution)
        {
            SearchResult result;
            int          userDepthLimit = DepthLimit; // save the original depth limit so we can restore it later

            if (limiter == null)                      // if we have no limiter, we might as well do a regular search
            {
                DepthLimit = Infinite;                // with unlimited depth because we have unlimited time
                result     = Search(initialState, limiter, out solution);
            }
            else
            {
                PrepareToStartSearch(initialState); // otherwise, verify that the search is valid

                limiter.Start(); // limiter is known to be non-null in this branch
                BeginIterativeDeepeningSearch(initialState);

                result   = SearchResult.Failed; // assume that we couldn't complete a single iteration
                solution = new StateActionPair <StateType, ActionType>();

                // gradually increase the depth limit, starting from 1
                for (DepthLimit = 1; ; DepthLimit = DepthLimit == int.MaxValue ? Infinite : DepthLimit + 1)
                {
                    // start a new search with the given depth limit, and run it until it completes or the time expires
                    StateActionPair <StateType, ActionType> currentSolution;
                    SearchResult currentResult = PerformSearch(initialState, limiter, out currentSolution);

                    // Failed, in this case, means that the search couldn't complete because of the limiter, while LimitReached
                    // means that the search completed but was limited by the depth limit
                    if (currentResult == SearchResult.Failed)
                    {
                        break;
                    }

                    // the search completed, so store the result and solution
                    result   = currentResult;
                    solution = currentSolution;

                    // if the search was not limited by depth, increasing the depth won't help, so we're done
                    if (currentResult != SearchResult.LimitReached)
                    {
                        break;
                    }
                }
                EndIterativeDeepeningSearch();
            }

            DepthLimit = userDepthLimit; // restore the previous depth limit
            return(result);
        }
Example #11
        public static void GetExpandedDepthMap <TStateKey, TStateInfo, TActionKey, TActionInfo, TStateTransitionInfo>(this PlanGraph <TStateKey, TStateInfo, TActionKey, TActionInfo, TStateTransitionInfo> planGraph, TStateKey rootKey, NativeHashMap <TStateKey, int> depthMap, NativeQueue <StateHorizonPair <TStateKey> > queue)
            where TStateKey : struct, IEquatable <TStateKey>
            where TStateInfo : struct, IStateInfo
            where TActionKey : struct, IEquatable <TActionKey>
            where TActionInfo : struct, IActionInfo
            where TStateTransitionInfo : struct
        {
            depthMap.Clear();
            queue.Clear();
            var actionLookup         = planGraph.ActionLookup;
            var resultingStateLookup = planGraph.ResultingStateLookup;

            depthMap.TryAdd(rootKey, 0);
            queue.Enqueue(new StateHorizonPair <TStateKey> {
                StateKey = rootKey, Horizon = 0
            });

            while (queue.TryDequeue(out var stateHorizonPair))
            {
                var stateKey    = stateHorizonPair.StateKey;
                var horizon     = stateHorizonPair.Horizon;
                var nextHorizon = horizon + 1;

                if (actionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator))
                {
                    do
                    {
                        var stateActionPair = new StateActionPair <TStateKey, TActionKey>(stateKey, actionKey);
                        if (resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var resultIterator))
                        {
                            do
                            {
                                // Skip unexpanded states
                                if (!actionLookup.TryGetFirstValue(resultingStateKey, out _, out _))
                                {
                                    continue;
                                }

                                // the first add is the shallowest due to BFS ordering
                                if (depthMap.TryAdd(resultingStateKey, nextHorizon))
                                {
                                    queue.Enqueue(new StateHorizonPair <TStateKey>()
                                    {
                                        StateKey = resultingStateKey, Horizon = nextHorizon
                                    });
                                }
                            } while (resultingStateLookup.TryGetNextValue(out resultingStateKey, ref resultIterator));
                        }
                    } while (actionLookup.TryGetNextValue(out actionKey, ref iterator));
                }
            }
        }
Example #12
    public StateActionPair GetNextStateActionPair(State fromState)
    {
        StateActionPair pair   = qFunction.ProbPickAction(fromState);
        float           reward = states[pair.state].reward;

        if (states[pair.state].isGoal)
        {
            Debug.Log("AGENT IN GOAL");
            isAgentInGoalState = true;
            episodeText.text   = "Episode: " + ++curEpisode;
        }
        qFunction.UpdateQValue(fromState, pair.state, pair.action, reward);
        return(pair);
    }
Example #13
        /// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/*"/>
        public sealed override SearchResult Search(StateType initialState, SearchLimiter limiter,
                                                   out StateActionPair <StateType, ActionType> solution)
        {
            PrepareToStartSearch(initialState);
            if (limiter != null)
            {
                limiter.Start();
            }
            // from PerformSearch(), Failed means that the limiter caused the search to abort, while LimitReached means that the
            // search completed, but was truncated by the depth limit. we'll convert Failed to LimitReached here.
            SearchResult result = PerformSearch(initialState, limiter, out solution);

            return(result == SearchResult.Failed ? SearchResult.LimitReached : result);
        }
Example #14
        /// <summary>Given a parent node and a <see cref="StateActionPair{S,A}"/> of one of its successors or predecessors,
        /// returns a new <see cref="Node{S,A}"/> object representing the state.
        /// </summary>
        Node <StateType, ActionType> MakeNode(Node <StateType, ActionType> parent, StateActionPair <StateType, ActionType> pair)
        {
            Node <StateType, ActionType> node = new Node <StateType, ActionType>();

            node.Action   = pair.Action;
            node.PathCost = parent.PathCost + Problem.GetActionCost(parent.State, pair.State, pair.Action);
            node.Depth    = parent.Depth + 1;
            node.Parent   = parent;
            node.State    = pair.State;
            if (useHeuristic)
            {
                node.HeuristicCost = Problem.GetHeuristic(pair.State);
            }
            return(node);
        }
Example #15
 public StateRewardPair getStateRewardPair(StateActionPair saPair)
 {
     if (!_modelTable.ContainsKey(saPair))
     {
          // If the StateRewardPair is not yet in the table, it has to be
          // created: the possible next state is saved in a new StateRewardPair.
          // To do so, all possible Movements for the next state have to be
          // extracted -> a new method in StateExtractor that retrieves the
          // possible Movements for any State or Position, independently of
          // the mower.
         _modelTable[saPair] = new StateRewardPair(saPair.state, _initVal);
     }
     return(_modelTable[saPair]);
 }
Example #16
        bool UpdateSubplanCompleteStatus(StateActionPair <TStateKey, TActionKey> stateActionPair, out ActionInfo updatedActionInfo)
        {
            updatedActionInfo = planGraph.ActionInfoLookup[stateActionPair];
            var originalCompleteStatus = updatedActionInfo.SubplanIsComplete;

            updatedActionInfo.SubplanIsComplete = true;

            // Update complete status
            planGraph.ResultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var iterator);
            do
            {
                updatedActionInfo.SubplanIsComplete &= planGraph.StateInfoLookup[resultingStateKey].SubplanIsComplete;
            } while (planGraph.ResultingStateLookup.TryGetNextValue(out resultingStateKey, ref iterator));

            return(originalCompleteStatus != updatedActionInfo.SubplanIsComplete);
        }
Example #17
    /// <summary>
    /// Returns the best action (with highest expected reward) for the given state.
    /// </summary>
    /// <returns>The best action to take in the given state.</returns>
    /// <param name="state">The given state.</param>
    public MovementAction getBestActionForState(State state)
    {
        float          bestReward = float.MinValue;
        MovementAction bestAction = null;

        foreach (MovementAction action in CurActions)
        {
            StateActionPair sap    = new StateActionPair(state, action);
            float           reward = this.getQValue(sap);
            if (bestAction == null || reward > bestReward)
            {
                bestAction = action;
                bestReward = reward;
            }
        }
        return(bestAction);
    }
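A common companion to this greedy selection is epsilon-greedy exploration; a minimal sketch under the assumption that CurActions is an IList<MovementAction> (the method name is hypothetical, not from the source):

    public MovementAction getEpsilonGreedyAction(State state, float epsilon)
    {
        if (UnityEngine.Random.value < epsilon)
        {
            // explore: pick a uniformly random action
            return CurActions[UnityEngine.Random.Range(0, CurActions.Count)];
        }
        return getBestActionForState(state); // exploit: highest Q-value
    }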
Example #18
            public ActionContext UpdateInfo(int? visitCount = null, bool? complete = null, float3? cumulativeReward = null)
            {
                var actionInfoLookup = Builder.planGraph.ActionInfoLookup;
                var stateActionPair  = new StateActionPair <TStateKey, TActionKey>(StateKey, ActionKey);

                if (!actionInfoLookup.TryGetValue(stateActionPair, out var actionInfo))
                {
                    throw new ArgumentException($"Action {ActionKey} for state {StateKey} does not exist in the plan graph.");
                }

                actionInfo.SubplanIsComplete        = complete ?? actionInfo.SubplanIsComplete;
                actionInfo.CumulativeRewardEstimate = cumulativeReward ?? actionInfo.CumulativeRewardEstimate;

                actionInfoLookup.Remove(stateActionPair);
                actionInfoLookup.TryAdd(stateActionPair, actionInfo);

                return(this);
            }
Example #19
        /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
        protected override SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                                      out StateActionPair <StateType, ActionType> solution)
        {
            solution = new StateActionPair <StateType, ActionType>();

            float bestUtility = float.NegativeInfinity;
            int   player      = Game.GetPlayerToMove(initialState);

            this.limiter  = limiter;
            depthLimitHit = searchAborted = false;

            foreach (Move <StateType, ActionType> move in Game.GetSuccessors(initialState)) // for each move available
            {
                // get the estimated utilities of the move for all players
                float[] utilities = GetExpectedUtilities(move.State, 1);
                if (searchAborted)
                {
                    return(SearchResult.Failed);    // if the limiter caused an abort, just return immediately
                }
                // if the estimated utility of this move for the player moving at the root is the best yet, save it
                if (utilities[player] > bestUtility)
                {
                    bestUtility = utilities[player];
                    solution    = new StateActionPair <StateType, ActionType>(move.State, move.Action);

                    if (bestUtility == MaxUtility) // if the given move is optimal, we don't need to search further
                    {
                        // if a terminal state was reached in the line of search that led to this maximal utility, then we don't
                        // need another round of iterative deepening search because there's no doubt about this particular move,
                        // and since this move is optimal, that's all that matters
                        if (wasTerminal)
                        {
                            depthLimitHit = false;
                        }
                        break;
                    }
                }
            }

            this.limiter = null; // release the limiter

            return(depthLimitHit ? SearchResult.LimitReached : SearchResult.Success);
        }
Example #20
        /// <inheritdoc cref="IPlan"/>
        public int GetResultingStates(TStateKey planStateKey, TActionKey actionKey, IList <TStateKey> resultingPlanStateKeys)
        {
            planData.CompletePlanningJobs();
            resultingPlanStateKeys?.Clear();

            var count                = 0;
            var stateActionPair      = new StateActionPair <TStateKey, TActionKey>(planStateKey, actionKey);
            var resultingStateLookup = planData.PlanGraph.ResultingStateLookup;

            if (resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingState, out var iterator))
            {
                do
                {
                    resultingPlanStateKeys?.Add(resultingState);
                    count++;
                } while (resultingStateLookup.TryGetNextValue(out resultingState, ref iterator));
            }

            return(count);
        }
Example #21
 // Update is called once per frame
 void Update()
 {
     nextMoveTime += Time.deltaTime;
      if (environment != null && environment.curEpisode < environment.numEpisodes && nextMoveTime >= slowDownTime)
     {
         nextMoveTime = 0;
          if (!environment.isAgentInGoalState) // environment was already null-checked above
         {
             print("Getting next action");
             StateActionPair nextStateActionPair = environment.GetNextStateActionPair(curState);
             curState = nextStateActionPair.state;
             Move(nextStateActionPair.action);
         }
         else
         {
             MoveToInitialState();
             environment.isAgentInGoalState = false;
         }
     }
 }
Example #22
 /// <summary>
 /// Initializes a new instance of the <see cref="T:EligibilityTable+EligibilityTableKeyValuePair"/> class.
 /// </summary>
 /// <param name="saPair">The <see cref="T:StateExtractor+StateActionPair"/> key.</param>
 /// <param name="floatValue">The float value.</param>
 public EligibilityTableKeyValuePair(StateActionPair stateActionPair, float floatValue)
     : base(stateActionPair, floatValue)
 {
 }
Example #23
    /// <summary>
    /// Learn using the observed reward.
    /// </summary>
    /// <param name="reward">The observed reward.</param>
    public override void Learn(float reward)
    {
        if (_showParamLabel)
        {
            _receivers[0].InitialValues(Greediness, DiscountValue, LearnRate, InitialQValue,
                                        Run_with_etraces, Gamma, ModelPlanning, Refined, N);
        }
        // Old state
        StateActionPair oldSAP    = new StateActionPair(_lastState, _lastAction);
        float           oldQvalue = _qTable.getQValue(oldSAP);

        // Current state
        State           currentState      = base.CurrentState;
        MovementAction  bestCurrentAction = _qTable.getBestActionForState(currentState);
        StateActionPair currentSAP        = new StateActionPair(currentState, bestCurrentAction);
        float           bestCurrentQValue = _qTable.getQValue(currentSAP);

        if (Run_with_etraces)
        {
            float delta = reward + DiscountValue * bestCurrentQValue - oldQvalue;
            _eTable.SetEligibilityValue(oldSAP, 1f);
            _qTable.AddScaledValues(LearnRate * delta, _eTable);
            _eTable.ScaleAllEligibilityValues(DiscountValue * Gamma);
        }
        else
        {
            // Standard QLearning
            float newQValue = Mathf.Lerp(oldQvalue, reward + (DiscountValue * bestCurrentQValue), LearnRate);
            _qTable.setQValue(oldSAP, newQValue);
        }
        // Refined Model
        if (ModelPlanning && Refined)
        {
            // Update the model according to the observed state
            _mTable.IncorporateObservedState(currentState);

            State          virtualFromState, virtualToState;
            MovementAction virtualPerformedAction;
            float          virtualReward;
        // Perform N virtual steps
            for (int i = 0; i < N; i++)
            {
            // Generate a virtual step
                bool virtualStepGenerated = _mTable.GenerateRandomModelStep(out virtualFromState, out virtualPerformedAction, out virtualToState, out virtualReward);
                if (virtualStepGenerated)
                {
                    StateActionPair virtualFromSAP = new StateActionPair(virtualFromState, virtualPerformedAction);
                    // Standard QLearning
                    float fromStateQVal = _qTable.getQValue(virtualFromSAP);
                    // Get the best action after the virtual step
                    MovementAction  bestAction   = _qTable.getBestActionForState(virtualToState);
                    StateActionPair virtualToSAP = new StateActionPair(virtualToState, bestAction);
                    // Q value update for the virtual step
                    float toStateQVal = _qTable.getQValue(virtualToSAP);
                    float newQVal     = Mathf.Lerp(fromStateQVal, virtualReward + (DiscountValue * toStateQVal), LearnRate);
                    _qTable.setQValue(virtualFromSAP, newQVal);
                }
            }
        }
        // DynaQ Model
        if (ModelPlanning && !Refined)
        {
            _simpleMTable.setStateRewardPairAtStateActionPair(oldSAP, new StateRewardPair(currentState, reward));
            for (int i = 0; i < N; i++)
            {
                StateActionPair randSAP = _qTable.getRandomVisitedStateAndAction();
                StateRewardPair srp     = _simpleMTable.getStateRewardPair(randSAP);
                // Standard QLearning
                float qVal = _qTable.getQValue(randSAP);

                //MovementAction bAct = _qTable.getBestActionForState(currentState);
                MovementAction bAct = _qTable.getBestActionForState(srp.State);

                // new (current) parameters
                //StateActionPair cSAP = new StateActionPair(currentState, bestCurrentAction);
                StateActionPair cSAP    = new StateActionPair(srp.State, bAct);
                float           bQVal   = _qTable.getQValue(cSAP);
                float           newQVal = Mathf.Lerp(qVal, srp.Reward + (DiscountValue * bQVal), LearnRate);
                _qTable.setQValue(randSAP, newQVal);
            }
        }
    }
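The Run_with_etraces branch above is a TD(lambda)-style update in which the example's Gamma plays the role of the trace-decay lambda. A minimal, dictionary-based sketch of the same scheme (all names assumed; the example's AddScaledValues and ScaleAllEligibilityValues bundle the loop below into the table types):

    // Assumes System.Collections.Generic.
    static void TdLambdaUpdate(Dictionary<(int state, int action), float> q,
                               Dictionary<(int state, int action), float> e,
                               (int state, int action) lastPair,
                               float reward, float bestNextQ,
                               float alpha, float gamma, float lambda)
    {
        float delta = reward + gamma * bestNextQ - q[lastPair]; // TD error
        e[lastPair] = 1f;                                       // replacing trace for the visited pair
        foreach (var key in new List<(int state, int action)>(q.Keys))
        {
            float trace;
            e.TryGetValue(key, out trace);
            q[key] += alpha * delta * trace;  // scale each Q-value by its trace
            e[key]  = trace * gamma * lambda; // decay all traces
        }
    }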
Example #24
        /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
        protected override SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                                      out StateActionPair <StateType, ActionType> solution)
        {
            solution = new StateActionPair <StateType, ActionType>();

            // If we're performing an iterative deepening search, rootUtilities and rootSuccessors will be non-null. In that
            // case, we should use them to store utilities and retrieve successor nodes. The purpose is to allow some state,
            // in particular move ordering at the root, to be retained between iterations of the iterative deepening search.
            // The rootUtilities array is used to sort the rootSuccessors array so that the best moves from the previous
            // iteration can be tried first.
            //
            // TODO: should we extend this idea to store the best moves from each ply in the search?
            // TODO: we should probably also implement the killer heuristic and/or history heuristics (this requires some
            //       more complex communication between the game and the search)
            // TODO: we should implement the scout part of negascout
            // this url is quite good: http://www.fierz.ch/strategy.htm
            if (rootUtilities != null)
            {
                for (int i = 0; i < rootUtilities.Length; i++)
                {
                    rootUtilities[i] = float.PositiveInfinity;
                }
            }

            this.limiter  = limiter;
            depthLimitHit = wasTerminal = searchAborted = false;

            float bestUtility = float.NegativeInfinity, best0 = float.NegativeInfinity, best1 = float.NegativeInfinity;
            int   player = Game.GetPlayerToMove(initialState);
            int   index  = 0; // the index of the successor within rootSuccessor that we're currently examining

            foreach (Move <StateType, ActionType> move in
                     rootSuccessors != null ? rootSuccessors : Game.GetSuccessors(initialState))
            {
                // invoke the alpha-beta search to estimate the utility value of the move's ending state
                float utility = GetUtilityEstimate(move.State, best0, best1, 1, player);

                // we'll use Failed to indicate that the limiter caused the search to abort
                if (searchAborted)
                {
                    return(SearchResult.Failed);
                }

                // the alpha-beta search returns the utility of the move from the perspective of the player whose turn it is in
                // the state resulting from the move. so find out which player that is.
                int otherPlayer = Game.GetPlayerToMove(move.State);

                // if the player moving at the successor is not the same as the player moving at the root, then we need to
                // reverse the utility. however, if the move led to a chance node, represented as a "player" of -1, the utility
                // is already from the correct viewpoint, so we don't need to reverse it.
                if (player != otherPlayer && otherPlayer != -1)
                {
                    utility = -utility;
                }

                // if we're doing an iterative deepening search, store the utility of this move
                if (rootUtilities != null)
                {
                    rootUtilities[index++] = utility;
                }

                // if the move is better than whatever we've got so far, store the move as the new best move
                if (utility > bestUtility)
                {
                    bestUtility = utility;
                    // we'll store the best move in the "solution" parameter
                    solution = new StateActionPair <StateType, ActionType>(move.State, move.Action);
                    // also, if the move was so good that it got the maximum utility, there's no point in searching further, so
                    // we'll end the search immediately
                    if (utility == MaxUtility)
                    {
                        // if a terminal state was reached in the line of search that led to this maximal utility, then we don't
                        // need another round of iterative deepening search because there's no doubt about this particular move,
                        // and since this move is optimal, that's all that matters
                        if (wasTerminal)
                        {
                            depthLimitHit = false;
                        }
                        break;
                    }

                    // update the alpha-beta values for the player at the root
                    if (player == 0)
                    {
                        if (utility > best0)
                        {
                            best0 = utility;
                        }
                    }
                    else
                    {
                        if (utility > best1)
                        {
                            best1 = utility;
                        }
                    }
                }
            }

            // if we're doing iterative deepening, sort the moves by utility
            if (rootSuccessors != null)
            {
                SortRootSuccessors();
            }

            this.limiter = null; // release the limiter

            return(depthLimitHit ? SearchResult.LimitReached : SearchResult.Success);
        }
Example #25
 /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
 protected abstract SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                               out StateActionPair <StateType, ActionType> solution);
Example #26
 /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
 /// <include file="documentation.xml" path="/AI/Search/ISearch/SearchCommon/param[@name='limiter']"/>
 public SearchResult IterativeDeepeningSearch(SearchLimiter limiter,
                                              out StateActionPair <StateType, ActionType> solution)
 {
     return(IterativeDeepeningSearch(game.GetInitialState(), limiter, out solution));
 }
Example #27
 /// <summary>
 /// Sets the eligibility value of the given <see cref="T:StateActionPair"/>.
 /// </summary>
 /// <param name="saPair">The <see cref="T:StateActionPair"/> whose eligibility value is set.</param>
 /// <param name="eligibility">The eligibility value to set.</param>
 public void SetEligibilityValue(StateActionPair saPair, float eligibility)
 {
     eligibilityTable[saPair] = eligibility;
 }
Example #28
 /// <summary>
 /// Sets the Q-value for the given <see cref="T:StateActionPair"/>.
 /// </summary>
 /// <param name="saPair">The <see cref="T:StateActionPair"/>.</param>
 /// <param name="reward">The Q-value (expected accumulated reward) to set.</param>
 public void setQValue(StateActionPair saPair, float reward)
 {
     _qTable[saPair] = reward;
 }
Example #29
 /// <include file="documentation.xml" path="/AI/Search/ISearch/Search/*"/>
 public sealed override SearchResult Search(SearchLimiter limiter,
                                            out StateActionPair <StateType, ActionType> solution)
 {
     return(Search(game.GetInitialState(), limiter, out solution));
 }
Example #30
 /// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
 /// <include file="documentation.xml" path="/AI/Search/SearchBase/Search_Timeout/param[@name='msTimeLimit']"/>
 /// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/param[@name = 'initialState']"/>
 public SearchResult IterativeDeepeningSearch(StateType initialState, int msTimeLimit,
                                              out StateActionPair <StateType, ActionType> solution)
 {
     return(IterativeDeepeningSearch(initialState, msTimeLimit == Infinite ? null : new TimeLimiter(msTimeLimit),
                                     out solution));
 }