internal void RemoveState(TStateKey stateKey)
{
    var predecessorQueue = new NativeQueue<TStateKey>(Allocator.Temp);

    // State Info
    StateInfoLookup.Remove(stateKey);

    // Actions
    if (ActionLookup.TryGetFirstValue(stateKey, out var actionKey, out var actionIterator))
    {
        do
        {
            var stateActionPair = new StateActionPair<TStateKey, TActionKey>(stateKey, actionKey);

            // Action Info
            ActionInfoLookup.Remove(stateActionPair);

            // Results
            if (ResultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var resultIterator))
            {
                do
                {
                    // Remove Predecessor Link
                    if (PredecessorGraph.TryGetFirstValue(resultingStateKey, out var predecessorKey, out var predecessorIterator))
                    {
                        predecessorQueue.Clear();

                        do
                        {
                            if (!stateKey.Equals(predecessorKey))
                            {
                                predecessorQueue.Enqueue(predecessorKey);
                            }
                        }
                        while (PredecessorGraph.TryGetNextValue(out predecessorKey, ref predecessorIterator));

                        // Reset Predecessors
                        PredecessorGraph.Remove(resultingStateKey);

                        // Requeue Predecessors
                        while (predecessorQueue.TryDequeue(out var queuedPredecessorKey))
                        {
                            PredecessorGraph.Add(resultingStateKey, queuedPredecessorKey);
                        }
                    }

                    // Action Result Info
                    StateTransitionInfoLookup.Remove(new StateTransition<TStateKey, TActionKey>(stateKey, stateActionPair.ActionKey, resultingStateKey));
                }
                while (ResultingStateLookup.TryGetNextValue(out resultingStateKey, ref resultIterator));

                ResultingStateLookup.Remove(stateActionPair);
            }
        }
        while (ActionLookup.TryGetNextValue(out actionKey, ref actionIterator));

        ActionLookup.Remove(stateKey);
    }

    // Predecessors
    PredecessorGraph.Remove(stateKey);

    predecessorQueue.Dispose();
}
bool UpdateStateValue(TStateKey stateKey, NativeMultiHashMap<TStateKey, TActionKey> actionLookup,
    NativeHashMap<TStateKey, StateInfo> stateInfoLookup,
    NativeHashMap<StateActionPair<TStateKey, TActionKey>, ActionInfo> actionInfoLookup)
{
    var stateInfo = stateInfoLookup[stateKey];

    // Handle case of no valid actions (mark complete)
    if (!actionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator))
    {
        if (!stateInfo.SubplanIsComplete)
        {
            // State was not marked terminal, so the value should be reset, so as not to use the estimated value.
            stateInfo.CumulativeRewardEstimate = new BoundedValue(0, 0, 0);
            stateInfo.SubplanIsComplete = true;
            stateInfoLookup[stateKey] = stateInfo;
            return true;
        }

        // Terminal state. No update required.
        return false;
    }

    var originalValue = stateInfo.CumulativeRewardEstimate;
    var originalCompleteStatus = stateInfo.SubplanIsComplete;
    stateInfo.CumulativeRewardEstimate = new BoundedValue(float.MinValue, float.MinValue, float.MinValue);
    stateInfo.SubplanIsComplete = true;
    var maxLowerBound = float.MinValue;

    // Pick max action; find max lower bound
    do
    {
        var stateActionPair = new StateActionPair<TStateKey, TActionKey>(stateKey, actionKey);
        var actionInfo = actionInfoLookup[stateActionPair];

        stateInfo.CumulativeRewardEstimate = stateInfo.CumulativeRewardEstimate.Average < actionInfo.CumulativeRewardEstimate.Average
            ? actionInfo.CumulativeRewardEstimate
            : stateInfo.CumulativeRewardEstimate;

        maxLowerBound = math.max(maxLowerBound, actionInfo.CumulativeRewardEstimate.LowerBound);
    }
    while (actionLookup.TryGetNextValue(out actionKey, ref iterator));

    // Update complete status (ignore pruned actions)
    actionLookup.TryGetFirstValue(stateKey, out actionKey, out iterator);
    do
    {
        var stateActionPair = new StateActionPair<TStateKey, TActionKey>(stateKey, actionKey);
        var actionInfo = actionInfoLookup[stateActionPair];

        if (actionInfo.CumulativeRewardEstimate.UpperBound >= maxLowerBound)
        {
            stateInfo.SubplanIsComplete &= actionInfo.SubplanIsComplete;
        }
    }
    while (actionLookup.TryGetNextValue(out actionKey, ref iterator));

    // Reassign
    stateInfoLookup[stateKey] = stateInfo;

    return !originalValue.Approximately(stateInfo.CumulativeRewardEstimate) || originalCompleteStatus != stateInfo.SubplanIsComplete;
}
public bool TryGetOptimalAction(TStateKey stateKey, out TActionKey action)
{
    action = default;

    bool actionsFound = ActionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator);
    if (!actionsFound)
    {
        return false;
    }

    var maxCumulativeReward = float.MinValue;

    do
    {
        var stateActionPair = new StateActionPair<TStateKey, TActionKey>(stateKey, actionKey);
        ActionInfoLookup.TryGetValue(stateActionPair, out var actionInfo);

        if (actionInfo.CumulativeRewardEstimate.Average > maxCumulativeReward)
        {
            action = actionKey;
            maxCumulativeReward = actionInfo.CumulativeRewardEstimate.Average;
        }
    }
    while (ActionLookup.TryGetNextValue(out actionKey, ref iterator));

    return true;
}
bool UpdateCumulativeReward(StateActionPair<TStateKey, TActionKey> stateActionPair,
    NativeMultiHashMap<StateActionPair<TStateKey, TActionKey>, TStateKey> resultingStateLookup,
    NativeHashMap<TStateKey, StateInfo> stateInfoLookup,
    NativeHashMap<StateActionPair<TStateKey, TActionKey>, ActionInfo> actionInfoLookup,
    NativeHashMap<StateTransition<TStateKey, TActionKey>, StateTransitionInfo> stateTransitionInfoLookup)
{
    var actionInfo = actionInfoLookup[stateActionPair];
    var originalValue = actionInfo.CumulativeRewardEstimate;
    var originalCompleteStatus = actionInfo.SubplanIsComplete;

    actionInfo.CumulativeRewardEstimate = default;
    actionInfo.SubplanIsComplete = true;

    resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var iterator);
    do
    {
        var stateTransitionInfo = stateTransitionInfoLookup[new StateTransition<TStateKey, TActionKey>(stateActionPair, resultingStateKey)];
        var resultingStateInfo = stateInfoLookup[resultingStateKey];

        actionInfo.SubplanIsComplete &= resultingStateInfo.SubplanIsComplete;
        actionInfo.CumulativeRewardEstimate += stateTransitionInfo.Probability
            * (stateTransitionInfo.TransitionUtilityValue + DiscountFactor * resultingStateInfo.CumulativeRewardEstimate);
    }
    while (resultingStateLookup.TryGetNextValue(out resultingStateKey, ref iterator));

    actionInfoLookup[stateActionPair] = actionInfo;

    return !originalValue.Approximately(actionInfo.CumulativeRewardEstimate) || originalCompleteStatus != actionInfo.SubplanIsComplete;
}
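// For reference, the backup above corresponds to Q(s,a) = sum over s' of P(s'|s,a) * (R(s,a,s') + gamma * V(s')).
// Below is a minimal standalone sketch of that expectation using plain .NET collections; the type and member
// names are illustrative assumptions, not the plan-graph API.
using System.Collections.Generic;
using System.Linq;

static class ExpectedBackupSketch
{
    // transitions: resulting state -> (transition probability, immediate reward)
    // stateValues: resulting state -> current value estimate V(s')
    public static float BackUpActionValue(
        IReadOnlyDictionary<int, (float probability, float reward)> transitions,
        IReadOnlyDictionary<int, float> stateValues,
        float discountFactor)
    {
        // Expected discounted return of taking the action, given the current state-value estimates.
        return transitions.Sum(kvp =>
            kvp.Value.probability * (kvp.Value.reward + discountFactor * stateValues[kvp.Key]));
    }
}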
/// <summary>
/// Returns the Q-value.
/// </summary>
/// <returns>The Q-value.</returns>
/// <param name="saPair">The <see cref="T:StateActionPair"/> whose Q-value is returned.</param>
public float getQValue(StateActionPair saPair)
{
    if (!_qTable.ContainsKey(saPair))
    {
        setQValue(saPair, initialValue);
    }

    return _qTable[saPair];
}
/// <summary>
/// Returns the eligibility value of the given <see cref="T:StateActionPair"/>.
/// </summary>
/// <returns>The eligibility value.</returns>
/// <param name="saPair">The <see cref="T:StateActionPair"/> whose eligibility value is returned.</param>
public float GetEligibilityValue(StateActionPair saPair)
{
    if (!eligibilityTable.ContainsKey(saPair))
    {
        SetEligibilityValue(saPair, initialValue);
    }

    return eligibilityTable[saPair];
}
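// Eligibility values like the one read above are typically bumped for the visited pair, used to scale the TD error
// across all recently visited pairs, and then decayed (compare the Run_with_etraces branch in Learn further below).
// A standalone TD(lambda)-style sketch over plain dictionaries; every name here is an illustrative assumption.
using System.Collections.Generic;
using System.Linq;

static class EligibilityTraceSketch
{
    public static void Backup(
        Dictionary<(int state, int action), float> qTable,
        Dictionary<(int state, int action), float> traces,
        (int state, int action) visitedPair,
        float tdError, float learningRate, float discountFactor, float lambda)
    {
        traces[visitedPair] = 1f; // replacing trace for the pair just visited

        foreach (var key in qTable.Keys.ToList())
        {
            traces.TryGetValue(key, out float trace);
            qTable[key] += learningRate * tdError * trace; // credit proportional to recency
            traces[key] = discountFactor * lambda * trace; // decay the trace
        }
    }
}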
public void UpdateQValue(State fromState, State toState, AgentAction action, float reward)
{
    StateActionPair destPair = new StateActionPair(toState, action);
    float learningRate = 1f; // 1 / (numVisited[new StateActionPair(fromState, action)]++ + 1);

    QFunctionDic[fromState][destPair] = (1 - learningRate) * QFunctionDic[fromState][destPair]
        + learningRate * (reward + discountFactor * QMax(toState));

    Debug.Log("(FROM: " + fromState.position[0] + ", " + fromState.position[2]
        + " TO: " + toState.position[0] + ", " + toState.position[2]
        + "): " + QFunctionDic[fromState][destPair]);
}
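// The commented-out expression above suggests a visit-count-based learning rate; with learningRate fixed at 1 the
// update simply overwrites the old estimate. The textbook tabular rule the method approximates is
// Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')). A self-contained sketch with plain
// dictionaries follows; the names are illustrative assumptions, not this project's types.
using System.Collections.Generic;
using System.Linq;

static class TabularQLearningSketch
{
    public static void Update(
        Dictionary<(int state, int action), float> qTable,
        int fromState, int action, int toState, float reward,
        float learningRate, float discountFactor)
    {
        // max_a' Q(s', a') over the actions recorded for the next state (0 if none recorded yet)
        float maxNext = qTable.Where(kvp => kvp.Key.state == toState)
                              .Select(kvp => kvp.Value)
                              .DefaultIfEmpty(0f)
                              .Max();

        qTable.TryGetValue((fromState, action), out float oldQ);
        qTable[(fromState, action)] = (1 - learningRate) * oldQ
                                      + learningRate * (reward + discountFactor * maxNext);
    }
}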
void WriteEdgeToState(TStateKey precedingStateKey, TActionKey actionKey, StateTransitionInfo stateTransitionInfo, TStateKey resultingStateKey)
{
    var stateActionPair = new StateActionPair<TStateKey, TActionKey>(precedingStateKey, actionKey);

    ActionLookup.Add(precedingStateKey, actionKey);
    ActionInfoLookup.TryAdd(stateActionPair, default);
    ResultingStateLookup.Add(stateActionPair, resultingStateKey);
    StateTransitionInfoLookup.TryAdd(new StateTransition<TStateKey, TActionKey>(stateActionPair, resultingStateKey), stateTransitionInfo);
    PredecessorGraph.Add(resultingStateKey, precedingStateKey);
}
public override bool Equals(object obj)
{
    if (obj is StateActionPair)
    {
        StateActionPair sec = (StateActionPair)obj;
        return this.m_actionIndex == sec.m_actionIndex && this.m_stateString == sec.m_stateString;
    }

    return false;
}
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
/// <include file="documentation.xml" path="/AI/Search/ISearch/SearchCommon/param[@name='limiter']"/>
/// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/param[@name='initialState']"/>
public SearchResult IterativeDeepeningSearch(StateType initialState, SearchLimiter limiter,
                                             out StateActionPair<StateType, ActionType> solution)
{
    SearchResult result;
    int userDepthLimit = DepthLimit; // save the original depth limit so we can restore it later

    if (limiter == null) // if we have no limit, we might as well do a regular search
    {
        DepthLimit = Infinite; // with unlimited depth because we have unlimited time
        result = Search(initialState, limiter, out solution);
    }
    else
    {
        PrepareToStartSearch(initialState); // otherwise, verify that the search is valid

        if (limiter != null)
        {
            limiter.Start();
        }

        BeginIterativeDeepeningSearch(initialState);

        result = SearchResult.Failed; // assume that we couldn't complete a single iteration
        solution = new StateActionPair<StateType, ActionType>();

        // gradually increase the depth limit, starting from 1
        for (DepthLimit = 1; ; DepthLimit = DepthLimit == int.MaxValue ? Infinite : DepthLimit + 1)
        {
            // start a new search with the given depth limit, and run it until it completes or the time expires
            StateActionPair<StateType, ActionType> currentSolution;
            SearchResult currentResult = PerformSearch(initialState, limiter, out currentSolution);

            // Failed, in this case, means that the search couldn't complete because of the limiter, while LimitReached
            // means that the search completed but was limited by the depth limit
            if (currentResult == SearchResult.Failed)
            {
                break;
            }

            // the search completed, so store the result and solution
            result = currentResult;
            solution = currentSolution;

            // if the search was not limited by depth, increasing the depth won't help, so we're done
            if (currentResult != SearchResult.LimitReached)
            {
                break;
            }
        }

        EndIterativeDeepeningSearch();
    }

    DepthLimit = userDepthLimit; // restore the previous depth limit
    return result;
}
public static void GetExpandedDepthMap<TStateKey, TStateInfo, TActionKey, TActionInfo, TStateTransitionInfo>(
    this PlanGraph<TStateKey, TStateInfo, TActionKey, TActionInfo, TStateTransitionInfo> planGraph,
    TStateKey rootKey, NativeHashMap<TStateKey, int> depthMap, NativeQueue<StateHorizonPair<TStateKey>> queue)
    where TStateKey : struct, IEquatable<TStateKey>
    where TStateInfo : struct, IStateInfo
    where TActionKey : struct, IEquatable<TActionKey>
    where TActionInfo : struct, IActionInfo
    where TStateTransitionInfo : struct
{
    depthMap.Clear();
    queue.Clear();

    var actionLookup = planGraph.ActionLookup;
    var resultingStateLookup = planGraph.ResultingStateLookup;

    depthMap.TryAdd(rootKey, 0);
    queue.Enqueue(new StateHorizonPair<TStateKey> { StateKey = rootKey, Horizon = 0 });

    while (queue.TryDequeue(out var stateHorizonPair))
    {
        var stateKey = stateHorizonPair.StateKey;
        var horizon = stateHorizonPair.Horizon;
        var nextHorizon = horizon + 1;

        if (actionLookup.TryGetFirstValue(stateKey, out var actionKey, out var iterator))
        {
            do
            {
                var stateActionPair = new StateActionPair<TStateKey, TActionKey>(stateKey, actionKey);

                if (resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var resultIterator))
                {
                    do
                    {
                        // Skip unexpanded states
                        if (!actionLookup.TryGetFirstValue(resultingStateKey, out _, out _))
                        {
                            continue;
                        }

                        // first add will be most shallow due to BFS
                        if (depthMap.TryAdd(resultingStateKey, nextHorizon))
                        {
                            queue.Enqueue(new StateHorizonPair<TStateKey> { StateKey = resultingStateKey, Horizon = nextHorizon });
                        }
                    }
                    while (resultingStateLookup.TryGetNextValue(out resultingStateKey, ref resultIterator));
                }
            }
            while (actionLookup.TryGetNextValue(out actionKey, ref iterator));
        }
    }
}
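// The traversal above is a breadth-first depth labelling over the plan graph, skipping unexpanded states.
// A minimal standalone equivalent over a plain adjacency map (the types here are illustrative assumptions,
// not the PlanGraph containers):
using System.Collections.Generic;

static class DepthMapSketch
{
    // successors: state -> directly reachable states; returns the BFS depth of each reachable state.
    public static Dictionary<int, int> GetDepthMap(IReadOnlyDictionary<int, List<int>> successors, int rootKey)
    {
        var depthMap = new Dictionary<int, int> { [rootKey] = 0 };
        var queue = new Queue<int>();
        queue.Enqueue(rootKey);

        while (queue.Count > 0)
        {
            int state = queue.Dequeue();
            if (!successors.TryGetValue(state, out var nextStates))
                continue;

            foreach (int next in nextStates)
            {
                // The first visit is the shallowest one because the traversal is breadth-first.
                if (!depthMap.ContainsKey(next))
                {
                    depthMap[next] = depthMap[state] + 1;
                    queue.Enqueue(next);
                }
            }
        }

        return depthMap;
    }
}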
public StateActionPair GetNextStateActionPair(State fromState)
{
    StateActionPair pair = qFunction.ProbPickAction(fromState);
    float reward = states[pair.state].reward;

    if (states[pair.state].isGoal)
    {
        Debug.Log("AGENT IN GOAL");
        isAgentInGoalState = true;
        episodeText.text = "Episode: " + ++curEpisode;
    }

    qFunction.UpdateQValue(fromState, pair.state, pair.action, reward);
    return pair;
}
/// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/*"/>
public sealed override SearchResult Search(StateType initialState, SearchLimiter limiter,
                                           out StateActionPair<StateType, ActionType> solution)
{
    PrepareToStartSearch(initialState);

    if (limiter != null)
    {
        limiter.Start();
    }

    // from PerformSearch(), Failed means that the limiter caused the search to abort, while LimitReached means that the
    // search completed, but was truncated by the depth limit. we'll convert Failed to LimitReached here.
    SearchResult result = PerformSearch(initialState, limiter, out solution);
    return result == SearchResult.Failed ? SearchResult.LimitReached : result;
}
/// <summary>Given a parent node and a <see cref="StateActionPair{S,A}"/> of one of its successors or predecessors,
/// returns a new <see cref="Node{S,A}"/> object representing the state.
/// </summary>
Node<StateType, ActionType> MakeNode(Node<StateType, ActionType> parent, StateActionPair<StateType, ActionType> pair)
{
    Node<StateType, ActionType> node = new Node<StateType, ActionType>();
    node.Action = pair.Action;
    node.PathCost = parent.PathCost + Problem.GetActionCost(parent.State, pair.State, pair.Action);
    node.Depth = parent.Depth + 1;
    node.Parent = parent;
    node.State = pair.State;

    if (useHeuristic)
    {
        node.HeuristicCost = Problem.GetHeuristic(pair.State);
    }

    return node;
}
public StateRewardPair getStateRewardPair(StateActionPair saPair)
{
    if (!_modelTable.ContainsKey(saPair))
    {
        // If the StateRewardPair is not yet in the table, it has to be saved.
        // Therefore the possible next state has to be saved in a new StateRewardPair.
        // To do so, all possible Movements for the next state have to be extracted
        // -> new method in StateExtractor that retrieves the possible Movements
        // for any State or Position, independently of the mower.
        _modelTable[saPair] = new StateRewardPair(saPair.state, _initVal);
    }

    return _modelTable[saPair];
}
bool UpdateSubplanCompleteStatus(StateActionPair<TStateKey, TActionKey> stateActionPair, out ActionInfo updatedActionInfo)
{
    updatedActionInfo = planGraph.ActionInfoLookup[stateActionPair];
    var originalCompleteStatus = updatedActionInfo.SubplanIsComplete;
    updatedActionInfo.SubplanIsComplete = true;

    // Update complete status
    planGraph.ResultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingStateKey, out var iterator);
    do
    {
        updatedActionInfo.SubplanIsComplete &= planGraph.StateInfoLookup[resultingStateKey].SubplanIsComplete;
    }
    while (planGraph.ResultingStateLookup.TryGetNextValue(out resultingStateKey, ref iterator));

    return originalCompleteStatus != updatedActionInfo.SubplanIsComplete;
}
/// <summary>
/// Returns the best action (with highest expected reward) for the given state.
/// </summary>
/// <returns>The best action to take in the given state.</returns>
/// <param name="state">The given state.</param>
public MovementAction getBestActionForState(State state)
{
    float bestReward = float.MinValue;
    MovementAction bestAction = null;

    foreach (MovementAction action in CurActions)
    {
        StateActionPair sap = new StateActionPair(state, action);
        float reward = this.getQValue(sap);

        if (bestAction == null || reward > bestReward)
        {
            bestAction = action;
            bestReward = reward;
        }
    }

    return bestAction;
}
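// Greedy selection like getBestActionForState is usually wrapped in an exploration policy during training.
// A hypothetical epsilon-greedy wrapper over a plain action-value map; the names are illustrative assumptions,
// not this project's API.
using System;
using System.Collections.Generic;
using System.Linq;

static class EpsilonGreedySketch
{
    static readonly Random Rng = new Random();

    // With probability epsilon pick a uniformly random action, otherwise the highest-valued one.
    public static int PickAction(IReadOnlyDictionary<int, float> actionValues, float epsilon)
    {
        if (Rng.NextDouble() < epsilon)
        {
            return actionValues.Keys.ElementAt(Rng.Next(actionValues.Count));
        }

        return actionValues.OrderByDescending(kvp => kvp.Value).First().Key;
    }
}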
public ActionContext UpdateInfo(int? visitCount = null, bool? complete = null, float3? cumulativeReward = null)
{
    var actionInfoLookup = Builder.planGraph.ActionInfoLookup;
    var stateActionPair = new StateActionPair<TStateKey, TActionKey>(StateKey, ActionKey);

    if (!actionInfoLookup.TryGetValue(stateActionPair, out var actionInfo))
    {
        throw new ArgumentException($"Action {ActionKey} for state {StateKey} does not exist in the plan graph.");
    }

    actionInfo.SubplanIsComplete = complete ?? actionInfo.SubplanIsComplete;
    actionInfo.CumulativeRewardEstimate = cumulativeReward ?? actionInfo.CumulativeRewardEstimate;

    actionInfoLookup.Remove(stateActionPair);
    actionInfoLookup.TryAdd(stateActionPair, actionInfo);

    return this;
}
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
protected override SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                              out StateActionPair<StateType, ActionType> solution)
{
    solution = new StateActionPair<StateType, ActionType>();

    float bestUtility = float.NegativeInfinity;
    int player = Game.GetPlayerToMove(initialState);

    this.limiter = limiter;
    depthLimitHit = searchAborted = false;

    foreach (Move<StateType, ActionType> move in Game.GetSuccessors(initialState)) // for each move available
    {
        // get the estimated utilities of the move for all players
        float[] utilities = GetExpectedUtilities(move.State, 1);
        if (searchAborted)
        {
            return SearchResult.Failed; // if the limiter caused an abort, just return immediately
        }

        // if the estimated utility of this move for the player moving at the root is the best yet, save it
        if (utilities[player] > bestUtility)
        {
            bestUtility = utilities[player];
            solution = new StateActionPair<StateType, ActionType>(move.State, move.Action);

            if (bestUtility == MaxUtility) // if the given move is optimal, we don't need to search further
            {
                // if a terminal state was reached in the line of search that led to this maximal utility, then we don't
                // need another round of iterative deepening search because there's no doubt about this particular move,
                // and since this move is optimal, that's all that matters
                if (wasTerminal)
                {
                    depthLimitHit = false;
                }

                break;
            }
        }
    }

    this.limiter = null; // release the limiter
    return depthLimitHit ? SearchResult.LimitReached : SearchResult.Success;
}
/// <inheritdoc cref="IPlan"/>
public int GetResultingStates(TStateKey planStateKey, TActionKey actionKey, IList<TStateKey> resultingPlanStateKeys)
{
    planData.CompletePlanningJobs();
    resultingPlanStateKeys?.Clear();

    var count = 0;
    var stateActionPair = new StateActionPair<TStateKey, TActionKey>(planStateKey, actionKey);
    var resultingStateLookup = planData.PlanGraph.ResultingStateLookup;

    if (resultingStateLookup.TryGetFirstValue(stateActionPair, out var resultingState, out var iterator))
    {
        do
        {
            resultingPlanStateKeys?.Add(resultingState);
            count++;
        }
        while (resultingStateLookup.TryGetNextValue(out resultingState, ref iterator));
    }

    return count;
}
// Update is called once per frame
void Update()
{
    nextMoveTime += Time.deltaTime;

    if (environment.curEpisode < environment.numEpisodes && nextMoveTime >= slowDownTime)
    {
        nextMoveTime = 0;

        // check for null before dereferencing the environment
        if (environment != null && !environment.isAgentInGoalState)
        {
            print("Getting next action");
            StateActionPair nextStateActionPair = environment.GetNextStateActionPair(curState);
            curState = nextStateActionPair.state;
            Move(nextStateActionPair.action);
        }
        else
        {
            MoveToInitialState();
            environment.isAgentInGoalState = false;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="T:EligibilityTable+EligibilityTableKeyValuePair"/> class.
/// </summary>
/// <param name="stateActionPair">The <see cref="T:StateExtractor+StateActionPair"/> key.</param>
/// <param name="floatValue">The float value.</param>
public EligibilityTableKeyValuePair(StateActionPair stateActionPair, float floatValue)
    : base(stateActionPair, floatValue)
{
}
/// <summary>
/// Learn using the observed reward.
/// </summary>
/// <param name="reward">The observed reward.</param>
public override void Learn(float reward)
{
    if (_showParamLabel)
    {
        _receivers[0].InitialValues(Greediness, DiscountValue, LearnRate, InitialQValue, Run_with_etraces, Gamma, ModelPlanning, Refined, N);
    }

    // Old state
    StateActionPair oldSAP = new StateActionPair(_lastState, _lastAction);
    float oldQvalue = _qTable.getQValue(oldSAP);

    // Current state
    State currentState = base.CurrentState;
    MovementAction bestCurrentAction = _qTable.getBestActionForState(currentState);
    StateActionPair currentSAP = new StateActionPair(currentState, bestCurrentAction);
    float bestCurrentQValue = _qTable.getQValue(currentSAP);

    if (Run_with_etraces)
    {
        float delta = reward + DiscountValue * bestCurrentQValue - oldQvalue;
        _eTable.SetEligibilityValue(oldSAP, 1f);
        _qTable.AddScaledValues(LearnRate * delta, _eTable);
        _eTable.ScaleAllEligibilityValues(DiscountValue * Gamma);
    }
    else
    {
        // Standard QLearning
        float newQValue = Mathf.Lerp(oldQvalue, reward + (DiscountValue * bestCurrentQValue), LearnRate);
        _qTable.setQValue(oldSAP, newQValue);
    }

    // Refined Model
    if (ModelPlanning && Refined)
    {
        // Update the model according to the observed state
        _mTable.IncorporateObservedState(currentState);

        State virtualFromState, virtualToState;
        MovementAction virtualPerformedAction;
        float virtualReward;

        // Perform N virtual steps
        for (int i = 0; i < N; i++)
        {
            // Generate the virtual step
            bool virtualStepGenerated = _mTable.GenerateRandomModelStep(out virtualFromState, out virtualPerformedAction, out virtualToState, out virtualReward);

            if (virtualStepGenerated)
            {
                StateActionPair virtualFromSAP = new StateActionPair(virtualFromState, virtualPerformedAction);

                // Standard QLearning
                float fromStateQVal = _qTable.getQValue(virtualFromSAP);

                // Get the best action after the virtual step
                MovementAction bestAction = _qTable.getBestActionForState(virtualToState);
                StateActionPair virtualToSAP = new StateActionPair(virtualToState, bestAction);

                // Q value update for the virtual step
                float toStateQVal = _qTable.getQValue(virtualToSAP);
                float newQVal = Mathf.Lerp(fromStateQVal, virtualReward + (DiscountValue * toStateQVal), LearnRate);
                _qTable.setQValue(virtualFromSAP, newQVal);
            }
        }
    }

    // DynaQ Model
    if (ModelPlanning && !Refined)
    {
        _simpleMTable.setStateRewardPairAtStateActionPair(oldSAP, new StateRewardPair(currentState, reward));

        for (int i = 0; i < N; i++)
        {
            StateActionPair randSAP = _qTable.getRandomVisitedStateAndAction();
            StateRewardPair srp = _simpleMTable.getStateRewardPair(randSAP);

            // Standard QLearning
            float qVal = _qTable.getQValue(randSAP);
            //MovementAction bAct = _qTable.getBestActionForState(currentState);
            MovementAction bAct = _qTable.getBestActionForState(srp.State);

            // new (current) parameters
            //StateActionPair cSAP = new StateActionPair(currentState, bestCurrentAction);
            StateActionPair cSAP = new StateActionPair(srp.State, bAct);
            float bQVal = _qTable.getQValue(cSAP);

            float newQVal = Mathf.Lerp(qVal, srp.Reward + (DiscountValue * bQVal), LearnRate);
            _qTable.setQValue(randSAP, newQVal);
        }
    }
}
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
protected override SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                              out StateActionPair<StateType, ActionType> solution)
{
    solution = new StateActionPair<StateType, ActionType>();

    // If we're performing an iterative deepening search, rootUtilities and rootSuccessors will be non-null. In that
    // case, we should use them to store utilities and retrieve successor nodes. The purpose is to allow some state,
    // in particular move ordering at the root, to be retained between iterations of the iterative deepening search.
    // The rootUtilities array is used to sort the rootSuccessors array so that the best moves from the previous
    // iteration can be tried first.
    //
    // TODO: should we extend this idea to store the best moves from each ply in the search?
    // TODO: we should probably also implement the killer heuristic and/or history heuristics (this requires some
    //       more complex communication between the game and the search)
    // TODO: we should implement the scout part of negascout
    //       this url is quite good: http://www.fierz.ch/strategy.htm
    if (rootUtilities != null)
    {
        for (int i = 0; i < rootUtilities.Length; i++)
        {
            rootUtilities[i] = float.PositiveInfinity;
        }
    }

    this.limiter = limiter;
    depthLimitHit = wasTerminal = searchAborted = false;

    float bestUtility = float.NegativeInfinity, best0 = float.NegativeInfinity, best1 = float.NegativeInfinity;
    int player = Game.GetPlayerToMove(initialState);
    int index = 0; // the index of the successor within rootSuccessors that we're currently examining

    foreach (Move<StateType, ActionType> move in rootSuccessors != null ? rootSuccessors : Game.GetSuccessors(initialState))
    {
        // invoke the alpha-beta search to estimate the utility value of the move's ending state
        float utility = GetUtilityEstimate(move.State, best0, best1, 1, player);

        // we'll use Failed to indicate that the limiter caused the search to abort
        if (searchAborted)
        {
            return SearchResult.Failed;
        }

        // the alpha-beta search returns the utility of the move from the perspective of the player whose turn it is in
        // the state resulting from the move. so find out which player that is.
        int otherPlayer = Game.GetPlayerToMove(move.State);

        // if the player moving at the successor is not the same as the player moving at the root, then we need to
        // reverse the utility. however, if the move led to a chance node, represented as a "player" of -1, the utility
        // is already from the correct viewpoint, so we don't need to reverse it.
        if (player != otherPlayer && otherPlayer != -1)
        {
            utility = -utility;
        }

        // if we're doing an iterative deepening search, store the utility of this move
        if (rootUtilities != null)
        {
            rootUtilities[index++] = utility;
        }

        // if the move is better than whatever we've got so far, store the move as the new best move
        if (utility > bestUtility)
        {
            bestUtility = utility;

            // we'll store the best move in the "solution" parameter
            solution = new StateActionPair<StateType, ActionType>(move.State, move.Action);

            // also, if the move was so good that it got the maximum utility, there's no point in searching further, so
            // we'll end the search immediately
            if (utility == MaxUtility)
            {
                // if a terminal state was reached in the line of search that led to this maximal utility, then we don't
                // need another round of iterative deepening search because there's no doubt about this particular move,
                // and since this move is optimal, that's all that matters
                if (wasTerminal)
                {
                    depthLimitHit = false;
                }

                break;
            }

            // update the alpha-beta values for the player at the root
            if (player == 0)
            {
                if (utility > best0)
                {
                    best0 = utility;
                }
            }
            else
            {
                if (utility > best1)
                {
                    best1 = utility;
                }
            }
        }
    }

    // if we're doing iterative deepening, sort the moves by utility
    if (rootSuccessors != null)
    {
        SortRootSuccessors();
    }

    this.limiter = null; // release the limiter
    return depthLimitHit ? SearchResult.LimitReached : SearchResult.Success;
}
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/PerformSearch/*"/>
protected abstract SearchResult PerformSearch(StateType initialState, SearchLimiter limiter,
                                              out StateActionPair<StateType, ActionType> solution);
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
/// <include file="documentation.xml" path="/AI/Search/ISearch/SearchCommon/param[@name='limiter']"/>
public SearchResult IterativeDeepeningSearch(SearchLimiter limiter, out StateActionPair<StateType, ActionType> solution)
{
    return IterativeDeepeningSearch(game.GetInitialState(), limiter, out solution);
}
/// <summary>
/// Sets the eligibility value of the given <see cref="T:StateActionPair"/>.
/// </summary>
/// <param name="saPair">The <see cref="T:StateActionPair"/> whose eligibility value is set.</param>
/// <param name="eligibility">The eligibility value to set.</param>
public void SetEligibilityValue(StateActionPair saPair, float eligibility)
{
    eligibilityTable[saPair] = eligibility;
}
/// <summary>
/// Sets the Q-value for the given <see cref="T:StateActionPair"/>.
/// </summary>
/// <param name="saPair">The <see cref="T:StateActionPair"/>.</param>
/// <param name="reward">The Q-value (expected accumulated reward) to set.</param>
public void setQValue(StateActionPair saPair, float reward)
{
    _qTable[saPair] = reward;
}
/// <include file="documentation.xml" path="/AI/Search/ISearch/Search/*"/>
public sealed override SearchResult Search(SearchLimiter limiter, out StateActionPair<StateType, ActionType> solution)
{
    return Search(game.GetInitialState(), limiter, out solution);
}
/// <include file="documentation.xml" path="/AI/Search/GameSearchBase/IterativeDeepeningSearch/*"/>
/// <include file="documentation.xml" path="/AI/Search/SearchBase/Search_Timeout/param[@name='msTimeLimit']"/>
/// <include file="documentation.xml" path="/AI/Search/ISearch/Search_State/param[@name = 'initialState']"/>
public SearchResult IterativeDeepeningSearch(StateType initialState, int msTimeLimit,
                                             out StateActionPair<StateType, ActionType> solution)
{
    return IterativeDeepeningSearch(initialState, msTimeLimit == Infinite ? null : new TimeLimiter(msTimeLimit), out solution);
}