/// <summary>
/// Builds the <see cref="State"/> observed around location <paramref name="a"/> by
/// inspecting every tile whose row and column offsets lie within <c>radius</c>
/// (the centre tile itself is skipped).
/// </summary>
/// <param name="a">The location the neighbourhood is centred on.</param>
/// <param name="state">The game state that supplies the tiles, ants and hills.</param>
/// <returns>
/// A state whose flags record which kinds of tile were seen, plus
/// <c>AirSuperiority</c>, which is set when there are more own ants than enemy ants.
/// </returns>
public State CalculateState(Location a, IGameState state)
{
    State s = new State();

    // The map dimensions do not change while scanning, so build this operand once.
    Location mapSize = new Location(state.Height, state.Width);

    for (int x = -radius; x <= radius; x++)
    {
        for (int y = -radius; y <= radius; y++)
        {
            if (x == 0 && y == 0)
            {
                continue;
            }

            // Offset is (row = y, col = x); the % operator is applied with the map size
            // (presumably to wrap around the map edges - defined on Location, not shown here).
            Location l = (a + new Location(y, x)) % mapSize;
            Tile t = state[l];

            if (t == Tile.Ant)
            {
                // Guard against an empty MyAnts list: indexing [0] would throw.
                // With no own ants at all, any ant seen is treated as an enemy.
                bool mine = state.MyAnts.Count > 0
                    && state.MyAnts.Contains(new Ant(l.Row, l.Col, state.MyAnts[0].Team));
                if (mine)
                {
                    s.MyAnt = true;
                }
                else
                {
                    s.EnemyAnt = true;
                }
            }
            else if (t == Tile.Food)
            {
                s.Food = true;
            }
            else if (t == Tile.Hill)
            {
                // Same guard for hills: once every own hill is gone, MyHills is empty
                // and any hill in view is treated as an enemy hill.
                bool mine = state.MyHills.Count > 0
                    && state.MyHills.Contains(new AntHill(l.Row, l.Col, state.MyHills[0].Team));
                if (mine)
                {
                    s.MyHill = true;
                }
                else
                {
                    s.EnemyHill = true;
                }
            }
        }
    }

    s.AirSuperiority = state.MyAnts.Count > state.EnemyAnts.Count;
    return s;
}
/// <summary>
/// Decodes an integer into a <see cref="State"/>, one flag per base-2 digit.
/// From the least significant digit upwards the flags are: Food, EnemyAnt, MyAnt,
/// EnemyHill, MyHill, AirSuperiority.
/// </summary>
/// <param name="n">The encoded state.</param>
/// <returns>A new state with the six flags set from <paramref name="n"/>.</returns>
public static State FromInt(int n)
{
    // Peel off six digits; a digit counts as set only when the remainder is exactly 1.
    bool[] flags = new bool[6];
    int rest = n;
    for (int i = 0; i < flags.Length; i++)
    {
        flags[i] = (rest % 2 == 1);
        rest /= 2;
    }

    State decoded = new State();
    decoded.Food = flags[0];
    decoded.EnemyAnt = flags[1];
    decoded.MyAnt = flags[2];
    decoded.EnemyHill = flags[3];
    decoded.MyHill = flags[4];
    decoded.AirSuperiority = flags[5];
    return decoded;
}
/// <summary>
/// Records the decision (state, action) in <c>decisionLog</c> and stores the action
/// as <c>currentAction</c>.
/// </summary>
/// <param name="s">The state in which the action was chosen.</param>
/// <param name="a">The chosen action.</param>
public void PerformAction(State s, Action a)
{
    // Log first, then switch the current action.
    decisionLog.AddDecision(s, a);

    currentAction = a;
}
/// <summary>
/// Translates an abstract action into a concrete target location, plans a path to it
/// and hands the decision to the agent.
/// </summary>
/// <param name="agent">The agent that will act; its <c>path</c> is overwritten.</param>
/// <param name="state">The state the action was chosen in (passed on to the agent).</param>
/// <param name="action">The action to carry out.</param>
/// <param name="gamestate">The game state used for target lookup and pathfinding.</param>
/// <exception cref="Exception">Thrown when no target location could be determined.</exception>
public void PerformAction(Agent agent, State state, Action action, IGameState gamestate)
{
    // Step 1: find the reference location the action refers to.
    Location tgt;
    switch (action)
    {
        case Action.AttackEnemyAnt:
        case Action.RunAwayFromEnemy:
            tgt = GetNearestEnemy(agent.location, gamestate);
            break;

        case Action.GoToFriend:
        case Action.RunAwayFromFriend:
            tgt = GetNearestFriend(agent.location, gamestate);
            break;

        case Action.AttackEnemyHill:
            tgt = GetNearestEnemyHill(agent.location, gamestate);
            break;

        case Action.DefendHill:
            tgt = GetNearestHill(agent.location, gamestate);
            break;

        case Action.TakeFood:
            tgt = GetNearestFood(agent.location, gamestate);
            break;

        case Action.RandomMove:
            tgt = GetRandomLocation(agent.location, gamestate);
            break;

        case Action.StandStill:
            tgt = agent.location;
            break;

        default:
            tgt = null;
            break;
    }

    if (tgt == null)
    {
        throw new Exception("B tgt is null >>> " + action);
    }

    // Step 2: for the "run away" actions, mirror the reference location through the agent's
    // own position, then apply % with the map size.
    bool fleeing = action == Action.RunAwayFromEnemy || action == Action.RunAwayFromFriend;
    if (fleeing)
    {
        Location mirrored = 2 * agent.location - tgt;
        tgt = mirrored % new Location(gamestate.Height, gamestate.Width);
    }

    if (tgt == null)
    {
        throw new Exception("A tgt is null >>> " + action);
    }

    // Step 3: snap to a passable tile and plan the route; a missing route becomes an empty path.
    tgt = GetNearestPassable(tgt, gamestate);
    agent.path = Pathfinding.FindPath(agent.location, tgt, gamestate);
    if (agent.path == null)
    {
        agent.path = new List<Location>();
    }

    agent.PerformAction(state, action);
}
/// <summary>
/// Folds <paramref name="freq"/> new observations with average reward
/// <paramref name="reward"/> into the running, frequency-weighted mean stored in
/// <c>ExpectedReward</c>, and adds <paramref name="freq"/> to <c>Frequencies</c>.
/// Both tables are indexed by the state's hash code and the action's integer value.
/// </summary>
/// <param name="s1">The state the action was taken in.</param>
/// <param name="a">The action that was taken.</param>
/// <param name="reward">The reward to merge in.</param>
/// <param name="freq">How many observations the reward stands for.</param>
public void AddResult(State s1, Action a, double reward, int freq)
{
    // Zero observations carry no information. Without this guard a first call with
    // freq == 0 would compute 0/0 and poison the stored expectation with NaN.
    if (freq == 0)
    {
        return;
    }

    int stateIndex = s1.GetHashCode();
    int actionIndex = (int)a;

    double oldCount = (double)Frequencies[stateIndex, actionIndex];
    double newCount = oldCount + (double)freq;

    // Weighted mean: old expectation weighted by its share of the observations,
    // new reward weighted by the share of the observations being added.
    ExpectedReward[stateIndex, actionIndex] =
        ExpectedReward[stateIndex, actionIndex] * (oldCount / newCount)
        + reward * ((double)freq / newCount);

    Frequencies[stateIndex, actionIndex] += freq;
}
/// <summary>
/// Looks up the desirability values stored for a state.
/// </summary>
/// <param name="s1">The state whose hash code is used as the lookup key.</param>
/// <returns>The entry of <c>Desirability</c> stored under that hash code.</returns>
public double[] GetDesirabilities(State s1)
{
    int key = s1.GetHashCode();
    return Desirability[key];
}
/// <summary>
/// Finds the smallest relative Q-value among all known actions for a state.
/// </summary>
/// <param name="state">The state to inspect in <c>Qrelative</c>.</param>
/// <returns>
/// The lowest Q-value found, or <see cref="double.MaxValue"/> when <c>actions</c> is empty.
/// </returns>
public double MinRelativeQ(State state)
{
    double lowest = double.MaxValue;

    foreach (Action candidate in actions)
    {
        double value = Qrelative[state][candidate];
        lowest = value < lowest ? value : lowest;
    }

    return lowest;
}
/// <summary>
/// Appends a (state, action) pair to <c>Decisions</c> and increments how often that pair
/// has been chosen. Entries in <c>Rewards</c> and <c>Frequencies</c> are created on first
/// use, starting at zero.
/// </summary>
/// <param name="s1">The state the decision was made in.</param>
/// <param name="a">The action that was chosen.</param>
public void AddDecision(State s1, Action a)
{
    Decisions.AddLast(Tuple.Create(s1, a));

    // First time this state is seen: create its per-action tables.
    bool stateKnown = Rewards.ContainsKey(s1);
    if (!stateKnown)
    {
        Rewards.Add(s1, new Dictionary<Action, double>());
        Frequencies.Add(s1, new Dictionary<Action, int>());
    }

    // First time this action is seen in this state: start both counters at zero.
    bool actionKnown = Rewards[s1].ContainsKey(a);
    if (!actionKnown)
    {
        Rewards[s1].Add(a, 0);
        Frequencies[s1].Add(a, 0);
    }

    Frequencies[s1][a] += 1;
}
/// <summary>
/// Chooses an action for a state: either a random one or the stored best one.
/// </summary>
/// <param name="state">The state to choose an action in.</param>
/// <param name="rho">The probability of choosing a random action instead of the best one.</param>
/// <returns>
/// A random action out of North, East, South, West and None with probability
/// <paramref name="rho"/>; otherwise the store's best action for <paramref name="state"/>.
/// </returns>
public Action GetAction(State state, float rho)
{
    bool explore = this.random.NextDouble() < rho;
    if (!explore)
    {
        return this.store.GetBestAction(state);
    }

    List<Action> actions = new List<Action>
    {
        Action.North,
        Action.East,
        Action.South,
        Action.West,
        Action.None
    };
    return PickRandomAction(actions);
}
/// <summary>
/// Initialises the relative Q-table <c>Qrelative</c> exactly once. Every state (one per
/// own-ant distance from 0 up to and including <c>maxDistance</c>) gets an entry for every
/// action; Q-values found in the log file <c>logRelative</c> are used where available and
/// all other entries start at 0.
/// </summary>
/// <remarks>
/// Log format: one <c>stateCode,actionCode,q</c> triple per line, where the codes are the
/// hash codes of the state and the action. Blank lines are ignored.
/// </remarks>
public void initRelativeQ()
{
    // Already initialised: nothing to do.
    if (Qrelative != null)
    {
        return;
    }

    Dictionary<int, Dictionary<int, double>> knownQ = new Dictionary<int, Dictionary<int, double>>();
    if (File.Exists(logRelative))
    {
        // Read the known relative Q-values from the log. The using-blocks make sure the
        // file handle is released even when a line fails to parse.
        using (FileStream fs = new FileStream(logRelative, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Tolerate blank lines (e.g. a trailing newline at the end of the log).
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                string[] parts = line.Split(',');
                int stateCode = int.Parse(parts[0]);
                int actionCode = int.Parse(parts[1]);
                // NOTE(review): double.Parse uses the current culture; confirm the code that
                // writes this log formats numbers the same way.
                double q = double.Parse(parts[2]);

                Dictionary<int, double> byAction;
                if (!knownQ.TryGetValue(stateCode, out byAction))
                {
                    byAction = new Dictionary<int, double>();
                    knownQ[stateCode] = byAction;
                }
                byAction[actionCode] = q;
            }
        }
    }

    // Initialize the relative Q-table, taking known Q-values into account.
    Qrelative = new Dictionary<State, Dictionary<Action, double>>();

    // Determine all possible actions, that is [towards] and [away from] for every state param.
    foreach (StateParameter parameter in (StateParameter[])Enum.GetValues(typeof(StateParameter)))
    {
        actions.Add(new Action(parameter, ActionDirection.Towards));
        actions.Add(new Action(parameter, ActionDirection.AwayFrom));
    }

    // This code is not actually prepared to handle multiple StateParameters,
    // but this is not an issue because right now we only have one anyway: OwnAnt.
    Dictionary<StateParameter, int> distances = new Dictionary<StateParameter, int>();
    for (int i = 0; i <= maxDistance; i++)
    {
        distances[StateParameter.OwnAnt] = i;
        // Each state gets its own copy of the distances so later iterations do not alter it.
        State s = new State(new Dictionary<StateParameter, int>(distances));

        // The state's code is the same for every action, so look it up once per state.
        int stateCode = s.GetHashCode();
        Dictionary<int, double> knownForState;
        knownQ.TryGetValue(stateCode, out knownForState);

        Dictionary<Action, double> row = new Dictionary<Action, double>();
        Qrelative[s] = row;

        foreach (Action a in actions)
        {
            double q = 0.0;

            // Take known Q-value into account.
            double known;
            if (knownForState != null && knownForState.TryGetValue(a.GetHashCode(), out known))
            {
                q = known;
            }

            row[a] = q;
        }
    }
}
/// <summary>
/// Picks the action with the highest stored Q-value for a state.
/// </summary>
/// <param name="s">The state to choose an action in.</param>
/// <returns>
/// One of the actions whose Q-value is maximal for <paramref name="s"/>, chosen at random
/// when several actions tie. When the state has no entry in <c>Set</c>, the action whose
/// integer value is <c>random.Next(5)</c> is returned instead.
/// </returns>
public Action GetBestAction(State s)
{
    // Unknown state: nothing to compare, fall back to a random action.
    if (!this.Set.ContainsKey(s))
    {
        return (Action)random.Next(5);
    }

    var qValues = this.Set[s];
    float highest = float.MinValue;
    List<Action> candidates = new List<Action>();

    foreach (Action candidate in Enum.GetValues(typeof(Action)))
    {
        float q = qValues[candidate];
        if (q > highest)
        {
            // Strictly better: forget all earlier candidates.
            candidates.Clear();
            highest = q;
            candidates.Add(candidate);
        }
        else if (q == highest)
        {
            // Exact tie with the current best: keep both.
            candidates.Add(candidate);
        }
    }

    // Break ties at random.
    return candidates[random.Next(candidates.Count)];
}
/// <summary>
/// Stores the Q-values of a state by forwarding to <c>Set.Add</c>.
/// </summary>
/// <param name="s">The state to register.</param>
/// <param name="item">The Q-values belonging to <paramref name="s"/>.</param>
public void Add(State s, QSetItem item)
{
    // NOTE(review): behaviour for an already-present state depends on Set.Add - confirm.
    this.Set.Add(s, item);
}
/// <summary>
/// Gets or sets the Q-value of an action in a state.
/// </summary>
/// <param name="s">The state.</param>
/// <param name="a">The action.</param>
/// <returns>The stored Q-value, or 0 when the state has no entry in <c>Set</c>.</returns>
public float this[State s, Action a]
{
    get
    {
        return this.Set.ContainsKey(s) ? this.Set[s][a] : 0;
    }
    set
    {
        if (!this.Set.ContainsKey(s))
        {
            // Unknown state: create its item, set the one value and register it.
            QSetItem created = new QSetItem();
            created[a] = value;
            this.Set.Add(s, created);
            return;
        }

        this.Set[s][a] = value;
    }
}
/// <summary>
/// Processes a reward by updating the stored Q-value of the action that earned it.
/// </summary>
/// <param name="reward">The reward that was earned by taking the <paramref name="action"/>.</param>
/// <param name="oldState">The state before the <paramref name="action"/> was taken.</param>
/// <param name="newState">The state reached after taking the <paramref name="action"/>.</param>
/// <param name="action">The action that was taken.</param>
/// <param name="alpha">The learning rate.</param>
/// <param name="gamma">The discount rate.</param>
public void ProcessReward(float reward, State oldState, State newState, Action action, float alpha, float gamma)
{
    float currentEstimate = store[oldState, action];

    // Value of the best action the store knows for the state we ended up in.
    Action bestNextAction = store.GetBestAction(newState);
    float bestNextValue = store[newState, bestNextAction];

    // Blend the old estimate with the newly observed return.
    store[oldState, action] = (1 - alpha) * currentEstimate + alpha * (reward + gamma * bestNextValue);
}
// File format, one record after another until the end of the file:
//   1 unsigned 64-bit integer (state)
//   5 floats (the Q-values for North, South, East, West and None, in that order)
/// <summary>
/// Loads a Q-learning file containing Q-values and adds every record to the store.
/// The file is created (empty) when it does not exist yet.
/// </summary>
/// <param name="filename">The path of the Q-learning file.</param>
public void LoadFile(string filename)
{
    // The using-blocks release the file handle even when a read throws,
    // e.g. on a truncated record.
    using (FileStream fs = File.Open(filename, FileMode.OpenOrCreate))
    using (BinaryReader br = new BinaryReader(fs))
    {
        while (fs.Position < fs.Length)
        {
            State state = new State(br.ReadUInt64());

            // The read order below defines the on-disk order of the five Q-values.
            QSetItem newItem = new QSetItem();
            newItem[Action.North] = br.ReadSingle();
            newItem[Action.South] = br.ReadSingle();
            newItem[Action.East] = br.ReadSingle();
            newItem[Action.West] = br.ReadSingle();
            newItem[Action.None] = br.ReadSingle();

            this.store.Add(state, newItem);
        }
    }
}