/// <summary>
/// Chooses an action for a state, occasionally exploring with a random action.
/// </summary>
/// <param name="state">The state in which an action has to be chosen.</param>
/// <param name="rho">The probability of returning a random action instead of the best one.</param>
/// <returns>A random action with probability <paramref name="rho"/>; otherwise the action
/// the store reports as best for <paramref name="state"/>.</returns>
public Action GetAction(State state, float rho)
{
    // Exploit: the drawn number did not fall below rho, so ask the store for the best action.
    bool explore = this.random.NextDouble() < rho;
    if (!explore)
    {
        return this.store.GetBestAction(state);
    }

    // Explore: pick uniformly from the full list of actions.
    List<Action> candidates = new List<Action>();
    candidates.Add(Action.North);
    candidates.Add(Action.East);
    candidates.Add(Action.South);
    candidates.Add(Action.West);
    candidates.Add(Action.None);
    return PickRandomAction(candidates);
}
/// <summary>
/// Entry point of the learner: loads the Q-learning file, applies the reward derived from
/// the game log to every recorded (state, action, state) transition and saves the result.
/// </summary>
/// <param name="args">
/// [0] path of the Q-learning file,
/// [1] path of the last-state file,
/// [2] path of the game log,
/// [3] learning rate (alpha), parsed with the invariant culture,
/// [4] discount rate (gamma), parsed with the invariant culture,
/// [5] optional path of a log file to which the reward and game length are appended.
/// </param>
static void Main(string[] args)
{
    string learnFile = args[0];
    string lastStateFile = args[1];
    string gamelog = args[2];
    float alpha = float.Parse(args[3], System.Globalization.CultureInfo.InvariantCulture);
    float gamma = float.Parse(args[4], System.Globalization.CultureInfo.InvariantCulture);

    //initialize Q-Learning
    QLearning learn = new QLearning();
    learn.LoadFile(learnFile);

    int reward = GetReward(gamelog);

    //read all last states from the laststate file.
    //
    //file format (repeated until the end of the file):
    //ulong (state before action was taken)
    //byte (action that was taken)
    //ulong (state after action was taken)
    //
    //the using statements make sure the file is released even when reading or
    //processing a record throws.
    using (FileStream fs = new FileStream(lastStateFile, FileMode.Open))
    using (BinaryReader br = new BinaryReader(fs))
    {
        while (fs.Position < fs.Length)
        {
            //states are stored as full 64-bit values; read them without truncation,
            //the same way LoadFile reads the states of the Q-learning file.
            State oldState = new State(br.ReadUInt64());
            Action action = (Action)br.ReadByte();
            State newState = new State(br.ReadUInt64());

            //process reward for every recorded transition
            learn.ProcessReward(reward, oldState, newState, action, alpha, gamma);
        }
    }

    learn.SaveFile(learnFile);

    //check whether a log file is passed as a parameter to the program, if so:
    //write the reward and length of the game to the log file.
    if (args.Length > 5)
    {
        string log = args[5];
        using (StreamWriter sw = new StreamWriter(log, true))
        {
            sw.WriteLine(reward + "\t" + GetGameLength(gamelog));
        }
    }
}
/// <summary>
/// Determines the action with the highest Q-value for a state.
/// </summary>
/// <param name="s">The state for which an action is requested.</param>
/// <returns>An action whose Q-value for <paramref name="s"/> is the highest one stored;
/// when several actions share that value one of them is picked at random. When the state
/// is not present in the set, a random value in the range [0, 5) cast to an action is
/// returned.</returns>
public Action GetBestAction(State s)
{
    // Unknown state: there are no Q-values to compare, so fall back to a random action.
    if (!this.Set.ContainsKey(s))
    {
        return (Action)random.Next(5);
    }

    float highest = float.MinValue;
    List<Action> candidates = new List<Action>();

    foreach (Action candidate in Enum.GetValues(typeof(Action)))
    {
        float q = this.Set[s][candidate];

        if (q > highest)
        {
            // Strictly better value found: everything collected so far is obsolete.
            highest = q;
            candidates.Clear();
            candidates.Add(candidate);
        }
        else if (q == highest)
        {
            // Exact tie with the current best value: remember this action as well.
            candidates.Add(candidate);
        }
    }

    // Break ties by picking a random action from the list of best actions.
    return candidates[random.Next(candidates.Count)];
}
/// <summary>
/// Adds the Q-values of a state to the set.
/// </summary>
/// <param name="s">The state that is used as the key.</param>
/// <param name="item">The item holding the Q-values that belong to <paramref name="s"/>.</param>
public void Add(State s, QSetItem item)
{
    Set.Add(s, item);
}
/// <summary>
/// Gets or sets the Q-value of an action in a state.
/// </summary>
/// <param name="s">The state to look up.</param>
/// <param name="a">The action whose Q-value is read or written.</param>
/// <returns>The stored Q-value, or 0 when <paramref name="s"/> is not present in the set.</returns>
public float this[State s, Action a]
{
    get
    {
        // States that were never stored report a Q-value of zero.
        return this.Set.ContainsKey(s) ? this.Set[s][a] : 0f;
    }
    set
    {
        if (!this.Set.ContainsKey(s))
        {
            // First value for this state: create its item before storing the value.
            QSetItem created = new QSetItem();
            created[a] = value;
            this.Set.Add(s, created);
            return;
        }

        this.Set[s][a] = value;
    }
}
/// <summary>
/// Processes a given reward by updating the stored Q-value of the action that was taken.
/// </summary>
/// <param name="reward">The reward that was earned by taking the action.</param>
/// <param name="oldState">The state before the <paramref name="action"/> was taken.</param>
/// <param name="newState">The state that was reached after taking the <paramref name="action"/>.</param>
/// <param name="action">The action that was taken.</param>
/// <param name="alpha">The learning rate.</param>
/// <param name="gamma">The discount rate.</param>
public void ProcessReward(float reward, State oldState, State newState, Action action, float alpha, float gamma)
{
    // Current estimate for the (state, action) pair that is being updated.
    float current = this.store[oldState, action];

    // Q-value of the best action available in the state we ended up in.
    Action bestNext = this.store.GetBestAction(newState);
    float bestNextQ = this.store[newState, bestNext];

    // Blend the old estimate with the reward plus the discounted future value.
    this.store[oldState, action] = (1 - alpha) * current + alpha * (reward + gamma * bestNextQ);
}
//file format (repeated until the end of the file):
//1 ulong (state)
//5 floats (Q-values for the actions, in the order North, South, East, West, None)

/// <summary>
/// Loads a Q-learning file containing Q-values and adds every entry to the store.
/// The file is created (empty) when it does not exist yet.
/// </summary>
/// <param name="filename">The path of the Q-learning file.</param>
public void LoadFile(string filename)
{
    //the using statements release the file even when reading a record or adding it
    //to the store throws.
    using (FileStream fs = File.Open(filename, FileMode.OpenOrCreate))
    using (BinaryReader br = new BinaryReader(fs))
    {
        while (fs.Position < fs.Length)
        {
            State state = new State(br.ReadUInt64());

            //the read order below defines the on-disk order of the Q-values.
            QSetItem newItem = new QSetItem();
            newItem[Action.North] = br.ReadSingle();
            newItem[Action.South] = br.ReadSingle();
            newItem[Action.East] = br.ReadSingle();
            newItem[Action.West] = br.ReadSingle();
            newItem[Action.None] = br.ReadSingle();

            this.store.Add(state, newItem);
        }
    }
}