private void NextEpisode(SMDPAgent agent)
{
    config = RLConfig.Load("RLConfig.xml");
    if (this.Episodes > config.numEpisodes)
    {
        return;
    }
    Console.WriteLine("NextEpisode() called {0} times", cnt++);
    if (this.Episodes == 0) // first episode: reset the bookkeeping
    {
        fitness = 1;
        sum = 0;
        counter = 0;
    }
    this.Episodes++;
    agent.setEpsilon(config.lambda); // note: epsilon is set from config.lambda here, not config.epsilon
    fitness = RLGameWorld.FitnessValue(agent);
    reward = fitness; // forwarded as the reward to endEpisode
    sum += fitness;
    if (this.Episodes % count_fit == 0)
    {
        counter++;
        average_fitness = sum / count_fit; // mean fitness over the last count_fit episodes
        recordPerformance(counter, average_fitness);
        sum = 0;
    }
    //this.Previous = this.Current;
    //this.Current = this.Next;
    //this.Next = new KPEnvironment(this.Episode + 1);
}
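// Recording cadence, for orientation: if count_fit were 50, recordPerformance would
// receive one point per 50-episode window (the mean fitness over that window), so a
// 10,000-episode run would plot a 200-point learning curve. count_fit's actual value
// is defined elsewhere in this class.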
public void AllEpisodes()
{
    config = RLConfig.Load("RLConfig.xml");
    double[] widthArray = new double[config.numFeatures];
    for (int i = 0; i < config.numFeatures; i++)
    {
        widthArray[i] = 1.0 / config.numFeatures; // 1.0, not 1: integer division would give 0
    }
    switch (config.learningMethod)
    {
        case "Q_Learning":
        {
            Q_LearningAgent qlp = new Q_LearningAgent(config.numFeatures, config.numActions, true,
                fitness, widthArray, "weightsFile.data", "weightsFile.data");
            Console.WriteLine("AllEpisodes: running Q-learning");
            while (Episodes <= config.numEpisodes)
            {
                qlp.saveWeights("weightsFile.data"); // checkpoint every episode
                this.OneEpisode(qlp);
            }
            break;
        }
        case "SARSA":
        {
            SarsaAgent sap = new SarsaAgent(config.numFeatures, config.numActions, true,
                fitness, widthArray, "weightsFile.data", "weightsFile.data");
            Console.WriteLine("AllEpisodes: running SARSA");
            while (Episodes <= config.numEpisodes)
            {
                if (Episodes % 50 == 0) // checkpoint weights every 50 episodes
                {
                    sap.saveWeights("weightsFile.data");
                }
                this.OneEpisode(sap);
            }
            break;
        }
        default:
        {
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
        }
    }
}
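/* For reference, a minimal sketch of what RLConfig.xml might contain, inferred from the
 * fields read throughout this file. The element names follow the config properties used
 * in the code; the values shown are illustrative, not taken from the repo:
 *
 *   <RLConfig>
 *     <numFeatures>13</numFeatures>
 *     <numActions>3</numActions>
 *     <numEpisodes>10000</numEpisodes>
 *     <learningMethod>SARSA</learningMethod>
 *     <alpha>0.125</alpha>
 *     <gamma>1.0</gamma>
 *     <lambda>0.0</lambda>
 *     <epsilon>0.01</epsilon>
 *     <traceability>0.01</traceability>
 *     <transfer>0</transfer>
 *     <num_Keepers>3</num_Keepers>
 *     <num_Takers>2</num_Takers>
 *     <source_Keepers>3</source_Keepers>
 *     <source_Takers>2</source_Takers>
 *     <visualize>0</visualize>
 *     <learning>1</learning>
 *   </RLConfig>
 */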
public static void Main(string[] args)
{
    RLGameWorld ng = new RLGameWorld();
    Thread play = new Thread(new ThreadStart(ng.Game));
    play.IsBackground = true;
    play.Start();
    config = RLConfig.Load("RLConfig.xml");
    RLProgram mainprog = new RLProgram();
    Console.WriteLine("Starting: visualize = {0}, learning = {1}", config.visualize, config.learning);
    if (config.visualize == 0 && config.learning == 1) // headless learning run
    {
        Console.WriteLine("Running all learning episodes");
        mainprog.AllEpisodes();
    }
    Console.ReadLine(); // keep the background game thread alive until the user presses Enter
}
public static double FitnessValue(SMDPAgent policy)
{
    int cycles = 0;
    config = RLConfig.Load("RLConfig.xml");
    // Hand the shared policy object to every player.
    switch (config.learningMethod)
    {
        case "Q_Learning":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].qlPolicy = (Q_LearningAgent)policy;
            }
            break;
        case "SARSA":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].saPolicy = (SarsaAgent)policy;
            }
            break;
        default:
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
    }
    // Run simulator cycles until the referee flags the end of the episode.
    do
    {
        timer.RunCycle();
        cycles++;
    } while (!kref.episodeEnded);
    kref.episodeEnded = false;
    return cycles / 10.0;
}
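// The returned value is the episode length divided by 10.0; assuming the stadium runs
// at the usual 10 simulator cycles per second, this is the keepers' hold time in
// seconds. For example, an episode lasting 183 cycles scores 18.3, and NextEpisode()
// averages these scores over count_fit episodes before recording them.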
public override void keeper()
{
    config = RLConfig.Load("RLConfig.xml");
    // Detect the start of a new episode and close out the previous one with a
    // terminal reward of -1 (the ball was lost).
    // TODO: reset the episode start time and set last_action to -1 here.
    if (config.visualize == 1)
    {
        if (RLVisualisation.epi != 0 && RLVisualisation.epi != episodesCount)
        {
            episodesCount = RLVisualisation.epi;
            if (config.learningMethod == "SARSA")
            {
                saPolicy.endEpisode(-1); // alternative: RLVisualisation.reward
            }
            else
            {
                qlPolicy.endEpisode(-1); // alternative: RLVisualisation.reward
            }
        }
    }
    else
    {
        if (RLProgram.epi != 0 && RLProgram.epi != episodesCount)
        {
            episodesCount = RLProgram.epi;
            if (config.learningMethod == "SARSA")
            {
                saPolicy.endEpisode(-1); // alternative: RLProgram.reward
            }
            else
            {
                qlPolicy.endEpisode(-1); // alternative: RLProgram.reward
            }
        }
    }

    // If the ball is kickable, run the main action-selection routine.
    if (isBallKickable())
    {
        turns = 0;
        trajectoryChanges = 0;
        trajectorySame = 2;
        keeperWithBall();
        // note: control falls through to the intercept/support logic in this same cycle
    }

    // Otherwise find the teammate fastest to the ball.
    int iTmp = 0;
    int fastest = getFastestInSetTo(teammates, ballp, ref iTmp);

    // If this keeper is fastest, intercept the ball.
    if (fastest == Unum)
    {
        intercept();
        return;
    }

    // Not fastest: move to an open supporting position.
    turns = 0;
    keeperSupport(fastest);
}
public override void keeperWithBall()
{
    config = RLConfig.Load("RLConfig.xml");
    count++; // calls to keeperWithBall so far

    Vector[] tmatesp = null;
    Vector[] oppPlayersp = null;
    Vector posToPass = new Vector(0, 0);
    Command soc = new Command();
    soc.Com = "illegal";
    int action = 0, j = 0, h = 0;
    double ang1, ang2, ang3;

    tmatesp = myTeammatesPlayers(teammatesp, config.num_Keepers);
    oppPlayersp = myTeammatesPlayers(opp, config.num_Takers);
    tmatesp[config.num_Keepers - 1] = mySelfp;

    int possibleStates = config.numFeatures;
    double[] statesVector = new double[possibleStates];
    double[] closestmateOpp = new double[teammatesp.Length - 1];

    try
    {
        if (inputs == null)
        {
            inputs = new double[config.numFeatures];
        }
        for (int i = 0; i < closestmateOpp.Length; i++)
        {
            closestmateOpp[i] = 1000; // sentinel: no taker seen yet
        }
        // Four features per teammate: distance to me, distance to its closest taker,
        // distance from the field centre, and the smallest passing angle.
        for (int i = 0; i < tmatesp.Length; i++)
        {
            if (i != Unum - 1)
            {
                inputs[j++] = tmatesp[i].distance(mySelfp);
                getClosestInSetTo(oppPlayersp, tmatesp[i], ref closestmateOpp[h]);
                inputs[j++] = closestmateOpp[h++];
                inputs[j++] = DistFromCenter(tmatesp[i]);
                ang1 = mySelfp.angle(tmatesp[i]);
                ang2 = mySelfp.angle(oppPlayersp[0]);
                ang3 = mySelfp.angle(oppPlayersp[1]);
                inputs[j++] = Math.Min(Magnitude(ang1 - ang2), Magnitude(ang1 - ang3));
            }
        }
        // Two features per taker, then my own distance from the centre. These index
        // with j, continuing after the teammate features, as keeperstateVars() does.
        for (int i = 0; i < oppPlayersp.Length; i++)
        {
            inputs[j++] = oppPlayersp[i].distance(mySelfp);
            inputs[j++] = DistFromCenter(oppPlayersp[i]);
        }
        inputs[j] = DistFromCenter(mySelfp);
    }
    catch (IndexOutOfRangeException)
    {
        Console.WriteLine("Exception: array index out of range -> increase numFeatures");
        throw;
    }
    statesVector = inputs;

    switch (config.learningMethod)
    {
        case "Q_Learning":
        {
            if (qlPolicy == null)
            {
                Console.WriteLine("The policy object is null");
                base.keeperWithBall(); // fall back to the hand-coded behaviour
                return;
            }
            /* Reading the saved xml file so as to use it in the next episode step:
             * XDocument loadVector = XDocument.Load("Previous_State.xml");
             * var varVector = loadVector.Element("state-variables").Elements("variable");
             * double[] prevStateVector = varVector.Select(x => Double.Parse(x.Value)).ToArray();
             * XDocument loadValue = XDocument.Load("lactAction.xml");
             * lastAction = int.Parse(loadValue.Root.Element("lastAction").Value);
             * Console.WriteLine("---- last action is = {0}", lastAction); // verify by printing
             */
            // Round-trip check: write the state vector back out as a different file.
            XDocument testState = new XDocument();
            testState.Add(new XElement("state-variables",
                statesVector.Select(x => new XElement("variable", x))));
            testState.Save("Test_Previous_State.xml");

            if (statesVector.Length > 0) // we can calculate state variables
            {
                if (timeLastAction == 0)
                {
                    // First SMDP step of the episode.
                    action = qlPolicy.startEpisode(statesVector);
                }
                else if (timeLastAction == std.Time - 1 && lastAction > 0)
                {
                    // We were in the middle of a pass last cycle: follow through with it.
                    temp_reward = body.Time - timeLastAction;
                    action = lastAction;
                }
                else
                {
                    // All subsequent SMDP steps; the reward is the time elapsed since
                    // the last decision point.
                    temp_reward = body.Time - timeLastAction;
                    action = qlPolicy.step(temp_reward, statesVector);
                }
                lastAction = action;

                // Persist the previous state and action for the next episode step.
                XDocument prevState = new XDocument();
                prevState.Add(new XElement("state-variables",
                    statesVector.Select(x => new XElement("variable", x))));
                prevState.Save("Previous_State.xml");
                XDocument prevAction = new XDocument();
                prevAction.Add(new XElement("Actions", new XElement("lastAction", lastAction)));
                prevAction.Save("lactAction.xml");
            }
            else
            {
                // Not enough information to calculate state variables: hold the ball.
                action = 1;
                temp_reward = body.Time - timeLastAction;
            }
            max = Unum - 1;
            posToPass = mySelfp;
            lastAction = (action < 0) ? 0 : action;
            timeLastAction = body.Time;
            break;
        }
        case "SARSA":
        {
            if (saPolicy == null)
            {
                Console.WriteLine("The policy object is null");
                base.keeperWithBall(); // fall back to the hand-coded behaviour
                return;
            }
            // (State/action XML persistence is only enabled in the Q_Learning branch.)
            if (statesVector.Length > 0) // we can calculate state variables
            {
                if (timeLastAction == 0)
                {
                    // First SMDP step of the episode.
                    action = saPolicy.startEpisode(statesVector);
                }
                else if (timeLastAction == std.Time - 1 && lastAction > 0)
                {
                    // We were in the middle of a pass last cycle: follow through with it.
                    temp_reward = body.Time - timeLastAction;
                    action = lastAction;
                }
                else
                {
                    temp_reward = body.Time - timeLastAction;
                    action = saPolicy.step(temp_reward, statesVector);
                }
                lastAction = action;
            }
            else
            {
                action = 1; // hold ball
                temp_reward = body.Time - timeLastAction;
            }
            max = Unum - 1;
            posToPass = mySelfp;
            lastAction = (action < 0) ? 0 : action;
            timeLastAction = body.Time;
            break;
        }
        default:
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
    }

    // Execute the chosen action: action == Unum - 1 means hold, anything else is a
    // pass to that teammate.
    if (action == Unum - 1)
    {
        holdBall(0.7);
        return;
    }
    else
    {
        Vector pos = new Vector(0, 0);
        pos.assign(teammates[action + 1].X, teammates[action + 1].Y);
        //pos.assign(predictPlayerPosAfterNrCycles(teammates[action + 1], 4, 30, null, null, false));
        // Clamp the pass target inside the 20x20 playing region.
        pos.assign((float)Math.Min(20 / 2, Math.Max(-20 / 2, pos.X)),
                   (float)Math.Min(20 / 2, Math.Max(-20 / 2, pos.Y)));
        directPass(pos, "fast");
        return;
    }
}
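// SMDP reward convention, as read from the branches above: decisions are only taken
// while the ball is kickable, so one learning step can span many simulator cycles.
// The reward passed to step() is the elapsed time body.Time - timeLastAction. For
// example, holding the ball for 3 cycles before the next decision point yields
// step(3, newState); losing the ball instead ends the episode via keeper(), which
// calls endEpisode(-1).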
double[] keeperstateVars(int stateCount)
{
    config = RLConfig.Load("RLConfig.xml");
    Vector[] tmatesp = null;
    Vector[] oppPlayersp = null;
    inputs = new double[stateCount];
    int j = 0, h = 0;
    double ang1, ang2, ang3;

    tmatesp = myTeammatesPlayers(teammatesp, config.num_Keepers);
    oppPlayersp = myTeammatesPlayers(opp, config.num_Takers);
    tmatesp[config.num_Keepers - 1] = mySelfp;

    double[] closestmateOpp = new double[teammatesp.Length - 1];
    double[] widthArray = new double[config.numFeatures];
    for (int i = 0; i < config.numFeatures; i++)
    {
        widthArray[i] = 1.0 / config.numFeatures; // 1.0, not 1: integer division would give 0
    }
    for (int i = 0; i < closestmateOpp.Length; i++)
    {
        closestmateOpp[i] = 1000; // sentinel: no taker seen yet
    }
    // Four features per teammate (same layout as keeperWithBall).
    for (int i = 0; i < tmatesp.Length; i++)
    {
        if (i != Unum - 1)
        {
            inputs[j++] = tmatesp[i].distance(mySelfp);
            getClosestInSetTo(oppPlayersp, tmatesp[i], ref closestmateOpp[h]);
            inputs[j++] = closestmateOpp[h++];
            inputs[j++] = DistFromCenter(tmatesp[i]);
            ang1 = mySelfp.angle(tmatesp[i]);
            ang2 = mySelfp.angle(oppPlayersp[0]);
            ang3 = mySelfp.angle(oppPlayersp[1]);
            inputs[j++] = Math.Min(Magnitude(ang1 - ang2), Magnitude(ang1 - ang3));
        }
    }
    // Two features per taker, then my own distance from the centre.
    for (int i = 0; i < oppPlayersp.Length; i++)
    {
        inputs[j++] = oppPlayersp[i].distance(mySelfp);
        inputs[j++] = DistFromCenter(oppPlayersp[i]);
    }
    inputs[j] = DistFromCenter(mySelfp);

    Console.Write("The state inputs are: ");
    for (int i = 0; i < inputs.Length; i++)
    {
        Console.Write("\t {0:f}", inputs[i]);
    }
    return inputs;
}
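// Feature-count check (an inference from the loops above): each of the
// (num_Keepers - 1) teammates contributes 4 features (distance to me, distance to its
// closest taker, distance from the field centre, smallest passing angle); each of the
// num_Takers opponents contributes 2 (distance to me, distance from centre); plus 1
// for my own distance from centre. For basic 3v2 keepaway:
//   numFeatures = 4 * (3 - 1) + 2 * 2 + 1 = 13
// which is the value stateCount must take for that task.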
public void Game()
{
    #region Initialization
    timer.AddReferee(kref);
    config = RLConfig.Load("RLConfig.xml");
    #endregion

    if (config.visualize == 1 && config.learning == 0) // heuristic policy with visualisation
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);
        RoboCup.FieldVisualizer f = new RoboCup.FieldVisualizer();
        std.Realtime = false;
        timer.OnCycle += f.OnStadiumUpdate;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            std.addPlayer(new FixedKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            std.addPlayer(new FixedKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        f.std = std;
        f.backgroundWorker1.DoWork +=
            new System.ComponentModel.DoWorkEventHandler(backgroundWorker1_DoWork);
        Application.Run(f);
    }
    else if (config.visualize == 1 && config.learning == 1) // reinforcement learning with visualisation
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);
        RoboCup.FieldVisualizer f = new RoboCup.FieldVisualizer();
        std.Realtime = false;
        timer.OnCycle += f.OnStadiumUpdate;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < players.Count; i++)
        {
            std.addPlayer(players[i]);
        }
        f.std = std;
        // Invokes an event handler that drives the RLVisualisation methods.
        f.backgroundWorker1.DoWork +=
            new System.ComponentModel.DoWorkEventHandler(backgroundWorker2_DoWork);
        Application.Run(f);
    }
    else // reinforcement learning without visualisation
    {
        Console.WriteLine("Players initialised");
        std.Realtime = false;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < players.Count; i++)
        {
            std.addPlayer(players[i]);
        }
    }
}
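// Run-mode summary, derived from the branches above:
//   visualize = 1, learning = 0 : fixed (hand-coded) players with the field GUI
//   visualize = 1, learning = 1 : learning players with the field GUI
//   anything else               : learning players headless, driven by AllEpisodes()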
public Q_LearningAgent(int numFeatures, int numActions, bool bLearn, double rewards,
    double[] widths, string loadWeightsFile, string saveWeightsFile)
    : base(numFeatures, numActions)
{
    this.numActions = numActions;
    this.numFeatures = numFeatures;
    numTiles = numFeatures * numDimTiles;
    tileWidths = new double[numFeatures];
    Q = new double[numActions];
    weights = new double[numActions, numTiles]; // must be allocated before the loops below
    traces = new double[numActions, numTiles];
    traceAll = new double[numTiles];
    tile = new int[numFeatures, numDimTiles]; // number of tiles in each tiling
    actionTiles = new int[numActions, rl_memory_Size];
    tiless = new Tile[numFeatures, numDimTiles];
    nonzeroTraces = new int[rl_max_nonzero_Traces];
    nonzeroTracesInverse = new int[rl_memory_Size];
    currentState = new int[numActions, numDimTiles];
    previousState = new int[numActions, numDimTiles];
    weightsFile = saveWeightsFile;
    bLearning = bLearn;
    lastReward = rewards;
    for (int i = 0; i < getNumFeatures(); i++)
    {
        tileWidths[i] = widths[i];
    }

    config = RLConfig.Load("RLConfig.xml");
    alpha = config.alpha;
    gamma = config.gamma;
    lambda = config.lambda;
    epsilon = config.epsilon;
    minimumTrace = config.traceability;

    // TODO: each Q(s,a) should eventually be represented per memory slot.
    for (int j = 0; j < numActions; j++)
    {
        Q[j] = 0;
    }
    epochNum = 0;
    lastAction = -1;
    numNonzeroTraces = 0;

    if (config.transfer == 1)
    {
        // Transfer learning: map the source-task weights into this task.
        // Traces are reset for now; saving and reloading them is left to explore.
        weights = loadWeights(loadWeightsFile);
        for (int j = 0; j < numActions; j++)
        {
            for (int i = 0; i < numTiles; i++)
            {
                traces[j, i] = 0;
            }
        }
    }
    else
    {
        for (int j = 0; j < numActions; j++)
        {
            for (int i = 0; i < numTiles; i++)
            {
                weights[j, i] = 0;
                traces[j, i] = 0;
            }
        }
    }
    colTab = new collision_table(rl_memory_Size, 1);
    tiles obj_tiles = new tiles();
}
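/* A minimal usage sketch, mirroring how AllEpisodes constructs the agent. The file
 * names are the ones this repo already uses; everything else follows the constructor
 * signature above, with illustrative width values:
 *
 *   RLConfig config = RLConfig.Load("RLConfig.xml");
 *   double[] widths = new double[config.numFeatures];
 *   for (int i = 0; i < config.numFeatures; i++) { widths[i] = 1.0 / config.numFeatures; }
 *   Q_LearningAgent agent = new Q_LearningAgent(config.numFeatures, config.numActions,
 *       true, 0, widths, "weightsFile.data", "weightsFile.data");
 *
 * Internally the agent keeps one weight and one eligibility trace per (action, tile)
 * pair. With linear tile coding the textbook update, which this class's step() is
 * expected to implement but which has not been verified line by line here, is
 * w += alpha * delta * e, with delta = r + gamma * Q(s', a') - Q(s, a) and traces
 * decayed by gamma * lambda after each step.
 */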
// Inter-task mapping: expand weights learned on a smaller source task (e.g. 3v2)
// to the feature layout of the current, larger target task.
public double[,] loadWeights(string fileToLoad)
{
    config = RLConfig.Load("RLConfig.xml");
    double[,] currentWeight = new double[numActions, numTiles];
    if (File.Exists(fileToLoad))
    {
        BinaryFormatter bf = new BinaryFormatter();
        FileStream file = File.Open(fileToLoad, FileMode.Open);
        weights = (double[,])bf.Deserialize(file);
        file.Close();
        Console.WriteLine("Loading weights from {0}", fileToLoad);
    }
    else
    {
        return null;
    }
    for (int jj = 0; jj < numActions; jj++)
    {
        int j = 0; // read index into the source weights
        int v = 0; // write index into the target weights
        if (jj < sourceActions) // direct transfer: w_target = w_source for shared actions
        {
            // Teammate blocks: 4 features per teammate, tilesPerTiling tiles each.
            for (int k = 0; k < config.num_Keepers - 1; k++)
            {
                if (k < config.source_Keepers - 1)
                {
                    for (int s = 0; s < 4; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
                else
                {
                    // Extra teammate not present in the source task: rewind and
                    // reuse the last source teammate's weights.
                    j -= (4 * tilesPerTiling);
                    for (int s = 0; s < 4; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
            }
            // Taker blocks: 2 features per taker.
            for (int ii = 0; ii < config.num_Takers; ii++)
            {
                if (ii < config.source_Takers)
                {
                    for (int s = 0; s < 2; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
                else
                {
                    // Extra taker: reuse the last source taker's weights.
                    j -= (2 * tilesPerTiling);
                    for (int s = 0; s < 2; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
            }
            // Final block: own distance from centre, copied tile by tile.
            for (int tl = 0; tl < tilesPerTiling; tl++)
            {
                currentWeight[jj, v++] = weights[jj, j++];
            }
        }
        else // an action added in the target task, absent from the source
        {
            for (int i = 0; i < numTiles; i++)
            {
                currentWeight[jj, i] = 0; // start added actions from zero-initialised weights
            }
        }
    }
    return currentWeight;
}
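// Worked example of the mapping above, going from a 3v2 source (source_Keepers = 3,
// source_Takers = 2) to a 4v3 target (num_Keepers = 4, num_Takers = 3), with the
// layout of 4 features per teammate, 2 per taker, and 1 for self, each spanning
// tilesPerTiling tiles: the target has 4*3 + 2*3 + 1 = 19 feature blocks. Teammate
// blocks k = 0 and k = 1 copy straight from the source; k = 2 rewinds j by
// 4 * tilesPerTiling and reuses the last source teammate's weights. Taker blocks
// behave the same way in units of 2 * tilesPerTiling, the self block copies
// one-to-one, and any action index at or beyond sourceActions starts from zeros.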