private void Init(QAgent agent, QOption option) {
    if (Benchmark) {
        BenchmarkSave.CurrentTestID = _instance.BenchmarkID;
        BenchmarkSave.Runs = _instance.BenchmarkRuns;
    } else if (Mode == QAIMode.Testing && BenchmarkID != null && !BenchmarkID.Equals("")) {
        BenchmarkSave.ModelPath = _instance.BenchmarkID;
    } else {
        BenchmarkSave.CurrentTestID = agent.AI_ID().ID;
        BenchmarkSave.Runs = 1;
    }
    Debug.Log("Running " + BenchmarkSave.ModelPath + " in mode " + Mode);
    _stopwatch = Stopwatch.StartNew();
    if (Tester != null) {
        Tester.Init();
    }
    DontDestroyOnLoad(gameObject);
    switch (Mode) {
        case QAIMode.Imitating: {
            _imitation = new QImitation();
            break;
        }
        default: {
            var qlCNN = new QLearningCNN(PrioritizedSweeping, option);
            _qlearning = qlCNN;
            _qlearning.Reset(agent);
            if (Remake) {
                _qlearning.RemakeModel(agent.GetState());
            } else {
                _qlearning.LoadModel();
            }
            if (VisualizeNetwork) {
                _visualizer = _qlearning.CreateVisualizer();
            }
            qlCNN.CNN.ValuesComputed += (data, isTraining) => {
                if (NetworkValuesUpdated != null) {
                    NetworkValuesUpdated(data, isTraining);
                }
            };
            break;
        }
    }
}
// Use this for initialization
void Start() {
    BoardGame.setDebug(debug);
    game = TicTacToe.Instance;
    crossAgent = new TicTacToeAgent(1);
    notAgent = new TicTacToeAgent(-1);
    game.init(ForLearning, dim, 0, ComputerSymbol);
    brain = QLearning.Instance;
    brain.init(game, dim, epsilon, alpha, gamma);
    if (!ForLearning) {
        InitEventTriggers();
        //player.playTicTacToe(dim, NumEpisodes, epsilon, alpha, gamma);
        SetupTiles();
        game.StateMachine.Enable(BackgroundPlane, theTiles, notAgent, crossAgent, BrainType.Player, You);
        // FirstTime
        pubInstance.NotifyListeners("ResetGame");
    }
}
// On "Start" learning button click private void startLearningButton_Click(object sender, EventArgs e) { // get settings GetSettings(); ShowSettings(); iterationBox.Text = string.Empty; // destroy algorithms qLearning = null; sarsa = null; if (algorithmCombo.SelectedIndex == 0) { // create new QLearning algorithm's instance qLearning = new QLearning(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate))); workerThread = new Thread(new ThreadStart(QLearningThread)); } else { // create new Sarsa algorithm's instance sarsa = new Sarsa(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate))); workerThread = new Thread(new ThreadStart(SarsaThread)); } // disable all settings controls except "Stop" button EnableControls(false); // run worker thread needToStop = false; workerThread.Start(); }
public void Create(
    List<MapTile> mapSearching, List<MapTile> mapDelivering,
    Transform parentSearching, Transform parentDelivering,
    QLearning qLSearching, QLearning qLDelivering) {
    this.mapSearching = mapSearching;
    this.mapDelivering = mapDelivering;
    this.parentSearching = parentSearching;
    this.parentDelivering = parentDelivering;
    this.qLSearching = qLSearching;
    this.qLDelivering = qLDelivering;

    parentDelivering.gameObject.SetActive(false);
    activeQLearning = qLSearching;
    searching = true;
    playing = false;

    this.x = qLSearching.initialState.x;
    this.y = qLSearching.initialState.y;
    UpdatePosition();

    // remove
    useExplorationFactor = true;
    playing = true;
}
// Use this for initialization
void Start() {
    relativePath = Application.dataPath;
    Application.runInBackground = true;
    Application.targetFrameRate = 60;

    // instantiate prey
    prey = Instantiate(preyprefab) as GameObject;
    Controller preyController = prey.GetComponent<Controller>();
    preyStateArray = new StateArray();
    preyController.stateArray = preyStateArray;
    preyQlearning = new QLearning(preyController, preyStateArray, prey.GetComponent<PreyScript>().actionSize);
    preyQlearning.bestaction = true;
    preyController.qAlgorithm = preyQlearning;
    if (predatorsolo) {
        preyController.dummy = true;
    }

    // instantiate predator
    predator = Instantiate(predatorprefab) as GameObject;
    Controller predatorController = predator.GetComponent<Controller>();
    predatorStateArray = new StateArray();
    predatorController.stateArray = predatorStateArray;
    predatorQlearning = new QLearning(predatorController, predatorStateArray, predator.GetComponent<PredatorScript>().actionSize);
    predatorQlearning.bestaction = true;
    predatorController.qAlgorithm = predatorQlearning;
    if (preysolo) {
        predatorController.dummy = true;
    }
} // end of Start
/// <summary>
/// Creates the learner
/// </summary>
protected override ILearningAlgorithm<MouseState> CreateLearner() {
    double alpha = 1;
    double gamma = 0.1;
    int stopDecayAt = (int)(0.9 * this.Environment.Config.MaxEpisodes);
    double epsilon = 0.4;

    var selectionPolicy = new EGreedy(
        epsilon,
        this.Environment.Config.Random,
        DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

    return QLearning<MouseState>.New(
        Enum.GetValues(typeof(MouseAction)).Length,
        selectionPolicy,
        alpha,
        gamma,
        this.Environment.Config.Random);

    //return Sarsa<MouseState>.New(
    //    Enum.GetValues(typeof(MouseAction)).Length,
    //    selectionPolicy,
    //    alpha,
    //    gamma,
    //    this.Environment.Config.Random);
}
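The decay arguments above read as (startEpisode, stopEpisode, fromValue, toValue): epsilon falls from 0.4 to 0 over the first 90% of episodes, after which the policy is fully greedy. A minimal sketch of such a schedule, assuming the decay proceeds at a constant (linear) rate; the helper name and signature below are illustrative, not the framework's DecayHelpers API:

// epsilon schedule: hold `from` before `start`, hold `to` after `stop`,
// and interpolate at a constant rate in between
static double DecayEpsilon(int episode, int start, int stop, double from, double to) {
    if (episode <= start) return from;
    if (episode >= stop) return to;
    return from + (to - from) * (episode - start) / (double)(stop - start);
}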
override public void learn(int numEpisodes = 10000) {
    int numCrossWins = 0, numNotWins = 0, numDraws = 0;
    BoardGame.Print(" learning ....");
    QLearning brain = QLearning.Instance;
    for (int i = 0; i < numEpisodes; i++) {
        episode();
        if (winner == TicTacToe.CrossVal) {
            numCrossWins++;
        } else if (winner == TicTacToe.NotVal) {
            numNotWins++;
        } else {
            numDraws++;
        }
    }
    brain.writeTrainingData();
    Debug.Log("Total Episodes " + numEpisodes + " Cum Results: NumCrossWins:" + numCrossWins +
              " NumNotWins: " + numNotWins + " numDraws: " + numDraws);
}
public void UpdateTile(QLearning qLearning) {
    state = qLearning.statesMap[x, y];
    if (state.IsFinalState()) {
        displayText.text = "F";
    } else if (state.IsBlockState()) {
        displayText.text = "B";
    } else {
        displayText.text = state.GreaterQFactor().ToString("0.0");
    }
    if (!state.IsInitialState() && !state.IsFinalState() && !state.IsBlockState()) {
        float c = state.GreaterQFactor() / QLData.FINAL_REWARD;
        image.color = new Color(1f, 1f, 1 - c, 1f);
    }
}
private void StartLearning_OnClick() {
    // reset learning class values
    _qLearning = null;
    _sarsa = null;
    _qLearning_FDGS = null;

    if (References.LearningAlgorithm.value == 0) {
        // create new QLearning algorithm's instance
        _qLearning = new QLearning(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
        _workerThread = new Thread(new ThreadStart(QLearningThread));
    } else if (References.LearningAlgorithm.value == 1) {
        // create new Sarsa algorithm's instance
        _sarsa = new Sarsa(256, 4, new TabuSearchExploration(4, new EpsilonGreedyExploration(explorationRate)));
        _workerThread = new Thread(new ThreadStart(SarsaThread));
    } else {
        // init QLearn
        _qLearning_FDGS = new QLearning_FDGS(actions, _agentStopX, _agentStopY, _map,
            new TabuSearchExploration(actions, new EpsilonGreedyExploration(Convert.ToDouble(explorationRate))));
        _workerThread = new Thread(new ThreadStart(QLearningThread_FDGS));
    }

    // disable all settings controls except "Stop" button
    References.EnableControls(false);

    // run worker thread
    _needToStop = false;
    _workerThread.Start();
    Debug.Log("Learning started. Please wait until training is finished.");
}
void Start() {
    QLearning robot = new QLearning();
    robot.CreateMap(10, 10, 0, 0, 9, 9);
    robot.Loop(6381, true);
}
void Instantiate() {
    this.Movement = GetComponent<CarMovement>();
    sensors = GetComponentsInChildren<Sensor>();
    Learning = new QLearning();
    Points = new Points();
    this.ShowSensors();
    Init();
}
public async Task<MapModel> Post([FromBody] GameRequest gameRequest) {
    var aiSign = gameRequest.AiSign.Equals("X") ? 'X' : 'O';
    var playerSign = gameRequest.AiSign.Equals("X") ? 'O' : 'X';
    const char emptySign = '_';
    Move bestMove;
    switch (gameRequest.ChosenAlgorithm) {
        case "MinMax":
            var ticTacToeMinMax = new TicTacToeMinMax(aiSign, playerSign, emptySign);
            Mapper.FillBoardRequest(gameRequest, board, emptySign);
            bestMove = ticTacToeMinMax.FindBestMove(board);
            Mapper.MapAiMove(bestMove, gameRequest, aiSign);
            return gameRequest.MapModel;
        case "Q-Learning":
            string response;
            if (Game.BoardIsEmpty(gameRequest, emptySign)) {
                response = await QLearning.AIStart();
            } else {
                var position = Mapper.FindCurrentMovePosition(gameRequest, board, emptySign);
                response = await QLearning.Move(position);
            }
            Mapper.PythonResponseToMapModel(response, gameRequest.MapModel);
            if (Game.BoardIsFull(gameRequest, emptySign)) {
                await QLearning.Start();
            }
            Mapper.FillBoardRequest(gameRequest, board, emptySign);
            return gameRequest.MapModel;
        case "Monkey":
            var ticTacToeMonkeyAI = new TicTacToeMonkeyAI(emptySign);
            Mapper.FillBoardRequest(gameRequest, board, emptySign);
            bestMove = ticTacToeMonkeyAI.FindBestMove(board);
            Mapper.MapAiMove(bestMove, gameRequest, aiSign);
            return gameRequest.MapModel;
        default:
            return Simulator.Simulate(gameRequest);
    }
}
private void qLearning(State currentState, Strategie strategie) {
    var qValue = strategie.getQValueForStrategie(currentState, qValues);
    ExecuteAction(qValue.getAction());
    if (!lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin)) {
        qValues = QLearning.updateTable(lastQValue, qValue, qValues, getReward());
    }
    lastState = currentState;
    lastQValue = qValue;
}
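A minimal sketch of the standard tabular update that a call like QLearning.updateTable above presumably performs; alpha and gamma are illustrative names, not taken from the snippet:

// Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
static double UpdatedQ(double oldQ, double reward, double maxNextQ, double alpha, double gamma) {
    return oldQ + alpha * (reward + gamma * maxNextQ - oldQ);
}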
public void FindPath() {
    boardStateGraph = board.GetBoardGraph();
    Debug.Log(Graph.ToString(boardStateGraph));
    QLearning qLearning = new QLearning(epochs, actions, rewards, boardStateGraph, learningRate, discountFactor);
    qLearning.Train();
    solution = qLearning.GetPath();
    StartMove();
}
// Use this for initialization
void Start() {
    QLearning qlSearch = new QLearning();
    QLearning qlDeliver = new QLearning();
    qlSearch.CreateMap(10, 10, 0, 0, 9, 9);
    qlDeliver.CreateMap(10, 10, 9, 9, 0, 0);
    qlSearch.Loop(4572, true);
    qlDeliver.Loop(4572, true);
    GetComponent<Map>().CreateMap(qlSearch, qlDeliver);
}
public void AgentTurn(Rules _rules) {
    rules = _rules;
    boardStateGraph = board.GetBoardGraph();
    Debug.Log(Graph.ToString(boardStateGraph));
    rules.GetRewards();
    this.actions = rules.actions;
    this.rewards = rules.rewardTiles;
    QLearning qLearning = new QLearning(epochs, actions, rewards, boardStateGraph, learningRate, discountFactor);
    // qLearning.Train();
}
/// <summary>
/// Runs the example
/// </summary>
public static void Run() {
    var slotMachines = new List<SlotMachine>();
    slotMachines.Add(new SlotMachine(20, 120));
    slotMachines.Add(new SlotMachine(5, 100));
    slotMachines.Add(new SlotMachine(40, 150));
    slotMachines.Add(new SlotMachine(25, 130));
    slotMachines.Add(new SlotMachine(25, 120));
    slotMachines.Add(new SlotMachine(60, 120));

    var random = new Random(1337);
    int trainingEpisodes = 10000;
    double decayRatio = 0.4;
    var environment = new MultiArmedBanditEnvironment(new Configuration(trainingEpisodes, random), slotMachines);

    var agent = new StatelessAgent<MultiArmedBanditEnvironment>(env => {
        double alpha = 0.05;
        double gamma = 0.1;
        int stopDecayAt = (int)(decayRatio * env.Config.MaxEpisodes);
        double epsilon = 0.1;
        var selectionPolicy = new EGreedy(
            epsilon,
            env.Config.Random,
            DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));
        return QLearning<EmptyState>.New(
            slotMachines.Count,
            selectionPolicy,
            alpha,
            gamma,
            env.Config.Random);
    });

    environment.AddAgent(agent);
    environment.Initialize();

    for (int episode = 0; episode < environment.Config.MaxEpisodes; episode++) {
        environment.Reset(episode);
        environment.Update(episode);
    }

    Console.WriteLine(string.Format("Total reward: {0}", environment.TotalReward));
    Console.ReadLine();
}
private void OnTriggerEnter(Collider collider) {
    if (collider.GetComponent<QLearning>()) {
        auto = collider.GetComponent<QLearning>();
        auto.snelheid = 0;
        auto.gebotst = true;
        auto.Reset();
    }
    if (collider.GetComponent<AutoScript1>()) {
        auto1 = collider.GetComponent<AutoScript1>();
        auto1.moveSpeed = 0;
        auto1.gebotst = true;
    }
}
// On "Stop" button click private void stopButton_Click(object sender, EventArgs e) { if (workerThread != null) { // stop worker thread needToStop = true; while (!workerThread.Join(100)) { Application.DoEvents(); } workerThread = null; } // reset learning class values qLearning = null; sarsa = null; }
// If a saved file exists, load it
void load() {
    // If the file exists, load it, raise the iteration cap to 600, and lower epsilon
    if (File.Exists(Application.dataPath + "/Scripts/Grupo/Solucion3/QLearningInfo" + seed + ".dat")) {
        BinaryFormatter bf = new BinaryFormatter();
        FileStream file = File.OpenRead(Application.dataPath + "/Scripts/Grupo/Solucion3/QLearningInfo" + seed + ".dat");
        aprender = (QLearning)bf.Deserialize(file);
        file.Close();
        aprender.setE(0.05f);
        aprender.maxIterations = 600;
        // Enable this option to print the QTable values
        //aprender.printTable();
        Debug.Log("Loaded");
    }
}
void SwapMap() {
    if (searching) {
        searching = false;
        activeQLearning = qLDelivering;
        parentDelivering.gameObject.SetActive(true);
        parentSearching.gameObject.SetActive(false);
    } else {
        searching = true;
        activeQLearning = qLSearching;
        parentDelivering.gameObject.SetActive(false);
        parentSearching.gameObject.SetActive(true);
    }
}
private void OnTriggerEnter(Collider collider) {
    if (collider.GetComponent<QLearning>()) {
        auto = collider.GetComponent<QLearning>();
        auto.aantalRonden += 1;
        if (auto.aantalRonden > qTable.ronden) {
            qTable.ronden = auto.aantalRonden;
            qTable.voegToePublic();
            if (auto.aantalRonden == 10) {
                qTable.Reset();
            }
        }
    }
    if (collider.GetComponent<AutoScript1>()) {
        auto1 = collider.GetComponent<AutoScript1>();
        auto1.aantalRonden += 1;
        if (auto1.aantalRonden > NeuralNetwork.ronden) {
            print(NeuralNetwork.frame + ", " + auto1.aantalRonden);
            NeuralNetwork.ronden = auto1.aantalRonden;
            NeuralNetwork.voegToePublic();
            if (auto1.aantalRonden == 10) {
                NeuralNetwork.Resette();
            }
        }
        //if (auto.aantalRonden == 10)
        //{
        //    NeuralNetwork.Reset();
        //}
    }
}
public void CreateMap(QLearning qLSearching, QLearning qLDelivering) {
    List<MapTile> mapSearching = new List<MapTile>();
    for (int x = 0; x < qLSearching.sizeX; x++) {
        for (int y = 0; y < qLSearching.sizeY; y++) {
            MapTile newMapTile = Instantiate<MapTile>(objectMapTile);
            newMapTile.transform.SetParent(this.parentSearching, false);
            newMapTile.Create(qLSearching.statesMap[x, y]);
            mapSearching.Add(newMapTile);
        }
    }

    List<MapTile> mapDelivering = new List<MapTile>();
    for (int x = 0; x < qLDelivering.sizeX; x++) {
        for (int y = 0; y < qLDelivering.sizeY; y++) {
            MapTile newMapTile = Instantiate<MapTile>(objectMapTile);
            newMapTile.transform.SetParent(this.parentDelivering, false);
            newMapTile.Create(qLDelivering.statesMap[x, y]);
            mapDelivering.Add(newMapTile);
        }
    }

    Robot robot = Instantiate<Robot>(objectRobot);
    robot.transform.SetParent(this.transform, false);
    robot.Create(mapSearching, mapDelivering, parentSearching, parentDelivering, qLSearching, qLDelivering);
}
private void StartButton_Click(object sender, RoutedEventArgs e) {
    // Make the buttons inactive
    AStarRadioButton.IsEnabled = false;
    QLearningRadioButton.IsEnabled = false;
    LoadGridButton.IsEnabled = false;
    StartButton.IsEnabled = false;

    searchTimer = new DispatcherTimer();
    searchTimer.Interval = TimeSpan.FromMilliseconds(UPDATE_FREQUENCY);

    if ((bool)AStarRadioButton.IsChecked) {
        if (aStarSearch == null) {
            aStarSearch = new AStar(gridWorld, startingCell, rewardCell);
        }
        searchTimer.Tick += new EventHandler(aStarSearch.Run);
        searchTimer.Tick += new EventHandler(UpdateAgentPosition);
    } else { // Q-Learning is checked
        if (qLearningSearch == null) {
            qLearningSearch = new QLearning(gridWorld, startingCell, rewardCell);
        }
        searchTimer.Tick += new EventHandler(qLearningSearch.Run);
        searchTimer.Tick += new EventHandler(UpdateAgentPosition);
    }

    searchTimer.Start();
}
public void QLearningGridPathTest() {
    var random = new Random(1337);
    var gridSize = 16;
    var grid = new int[gridSize, gridSize];
    var ql = new QLearning(gridSize * gridSize, 4, new EpsilonGreedyExplorationPolicy(0.0), 0.3, 0.8);
    var pathLength = 20;
    var maxReward = 0.0;

    for (var x = 0; x < gridSize; x++) {
        for (var y = 0; y < gridSize; y++) {
            grid[x, y] = random.Next(0, 100);
        }
    }

    // train
    for (var i = 0; i < 100; i++) {
        var x = random.Next(0, gridSize);
        var y = random.Next(0, gridSize);
        var currentReward = 0.0;
        ql.Begin(x + y * gridSize);

        for (var j = 0; j < pathLength; j++) {
            switch ((Action)ql.SelectedAction) {
                case Action.UP: y++; break;
                case Action.DOWN: y--; break;
                case Action.LEFT: x--; break;
                case Action.RIGHT: x++; break;
            }

            var r = 0;
            if (x < 0 || x >= gridSize || y < 0 || y >= gridSize) {
                x = x < 0 ? 0 : x >= gridSize ? gridSize - 1 : x;
                y = y < 0 ? 0 : y >= gridSize ? gridSize - 1 : y;
            } else {
                r = grid[x, y];
            }

            currentReward += r;
            var nextState = x + y * gridSize;
            ql.Step(r, nextState);
        }

        if (currentReward > maxReward) {
            maxReward = currentReward;
        }
    }

    Assert.AreEqual(896, maxReward);
}
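The test flattens a 2-D cell into a single integer state with x + y * gridSize. A sketch of that encoding and its inverse, for reference (helper names are illustrative, not part of the test):

static int Encode(int x, int y, int gridSize) => x + y * gridSize;
static (int x, int y) Decode(int state, int gridSize) => (state % gridSize, state / gridSize);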
/*
 * given by the runGame
 */
public void setQLearning(QLearning algorithm) {
    qAlgorithm = algorithm;
}
/// <summary>
/// Main constructor, currently only calls the AForge QLearning class constructor
/// </summary>
/// <param name="states">number of states</param>
/// <param name="actions">number of actions</param>
/// <param name="exploration">epsilon-greedy exploration policy</param>
public Q(int states, int actions, EpsilonGreedyExploration exploration) {
    qLearning = new QLearning(states, actions, exploration, false);
}
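Illustrative construction of the wrapper above, reusing the 256-state / 4-action setup that appears in the other AForge-based snippets in this collection; the values are examples, not requirements:

var q = new Q(256, 4, new EpsilonGreedyExploration(0.5));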
public void learn_test() {
    #region doc_main
    // Fix the random number generator
    Accord.Math.Random.Generator.Seed = 0;

    // In this example, we will be using the QLearning algorithm
    // to make a robot learn how to navigate a map. The map is
    // shown below, where a 1 denotes a wall and 0 denotes areas
    // where the robot can navigate:
    //
    int[,] map =
    {
        { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
        { 1, 1, 0, 0, 0, 0, 0, 0, 1 },
        { 1, 1, 0, 0, 0, 1, 1, 0, 1 },
        { 1, 0, 0, 1, 0, 0, 0, 0, 1 },
        { 1, 0, 0, 1, 1, 1, 1, 0, 1 },
        { 1, 0, 0, 1, 1, 0, 0, 0, 1 },
        { 1, 1, 0, 1, 0, 0, 0, 0, 1 },
        { 1, 1, 0, 1, 0, 1, 1, 0, 1 },
        { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    };

    // Now, we define the initial and target points from which the
    // robot will be spawned and where it should go, respectively:
    int agentStartX = 1;
    int agentStartY = 4;

    int agentStopX = 7;
    int agentStopY = 4;

    // The robot is able to sense the environment through 8 sensors
    // that capture whether the robot is near a wall or not. Based
    // on the robot's current location, the sensors will return an
    // integer number representing which sensors have detected walls:
    Func<int, int, int> getState = (int x, int y) =>
    {
        int c1 = (map[y - 1, x - 1] != 0) ? 1 : 0;
        int c2 = (map[y - 1, x + 0] != 0) ? 1 : 0;
        int c3 = (map[y - 1, x + 1] != 0) ? 1 : 0;
        int c4 = (map[y + 0, x + 1] != 0) ? 1 : 0;
        int c5 = (map[y + 1, x + 1] != 0) ? 1 : 0;
        int c6 = (map[y + 1, x + 0] != 0) ? 1 : 0;
        int c7 = (map[y + 1, x - 1] != 0) ? 1 : 0;
        int c8 = (map[y + 0, x - 1] != 0) ? 1 : 0;

        return c1 | (c2 << 1) | (c3 << 2) | (c4 << 3) | (c5 << 4) | (c6 << 5) | (c7 << 6) | (c8 << 7);
    };

    // The actions are the possible directions the robot can go:
    //
    //   - case 0: go to north (up)
    //   - case 1: go to east (right)
    //   - case 2: go to south (down)
    //   - case 3: go to west (left)
    //
    int learningIterations = 1000;
    double explorationRate = 0.5;
    double learningRate = 0.5;

    double moveReward = 0;
    double wallReward = -1;
    double goalReward = 1;

    // The function below specifies how the robot should perform an action given its
    // current position and an action number. This will cause the robot to update its
    // current X and Y locations given the direction (above) it was instructed to go:
    Func<int, int, int, Tuple<double, int, int>> doAction = (int currentX, int currentY, int action) =>
    {
        // default reward is equal to moving reward
        double reward = moveReward;

        // moving direction
        int dx = 0, dy = 0;

        switch (action)
        {
            case 0: // go to north (up)
                dy = -1;
                break;
            case 1: // go to east (right)
                dx = 1;
                break;
            case 2: // go to south (down)
                dy = 1;
                break;
            case 3: // go to west (left)
                dx = -1;
                break;
        }

        int newX = currentX + dx;
        int newY = currentY + dy;

        // check new agent's coordinates
        if ((map[newY, newX] != 0) || (newX < 0) || (newX >= map.Columns()) || (newY < 0) || (newY >= map.Rows()))
        {
            // we found a wall or got outside of the world
            reward = wallReward;
        }
        else
        {
            currentX = newX;
            currentY = newY;

            // check if we found the goal
            if ((currentX == agentStopX) && (currentY == agentStopY))
                reward = goalReward;
        }

        return Tuple.Create(reward, currentX, currentY);
    };

    // After defining all those functions, we create a new QLearning algorithm:
    var explorationPolicy = new EpsilonGreedyExploration(explorationRate);
    var tabuPolicy = new TabuSearchExploration(4, explorationPolicy);
    var qLearning = new QLearning(256, 4, tabuPolicy);

    // current coordinates of the agent
    int agentCurrentX = -1;
    int agentCurrentY = -1;

    bool needToStop = false;
    int iteration = 0;

    // learning loop
    while ((!needToStop) && (iteration < learningIterations))
    {
        // set exploration rate for this iteration
        explorationPolicy.Epsilon = explorationRate - ((double)iteration / learningIterations) * explorationRate;

        // set learning rate for this iteration
        qLearning.LearningRate = learningRate - ((double)iteration / learningIterations) * learningRate;

        // clear tabu list
        tabuPolicy.ResetTabuList();

        // reset agent's coordinates to the starting position
        agentCurrentX = agentStartX;
        agentCurrentY = agentStartY;

        // steps performed by agent to get to the goal
        int steps = 0;

        while ((!needToStop) && ((agentCurrentX != agentStopX) || (agentCurrentY != agentStopY)))
        {
            steps++;

            // get agent's current state
            int currentState = getState(agentCurrentX, agentCurrentY);

            // get the action for this state
            int action = qLearning.GetAction(currentState);

            // update agent's current position and get his reward
            var r = doAction(agentCurrentX, agentCurrentY, action);
            double reward = r.Item1;
            agentCurrentX = r.Item2;
            agentCurrentY = r.Item3;

            // get agent's next state
            int nextState = getState(agentCurrentX, agentCurrentY);

            // do learning of the agent - update his Q-function
            qLearning.UpdateState(currentState, action, reward, nextState);

            // set tabu action
            tabuPolicy.SetTabuAction((action + 2) % 4, 1);
        }

        System.Diagnostics.Debug.WriteLine(steps);
        iteration++;
    }

    // The end position for the robot will be (7, 4):
    int finalPosX = agentCurrentX; // 7
    int finalPosY = agentCurrentY; // 4
    #endregion

    Assert.AreEqual(7, finalPosX);
    Assert.AreEqual(4, finalPosY);
}
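By the last iterations of the loop above, both epsilon and the learning rate have decayed close to 0, so qLearning.GetAction is effectively greedy. A minimal sketch of a policy replay that could be appended after the training loop (before the asserts), using only names already defined in that test; the step cap is a safeguard in case the learned policy is imperfect:

// walk the (near-)greedy policy from the start cell toward the goal
tabuPolicy.ResetTabuList();
int x = agentStartX, y = agentStartY;
for (int step = 0; step < 100 && (x != agentStopX || y != agentStopY); step++)
{
    int action = qLearning.GetAction(getState(x, y));
    var result = doAction(x, y, action);
    x = result.Item2;
    y = result.Item3;
}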
private void Update() {
    if (!GameManager.instance.playerTurn) {
        return;
    }
    timer += Time.deltaTime;
    if (timer > 0.1f) {
        int horizontal = 0;
        int vertical = 0;
        int oneMove = 0;
        int chooseRandom = 0;
        double reward = -1;
        QLearning q = new QLearning();
        //QAction fromTo;
        //QState state;
        string stateName;
        string stateNameNext;
        //string which_action;
        System.Random rnd = new System.Random();
        double Qmax = 0;
        double q0 = 0;
        double q1 = 0;
        double q2 = 0;
        double q3 = 0;

        if (!QValue.ContainsKey(preX + "," + preY)) {
            double[] defaults = new double[] { 0.0, 0.0, 0.0, 0.0 };
            QValue.Add(preX + "," + preY, defaults);
        }
        stateName = preX + "," + preY;

        // epsilon-greedy: explore with probability 0.3, otherwise act greedily
        chooseRandom = rnd.Next(0, 10);
        if (chooseRandom <= 2) {
            oneMove = rnd.Next(0, 4);
            if (oneMove == 0) {
                horizontal = 1; vertical = 0; cx = cx + 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else if (oneMove == 1) {
                horizontal = -1; vertical = 0; cx = cx - 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else if (oneMove == 2) {
                vertical = 1; horizontal = 0; cy = cy + 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else {
                vertical = -1; horizontal = 0; cy = cy - 1;
                AttemptMove<Wall>(horizontal, vertical);
            }
        } else {
            q0 = QValue[stateName][0];
            q1 = QValue[stateName][1];
            q2 = QValue[stateName][2];
            q3 = QValue[stateName][3];
            // remember the chosen action's index so the update below
            // credits the action actually taken
            if (q0 >= q1 && q0 >= q2 && q0 >= q3) {
                oneMove = 0;
                horizontal = 1; vertical = 0; cx = cx + 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else if (q1 >= q2 && q1 >= q3) {
                oneMove = 1;
                horizontal = -1; vertical = 0; cx = cx - 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else if (q2 >= q3) {
                oneMove = 2;
                vertical = 1; horizontal = 0; cy = cy + 1;
                AttemptMove<Wall>(horizontal, vertical);
            } else {
                oneMove = 3;
                vertical = -1; horizontal = 0; cy = cy - 1;
                AttemptMove<Wall>(horizontal, vertical);
            }
        }

        currentX = cx.ToString();
        currentY = cy.ToString();
        if (!QValue.ContainsKey(currentX + "," + currentY)) {
            double[] defaults = new double[] { 0.0, 0.0, 0.0, 0.0 };
            QValue.Add(currentX + "," + currentY, defaults);
        }

        stateNameNext = currentX + "," + currentY;
        if (stateNameNext == "7,7") {
            reward = 100;
        } else {
            reward = -1;
        }

        // Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a));
        // the entry being read and written is the *previous* state's,
        // while Qmax comes from the next state
        Qmax = QValue[stateNameNext].Max();
        QValue[stateName][oneMove] = QValue[stateName][oneMove] + alpha * (reward + gamma * Qmax - QValue[stateName][oneMove]);

        preX = currentX;
        preY = currentY;
        horizontal = 0;
        vertical = 0;
        timer = 0.0f;
    }
}
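The hand-rolled selection above can be factored into a reusable helper. This sketch mirrors the same 30% exploration rate and 0..3 action encoding; the method name is illustrative and QValue is the dictionary from the snippet:

// returns an action index: random 30% of the time, otherwise the argmax of Q[state]
int ChooseAction(string state, System.Random rnd) {
    if (rnd.Next(0, 10) <= 2) {
        return rnd.Next(0, 4);
    }
    double[] qs = QValue[state];
    int best = 0;
    for (int a = 1; a < 4; a++) {
        if (qs[a] > qs[best]) {
            best = a;
        }
    }
    return best;
}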
void Update() {
    // Detect whether the game, algorithm, or level selection changed
    if (selectedGame != oldGame || selectedAlgo != oldAlgo || selectedSokobanLevel != oldSokobanLevel) {
        try {
            btnComponent.onClick.RemoveListener(game.TaskOnClick);
        } catch {
            Debug.Log("No listener attached to the play button.");
        }

        // update old values
        oldGame = selectedGame;
        oldAlgo = selectedAlgo;
        oldSokobanLevel = selectedSokobanLevel;

        // Select the RL algorithm
        Base algoType;
        if (selectedAlgo == Algo.MarkovPolicy) {
            algoType = new MarkovPolicy();
        } else if (selectedAlgo == Algo.MarkovValue) {
            algoType = new MarkovValue();
        } else if (selectedAlgo == Algo.MonteCarlo) {
            algoType = new MonteCarlo();
        } else if (selectedAlgo == Algo.SARSA) {
            algoType = new SARSA();
        } else {
            algoType = new QLearning();
        }

        // Select the game
        Type gameType;
        if (selectedGame == Game.GridWorld) {
            gameType = typeof(GridWorld.GridWorld<>);
        } else if (selectedGame == Game.TicTacToe) {
            gameType = typeof(TicTacToe.TicTacToe<>);
        } else {
            gameType = typeof(Sokoban.Sokoban<>);
        }

        // Create the game instance for the RL algorithm
        var type = gameType.MakeGenericType(algoType.GetType());
        game = (IGame)Activator.CreateInstance(type);

        // Clear the screen
        GameObject[] gos = GameObject.FindGameObjectsWithTag("Tile");
        foreach (GameObject go in gos) {
            var tile = go.GetComponent<SpriteRenderer>();
            tile.color = new Color(0, 0, 0, 0);
        }

        // Hide the player
        goPlayer.transform.position = new Vector3(50, 50, 0);

        // Start the game and time it
        var watch = System.Diagnostics.Stopwatch.StartNew();
        game.Start();
        watch.Stop();
        elapsedMs = watch.ElapsedMilliseconds;

        // Debug: advance the game step by step via the play button
        btnComponent.onClick.AddListener(game.TaskOnClick);
    }
    game.Update();
}