// Gamma value used by the Q-Learning rule.
private const double GAMMA = 0.99;

/// <summary>
/// Builds a Q-Learning instance for the given grid world and allocates its Q-table.
/// </summary>
/// <param name="gridWorld">Grid world whose cell array determines the first two Q-table dimensions.</param>
/// <param name="startingCell">Cell stored as the initial current cell.</param>
/// <param name="rewardCell">Cell stored as the reward cell.</param>
public QLearning(GridWorld gridWorld, Cell startingCell, Cell rewardCell)
{
    this.currentCell = startingCell;
    this.rewardCell = rewardCell;
    this.gridWorld = gridWorld;

    // One Q-value per (cell-array position, move) triple; a new double array starts out all zeros.
    int firstDimension = gridWorld.GetCells().GetLength(0);
    int secondDimension = gridWorld.GetCells().GetLength(1);
    this.qTable = new double[firstDimension, secondDimension, NUMBER_OF_MOVES];
}
// Flag distinguishing training from testing; initialised to true.
private bool training = true;

#endregion Fields

#region Constructors

/// <summary>
/// Creates a Q-Learning instance bound to a grid world, remembering the starting and
/// reward cells and allocating the Q-table.
/// </summary>
/// <param name="gridWorld">Grid world whose cell array sizes the first two Q-table dimensions.</param>
/// <param name="startingCell">Cell stored as the initial current cell.</param>
/// <param name="rewardCell">Cell stored as the reward cell.</param>
public QLearning(GridWorld gridWorld, Cell startingCell, Cell rewardCell)
{
    this.currentCell = startingCell;
    this.rewardCell = rewardCell;
    this.gridWorld = gridWorld;

    // The Q-table mirrors the two extents of the cell array, with one slot per move.
    int extent0 = gridWorld.GetCells().GetLength(0);
    int extent1 = gridWorld.GetCells().GetLength(1);
    this.qTable = new double[extent0, extent1, NUMBER_OF_MOVES];
}
/// <summary>
/// Executes a single step of the search: takes the next unsearched cell from the end of
/// the unvisited list, marks it searched, records it as visited, and hands each neighbor
/// the grid allows moving to over to ProcessNeighbor. Returns early when no unvisited
/// cells remain or when the selected cell is the reward cell.
/// </summary>
/// <param name="sender">Event source; not read by this method.</param>
/// <param name="e">Event arguments; not read by this method.</param>
public void Run(object sender, EventArgs e)
{
    System.Console.Out.WriteLine("Running A*; Step Count: " + stepCount++);

    // Pop cells off the back of the list until one turns up that has not been searched yet.
    do
    {
        if (unvisitedCells.Count == 0)
        {
            return;
        }

        currentCell = unvisitedCells.Last.Value;
        unvisitedCells.RemoveLast();
    }
    while (currentCell.HasBeenSearched());

    currentCell.SetHasBeenSearched(true);
    visitedCells.AddLast(currentCell);

    if (currentCell.IsRewardCell())
    {
        System.Console.Out.WriteLine("Found the reward");
        return;
    }

    // Each move is paired with the row/column offset of the cell it leads to.
    // The order (UP, DOWN, LEFT, RIGHT) is the order neighbors are handed to ProcessNeighbor.
    var moves = new[] { Direction.UP, Direction.DOWN, Direction.LEFT, Direction.RIGHT };
    int[] rowOffsets = { -1, 1, 0, 0 };
    int[] columnOffsets = { 0, 0, -1, 1 };

    for (int i = 0; i < moves.Length; i++)
    {
        var row = currentCell.GetRowIndex();
        var column = currentCell.GetColumnIndex();

        if (gridWorld.CanMove(moves[i], row, column))
        {
            // ProcessNeighbor is defined elsewhere; per the original note it sets the
            // neighbor's cost and adds it to unvisitedCells.
            ProcessNeighbor(gridWorld.GetCells()[row + rowOffsets[i], column + columnOffsets[i]]);
        }
    }
}
// Random source reused across restarts. Creating a new Random on every call can repeat
// values on runtimes that seed from the system clock when calls happen in quick succession.
private readonly Random episodeStartRandom = new Random();

/// <summary>
/// Restarts an episode of Q-Learning: resets the step count, increments the episode
/// counter, and moves the current position to the provided starting position, or to a
/// randomly chosen cell when none is provided.
/// </summary>
/// <param name="startingPosition">
/// The starting position for the episode. If null is provided, a starting position is
/// drawn uniformly at random from the cells that are neither obstacles nor the reward cell.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="startingPosition"/> is null and the grid contains no cell
/// that is neither an obstacle nor the reward cell.
/// </exception>
public void RestartEpisode(Cell startingPosition)
{
    // Reset the step count for the next episode
    stepCount = 0;

    // Increment the episode counter
    episodeCount++;

    if (startingPosition != null)
    {
        currentCell = startingPosition;
        return;
    }

    // Collect every cell the agent may start on (not an obstacle, not the reward cell).
    List<Cell> availableCells = new List<Cell>();
    foreach (Cell cell in gridWorld.GetCells())
    {
        if (!cell.IsObstacle() && !cell.IsRewardCell())
        {
            availableCells.Add(cell);
        }
    }

    if (availableCells.Count == 0)
    {
        throw new InvalidOperationException(
            "Cannot choose a random starting position: the grid has no cell that is neither an obstacle nor the reward cell.");
    }

    // Random.Next(maxValue) excludes maxValue, so passing Count makes every index
    // 0..Count-1 eligible, including the last available cell.
    int nextCellIndex = episodeStartRandom.Next(availableCells.Count);
    currentCell = availableCells[nextCellIndex];
}