Esempio n. 1
0
        /// <summary>
        /// Executes one tick of the Q-Learning search: logs and advances the step counter,
        /// applies the step and episode limits, moves the agent by one cell and, while
        /// training is enabled, refreshes the Q-values of the cell that was just entered.
        /// </summary>
        /// <remarks>
        /// If the agent is already standing on the reward cell, the tick only reports that
        /// fact and returns without moving or updating any Q-value.
        /// NOTE(review): the (sender, e) signature suggests this is wired up as an event
        /// handler (e.g. a timer tick) - confirm against the subscriber.
        /// </remarks>
        /// <param name="sender">Event source; not read by this method.</param>
        /// <param name="e">Event data; not read by this method.</param>
        public void Run(object sender, EventArgs e)
        {
            // The counter is advanced before the message is written; the message shows
            // the value the counter had on entry to this tick.
            var loggedStep = stepCount++;
            System.Console.Out.WriteLine("Running Q-Learning; Step Count: " + loggedStep);

            // The episode restart must run before the episode-limit check below, so the
            // two checks are kept in this order.
            bool stepLimitExceeded = stepCount > MAX_SEARCH_STEPS;
            if (stepLimitExceeded)
            {
                RestartEpisode(null);
            }

            bool episodeLimitExceeded = episodeCount > NUMBER_OF_EPISODES;
            if (episodeLimitExceeded)
            {
                training = false;
            }

            // Nothing left to do on this tick once the agent sits on the reward cell.
            if (currentCell.IsRewardCell())
            {
                System.Console.Out.WriteLine("Found the reward");
                return;
            }

            // Ask for the preferred direction, then pass it through the transition
            // function, which decides the direction that is actually taken
            // (the implementation is stochastic, so the two may differ).
            Direction preferredDirection = GetBestDirection(currentCell);
            Direction takenDirection = Transition(currentCell, preferredDirection);

            // Step into the neighbouring cell and flag it as searched.
            currentCell = gridWorld.GetCell(currentCell, takenDirection);
            currentCell.SetHasBeenSearched(true);

            if (!training)
            {
                return;
            }

            // Training: update the Q-value of the newly entered cell for every real
            // direction in which a move from that cell is possible.
            foreach (Direction direction in Enum.GetValues(typeof(Direction)))
            {
                if (direction == Direction.NONE)
                {
                    continue;
                }

                bool moveAllowed = gridWorld.CanMove(
                    direction,
                    currentCell.GetRowIndex(),
                    currentCell.GetColumnIndex());
                if (!moveAllowed)
                {
                    continue;
                }

                UpdateQValue(currentCell, direction);
            }
        }