Example #1
        public String getPolicyAsString()
        {
            String build = "";

            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    MarkovState state = get(x, y);

                    // Right-align the cell's estimated value in a field four characters wide.
                    String num = "" + ((int)state.estimatedValue);

                    String buf  = " ";
                    int    size = 4 - num.Length;

                    for (int i = 0; i < size; i++)
                    {
                        buf += " ";
                    }

                    build += buf + num + " ";

                    // Append the name of the action the current policy picks for this cell.
                    state.calculatePolicy();
                    if (state.policy != null)
                    {
                        build += state.policy.getName() + " ";
                    }
                }
                build += "\n";
            }
            return build;
        }
Example #2
        private void createActions(int x, int y)
        {
            MarkovState state = get(x, y);

            state.addAction(createAction(Direction.up, x, y));
            state.addAction(createAction(Direction.down, x, y));
            state.addAction(createAction(Direction.left, x, y));
            state.addAction(createAction(Direction.right, x, y));
        }
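The createAction helper used above is not shown in these examples. Below is a minimal sketch under stated assumptions: the action type (here called MarkovAction, a hypothetical name with a hypothetical addResult method), deterministic moves, a settable probability field on ActionResult, and clamping at the grid border so a move off the edge leaves the agent in place.

        // Hypothetical sketch; MarkovAction and addResult are assumed names, and the
        // real createAction used by these examples may differ.
        private MarkovAction createAction(Direction direction, int x, int y)
        {
            int targetX = x;
            int targetY = y;

            // Step one cell in the requested direction, clamped to the grid.
            if (direction == Direction.up)    { targetY = Math.Max(y - 1, 0); }
            if (direction == Direction.down)  { targetY = Math.Min(y + 1, height - 1); }
            if (direction == Direction.left)  { targetX = Math.Max(x - 1, 0); }
            if (direction == Direction.right) { targetX = Math.Min(x + 1, width - 1); }

            // Deterministic dynamics: a single result with probability 1.
            ActionResult result = new ActionResult(get(targetX, targetY));
            result.probability  = 1.0;

            MarkovAction action = new MarkovAction(direction.ToString());
            action.addResult(result);
            return action;
        }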
Example #3
        private bool runEpoch()
        {
            bool        policyHasChanged = false;
            int         usedActions      = 0;
            MarkovState current          = problem.getStartState();

            while (!current.isTerminal && usedActions < maxActionsInEpoch)
            {
                double oldEstimatedValue = current.estimatedValue;

                // Expected value of the action the current policy selects for this state.
                current.calculatePolicy();
                double bestActionValue = 0;
                foreach (ActionResult result in current.policy.getPossibleResults())
                {
                    bestActionValue += result.state.estimatedValue * result.probability;
                }

                // Temporal-difference update: move the estimate towards
                // reward + gamma * (expected value under the chosen action).
                double newEstimatedValue = oldEstimatedValue + learningRate * (current.value + gamma * bestActionValue - oldEstimatedValue);
                current.nextEstimatedValue = newEstimatedValue;

                if (rand.NextDouble() > epsilon)
                {
                    // Explore: take a random action.
                    int actionIndex = rand.Next(current.actions.Count());

                    current = executer.getResult(current.actions[actionIndex]);
                }
                else
                {
                    // Exploit: follow the action chosen by the policy.
                    current = executer.getResult(current.policy);
                }

                // Any change in an estimated value means learning has not yet settled.
                if (oldEstimatedValue != newEstimatedValue)
                {
                    policyHasChanged = true;
                }

                usedActions++;
            }

            // Update the problem after the epoch and print the current grid.
            problem.update();

            Console.WriteLine(problem.ToString());

            return policyHasChanged;
        }
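runEpoch reports whether any estimated value changed during the sweep, so a caller can keep running epochs until the estimates settle. The sketch below is illustrative; trainUntilStable and maxEpochs are hypothetical names, not part of the original code.

        // Hypothetical driver, not part of the original examples.
        private void trainUntilStable(int maxEpochs)
        {
            for (int epoch = 0; epoch < maxEpochs; epoch++)
            {
                // runEpoch reports whether any estimated value changed.
                if (!runEpoch())
                {
                    break;
                }
            }
        }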
Example #4
        public String getOptimalPath()
        {
            Boolean[,] path = new Boolean[height, width];
            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    path[y, x] = false;
                }
            }

            // NOTE: the walk below is an assumed completion based on the method name
            // and the path grid. It follows the greedy policy from the start state,
            // marking every visited cell, and is capped at one step per cell so a
            // looping policy cannot hang the method.
            MarkovState currentState = getStartState();
            int         steps        = 0;

            while (!currentState.isTerminal && steps < height * width)
            {
                // Locate the current state in the grid and mark it as part of the path.
                for (int y = 0; y < height; y++)
                {
                    for (int x = 0; x < width; x++)
                    {
                        if (get(x, y) == currentState)
                        {
                            path[y, x] = true;
                        }
                    }
                }

                // Step to the most probable outcome of the action the policy selects.
                currentState.calculatePolicy();
                if (currentState.policy == null)
                {
                    break;
                }

                MarkovState next = currentState;
                double      best = -1;
                foreach (ActionResult result in currentState.policy.getPossibleResults())
                {
                    if (result.probability > best)
                    {
                        best = result.probability;
                        next = result.state;
                    }
                }
                currentState = next;
                steps++;
            }

            String build = "";

            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    // Prefix cells on the optimal path with an "O" marker.
                    String buf = path[y, x] ? " O" : "  ";

                    build += buf + " " + ((int)get(x, y).estimatedValue) + " ";
                }
                build += "\n";
            }
            return build;
        }
Example #5
        private void intializeGrid()
        {
            this.states = new List<MarkovState>();
            this.grid   = new MarkovState[height, width];
            int index = 0;

            // Create one state per cell; every ordinary cell carries a step cost of -1.
            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    grid[y, x]       = new MarkovState(index, 0);
                    grid[y, x].value = -1;
                    states.Add(grid[y, x]);
                    index++;
                }
            }

            // Two terminal states: a penalty cell near the centre of the grid
            // and a reward cell in the bottom-right corner.
            grid[height / 2 - 1, width / 2 - 1].isTerminal = true;
            grid[height / 2 - 1, width / 2 - 1].value      = -100;
            grid[height - 1, width - 1].isTerminal         = true;
            grid[height - 1, width - 1].value = 100;

            intializeActions();
        }
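intializeActions, called at the end of this example, is not shown; a plausible sketch, assuming it simply gives every cell its four directional actions via createActions from Example #2:

        // Plausible sketch of intializeActions (the real body is not shown in these examples).
        private void intializeActions()
        {
            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    // createActions attaches one action per direction to the cell's state.
                    createActions(x, y);
                }
            }
        }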
Example #6
 public ActionResult(MarkovState markovState)
 {
     this.state = markovState;
 }
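Example #3 reads result.probability as well as result.state, so ActionResult presumably also carries a probability; a minimal sketch of a constructor overload that sets both fields (the overload itself is an assumption, not shown in the source):

 // Hypothetical overload: the original examples only show the single-argument constructor.
 public ActionResult(MarkovState markovState, double probability)
 {
     this.state       = markovState;
     this.probability = probability;
 }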