// Renders the grid as text, one row per line. Each cell shows its estimated
// value truncated to an int and right-aligned in a four-character column,
// followed by the name of the state's policy action when it has one.
//
// Side effect: calculatePolicy() is invoked on every state while rendering.
//
// Returns the rendered grid; every row ends with "\n".
public String getPolicyAsString()
{
    // A StringBuilder avoids re-copying the whole text on every append.
    System.Text.StringBuilder text = new System.Text.StringBuilder();

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            MarkovState state = get(x, y);

            // The value is captured before calculatePolicy() runs, matching
            // the order in which the cell has always been rendered.
            String num = ((int)state.estimatedValue).ToString();

            // One separator space, then the number padded to width 4.
            // Longer numbers are left untouched by PadLeft.
            text.Append(" ").Append(num.PadLeft(4)).Append(" ");

            state.calculatePolicy();
            if (state.policy != null)
            {
                text.Append(state.policy.getName()).Append(" ");
            }
        }
        text.Append("\n");
    }

    return text.ToString();
}
// Attaches one action per direction (up, down, left, right, in that order)
// to the state located at column x, row y.
private void createActions(int x, int y)
{
    MarkovState cell = get(x, y);

    foreach (var heading in new[] { Direction.up, Direction.down, Direction.left, Direction.right })
    {
        cell.addAction(createAction(heading, x, y));
    }
}
// Runs a single episode starting at the problem's start state and stopping
// at a terminal state or after maxActionsInEpoch steps, whichever is first.
//
// At each step the visited state's policy is recalculated, a new value
// estimate is derived from the probability-weighted estimates of the policy
// action's possible results, and that estimate is stored in
// nextEstimatedValue. After the episode problem.update() is called and the
// problem is printed to the console.
//
// Returns true when at least one visited state received an estimate that
// differs from its previous one.
private bool runEpoch()
{
    bool anyEstimateMoved = false;
    MarkovState current = problem.getStartState();

    for (int step = 0; !current.isTerminal && step < maxActionsInEpoch; step++)
    {
        double previousEstimate = current.estimatedValue;
        current.calculatePolicy();

        // Probability-weighted estimate over the outcomes of the policy action.
        // NOTE(review): current.policy is dereferenced without a null check — confirm
        // calculatePolicy() always yields a policy for non-terminal states.
        double expectedOutcomeValue = 0;
        foreach (ActionResult outcome in current.policy.getPossibleResults())
        {
            expectedOutcomeValue += outcome.state.estimatedValue * outcome.probability;
        }

        double updatedEstimate = previousEstimate + learningRate * (current.value + gamma * expectedOutcomeValue - previousEstimate);
        current.nextEstimatedValue = updatedEstimate;
        anyEstimateMoved |= previousEstimate != updatedEstimate;

        // NOTE(review): a uniformly random action is taken when the draw is ABOVE
        // epsilon, so epsilon is the share of steps that follow the policy — confirm
        // this is the intended convention.
        if (rand.NextDouble() > epsilon)
        {
            // Explore: pick any of the state's actions at random.
            int pick = rand.Next(current.actions.Count());
            current = executer.getResult(current.actions[pick]);
        }
        else
        {
            // Exploit: follow the current policy action.
            current = executer.getResult(current.policy);
        }
    }

    problem.update();
    Console.WriteLine(problem.ToString());
    return anyEstimateMoved;
}
// Renders the grid as text with the cells of the policy path flagged by " O"
// in front of their value. Every cell prints " <value> " where value is the
// estimated value truncated to an int; every row ends with "\n".
//
// The path starts at getStartState() and repeatedly moves to the most
// probable result of the current state's policy action. It ends at a
// terminal state, at a state without a policy or results, at a state that
// cannot be found on the grid, or as soon as it would revisit a cell. The
// previous implementation had an empty loop body here and never returned
// for a non-terminal start state.
//
// Side effect: calculatePolicy() is invoked on each state along the path.
public String getOptimalPath()
{
    // Boolean arrays start out all false, so no explicit clearing pass is needed.
    Boolean[,] path = new Boolean[height, width];

    MarkovState currentState = getStartState();
    while (currentState != null)
    {
        // Each iteration marks a previously unmarked cell, so the walk is bounded
        // by the number of cells on the grid.
        if (!markPathCell(path, currentState) || currentState.isTerminal)
        {
            break;
        }
        currentState = mostLikelySuccessor(currentState);
    }

    System.Text.StringBuilder text = new System.Text.StringBuilder();
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            if (path[y, x])
            {
                text.Append(" O");
            }
            text.Append(" ").Append((int)get(x, y).estimatedValue).Append(" ");
        }
        text.Append("\n");
    }
    return text.ToString();
}

// Flags the grid cell holding the given state. Returns false when the state is
// not on the grid or its cell was already flagged (the walk has looped).
// Assumes get(x, y) returns the same instance for a given cell — TODO confirm.
private bool markPathCell(Boolean[,] path, MarkovState state)
{
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            if (object.ReferenceEquals(get(x, y), state))
            {
                if (path[y, x])
                {
                    return false;
                }
                path[y, x] = true;
                return true;
            }
        }
    }
    return false;
}

// Recalculates the state's policy and returns the result state with the highest
// probability (first one wins on ties), or null when there is no policy or the
// policy action has no possible results.
private MarkovState mostLikelySuccessor(MarkovState state)
{
    state.calculatePolicy();
    if (state.policy == null)
    {
        return null;
    }

    MarkovState successor = null;
    double bestProbability = 0;
    foreach (ActionResult result in state.policy.getPossibleResults())
    {
        if (successor == null || result.probability > bestProbability)
        {
            successor = result.state;
            bestProbability = result.probability;
        }
    }
    return successor;
}
// Builds a fresh height x width grid of states and the flat states list.
//
// Every state gets a row-major index (starting at 0) and a value of -1.
// Two cells are then made terminal: the one at row height/2 - 1, column
// width/2 - 1 with value -100, and the bottom-right one with value 100.
// Finally intializeActions() is called.
private void intializeGrid()
{
    this.states = new List<MarkovState>();
    this.grid = new MarkovState[height, width];

    int nextIndex = 0;
    for (int row = 0; row < height; row++)
    {
        for (int col = 0; col < width; col++)
        {
            MarkovState cell = new MarkovState(nextIndex++, 0);
            cell.value = -1;
            grid[row, col] = cell;
            states.Add(cell);
        }
    }

    // Penalty terminal just up-left of the grid centre.
    MarkovState penaltyCell = grid[height / 2 - 1, width / 2 - 1];
    penaltyCell.isTerminal = true;
    penaltyCell.value = -100;

    // Reward terminal in the bottom-right corner.
    MarkovState rewardCell = grid[height - 1, width - 1];
    rewardCell.isTerminal = true;
    rewardCell.value = 100;

    intializeActions();
}
// Creates a result that points at the given state. Only the state member is
// assigned here; everything else keeps its default.
public ActionResult(MarkovState markovState)
{
    state = markovState;
}