/// <summary>
/// Builds the State4 observation for the snake's current position and heading:
/// whether the cells to the left, straight ahead and to the right can be entered,
/// plus the signed angle to the apple quantised into 15-degree buckets.
/// </summary>
/// <returns>The populated State4 value.</returns>
State4 getState4()
{
    int left_index = (snake_dir_index + directions.Length - 1) % directions.Length;
    int right_index = (snake_dir_index + 1) % directions.Length;

    State4 s = new State4();

    // All three probes use the same bounds-checked lookup. Previously only the
    // forward probe was protected (by a catch-all), so a left/right probe that
    // fell outside the map threw instead of reporting "not valid".
    s.is_left_valid = is_cell_enterable(snake_pos + directions[left_index]);
    s.is_fwd_valid = is_cell_enterable(snake_pos + directions[snake_dir_index]);
    s.is_right_valid = is_cell_enterable(snake_pos + directions[right_index]);

    // Unsigned angle between the heading and the apple, in 15-degree steps.
    Vector2 appleVec = (Vector2)apple.transform.position - snake_pos;
    s.angle_rounded = (int)(Vector2.Angle(directions[snake_dir_index], appleVec.normalized) / 15);

    // The angle is made negative when the apple lies on the side opposite
    // the "left" direction (negative projection onto the left vector).
    if (Vector2.Dot(appleVec, directions[left_index]) < 0)
    {
        s.angle_rounded *= -1;
    }

    return s;
}

/// <summary>
/// Returns true when <paramref name="pos"/> lies inside the map and the cell holds 'e' or 'a'.
/// Positions outside the map are reported as not enterable instead of throwing.
/// </summary>
bool is_cell_enterable(Vector2 pos)
{
    int x = (int)pos.x;
    int y = (int)pos.y;

    if (x < 0 || y < 0 || x >= map.GetLength(0) || y >= map.GetLength(1))
    {
        return false;
    }

    return map[x, y] == 'e' || map[x, y] == 'a';
}
/// <summary>
/// Creates an ObjectBinder and constructs its six state objects
/// (state0 through state5), passing this binder to each of them.
/// </summary>
public ObjectBinder()
{
    this.state0 = new State0(this);
    this.state1 = new State1(this);
    this.state2 = new State2(this);
    this.state3 = new State3(this);
    this.state4 = new State4(this);
    this.state5 = new State5(this);
}
/// <summary>
/// Picks the direction index (turn left, keep heading, or turn right) with the
/// highest Q value for the current State4, and records the key that was used in
/// <c>last</c> so the next update can refer to it.
/// </summary>
/// <remarks>
/// The action component of the key is currently disabled, so every candidate is
/// looked up with the same state-only key and the three values tie. Ties are
/// broken uniformly at random; the previous coin-flip-per-tie scheme favoured
/// the last candidate (25% / 25% / 50% for a three-way tie).
/// </remarks>
/// <returns>The chosen index into <c>directions</c>.</returns>
int getBestActionQ()
{
    State4 s = getState4();

    // All actions reachable from this heading: left turn, straight, right turn.
    int[] candidates =
    {
        (snake_dir_index + directions.Length - 1) % directions.Length,
        snake_dir_index,
        (snake_dir_index + 1) % directions.Length
    };

    int best = 0;
    float bestQval = float.NegativeInfinity;
    int tieCount = 0; // number of candidates seen so far that share bestQval

    foreach (int act in candidates)
    {
        Key k;
        k.s = s;
        //k.a = act;
        float qVal = getQ(k);

        if (qVal > bestQval)
        {
            best = act;
            bestQval = qVal;
            tieCount = 1;
        }
        else if (qVal == bestQval)
        {
            // Replace the current pick with probability 1/tieCount so that every
            // tied candidate ends up equally likely. The int overload of
            // Random.Range excludes the upper bound.
            tieCount++;
            if (Random.Range(0, tieCount) == 0)
            {
                best = act;
            }
        }
    }

    Key k_prime;
    k_prime.s = s;
    //k_prime.a = best;
    last = k_prime;

    if (print_debug)
    {
        print("best q: " + bestQval);
    }

    return best;
}
/// <summary>
/// Advances the game by one tick. If the snake is dead the board is either left
/// as is (no replay) or rebuilt for a new game; then the active controller picks
/// a heading and the snake, its tail and the map are moved forward one cell.
/// </summary>
void update_world()
{
    if (snake_dead)
    {
        last_score = 0;

        // The terminal-state Q update (a reward of -1 on death) is disabled.
        // Learning is only paused: the QLearn step skips one update and then
        // turns itself back on.
        learn = false;

        if (!replay_on_death)
        {
            return;
        }

        restart_after_death();
    }

    if (controll_tye == ControllType.SimpleAI)
    {
        simple_AI_controlls();
    }
    else if (controll_tye == ControllType.RandomAI)
    {
        step_random_AI();
    }
    else if (controll_tye == ControllType.Player)
    {
        step_player();
    }
    else if (controll_tye == ControllType.QLearn)
    {
        step_q_learn();
    }
    else if (controll_tye == ControllType.QLearnV2)
    {
        step_q_learn_v2();
    }
    else if (controll_tye == ControllType.MediumAI)
    {
        step_medium_AI();
    }

    advance_snake();
}

/// <summary>
/// Rebuilds the board after a death without re-initialising everything:
/// new map, head position, one-piece tail, and per-game counters.
/// </summary>
void restart_after_death()
{
    // Set the board back up, but don't initialize new things.
    MakeWorld makeWorld = GetComponent<MakeWorld>();
    map = makeWorld.createMap(false);
    map_w = makeWorld.map_w;
    map_h = makeWorld.map_h;
    apple = makeWorld.real_apple;

    // Locate the head marker. The scan covers map_w + 2 by map_h + 2 cells and
    // does not stop early, so the last 'h' found wins.
    for (int i = 0; i < map_w + 2; ++i)
    {
        for (int j = 0; j < map_h + 2; ++j)
        {
            if (map[i, j] == 'h')
            {
                snake_pos.x = i;
                snake_pos.y = j;
            }
        }
    }

    snake_dir_index = 1;

    // Single tail piece, one cell behind the controller object.
    // NOTE(review): this uses controller.transform.position rather than the
    // snake_pos found above - confirm createMap repositions the controller.
    Vector2 tail_vec2 = controller.transform.position - (Vector3)directions[snake_dir_index];
    tailPos = new List<Vector2>();
    tailPos.Add(tail_vec2);

    // Destroy all old tail objects before replacing the list.
    foreach (GameObject old_piece in tailObjs)
    {
        Destroy(old_piece);
    }

    GameObject tail = Instantiate(tailObj, tail_vec2, Quaternion.identity);
    tailObjs = new List<GameObject>();
    tailObjs.Add(tail);
    map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

    print("score: " + score);

    if (numberSessions > 0)
    {
        numberSessions -= 1;
    }

    if ((numberSessions < 1) && (firstPause == true))
    {
#if UNITY_EDITOR
        // EditorApplication exists only inside the Unity editor; the guard keeps
        // this method compilable in player builds.
        EditorApplication.isPaused = true;
#endif
        firstPause = false;
        numberSessions = 101;
    }

    num_moves_this_game = 0;
    score = 0;
    snake_dead = false;
}

/// <summary>Picks left, straight or right uniformly at random.</summary>
void step_random_AI()
{
    // All possible actions from this heading.
    int[] actions =
    {
        (snake_dir_index + directions.Length - 1) % directions.Length,
        snake_dir_index,
        (snake_dir_index + 1) % directions.Length
    };

    snake_dir_index = actions[Random.Range(0, actions.Length)];
}

/// <summary>Applies the player's turn input, if any; left takes priority over right.</summary>
void step_player()
{
    if (isTurnLeft())
    {
        turn_left();
    }
    else if (isTurnRight())
    {
        turn_right();
    }
}

/// <summary>
/// Performs the QLearn update for the previous move (when learning is enabled)
/// and then lets Q_learning_controlls choose the next heading.
/// </summary>
void step_q_learn()
{
    Key t_key;
    t_key.s = getState4();
    float n_Q = getQ(t_key);
    float step_reward = score - last_score;

    if (learn)
    {
        float newQ = (1 - alpha) * oldQ + alpha * (step_reward + gamma * n_Q);
        setQ(last, newQ);
    }
    else
    {
        // Skip exactly one update (e.g. right after a death), then resume.
        learn = true;
    }

    Q_learning_controlls();
}

/// <summary>
/// QLearnV2 step: looks up (or creates) the action values for the current state,
/// updates the previous state/action entry, then acts greedily or, with
/// probability qLearn_random_change, at random.
/// </summary>
void step_q_learn_v2()
{
    State4 cur_state = qLearnGetState(snake_pos, snake_dir_index);

    // Single lookup; unseen states start with three zeroed action values.
    List<float> action_list;
    if (!qLearnV2Dict.TryGetValue(cur_state, out action_list))
    {
        action_list = new List<float>();
        action_list.Add(0.0f);
        action_list.Add(0.0f);
        action_list.Add(0.0f);
        qLearnV2Dict.Add(cur_state, action_list);
    }

    // Arg-max over the three actions with uniform tie-breaking. The previous
    // coin flip per tie picked the last action half the time on a three-way
    // tie, which is the starting condition of every new state.
    float best_q = float.MinValue;
    int best_index = -1;
    int tie_count = 0;
    for (int i = 0; i < 3; ++i)
    {
        if (action_list[i] > best_q)
        {
            best_q = action_list[i];
            best_index = i;
            tie_count = 1;
        }
        else if (action_list[i] == best_q)
        {
            // Keep each tied action with probability 1/tie_count
            // (the int overload of Random.Range excludes the upper bound).
            tie_count++;
            if (Random.Range(0, tie_count) == 0)
            {
                best_index = i;
            }
        }
    }

    // Update the previous q value.
    if (!is_first_turn && qLearn_update_value)
    {
        // NOTE(review): this adds half of the old value on top of
        // reward + gamma * best_q, which is not the usual Q-learning blend.
        // Left unchanged; confirm it is intentional.
        List<float> prev_values = qLearnV2Dict[qLearn_prev_state];
        prev_values[qLearn_prev_action_index] =
            reward + qLearn_gamma * best_q + prev_values[qLearn_prev_action_index] / 2.0f;
        reward = 0.0f;
    }
    is_first_turn = false;

    // See if we should do something random or do the real thing.
    if (Random.Range(0.0f, 1.0f) < qLearn_random_change)
    {
        best_index = Random.Range(0, 3);
    }

    // 0 = turn left, 1 = keep heading, anything else = turn right.
    if (best_index == 0)
    {
        turn_left();
    }
    else if (best_index != 1)
    {
        turn_right();
    }

    qLearn_prev_action_index = best_index;
    qLearn_prev_state = cur_state;
}

/// <summary>
/// MediumAI step: steers toward the apple using the State4 angle bucket when the
/// corresponding cell is enterable, otherwise falls back to simple_AI_controlls.
/// </summary>
void step_medium_AI()
{
    State4 m_state = getState4();

    if (m_state.angle_rounded < -2 && m_state.is_right_valid)
    {
        turn_right();
    }
    else if (m_state.angle_rounded > 2 && m_state.is_left_valid)
    {
        turn_left();
    }
    else if (m_state.angle_rounded < 2 && m_state.angle_rounded > -2 && m_state.is_fwd_valid)
    {
        // Apple is roughly ahead: keep the current heading.
    }
    else
    {
        // Also reached when angle_rounded is exactly 2 or -2.
        simple_AI_controlls();
    }
}

/// <summary>
/// Moves the head one cell along the current heading, runs the position check,
/// shifts the tail pieces forward, grows the tail if requested, and syncs the
/// map markers and the controller object.
/// </summary>
void advance_snake()
{
    snake_old_pos = snake_pos;
    snake_pos += directions[snake_dir_index];
    check_snake_pos();

    // Update board.
    map[(int)snake_pos.x, (int)snake_pos.y] = 'h';

    // Update tail: free the last cell, then shift every piece one step forward.
    int last_tail = tailPos.Count - 1;
    map[(int)tailPos[last_tail].x, (int)tailPos[last_tail].y] = 'e';
    for (int i = last_tail; i >= 1; --i)
    {
        tailPos[i] = tailPos[i - 1];
        tailObjs[i].transform.position = tailObjs[i - 1].transform.position;
        map[(int)tailPos[i].x, (int)tailPos[i].y] = 't';
    }

    // The first tail piece takes the cell the head just left.
    tailPos[0] = snake_old_pos;
    tailObjs[0].transform.position = snake_old_pos;
    map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

    if (addTailPiece)
    {
        tailPos.Add(oldAppleLoc);
        GameObject tail = Instantiate(tailObj, oldAppleLoc, Quaternion.identity);
        tailObjs.Add(tail);
        map[(int)oldAppleLoc.x, (int)oldAppleLoc.y] = 't';
    }

    controller.transform.position = snake_pos;
    num_moves_this_game++;
}