// Advances the game by one tick.
//
// 1. If the snake died on the previous tick, clears the learning bookkeeping and either
//    stops (replay_on_death == false) or rebuilds the board and continues with this tick.
// 2. Lets the configured controller (controll_tye) pick the new heading.
// 3. Moves the head one cell, shifts the tail after it and mirrors everything into `map`.
void update_world()
{
    if (snake_dead)
    {
        last_score = 0;
        // The terminal Q update (reward of -1) that used to run here is disabled;
        // only the bookkeeping below remains.
        learn = false;

        if (!replay_on_death)
        {
            return;
        }

        restart_after_death();
    }

    choose_direction();
    move_snake_and_tail();
}

// Rebuilds the board after a death without re-initialising one-time objects
// (createMap is called with false), recreates the single-segment tail and resets
// the per-game counters.
private void restart_after_death()
{
    MakeWorld makeWorld = GetComponent<MakeWorld>();
    map = makeWorld.createMap(false);
    map_w = makeWorld.map_w;
    map_h = makeWorld.map_h;
    apple = makeWorld.real_apple;

    // Find the head cell on the fresh map; the scanned grid is (map_w + 2) x (map_h + 2).
    for (int i = 0; i < map_w + 2; ++i)
    {
        for (int j = 0; j < map_h + 2; ++j)
        {
            if (map[i, j] == 'h')
            {
                snake_pos.x = i;
                snake_pos.y = j;
            }
        }
    }

    snake_dir_index = 1;

    // The first tail segment sits one step behind the controller, opposite to the heading.
    Vector2 tail_vec2 = controller.transform.position - (Vector3)directions[snake_dir_index];
    tailPos = new List<Vector2> { tail_vec2 };

    // Destroy all tail objects of the previous game before replacing the list.
    foreach (GameObject g in tailObjs)
    {
        Destroy(g);
    }

    GameObject tail = Instantiate(tailObj, tail_vec2, Quaternion.identity);
    tailObjs = new List<GameObject> { tail };
    map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

    print("score: " + score);

    if (numberSessions > 0)
    {
        numberSessions -= 1;
    }

    // Pause the editor once, when the session countdown runs out, then re-arm the counter.
    if ((numberSessions < 1) && firstPause)
    {
#if UNITY_EDITOR
        // EditorApplication only exists inside the Unity editor; the guard keeps
        // player builds compiling.
        UnityEditor.EditorApplication.isPaused = true;
#endif
        firstPause = false;
        numberSessions = 101;
    }

    num_moves_this_game = 0;
    score = 0;
    snake_dead = false;
}

// Dispatches to the controller selected by controll_tye; each controller only
// updates snake_dir_index (directly or through turn_left / turn_right).
private void choose_direction()
{
    if (controll_tye == ControllType.SimpleAI)
    {
        simple_AI_controlls();
    }
    else if (controll_tye == ControllType.RandomAI)
    {
        random_AI_step();
    }
    else if (controll_tye == ControllType.Player)
    {
        if (isTurnLeft())
        {
            turn_left();
        }
        else if (isTurnRight())
        {
            turn_right();
        }
    }
    else if (controll_tye == ControllType.QLearn)
    {
        q_learn_step();
    }
    else if (controll_tye == ControllType.QLearnV2)
    {
        q_learn_v2_step();
    }
    else if (controll_tye == ControllType.MediumAI)
    {
        medium_AI_step();
    }
}

// Picks uniformly among the three headings reachable from the current one:
// previous index, same index, next index (wrapping around `directions`).
private void random_AI_step()
{
    List<int> actions = new List<int>
    {
        (snake_dir_index + directions.Length - 1) % directions.Length,
        snake_dir_index,
        (snake_dir_index + 1) % directions.Length
    };

    int rng_index = Random.Range(0, actions.Count);
    snake_dir_index = actions[rng_index];
}

// First Q-learning variant: updates the Q value stored under `last` from the score
// gained since last_score, then lets Q_learning_controlls() choose the move.
private void q_learn_step()
{
    Key t_key;
    t_key.s = getState4();
    float n_Q = getQ(t_key);

    // Named step_reward so it cannot be confused with the `reward` field used by QLearnV2.
    float step_reward = score - last_score;

    if (learn)
    {
        float newQ = (1 - alpha) * oldQ + alpha * (step_reward + gamma * n_Q);
        setQ(last, newQ);
    }
    else
    {
        // Updating is skipped on the first tick after `learn` was cleared.
        learn = true;
    }

    Q_learning_controlls();
}

// Second Q-learning variant: a table of three action values per state
// (index 0 = turn left, 1 = straight, 2 = turn right).
private void q_learn_v2_step()
{
    State4 cur_state = qLearnGetState(snake_pos, snake_dir_index);

    // Single lookup; unseen states start with all three action values at zero.
    List<float> action_list;
    if (!qLearnV2Dict.TryGetValue(cur_state, out action_list))
    {
        action_list = new List<float> { 0.0f, 0.0f, 0.0f };
        qLearnV2Dict.Add(cur_state, action_list);
    }

    // Greedy action for the current state; exact ties are broken by a coin flip.
    // NOTE(review): with three equal values the later indices are favoured by this
    // scheme - confirm whether a uniform tie-break was intended.
    float best_q = float.MinValue;
    int best_index = -1;
    for (int i = 0; i < 3; ++i)
    {
        if (action_list[i] > best_q)
        {
            best_q = action_list[i];
            best_index = i;
        }
        else if (action_list[i] == best_q)
        {
            if (Random.Range(0.0f, 1.0f) < 0.5f)
            {
                best_q = action_list[i];
                best_index = i;
            }
        }
    }

    // Update the value of the action taken on the previous tick.
    if (!is_first_turn && qLearn_update_value)
    {
        List<float> prev_actions = qLearnV2Dict[qLearn_prev_state];
        // NOTE(review): only the old value is halved here; if an average of the old
        // and new estimates was intended, parentheses are missing - confirm.
        prev_actions[qLearn_prev_action_index] =
            reward + qLearn_gamma * best_q + prev_actions[qLearn_prev_action_index] / 2.0f;
        reward = 0.0f;
    }

    is_first_turn = false;

    // Exploration: with probability qLearn_random_change take a random action instead.
    if (Random.Range(0.0f, 1.0f) < qLearn_random_change)
    {
        best_index = Random.Range(0, 3);
    }

    if (best_index == 0)
    {
        turn_left();
    }
    else if (best_index == 1)
    {
        // keep going straight
    }
    else
    {
        turn_right();
    }

    qLearn_prev_action_index = best_index;
    qLearn_prev_state = cur_state;
}

// Heuristic controller: turns according to the rounded angle in the current state when
// that move is valid, goes straight when the angle is small, and otherwise falls back
// to simple_AI_controlls().
private void medium_AI_step()
{
    State4 m_state = getState4();

    if (m_state.angle_rounded < -2 && m_state.is_right_valid)
    {
        turn_right();
    }
    else if (m_state.angle_rounded > 2 && m_state.is_left_valid)
    {
        turn_left();
    }
    else if (m_state.angle_rounded < 2 && m_state.angle_rounded > -2 && m_state.is_fwd_valid)
    {
        // go forward
    }
    else
    {
        // NOTE(review): angle_rounded values of exactly 2 or -2 also end up here - confirm.
        simple_AI_controlls();
    }
}

// Moves the head one cell along the current heading, drags the tail behind it and
// keeps `map` ('h' head, 't' tail, 'e' empty) and the scene objects in sync.
private void move_snake_and_tail()
{
    snake_old_pos = snake_pos;
    snake_pos += directions[snake_dir_index];
    check_snake_pos();

    // update board
    map[(int)snake_pos.x, (int)snake_pos.y] = 'h';

    // The cell under the last tail segment is freed before the segments are shifted.
    Vector2 tail_end = tailPos[tailPos.Count - 1];
    map[(int)tail_end.x, (int)tail_end.y] = 'e';

    // Shift every segment into the place of the one ahead of it, back to front.
    for (int i = tailPos.Count - 1; i >= 1; --i)
    {
        tailPos[i] = tailPos[i - 1];
        tailObjs[i].transform.position = tailObjs[i - 1].transform.position;
        map[(int)tailPos[i].x, (int)tailPos[i].y] = 't';
    }

    // The first segment takes the cell the head just left.
    tailPos[0] = snake_old_pos;
    tailObjs[0].transform.position = snake_old_pos;
    map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

    if (addTailPiece)
    {
        // Grow by one segment, placed at oldAppleLoc.
        tailPos.Add(oldAppleLoc);
        GameObject tail = Instantiate(tailObj, oldAppleLoc, Quaternion.identity);
        tailObjs.Add(tail);
        map[(int)oldAppleLoc.x, (int)oldAppleLoc.y] = 't';
    }

    controller.transform.position = snake_pos;
    num_moves_this_game++;
}
// Use this for initialization
// Builds the world through MakeWorld, locates the head cell on the map, creates the
// first tail segment behind the controller (re-rolling the apple if it would sit under
// that segment) and allocates both Q tables.
void Start()
{
    // set up the world
    MakeWorld world = GetComponent<MakeWorld>();
    map = world.createMap(true);
    map_w = world.map_w;
    map_h = world.map_h;
    apple = world.real_apple;
    controller = FindObjectOfType<SnakeController>();

    QValueStore = new Dictionary<Key, float>();

    // Scan the (map_w + 2) x (map_h + 2) grid for the head marker.
    int cols = map_w + 2;
    int rows = map_h + 2;
    for (int col = 0; col < cols; ++col)
    {
        for (int row = 0; row < rows; ++row)
        {
            if (map[col, row] != 'h')
            {
                continue;
            }

            snake_pos.x = col;
            snake_pos.y = row;
        }
    }

    prev_dist = Vector2.Distance(snake_pos, (Vector2)apple.transform.position);

    // set up tail: one segment, one step behind the controller relative to the heading
    Vector2 tailStart = controller.transform.position - (Vector3)directions[snake_dir_index];
    tailPos = new List<Vector2> { tailStart };

    GameObject firstSegment = Instantiate(tailObj, tailStart, Quaternion.identity);
    tailObjs = new List<GameObject> { firstSegment };

    // Remember whether the apple was under the tail cell before marking it as tail.
    int tailX = (int)tailPos[0].x;
    int tailY = (int)tailPos[0].y;
    bool tailCoversApple = map[tailX, tailY] == 'a';
    map[tailX, tailY] = 't';

    if (tailCoversApple)
    {
        // find a new random empty cell for the apple
        // NOTE(review): Random.Range(int, int) excludes its upper bound, so column map_w
        // and row map_h are never chosen - confirm this is intended.
        int appleX;
        int appleY;
        do
        {
            appleX = Random.Range(1, map_w);
            appleY = Random.Range(1, map_h);
        }
        while (map[appleX, appleY] != 'e');

        map[appleX, appleY] = 'a';

        // move apple in world
        apple.transform.position = new Vector3(appleX, appleY);
    }

    qLearnV2Dict = new Dictionary<State4, List<float>>();
}