예제 #1
0
    /// <summary>
    /// Runs one simulation step. If the snake is dead, either stops (no replay) or
    /// resets the board for a new game. Then the active controller picks a heading,
    /// the head advances one cell along <c>directions[snake_dir_index]</c>, and the
    /// tail cells and tail objects are shifted to follow it.
    /// </summary>
    /// <remarks>
    /// Map markers written here: 'h' for the head cell, 't' for tail cells and 'e'
    /// for the cell the tail just left (presumably "empty").
    /// NOTE(review): unlike Start(), the replay reset does not check whether the
    /// respawned tail cell held the apple ('a') before overwriting it - confirm that
    /// createMap(false) cannot place the apple there.
    /// </remarks>
    void update_world()
    {
        if (snake_dead)
        {
            last_score = 0;

            // Make the QLearn branch below skip its Q update on the next step;
            // it switches learning back on by itself.
            learn = false;

            if (!replay_on_death)
            {
                return;
            }

            // Set the board back up, but don't initialize new things.
            MakeWorld makeWorld = GetComponent<MakeWorld>();
            map   = makeWorld.createMap(false);
            map_w = makeWorld.map_w;
            map_h = makeWorld.map_h;
            apple = makeWorld.real_apple;

            // Locate the head marker; the scan covers map_w + 2 by map_h + 2 cells.
            for (int i = 0; i < map_w + 2; ++i)
            {
                for (int j = 0; j < map_h + 2; ++j)
                {
                    if (map[i, j] == 'h')
                    {
                        snake_pos.x = i;
                        snake_pos.y = j;
                    }
                }
            }

            snake_dir_index = 1;

            // The new one-piece tail sits one step behind the controller object.
            Vector2 tail_vec2 = controller.transform.position - (Vector3)directions[snake_dir_index];
            tailPos = new List<Vector2>();
            tailPos.Add(tail_vec2);

            // Destroy all old tail objects before replacing the list.
            foreach (GameObject g in tailObjs)
            {
                Destroy(g);
            }

            GameObject tail = Instantiate(tailObj, tail_vec2, Quaternion.identity);
            tailObjs = new List<GameObject>();
            tailObjs.Add(tail);

            map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

            print("score: " + score);

            // Count down the remaining sessions; the first time the counter runs
            // out, pause the editor once and re-arm the counter.
            if (numberSessions > 0)
            {
                numberSessions -= 1;
            }
            if (numberSessions < 1 && firstPause)
            {
                EditorApplication.isPaused = true;
                firstPause     = false;
                numberSessions = 101;
            }

            num_moves_this_game = 0;
            score      = 0;
            snake_dead = false;
        }

        if (controll_tye == ControllType.SimpleAI)
        {
            simple_AI_controlls();
        }
        else if (controll_tye == ControllType.RandomAI)
        {
            // Candidate headings: the previous, the current and the next entry of
            // directions (cyclic) - presumably left / straight / right.
            int[] actions =
            {
                (snake_dir_index + directions.Length - 1) % directions.Length,
                snake_dir_index,
                (snake_dir_index + 1) % directions.Length
            };

            snake_dir_index = actions[Random.Range(0, actions.Length)];
        }
        else if (controll_tye == ControllType.Player)
        {
            if (isTurnLeft())
            {
                turn_left();
            }
            else if (isTurnRight())
            {
                turn_right();
            }
        }
        else if (controll_tye == ControllType.QLearn)
        {
            Key t_key;
            t_key.s = getState4();
            float n_Q    = getQ(t_key);
            float reward = score - last_score;
            if (learn)
            {
                // Blend the stored estimate with the one-step target.
                float newQ = (1 - alpha) * oldQ + alpha * (reward + gamma * n_Q);
                setQ(last, newQ);
            }
            else
            {
                // Skip exactly one update (first step after a death), then resume.
                learn = true;
            }
            Q_learning_controlls();
        }
        else if (controll_tye == ControllType.QLearnV2)
        {
            State4 cur_state = qLearnGetState(snake_pos, snake_dir_index);

            // Fetch the three action values for this state, creating a zeroed
            // entry the first time the state is seen (single dictionary lookup).
            List<float> action_list;
            if (!qLearnV2Dict.TryGetValue(cur_state, out action_list))
            {
                action_list = new List<float> { 0.0f, 0.0f, 0.0f };
                qLearnV2Dict.Add(cur_state, action_list);
            }

            // Greedy action for the current state; exact ties are broken by a coin flip.
            float best_q     = float.MinValue;
            int   best_index = -1;
            for (int i = 0; i < 3; ++i)
            {
                if (action_list[i] > best_q)
                {
                    best_q     = action_list[i];
                    best_index = i;
                }
                else if (action_list[i] == best_q)
                {
                    if (Random.Range(0.0f, 1.0f) < 0.5f)
                    {
                        best_q     = action_list[i];
                        best_index = i;
                    }
                }
            }

            if (!is_first_turn && qLearn_update_value)
            {
                // Update the previous (state, action) value by averaging the old
                // estimate with the one-step target, i.e. the same rule as the
                // QLearn branch with alpha = 0.5. The whole sum is parenthesised:
                // without the parentheses only the old value was halved, giving
                // target + old / 2, which is not a blend and can grow without bound.
                List<float> prev_action_values = qLearnV2Dict[qLearn_prev_state];
                float       prev_q             = prev_action_values[qLearn_prev_action_index];
                prev_action_values[qLearn_prev_action_index] = (reward + qLearn_gamma * best_q + prev_q) / 2.0f;
                reward = 0.0f;
            }
            is_first_turn = false;

            // With probability qLearn_random_change, explore with a random action
            // instead of the greedy one.
            if (Random.Range(0.0f, 1.0f) < qLearn_random_change)
            {
                best_index = Random.Range(0, 3);
            }

            // Action 0 turns left, action 2 turns right, action 1 keeps the heading.
            if (best_index == 0)
            {
                turn_left();
            }
            else if (best_index != 1)
            {
                turn_right();
            }

            qLearn_prev_action_index = best_index;
            qLearn_prev_state        = cur_state;
        }
        else if (controll_tye == ControllType.MediumAI)
        {
            State4 m_state = getState4();

            // Steer by the rounded angle when the matching turn is valid;
            // everything else (including angles of exactly -2 or 2) falls back
            // to the simple AI.
            if (m_state.angle_rounded < -2 && m_state.is_right_valid)
            {
                turn_right();
            }
            else if (m_state.angle_rounded > 2 && m_state.is_left_valid)
            {
                turn_left();
            }
            else if (m_state.angle_rounded < 2 && m_state.angle_rounded > -2 && m_state.is_fwd_valid)
            {
                // go forward
            }
            else
            {
                simple_AI_controlls();
            }
        }

        // Advance the head one cell along the chosen heading.
        snake_old_pos = snake_pos;
        snake_pos += directions[snake_dir_index];

        // Defined elsewhere; presumably handles collisions and apple pickup and
        // sets snake_dead / addTailPiece / oldAppleLoc - confirm.
        check_snake_pos();

        // update board
        map[(int)snake_pos.x, (int)snake_pos.y] = 'h';

        // update tail: clear the cell the last piece leaves, then shift every
        // piece (position and object) into the slot of the piece ahead of it.
        int last_tail = tailPos.Count - 1;
        map[(int)tailPos[last_tail].x, (int)tailPos[last_tail].y] = 'e';
        for (int i = last_tail; i >= 1; --i)
        {
            tailPos[i] = tailPos[i - 1];
            tailObjs[i].transform.position            = tailObjs[i - 1].transform.position;
            map[(int)tailPos[i].x, (int)tailPos[i].y] = 't';
        }

        // The first tail piece takes the cell the head just left.
        tailPos[0] = snake_old_pos;
        tailObjs[0].transform.position            = snake_old_pos;
        map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

        if (addTailPiece)
        {
            // Grow by one piece at oldAppleLoc.
            tailPos.Add(oldAppleLoc);

            GameObject tail = Instantiate(tailObj, oldAppleLoc, Quaternion.identity);
            tailObjs.Add(tail);

            map[(int)oldAppleLoc.x, (int)oldAppleLoc.y] = 't';
        }

        controller.transform.position = snake_pos;
        num_moves_this_game++;
    }
예제 #2
0
    /// <summary>
    /// Unity initialization hook: builds the world through MakeWorld, locates the
    /// snake head on the map, spawns the initial one-piece tail behind the
    /// controller object, moves the apple if the tail landed on it, and creates
    /// the Q-value stores.
    /// </summary>
    void Start()
    {
        // set up the world
        MakeWorld makeWorld = GetComponent<MakeWorld>();

        map   = makeWorld.createMap(true);
        map_w = makeWorld.map_w;
        map_h = makeWorld.map_h;
        apple = makeWorld.real_apple;

        controller = FindObjectOfType<SnakeController>();

        QValueStore = new Dictionary<Key, float>();

        // Locate the head marker; the scan covers map_w + 2 by map_h + 2 cells.
        for (int i = 0; i < map_w + 2; ++i)
        {
            for (int j = 0; j < map_h + 2; ++j)
            {
                if (map[i, j] == 'h')
                {
                    snake_pos.x = i;
                    snake_pos.y = j;
                }
            }
        }
        prev_dist = Vector2.Distance(snake_pos, (Vector2)apple.transform.position);

        // set up tail: one piece, one step behind the controller object
        Vector2 tail_vec2 = controller.transform.position - (Vector3)directions[snake_dir_index];

        tailPos = new List<Vector2>();
        tailPos.Add(tail_vec2);

        GameObject tail = Instantiate(tailObj, tail_vec2, Quaternion.identity);

        tailObjs = new List<GameObject>();
        tailObjs.Add(tail);

        // Claim the tail cell, remembering whether the apple ('a') was on it.
        int  tail_x        = (int)tailPos[0].x;
        int  tail_y        = (int)tailPos[0].y;
        bool tail_on_apple = map[tail_x, tail_y] == 'a';
        map[tail_x, tail_y] = 't';

        if (tail_on_apple)
        {
            // Find a new random 'e' cell for the apple. The int overload of
            // Random.Range excludes its upper bound, so map_w + 1 / map_h + 1 are
            // needed for indices map_w / map_h to be candidates at all. Those
            // indices are inside the array (the scan above runs to map_w + 1), and
            // any cell that is not 'e' is rejected by the loop condition.
            int a_i;
            int a_j;
            do
            {
                a_i = Random.Range(1, map_w + 1);
                a_j = Random.Range(1, map_h + 1);
            }
            while (map[a_i, a_j] != 'e');

            map[a_i, a_j] = 'a';

            // move apple in world
            apple.transform.position = new Vector3(a_i, a_j);
        }

        qLearnV2Dict = new Dictionary<State4, List<float>>();
    }