/// <summary>
/// Builds the State4 observation for the snake's current position and heading:
/// whether the cells to the left, ahead and to the right can be entered, plus
/// the signed angle to the apple bucketed into 15-degree steps.
/// </summary>
/// <returns>The populated State4 value.</returns>
State4 getState4()
    {
        State4 s = new State4();

        // "Left" is the previous entry in directions, "right" the next one (both wrap around).
        int left_index  = (snake_dir_index + directions.Length - 1) % directions.Length;
        int right_index = (snake_dir_index + 1) % directions.Length;

        // All three probes share one bounds-checked lookup, so a neighbour that
        // falls outside the map is reported as blocked instead of throwing.
        s.is_left_valid  = isCellEnterable(snake_pos + directions[left_index]);
        s.is_fwd_valid   = isCellEnterable(snake_pos + directions[snake_dir_index]);
        s.is_right_valid = isCellEnterable(snake_pos + directions[right_index]);

        Vector2 appleVec = (Vector2)apple.transform.position - snake_pos;

        // Vector2.Angle is unsigned; dividing by 15 and truncating yields the bucket index.
        s.angle_rounded = (int)(Vector2.Angle(directions[snake_dir_index], appleVec.normalized) / 15);

        // The bucket is negated when the apple vector points away from the "left" direction.
        if (Vector2.Dot(appleVec, directions[left_index]) < 0)
        {
            s.angle_rounded *= -1;
        }

        return(s);
    }

    /// <summary>
    /// Returns true when <paramref name="pos"/> (truncated to integer indices) lies inside
    /// the map and holds 'e' or 'a'. Assumes map is a rectangular 2-D array.
    /// </summary>
    bool isCellEnterable(Vector2 pos)
    {
        int x = (int)pos.x;
        int y = (int)pos.y;

        if (x < 0 || y < 0 || x >= map.GetLength(0) || y >= map.GetLength(1))
        {
            return false;
        }

        var cell = map[x, y];
        return cell == 'e' || cell == 'a';
    }
Exemple #2
0
 /// <summary>
 /// Creates an ObjectBinder and constructs its six state objects
 /// (State0 through State5), passing this binder to each of them.
 /// </summary>
 public ObjectBinder()
 {
     // Each state receives a reference to the binder being constructed.
     this.state0 = new State0(this);
     this.state1 = new State1(this);
     this.state2 = new State2(this);
     this.state3 = new State3(this);
     this.state4 = new State4(this);
     this.state5 = new State5(this);
 }
    /// <summary>
    /// Picks the direction index (left, forward or right of the current heading)
    /// with the highest Q value for the current State4, breaking ties uniformly
    /// at random, and stores the key that was evaluated in <c>last</c>.
    /// </summary>
    /// <returns>The chosen index into <c>directions</c>.</returns>
    int getBestActionQ()
    {
        State4 s = getState4();

        // All possible actions from this state: turn left, keep heading, turn right.
        List<int> actions = new List<int>();
        actions.Add((snake_dir_index + directions.Length - 1) % directions.Length);
        actions.Add(snake_dir_index);
        actions.Add((snake_dir_index + 1) % directions.Length);

        int   best     = 0;
        float bestQval = float.NegativeInfinity;
        int   tieCount = 0; // number of actions seen so far that share bestQval

        foreach (int act in actions)
        {
            // The key currently carries only the state (no action component).
            Key k;
            k.s = s;
            float newVal = getQ(k);

            if (newVal > bestQval)
            {
                best     = act;
                bestQval = newVal;
                tieCount = 1;
            }
            else if (newVal == bestQval)
            {
                // Reservoir-style tie-break: the n-th tied action replaces the current
                // pick with probability 1/n, so every tied action is equally likely.
                // (A flat 50% flip per tie would favour the later actions.)
                tieCount++;
                if (Random.Range(0, tieCount) == 0)
                {
                    best = act;
                }
            }
        }

        // Remember the evaluated key; it is the target of the next Q update.
        Key k_prime;
        k_prime.s = s;
        last = k_prime;

        if (print_debug)
        {
            print("best q: " + bestQval);
        }
        return(best);
    }
    /// <summary>
    /// Runs one game tick. If the snake is dead, either stops (no replay) or rebuilds
    /// the board and tail for a new session. Then the active controller
    /// (<c>controll_tye</c>) chooses a heading, the head moves one cell, the collision
    /// check runs, and the map and tail objects are updated to follow the head.
    /// </summary>
    void update_world()
    {
        if (snake_dead)
        {
            last_score = 0;

            // A terminal-state Q update (reward -1) was sketched here in the past but is
            // disabled. Clearing learn makes the next QLearn tick skip its update.
            learn = false;

            if (!replay_on_death)
            {
                return;
            }
            else
            {
                // set the board back up, but don't initialize new things
                MakeWorld makeWorld = GetComponent<MakeWorld>();
                map   = makeWorld.createMap(false);
                map_w = makeWorld.map_w;
                map_h = makeWorld.map_h;
                apple = makeWorld.real_apple;

                // Locate the head marker 'h' in the fresh map (the scan covers map_w + 2 by map_h + 2 cells).
                for (int i = 0; i < map_w + 2; ++i)
                {
                    for (int j = 0; j < map_h + 2; ++j)
                    {
                        if (map[i, j] == 'h')
                        {
                            snake_pos.x = i;
                            snake_pos.y = j;
                        }
                    }
                }

                snake_dir_index = 1;

                // set up tail: a single segment one step behind the controller object
                // NOTE(review): this uses controller.transform.position, not the snake_pos
                // found above - confirm createMap repositions the controller.
                Vector2 tail_vec2 = controller.transform.position - (Vector3)directions[snake_dir_index];
                tailPos = new List<Vector2>();
                tailPos.Add(tail_vec2);

                // destroy all old tail objects
                foreach (GameObject g in tailObjs)
                {
                    Destroy(g);
                }

                GameObject tail = Instantiate(tailObj, tail_vec2, Quaternion.identity);
                tailObjs = new List<GameObject>();
                tailObjs.Add(tail);

                map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

                print("score: " + score);
                if (numberSessions > 0)
                {
                    numberSessions -= 1;
                }
                if ((numberSessions < 1) && (firstPause == true))
                {
                    // Pause once when the session counter runs out, then re-arm the counter.
                    // NOTE(review): EditorApplication is a UnityEditor API - confirm this
                    // script is never compiled into a player build.
                    EditorApplication.isPaused = true;
                    firstPause     = false;
                    numberSessions = 101;
                }
                num_moves_this_game = 0;
                score      = 0;
                snake_dead = false;
            }
        }


        if (controll_tye == ControllType.SimpleAI)
        {
            simple_AI_controlls();
        }
        else if (controll_tye == ControllType.RandomAI)
        {
            // all possible actions from this state: left, forward, right
            List<int> actions = new List<int>();
            actions.Add((snake_dir_index + directions.Length - 1) % directions.Length);
            actions.Add(snake_dir_index);
            actions.Add((snake_dir_index + 1) % directions.Length);

            int rng_index = Random.Range(0, actions.Count);
            snake_dir_index = actions[rng_index];
        }
        else if (controll_tye == ControllType.Player)
        {
            if (isTurnLeft())
            {
                turn_left();
            }
            else if (isTurnRight())
            {
                turn_right();
            }
        }
        else if (controll_tye == ControllType.QLearn)
        {
            Key t_key;
            t_key.s = getState4();
            float n_Q    = getQ(t_key);
            float reward = score - last_score;
            if (learn)
            {
                // Blend the old estimate with the observed reward plus the discounted current value.
                float newQ = (1 - alpha) * oldQ + alpha * (reward + gamma * n_Q);
                setQ(last, newQ);
            }
            else
            {
                // Skip exactly one update (e.g. right after a death), then resume learning.
                learn = true;
            }
            Q_learning_controlls();
        }
        else if (controll_tye == ControllType.QLearnV2)
        {
            State4 cur_state = qLearnGetState(snake_pos, snake_dir_index);

            // Fetch the per-action values for this state, creating a zeroed row on first
            // visit. A single TryGetValue avoids the ContainsKey + TryGetValue double lookup.
            List<float> action_list;
            if (!qLearnV2Dict.TryGetValue(cur_state, out action_list))
            {
                action_list = new List<float>();
                action_list.Add(0.0f);
                action_list.Add(0.0f);
                action_list.Add(0.0f);

                qLearnV2Dict.Add(cur_state, action_list);
            }

            // Find the greedy action (0 = left, 1 = straight, 2 = right).
            float best_q     = float.MinValue;
            int   best_index = -1;
            int   tie_count  = 0; // number of actions seen so far that share best_q
            for (int i = 0; i < 3; ++i)
            {
                if (action_list[i] > best_q)
                {
                    best_q     = action_list[i];
                    best_index = i;
                    tie_count  = 1;
                }
                else if (action_list[i] == best_q)
                {
                    // Uniform tie-break: the n-th tied action wins with probability 1/n.
                    // A flat 50% flip per tie would pick "right" half the time on a
                    // fresh all-zero row.
                    tie_count++;
                    if (Random.Range(0, tie_count) == 0)
                    {
                        best_index = i;
                    }
                }
            }

            // update previous q value
            if (!is_first_turn && qLearn_update_value)
            {
                // NOTE(review): only the old value is halved here; if an average of old and
                // new estimates was intended the whole sum needs parentheses - confirm.
                qLearnV2Dict[qLearn_prev_state][qLearn_prev_action_index] = reward + qLearn_gamma * best_q + qLearnV2Dict[qLearn_prev_state][qLearn_prev_action_index] / 2.0f;
                reward = 0.0f;
            }
            is_first_turn = false;

            // see if we should do something random or do the real thing
            if (Random.Range(0.0f, 1.0f) < qLearn_random_change)
            {
                // do a random action
                best_index = Random.Range(0, 3);
            }

            if (best_index == 0)
            {
                // turn left
                turn_left();
            }
            else if (best_index == 1)
            {
                // straight
            }
            else
            {
                // turn right
                turn_right();
            }

            qLearn_prev_action_index = best_index;
            qLearn_prev_state        = cur_state;
        }
        else if (controll_tye == ControllType.MediumAI)
        {
            State4 m_state = getState4();

            // Steer by the signed apple-angle bucket; anything not covered by the three
            // cases below (including buckets of exactly +/-2) falls back to the simple AI.
            if (m_state.angle_rounded < -2 && m_state.is_right_valid)
            {
                turn_right();
            }
            else if (m_state.angle_rounded > 2 && m_state.is_left_valid)
            {
                turn_left();
            }
            else if (m_state.angle_rounded < 2 && m_state.angle_rounded > -2 && m_state.is_fwd_valid)
            {
                // go forward
            }
            else
            {
                simple_AI_controlls();
            }
        }


        // Move the head one cell along the chosen heading.
        snake_old_pos = snake_pos;
        snake_pos += directions[snake_dir_index];

        check_snake_pos();

        // update board
        map[(int)snake_pos.x, (int)snake_pos.y] = 'h';

        // update tail: free the last segment's cell, then shift every segment one slot
        // toward the head
        map[(int)tailPos[tailPos.Count - 1].x, (int)tailPos[tailPos.Count - 1].y] = 'e';
        for (int i = tailPos.Count - 1; i >= 1; --i)
        {
            tailPos[i] = tailPos[i - 1];
            tailObjs[i].transform.position            = tailObjs[i - 1].transform.position;
            map[(int)tailPos[i].x, (int)tailPos[i].y] = 't';
        }

        // The first segment takes the cell the head just left.
        tailPos[0] = snake_old_pos;
        tailObjs[0].transform.position            = snake_old_pos;
        map[(int)tailPos[0].x, (int)tailPos[0].y] = 't';

        if (addTailPiece)
        {
            // Grow by one segment placed at oldAppleLoc.
            tailPos.Add(oldAppleLoc);

            GameObject tail = Instantiate(tailObj, oldAppleLoc, Quaternion.identity);
            tailObjs.Add(tail);

            map[(int)oldAppleLoc.x, (int)oldAppleLoc.y] = 't';
        }



        controller.transform.position = snake_pos;
        num_moves_this_game++;
    }