Exemple #1
0
    /// <summary>
    /// Fixed-update tick (called reliably on a timer). Only every <c>slowDown</c>-th call
    /// does any work; on such a call the agent selects and takes <c>superSpeed</c>
    /// actions back to back.
    /// </summary>
    void FixedUpdate()
    {
        // Throttle: advance the skip counter and bail out unless it wrapped back to zero.
        skippy_ = (skippy_ + 1) % slowDown;
        if (skippy_ != 0)
        {
            return;
        }

        for (int step = 0; step < superSpeed; step++)
        {
            // Evaluated left to right with short-circuiting, so the random draw is only
            // made when this is a learning, non-validation run.
            bool greedy = validationRun_ || !learning || Random.Range(0.0f, 1.0f) > epsilon_;

            Action chosen;
            if (!greedy)
            {
                // Off-policy exploration; expected_ is left untouched on this path.
                chosen = q_.ArgRand(currentState_);
            }
            else
            {
                // Greedy choice is the on-policy action; remember its expected value.
                chosen    = q_.ArgMax(currentState_);
                expected_ = q_.Max(currentState_);
            }

            TakeAction(chosen);
        }
    }
Exemple #2
0
    /// <summary>
    /// Repaints <c>floorTexture_</c>, using one 3x3-pixel cell per (x, z) grid position
    /// for x in [0, width) and z in [0, depth), then applies the texture.
    /// </summary>
    /// <param name="q">Table queried through <c>Max</c> (cell tint) and <c>ArgMax</c> (policy marker).</param>
    /// <param name="s">State copied as the template for every lookup; only the copy's x and z are overwritten.</param>
    public void RefreshFloorTexture(TabQ q, State s)
    {
        const int cell = 3; // pixels per grid cell along each axis

        State probe = new State();
        probe.Set(s);

        for (int z = 0; z < depth; z++)
        {
            probe.z = z;
            int originZ = z * cell;

            for (int x = 0; x < width; x++)
            {
                probe.x = x;
                int originX = x * cell;

                // Cell tint: black unless the expected-reward overlay is enabled.
                Color tint = Color.black;
                if (displayExpectedReward)
                {
                    float reward = (float)q.Max(probe);
                    if (reward >= 10)
                    {
                        // High band: yellow towards white (unclamped, so it keeps going past 20).
                        tint = Color.LerpUnclamped(Color.yellow, Color.white, (reward - 10) / 10);
                    }
                    else if (reward > 0)
                    {
                        // Low positive band: blue towards yellow.
                        tint = Color.LerpUnclamped(Color.blue, Color.yellow, reward / 10);
                    }
                    else
                    {
                        // Zero, negative (or NaN): black towards red by magnitude.
                        tint = Color.LerpUnclamped(Color.black, Color.red, -reward / 10);
                    }
                }

                // Flood the whole cell with the tint.
                for (int row = 0; row < cell; row++)
                {
                    for (int col = 0; col < cell; col++)
                    {
                        floorTexture_.SetPixel(originX + col, originZ + row, tint);
                    }
                }

                if (!displayPolicy)
                {
                    continue;
                }

                // Policy marker: gray centre pixel plus a white pixel one step towards
                // the best action's direction.
                int centreX = originX + 1;
                int centreZ = originZ + 1;
                int tipX = centreX;
                int tipZ = centreZ;
                bool recognised = true;

                switch ((Direction)q.ArgMax(probe).iVal)
                {
                    case Direction.NORTH:
                        tipZ = centreZ + 1;
                        break;

                    case Direction.SOUTH:
                        tipZ = centreZ - 1;
                        break;

                    case Direction.EAST:
                        tipX = centreX + 1;
                        break;

                    case Direction.WEST:
                        tipX = centreX - 1;
                        break;

                    default:
                        // Any other value draws no marker at all.
                        recognised = false;
                        break;
                }

                if (recognised)
                {
                    floorTexture_.SetPixel(centreX, centreZ, Color.gray);
                    floorTexture_.SetPixel(tipX, tipZ, Color.white);
                }
            }
        }

        floorTexture_.Apply();
    }