Example #1
0
    /// <summary>
    /// Physics-step loop of the two-axis balancing agent.
    /// Each step it samples the platform rotation and the ball position/velocity,
    /// asks <c>ann</c> for four Q-values (indices 0-1 drive the tilt about the forward
    /// axis, indices 2-3 the tilt about the right axis), applies one tilt per axis and
    /// appends the transition to <c>replayMemory</c>.
    /// When the ball is dropped, or the memory holds more than 5000 transitions, it
    /// trains on a random mini-batch, then calls <c>ResetState()</c>, clears the memory
    /// and calls <c>ann.saveBrain()</c>. Balance-time statistics are only updated when
    /// the ball was actually dropped.
    /// </summary>
    void FixedUpdate()
    {
        timer += Time.deltaTime;

        // Resolve the ball's components once per step instead of once per use.
        // NOTE(review): assumes the calls into ann below do not change BallState.dropped — confirm.
        Rigidbody ballBody = ball.GetComponent <Rigidbody>();
        bool      dropped  = ball.GetComponent <BallState>().dropped;

        // Network input: three platform rotation components, ball x/z position, ball x/z velocity,
        // each scaled by the same constants the network was trained with.
        List <double> states = new List <double>
        {
            this.transform.rotation.x * 10,
            this.transform.rotation.y * 10,
            this.transform.rotation.z * 10,
            ball.transform.position.x / 2.5f,
            ball.transform.position.z / 2.5f,
            ballBody.velocity.x / 4.0f,
            ballBody.velocity.z / 4.0f
        };

        List <double> qs = ann.Predict(states);

        // Split the four outputs into one Q-value pair per tilt axis.
        // (The softmax of each pair was previously computed here but never used; re-add
        // SoftMax(qs1) / SoftMax(qs2) if the tilt is to be scaled by action confidence again.)
        qs1 = new List <double> { qs[0], qs[1] };
        qs2 = new List <double> { qs[2], qs[3] };

        // Greedy action per axis: index of the first maximal Q-value.
        maxQIndex1 = qs1.IndexOf(qs1.Max());
        maxQIndex2 = qs2.IndexOf(qs2.Max());

        // Exploration: replace both greedy actions with random ones.
        if (Random.Range(1, 100) < exploreRate)
        {
            maxQIndex1 = Random.Range(0, 2);
            maxQIndex2 = Random.Range(0, 2);
        }

        // The second call reads rotation.x after the first tilt has been applied,
        // exactly as the original sequential checks did.
        ApplyTilt(Vector3.forward, maxQIndex1, this.transform.rotation.z);
        ApplyTilt(Vector3.right, maxQIndex2, this.transform.rotation.x);

        // -1 marks the terminal (dropped) transition; the training loop below relies on that value.
        float reward = dropped ? -1.0f : 0.1f;

        if (maxQIndex1 != -1 && maxQIndex2 != -1)
        {
            Replay3D lastMemory = new Replay3D(states[0],
                                               states[1],
                                               states[2],
                                               states[3],
                                               states[4],
                                               states[5],
                                               states[6],
                                               maxQIndex1,
                                               maxQIndex2,
                                               reward);

            replayMemory.Add(lastMemory);
        }

        if (!dropped && replayMemory.Count <= 5000)
        {
            return;
        }

        if (exploreRate > minExploreRate)
        {
            exploreRate -= exploreDecay;
        }

        // Draw up to 32 distinct transition indices. batchSize never exceeds the memory size,
        // so the rejection loop always terminates.
        int        batchSize = Mathf.Min(32, replayMemory.Count);
        List <int> samples   = new List <int>();
        while (samples.Count < batchSize)
        {
            int candidate = Random.Range(0, replayMemory.Count);
            if (!samples.Contains(candidate))
            {
                samples.Add(candidate);
            }
        }

        List <List <double> > batchInputs  = new List <List <double> >();
        List <List <double> > batchOutputs = new List <List <double> >();

        foreach (int i in samples)
        {
            double feedback1;
            double feedback2;

            // The newest transition has no successor and a dropped ball ends the episode:
            // in both cases the target is just the immediate reward.
            bool terminal = i == replayMemory.Count - 1 || replayMemory[i].reward == -1;
            if (terminal)
            {
                feedback1 = replayMemory[i].reward;
                feedback2 = replayMemory[i].reward;
            }
            else
            {
                // Q-learning target per axis: reward + discount * max Q(next state).
                List <double> nextQs    = ann.Predict(replayMemory[i + 1].states);
                double        maxQ1Next = new List <double> { nextQs[0], nextQs[1] }.Max();
                double        maxQ2Next = new List <double> { nextQs[2], nextQs[3] }.Max();
                feedback1 = replayMemory[i].reward + (discount * maxQ1Next);
                feedback2 = replayMemory[i].reward + (discount * maxQ2Next);
            }

            // Start from the current prediction and overwrite only the two actions that were
            // taken, so the untaken actions contribute no error.
            List <double> targets = ann.Predict(replayMemory[i].states);
            targets[replayMemory[i].action1]     = feedback1;
            targets[replayMemory[i].action2 + 2] = feedback2;

            batchInputs.Add(replayMemory[i].states);
            batchOutputs.Add(targets);
        }

        // Nothing to learn from if no transition was recorded this episode.
        if (batchInputs.Count > 0)
        {
            ann.TrainBatch(batchInputs, batchOutputs);
        }

        if (dropped)
        {
            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            // Keep a sliding window of the last 100 balance times for the average.
            balanceTimes.Add(timer);
            if (balanceTimes.Count > 100)
            {
                balanceTimes.RemoveAt(0);
            }

            averageBalanceTime = 0;
            foreach (float balanceTime in balanceTimes)
            {
                averageBalanceTime += balanceTime;
            }
            averageBalanceTime /= balanceTimes.Count;

            timer = 0;

            failCount++;
        }

        ResetState();
        replayMemory.Clear();
        ann.saveBrain();
    }

    /// <summary>
    /// Applies one two-way tilt action about <paramref name="axis"/>:
    /// action 0 rotates by <c>tiltSpeed</c>, action 1 by <c>-tiltSpeed</c>, any other value
    /// does nothing. The tilt is skipped when <paramref name="tiltComponent"/> has already
    /// passed the 0.25 limit on the side the rotation would push it towards.
    /// </summary>
    /// <param name="axis">Local axis to rotate about.</param>
    /// <param name="action">Chosen action index for this axis (0 or 1).</param>
    /// <param name="tiltComponent">Current rotation component that belongs to the axis.</param>
    private void ApplyTilt(Vector3 axis, int action, float tiltComponent)
    {
        const float tiltLimit = 0.25f;

        if (action != 0 && action != 1)
        {
            return;
        }

        float angle = (action == 0) ? tiltSpeed : -tiltSpeed;

        // A positive angle about an axis raises that axis' quaternion component and a negative
        // angle lowers it, so each direction is checked against the bound it moves towards.
        // The previous inline checks tested `x <= 0.25f` for both x-axis directions and tested
        // the z-axis against the bound on the opposite side, which let the tilt run past the
        // limit and then blocked the corrective rotation.
        // NOTE(review): assumes rotation.x / rotation.z track the tilt about right / forward,
        // which holds for the small combined tilts allowed here — confirm in the scene.
        bool withinLimit = angle >= 0f
            ? tiltComponent <= tiltLimit
            : tiltComponent >= -tiltLimit;

        if (withinLimit)
        {
            transform.Rotate(axis, angle);
        }
    }