/// <summary>
/// One physics step of the Q-learning loop: builds the state vector from the
/// platform rotation and the ball's position/velocity, asks the network for
/// four Q-values (two per tilt axis), applies the chosen tilt on each axis,
/// records the transition, and — when the ball has dropped or the replay
/// memory has grown past its cap — trains on a random mini-batch and resets.
/// </summary>
void FixedUpdate()
{
    // Bound on the rotation quaternion's x/z components; tilting further is blocked.
    const float tiltLimit = 0.25f;
    // Replay size that forces a training pass even if the ball has not dropped.
    const int maxReplayBeforeTraining = 5000;
    // Upper bound on the number of transitions sampled per training pass.
    const int maxBatchSize = 32;

    timer += Time.deltaTime;

    Rigidbody ballBody = ball.GetComponent<Rigidbody>();

    // State vector; the divisors/multipliers are the original scaling factors.
    List<double> states = new List<double>
    {
        transform.rotation.x * 10,
        transform.rotation.y * 10,
        transform.rotation.z * 10,
        ball.transform.position.x / 2.5f,
        ball.transform.position.z / 2.5f,
        ballBody.velocity.x / 4.0f,
        ballBody.velocity.z / 4.0f
    };

    // Outputs 0-1 drive the forward (z) axis, outputs 2-3 drive the right (x) axis.
    List<double> qs = ann.Predict(states);
    qs1 = qs.GetRange(0, 2);
    qs2 = qs.GetRange(2, 2);

    maxQIndex1 = qs1.IndexOf(qs1.Max());
    maxQIndex2 = qs2.IndexOf(qs2.Max());

    // Exploration: occasionally replace the greedy actions with random ones.
    if (Random.Range(1, 100) < exploreRate)
    {
        maxQIndex1 = Random.Range(0, 2);
        maxQIndex2 = Random.Range(0, 2);
    }

    // The x component is read after the z rotation has been applied, as before.
    TiltIfWithinLimit(Vector3.forward, transform.rotation.z, maxQIndex1, tiltLimit);
    TiltIfWithinLimit(Vector3.right, transform.rotation.x, maxQIndex2, tiltLimit);

    bool dropped = ball.GetComponent<BallState>().dropped;
    float reward = dropped ? -1.0f : 0.1f;

    if (maxQIndex1 != -1 && maxQIndex2 != -1)
    {
        replayMemory.Add(new Replay3D(
            states[0], states[1], states[2], states[3], states[4], states[5], states[6],
            maxQIndex1, maxQIndex2, reward));
    }

    // Nothing to train on yet: keep collecting transitions.
    if (!dropped && replayMemory.Count <= maxReplayBeforeTraining)
    {
        return;
    }

    if (exploreRate > minExploreRate)
    {
        exploreRate -= exploreDecay;
    }

    int batchSize = maxBatchSize;
    if (replayMemory.Count < batchSize)
    {
        batchSize = replayMemory.Count;
    }

    // Draw distinct random indices into the replay memory.
    List<int> samples = new List<int>();
    while (samples.Count < batchSize)
    {
        int candidate = Random.Range(0, replayMemory.Count);
        if (!samples.Contains(candidate))
        {
            samples.Add(candidate);
        }
    }

    List<List<double>> batchInputs = new List<List<double>>();
    List<List<double>> batchOutputs = new List<List<double>>();

    foreach (int i in samples)
    {
        Replay3D memory = replayMemory[i];

        double feedback1;
        double feedback2;

        // The newest memory has no successor, and a -1 reward marks a dropped
        // ball; in both cases the target is the raw reward with no bootstrap.
        if (i == replayMemory.Count - 1 || memory.reward == -1)
        {
            feedback1 = memory.reward;
            feedback2 = memory.reward;
        }
        else
        {
            // Target = reward + discount * max Q of the following state, per axis.
            List<double> nextQs = ann.Predict(replayMemory[i + 1].states);
            double maxQ1Next = nextQs.GetRange(0, 2).Max();
            double maxQ2Next = nextQs.GetRange(2, 2).Max();

            feedback1 = memory.reward + (discount * maxQ1Next);
            feedback2 = memory.reward + (discount * maxQ2Next);
        }

        // Only the outputs of the actions actually taken are overwritten with
        // their targets; the other two keep the network's current prediction.
        List<double> targets = ann.Predict(memory.states);
        targets[memory.action1] = feedback1;
        targets[memory.action2 + 2] = feedback2;

        batchInputs.Add(memory.states);
        batchOutputs.Add(targets);
    }

    ann.TrainBatch(batchInputs, batchOutputs);

    if (dropped)
    {
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }

        // Rolling window of the most recent 100 balance times.
        balanceTimes.Add(timer);
        if (balanceTimes.Count > 100)
        {
            balanceTimes.RemoveAt(0);
        }

        averageBalanceTime = 0;
        foreach (float balanceTime in balanceTimes)
        {
            averageBalanceTime += balanceTime;
        }
        averageBalanceTime /= balanceTimes.Count;

        timer = 0;
        failCount++;
    }

    ResetState();
    replayMemory.Clear();
    ann.saveBrain();
}

/// <summary>
/// Rotates the platform about <paramref name="axis"/> by +tiltSpeed for action 0
/// or -tiltSpeed for action 1, unless that rotation would push the matching
/// rotation component further beyond ±<paramref name="limit"/>.
/// Any other action index leaves the platform untouched.
/// </summary>
/// <param name="axis">Axis passed to <c>transform.Rotate</c>.</param>
/// <param name="component">Current rotation quaternion component for that axis.</param>
/// <param name="actionIndex">Chosen action: 0 or 1; other values are ignored.</param>
/// <param name="limit">Magnitude bound on <paramref name="component"/>.</param>
private void TiltIfWithinLimit(Vector3 axis, float component, int actionIndex, float limit)
{
    float angle;
    if (actionIndex == 0)
    {
        angle = tiltSpeed;
    }
    else if (actionIndex == 1)
    {
        angle = -tiltSpeed;
    }
    else
    {
        return;
    }

    // The bound is chosen from the sign of the applied angle so the limit holds
    // in both directions regardless of the sign of tiltSpeed.
    // NOTE(review): assumes a positive angle about the axis increases the
    // matching quaternion component (true for small tilts) — confirm in scene.
    bool withinLimit = angle > 0.0f ? component <= limit : component >= -limit;
    if (withinLimit)
    {
        transform.Rotate(axis, angle);
    }
}