// Background training loop: runs TD(lambda) episodes on the acrobot simulator
// and trains one IGMN value approximator per action sign (igmn1 for M < 0,
// igmn2 for M > 0). Loops until the isRunning flag (declared elsewhere in this
// class) is cleared. Reads sim / dT; writes igmn1 / igmn2 / bm.
void train()
{
    double[] state;            // assumed [angle1, angle2, angVel1, angVel2] from the initializers below — TODO confirm
    double lambda = 0.9;       // eligibility-trace decay
    double gamma = 0.99;       // discount factor
    Random r = new Random();   // created once per call, so no per-call seed collision here

    while (isRunning)
    {
        // Fresh approximators each outer iteration; 5-dim input = 4 state vars + value.
        igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
        igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));

        for (int k = 0; k < 500; ++k)   // 500 episodes per pair of fresh networks
        {
            List<TDLambda_data> diffQs = new List<TDLambda_data>();

            // Exploring starts: 99% a random state, 1% the hanging-down start state.
            if (r.NextDouble() > 0.01)
                state = new double[] {
                    (2 * r.NextDouble() - 1) * Math.PI,
                    (2 * r.NextDouble() - 1) * 0.8 * Math.PI,
                    (2 * r.NextDouble() - 1) * 0.5 * Math.PI,
                    (2 * r.NextDouble() - 1) * 0.5 * Math.PI };
            else
                state = new double[] { -Math.PI, 0, 0, 0 };

            double M;       // torque/action chosen for the current state
            double q_act;   // value estimate for (state, M)
            double sumreward = 0;
            GetAction(state, out M, out q_act);

            for (int i = 0; i < 300; ++i)   // episode of 300 simulation steps
            {
                double[] state_new;
                double reward;
                sim.Simulate(state, new double[] { M }, dT, out state_new, out reward);

                double M_new;
                double q_act_new;
                GetAction(state_new, out M_new, out q_act_new);

                // One-step TD error; credit is propagated backward to every
                // earlier step with geometric (lambda*gamma)^(i-j) weighting.
                double delta_q = (reward + gamma * q_act_new - q_act);
                diffQs.Add(new TDLambda_data(state, M, q_act, 0));

                // NOTE(review): this backward sweep makes each episode O(n^2);
                // a forward eligibility accumulator would be O(n). Left as-is
                // to avoid changing the numerics.
                for (int j = i; j >= 0; --j)
                {
                    diffQs[j].diff += Math.Pow(lambda * gamma, i - j) * delta_q;
                }

                M = M_new;
                q_act = q_act_new;
                state = state_new;
                sumreward += reward;
            }
            //Console.Out.WriteLine(sumreward);

            // TD(lambda) training pass: route each sample to the network that
            // matches the sign of its action.
            // BUG(review): samples with action == 0 are trained into neither
            // network and are silently dropped — confirm GetAction can never
            // return M == 0.
            for (int i = 0; i < diffQs.Count; ++i)
            {
                // Lambda-return training target for this step.
                double target = diffQs[i].diff + diffQs[i].Q;
                Vector sample = new Vector(new double[] {
                    diffQs[i].state[0], diffQs[i].state[1],
                    diffQs[i].state[2], diffQs[i].state[3],
                    target });

                if (diffQs[i].action < 0)
                {
                    lock (igmn1)
                    {
                        igmn1.Train(sample);
                    }
                    // Disabled tabular learner (original held lock(tab1) around
                    // only this commented-out call — pure lock contention with
                    // no effect, so the empty lock was removed):
                    //tab1.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, target, 0.1);
                }
                else if (diffQs[i].action > 0)
                {
                    lock (igmn2)
                    {
                        igmn2.Train(sample);
                    }
                    // Disabled tabular learner (empty lock(tab2) removed for
                    // the same reason as tab1 above):
                    //tab2.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, target, 0.1);
                }
            }

            bm = VisualizeAcrobot();

            // NOTE(review): empty lock(this) presumably lets another thread
            // pause this trainer by holding the monitor on `this`; locking on
            // `this` is an anti-pattern (prefer a private readonly gate
            // object), but it is kept here to preserve the synchronization
            // point with unseen code — TODO confirm and migrate.
            lock (this) { }
            //Thread.Sleep(1000);
        }
    }
}