public IGMNData(IGMN owner, Vector mean)
{
    this.owner = owner;
    // Joint Gaussian over inputs and output, centred on the new sample.
    this.gauss = new Gaussian(mean, getStarterCovariance());
    // Marginal Gaussian over the input dimensions only (all but the last element).
    this.inputGauss = new Gaussian(mean.Part(0, mean.Elements.Length - 1), getInputStarterCovariance());
    Age = 1;
    Accumlator = 1;
}
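// Context sketch (assumption, not taken from this project): in an IGMN, the input-space
// Gaussian of each component typically weights the component by its likelihood of the
// query input, while the joint Gaussian supplies the component's output estimate;
// Recall() then returns the likelihood-weighted mixture of those estimates. A minimal
// 1-D illustration with plain doubles; RecallSketch and its toy arrays are illustrative
// names only, and the per-component output is assumed constant for brevity.
static double RecallSketch(double x, double[] means, double[] vars, double[] outs)
{
    double num = 0, den = 0;
    for (int i = 0; i < means.Length; ++i)
    {
        // Unnormalised input likelihood of component i.
        double w = Math.Exp(-0.5 * (x - means[i]) * (x - means[i]) / vars[i]);
        num += w * outs[i];
        den += w;
    }
    return den > 0 ? num / den : 0;
}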
public RL_TDlambda()
{
    InitializeComponent();
    sim = new InvertedPendulumEnvironmentSimulator();
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
    tab1 = new TabularApproximator(new int[] { 200, 200 }, new double[] { -Math.PI, -1.5 * Math.PI }, new double[] { Math.PI, 1.5 * Math.PI });
    tab2 = new TabularApproximator(new int[] { 200, 200 }, new double[] { -Math.PI, -1.5 * Math.PI }, new double[] { Math.PI, 1.5 * Math.PI });
    r = new Random();
    isRunning = true;
    trainingThread = new Thread(new ThreadStart(train));
    trainingThread.Start();
    timer1.Start();
}
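// Sketch (assumption): TabularApproximator's implementation is not shown in this section.
// Based on its constructor arguments here (bin counts, lower bounds, upper bounds) and the
// Train(state, target, rate) calls that appear later, it is presumably a uniform-grid
// baseline along these lines. TabularApproximatorSketch and GetValue are illustrative
// names, not verified members of the real class.
class TabularApproximatorSketch
{
    private readonly double[] table;
    private readonly int[] bins;
    private readonly double[] min, max;

    public TabularApproximatorSketch(int[] bins, double[] min, double[] max)
    {
        this.bins = bins; this.min = min; this.max = max;
        int size = 1;
        foreach (int b in bins) size *= b;
        table = new double[size];
    }

    // Map a continuous state to a flat grid-cell index (mixed-radix, clamped to the grid).
    private int Index(double[] state)
    {
        int idx = 0;
        for (int d = 0; d < bins.Length; ++d)
        {
            int i = (int)((state[d] - min[d]) / (max[d] - min[d]) * bins[d]);
            i = Math.Max(0, Math.Min(bins[d] - 1, i));
            idx = idx * bins[d] + i;
        }
        return idx;
    }

    public double GetValue(double[] state) { return table[Index(state)]; }

    // Move the stored cell value towards the target with the given learning rate.
    public void Train(double[] state, double target, double rate)
    {
        int i = Index(state);
        table[i] += rate * (target - table[i]);
    }
}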
public Form1()
{
    InitializeComponent();
    r = new Random();
    for (int k = 0; k < maxepochcount; ++k)
    {
        rewardsMin[k] = double.MaxValue;
        rewardsMax[k] = double.MinValue;
        rewardsAvg[k] = 0;
    }
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
    timer1.Start();
}
public RL_TDlambda_acrobot()
{
    InitializeComponent();
    sim = new AcrobotEnvironmentSimulator2();
    igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
    igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
    tab1 = new TabularApproximator(new int[] { 50, 50, 50, 50 }, new double[] { -Math.PI, -Math.PI, -3 * Math.PI, -3 * Math.PI }, new double[] { Math.PI, Math.PI, 3 * Math.PI, 3 * Math.PI });
    tab2 = new TabularApproximator(new int[] { 50, 50, 50, 50 }, new double[] { -Math.PI, -Math.PI, -3 * Math.PI, -3 * Math.PI }, new double[] { Math.PI, Math.PI, 3 * Math.PI, 3 * Math.PI });
    r = new Random();
    isRunning = true;
    trainingThread = new Thread(new ThreadStart(train));
    trainingThread.Start();
    timer1.Start();
}
public Form1()
{
    InitializeComponent();
    igmn = new IGMN(new Vector(new double[] { 0.005, 0.005, 1 }));
    //IGMNDataTest2();
    //// Sweep the input range and print the IGMN's regression output:
    ////for (double x = 0; x < 12.5; x += 0.1)
    ////{
    ////    Console.Out.WriteLine(igmn.Recall(new Vector(new double[] { x })));
    ////}
    timer1.Start();
}
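// Usage sketch (illustrative): how Train/Recall are exercised on this 3-dimensional IGMN,
// mirroring the calling pattern of the RL forms below (Train takes the full joint vector,
// Recall takes only the input part). FitAndQuerySketch and the sine target are made up
// for illustration and are not part of the original project.
void FitAndQuerySketch()
{
    IGMN net = new IGMN(new Vector(new double[] { 0.005, 0.005, 1 }));
    for (double x = 0; x < 12.5; x += 0.1)
    {
        double y = Math.Sin(x);                               // some target function
        net.Train(new Vector(new double[] { x, 0, y }));      // joint sample (x1, x2, y)
    }
    for (double x = 0; x < 12.5; x += 0.1)
    {
        Console.Out.WriteLine(net.Recall(new Vector(new double[] { x, 0 }))); // query with inputs only
    }
}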
private void timer1_Tick(object sender, EventArgs e)
{
    if (count >= maxepochcount)
    {
        igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 10 }));
        igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 10 }));
        count = 0;
        traincount++;
        if (traincount >= maxtraincount)
        {
            timer1.Stop();
            //saveBitmap();
            using (StreamWriter sw = new StreamWriter("learningMin.txt"))
            {
                foreach (double d in rewardsMin) { sw.Write(d.ToString() + "\r\n"); }
            }
            using (StreamWriter sw = new StreamWriter("learningMax.txt"))
            {
                foreach (double d in rewardsMax) { sw.Write(d.ToString() + "\r\n"); }
            }
            using (StreamWriter sw = new StreamWriter("learningAvg.txt"))
            {
                foreach (double d in rewardsAvg) { sw.Write(d.ToString() + "\r\n"); }
            }
        }
    }

    //if (stepcount >= maxstepcount)
    {
        stepcount = 0;
        Console.Out.WriteLine(sumreward);
        rewardsMin[count] = Math.Min(sumreward, rewardsMin[count]);
        rewardsMax[count] = Math.Max(sumreward, rewardsMax[count]);
        rewardsAvg[count] += sumreward / maxtraincount;
        count++;
        sumreward = 0;
        // Reset the acrobot to the hanging-down position.
        q1 = -Math.PI / 2; //r.NextDouble()*2*Math.PI-Math.PI;
        dq1 = 0;
        q2 = 0;
        dq2 = 0;
    }

    for (int i = 0; i < 200; ++i)
    {
        // Q-values of the two discrete torque actions.
        double actionq1 = igmn1.Recall(new Vector(new double[] { q1, q2, dq1, dq2 }));
        double actionq2 = igmn2.Recall(new Vector(new double[] { q1, q2, dq1, dq2 }));
        int action = 0;
        if (actionq1 > actionq2) { action = -1; } else { action = 1; }
        // Epsilon-greedy exploration.
        if (r.NextDouble() > 0.9)
        {
            action = r.Next(2) * 2 - 1;
        }
        if (action < 0) actionq_act = actionq1; else actionq_act = actionq2;

        // Reward: peaks when both links point straight up.
        double reward = /*Math.Sin(q1) + Math.Sin(q1 + q2) + 100 **/ Math.Round(Math.Pow(Math.Max(0, 0.5 * Math.Sin(q1) + 0.5 * Math.Sin(q1 + q2)), 30), 5);
        sumreward += reward;

        // Train the previous step towards the bootstrapped target.
        double delta_q = (reward + 0.99 * actionq_act); //Math.Max(q1,q2)
        if (M_old < 0)
            igmn1.Train(new Vector(new double[] { q1_old, q2_old, dq1_old, dq2_old, delta_q }));
        else
            igmn2.Train(new Vector(new double[] { q1_old, q2_old, dq1_old, dq2_old, delta_q }));

        // Step the simulation.
        M = action * M_abs;
        q1_old = q1; q2_old = q2; dq1_old = dq1; dq2_old = dq2;
        M_old = M;
        actionq_old = actionq_act;

        // Acrobot dynamics: two-link underactuated pendulum, torque M on the second joint.
        double d11, d22, d12, d21;
        double h1, h2, fi1, fi2;
        double ddq1;
        double ddq2;
        double c1, c2, c3, c4, c5;
        c1 = m1 * lc1 * lc1 + m2 * l1 * l1 + I1;
        c2 = m2 * lc2 * lc2 + I2;
        c3 = m2 * l1 * lc2;
        c4 = m1 * lc1 + m2 * l1;
        c5 = m2 * lc2;
        d11 = c1 + c2 + 2 * c3 * Math.Cos(q2);
        d21 = d12 = c2 + c3 * Math.Cos(q2);
        d22 = c2;
        h1 = c3 * (-2 * dq1 * dq2 - dq2 * dq2) * Math.Sin(q2);
        h2 = c3 * (dq1 * dq1) * Math.Sin(q2);
        fi1 = c4 * g * Math.Cos(q1) + c5 * g * Math.Cos(q1 + q2);
        fi2 = c5 * g * Math.Cos(q1 + q2);
        double delta = d11 * d22 - d12 * d21;
        ddq1 = (1 / delta) * (d22 * (-h1 - fi1) - d12 * (M - h2 - fi2));
        ddq2 = (1 / delta) * (-d21 * (-h1 - fi1) + d11 * (M - h2 - fi2));

        // Velocity damping and Euler integration.
        dq1 *= 0.97;
        dq2 *= 0.97;
        q1 += dq1 * deltaT;
        q2 += dq2 * deltaT;
        dq1 += ddq1 * deltaT;
        dq2 += ddq2 * deltaT;

        // Wrap both angles into [-pi, pi].
        if (q1 < -Math.PI) q1 += 2 * Math.PI;
        if (q1 > Math.PI) q1 -= 2 * Math.PI;
        if (q2 < -Math.PI) q2 += 2 * Math.PI;
        if (q2 > Math.PI) q2 -= 2 * Math.PI;
    }
    //stepcount++;
    Invalidate();
}
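// Restatement of the inline dynamics above (same math, pulled into a reusable form for
// clarity): the acrobot obeys D(q)*ddq + h(q, dq) + phi(q) = (0, M)^T, and ddq is obtained
// by inverting the 2x2 inertia matrix D. AcrobotAcceleration is an illustrative helper
// name, not part of the original project; m1, m2, l1, lc1, lc2, I1, I2 and g are assumed
// to be the same fields used in the method above.
void AcrobotAcceleration(double q1, double q2, double dq1, double dq2, double M,
                         out double ddq1, out double ddq2)
{
    double c1 = m1 * lc1 * lc1 + m2 * l1 * l1 + I1;
    double c2 = m2 * lc2 * lc2 + I2;
    double c3 = m2 * l1 * lc2;
    double c4 = m1 * lc1 + m2 * l1;
    double c5 = m2 * lc2;

    // Inertia matrix D(q), Coriolis/centrifugal terms h, gravity terms phi.
    double d11 = c1 + c2 + 2 * c3 * Math.Cos(q2);
    double d12 = c2 + c3 * Math.Cos(q2);
    double d21 = d12;
    double d22 = c2;
    double h1 = c3 * (-2 * dq1 * dq2 - dq2 * dq2) * Math.Sin(q2);
    double h2 = c3 * dq1 * dq1 * Math.Sin(q2);
    double fi1 = c4 * g * Math.Cos(q1) + c5 * g * Math.Cos(q1 + q2);
    double fi2 = c5 * g * Math.Cos(q1 + q2);

    // Solve D * ddq = tau - h - phi with tau = (0, M)^T via the 2x2 matrix inverse.
    double det = d11 * d22 - d12 * d21;
    ddq1 = (d22 * (-h1 - fi1) - d12 * (M - h2 - fi2)) / det;
    ddq2 = (-d21 * (-h1 - fi1) + d11 * (M - h2 - fi2)) / det;
}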
void train()
{
    double[] state; // q1, q2, dq1, dq2
    double lambda = 0.9;
    double gamma = 0.99;
    Random r = new Random();
    while (isRunning)
    {
        igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
        igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
        for (int k = 0; k < 500; ++k)
        {
            List<TDLambda_data> diffQs = new List<TDLambda_data>();
            // Start most episodes from a random state, occasionally from the hanging-down state.
            if (r.NextDouble() > 0.01)
                state = new double[] { (2 * r.NextDouble() - 1) * Math.PI, (2 * r.NextDouble() - 1) * 0.8 * Math.PI, (2 * r.NextDouble() - 1) * 0.5 * Math.PI, (2 * r.NextDouble() - 1) * 0.5 * Math.PI };
            else
                state = new double[] { -Math.PI, 0, 0, 0 };
            //state = new double[] { (2 * r.NextDouble() - 1) * Math.PI, (2 * r.NextDouble() - 1) * 0.8 * Math.PI, 0, 0 };
            //state = new double[] { 0, 0, 0, 0 };

            double M;
            double q_act;
            double sumreward = 0;
            GetAction(state, out M, out q_act);
            for (int i = 0; i < 300; ++i)
            {
                double[] state_new;
                double reward;
                sim.Simulate(state, new double[] { M }, dT, out state_new, out reward);

                double M_new;
                double q_act_new;
                GetAction(state_new, out M_new, out q_act_new);

                // One-step TD error, propagated backwards with decay (lambda*gamma)^(i-j).
                double delta_q = (reward + gamma * q_act_new - q_act);
                diffQs.Add(new TDLambda_data(state, M, q_act, 0));
                for (int j = i; j >= 0; --j)
                {
                    diffQs[j].diff += Math.Pow(lambda * gamma, i - j) * delta_q;
                }

                M = M_new;
                q_act = q_act_new;
                state = state_new;
                sumreward += reward;
            }
            //Console.Out.WriteLine(sumreward);

            // TD(lambda) training: each visited state is trained towards its old Q plus the accumulated correction.
            for (int i = 0; i < diffQs.Count; ++i)
            {
                if (diffQs[i].action < 0)
                {
                    lock (igmn1)
                    {
                        igmn1.Train(new Vector(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3], diffQs[i].diff + diffQs[i].Q }));
                    }
                    lock (tab1)
                    {
                        //tab1.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, diffQs[i].diff + diffQs[i].Q, 0.1);
                    }
                }
                else if (diffQs[i].action > 0)
                {
                    lock (igmn2)
                    {
                        igmn2.Train(new Vector(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3], diffQs[i].diff + diffQs[i].Q }));
                    }
                    lock (tab2)
                    {
                        //tab2.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, diffQs[i].diff + diffQs[i].Q, 0.1);
                    }
                }
            }
            bm = VisualizeAcrobot();
            lock (this) { }
            //Thread.Sleep(1000);
        }
    }
}
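// Clarifying sketch of the update above: after an episode, each visited state j is trained
// towards Q(s_j) + sum over i >= j of (lambda*gamma)^(i-j) * delta_i, i.e. its old estimate
// plus the lambda-weighted sum of the later one-step TD errors (an offline TD(lambda) pass).
// LambdaTargetsSketch is an illustrative name; it reproduces only the target computation.
static double[] LambdaTargetsSketch(double[] tdErrors, double[] oldQ, double lambda, double gamma)
{
    double[] targets = new double[oldQ.Length];
    for (int j = 0; j < oldQ.Length; ++j)
    {
        double correction = 0;
        for (int i = j; i < tdErrors.Length; ++i)
        {
            correction += Math.Pow(lambda * gamma, i - j) * tdErrors[i];
        }
        targets[j] = oldQ[j] + correction;
    }
    return targets;
}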
private void timer1_Tick(object sender, EventArgs e)
{
    if (count >= maxepochcount)
    {
        igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
        igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
        count = 0;
        traincount++;
        if (traincount >= maxtraincount)
        {
            timer1.Stop();
            //saveBitmap();
            using (StreamWriter sw = new StreamWriter("learningMin.txt"))
            {
                foreach (double d in rewardsMin) { sw.Write(d.ToString() + "\r\n"); }
            }
            using (StreamWriter sw = new StreamWriter("learningMax.txt"))
            {
                foreach (double d in rewardsMax) { sw.Write(d.ToString() + "\r\n"); }
            }
            using (StreamWriter sw = new StreamWriter("learningAvg.txt"))
            {
                foreach (double d in rewardsAvg) { sw.Write(d.ToString() + "\r\n"); }
            }
        }
    }

    double sumreward = 0;
    // Start each episode from a random angle and angular velocity.
    a = (2 * r.NextDouble() - 1) * Math.PI;
    w = (2 * r.NextDouble() - 1) * 2 * Math.PI;
    a_old = a;
    w_old = w;

    for (int i = 0; i < 200; ++i)
    {
        // Q-values of the two discrete torque actions.
        double q1 = igmn1.Recall(new Vector(new double[] { a, w }));
        double q2 = igmn2.Recall(new Vector(new double[] { a, w }));
        int action = 0;
        if (q1 > q2) { action = -1; } else { action = 1; }
        // Epsilon-greedy exploration.
        if (r.NextDouble() > 0.7)
        {
            action = r.Next(2) * 2 - 1;
        }
        if (action < 0) q_act = q1; else q_act = q2;

        // Reward: highest around a = 0.
        double reward = Math.Cos(a);
        sumreward += reward;

        // Train the previous step towards the bootstrapped target.
        double delta_q = (reward + 0.99 * q_act); //Math.Max(q1,q2)
        if (M_old < 0)
            igmn1.Train(new Vector(new double[] { a_old, w_old, delta_q }));
        else
            igmn2.Train(new Vector(new double[] { a_old, w_old, delta_q }));

        // Step the simulation (pendulum dynamics, Euler integration, angle wrapped into [-pi, pi]).
        M = action * M_abs;
        a_old = a;
        w_old = w;
        M_old = M;
        q_old = q_act;
        w += (l * m * g * Math.Sin(a) + M - mu * w) / (l * l * m) * dT;
        a += w * dT;
        if (a < -Math.PI) a += 2 * Math.PI;
        if (a > Math.PI) a -= 2 * Math.PI;
    }

    Console.Out.WriteLine(sumreward);
    rewardsMin[count] = Math.Min(sumreward, rewardsMin[count]);
    rewardsMax[count] = Math.Max(sumreward, rewardsMax[count]);
    rewardsAvg[count] += sumreward / maxtraincount;
    count++;
    //Invalidate();
}
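// Sketch (assumption): InvertedPendulumEnvironmentSimulator's source is not shown in this
// section. Based on the inline update above, one plausible Simulate step uses the same
// pendulum dynamics, ddot(a) = (m*g*l*sin(a) + M - mu*da) / (m*l*l), with Euler integration
// and a cos(a) reward. SimulateSketch is an illustrative name; l, m, g, mu are assumed to be
// the same fields used above, and the real simulator may differ.
void SimulateSketch(double[] state, double M, double dT, out double[] stateNew, out double reward)
{
    double a = state[0], w = state[1];
    w += (l * m * g * Math.Sin(a) + M - mu * w) / (l * l * m) * dT; // angular velocity update
    a += w * dT;                                                    // angle update
    if (a < -Math.PI) a += 2 * Math.PI;                             // wrap into [-pi, pi]
    if (a > Math.PI) a -= 2 * Math.PI;
    stateNew = new double[] { a, w };
    reward = Math.Cos(a); // reward peaks at a = 0
}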