Example 1
 public IGMNData(IGMN owner, Vector mean)
 {
     this.owner = owner;
     this.gauss = new Gaussian(mean, getStarterCovariance());
     this.inputGauss = new Gaussian(mean.Part(0, mean.Elements.Length - 1), getInputStarterCovariance());
     Age = 1;
     Accumlator = 1;
 }
Example 2
        public RL_TDlambda()
        {
            InitializeComponent();
            sim = new InvertedPendulumEnvironmentSimulator();

            igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
            igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
            tab1 = new TabularApproximator(new int[] { 200, 200 }, new double[] { -Math.PI, -1.5 * Math.PI }, new double[] { Math.PI, 1.5 * Math.PI });
            tab2 = new TabularApproximator(new int[] { 200, 200 }, new double[] { -Math.PI, -1.5 * Math.PI }, new double[] { Math.PI, 1.5 * Math.PI });
            r = new Random();
            isRunning = true;
            trainingThread = new Thread(new ThreadStart(train));
            trainingThread.Start();
            timer1.Start();
        }
Example 3
        public Form1()
        {
            InitializeComponent();
            r = new Random();

            for (int k = 0; k < maxepochcount; ++k)
            {
                rewardsMin[k] = double.MaxValue;
                rewardsMax[k] = double.MinValue;
                rewardsAvg[k] = 0;
            }

            igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
            igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
            timer1.Start();
        }
Example 4
        public RL_TDlambda_acrobot()
        {
            InitializeComponent();
            sim = new AcrobotEnvironmentSimulator2();

            igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
            igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));

            tab1 = new TabularApproximator(new int[] { 50, 50, 50, 50 }, new double[] { -Math.PI, -Math.PI, -3 * Math.PI, -3 * Math.PI }, new double[] { Math.PI, Math.PI, 3 * Math.PI, 3 * Math.PI });
            tab2 = new TabularApproximator(new int[] { 50, 50, 50, 50 }, new double[] { -Math.PI, -Math.PI, -3 * Math.PI, -3 * Math.PI }, new double[] { Math.PI, Math.PI, 3 * Math.PI, 3 * Math.PI });

            r = new Random();
            isRunning = true;
            trainingThread = new Thread(new ThreadStart(train));
            trainingThread.Start();
            timer1.Start();
        }
Example 5
        public Form1()
        {
            InitializeComponent();
                        
            igmn = new IGMN(new Vector(new double[] {0.005, 0.005, 1 }));
            //IGMNDataTest2();

            //for (double x = 0; x < 12.5; x += 0.1)
            //{
            //    Console.Out.WriteLine(igmn.Recall(new Vector(new double[] { x })));
            //}

            timer1.Start();
        }
Example 6
        private void timer1_Tick(object sender, EventArgs e)
        {

            if (count >= maxepochcount)
            {
                igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 10 }));
                igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 10 }));
                count = 0;
                traincount++;
                if (traincount >= maxtraincount)
                {
                    timer1.Stop();
                    //saveBitmap();
                    using (StreamWriter sw = new StreamWriter("learningMin.txt"))
                    {
                        foreach (double d in rewardsMin)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }
                    using (StreamWriter sw = new StreamWriter("learningMax.txt"))
                    {
                        foreach (double d in rewardsMax)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }
                    using (StreamWriter sw = new StreamWriter("learningAvg.txt"))
                    {
                        foreach (double d in rewardsAvg)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }
                }

            }

            //if (stepcount >= maxstepcount)
            {
                stepcount = 0;
                Console.Out.WriteLine(sumreward);
                rewardsMin[count] = Math.Min(sumreward, rewardsMin[count]);
                rewardsMax[count] = Math.Max(sumreward, rewardsMax[count]);
                rewardsAvg[count] += sumreward / maxtraincount;
                count++;

                sumreward = 0;

                q1 = -Math.PI / 2;//r.NextDouble()*2*Math.PI-Math.PI;
                dq1 = 0;
                q2 = 0;
                dq2 = 0;            
            }

            for (int i = 0; i < 200; ++i)
            {
                //q values
                double actionq1 = igmn1.Recall(new Vector(new double[] { q1, q2, dq1, dq2 }));
                double actionq2 = igmn2.Recall(new Vector(new double[] { q1, q2, dq1, dq2 }));

                int action = 0;

                if (actionq1 > actionq2)
                {
                    action = -1;
                }
                else
                {
                    action = 1;
                }

                //epsilon-greedy exploration
                if (r.NextDouble() > 0.9)
                {
                    action = r.Next(2) * 2 - 1;
                }

                if (action < 0) actionq_act = actionq1;
                else actionq_act = actionq2;

                //reward
                double reward = /*Math.Sin(q1) + Math.Sin(q1 + q2) + 100 **/ Math.Round(Math.Pow(Math.Max(0,0.5 * Math.Sin(q1) + 0.5 * Math.Sin(q1 + q2)),30),5);

                sumreward += reward;


                //train the previous step
                double delta_q = (reward + 0.99 * actionq_act); //Math.Max(q1,q2)

                if (M_old < 0) igmn1.Train(new Vector(new double[] { q1_old, q2_old, dq1_old, dq2_old, delta_q }));
                else igmn2.Train(new Vector(new double[] { q1_old, q2_old, dq1_old, dq2_old, delta_q }));

                //step the simulation
                M = action * M_abs;

                q1_old = q1;
                q2_old = q2;
                dq1_old = dq1;
                dq2_old = dq2;

                M_old = M;
                actionq_old = actionq_act;

                double d11, d22, d12, d21;
                double h1, h2, fi1, fi2;
                double ddq1;
                double ddq2;

                double c1, c2, c3, c4, c5;

                c1 = m1 * lc1 * lc1 + m2 * l1 * l1 + I1;
                c2 = m2 * lc2 * lc2 + I2;
                c3 = m2 * l1 * lc2;
                c4 = m1 * lc1 + m2 * l1;
                c5 = m2 * lc2;

                d11 = c1 + c2 + 2 * c3 * Math.Cos(q2);
                d21 = d12 = c2 + c3 * Math.Cos(q2);
                d22 = c2;

                h1 = c3 * (-2 * dq1 * dq2 - dq2 * dq2) * Math.Sin(q2);
                h2 = c3 * (dq1 * dq1) * Math.Sin(q2);

                fi1 = c4 * g * Math.Cos(q1) + c5 * g * Math.Cos(q1 + q2);
                fi2 = c5 * g * Math.Cos(q1 + q2);

                double delta = d11 * d22 - d12 * d21;
                ddq1 = (1 / delta) * (d22 * (-h1 - fi1) - d12 * (M - h2 - fi2));
                ddq2 = (1 / delta) * (-d21 * (-h1 - fi1) + d11 * (M - h2 - fi2));

                dq1 *= 0.97;
                dq2 *= 0.97;

                q1 += dq1 * deltaT;
                q2 += dq2 * deltaT;
                dq1 += ddq1 * deltaT;
                dq2 += ddq2 * deltaT;

                if (q1 < -Math.PI) q1 += 2 * Math.PI;
                if (q1 > Math.PI) q1 -= 2 * Math.PI;
                if (q2 < -Math.PI) q2 += 2 * Math.PI;
                if (q2 > Math.PI) q2 -= 2 * Math.PI;

            }

            //stepcount++;
            Invalidate();
        }
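The d/h/fi block in the middle of Example 6 is the standard two-link acrobot equations of motion written out term by term. As a reading aid only (this equation is reconstructed from the code, not taken from the original source), the ddq1/ddq2 lines solve the 2x2 system below via the explicit 2x2 inverse, with the torque M applied to the second joint only:

\[
\begin{pmatrix} d_{11} & d_{12} \\ d_{21} & d_{22} \end{pmatrix}
\begin{pmatrix} \ddot q_1 \\ \ddot q_2 \end{pmatrix}
+ \begin{pmatrix} h_1 \\ h_2 \end{pmatrix}
+ \begin{pmatrix} \varphi_1 \\ \varphi_2 \end{pmatrix}
= \begin{pmatrix} 0 \\ M \end{pmatrix}
\quad\Longrightarrow\quad
\begin{pmatrix} \ddot q_1 \\ \ddot q_2 \end{pmatrix}
= \frac{1}{d_{11} d_{22} - d_{12} d_{21}}
\begin{pmatrix} d_{22} & -d_{12} \\ -d_{21} & d_{11} \end{pmatrix}
\begin{pmatrix} -h_1 - \varphi_1 \\ M - h_2 - \varphi_2 \end{pmatrix}
\]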
Example 7
        void train()
        {
            double[] state; // q1, q2, dq1, dq2
           
            double lambda = 0.9;
            double gamma = 0.99;
            Random r = new Random();

            while (isRunning)
            {
                
                igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
                igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1, 1, 1 }));
                for (int k = 0; k < 500; ++k)
                {
                    List<TDLambda_data> diffQs = new List<TDLambda_data>();

                    if (r.NextDouble() > 0.01) state = new double[] { (2 * r.NextDouble() - 1) * Math.PI, (2 * r.NextDouble() - 1) * 0.8 * Math.PI, (2 * r.NextDouble() - 1) * 0.5 * Math.PI, (2 * r.NextDouble() - 1) * 0.5 * Math.PI };
                    else state = new double[] { -Math.PI, 0, 0, 0 };
                    //state = new double[] { (2 * r.NextDouble() - 1) * Math.PI, (2 * r.NextDouble() - 1) * 0.8 * Math.PI, 0, 0 };
                    //state = new double[] { 0, 0, 0, 0 };

                    double M;
                    double q_act;
                    double sumreward = 0;
                    GetAction(state, out M, out q_act);
                    for (int i = 0; i < 300; ++i)
                    {
                        double[] state_new;
                        double reward;
                        sim.Simulate(state, new double[] { M }, dT, out state_new, out reward);

                        double M_new;
                        double q_act_new;
                        GetAction(state_new, out M_new, out q_act_new);
                        double delta_q = (reward + gamma * q_act_new - q_act);


                        diffQs.Add(new TDLambda_data(state, M, q_act, 0));
                        for (int j = i; j >= 0; --j)
                        {
                            diffQs[j].diff += Math.Pow(lambda * gamma, i - j) * delta_q;
                        }


                        M = M_new;
                        q_act = q_act_new;
                        state = state_new;

                        sumreward += reward;
                    }

                    //Console.Out.WriteLine(sumreward);


                    //TD(lambda) training of the recorded episode
                    for (int i = 0; i < diffQs.Count; ++i)
                    {
                        if (diffQs[i].action < 0)
                        {
                            lock (igmn1)
                            {
                                igmn1.Train(new Vector(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3], diffQs[i].diff + diffQs[i].Q }));
                            }
                            lock (tab1)
                            {
                                //tab1.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, diffQs[i].diff + diffQs[i].Q, 0.1);
                            }
                        }
                        else if (diffQs[i].action > 0)
                        {
                            lock (igmn2)
                            {
                                igmn2.Train(new Vector(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3], diffQs[i].diff + diffQs[i].Q }));
                            }
                            lock (tab2)
                            {
                                //tab2.Train(new double[] { diffQs[i].state[0], diffQs[i].state[1], diffQs[i].state[2], diffQs[i].state[3] }, diffQs[i].diff + diffQs[i].Q, 0.1);
                            }
                        }
                    }

                    bm = VisualizeAcrobot();

                    lock (this)
                    {
                    }
                    //Thread.Sleep(1000);
                }
            }

        }
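Example 7 constructs TDLambda_data records and later reads their state, action, Q and diff members, but the type itself does not appear in these examples. A minimal compatible sketch follows; the field names and layout are inferred from how train() uses them and are an assumption, not the original definition.

        // Hypothetical sketch: record of one step, used for TD(lambda) credit assignment.
        // Inferred from its use in train(); the original class is not shown in these examples.
        class TDLambda_data
        {
            public double[] state;  // state (q1, q2, dq1, dq2) when the action was taken
            public double action;   // applied torque M; its sign selects igmn1 or igmn2
            public double Q;        // Q-value estimate recorded at that step
            public double diff;     // accumulated (lambda*gamma)^k-discounted TD errors

            public TDLambda_data(double[] state, double action, double Q, double diff)
            {
                this.state = state;
                this.action = action;
                this.Q = Q;
                this.diff = diff;
            }
        }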
Example 8
        private void timer1_Tick(object sender, EventArgs e)
        {
            if (count >= maxepochcount)
            {
                igmn1 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
                igmn2 = new IGMN(new Vector(new double[] { 1, 1, 1 }));
                count = 0;
                traincount++;
                if (traincount >= maxtraincount)
                {
                    timer1.Stop();
                    //saveBitmap();
                    using (StreamWriter sw = new StreamWriter("learningMin.txt"))
                    {
                        foreach (double d in rewardsMin)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }
                    using (StreamWriter sw = new StreamWriter("learningMax.txt"))
                    {
                        foreach (double d in rewardsMax)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }
                    using (StreamWriter sw = new StreamWriter("learningAvg.txt"))
                    {
                        foreach (double d in rewardsAvg)
                        {
                            sw.Write(d.ToString() + "\r\n");
                        }
                    }                    
                }
                
            }

            double sumreward = 0;

            a = (2 * r.NextDouble() - 1) * Math.PI;
            w = (2 * r.NextDouble() - 1) * 2 * Math.PI;
            a_old = a;
            w_old = w;

            for (int i = 0; i < 200; ++i)
            {
                //q values
                double q1 = igmn1.Recall(new Vector(new double[] { a, w }));
                double q2 = igmn2.Recall(new Vector(new double[] { a, w }));

                int action = 0;

                if (q1 > q2)
                {
                    action = -1;
                }
                else
                {
                    action = 1;
                }

                //epsilon-greedy exploration
                if (r.NextDouble() > 0.7)
                {
                    action = r.Next(2) * 2 - 1;
                }

                if (action < 0) q_act = q1;
                else q_act = q2;

                //reward
                double reward = Math.Cos(a);

                sumreward += reward;


                //train the previous step
                double delta_q = (reward + 0.99 * q_act); //Math.Max(q1,q2)

                if (M_old < 0) igmn1.Train(new Vector(new double[] { a_old, w_old, delta_q }));
                else igmn2.Train(new Vector(new double[] { a_old, w_old, delta_q }));

                //step the simulation
                M = action * M_abs;

                a_old = a;
                w_old = w;
                M_old = M;
                q_old = q_act;

                w += (l * m * g * Math.Sin(a) + M - mu * w) / (l * l * m) * dT;
                a += w * dT;

                if (a < -Math.PI) a += 2 * Math.PI;
                if (a > Math.PI) a -= 2 * Math.PI;
            }

            Console.Out.WriteLine(sumreward);            
            rewardsMin[count] = Math.Min(sumreward, rewardsMin[count]);
            rewardsMax[count] = Math.Max(sumreward, rewardsMax[count]);
            rewardsAvg[count] += sumreward / maxtraincount;
            count++;
            //Invalidate();
        }
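For reference, the two update lines near the end of Example 8 are a semi-implicit Euler step of a damped, torque-driven pendulum (reconstructed from the code, not stated in the source): the angular velocity is updated first and its new value is then used to advance the angle, with the angle wrapped back into [-pi, pi] afterwards.

\[
\dot w = \frac{l\, m\, g \sin a + M - \mu\, w}{l^{2} m}, \qquad
w \leftarrow w + \dot w\, dT, \qquad
a \leftarrow a + w\, dT
\]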