コード例 #1
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
 /// <summary>
 /// Creates a matrix of the given size and writes 1 to every main-diagonal entry.
 /// </summary>
 /// <param name="n">Size handed to the <c>SparseMatrix(int)</c> constructor; also the number of diagonal entries written.</param>
 /// <returns>The newly created matrix; no off-diagonal entry is written.</returns>
 public static SparseMatrix Identity(int n)
 {
     SparseMatrix identity = new SparseMatrix(n);
     int diagonal = 0;
     while (diagonal < n)
     {
         identity[diagonal, diagonal] = 1;
         diagonal++;
     }
     return identity;
 }
コード例 #2
0
ファイル: ESN.cs プロジェクト: hunsteve/RLResearch
        /// <summary>
        /// Allocates the network state and fills the input and bias weights with random values.
        /// </summary>
        /// <param name="reservoirSize">Length of the state and bias vectors, and second dimension of the input weight matrix.</param>
        /// <param name="inputSize">First dimension of the input weight matrix.</param>
        /// <param name="outputSize">Not read by this constructor.</param>
        public ESN(int reservoirSize, int inputSize, int outputSize)
        {
            states = new Vector(reservoirSize);
            innerConnections = GenerateInnerWeights(reservoirSize);
            inputWeights = new DenseMatrix(inputSize, reservoirSize);
            biasWeights = new Vector(reservoirSize);

            // A single generator feeds both fills: input weights first (row by row), then biases.
            Random rng = new Random();

            for (int inputIndex = 0; inputIndex < inputSize; inputIndex++)
            {
                for (int unit = 0; unit < reservoirSize; unit++)
                {
                    inputWeights[inputIndex, unit] = rng.NextDouble();
                }
            }

            for (int unit = 0; unit < reservoirSize; unit++)
            {
                biasWeights.Elements[unit] = rng.NextDouble();
            }
        }
コード例 #3
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
        /// <summary>
        /// Copy constructor: duplicates the size, the stored values and the per-row / per-column
        /// index sets of another matrix into freshly allocated containers.
        /// </summary>
        /// <param name="copy">Matrix to duplicate; it is only read.</param>
        public SparseMatrix(SparseMatrix copy)
        {
            size = copy.size;

            // New dictionary instance, so the two matrices do not share storage.
            values = new Dictionary<long, double>();
            foreach (KeyValuePair<long, double> entry in copy.values)
            {
                values.Add(entry.Key, entry.Value);
            }

            rowElements = new HashSet<int>[size];
            columnElements = new HashSet<int>[size];

            for (int line = 0; line < size; line++)
            {
                // Each set is rebuilt from the source set rather than shared with it.
                rowElements[line] = new HashSet<int>(copy.rowElements[line]);
                columnElements[line] = new HashSet<int>(copy.columnElements[line]);
            }
        }
コード例 #4
0
        /// <summary>
        /// Recomputes the Q table held in <c>qstore</c> from the per-action matrices <c>Ma</c> and the
        /// vector <c>R</c>, then writes the utility vector to "u.txt" and saves the table to "qstore.dat".
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Call <c>GenerateRVector</c> and load the per-action matrices from "mmatrices.dat".
        /// 2. Flatten the current Q table into one vector per action.
        /// 3. Derive per-state action weights from Q: copy Q, apply <c>Exp</c> with the negated global
        ///    maximum, divide by the sum over actions. This looks like a softmax over actions, but
        ///    <c>Exp</c> and <c>Div</c> are defined elsewhere — confirm.
        /// 4. Scale a copy of each action matrix by its weights, add them into M, multiply M by 0.9999
        ///    and pass R to <c>SolveLinearEquation2</c> on (Identity - M).
        /// 5. Set Q[action] = Ma[action].MatrixMultiplyRight(utility) and write it back into <c>qstore</c>.
        /// </remarks>
        public void Train()
        {
            GenerateRVector();
            LoadMMatrices("mmatrices.dat");

            // One flat vector per action; element index is l * LENXY * LENXY + k * LENXY + j.
            Vector[] Q = new Vector[CarNavigationQStore.LENACTION];
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                Q[i] = new Vector(statenum);
            }

            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                for (int j = 0; j < CarNavigationQStore.LENXY; ++j)
                {
                    for (int k = 0; k < CarNavigationQStore.LENXY; ++k)
                    {
                        for (int l = 0; l < CarNavigationQStore.LENANG; ++l)
                        {
                            int flat = l * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k * CarNavigationQStore.LENXY + j;
                            Q[i].Elements[flat] = qstore.value[i, j, k, l];
                        }
                    }
                }
            }

            // Copy Q and find the largest value over all actions and states; it is used as the
            // shift passed to Exp below.
            Vector[] p_ = new Vector[CarNavigationQStore.LENACTION];
            float max_ = float.MinValue;
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                p_[i] = new Vector(Q[i]);
                for (int j = 0; j < statenum; ++j)
                {
                    if (max_ < p_[i].Elements[j]) max_ = (float)p_[i].Elements[j];
                }
            }

            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                p_[i].Exp(-max_);
            }

            Vector sum = new Vector(statenum);
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                sum += p_[i];
            }

            // Work on copies of the loaded matrices so Ma itself stays unscaled for step 5.
            SparseMatrix[] Ma_ = new SparseMatrix[CarNavigationQStore.LENACTION];
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                p_[i].Div(sum);
                Ma_[i] = new SparseMatrix(Ma[i]);
                Ma_[i].Multiply(p_[i]);
            }

            // Compute the M matrix.
            SparseMatrix M = new SparseMatrix(statenum);
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                M.Add(Ma_[i]);
            }
            M.Multiply(0.9999f);

            SparseMatrix IM = SparseMatrix.Identity(statenum) - M;

            Vector utility = IM.SolveLinearEquation2(R);

            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                Q[i] = Ma[i].MatrixMultiplyRight(utility);
            }

            // Write the new Q vectors back into the 4-D table using the same flat index.
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                for (int j = 0; j < CarNavigationQStore.LENXY; ++j)
                {
                    for (int k = 0; k < CarNavigationQStore.LENXY; ++k)
                    {
                        for (int l = 0; l < CarNavigationQStore.LENANG; ++l)
                        {
                            int flat = l * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k * CarNavigationQStore.LENXY + j;
                            qstore.value[i, j, k, l] = (float)Q[i].Elements[flat];
                        }
                    }
                }
            }

            utility.WriteToFile("u.txt");

            qstore.Save("qstore.dat");
        }
コード例 #5
0
        /// <summary>
        /// Estimates one state-transition matrix per action by random sampling and saves the result
        /// through <c>SaveMMatrices("mmatrices.dat")</c>.
        /// </summary>
        /// <remarks>
        /// For every discrete state and every action, 100 start points are drawn at a random offset
        /// from the state returned by <c>GetState</c>, the environment is stepped once, and 0.01 is
        /// added to the matrix entry [source state, resulting state].
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// The indices reported for a resulting state map to a flat index outside [0, statenum).
        /// </exception>
        public void GenerateMMatrices()
        {
            CarNavigationEnvironment testws = new CarNavigationEnvironment();

            Ma = new SparseMatrix[CarNavigationQStore.LENACTION];
            for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
            {
                Ma[i] = new SparseMatrix(statenum);
            }

            // Number of flat indices covered by one angle slice.
            int planeSize = CarNavigationQStore.LENXY * CarNavigationQStore.LENXY;

            for (int ii = 0; ii < statenum; ++ii)
            {
                // Invert the flat index ii = l * LENXY * LENXY + k * LENXY + j.
                int l = ii / planeSize;
                int iil = ii - l * planeSize;
                int k = iil / CarNavigationQStore.LENXY;
                int j = iil % CarNavigationQStore.LENXY;

                CarNavigationState state;
                CarNavigationQStore.GetState(j, k, l, out state);

                for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
                {
                    CarNavigationAction action;
                    CarNavigationQStore.GetAction(i, out action);
                    for (int i1 = 0; i1 < 100; ++i1)
                    {
                        // The sampling expressions are kept exactly as written: regrouping them could
                        // change integer-division or rounding behaviour depending on the constant types.
                        testws.x = state.x + r.NextDouble() * (CarNavigationQStore.MAXXY - CarNavigationQStore.MINXY) / CarNavigationQStore.LENXY;
                        testws.y = state.y + r.NextDouble() * (CarNavigationQStore.MAXXY - CarNavigationQStore.MINXY) / CarNavigationQStore.LENXY;
                        testws.alpha = state.alpha + r.NextDouble() * (2 * Math.PI) / CarNavigationQStore.LENANG;

                        testws.Step(action.ang);

                        int j2, k2, l2;
                        CarNavigationState state2 = new CarNavigationState(testws.x, testws.y, testws.alpha);
                        CarNavigationQStore.GetStateIndices(state2, out j2, out k2, out l2);

                        int ii2 = l2 * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k2 * CarNavigationQStore.LENXY + j2;

                        // Reject both ends of the range; a negative index would otherwise reach the
                        // matrix indexer unchecked.
                        if (ii2 < 0 || ii2 > statenum - 1)
                        {
                            throw new InvalidOperationException(
                                "Resulting state index " + ii2 + " is outside the valid range [0, " + statenum + ").");
                        }

                        // 100 samples of 0.01 each per (state, action) pair.
                        Ma[i][ii, ii2] += 0.01f;
                    }
                }
            }
            SaveMMatrices("mmatrices.dat");
        }
コード例 #6
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
        /// <summary>
        /// Reads a matrix from a binary stream.
        /// </summary>
        /// <remarks>
        /// Layout as consumed here: an Int32 size, an Int32 entry count, then for each entry an Int64
        /// index followed by a Single value. Entries whose index is negative or not below
        /// size * size are read and discarded.
        /// </remarks>
        /// <param name="br">Reader positioned at the start of the serialized matrix.</param>
        /// <returns>A new matrix of the stored size holding the in-range entries.</returns>
        public static SparseMatrix Load(BinaryReader br)
        {
            int size = br.ReadInt32();
            SparseMatrix ret = new SparseMatrix(size);

            // Widen before multiplying: in 32-bit arithmetic size * size overflows once size exceeds
            // 46340, which would make the range check below reject valid indices or accept invalid ones.
            long cellCount = (long)size * size;

            int count = br.ReadInt32();
            for (int i = 0; i < count; ++i)
            {
                long index = br.ReadInt64();
                double value = br.ReadSingle();
                if (index >= 0 && index < cellCount)
                {
                    ret[index] = value;
                }
            }

            return ret;
        }
コード例 #7
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
 /// <summary>
 /// Computes a - b into a new matrix: <paramref name="a"/> is copied and <paramref name="b"/> is
 /// added to the copy with factor -1.
 /// </summary>
 /// <param name="a">Left operand.</param>
 /// <param name="b">Right operand.</param>
 /// <returns>The difference as a new matrix, or null when the operand sizes differ.</returns>
 public static SparseMatrix operator -(SparseMatrix a, SparseMatrix b)
 {
     if (a.size == b.size)
     {
         SparseMatrix difference = new SparseMatrix(a);
         difference.Add(b, -1);
         return difference;
     }

     // Size mismatch: callers receive null rather than an exception.
     return null;
 }
コード例 #8
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
 /// <summary>
 /// Adds another matrix to this one by delegating to the two-argument overload with factor 1.
 /// </summary>
 /// <param name="a">Matrix to add.</param>
 public void Add(SparseMatrix a)
 {
     this.Add(a, 1);
 }
コード例 #9
0
ファイル: SparseMatrix.cs プロジェクト: hunsteve/RLResearch
        /// <summary>
        /// Adds <paramref name="factor"/> times <paramref name="a"/> to this matrix.
        /// </summary>
        /// <remarks>
        /// Only the keys stored in <paramref name="a"/> are visited. When the sizes differ the call
        /// returns without doing anything.
        /// </remarks>
        /// <param name="a">Matrix whose stored entries are scaled and added.</param>
        /// <param name="factor">Multiplier applied to each entry of <paramref name="a"/>.</param>
        public void Add(SparseMatrix a, double factor) {
            if (a.size != size)
            {
                return;
            }

            // When a matrix is added to itself, the keys being enumerated belong to the same
            // dictionary that ChangeValue is asked to update (presumably it writes to values —
            // confirm). Such writes can invalidate a live Dictionary enumerator, so iterate over a
            // snapshot of the keys in that case.
            IEnumerable<long> keys = a.values.Keys;
            if (object.ReferenceEquals(a, this))
            {
                keys = new List<long>(a.values.Keys);
            }

            foreach (long i in keys)
            {
                ChangeValue(i, this[i] + factor * a[i]);
            }
        }
コード例 #10
0
        /// <summary>
        /// Recomputes the Q table held in <c>qstore</c> from the three per-action matrices <c>Ma</c>
        /// and the vector <c>R</c>, then writes the utility and Q vectors to text files.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Flatten the current Q table into one vector per action (index k * LEN + j).
        /// 2. Derive per-state action weights from Q: copy Q, apply <c>Exp</c> with the negated global
        ///    maximum, divide by the sum over the three actions. This looks like a softmax over
        ///    actions, but <c>Exp</c> and <c>Div</c> are defined elsewhere — confirm.
        /// 3. Scale a copy of each action matrix by its weights, sum them into M, multiply M by 0.99
        ///    and pass R to <c>SolveLinearEquation2</c> on (Identity - M).
        /// 4. Set Q[action] = Ma[action].MatrixMultiplyRight(utility) and write it back into <c>qstore</c>.
        /// 5. Write "u.txt", "q1.txt", "q2.txt" and "q3.txt".
        /// </remarks>
        public void Train()
        {
            Vector[] Q = new Vector[3];
            for (int i = 0; i < 3; ++i)
            {
                Q[i] = new Vector(statenum);
            }

            for (int i = 0; i < 3; ++i)
            {
                for (int j = 0; j < InvertedPendulumQStore.LEN; ++j)
                {
                    for (int k = 0; k < InvertedPendulumQStore.LEN; ++k)
                    {
                        int flat = k * InvertedPendulumQStore.LEN + j;
                        Q[i].Elements[flat] = qstore.value[i, j, k];
                    }
                }
            }

            // Copy Q and find the largest value over all actions and states; it is used as the
            // shift passed to Exp below.
            Vector[] p_ = new Vector[3];
            float max_ = float.MinValue;
            for (int i = 0; i < 3; ++i)
            {
                p_[i] = new Vector(Q[i]);
                for (int j = 0; j < statenum; ++j)
                {
                    if (max_ < p_[i].Elements[j]) max_ = (float)p_[i].Elements[j];
                }
            }

            for (int i = 0; i < 3; ++i)
            {
                p_[i].Exp(-max_);
            }
            Vector sum = p_[0] + p_[1] + p_[2];

            // Work on copies of the matrices so Ma itself stays unscaled for step 4.
            SparseMatrix[] Ma_ = new SparseMatrix[3];
            for (int i = 0; i < 3; ++i)
            {
                p_[i].Div(sum);
                Ma_[i] = new SparseMatrix(Ma[i]);
                Ma_[i].Multiply(p_[i]);
            }

            // Compute the M matrix.
            SparseMatrix M = (Ma_[0] + Ma_[1] + Ma_[2]);
            M.Multiply(0.99f);

            SparseMatrix IM = SparseMatrix.Identity(statenum) - M;
            Vector utility = IM.SolveLinearEquation2(R);

            for (int i = 0; i < 3; ++i)
            {
                Q[i] = Ma[i].MatrixMultiplyRight(utility);
            }

            // Write the new Q vectors back into the 3-D table using the same flat index.
            for (int i = 0; i < 3; ++i)
            {
                for (int j = 0; j < InvertedPendulumQStore.LEN; ++j)
                {
                    for (int k = 0; k < InvertedPendulumQStore.LEN; ++k)
                    {
                        int flat = k * InvertedPendulumQStore.LEN + j;
                        qstore.value[i, j, k] = (float)Q[i].Elements[flat];
                    }
                }
            }

            utility.WriteToFile("u.txt");

            Q[0].WriteToFile("q1.txt");
            Q[1].WriteToFile("q2.txt");
            Q[2].WriteToFile("q3.txt");
        }
コード例 #11
0
        /// <summary>
        /// Estimates one transition matrix per action by random sampling and saves the result
        /// through <c>SaveMMatrices("mmatrices.dat")</c>.
        /// </summary>
        /// <remarks>
        /// For each of the LEN * LEN discrete states and each of the three actions (-1, 0, 1),
        /// 1000 start points are drawn at a random offset from the state returned by
        /// <c>GetState</c>; the environment is stepped once and 0.001 is added to the matrix entry
        /// [source state, resulting state].
        /// </remarks>
        public void GenerateMMatrices()
        {
            InvertedPendulumEnvironment testws = new InvertedPendulumEnvironment();
            int stateCount = InvertedPendulumQStore.LEN * InvertedPendulumQStore.LEN;

            Ma = new SparseMatrix[3];
            for (int actionIndex = 0; actionIndex < 3; actionIndex++)
            {
                Ma[actionIndex] = new SparseMatrix(stateCount);
            }

            for (int source = 0; source < stateCount; source++)
            {
                // Invert the flat index source = k * LEN + j.
                int k = source / InvertedPendulumQStore.LEN;
                int j = source % InvertedPendulumQStore.LEN;

                InvertedPendulumState state;
                InvertedPendulumQStore.GetState(j, k, out state);

                for (int actionIndex = 0; actionIndex < 3; actionIndex++)
                {
                    // Matrix slots 0..2 correspond to the actions -1, 0 and 1.
                    int action = actionIndex - 1;
                    SparseMatrix transitions = Ma[actionIndex];

                    for (int sample = 0; sample < 1000; sample++)
                    {
                        testws.a = state.a + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * Math.PI;
                        testws.w = state.w + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * 3;

                        testws.Step(action);

                        InvertedPendulumState reached = new InvertedPendulumState(testws.a, testws.w);
                        int j2, k2;
                        InvertedPendulumQStore.GetStateIndices(reached, out j2, out k2);

                        int target = k2 * InvertedPendulumQStore.LEN + j2;

                        // 1000 samples of 0.001 each per (state, action) pair.
                        transitions[source, target] += 0.001f;
                    }
                }
            }
            SaveMMatrices("mmatrices.dat");
        }
コード例 #12
0
        /// <summary>
        /// Builds a set of 100 sampled reference states, estimates per-action transition matrices and
        /// a reward vector over them, and stores the resulting per-action vectors in <c>Q</c>.
        /// </summary>
        /// <remarks>
        /// Each sample resets the environment, maps the current state to its closest reference state,
        /// accumulates the reward, and for each action (-1, 0, 1) steps once from that same state and
        /// counts the transition. The <c>counts</c> vector holds a reciprocal that goes 1, 1/2, 1/3, ...
        /// with each visit to a state (computed in float); the matrices and the reward vector are
        /// multiplied by it after sampling.
        /// </remarks>
        public void Train()
        {
            InvertedPendulumEnvironment testenv = new InvertedPendulumEnvironment();
            int mincount = 100;

            // Reference states: one per environment reset.
            states = new List<InvertedPendulumState>();
            int collected = 0;
            while (collected < mincount)
            {
                testenv.Reset();
                states.Add((InvertedPendulumState)testenv.State());
                collected++;
            }

            int statenum = states.Count;

            // Compute state-transition probabilities and expected rewards.
            Vector R = new Vector(statenum);

            SparseMatrix[] Ma = new SparseMatrix[3];
            for (int actionIndex = 0; actionIndex < 3; actionIndex++)
            {
                Ma[actionIndex] = new SparseMatrix(statenum);
            }

            Vector counts = new Vector(statenum);

            int sampleCount = statenum * 10000;
            for (int sample = 0; sample < sampleCount; sample++)
            {
                testenv.Reset();
                InvertedPendulumState curstate = (InvertedPendulumState)testenv.State();
                int sourceIndex = GetClosestStateIndex(states, curstate);

                R.Elements[sourceIndex] += testenv.Reward();

                for (int actionIndex = 0; actionIndex < 3; actionIndex++)
                {
                    int action = actionIndex - 1;

                    // Every action starts from the same sampled state.
                    testenv.SetState(curstate);
                    testenv.Step(action);
                    InvertedPendulumState tostate = (InvertedPendulumState)testenv.State();
                    int targetIndex = GetClosestStateIndex(states, tostate);

                    Ma[actionIndex][sourceIndex, targetIndex] += 1;
                }

                // Reciprocal update: 0 -> 1 on the first visit, afterwards 1/n -> 1/(n + 1).
                float previous = (float)counts.Elements[sourceIndex];
                float updated = (previous != 0) ? 1 / (1 / previous + 1) : 1;
                counts.Elements[sourceIndex] = updated;
            }

            for (int actionIndex = 0; actionIndex < 3; actionIndex++)
            {
                Ma[actionIndex].Multiply(counts);
            }

            R.Multiply(counts);

            SparseMatrix M = (Ma[0] + Ma[1] + Ma[2]);
            M.Multiply(0.333333f);
            M.Multiply(0.99f);

            SparseMatrix IM = SparseMatrix.Identity(statenum) - M;
            Vector utility = IM.SolveLinearEquation2(R);

            for (int actionIndex = 0; actionIndex < 3; actionIndex++)
            {
                Q[actionIndex] = Ma[actionIndex].MatrixMultiplyRight(utility);
            }
        }