/// <summary>
/// Creates a sparse matrix of size <paramref name="n"/> and sets every
/// diagonal entry (i, i) for i in [0, n) to 1.
/// </summary>
/// <param name="n">Size handed to the <see cref="SparseMatrix"/> constructor; also the number of diagonal entries written.</param>
/// <returns>The newly created matrix.</returns>
public static SparseMatrix Identity(int n)
{
    SparseMatrix identity = new SparseMatrix(n);

    int diagonal = 0;
    while (diagonal < n)
    {
        identity[diagonal, diagonal] = 1;
        ++diagonal;
    }

    return identity;
}
/// <summary>
/// Allocates the reservoir state, inner connections, input weights and bias
/// weights, then fills the input and bias weights with values drawn from
/// <see cref="Random.NextDouble"/>.
/// </summary>
/// <param name="reservoirSize">Length of the state and bias vectors, argument to GenerateInnerWeights, and column count of the input weight matrix.</param>
/// <param name="inputSize">Row count of the input weight matrix.</param>
/// <param name="outputSize">Not used by this constructor.</param>
public ESN(int reservoirSize, int inputSize, int outputSize)
{
    states = new Vector(reservoirSize);
    innerConnections = GenerateInnerWeights(reservoirSize);
    inputWeights = new DenseMatrix(inputSize, reservoirSize);
    biasWeights = new Vector(reservoirSize);

    Random rng = new Random();

    // Input weights are drawn first, row by row, so the random sequence is
    // consumed in the same order as before.
    for (int inputIndex = 0; inputIndex < inputSize; ++inputIndex)
    {
        for (int neuron = 0; neuron < reservoirSize; ++neuron)
        {
            double weight = rng.NextDouble();
            inputWeights[inputIndex, neuron] = weight;
        }
    }

    // Bias weights are drawn afterwards, one per reservoir element.
    for (int neuron = 0; neuron < reservoirSize; ++neuron)
    {
        double bias = rng.NextDouble();
        biasWeights.Elements[neuron] = bias;
    }
}
/// <summary>
/// Copy constructor: duplicates the size, the stored values and the
/// per-row / per-column index sets of <paramref name="copy"/> into fresh
/// collections, so the two matrices do not share these containers.
/// </summary>
/// <param name="copy">Matrix to duplicate.</param>
public SparseMatrix(SparseMatrix copy)
{
    size = copy.size;

    // Copy every stored (key, value) pair into a new dictionary.
    values = new Dictionary<long, double>();
    foreach (KeyValuePair<long, double> entry in copy.values)
    {
        values.Add(entry.Key, entry.Value);
    }

    // Duplicate the index sets, one per row and one per column.
    rowElements = new HashSet<int>[size];
    columnElements = new HashSet<int>[size];
    for (int line = 0; line < size; ++line)
    {
        rowElements[line] = new HashSet<int>(copy.rowElements[line]);
        columnElements[line] = new HashSet<int>(copy.columnElements[line]);
    }
}
/// <summary>
/// Recomputes the car-navigation Q table from the stored transition matrices.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Calls GenerateRVector() and LoadMMatrices("mmatrices.dat").
/// 2. Flattens qstore.value into one Vector per action, using the flat state
///    index l * LENXY * LENXY + k * LENXY + j.
/// 3. Builds per-action weight vectors p_ from the Q vectors: finds the
///    largest entry over all actions and states, calls Exp(-max) on each
///    vector, sums them and divides each by the sum.
/// 4. Copies each Ma[i], calls Multiply(p_[i]) on the copy, adds the copies
///    into M and multiplies M by 0.9999f.
/// 5. Solves the system defined by (Identity - M) and R with
///    SolveLinearEquation2 to obtain the utility vector.
/// 6. Sets Q[i] = Ma[i].MatrixMultiplyRight(utility), writes the result back
///    into qstore.value, writes the utility to "u.txt" and saves the store to
///    "qstore.dat".
/// NOTE(review): the exact semantics of Vector.Exp, Vector.Div and
/// SparseMatrix.Multiply(Vector) are not visible here; steps 3-4 look like a
/// softmax-weighted policy average — confirm against those helpers.
/// NOTE(review): a single global maximum is used as the Exp offset; if Exp
/// underflows to zero for every action of some state, the later division
/// would be 0/0 — worth checking.
/// </remarks>
public void Train()
{
    GenerateRVector();
    LoadMMatrices("mmatrices.dat");

    // Flatten the 4-D Q store into one vector per action.
    Vector[] Q = new Vector[CarNavigationQStore.LENACTION];
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        Q[i] = new Vector(statenum);
    }
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        for (int j = 0; j < CarNavigationQStore.LENXY; ++j)
        {
            for (int k = 0; k < CarNavigationQStore.LENXY; ++k)
            {
                for (int l = 0; l < CarNavigationQStore.LENANG; ++l)
                {
                    int flat = l * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k * CarNavigationQStore.LENXY + j;
                    Q[i].Elements[flat] = qstore.value[i, j, k, l];
                }
            }
        }
    }

    // Copy the Q vectors and find the largest entry over all actions/states.
    Vector[] p_ = new Vector[CarNavigationQStore.LENACTION];
    float max_ = float.MinValue;
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        p_[i] = new Vector(Q[i]);
        for (int j = 0; j < statenum; ++j)
        {
            if (max_ < p_[i].Elements[j])
            {
                max_ = (float)p_[i].Elements[j];
            }
        }
    }

    // Exponentiate with the negated maximum as offset, then normalize by the sum.
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        p_[i].Exp(-max_);
    }
    Vector sum = new Vector(statenum);
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        sum += p_[i];
    }

    // Weight a copy of each transition matrix with its action weights.
    SparseMatrix[] Ma_ = new SparseMatrix[CarNavigationQStore.LENACTION];
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        p_[i].Div(sum);
        Ma_[i] = new SparseMatrix(Ma[i]);
        Ma_[i].Multiply(p_[i]);
    }

    // Compute the M matrix: sum of the weighted matrices, scaled by 0.9999f.
    SparseMatrix M = new SparseMatrix(statenum);
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        M.Add(Ma_[i]);
    }
    M.Multiply(0.9999f);

    // Solve the linear system built from (I - M) and R.
    SparseMatrix IM = SparseMatrix.Identity(statenum) - M;
    // Debug aid: IM.WriteToFile("IM.txt");
    Vector utility = IM.SolveLinearEquation2(R);

    // New per-action Q vectors from the unweighted transition matrices.
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        Q[i] = Ma[i].MatrixMultiplyRight(utility);
    }

    // Write the flattened Q vectors back into the 4-D store.
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        for (int j = 0; j < CarNavigationQStore.LENXY; ++j)
        {
            for (int k = 0; k < CarNavigationQStore.LENXY; ++k)
            {
                for (int l = 0; l < CarNavigationQStore.LENANG; ++l)
                {
                    int flat = l * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k * CarNavigationQStore.LENXY + j;
                    qstore.value[i, j, k, l] = (float)Q[i].Elements[flat];
                }
            }
        }
    }

    utility.WriteToFile("u.txt");
    qstore.Save("qstore.dat");
    // Debug aid: Q[n].WriteToFile("q<n+1>.txt") dumps an individual action vector.
}
/// <summary>
/// Estimates one transition matrix per action by sampling: for every flat
/// state index and every action, 100 start points are drawn inside the
/// state's grid cell, the environment is stepped once, and 0.01 is added to
/// the entry (start state, resulting state). The matrices are then written
/// with SaveMMatrices("mmatrices.dat").
/// </summary>
/// <exception cref="InvalidOperationException">
/// The state indices reported for a sampled successor map to a flat index
/// outside [0, statenum).
/// </exception>
public void GenerateMMatrices()
{
    // Number of samples per (state, action) pair; each sample contributes
    // 0.01f below, so a full set of samples adds up to (about) 1 per row.
    const int samplesPerPair = 100;

    CarNavigationEnvironment testws = new CarNavigationEnvironment();

    Ma = new SparseMatrix[CarNavigationQStore.LENACTION];
    for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
    {
        Ma[i] = new SparseMatrix(statenum);
    }

    for (int ii = 0; ii < statenum; ++ii)
    {
        // Decode the flat index ii = l * LENXY^2 + k * LENXY + j.
        int l = ii / (CarNavigationQStore.LENXY * CarNavigationQStore.LENXY);
        int iil = ii - l * (CarNavigationQStore.LENXY * CarNavigationQStore.LENXY);
        int k = iil / CarNavigationQStore.LENXY;
        int j = iil % CarNavigationQStore.LENXY;

        CarNavigationState state;
        CarNavigationQStore.GetState(j, k, l, out state);

        for (int i = 0; i < CarNavigationQStore.LENACTION; ++i)
        {
            CarNavigationAction action;
            CarNavigationQStore.GetAction(i, out action);

            for (int i1 = 0; i1 < samplesPerPair; ++i1)
            {
                // Random offset of up to one grid cell in x, y and angle.
                testws.x = state.x + r.NextDouble() * (CarNavigationQStore.MAXXY - CarNavigationQStore.MINXY) / CarNavigationQStore.LENXY;
                testws.y = state.y + r.NextDouble() * (CarNavigationQStore.MAXXY - CarNavigationQStore.MINXY) / CarNavigationQStore.LENXY;
                testws.alpha = state.alpha + r.NextDouble() * (2 * Math.PI) / CarNavigationQStore.LENANG;
                testws.Step(action.ang);

                int j2, k2, l2;
                CarNavigationState state2 = new CarNavigationState(testws.x, testws.y, testws.alpha);
                CarNavigationQStore.GetStateIndices(state2, out j2, out k2, out l2);
                int ii2 = l2 * CarNavigationQStore.LENXY * CarNavigationQStore.LENXY + k2 * CarNavigationQStore.LENXY + j2;

                // Reject successors outside the matrix in either direction and
                // say which index was wrong instead of throwing a bare Exception.
                if (ii2 < 0 || ii2 > statenum - 1)
                {
                    throw new InvalidOperationException(
                        "Successor state index " + ii2 + " (j=" + j2 + ", k=" + k2 + ", l=" + l2 +
                        ") is outside the valid range [0, " + statenum + ").");
                }

                Ma[i][ii, ii2] += 0.01f;
            }
        }
    }

    SaveMMatrices("mmatrices.dat");
}
/// <summary>
/// Reads a sparse matrix from <paramref name="br"/>. The layout consumed is:
/// an Int32 size, an Int32 entry count, then per entry an Int64 linear index
/// followed by a Single value.
/// </summary>
/// <param name="br">Reader positioned at the start of a serialized matrix.</param>
/// <returns>
/// A new matrix of the stored size containing every entry whose index lies
/// in [0, size * size); entries with other indices are skipped.
/// </returns>
public static SparseMatrix Load(BinaryReader br)
{
    int size = br.ReadInt32();
    SparseMatrix ret = new SparseMatrix(size);

    // The bound must be computed in 64-bit: size * size as int overflows for
    // size > 46340, which made the range check below reject valid entries.
    long cellCount = (long)size * size;

    int count = br.ReadInt32();
    for (int i = 0; i < count; ++i)
    {
        long index = br.ReadInt64();
        double value = br.ReadSingle();

        if (index >= 0 && index < cellCount)
        {
            ret[index] = value;
        }
    }

    return ret;
}
/// <summary>
/// Returns a new matrix equal to <paramref name="a"/> minus <paramref name="b"/>.
/// Neither operand is modified.
/// </summary>
/// <param name="a">Left operand; copied to form the result.</param>
/// <param name="b">Right operand; added to the copy with factor -1.</param>
/// <returns>The difference, or <c>null</c> when the two sizes differ.</returns>
public static SparseMatrix operator -(SparseMatrix a, SparseMatrix b)
{
    bool sameSize = a.size == b.size;
    if (!sameSize)
    {
        return null;
    }

    SparseMatrix difference = new SparseMatrix(a);
    difference.Add(b, -1);
    return difference;
}
/// <summary>
/// Adds <paramref name="a"/> to this matrix in place by delegating to the
/// two-argument overload with a factor of 1.
/// </summary>
/// <param name="a">Matrix to add.</param>
public void Add(SparseMatrix a)
{
    this.Add(a, 1);
}
/// <summary>
/// Adds <paramref name="factor"/> times <paramref name="a"/> to this matrix
/// in place. Only the keys stored in <paramref name="a"/> are visited; each
/// is updated through ChangeValue.
/// </summary>
/// <param name="a">Matrix to add. If its size differs from this matrix's size, the call does nothing.</param>
/// <param name="factor">Multiplier applied to every entry of <paramref name="a"/> before adding.</param>
public void Add(SparseMatrix a, double factor)
{
    if (a.size != size)
    {
        return;
    }

    // When a matrix is added to itself, ChangeValue would modify the very
    // dictionary whose keys are being enumerated. Take a snapshot of the keys
    // in that case only, so the common path stays allocation-free.
    IEnumerable<long> keys = a.values.Keys;
    if (object.ReferenceEquals(a, this))
    {
        keys = new List<long>(a.values.Keys);
    }

    foreach (long key in keys)
    {
        ChangeValue(key, this[key] + factor * a[key]);
    }
}
/// <summary>
/// Recomputes the inverted-pendulum Q table (three actions) from the
/// transition matrices in Ma and the vector R.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Flattens qstore.value into one Vector per action, using the flat state
///    index k * LEN + j.
/// 2. Builds per-action weight vectors p_: finds the largest entry over all
///    actions and states, calls Exp(-max) on each copy, sums the three and
///    divides each by the sum.
/// 3. Copies each Ma[i], calls Multiply(p_[i]) on the copy, sums the copies
///    into M and multiplies M by 0.99f.
/// 4. Solves the system defined by (Identity - M) and R with
///    SolveLinearEquation2 to obtain the utility vector.
/// 5. Sets Q[i] = Ma[i].MatrixMultiplyRight(utility), writes the result back
///    into qstore.value and dumps the utility and Q vectors to
///    "u.txt", "q1.txt", "q2.txt" and "q3.txt".
/// NOTE(review): the exact semantics of Vector.Exp, Vector.Div and
/// SparseMatrix.Multiply(Vector) are not visible here; steps 2-3 look like a
/// softmax-weighted policy average — confirm against those helpers.
/// </remarks>
public void Train()
{
    // Number of actions handled by this trainer.
    const int actionCount = 3;

    // Flatten the 3-D Q store into one vector per action.
    Vector[] Q = new Vector[actionCount];
    for (int i = 0; i < actionCount; ++i)
    {
        Q[i] = new Vector(statenum);
    }
    for (int i = 0; i < actionCount; ++i)
    {
        for (int j = 0; j < InvertedPendulumQStore.LEN; ++j)
        {
            for (int k = 0; k < InvertedPendulumQStore.LEN; ++k)
            {
                Q[i].Elements[k * InvertedPendulumQStore.LEN + j] = qstore.value[i, j, k];
            }
        }
    }

    // Copy the Q vectors and find the largest entry over all actions/states.
    Vector[] p_ = new Vector[actionCount];
    float max_ = float.MinValue;
    for (int i = 0; i < actionCount; ++i)
    {
        p_[i] = new Vector(Q[i]);
        for (int j = 0; j < statenum; ++j)
        {
            if (max_ < p_[i].Elements[j])
            {
                max_ = (float)p_[i].Elements[j];
            }
        }
    }

    // Exponentiate with the negated maximum as offset, then normalize by the sum.
    for (int i = 0; i < actionCount; ++i)
    {
        p_[i].Exp(-max_);
    }
    Vector sum = p_[0] + p_[1] + p_[2];

    // Weight a copy of each transition matrix with its action weights.
    SparseMatrix[] Ma_ = new SparseMatrix[actionCount];
    for (int i = 0; i < actionCount; ++i)
    {
        p_[i].Div(sum);
        Ma_[i] = new SparseMatrix(Ma[i]);
        Ma_[i].Multiply(p_[i]);
    }

    // Compute the M matrix: sum of the weighted matrices, scaled by 0.99f.
    SparseMatrix M = (Ma_[0] + Ma_[1] + Ma_[2]);
    M.Multiply(0.99f);

    // Solve the linear system built from (I - M) and R.
    SparseMatrix IM = SparseMatrix.Identity(statenum) - M;
    Vector utility = IM.SolveLinearEquation2(R);

    // New per-action Q vectors from the unweighted transition matrices.
    for (int i = 0; i < actionCount; ++i)
    {
        Q[i] = Ma[i].MatrixMultiplyRight(utility);
    }

    // Write the flattened Q vectors back into the 3-D store.
    for (int i = 0; i < actionCount; ++i)
    {
        for (int j = 0; j < InvertedPendulumQStore.LEN; ++j)
        {
            for (int k = 0; k < InvertedPendulumQStore.LEN; ++k)
            {
                qstore.value[i, j, k] = (float)Q[i].Elements[k * InvertedPendulumQStore.LEN + j];
            }
        }
    }

    utility.WriteToFile("u.txt");
    Q[0].WriteToFile("q1.txt");
    Q[1].WriteToFile("q2.txt");
    Q[2].WriteToFile("q3.txt");
}
/// <summary>
/// Estimates one transition matrix per action (three actions, passed to the
/// environment as -1, 0 and 1) by sampling: for every flat state index and
/// every action, 1000 start points are drawn at a random offset from the
/// state, the environment is stepped once, and 0.001 is added to the entry
/// (start state, resulting state). The matrices are then written with
/// SaveMMatrices("mmatrices.dat").
/// </summary>
public void GenerateMMatrices()
{
    InvertedPendulumEnvironment testws = new InvertedPendulumEnvironment();
    int totalStates = InvertedPendulumQStore.LEN * InvertedPendulumQStore.LEN;

    Ma = new SparseMatrix[3];
    for (int a = 0; a < 3; ++a)
    {
        Ma[a] = new SparseMatrix(totalStates);
    }

    for (int source = 0; source < totalStates; ++source)
    {
        // Decode the flat index source = row * LEN + column.
        int column = source % InvertedPendulumQStore.LEN;
        int row = source / InvertedPendulumQStore.LEN;

        InvertedPendulumState state;
        InvertedPendulumQStore.GetState(column, row, out state);

        for (int a = 0; a < 3; ++a)
        {
            // Action indices 0..2 map to the step arguments -1..1.
            int action = a - 1;

            for (int sample = 0; sample < 1000; ++sample)
            {
                // Random offsets; the angle is drawn before the velocity so
                // the random sequence is consumed in the same order.
                testws.a = state.a + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * Math.PI;
                testws.w = state.w + r.NextDouble() / InvertedPendulumQStore.LEN * 2 * 3;
                testws.Step(action);

                InvertedPendulumState reached = new InvertedPendulumState(testws.a, testws.w);
                int reachedColumn, reachedRow;
                InvertedPendulumQStore.GetStateIndices(reached, out reachedColumn, out reachedRow);

                int target = reachedRow * InvertedPendulumQStore.LEN + reachedColumn;
                Ma[a][source, target] += 0.001f;
            }
        }
    }

    SaveMMatrices("mmatrices.dat");
}
/// <summary>
/// Builds a model over 100 sampled pendulum states and derives Q vectors
/// for the three actions from it.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Resets the environment 100 times and stores each resulting state in
///    the states list.
/// 2. For (number of states * 10000) further resets: maps the current state
///    to its closest stored state, accumulates the environment reward for
///    it, and for each action (step arguments -1, 0, 1) restores the state,
///    steps once and increments the (from, to) entry of that action's matrix.
/// 3. Keeps, per state, a running value that equals 1 on the first visit and
///    1 / (1 / previous + 1) afterwards, i.e. the reciprocal of the visit
///    count up to float rounding.
/// 4. Calls Multiply with those values on each matrix and on the reward
///    vector (presumably turning sums into averages — confirm against
///    SparseMatrix.Multiply(Vector) and Vector.Multiply(Vector)).
/// 5. Sums the three matrices, scales by 0.333333f and 0.99f, solves the
///    system defined by (Identity - M) and the reward vector with
///    SolveLinearEquation2, and sets Q[i] = matrix[i].MatrixMultiplyRight(utility).
/// </remarks>
public void Train()
{
    InvertedPendulumEnvironment testenv = new InvertedPendulumEnvironment();

    // Sample the representative states.
    int mincount = 100;
    states = new List<InvertedPendulumState>();
    int sampled = 0;
    while (sampled < mincount)
    {
        testenv.Reset();
        states.Add((InvertedPendulumState)testenv.State());
        ++sampled;
    }
    int stateCount = states.Count;

    // Compute state-transition probabilities and expected rewards.
    Vector rewards = new Vector(stateCount);
    SparseMatrix[] transitions = new SparseMatrix[3];
    for (int a = 0; a < 3; ++a)
    {
        transitions[a] = new SparseMatrix(stateCount);
    }
    Vector counts = new Vector(stateCount);

    int totalSamples = stateCount * 10000;
    for (int sample = 0; sample < totalSamples; ++sample)
    {
        testenv.Reset();
        InvertedPendulumState curstate = (InvertedPendulumState)testenv.State();
        int from = GetClosestStateIndex(states, curstate);
        rewards.Elements[from] += testenv.Reward();

        for (int a = 0; a < 3; ++a)
        {
            int action = a - 1;
            testenv.SetState(curstate);
            testenv.Step(action);
            InvertedPendulumState tostate = (InvertedPendulumState)testenv.State();
            int to = GetClosestStateIndex(states, tostate);
            transitions[a][from, to] += 1;
        }

        // Running reciprocal of the visit count for the source state.
        float previous = (float)counts.Elements[from];
        float updated = (previous != 0) ? 1 / (1 / previous + 1) : 1;
        counts.Elements[from] = updated;
    }

    for (int a = 0; a < 3; ++a)
    {
        transitions[a].Multiply(counts);
    }
    rewards.Multiply(counts);

    SparseMatrix M = (transitions[0] + transitions[1] + transitions[2]);
    M.Multiply(0.333333f);
    M.Multiply(0.99f);

    SparseMatrix IM = SparseMatrix.Identity(stateCount) - M;
    Vector utility = IM.SolveLinearEquation2(rewards);

    for (int a = 0; a < 3; ++a)
    {
        Q[a] = transitions[a].MatrixMultiplyRight(utility);
    }
}