/// <summary>
/// Stores <paramref name="value"/> at the given row and column of the flat backing array W.
/// </summary>
/// <remarks>
/// The element position is computed as <c>row * Columns + col</c>. Only that flat
/// position is asserted to lie inside W; row and col are not range-checked individually,
/// so an out-of-range column can still pass when the resulting position falls inside W.
/// </remarks>
/// <param name="row">Row index of the element.</param>
/// <param name="col">Column index of the element.</param>
/// <param name="value">Value to store.</param>
public void Set(int row, int col, double value)
{
    // Flat position of (row, col) inside W.
    var offset = (row * Columns) + col;

    Tembo.Assert(0 <= offset && offset < W.Length);

    W[offset] = value;
}
/// <summary>
/// Picks an action for the supplied state with an epsilon-greedy policy and
/// shifts the stored state/action memory (s0/a0 take the previous s1/a1, s1/a1 take the new pair).
/// </summary>
/// <param name="state">Observed state; its length must be equal to NumberOfStates.</param>
/// <returns>
/// The selected action: the result of <c>Tembo.RandomInt(0, NumberOfActions)</c> when exploring,
/// otherwise the index returned by <c>Tembo.Maxi</c> over the output of <c>ForwardQ</c>.
/// </returns>
public int Act(double[] state)
{
    Tembo.Assert(state.Length == NumberOfStates, $"Current state({state.Length}) not equal to NS({NumberOfStates})");

    // Wrap the raw observation in a NumberOfStates x 1 matrix (a column vector).
    var stateVector = new Matrix(NumberOfStates, 1);
    stateVector.Set(state);

    // Epsilon-greedy policy: explore when the random draw falls below Options.Epsilon.
    var explore = Tembo.Random() < Options.Epsilon;

    int action;
    if (explore)
    {
        action = Tembo.RandomInt(0, NumberOfActions);
    }
    else
    {
        // Greedy with respect to the Q function: take the index of the largest output.
        // NOTE(review): the meaning of the trailing 'false' flag is not visible here - confirm against ForwardQ.
        var qValues = ForwardQ(Network, stateVector, false);
        action = Tembo.Maxi(qValues.W);
    }

    // Shift the memory: the previous "current" pair becomes the "old" pair,
    // then the new state and action become the current pair.
    this.s0 = this.s1;
    this.a0 = this.a1;
    this.s1 = stateVector;
    this.a1 = action;

    return action;
}
/// <summary>
/// Reads the element at the given row and column from the flat backing array W.
/// </summary>
/// <remarks>
/// The element position is computed as <c>row * Columns + col</c>. Only that flat
/// position is asserted to lie inside W; row and col are not range-checked individually,
/// so an out-of-range column can still pass when the resulting position falls inside W.
/// </remarks>
/// <param name="row">Row index of the element.</param>
/// <param name="col">Column index of the element.</param>
/// <returns>The value stored at the computed position in W.</returns>
public double Get(int row, int col)
{
    // Flat position of (row, col) inside W.
    var offset = (row * Columns) + col;

    Tembo.Assert(0 <= offset && offset < W.Length);

    return W[offset];
}