Example #1
0
 protected void Reset()
 {
     // Reset the agent's policy, value estimates, and eligibility traces.
     // All tables are flat arrays of size NS * NA, indexed action-major: [a * NS + s].
     Q = Tembo.ArrayOfZeros(NS * NA);
     if (Options.QInitVal != 0)
     {
         Tembo.SetConst(Q, Options.QInitVal);
     }
     P = Tembo.ArrayOfZeros(NS * NA);
     E = Tembo.ArrayOfZeros(NS * NA);

     // Model/planning state for Dyna-style updates.
     // EnvModelS starts at -1 so an entry of -1 means "state-action never observed yet".
     EnvModelS = Tembo.ArrayOfZeros(NS * NA);
     Tembo.SetConst(EnvModelS, -1);
     EnvModelR = Tembo.ArrayOfZeros(NS * NA);
     SaSeen    = new double[] { };
     PQ        = Tembo.ArrayOfZeros(NS * NA);

     // Start from a uniform random policy over the actions allowed in each state.
     for (var s = 0; s < NS; s++)
     {
         var allowed = AllowedActions(s);
         var uniform = 1.0 / allowed.Length;
         foreach (var a in allowed)
         {
             P[a * NS + s] = uniform;
         }
     }

     // Streaming-update memory (s0,a0,r0,s1,a1,...); 999999999 is the
     // file's sentinel for "no transition recorded yet".
     r0 = 999999999;
     s0 = 999999999;
     s1 = 999999999;
     a0 = 999999999;
     a1 = 999999999;
 }