// Fixed update called reliably on timer.
//
// Throttles the simulation so that work only happens on every slowDown-th
// call, then performs superSpeed action selections per working call. Each
// selection is greedy (q_.ArgMax) when this is a validation run, when
// learning is disabled, or when a uniform random draw in [0, 1] exceeds
// epsilon_; otherwise the action comes from q_.ArgRand (presumably a random
// exploratory action -- confirm against TabQ).
void FixedUpdate()
{
    // Throttle: only act when the tick counter wraps around to zero.
    // A slowDown of 0 would make the remainder below throw
    // DivideByZeroException on every tick, so treat 0 as "no throttling".
    if (slowDown != 0)
    {
        skippy_ = (skippy_ + 1) % slowDown;
        if (skippy_ != 0)
        {
            return;
        }
    }

    for (int i = 0; i < superSpeed; i++)
    {
        Action action;
        if (validationRun_ || !learning || Random.Range(0.0f, 1.0f) > epsilon_)
        {
            // Greedy choice is the on-policy action; also record the value
            // the table currently predicts for this state.
            action = q_.ArgMax(currentState_);
            expected_ = q_.Max(currentState_);
        }
        else
        {
            // Off-policy exploration. expected_ is intentionally left
            // untouched here, as in the greedy branch only it is refreshed.
            action = q_.ArgRand(currentState_);
        }

        TakeAction(action);
    }
}
// Repaints floorTexture_ from the given table. Every (x, z) grid cell maps
// to a 3x3 pixel tile whose origin is (x * 3, z * 3).
//
//   q - table queried through Max (tile colour) and ArgMax (policy arrow).
//   s - template state; it is loaded into a scratch State via Set so that
//       x and z can be overwritten per cell (assumes Set copies the fields
//       rather than aliasing s -- confirm against State).
//
// Tile colour when displayExpectedReward is on, with val = q.Max(cell):
//   val >= 10      : yellow -> white, parameter (val - 10) / 10
//   0 < val < 10   : blue   -> yellow, parameter val / 10
//   otherwise      : black  -> red, parameter -val / 10
// The interpolation is unclamped, so values beyond a band's range
// extrapolate past its end colour. With the flag off, tiles are black.
//
// When displayPolicy is on, the tile centre is painted gray and the edge
// pixel in the direction of q.ArgMax(cell) is painted white.
public void RefreshFloorTexture(TabQ q, State s)
{
    State cell = new State();
    cell.Set(s);

    for (int z = 0; z < depth; z++)
    {
        cell.z = z;
        int tileZ = z * 3;

        for (int x = 0; x < width; x++)
        {
            cell.x = x;
            int tileX = x * 3;

            // Pick the fill colour for this tile.
            Color fill;
            if (!displayExpectedReward)
            {
                fill = Color.black;
            }
            else
            {
                float val = (float)q.Max(cell);
                if (val >= 10)
                {
                    fill = Color.LerpUnclamped(Color.yellow, Color.white, (val - 10) / 10);
                }
                else if (val > 0)
                {
                    fill = Color.LerpUnclamped(Color.blue, Color.yellow, val / 10);
                }
                else
                {
                    fill = Color.LerpUnclamped(Color.black, Color.red, -val / 10);
                }
            }

            // Flood the whole 3x3 tile.
            for (int dz = 0; dz < 3; dz++)
            {
                for (int dx = 0; dx < 3; dx++)
                {
                    floorTexture_.SetPixel(tileX + dx, tileZ + dz, fill);
                }
            }

            if (!displayPolicy)
            {
                continue;
            }

            // Overlay the policy arrow: gray centre plus a white tip one
            // pixel away from the centre in the preferred direction.
            int tipDx = 1;
            int tipDz = 1;
            bool drawArrow = true;
            switch ((Direction)q.ArgMax(cell).iVal)
            {
                case Direction.NORTH:
                    tipDz = 2;
                    break;
                case Direction.SOUTH:
                    tipDz = 0;
                    break;
                case Direction.EAST:
                    tipDx = 2;
                    break;
                case Direction.WEST:
                    tipDx = 0;
                    break;
                default:
                    // Any other direction value leaves the tile unmarked.
                    drawArrow = false;
                    break;
            }

            if (drawArrow)
            {
                floorTexture_.SetPixel(tileX + 1, tileZ + 1, Color.gray);
                floorTexture_.SetPixel(tileX + tipDx, tileZ + tipDz, Color.white);
            }
        }
    }

    floorTexture_.Apply();
}