/// <summary>
/// Replaces <c>qFunction</c> with a fresh instance and, for every non-goal
/// entry in <c>states</c>, offers the four candidate moves (up, down, left,
/// right) to <c>AddStateIfCan</c>.
/// </summary>
/// <remarks>
/// Candidate positions are produced by offsetting one coordinate of the
/// current position by <c>tileDistance</c>, so this relies on the assumption
/// that all tiles are equally spaced. Whether a candidate is actually
/// registered is decided entirely by <c>AddStateIfCan</c>.
/// </remarks>
private void SetupNewQFunction()
{
    qFunction = new QFunction(discountFactor, kActionProb);

    foreach (KeyValuePair<State, Tile> entry in states)
    {
        // No transitions are registered for goal states.
        if (entry.Value.isGoal)
        {
            continue;
        }

        State origin = entry.Key;
        var pos = origin.position;

        // Up/Down shift index 0, Left/Right shift index 2; index 1 is copied unchanged.
        float[] towardUp = { pos[0] + tileDistance, pos[1], pos[2] };
        float[] towardDown = { pos[0] - tileDistance, pos[1], pos[2] };
        float[] towardLeft = { pos[0], pos[1], pos[2] + tileDistance };
        float[] towardRight = { pos[0], pos[1], pos[2] - tileDistance };

        // Add an action for each direction if there is a walkable tile at the target position.
        AddStateIfCan(origin, new AgentAction(AgentAction.Move.Up), towardUp);
        AddStateIfCan(origin, new AgentAction(AgentAction.Move.Down), towardDown);
        AddStateIfCan(origin, new AgentAction(AgentAction.Move.Left), towardLeft);
        AddStateIfCan(origin, new AgentAction(AgentAction.Move.Right), towardRight);
    }
}
/// <summary>
/// Chooses the next action for <paramref name="state"/>: when a fresh draw
/// from <c>Rand.NextDouble()</c> is below <c>Epsilon</c> the decision is
/// delegated to the base implementation; otherwise the action reported by
/// <c>QFunction.GetOptimalAction</c> is used.
/// </summary>
/// <param name="state">The state to select an action for.</param>
/// <returns>The action picked by whichever branch was taken.</returns>
public override Tensor GetNextAction(Tensor state)
{
    // NOTE(review): looks like an epsilon-greedy policy, with the base class
    // presumably supplying the exploratory action - confirm against the base type.
    bool deferToBase = Rand.NextDouble() < Epsilon;

    if (!deferToBase)
    {
        return QFunction.GetOptimalAction(state);
    }

    return base.GetNextAction(state);
}
/// <summary>
/// Checks <c>QFunction.Calculate()</c> for third constructor arguments 1
/// through 4 (with the fixture's <c>x</c>, <c>n</c> and <c>k</c>): the value
/// must be 1/2 for 1 and 0 for 2, 3 and 4.
/// </summary>
public void QFunctionTest()
{
    // Third argument = 1: expected 1/2.
    var first = new QFunction(x, n, 1, k);
    var expectedFirst = new Fraction(1, 2);
    Assert.AreEqual(expectedFirst, first.Calculate());

    // Third argument = 2: expected 0.
    var second = new QFunction(x, n, 2, k);
    var expectedSecond = new Fraction(0);
    Assert.AreEqual(expectedSecond, second.Calculate());

    // Third argument = 3: expected 0.
    var third = new QFunction(x, n, 3, k);
    var expectedThird = new Fraction(0);
    Assert.AreEqual(expectedThird, third.Calculate());

    // Third argument = 4: expected 0.
    var fourth = new QFunction(x, n, 4, k);
    var expectedFourth = new Fraction(0);
    Assert.AreEqual(expectedFourth, fourth.Calculate());
}
/// <summary>
/// Computes the start value, i.e. the case where s = 0, by building a
/// <c>QFunction</c> from the instance's <c>_x</c>, <c>_n</c>, <c>_t</c> and
/// <c>_k</c> and returning the result of its <c>Calculate()</c>.
/// </summary>
/// <returns>The value produced by <c>QFunction.Calculate()</c>.</returns>
private Fraction StartValueCalculate()
{
    return new QFunction(_x, _n, _t, _k).Calculate();
}