public actionType selectAction() {
    // Score every action currently available in the current state.
    double[] actionScores = _actionValue.value(state, _possibleActions);
    //Console.WriteLine(String.Join(",", actionScores));
    List<double> scoreList = actionScores.ToList();

    // While the value model recommends suspending exploration, fall back to
    // the purely greedy policy; otherwise use the configured (exploring) policy.
    bool exploreSuspended =
        _actionValue.getRecommendedExplorationMode() == explorationMode.suspendExploration;

    return exploreSuspended
        ? explorationFreePolicy.selectAction(_possibleActions, scoreList)
        : _policy.selectAction(_possibleActions, scoreList);
}
/// <summary>
/// Heuristic cost derived from the model's own value estimates: the best
/// available action value is negated and rescaled so that a higher estimated
/// value yields a lower heuristic (value 1 -> 0, value 0 -> 0.5, value -1 -> 1).
/// </summary>
/// <param name="state">State to evaluate.</param>
/// <param name="model">Action-value model queried for the estimates.</param>
/// <param name="availableActions">Actions selectable from <paramref name="state"/>.</param>
/// <returns>Heuristic cost estimate; lower means closer to the goal per the model.</returns>
private static double heuristic(stateType state, ActionValue <stateType, actionType> model, List <actionType> availableActions) {
    // NOTE(review): an earlier hand-coded taxicab-distance heuristic was left
    // here commented out; it has been removed in favor of this model-based form.
    return(-model.value(state, availableActions).Max() / 2 + 0.5);
}
// Delegates valuation to the allocentric learner, forwarding only the first
// two components of the state vector (presumably planar coordinates — TODO confirm).
public override double[] value(int[] state, List <int[]> actions) {
    int[] reducedState = { state[0], state[1] };
    return alloLearner.value(reducedState, actions);
}