Beispiel #1
0
        /// <summary>
        /// Chooses one of the currently possible actions.
        /// </summary>
        /// <remarks>
        /// A value is requested from <c>_actionValue</c> for every entry in
        /// <c>_possibleActions</c>. If <c>_actionValue</c> recommends suspending
        /// exploration, the choice is made by <c>explorationFreePolicy</c>;
        /// in every other case it is made by <c>_policy</c>.
        /// </remarks>
        /// <returns>The action picked by whichever policy was consulted.</returns>
        public actionType selectAction()
        {
            // One value per possible action, evaluated for the current state.
            double[] actionValues = _actionValue.value(state, _possibleActions);

            bool explorationSuspended =
                _actionValue.getRecommendedExplorationMode() == explorationMode.suspendExploration;

            if (!explorationSuspended)
            {
                // Normal case: the configured policy decides.
                return _policy.selectAction(_possibleActions, actionValues.ToList());
            }

            // Exploration is suspended: use the exploration-free policy instead.
            return explorationFreePolicy.selectAction(_possibleActions, actionValues.ToList());
        }
Beispiel #2
0
 /// <summary>
 /// Computes a heuristic for <paramref name="state"/> from the model's action values.
 /// </summary>
 /// <param name="state">State that is handed to the model.</param>
 /// <param name="model">Action-value model queried for the values of the available actions.</param>
 /// <param name="availableActions">Actions whose values are requested from the model.</param>
 /// <returns>
 /// The negated largest action value, divided by two, plus 0.5.
 /// </returns>
 private static double heuristic(stateType state, ActionValue <stateType, actionType> model, List <actionType> availableActions)
 {
     // Largest value the model assigns to any of the available actions.
     double highestValue = model.value(state, availableActions).Max();

     // Note: an alternative that used the smallest taxicab distance to any goal,
     // scaled by 0.01, existed here as disabled code and is not in use.
     return (-highestValue / 2) + 0.5;
 }
 /// <summary>
 /// Returns the values of <paramref name="actions"/> as reported by <c>alloLearner</c>.
 /// </summary>
 /// <param name="state">State vector; only its first two elements are forwarded.</param>
 /// <param name="actions">Actions to be valued, passed through unchanged.</param>
 /// <returns>The array returned by <c>alloLearner.value</c>.</returns>
 public override double[] value(int[] state, List <int[]> actions)
 {
     // Copy the first two state components into a fresh array for the wrapped learner.
     int[] leadingPair = { state[0], state[1] };

     return alloLearner.value(leadingPair, actions);
 }