/// <summary>
/// Scores every repair action offered for <paramref name="state"/>, records the
/// highest-scoring one as the state's latest chosen action, and returns the state
/// produced by <c>ComputeNextState</c> for that action.
/// </summary>
/// <remarks>
/// An action that has never been chosen from this state is scored by its negated
/// <c>FPCost</c> divided by the action's <c>Count</c>. An action chosen before is scored
/// by its stored state-action value plus <c>numOfDiag</c> times
/// <c>sqrt(log10(NumberOfVisits) / timesChosen)</c>.
/// Enumeration stops at the first action that is null or has a <c>Count</c> of zero.
/// The state's visit counter is incremented whenever an action set was obtained,
/// including when no action was selected.
/// </remarks>
/// <param name="state">The MDP state to descend from.</param>
/// <returns>
/// The next state for the selected action, or <c>null</c> when no action set is
/// available or no action scored above <see cref="double.MinValue"/>.
/// </returns>
private MDPState descendByUCB(MDPState state)
{
    RepairActionsSet candidates = repairActionSearcher.ComputePossibleAcions(state.State);
    if (candidates == null)
    {
        return null;
    }

    RepairAction selected = null;
    double topScore = double.MinValue;

    for (RepairAction candidate = candidates.NextAction();
         candidate != null && candidate.Count != 0;
         candidate = candidates.NextAction())
    {
        int timesChosen = state.GetNumOfTimesChosen(candidate);
        double score;

        if (timesChosen != 0)
        {
            // Already tried: stored value plus the visit-count based bonus term.
            double storedValue = state.GetStateActionValue(candidate);
            score = storedValue + numOfDiag * (Math.Sqrt(Math.Log10(state.NumberOfVisits) / timesChosen));
        }
        else
        {
            // Never tried from this state: fall back to the negated cost estimate per Count.
            score = (-costEstimator.FPCost(candidate, state.State.HealthState)) / (candidate.Count);
        }

        // Strict comparison: on ties the earlier action is kept.
        if (score > topScore)
        {
            selected = candidate;
            topScore = score;
        }
    }

    MDPState successor = null;
    if (selected != null)
    {
        state.LatestChosenAction = selected;
        successor = ComputeNextState(state, selected);
    }

    state.NumberOfVisits++;
    return successor;
}
/// <summary>
/// Picks the repair action with the smallest <c>WastedCostUtility</c> value among the
/// actions offered for <paramref name="state"/>.
/// </summary>
/// <remarks>
/// Properties are reset first. Enumeration ends at the first action that is null or has
/// a non-positive <c>Count</c>, or as soon as <c>Stop()</c> returns true. The iteration
/// counter is incremented before <c>Stop()</c> is consulted, so the interrupted iteration
/// is included in the count. The chosen action, the count, and the minimum value are
/// passed to <c>FillIterationDetails</c> before returning.
/// </remarks>
/// <param name="state">The system state to plan for.</param>
/// <returns>
/// The lowest-valued action found, or <c>null</c> when the state is null, has no
/// diagnoses, no action set is available, or no action was evaluated.
/// </returns>
public override RepairAction Plan(SystemState state)
{
    ResetProperties();

    if (state == null || state.Diagnoses == null || state.Diagnoses.Count == 0)
    {
        return null;
    }

    RepairActionsSet candidates = repairActionSearcher.ComputePossibleAcions(state);
    if (candidates == null)
    {
        return null;
    }

    RepairAction cheapest = null;
    double lowestCost = double.MaxValue;
    int examined = 0;

    for (RepairAction candidate = candidates.NextAction();
         candidate != null && candidate.Count > 0;
         candidate = candidates.NextAction())
    {
        examined++;
        if (Stop())
        {
            break;
        }

        double cost = costEstimator.WastedCostUtility(candidate, state);
        // Strict comparison: on ties the earlier action is kept.
        if (cost < lowestCost)
        {
            lowestCost = cost;
            cheapest = candidate;
        }
    }

    FillIterationDetails(cheapest, examined, lowestCost);
    return cheapest;
}