/// <summary>
/// Builds the equation system for an expected reachability reward and hands it to
/// <c>Jacobi</c>. The transition matrix and both reward structures are first restricted
/// to the <paramref name="maybe"/> states; afterwards every <paramref name="inf"/> state
/// in the solution is overwritten with plus infinity.
/// </summary>
/// <param name="trans">Transition matrix.</param>
/// <param name="stateReward">State reward vector.</param>
/// <param name="transReward">Transition reward matrix.</param>
/// <param name="reach">Reachable states; used to restrict the identity matrix and passed on to <c>Jacobi</c>.</param>
/// <param name="allRowVars">Row variables.</param>
/// <param name="allColVars">Column variables; summed out of the transition rewards.</param>
/// <param name="maybe">States for which the value actually has to be computed.</param>
/// <param name="inf">States whose reward is set to plus infinity in the result.</param>
/// <returns>The solution vector (presumably owned by the caller, who must dereference it - confirm).</returns>
public static CUDDNode ProbReachReward(CUDDNode trans, CUDDNode stateReward, CUDDNode transReward, CUDDNode reach, CUDDVars allRowVars, CUDDVars allColVars, CUDDNode maybe, CUDDNode inf)
{
    // NOTE(review): every operand is Ref'ed right before it is passed to a CUDD operation,
    // which suggests the operations consume one reference per operand - confirm.

    // keep only the rows that belong to 'maybe' states
    CUDD.Ref(trans, maybe);
    CUDDNode maybeTrans = CUDD.Function.Times(trans, maybe);

    // same restriction for the state rewards
    CUDD.Ref(stateReward, maybe);
    CUDDNode maybeStateReward = CUDD.Function.Times(stateReward, maybe);

    // weight the transition rewards by the restricted matrix and sum over the columns
    CUDD.Ref(transReward, maybeTrans);
    CUDDNode weightedTransReward = CUDD.Function.Times(transReward, maybeTrans);
    CUDDNode summedTransReward = CUDD.Abstract.SumAbstract(weightedTransReward, allColVars);

    CUDDNode totalReward = CUDD.Function.Plus(maybeStateReward, summedTransReward);

    // system matrix: (identity restricted to reach) - restricted transitions
    CUDDNode identity = CUDD.Matrix.Identity(allRowVars, allColVars);
    CUDD.Ref(reach);
    CUDDNode reachIdentity = CUDD.Function.And(identity, reach);
    CUDDNode system = CUDD.Function.Minus(reachIdentity, maybeTrans);

    // totalReward is passed twice, exactly as in the original call
    CUDDNode solution = Jacobi(system, totalReward, totalReward, reach, allRowVars, allColVars, 1);

    // set reward for infinity states to infinity
    CUDD.Ref(inf);
    solution = CUDD.Function.ITE(inf, CUDD.PlusInfinity(), solution);

    // release the intermediates that are still alive after the solver call
    CUDD.Deref(system, totalReward);
    return solution;
}
/// <summary>
/// Computes the reachability reward for the goal set <paramref name="b"/> under the reward
/// structure selected by <paramref name="rewardStructIndex"/>, and returns it multiplied
/// by the <c>start</c> node.
/// </summary>
/// <param name="b">
/// Goal states. The method takes its own reference before intersecting with <c>reach</c>,
/// so the caller's reference is left in place.
/// </param>
/// <param name="rewardStructIndex">Index into <c>stateRewards</c> and <c>transRewards</c>.</param>
/// <returns>The reward vector multiplied by <c>start</c>.</returns>
public CUDDNode ComputeReachReward(CUDDNode b, int rewardStructIndex)
{
    // Reference b (the caller keeps its own) and work on the reachable part of the goal
    CUDD.Ref(b, reach);
    b = CUDD.Function.And(b, reach);

    CUDDNode infStates;
    CUDDNode maybeStates;

    if (b.Equals(CUDD.ZERO))
    {
        // empty goal: every reachable state is an 'inf' state
        CUDD.Ref(reach);
        infStates = reach;
        maybeStates = CUDD.Constant(0);
    }
    else if (b.Equals(reach))
    {
        // goal covers all reachable states: nothing is 'inf' or 'maybe'
        infStates = CUDD.Constant(0);
        maybeStates = CUDD.Constant(0);
    }
    else
    {
        CUDDNode no = ProbAlgo.Prob0(trans01, reach, allRowVars, allColVars, reach, b);
        CUDDNode prob1 = ProbAlgo.Prob1(trans01, reach, allRowVars, allColVars, reach, b, no);
        CUDD.Deref(no);

        // inf = reach AND NOT prob1
        CUDD.Ref(reach);
        CUDDNode notProb1 = CUDD.Function.Not(prob1);
        infStates = CUDD.Function.And(reach, notProb1);

        // maybe = reach AND NOT (inf OR goal)
        CUDD.Ref(reach, infStates, b);
        CUDDNode infOrGoal = CUDD.Function.Or(infStates, b);
        CUDDNode neitherInfNorGoal = CUDD.Function.Not(infOrGoal);
        maybeStates = CUDD.Function.And(reach, neitherInfNorGoal);
    }

    // print out the sizes of the goal/inf/maybe sets
    Debug.WriteLine("goal = " + CUDD.GetNumMinterms(b, allRowVars.GetNumVars()));
    Debug.WriteLine("inf = " + CUDD.GetNumMinterms(infStates, allRowVars.GetNumVars()));
    Debug.WriteLine("maybe = " + CUDD.GetNumMinterms(maybeStates, allRowVars.GetNumVars()));

    CUDDNode result;
    if (!maybeStates.Equals(CUDD.ZERO))
    {
        // numerical computation is only needed when some state is undecided
        result = ProbAlgo.ProbReachReward(trans, stateRewards[rewardStructIndex], transRewards[rewardStructIndex], reach, allRowVars, allColVars, maybeStates, infStates);
    }
    else
    {
        // no 'maybe' states: infinity on 'inf' states, zero elsewhere
        CUDD.Ref(infStates);
        result = CUDD.Function.ITE(infStates, CUDD.PlusInfinity(), CUDD.Constant(0));
    }

    CUDD.Deref(infStates, maybeStates, b);

    CUDD.Ref(start);
    return CUDD.Function.Times(result, start);
}
/// <summary>
/// Runs <paramref name="bound"/> iterations of the instantaneous-reward computation for a
/// nondeterministic model: starting from <paramref name="stateReward"/>, each iteration
/// multiplies the transition matrix with the current vector and then abstracts the
/// nondeterministic variables with min or max.
/// </summary>
/// <param name="trans">Transition matrix.</param>
/// <param name="stateReward">State reward vector; used as the initial solution.</param>
/// <param name="nondetMask">Mask over nondeterministic choices; only used when <paramref name="min"/> is true.</param>
/// <param name="allRowVars">Row variables.</param>
/// <param name="allColVars">Column variables.</param>
/// <param name="nondetVars">Nondeterminism variables that are abstracted away in every iteration.</param>
/// <param name="bound">Number of iterations; for values &lt;= 0 the loop body never runs.</param>
/// <param name="min">True to minimise over nondeterministic choices, false to maximise.</param>
/// <param name="init">Not used by this method.</param>
/// <returns>The solution vector after <paramref name="bound"/> iterations.</returns>
public static CUDDNode NondetInstReward(CUDDNode trans, CUDDNode stateReward, CUDDNode nondetMask, CUDDVars allRowVars, CUDDVars allColVars, CUDDVars nondetVars, int bound, bool min, CUDDNode init)
{
    // Stopwatch instead of DateTime.Now: elapsed time must not be affected by wall-clock
    // adjustments (DST, NTP). Fully qualified so no additional using directive is required.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    // rewards are not restricted to 0..1, so the mask maps masked-out choices to +infinity
    CUDD.Ref(nondetMask);
    CUDDNode newNondetMask = CUDD.Function.ITE(nondetMask, CUDD.PlusInfinity(), CUDD.Constant(0));

    // initial solution is the state reward vector itself
    CUDD.Ref(stateReward);
    CUDDNode sol = stateReward;

    for (int i = 0; i < bound; i++)
    {
        CUDDNode tmp = CUDD.Matrix.MatrixMultiplyVector(trans, sol, allRowVars, allColVars);

        if (min)
        {
            // push masked-out choices to +infinity so they never win the minimum
            CUDD.Ref(newNondetMask);
            tmp = CUDD.Function.Maximum(tmp, newNondetMask);
            tmp = CUDD.Abstract.MinAbstract(tmp, nondetVars);
        }
        else
        {
            tmp = CUDD.Abstract.MaxAbstract(tmp, nondetVars);
        }

        // the previous iterate is no longer needed
        CUDD.Deref(sol);
        sol = tmp;
    }

    timer.Stop();
    double runningTime = timer.Elapsed.TotalSeconds;
    Debug.WriteLine("NondetInstReward: " + bound + " iterations in " + runningTime + " seconds");

    CUDD.Deref(newNondetMask);
    return sol;
}
/// <summary>
/// Returns the minimum value of <paramref name="allProb"/> over the states that satisfy
/// <paramref name="filter"/> and are in <c>reach</c>.
/// [ REFS: '', DEREFS: '' ]
/// </summary>
/// <param name="allProb">Vector of values; referenced locally before use.</param>
/// <param name="filter">State set to minimise over; referenced locally before use.</param>
/// <returns>
/// The minimum found by <c>CUDD.FindMin</c>, or <see cref="double.MaxValue"/> when no
/// reachable state satisfies the filter.
/// </returns>
public double GetMinProb(CUDDNode allProb, CUDDNode filter)
{
    // restrict the filter to reachable states
    CUDD.Ref(filter, reach);
    CUDDNode reachableFilter = CUDD.Function.And(filter, reach);

    double minimum;
    if (!reachableFilter.Equals(CUDD.ZERO))
    {
        // states outside the filter get +infinity so they cannot be the minimum
        CUDD.Ref(reachableFilter, allProb);
        CUDDNode masked = CUDD.Function.ITE(reachableFilter, allProb, CUDD.PlusInfinity());
        minimum = CUDD.FindMin(masked);
        CUDD.Deref(masked);
    }
    else
    {
        // nothing to minimise over
        minimum = double.MaxValue;
    }

    CUDD.Deref(reachableFilter);
    return minimum;
}
/// <summary>
/// Iterates the reachability-reward computation for a nondeterministic model until two
/// successive solution vectors compare equal via <c>CUDD.IsEqual</c>.
/// </summary>
/// <param name="trans">Transition matrix.</param>
/// <param name="reach">Not used by this method.</param>
/// <param name="stateReward">State reward vector.</param>
/// <param name="transReward">Transition reward matrix.</param>
/// <param name="nondetMask">Mask over nondeterministic choices; only used when <paramref name="min"/> is true.</param>
/// <param name="allRowVars">Row variables.</param>
/// <param name="allColVars">Column variables; summed out of the transition rewards.</param>
/// <param name="nondetVars">Nondeterminism variables that are abstracted away in every iteration.</param>
/// <param name="infReward">States whose reward is fixed to plus infinity.</param>
/// <param name="maybeReward">States for which the value has to be computed; all other rows are filtered out.</param>
/// <param name="min">True to minimise over nondeterministic choices, false to maximise.</param>
/// <returns>The fixed-point solution vector.</returns>
public static CUDDNode NondetReachReward(CUDDNode trans, CUDDNode reach, CUDDNode stateReward, CUDDNode transReward, CUDDNode nondetMask, CUDDVars allRowVars, CUDDVars allColVars, CUDDVars nondetVars, CUDDNode infReward, CUDDNode maybeReward, bool min)
{
    // Stopwatch instead of DateTime.Now: elapsed time must not be affected by wall-clock
    // adjustments (DST, NTP). Fully qualified so no additional using directive is required.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    int numberOfIterations = 0;

    // filter out rows (goal states and infinity states) from matrix
    CUDD.Ref(trans, maybeReward);
    CUDDNode a = CUDD.Function.Times(trans, maybeReward);

    // also remove goal and infinity states from state rewards vector
    CUDD.Ref(stateReward, maybeReward);
    CUDDNode tempStateReward = CUDD.Function.Times(stateReward, maybeReward);

    // multiply transition rewards by transition probs and sum rows
    // (note also filters out unwanted states at the same time)
    CUDD.Ref(transReward, a);
    CUDDNode tempTransReward = CUDD.Function.Times(transReward, a);
    tempTransReward = CUDD.Abstract.SumAbstract(tempTransReward, allColVars);

    // combine state and transition rewards
    CUDDNode allReward = CUDD.Function.Plus(tempStateReward, tempTransReward);

    // need to change mask because rewards are not necessarily in the range 0..1
    CUDD.Ref(nondetMask);
    CUDDNode newNondetMask = CUDD.Function.ITE(nondetMask, CUDD.PlusInfinity(), CUDD.Constant(0));

    // initial solution is infinity in 'inf' states, zero elsewhere
    // note: ok to do this because cudd matrix-multiply (and other ops)
    // treat 0 * inf as 0, unlike in IEEE 754 rules
    CUDD.Ref(infReward);
    CUDDNode sol = CUDD.Function.ITE(infReward, CUDD.PlusInfinity(), CUDD.Constant(0));

    // NOTE(review): termination relies on CUDD.IsEqual eventually reporting equality;
    // whether it applies a tolerance is not visible here - confirm.
    while (true)
    {
        numberOfIterations++;

        CUDDNode tmp = CUDD.Matrix.MatrixMultiplyVector(a, sol, allRowVars, allColVars);

        // add rewards
        CUDD.Ref(allReward);
        tmp = CUDD.Function.Plus(tmp, allReward);

        if (min)
        {
            // push masked-out choices to +infinity so they never win the minimum
            CUDD.Ref(newNondetMask);
            tmp = CUDD.Function.Maximum(tmp, newNondetMask);
            tmp = CUDD.Abstract.MinAbstract(tmp, nondetVars);
        }
        else
        {
            tmp = CUDD.Abstract.MaxAbstract(tmp, nondetVars);
        }

        // 'inf' states stay at infinity in every iterate
        CUDD.Ref(infReward);
        tmp = CUDD.Function.ITE(infReward, CUDD.PlusInfinity(), tmp);

        if (CUDD.IsEqual(tmp, sol))
        {
            // converged: keep 'sol' and drop the duplicate
            CUDD.Deref(tmp);
            break;
        }

        CUDD.Deref(sol);
        sol = tmp;
    }

    timer.Stop();
    double runningTime = timer.Elapsed.TotalSeconds;
    Debug.WriteLine("NondetReachReward: " + numberOfIterations + " iterations in " + runningTime + " seconds");

    CUDD.Deref(a, allReward, newNondetMask);
    return sol;
}