/// <summary>
/// Applies a continuous action, integrates the three angles in <c>arc</c> / <c>dArc</c>
/// over one external time step and returns the reward for that step.
/// </summary>
/// <param name="action">
/// Action whose components 0, 1 and 2 are handed to <c>CalculateTau</c>.
/// </param>
/// <returns>
/// The torque-violation penalty (negative) plus either the goal reward, the standard
/// reward, or minus the crush penalty, depending on the state reached.
/// </returns>
public override Reinforcement PerformAction(Core.Action<double> action)
{
    // The torque is derived from the action once and then held for every sub-step.
    double tauViolation = 0;
    CalculateTau(action[0], action[1], action[2], ref tau, ref tauViolation);

    double elapsed = 0;
    while (elapsed < externalDiscretization)
    {
        // The final sub-step is shortened so the integrated time never exceeds
        // externalDiscretization.
        double step = Math.Min(internalDiscretization, externalDiscretization - elapsed);
        double stepSquared = step * step;

        // Presumably refreshes d2Arc from the current state - confirm against its definition.
        Calculate2ndDerivatives(tau);

        for (int joint = 0; joint < 3; joint++)
        {
            // Position uses the velocity from before this sub-step's velocity update.
            arc[joint] += dArc[joint] * step + 0.5 * d2Arc[joint] * stepSquared;
            dArc[joint] += d2Arc[joint] * step;
        }

        elapsed += internalDiscretization;
    }

    // If the integration produced NaN anywhere, overwrite the whole state with fixed values.
    bool hasNaN = arc.Any(value => double.IsNaN(value)) || dArc.Any(value => double.IsNaN(value));
    if (hasNaN)
    {
        for (int joint = 0; joint < 3; joint++)
        {
            arc[joint] = 100;
            dArc[joint] = 0;
        }
    }

    double reward = -tauViolation * tauPenalty;
    CopyState();

    if (!IsStateOK())
    {
        // Invalid state: penalize and end the episode.
        reward -= crushPenalty;
        CurrentState.IsTerminal = true;
        return reward;
    }

    // The goal only counts when goalWorks is set; reaching it ends the episode.
    bool goalReached = goalWorks && IsAtGoalPosition();
    if (goalReached)
    {
        reward += goalReward;
    }
    else
    {
        reward += GetStandardReward();
    }

    CurrentState.IsTerminal = goalReached;
    return reward;
}
/// <summary>
/// Applies a discrete action, integrates <c>x</c>, <c>dX</c>, <c>theta</c> and
/// <c>dTheta</c> over one external time step and returns the reward for that step.
/// </summary>
/// <param name="action">
/// Action whose first vector element is the index into <c>actionMap</c> that selects the force.
/// </param>
/// <returns>
/// <c>cosTheta</c> when the state is valid and inside the rewarded region, -1 when it is
/// valid but outside that region, and minus <c>boundryPenalty</c> when the state is invalid.
/// </returns>
public override Reinforcement PerformAction(Core.Action<int> action)
{
    double force = this.actionMap[action.ActionVector.First()];

    double elapsed = 0;
    while (elapsed < externalDiscretization)
    {
        // The final sub-step is shortened so the integrated time never exceeds
        // externalDiscretization.
        double step = Math.Min(internalDiscretization, externalDiscretization - elapsed);
        double stepSquared = step * step;

        // Second derivatives for this sub-step; the x term takes the theta term as an input.
        double thetaAccel = this.TimeDerivativeFromTheta(this.dX, this.sinTheta, this.cosTheta, this.dTheta, force);
        double xAccel = TimeDerivativeFromX(this.dX, this.sinTheta, this.cosTheta, this.dTheta, force, thetaAccel);

        // Positions are advanced with the velocities from before their own update.
        this.x += (dX * step) + (0.5 * xAccel * stepSquared);
        this.dX += xAccel * step;
        this.theta += this.dTheta * step + 0.5 * thetaAccel * stepSquared;
        this.dTheta += thetaAccel * step;

        // Presumably normalizes theta and refreshes sinTheta / cosTheta - confirm.
        this.RectifyState();

        elapsed += internalDiscretization;
    }

    double reward = 0;
    if (!IsStateValid)
    {
        reward -= boundryPenalty;
    }
    else if (Math.Abs(this.dTheta) < Math.PI * 2 && Math.Abs(this.x) < rewardAreaRadius)
    {
        // Rewarded region: bounded rotation speed and x within rewardAreaRadius.
        reward += this.cosTheta;
    }
    else
    {
        reward -= 1;
    }

    UpdateCurrentState();
    return reward;
}
/// <summary>
/// Applies a continuous six-component action to <c>theObject</c> for one external time
/// step, converting the action into torques according to <c>controlType</c>, and returns
/// the shaped reward for that step.
/// </summary>
/// <param name="action">Action whose components 0..5 drive the six actuated joints.</param>
/// <returns>
/// Forward speed minus penalties for clipped commands, torque overload, a face-fall term,
/// three touch terms and, when the state is not OK, the crush penalty.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>controlType</c> is not one of the handled values.
/// </exception>
public override Reinforcement PerformAction(Core.Action<double> action)
{
    double reward = 0;
    double[] taus = new double[10];

    double elapsed = 0;
    while (elapsed < externalDiscretization)
    {
        // The final sub-step is shortened so the integrated time never exceeds
        // externalDiscretization.
        double dt = Math.Min(internalDiscretization, externalDiscretization - elapsed);
        double[] arcs = theObject.GetLinkArcs();
        double[] dArcs = theObject.GetLinkArcVelocities();

        for (int i = 0; i < 6; i++)
        {
            switch (controlType)
            {
                case ControlType.Direct:
                {
                    // The clipped action is used directly as a fraction of maxTau.
                    double command = Math.Min(Math.Max(-1, action[i]), 1);
                    reward -= Math.Abs(action[i] - command) * tauPenalty * dt / externalDiscretization;
                    taus[p[i]] = command * maxTau;
                    break;
                }

                case ControlType.ProportionalDerivative:
                {
                    // The clipped action shifts the target angle; a PD law produces the torque.
                    double command = Math.Min(Math.Max(-1, action[i]), 1);
                    reward -= Math.Abs(action[i] - command) * tauPenalty * dt / externalDiscretization;

                    int joint = p[i];
                    // NOTE(review): nominalArcs is indexed by i and by p[i] in the same
                    // expression, and by p[i] - 1 in the stabilized mode below. Behavior is
                    // kept as found - confirm which index is intended.
                    double angleError = NormalizedArc(
                        (arcs[joint] - arcs[joint - 1])
                        - (nominalArcs[i] + command * (objConsts[joint].MaxArc - nominalArcs[joint])));
                    double angleRate = dArcs[joint] - dArcs[joint - 1];
                    taus[joint] = Math.Min(Math.Max(-maxTau, -angleError * controlKp - angleRate * controlKd), maxTau);
                    break;
                }

                case ControlType.StabilizedProportionalDerivative:
                {
                    // A PD term squashed into (-1, 1) is added to the raw action; the sum is
                    // clipped, and the clipped-off amount is penalized.
                    int joint = p[i];
                    double angleError = NormalizedArc((arcs[joint] - arcs[joint - 1]) - nominalArcs[joint - 1]);
                    double angleRate = dArcs[joint] - dArcs[joint - 1];
                    double pdTerm = Math.Atan(-angleError * controlKp - angleRate * controlKd) * 2 / Math.PI;
                    double command = pdTerm + action[i];
                    double clipped = Math.Min(Math.Max(-1, command), 1);
                    reward -= Math.Abs(clipped - command) * tauPenalty * dt / externalDiscretization;
                    // NOTE(review): this mode scales by maxTaus[i], the others by maxTau - confirm.
                    taus[joint] = clipped * maxTaus[i];
                    break;
                }

                default:
                    throw new InvalidOperationException("Unknown control type");
            }
        }

        theObject.GoAhead(taus, dt);
        elapsed += internalDiscretization;
    }

    UpdateCurrentState();

    // Shift the object back by 5 once joint image 4 has moved past x = 6.
    if (jointImages[4].X.x > 6)
    {
        theObject.Translate(new AVector2D(-5, 0));
    }

    // system 6
    double speed = 0.5 * (jointImages[3].V.x + jointImages[4].V.x);

    double backStepArg = (jointImages[0].IsStanding ? 0 : 0.5) + jointImages[0].X.y * 5;
    double back = Math.Max(
        Math.Min(trunkWeight * jointImages[3].V.y, feetWeight),
        feetWeight * SoftStep01(backStepArg));

    double frontStepArg = (jointImages[9].IsStanding ? 0 : 0.5) + jointImages[9].X.y * 5;
    double front = Math.Max(
        Math.Min(trunkWeight * jointImages[4].V.y, feetWeight),
        feetWeight * SoftStep01(frontStepArg));

    // Sum of per-joint torque magnitudes, each capped at maxTau.
    double overload = 0;
    for (int i = 0; i < 6; i++)
    {
        overload += Math.Min(Math.Abs(jointImages[p[i]].TauW), maxTau);
    }

    reward += speed;
    reward -= overload * overloadPenalty;
    reward += (Math.Max(back, front) - 1) * (1.0 - SoftStep01(speed));

    // Face-fall term: only active when joint image 6 is moving downward, moving forward
    // faster than joint image 9, and already ahead of it in x.
    var lead = jointImages[6];
    var rear = jointImages[9];
    bool fallingForward = -lead.V.y > 0 && lead.V.x - rear.V.x > 0 && lead.X.x > rear.X.x;
    reward += faceFallPenalty * (SoftStep01(speed) - 1) * (fallingForward ? 1 : 0) * (-lead.V.y + lead.V.x - rear.V.x);

    // Touch terms for joint images 1, 2 and 5, each with its own height scale.
    reward += touchPenalty * (SoftStep01(jointImages[1].X.y * 9) - 1);
    reward += touchPenalty * (SoftStep01(jointImages[2].X.y * 5) - 1);
    reward += touchPenalty * (SoftStep01(jointImages[5].X.y * 2) - 1);

    if (!IsStateOK())
    {
        reward -= crushPenalty;
    }

    return reward;
}