/// <summary>
        /// Applies a continuous action for one external time step and returns the reward.
        /// </summary>
        /// <remarks>
        /// The first three action components are handed to <c>CalculateTau</c>, the state
        /// (<c>arc</c>, <c>dArc</c>) is integrated in sub-steps of at most
        /// <c>internalDiscretization</c>, and the reward is then assembled from the tau
        /// violation penalty plus a goal, standard, or crush term.
        /// <c>CurrentState.IsTerminal</c> is set on every call.
        /// </remarks>
        /// <param name="action">Action vector; elements 0, 1 and 2 are read.</param>
        /// <returns>The reward accumulated for this step.</returns>
        public override Reinforcement PerformAction(Core.Action <double> action)
        {
            double tauViolation = 0;
            CalculateTau(action[0], action[1], action[2], ref tau, ref tauViolation);

            // Walk through the external step in internal sub-steps; the last sub-step is
            // shortened so that the total integrated time never exceeds externalDiscretization.
            double elapsed = 0;
            while (elapsed < externalDiscretization)
            {
                double step        = Math.Min(internalDiscretization, externalDiscretization - elapsed);
                double stepSquared = step * step;

                Calculate2ndDerivatives(tau);

                for (int joint = 0; joint < 3; joint++)
                {
                    // Position is advanced with the velocity from before this sub-step,
                    // so it must be updated before the velocity itself.
                    arc[joint]  += dArc[joint] * step + 0.5 * d2Arc[joint] * stepSquared;
                    dArc[joint] += d2Arc[joint] * step;
                }

                elapsed += internalDiscretization;
            }

            // If the integration produced NaN anywhere, overwrite the first three entries
            // with fixed values (arc = 100, dArc = 0).
            // NOTE(review): presumably 100 is chosen so that IsStateOK() rejects the state - confirm.
            bool stateHasNaN = arc.Any(value => double.IsNaN(value)) || dArc.Any(value => double.IsNaN(value));
            if (stateHasNaN)
            {
                for (int joint = 0; joint < 3; joint++)
                {
                    arc[joint]  = 100;
                    dArc[joint] = 0;
                }
            }

            double reward = -tauViolation * tauPenalty;

            CopyState();

            // Invalid state: apply the crush penalty and end the episode.
            if (!IsStateOK())
            {
                reward -= crushPenalty;
                CurrentState.IsTerminal = true;
                return(reward);
            }

            // Goal reached (only counted while goalWorks is set): bonus and end the episode.
            if (goalWorks && IsAtGoalPosition())
            {
                reward += goalReward;
                CurrentState.IsTerminal = true;
                return(reward);
            }

            // Ordinary step: standard reward, episode continues.
            reward += GetStandardReward();
            CurrentState.IsTerminal = false;
            return(reward);
        }
        /// <summary>
        /// Applies a discrete action for one external time step and returns the reward.
        /// </summary>
        /// <remarks>
        /// The first element of the action vector selects a force from <c>actionMap</c>.
        /// The state (<c>x</c>, <c>dX</c>, <c>theta</c>, <c>dTheta</c>) is integrated in
        /// sub-steps of at most <c>internalDiscretization</c>, with <c>RectifyState</c>
        /// called after each sub-step. <c>UpdateCurrentState</c> is called before returning.
        /// </remarks>
        /// <param name="action">Action whose first vector element indexes <c>actionMap</c>.</param>
        /// <returns>
        /// <c>cosTheta</c> when the state is valid and inside the rewarded region,
        /// -1 when valid but outside it, and minus <c>boundryPenalty</c> when invalid.
        /// </returns>
        public override Reinforcement PerformAction(Core.Action <int> action)
        {
            double force = actionMap[action.ActionVector.First()];

            // Walk through the external step in internal sub-steps; the last sub-step is
            // shortened so that the total integrated time never exceeds externalDiscretization.
            double elapsed = 0;
            while (elapsed < externalDiscretization)
            {
                double step        = Math.Min(internalDiscretization, externalDiscretization - elapsed);
                double stepSquared = step * step;

                // The second derivative of x depends on the second derivative of theta,
                // so the latter has to be evaluated first.
                double thetaAccel = TimeDerivativeFromTheta(dX, sinTheta, cosTheta, dTheta, force);
                double xAccel     = TimeDerivativeFromX(dX, sinTheta, cosTheta, dTheta, force, thetaAccel);

                // Positions use the velocities from before this sub-step.
                x  += (dX * step) + (0.5 * xAccel * stepSquared);
                dX += xAccel * step;

                theta  += dTheta * step + 0.5 * thetaAccel * stepSquared;
                dTheta += thetaAccel * step;

                RectifyState();

                elapsed += internalDiscretization;
            }

            double reward = 0;

            if (!IsStateValid)
            {
                reward -= boundryPenalty;
            }
            else if (Math.Abs(dTheta) < Math.PI * 2 && Math.Abs(x) < rewardAreaRadius)
            {
                // Angular speed below 2*pi and |x| within the reward radius.
                reward += cosTheta;
            }
            else
            {
                reward += -1;
            }

            UpdateCurrentState();

            return(reward);
        }
示例#3
0
        /// <summary>
        /// Applies a continuous six-component action for one external time step and
        /// returns the shaped reward.
        /// </summary>
        /// <remarks>
        /// The external step is split into sub-steps of at most <c>internalDiscretization</c>.
        /// In every sub-step six torques are computed according to <c>controlType</c>,
        /// written into a ten-element torque array at the indices given by <c>p</c>, and
        /// passed to <c>theObject.GoAhead</c>. Afterwards <c>UpdateCurrentState</c> is called
        /// and the reward is assembled from values read out of <c>jointImages</c>.
        /// </remarks>
        /// <param name="action">Action vector; elements 0 through 5 are read.</param>
        /// <returns>The reward accumulated for this step.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>controlType</c> is not one of the handled values.
        /// </exception>
        public override Reinforcement PerformAction(Core.Action <double> action)
        {
            double reward = 0;

            // Torque array handed to GoAhead; only the entries at p[0..5] are ever written,
            // the remaining ones stay 0.
            double[] taus = Enumerable.Repeat(0.0, 10).ToArray();
            // Per-input control signal (clamped action, or PD output plus action).
            double[] u    = new double[6];

            double arc, d_arc;

            // The last sub-step is shortened so the integrated time never exceeds
            // externalDiscretization.
            for (double t = 0; t < externalDiscretization; t += internalDiscretization)
            {
                double   dt    = Math.Min(internalDiscretization, externalDiscretization - t);
                double[] arcs  = theObject.GetLinkArcs();
                double[] dArcs = theObject.GetLinkArcVelocities();
                for (int i = 0; i < 6; i++)
                {
                    switch (controlType)
                    {
                    // Action is clamped to [-1, 1] and scaled directly to a torque.
                    case ControlType.Direct:
                        u[i]       = Math.Min(Math.Max(-1, action[i]), 1);
                        // Penalise the clamped-off part of the action; weighting by
                        // dt / externalDiscretization makes the sub-step penalties sum to
                        // one full penalty per external step.
                        reward    -= Math.Abs(action[i] - u[i]) * tauPenalty * dt / externalDiscretization;
                        taus[p[i]] = u[i] * maxTau;
                        break;

                    // Clamped action shifts the target arc; a PD law on the arc error
                    // produces the torque, clamped to [-maxTau, maxTau].
                    case ControlType.ProportionalDerivative:
                        u[i]       = Math.Min(Math.Max(-1, action[i]), 1);
                        reward    -= Math.Abs(action[i] - u[i]) * tauPenalty * dt / externalDiscretization;
                        // Arc of link p[i] relative to link p[i] - 1.
                        arc        = arcs[p[i]] - arcs[p[i] - 1];
                        // NOTE(review): nominalArcs is indexed by i here, by p[i] inside the
                        // parentheses, and by p[i] - 1 in the StabilizedProportionalDerivative
                        // case below - confirm which indexing is intended.
                        arc       -= nominalArcs[i] + u[i] * (objConsts[p[i]].MaxArc - nominalArcs[p[i]]);
                        arc        = NormalizedArc(arc);
                        d_arc      = dArcs[p[i]] - dArcs[p[i] - 1];
                        taus[p[i]] = Math.Min(Math.Max(-maxTau, -arc * controlKp - d_arc * controlKd), maxTau);
                        break;

                    // A PD term towards the nominal arc, squashed by atan, is added to
                    // the raw action; the sum is clamped and scaled to a torque.
                    case ControlType.StabilizedProportionalDerivative:
                        arc   = arcs[p[i]] - arcs[p[i] - 1];
                        arc  -= nominalArcs[p[i] - 1];
                        arc   = NormalizedArc(arc);
                        d_arc = dArcs[p[i]] - dArcs[p[i] - 1];
                        // atan(...) * 2 / pi maps the PD output into the open interval (-1, 1).
                        double pd_ctrl = Math.Atan(-arc * controlKp - d_arc * controlKd) * 2 / Math.PI;
                        u[i]        = pd_ctrl + action[i];
                        taus[p[i]]  = Math.Min(Math.Max(-1, u[i]), 1);
                        // Here the penalty is on the clamped-off part of the combined signal.
                        reward     -= Math.Abs(taus[p[i]] - u[i]) * tauPenalty * dt / externalDiscretization;
                        // NOTE(review): scales by maxTaus[i], whereas the other cases use the
                        // scalar maxTau - confirm this difference is intentional.
                        taus[p[i]] *= maxTaus[i];
                        break;

                    default:
                        throw new InvalidOperationException("Unknown control type");
                    }
                }

                theObject.GoAhead(taus, dt);
            }

            UpdateCurrentState();

            // Once joint 4 has passed x = 6 the whole object is shifted back by 5 along x.
            // NOTE(review): this happens after UpdateCurrentState(); whether jointImages
            // reflects the shift for the reads below cannot be told from here - confirm.
            if (jointImages[4].X.x > 6)
            {
                theObject.Translate(new AVector2D(-5, 0));
            }

            // Reward shaping, variant labelled "system 6" by the original author.
            // speed: mean horizontal velocity of joints 3 and 4.
            double speed    = 0.5 * (jointImages[3].V.x + jointImages[4].V.x);
            // back / front: the larger of a capped vertical-velocity term (joint 3 resp. 4)
            // and a soft-stepped height term for joint 0 resp. 9, offset by 0.5 when that
            // joint is not standing.
            double back     = Math.Max(Math.Min(trunkWeight * jointImages[3].V.y, feetWeight), feetWeight * SoftStep01((jointImages[0].IsStanding ? 0 : 0.5) + jointImages[0].X.y * 5));
            double front    = Math.Max(Math.Min(trunkWeight * jointImages[4].V.y, feetWeight), feetWeight * SoftStep01((jointImages[9].IsStanding ? 0 : 0.5) + jointImages[9].X.y * 5));
            double overload = 0;

            // Sum of |TauW| over the six controlled joints, each capped at maxTau.
            for (int i = 0; i < 6; i++)
            {
                overload += Math.Min(Math.Abs(jointImages[p[i]].TauW), maxTau);
            }

            reward += speed;
            reward -= overload * overloadPenalty;
            // Weighted by (1 - SoftStep01(speed)).
            reward += (Math.Max(back, front) - 1) * (1.0 - SoftStep01(speed));
            // Face-fall term: non-zero only when joint 6 moves downwards, moves faster in x
            // than joint 9, and is ahead of joint 9 in x; its magnitude grows with those
            // velocities.
            reward += faceFallPenalty * (SoftStep01(speed) - 1) *
                      (-jointImages[6].V.y > 0 && jointImages[6].V.x - jointImages[9].V.x > 0 && jointImages[6].X.x > jointImages[9].X.x ? 1 : 0) *
                      (-jointImages[6].V.y + jointImages[6].V.x - jointImages[9].V.x);
            // Height-dependent terms for joints 1, 2 and 5, each with its own scale factor
            // on X.y before soft-stepping.
            reward += touchPenalty * (SoftStep01(jointImages[1].X.y * 9) - 1);
            reward += touchPenalty * (SoftStep01(jointImages[2].X.y * 5) - 1);
            reward += touchPenalty * (SoftStep01(jointImages[5].X.y * 2) - 1);

            // Unlike the torque penalties above, the crush penalty is applied once per call.
            if (!IsStateOK())
            {
                reward -= crushPenalty;
            }

            return(reward);
        }