/// <summary>
/// Computes the gradient of the policy with respect to the reward parameters
/// for the given transition.
/// </summary>
/// <param name="prevStateID">ID of the state the action was taken in.</param>
/// <param name="actionID">ID of the action that was taken.</param>
/// <param name="nextStateID">ID of the state reached after the action.</param>
/// <returns>
/// The vector <c>Temperature * GetActionProb(prevStateID, actionID) * (-S)</c>, where
/// <c>S</c> is the sum of <c>GetQGradient</c> over every action returned by
/// <c>GetActionsExcept(actionID)</c>. Its length is <c>RewardParameters.Count</c>.
/// </returns>
protected virtual DenseVector GetUpdatedRwdParamsGradient(uint prevStateID, uint actionID, uint nextStateID)
        {
            //accumulates ∑ ∇θt Qt(b|it;θt) over every action b other than the one taken
            var sumActionQGradient = new DenseVector(this.RewardParameters.Count);

            foreach (var otherAction in this.GetActionsExcept(actionID))
            {
                sumActionQGradient += this.GetQGradient(prevStateID, otherAction, nextStateID, this.PlanningDepth);
            }

            //∇θt μ(at|it;Qt) = τ·μ(a|Qt) [∇θt Qt(a|it;θt) - ∑ ∇θt Qt(b|it;θt)]
            //NOTE(review): the formula above includes a ∇θt Qt(a|it;θt) term for the taken
            //action, but only the negated sum over the other actions is used here - confirm
            //whether that term is intentionally omitted.
            return this.Temperature * this.GetActionProb(prevStateID, actionID) * (-sumActionQGradient);
        }
        // Example #2 (0)
        /// <summary>
        /// Builds the two vectors of dual variables ("alfa" per row, "beta" per column)
        /// for the given solution matrix.
        /// </summary>
        /// <remarks>
        /// Starting from <c>alfa[0] = 0</c>, values are propagated across the non-zero cells
        /// of <paramref name="solution"/>: for a non-zero cell (r, c), <c>beta[c]</c> is
        /// derived from <c>alfa[r]</c> through <c>Solve</c>, and <c>alfa[r]</c> from
        /// <c>beta[c]</c> likewise. Passes repeat until every entry is determined or a full
        /// pass determines nothing new.
        /// NOTE(review): this looks like the potentials step of a transportation-problem
        /// solver, with <c>Solve</c> resolving one variable from the other - confirm.
        /// </remarks>
        /// <param name="solution">Matrix whose non-zero cells link a row variable to a column variable.</param>
        /// <returns>
        /// A tuple of (alfa values, beta values). Entries that could not be derived from
        /// the non-zero cells keep the sentinel value <c>int.MaxValue</c>.
        /// </returns>
        public Tuple <double[], double[]> BuildDualVariables(double[,] solution)
        {
            // Sentinel for "not determined yet". It is only ever assigned, never computed,
            // so exact floating-point comparison against it is safe.
            const double undetermined = int.MaxValue;

            var matrix      = DenseMatrix.OfArray(solution);
            int rowCount    = matrix.RowCount;
            int columnCount = matrix.ColumnCount;

            DenseVector alfaVector = DenseVector.Create(rowCount, undetermined);
            DenseVector betaVector = DenseVector.Create(columnCount, undetermined);

            alfaVector[0] = 0;

            // Rows and columns are visited interleaved (row 0, column 0, row 1, ...),
            // so a non-square matrix needs as many steps as its larger dimension.
            int stepCount      = Math.Max(rowCount, columnCount);
            int remainingAfter = CountUndetermined(alfaVector, undetermined)
                                 + CountUndetermined(betaVector, undetermined);
            int remainingBefore;

            do
            {
                remainingBefore = remainingAfter;

                for (int index = 0; index < stepCount; index++)
                {
                    if (index < rowCount)
                    {
                        var row = matrix.Row(index);

                        for (int i = 0; i < row.Count; i++)
                        {
                            if (alfaVector[index] != undetermined && row[i] != 0)
                            {
                                betaVector[i] = Solve(alfaVector[index], row[i]);
                            }
                        }
                    }

                    if (index < columnCount)
                    {
                        var column = matrix.Column(index);

                        for (int i = 0; i < column.Count; i++)
                        {
                            if (betaVector[index] != undetermined && column[i] != 0)
                            {
                                alfaVector[i] = Solve(betaVector[index], column[i]);
                            }
                        }
                    }
                }

                remainingAfter = CountUndetermined(alfaVector, undetermined)
                                 + CountUndetermined(betaVector, undetermined);

                // Keep going while ANY entry is still undetermined (not only while both
                // vectors have one), but stop once a pass makes no progress so that
                // cells which cannot be reached never cause an endless loop.
            } while (remainingAfter > 0 && remainingAfter < remainingBefore);

            return(new Tuple <double[], double[]>(alfaVector.Values, betaVector.Values));
        }

        /// <summary>Counts the entries of <paramref name="vector"/> equal to <paramref name="sentinel"/>.</summary>
        private static int CountUndetermined(DenseVector vector, double sentinel)
        {
            int count = 0;

            for (int i = 0; i < vector.Count; i++)
            {
                if (vector[i] == sentinel)
                {
                    count++;
                }
            }

            return(count);
        }