/// <summary>
/// Computes the gradient of the action-selection policy with respect to the reward
/// parameters for one observed transition.
/// </summary>
/// <param name="prevStateID">State in which the action was selected.</param>
/// <param name="actionID">The action that was selected.</param>
/// <param name="nextStateID">State passed on to <c>GetQGradient</c> as the transition target.</param>
/// <returns>
/// <c>Temperature * GetActionProb(prevStateID, actionID) * (-S)</c>, where <c>S</c> is the sum of
/// <c>GetQGradient(prevStateID, b, nextStateID, PlanningDepth)</c> over every action <c>b</c>
/// returned by <c>GetActionsExcept(actionID)</c>. The vector has
/// <c>RewardParameters.Count</c> entries.
/// </returns>
/// <remarks>
/// NOTE(review): the formula written in the original comments is
/// ∇θ μ(a|i;Q) = τ·μ(a|Q)·[∇θ Q(a|i;θ) − Σ_b ∇θ Q(b|i;θ)],
/// yet only the "− Σ_b" part over the OTHER actions is computed here; the selected action's own
/// Q-gradient is never added. The numerical behaviour is deliberately left as it was —
/// confirm against the derivation whether the first term is missing.
/// </remarks>
protected virtual DenseVector GetUpdatedRwdParamsGradient(uint prevStateID, uint actionID, uint nextStateID)
{
    // Accumulate the Q-gradients of every action except the one that was taken.
    var sumActionQGradient = new DenseVector(this.RewardParameters.Count);
    foreach (var otherAction in this.GetActionsExcept(actionID))
    {
        sumActionQGradient += this.GetQGradient(prevStateID, otherAction, nextStateID, this.PlanningDepth);
    }

    // Scale the negated sum by the temperature and by the probability of the taken action.
    return this.Temperature * this.GetActionProb(prevStateID, actionID) * (-sumActionQGradient);
}
/// <summary>
/// Builds two vectors of dual variables ("alfa", one entry per row, and "beta", one entry per
/// column) from the non-zero cells of <paramref name="solution"/>.
/// </summary>
/// <remarks>
/// <c>alfa[0]</c> is fixed to 0. Then, repeatedly, every non-zero cell (r, c) whose row value
/// <c>alfa[r]</c> is known yields <c>beta[c] = Solve(alfa[r], cell)</c>, and every non-zero cell
/// whose column value <c>beta[c]</c> is known yields <c>alfa[r] = Solve(beta[c], cell)</c>.
/// Propagation continues until BOTH vectors are fully resolved.
/// NOTE(review): this looks like the potentials step of a transportation-problem solver;
/// the exact equation is whatever <c>Solve</c> implements — confirm there.
/// </remarks>
/// <param name="solution">Matrix whose non-zero cells link row and column variables.</param>
/// <returns>A tuple of (alfa values, beta values) as plain arrays.</returns>
/// <exception cref="ArgumentNullException"><paramref name="solution"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// A full pass resolved nothing new while entries are still unresolved, i.e. the non-zero cells
/// do not connect every row and column to row 0.
/// </exception>
public Tuple<double[], double[]> BuildDualVariables(double[,] solution)
{
    if (solution == null)
    {
        throw new ArgumentNullException("solution");
    }

    // Marker for "not computed yet"; compared exactly on purpose, it is never a computed value.
    const double Unresolved = int.MaxValue;

    var matrix = DenseMatrix.OfArray(solution);
    int rowCount = matrix.RowCount;
    int columnCount = matrix.ColumnCount;

    DenseVector alfaVector = DenseVector.Create(rowCount, Unresolved);
    DenseVector betaVector = DenseVector.Create(columnCount, Unresolved);
    alfaVector[0] = 0;

    // Rows and columns are visited interleaved (row k, then column k), which for a square
    // matrix is exactly the original visiting order.
    int passLength = Math.Max(rowCount, columnCount);

    while (true)
    {
        bool progressed = false;

        for (int index = 0; index < passLength; index++)
        {
            // Row step: a known alfa[index] determines beta for every non-zero cell in the row.
            if (index < rowCount && alfaVector[index] != Unresolved)
            {
                var rowValues = matrix.Row(index);
                for (int column = 0; column < rowValues.Count; column++)
                {
                    if (rowValues[column] != 0)
                    {
                        double solved = Solve(alfaVector[index], rowValues[column]);
                        if (betaVector[column] == Unresolved && solved != Unresolved)
                        {
                            progressed = true;
                        }

                        betaVector[column] = solved;
                    }
                }
            }

            // Column step: a known beta[index] determines alfa for every non-zero cell in the column.
            if (index < columnCount && betaVector[index] != Unresolved)
            {
                var columnValues = matrix.Column(index);
                for (int row = 0; row < columnValues.Count; row++)
                {
                    if (columnValues[row] != 0)
                    {
                        double solved = Solve(betaVector[index], columnValues[row]);
                        if (alfaVector[row] == Unresolved && solved != Unresolved)
                        {
                            progressed = true;
                        }

                        alfaVector[row] = solved;
                    }
                }
            }
        }

        // Finished only when NEITHER vector still holds the marker. The previous "&&" loop
        // condition stopped as soon as one vector was complete and could return the marker
        // value as if it were a real dual variable.
        if (!alfaVector.Contains(Unresolved) && !betaVector.Contains(Unresolved))
        {
            break;
        }

        // Nothing new was resolved: further passes would repeat forever.
        if (!progressed)
        {
            throw new InvalidOperationException(
                "Dual variables cannot be fully determined: the non-zero cells of the solution do not connect all rows and columns.");
        }
    }

    return new Tuple<double[], double[]>(alfaVector.Values, betaVector.Values);
}