/// <summary>
/// Pushes <paramref name="input"/> through the network via
/// <c>RNNInterop.PropagateInput</c> and wraps the result in a <c>DenseVector</c>.
/// </summary>
/// <param name="input">Input values; asserted (debug builds) to contain no NaN.</param>
/// <returns>
/// The propagated output, requested with as many entries as the last layer in
/// <c>Spec.Layers</c> has nodes; asserted (debug builds) to contain no NaN.
/// </returns>
Vec Propagate(Vec input)
{
    Debug.Assert(input.All(value => !double.IsNaN(value)));

    // Arguments are captured in the same left-to-right order the call would evaluate them.
    var context = PropagationContext;
    var inputValues = input.ToArray();
    var outputSize = Spec.Layers.Last().NodeCount;

    var rawOutput = RNNInterop.PropagateInput(context, inputValues, outputSize);
    var output = new DenseVector(rawOutput);

    Debug.Assert(output.All(value => !double.IsNaN(value)));
    return output;
}
/// <summary>
/// Fits non-negative coefficients <c>x</c> so that <c>input * x</c> approximates
/// <paramref name="y"/> in the least-squares sense, using an active-set iteration:
/// columns start in the "zero" set Z and are moved one at a time into the "positive"
/// set P, with an inner loop that pushes columns back to Z when their coefficient
/// would become non-positive.
/// NOTE(review): the structure looks like a port of the Lawson-Hanson scheme used by
/// MATLAB's lsqnonneg (see the tolerance formula below) - confirm against the source.
/// </summary>
/// <param name="input">Design matrix, one row per observation.</param>
/// <param name="y">Target vector; its length must equal <c>input.RowCount</c>.</param>
/// <returns>The coefficient vector, one entry per column of <paramref name="input"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="y"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The row count of <paramref name="input"/> differs from the length of <paramref name="y"/>.
/// </exception>
public static Vector Fit(Matrix input, Vector y)
{
    // The one-argument ArgumentNullException constructor takes the parameter NAME,
    // so the message is passed through the (paramName, message) overload instead.
    if (input == null)
    {
        throw new ArgumentNullException("input", "输入不能为空");
    }

    if (y == null)
    {
        throw new ArgumentNullException("y", "输入不能为空");
    }

    if (input.RowCount != y.Count)
    {
        throw new ArgumentException("输入的长度不一致");
    }

    // Reference formula: tol = 10*eps*norm(C,1)*length(C)
    // Here the norm term is the sum of the absolute values of every entry of input.
    // eps must be double machine epsilon (2^-52). Single.Epsilon is the smallest
    // positive subnormal float (~1.4e-45) and made the tolerance effectively zero.
    const double machineEpsilon = 2.2204460492503131e-16;

    var norm = new DenseVector(input.ColumnCount);
    for (int i = 0; i < input.RowCount; i++)
    {
        for (int j = 0; j < input.ColumnCount; j++)
        {
            norm[j] += Math.Abs(input[i, j]);
        }
    }

    var tol = 10 * machineEpsilon * norm.Sum() * input.RowCount;

    // P and Z are membership markers indexed by column: a non-zero entry means the
    // column belongs to that set. Z starts full (marked with 1-based column numbers),
    // P starts empty.
    var P = new DenseVector(input.ColumnCount);
    var Z = new DenseVector(input.ColumnCount);
    for (int i = 1; i <= Z.Count; i++)
    {
        Z[i - 1] = i;
    }

    var ra = P.Clone();
    var ZZ = Z.Clone(); // 1-based column numbers currently in Z
    var resid = y - input * ra;
    var w = input.Transpose() * resid;

    // set up iteration criterion
    var iter = 0;
    var itmax = 3 * input.ColumnCount;

    // Outer loop: continue while Z is non-empty and some column in Z still has
    // w above tolerance.
    while (Z.Any(marker => marker != 0))
    {
        var widx = ((Vector)w).Slice((Vector)ZZ.Subtract(1));
        if (!widx.Any(value => (value - tol) > 0))
        {
            break;
        }

        // Move the Z column with the largest w into P.
        var t = widx.MaximumIndex();
        t = (int)ZZ[t];
        P[t - 1] = t;
        Z[t - 1] = 0;
        var PP = P.Find();
        ZZ = Z.Find().Add(1);

        // CP is input with the Z columns zeroed out.
        var CP = new DenseMatrix(input.RowCount, input.ColumnCount);
        for (int j = 0; j < input.RowCount; j++)
        {
            foreach (var pj in PP)
            {
                CP[j, (int)pj] = input[j, (int)pj];
            }

            foreach (var zj in ZZ)
            {
                CP[j, (int)zj - 1] = 0;
            }
        }

        var z = CP.PseudoInverse() * y;
        foreach (var zj in ZZ)
        {
            z[(int)zj - 1] = 0;
        }

        // Inner loop: runs while any coefficient of a P column is <= tol.
        while (true)
        {
            var ztemp = ((Vector)z).Slice((Vector)PP);
            if (!ztemp.Any(value => value <= tol))
            {
                break;
            }

            iter = iter + 1;
            if (iter > itmax)
            {
                // Iteration budget exhausted: stop and hand back the current
                // intermediate solution instead of continuing the outer loop.
                ra = z;
                return((Vector)ra);
            }

            // Columns in P whose trial coefficient is not positive.
            var lst = new List<int>();
            for (int i = 0; i < z.Count; i++)
            {
                if (z[i] <= tol && P[i] != 0)
                {
                    lst.Add(i);
                }
            }

            var QQ = lst.ToArray();
            var ratemp = ((Vector)ra).Slice(QQ);

            // Step from ra towards z by the smallest ratio ra / (ra - z) over those columns.
            var alpha = (ratemp.PointwiseDivide(ratemp - ((Vector)z).Slice(QQ))).Min();
            ra = ra + alpha * (z - ra);

            // Columns of P whose coefficient is now (numerically) zero go back to Z.
            lst = new List<int>();
            for (int i = 0; i < ra.Count; i++)
            {
                if (Math.Abs(ra[i]) < tol && P[i] != 0)
                {
                    lst.Add(i);
                }
            }

            foreach (var i in lst)
            {
                // Mark with the 1-based column number so column 0 gets a non-zero
                // marker too (writing the 0-based index dropped it from both sets).
                Z[i] = i + 1;
                P[i] = 0;
            }

            PP = P.Find();
            ZZ = Z.Find().Add(1);
            for (int j = 0; j < input.RowCount; j++)
            {
                foreach (var pj in PP)
                {
                    CP[j, (int)pj] = input[j, (int)pj];
                }

                foreach (var zj in ZZ)
                {
                    CP[j, (int)zj - 1] = 0;
                }
            }

            z = CP.PseudoInverse() * y;
            foreach (var zj in ZZ)
            {
                z[(int)zj - 1] = 0;
            }
        }

        ra = z;
        resid = y - input * ra;
        w = input.Transpose() * resid;
    }

    return((Vector)ra);
}
/// <summary>
/// Recursively computes the gradient of Q with respect to the reward parameters:
/// ∇θ Q(o,a;θ) = ∇θ R(o,a;θ) + γ ∑ T(o'|o,a) ∑ π(b|o').∇θ Q(o',b;θ)
/// </summary>
/// <param name="prevStateID">State the action is taken from.</param>
/// <param name="actionID">Action taken.</param>
/// <param name="nextStateID">Resulting state; used for the reward features and the cache key.</param>
/// <param name="curPlanDepth">
/// Remaining recursion depth; at 0 the reward features are returned as-is.
/// </param>
/// <returns>
/// The reward features at depth 0, a cached gradient when one exists, otherwise the
/// newly computed gradient (which is also stored in the cache).
/// </returns>
protected virtual DenseVector GetQGradient(uint prevStateID, uint actionID, uint nextStateID, uint curPlanDepth)
{
    var rewardFeatures = this.GetRewardFeatures(prevStateID, actionID, nextStateID);
    if (rewardFeatures.Any(x => double.IsInfinity(x)) || rewardFeatures.Contains(double.NaN))
    {
        // Deliberate no-op: a place to park a debugger breakpoint on bad values.
        int breakpointAnchor = 0;
        breakpointAnchor++;
    }

    // Depth exhausted: the gradient is just the reward features.
    if (curPlanDepth == 0)
    {
        return rewardFeatures;
    }

    // Reuse a previously computed gradient when available.
    var cachedGradient = this.GetQGradientCache(prevStateID, actionID, nextStateID, curPlanDepth);
    if (cachedGradient != null)
    {
        return cachedGradient;
    }

    // Accumulates ∑ T(o'|o,a) ∑ π(b|o').∇θ Q(o',b;θ) by recursing one level down.
    var successorStates = this.Agent.LongTermMemory.GetAllStateActionTransitions(prevStateID, actionID);
    var expectedChildGradient = new DenseVector(this.RewardParameters.Count);
    foreach (var successor in successorStates)
    {
        var candidateAction = 0u;
        while (candidateAction < this.NumActions)
        {
            var sampledNext = this.GetRandomNextState(successor, candidateAction);

            // uint.MaxValue marks "no next state"; such actions contribute nothing.
            if (!sampledNext.Equals(uint.MaxValue))
            {
                var weight =
                    this.GetStateTransitionProb(prevStateID, actionID, successor) *
                    this.GetActionProbability(successor, candidateAction);
                var childGradient = this.GetQGradient(successor, candidateAction, sampledNext, curPlanDepth - 1);
                expectedChildGradient += weight * childGradient;
            }

            candidateAction++;
        }
    }

    if (expectedChildGradient.Any(x => double.IsInfinity(x)))
    {
        // Deliberate no-op: debugger breakpoint anchor.
        int breakpointAnchor = 0;
        breakpointAnchor++;
    }

    // Reward term plus the discounted expectation over successors.
    var gradient = rewardFeatures + (this.Discount * expectedChildGradient);
    if (gradient.Any(x => double.IsInfinity(x)) || gradient.Contains(double.NaN))
    {
        // Deliberate no-op: debugger breakpoint anchor.
        int breakpointAnchor = 0;
        breakpointAnchor++;
    }

    // Store the result so later calls with the same key can skip the recursion.
    this.SetGradientCache(prevStateID, actionID, nextStateID, curPlanDepth, gradient);
    return gradient;
}