Beispiel #1
0
        /// <summary>
        /// Feeds <paramref name="input"/> to <c>RNNInterop.PropagateInput</c> using the current
        /// <c>PropagationContext</c> and wraps the values it returns in a <c>DenseVector</c>.
        /// </summary>
        /// <param name="input">Values to propagate. Debug builds assert that none of them is NaN.</param>
        /// <returns>
        /// The vector built from the interop result. Debug builds assert that it contains no NaN.
        /// </returns>
        Vec Propagate(Vec input)
        {
            // The check lives inside Debug.Assert so it is compiled out of release builds.
            Debug.Assert(input.All(value => !double.IsNaN(value)));

            // The node count of the last layer is handed to the interop call
            // (presumably the expected output length - confirm against RNNInterop).
            var rawOutput = RNNInterop.PropagateInput(
                PropagationContext,
                input.ToArray(),
                Spec.Layers.Last().NodeCount);
            var result = new DenseVector(rawOutput);

            Debug.Assert(result.All(value => !double.IsNaN(value)));
            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Iteratively fits a coefficient vector <c>x</c> so that <c>input * x</c> approximates
        /// <paramref name="y"/>, using an active-set scheme in which each coefficient is either
        /// "free" (solved for through a pseudo-inverse) or clamped to zero.
        /// NOTE(review): the structure matches the Lawson-Hanson non-negative least-squares
        /// algorithm as implemented by MATLAB's lsqnonneg - confirm that this is the intended contract.
        /// </summary>
        /// <param name="input">Design matrix; its row count must equal the length of <paramref name="y"/>.</param>
        /// <param name="y">Target vector.</param>
        /// <returns>The fitted coefficient vector, one entry per column of <paramref name="input"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="input"/> or <paramref name="y"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The row count of <paramref name="input"/> differs from the length of <paramref name="y"/>.
        /// </exception>
        public static Vector Fit(Matrix input, Vector y)
        {
            // The single-string ArgumentNullException constructor interprets its argument as the
            // parameter NAME, so the name and the message are passed separately.
            if (input == null)
            {
                throw new ArgumentNullException("input", "输入不能为空");
            }
            if (y == null)
            {
                throw new ArgumentNullException("y", "输入不能为空");
            }
            if (input.RowCount != y.Count)
            {
                throw new ArgumentException("输入的长度不一致");
            }

            // Tolerance modelled on: tol = 10 * eps * norm(C,1) * length(C).
            // The norm factor used here is the sum of the absolute values of all matrix entries
            // (accumulated per column, then summed).
            var norm = new DenseVector(input.ColumnCount);

            for (int i = 0; i < input.RowCount; i++)
            {
                for (int j = 0; j < input.ColumnCount; j++)
                {
                    norm[j] += Math.Abs(input[i, j]);
                }
            }

            // "eps" in the formula is the double-precision machine epsilon (2^-52).
            // Single.Epsilon is the smallest positive denormal float (about 1.4e-45); using it
            // made the tolerance effectively zero, so near-zero coefficients were never clamped.
            const double machineEpsilon = 2.220446049250313e-16;
            var tol = 10 * machineEpsilon * norm.Sum() * input.RowCount;

            // P marks the free ("positive") set and Z the clamped ("zero") set. Column k belongs
            // to a set when entry k of that vector is non-zero; the stored value is the 1-based
            // index k + 1. Initially every column is clamped.
            // NOTE(review): this relies on Find() returning the 0-based positions of the non-zero
            // entries, which is how its results are used below - confirm against its definition.
            var P = new DenseVector(input.ColumnCount);
            var Z = new DenseVector(input.ColumnCount);

            for (int i = 1; i <= Z.Count; i++)
            {
                Z[i - 1] = i;
            }
            var ra    = P.Clone();       // current solution, starts at zero
            var ZZ    = Z.Clone();       // 1-based indices of the clamped columns
            var resid = y - input * ra;
            var w     = input.Transpose() * resid;

            // Iteration budget shared by all inner-loop passes.
            var iter  = 0;
            var itmax = 3 * input.ColumnCount;

            // Outer loop: runs while at least one column is still clamped.
            while (Z.Any(marker => marker != 0))
            {
                // Gradient entries of the clamped columns (ZZ is 1-based, Slice is fed 0-based indices).
                var widx = ((Vector)w).Slice((Vector)ZZ.Subtract(1));

                // Stop when no clamped column has a gradient above the tolerance.
                if (!widx.Any(gradient => gradient > tol))
                {
                    break;
                }

                // Free the clamped column with the largest gradient.
                var t = widx.MaximumIndex();
                t        = (int)ZZ[t];
                P[t - 1] = t;
                Z[t - 1] = 0;
                var PP = P.Find();
                ZZ = Z.Find().Add(1);

                // CP is the input matrix with every clamped column zeroed out.
                var CP = new DenseMatrix(input.RowCount, input.ColumnCount);

                for (int j = 0; j < input.RowCount; j++)
                {
                    foreach (var pj in PP)
                    {
                        CP[j, (int)pj] = input[j, (int)pj];
                    }
                    foreach (var zj in ZZ)
                    {
                        CP[j, (int)zj - 1] = 0;
                    }
                }

                // Candidate solution over the free columns; clamped entries are forced to zero.
                var z = CP.PseudoInverse() * y;
                foreach (var zj in ZZ)
                {
                    z[(int)zj - 1] = 0;
                }

                // Inner loop: repeat while some free coefficient of the candidate is <= tol.
                while (true)
                {
                    var ztemp = ((Vector)z).Slice((Vector)PP);
                    if (!ztemp.Any(candidate => candidate <= tol))
                    {
                        break;
                    }

                    iter = iter + 1;
                    if (iter > itmax)
                    {
                        // Budget exhausted: stop and hand back the current candidate instead of
                        // continuing the outer loop with an inner loop that can no longer run.
                        ra = z;
                        return((Vector)ra);
                    }

                    // Free columns whose candidate value violates the tolerance.
                    var violating = new List <int>();
                    for (int i = 0; i < z.Count; i++)
                    {
                        if (z[i] <= tol && P[i] != 0)
                        {
                            violating.Add(i);
                        }
                    }
                    var QQ     = violating.ToArray();
                    var ratemp = ((Vector)ra).Slice(QQ);

                    // Step length: the smallest ratio ra / (ra - z) over the violating columns.
                    var alpha  = (ratemp.PointwiseDivide(ratemp - ((Vector)z).Slice(QQ))).Min();
                    ra = ra + alpha * (z - ra);

                    // Move free columns that ended up (numerically) at zero back to the clamped set.
                    var toClamp = new List <int>();
                    for (int i = 0; i < ra.Count; i++)
                    {
                        if (Math.Abs(ra[i]) < tol && P[i] != 0)
                        {
                            toClamp.Add(i);
                        }
                    }
                    foreach (var i in toClamp)
                    {
                        // Store the 1-based index so that column 0 also gets a non-zero marker
                        // (writing the 0-based index dropped column 0 from both sets).
                        Z[i] = i + 1;
                        P[i] = 0;
                    }
                    PP = P.Find();
                    ZZ = Z.Find().Add(1);

                    // Refresh CP for the updated sets and re-solve.
                    for (int j = 0; j < input.RowCount; j++)
                    {
                        foreach (var pj in PP)
                        {
                            CP[j, (int)pj] = input[j, (int)pj];
                        }
                        foreach (var zj in ZZ)
                        {
                            CP[j, (int)zj - 1] = 0;
                        }
                    }

                    z = CP.PseudoInverse() * y;
                    foreach (var zj in ZZ)
                    {
                        z[(int)zj - 1] = 0;
                    }
                }

                // Accept the candidate and recompute residual and gradient.
                ra    = z;
                resid = y - input * ra;
                w     = input.Transpose() * resid;
            }

            return((Vector)ra);
        }
        /// <summary>
        /// Recursively computes the gradient of Q with respect to the reward parameters for the
        /// given transition:
        /// ∇θ Q(o,a;θ) = ∇θ R(o,a;θ) + γ ∑ T(o'|o,a) ∑ π(b|o').∇θ Q(o',b;θ).
        /// </summary>
        /// <param name="prevStateID">State the transition starts from.</param>
        /// <param name="actionID">Action taken in <paramref name="prevStateID"/>.</param>
        /// <param name="nextStateID">State reached by the transition.</param>
        /// <param name="curPlanDepth">
        /// Remaining recursion depth. At 0 only the reward features are returned.
        /// </param>
        /// <returns>
        /// The reward features when <paramref name="curPlanDepth"/> is 0, the cached gradient when
        /// one exists for these arguments, otherwise the newly computed (and then cached) gradient.
        /// </returns>
        protected virtual DenseVector GetQGradient(uint prevStateID, uint actionID, uint nextStateID, uint curPlanDepth)
        {
            //base case: at depth 0 the gradient is just the reward features
            var rewardFeatures = this.GetRewardFeatures(prevStateID, actionID, nextStateID);
            if (curPlanDepth == 0)
            {
                return(rewardFeatures);
            }

            //verifies cache
            var qGradientCache = this.GetQGradientCache(prevStateID, actionID, nextStateID, curPlanDepth);
            if (qGradientCache != null)
            {
                return(qGradientCache);
            }

            //updates gradient of Q wrt params (recursive forward)

            //gets ∑ T(o'|o,a) ∑ π(b|o').∇θ Q(o',b;θ)
            var otherStates        = this.Agent.LongTermMemory.GetAllStateActionTransitions(prevStateID, actionID);
            var sumOthersQGradient = new DenseVector(this.RewardParameters.Count);

            foreach (var otherState in otherStates)
            {
                //T(o'|o,a) does not depend on the follow-up action, so it is looked up once per state
                var transitionProb = this.GetStateTransitionProb(prevStateID, actionID, otherState);

                for (var otherAction = 0u; otherAction < this.NumActions; otherAction++)
                {
                    //uint.MaxValue is treated as "no next state available"; such pairs are skipped
                    var nextState = this.GetRandomNextState(otherState, otherAction);
                    if (nextState.Equals(uint.MaxValue))
                    {
                        continue;
                    }
                    sumOthersQGradient +=
                        transitionProb *
                        this.GetActionProbability(otherState, otherAction) *
                        this.GetQGradient(otherState, otherAction, nextState, curPlanDepth - 1);
                }
            }

            //∇θ Q(o,a;θ) = ∇θ R(o,a;θ) + γ ∑ T(o'|o,a) ∑ π(b|o').∇θ Q(o',b;θ)
            var qGradient = rewardFeatures +
                            (this.Discount * sumOthersQGradient);

            //sets cache
            this.SetGradientCache(prevStateID, actionID, nextStateID, curPlanDepth, qGradient);
            return(qGradient);
        }