Beispiel #1
0
        /// <summary>
        /// Multiplies every element of <paramref name="dst"/>, in place, by the first element of
        /// <paramref name="scalar"/>.
        /// </summary>
        /// <param name="scalar">Span whose first element is the multiplier. Only element 0 is read.</param>
        /// <param name="dst">Values to scale; each element is overwritten with its product.</param>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="scalar"/> is empty while <paramref name="dst"/> is not.
        /// </exception>
        private unsafe void MultiplyScalarU(Span <float> scalar, Span <float> dst)
        {
            if (dst.IsEmpty)
            {
                // Nothing to scale; also avoids touching a possibly empty scalar span.
                return;
            }

            // Reading the value directly removes the need to pin the scalar span.
            float factor = scalar[0];

            fixed (float* pdst = dst)
            {
                float* pDstEnd     = pdst + dst.Length;
                float* pDstCurrent = pdst;

                if (Avx.IsSupported)
                {
                    Vector256<float> factorVector = Vector256.Create(factor);

                    // Main loop: eight floats per iteration using unaligned loads and stores.
                    while (pDstCurrent + 8 <= pDstEnd)
                    {
                        Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
                        Avx.Store(pDstCurrent, Avx.Multiply(dstVector, factorVector));
                        pDstCurrent += 8;
                    }
                }

                // Tail: the remaining (dst.Length % 8) elements, or everything when AVX is unavailable.
                while (pDstCurrent < pDstEnd)
                {
                    *pDstCurrent *= factor;
                    pDstCurrent++;
                }
            }
        }
        /// <summary>
        /// Calculates the stochastic gradient and updates the model in place.
        /// Implements Algorithm 2 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf.
        /// </summary>
        /// <remarks>
        /// Latent vectors are processed eight floats at a time; when <paramref name="latentDim"/> is not a
        /// multiple of 8 the trailing elements are left untouched (presumably callers pad the dimension — confirm).
        /// </remarks>
        /// <param name="fieldIndices">Field index of each of the <paramref name="count"/> features.</param>
        /// <param name="featureIndices">Feature index of each of the <paramref name="count"/> features.</param>
        /// <param name="featureValues">Value of each of the <paramref name="count"/> features.</param>
        /// <param name="latentSum">Latent sums q; the vector for (f', f) is read at offset
        /// f' * fieldCount * latentDim + f * latentDim.</param>
        /// <param name="linearWeights">Linear weights, indexed by feature index; updated in place.</param>
        /// <param name="latentWeights">Latent weights; the vector for (j, f') starts at offset
        /// j * fieldCount * latentDim + f' * latentDim. Updated in place.</param>
        /// <param name="linearAccumulatedSquaredGrads">Accumulated squared gradients of the linear weights; updated in place.</param>
        /// <param name="latentAccumulatedSquaredGrads">Accumulated squared gradients of the latent weights, laid out like
        /// <paramref name="latentWeights"/>; updated in place.</param>
        /// <param name="lambdaLinear">L2 regularization coefficient applied to the linear weights.</param>
        /// <param name="lambdaLatent">L2 regularization coefficient applied to the latent weights.</param>
        /// <param name="learningRate">Step size used by the ADAGRAD update.</param>
        /// <param name="fieldCount">Number of fields (m).</param>
        /// <param name="latentDim">Length of each latent vector (d).</param>
        /// <param name="weight">Factor multiplied into every gradient.</param>
        /// <param name="count">Number of features to process.</param>
        /// <param name="slope">Factor multiplied with the feature value in the loss gradient.</param>
        public static unsafe void CalculateGradientAndUpdate(int *fieldIndices, int *featureIndices, float *featureValues, float *latentSum, float *linearWeights,
                                                             float *latentWeights, float *linearAccumulatedSquaredGrads, float *latentAccumulatedSquaredGrads, float lambdaLinear, float lambdaLatent, float learningRate,
                                                             int fieldCount, int latentDim, float weight, int count, float slope)
        {
            Contracts.Assert(Avx.IsSupported);

            int    m   = fieldCount;
            int    d   = latentDim;
            int *  pf  = fieldIndices;
            int *  pi  = featureIndices;
            float *px  = featureValues;
            float *pq  = latentSum;
            float *pw  = linearWeights;
            float *pv  = latentWeights;
            float *phw = linearAccumulatedSquaredGrads;
            float *phv = latentAccumulatedSquaredGrads;

            Vector256 <float> wei     = Vector256.Create(weight);
            Vector256 <float> s       = Vector256.Create(slope);
            Vector256 <float> lr      = Vector256.Create(learningRate);
            Vector256 <float> lambdav = Vector256.Create(lambdaLatent);

            for (int i = 0; i < count; i++)
            {
                int f = pf[i];
                int j = pi[i];

                // Calculate gradient of linear term w_j.
                float g = weight * (lambdaLinear * pw[j] + slope * px[i]);

                // Accumulate the gradient of the linear term.
                phw[j] += g * g;

                // Perform ADAGRAD update rule to adjust linear term.
                pw[j] -= learningRate / MathF.Sqrt(phw[j]) * g;

                // Update latent term, v_j,f', f'=1,...,m.
                Vector256 <float> x = Avx.BroadcastScalarToVector256(px + i);

                // slope * x depends only on the current feature, so compute it once per feature
                // rather than once per field.
                Vector256 <float> sx = Avx.Multiply(s, x);

                // Start of the j-th feature's latent vectors and of their accumulated squared gradients.
                float *vj  = pv + j * m * d;
                float *hvj = phv + j * m * d;

                for (int fprime = 0; fprime < m; fprime++)
                {
                    float *vjfprime  = vj + fprime * d;
                    float *hvjfprime = hvj + fprime * d;
                    float *qfprimef  = pq + fprime * m * d + f * d;

                    for (int k = 0; k + 8 <= d; k += 8)
                    {
                        Vector256 <float> v = Avx.LoadVector256(vjfprime + k);
                        Vector256 <float> q = Avx.LoadVector256(qfprimef + k);

                        // Calculate L2-norm regularization's gradient.
                        Vector256 <float> gLatent = Avx.Multiply(lambdav, v);

                        Vector256 <float> tmp = q;

                        // Calculate loss function's gradient.
                        // Within the feature's own field, its own contribution v * x is removed from q.
                        if (fprime == f)
                        {
                            tmp = MultiplyAddNegated(v, x, q);
                        }
                        gLatent = MultiplyAdd(sx, tmp, gLatent);
                        gLatent = Avx.Multiply(wei, gLatent);

                        // Accumulate the gradient of latent vectors.
                        Vector256 <float> h = MultiplyAdd(gLatent, gLatent, Avx.LoadVector256(hvjfprime + k));

                        // Perform ADAGRAD update rule to adjust latent vector.
                        v = MultiplyAddNegated(lr, Avx.Multiply(Avx.ReciprocalSqrt(h), gLatent), v);
                        Avx.Store(vjfprime + k, v);
                        Avx.Store(hvjfprime + k, h);
                    }
                }
            }
        }
        // Implements Algorithm 1 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf.
        // Writes to *response the output value of the field-aware factorization machine, i.e. the sum of
        // the linear part and the latent part.
        // The linear part is the inner product of linearWeights and featureValues.
        // The latent part combines the intra-field and inter-field interactions, computed from latentSum,
        // which is cleared and refilled by this call.
        public static unsafe void CalculateIntermediateVariables(int *fieldIndices, int *featureIndices, float *featureValues,
                                                                 float *linearWeights, float *latentWeights, float *latentSum, float *response, int fieldCount, int latentDim, int count)
        {
            Contracts.Assert(Avx.IsSupported);

            // Number of floats occupied by the latent vectors of one feature (or one row of latentSum).
            int rowStride = fieldCount * latentDim;

            float linearPart = 0;
            float latentPart = 0;

            // Reset all fieldCount x fieldCount latent-sum vectors.
            Unsafe.InitBlock(latentSum, 0, (uint)(fieldCount * fieldCount * latentDim * sizeof(float)));

            Vector256<float> interAcc = Vector256<float>.Zero;
            Vector256<float> intraAcc = Vector256<float>.Zero;

            for (int i = 0; i < count; i++)
            {
                int field   = fieldIndices[i];
                int feature = featureIndices[i];

                linearPart += linearWeights[feature] * featureValues[i];

                Vector256<float> value        = Avx.BroadcastScalarToVector256(featureValues + i);
                Vector256<float> valueSquared = Avx.Multiply(value, value);

                int featureBase = feature * rowStride;

                // intraAcc -= <v_j,f, v_j,f> * x * x, where v_j,f is the j-th feature's latent vector
                // in the hidden space of its own field f.
                float* ownLatent = latentWeights + (featureBase + field * latentDim);
                for (int k = 0; k + 8 <= latentDim; k += 8)
                {
                    Vector256<float> v = Avx.LoadVector256(ownLatent + k);
                    intraAcc = MultiplyAddNegated(Avx.Multiply(v, v), valueSquared, intraAcc);
                }

                // q_f,f' += v_j,f' * x for every field f'.
                int sumBase = field * rowStride;
                for (int other = 0; other < fieldCount; other++)
                {
                    int    fieldOffset = other * latentDim;
                    float* source      = latentWeights + (featureBase + fieldOffset);
                    float* target      = latentSum + (sumBase + fieldOffset);

                    for (int k = 0; k + 8 <= latentDim; k += 8)
                    {
                        Vector256<float> v       = Avx.LoadVector256(source + k);
                        Vector256<float> running = Avx.LoadVector256(target + k);
                        Avx.Store(target + k, MultiplyAdd(v, value, running));
                    }
                }
            }

            for (int field = 0; field < fieldCount; field++)
            {
                // Intra-field interactions: intraAcc += <q_f,f, q_f,f>.
                float* diagonal = latentSum + field * rowStride + field * latentDim;
                for (int k = 0; k + 8 <= latentDim; k += 8)
                {
                    Vector256<float> q = Avx.LoadVector256(diagonal + k);
                    intraAcc = MultiplyAdd(q, q, intraAcc);
                }

                // Inter-field interactions: interAcc += <q_f,f', q_f',f>.
                // This loop only visits f' > f, so f != f' always holds.
                for (int other = field + 1; other < fieldCount; other++)
                {
                    float* upper = latentSum + field * rowStride + other * latentDim;
                    float* lower = latentSum + other * rowStride + field * latentDim;
                    for (int k = 0; k + 8 <= latentDim; k += 8)
                    {
                        Vector256<float> a = Avx.LoadVector256(upper + k);
                        Vector256<float> b = Avx.LoadVector256(lower + k);
                        interAcc = MultiplyAdd(a, b, interAcc);
                    }
                }
            }

            // Combine: inter-field sum plus _point5 times the intra-field sum.
            Vector256<float> combined = MultiplyAdd(_point5, intraAcc, interAcc);

            // Horizontal reduction: fold the two 128-bit halves together, then add adjacent pairs twice.
            Vector256<float> folded = Avx.Add(combined, Avx.Permute2x128(combined, combined, 1));
            Vector256<float> paired = Avx.HorizontalAdd(folded, folded);
            Vector256<float> total  = Avx.HorizontalAdd(paired, paired);

            // The lowest slot holds the latent response.
            Sse.StoreScalar(&latentPart, total.GetLower());
            *response = linearPart + latentPart;
        }
Beispiel #4
0
        /// <summary>
        /// Sums the series x^(-1)/1 + x^(-3)/3 + x^(-5)/5 + ... with AVX, four terms per iteration.
        /// </summary>
        /// <param name="x">Point at which the series is evaluated.</param>
        /// <param name="stepThreshold">Summation stops once the absolute value of the last term of a
        /// four-term group drops below this value.</param>
        /// <param name="maxN">Upper bound on the number of terms; terms are added in groups of four, so up to
        /// three more terms than this may be summed.</param>
        /// <returns>
        /// The accumulated sum, or <see cref="Double.NaN"/> when AVX is not supported.
        /// </returns>
        /// <remarks>
        /// Sets <c>N</c> to the number of terms summed and <c>Status</c> to NotSupported, Success (the
        /// threshold was reached) or TooManyIterations (the term budget ran out first).
        /// </remarks>
        protected override unsafe double CalculateImpl(double x, double stepThreshold, int maxN)
        {
            if (!Avx.IsSupported)
            {
                Status = TaylorSeriesStatus.NotSupported;
                return(Double.NaN);
            }

            const int vectorSize = 256 / 8 / sizeof(double);

            // v8888 <- (8, 8, 8, 8)
            var v8888 = Vector256.Create(8.0);

            // xPow8 <- (x^8, x^8, x^8, x^8), obtained by squaring three times.
            var xPow8 = Vector256.Create(x);

            xPow8 = Avx.Multiply(xPow8, xPow8);
            xPow8 = Avx.Multiply(xPow8, xPow8);
            xPow8 = Avx.Multiply(xPow8, xPow8);

            // up <- (x^(-1), x^(-3), x^(-5), x^(-7)); each lane is the previous one divided by x^2.
            var xSquared = x * x;
            var up0      = 1 / x;
            var up1      = up0 / xSquared;
            var up2      = up1 / xSquared;
            var up3      = up2 / xSquared;
            var up       = Vector256.Create(up0, up1, up2, up3);

            // down <- (1, 3, 5, 7)
            var down = Vector256.Create(1.0, 3.0, 5.0, 7.0);

            // sum <- (0, 0, 0, 0)
            var sum = Vector256 <double> .Zero;

            // Convergence is tracked explicitly: comparing N with maxN afterwards would misreport a series
            // that reaches the threshold on the very last permitted iteration.
            var converged = false;

            N = 0;
            while (N < maxN)
            {
                // div <- up / down
                var div = Avx.Divide(up, down);
                // sum <- sum + div
                sum = Avx.Add(sum, div);
                // div = (x1, x2, x3, last)
                var last = div.GetElement(vectorSize - 1);
                N += vectorSize;
                if (Math.Abs(last) < stepThreshold)
                {
                    converged = true;
                    break;
                }

                // up <- up / (x^8, x^8, x^8, x^8)
                up = Avx.Divide(up, xPow8);
                // down <- down + (8, 8, 8, 8)
                down = Avx.Add(down, v8888);
            }

            Status = converged ? TaylorSeriesStatus.Success : TaylorSeriesStatus.TooManyIterations;

            // Add the four lanes from lowest to highest.
            return(sum.GetElement(0) + sum.GetElement(1) + sum.GetElement(2) + sum.GetElement(3));
        }
    }