C# (CSharp) Avx.BroadcastScalarToVector256の例

プログラミング言語: C# (CSharp)

クラス/型: Avx

メソッド/関数: BroadcastScalarToVector256

hotexamples.comのコード掲載数: 4

C# (CSharp) Avx.BroadcastScalarToVector256 - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC# (CSharp)のAvx.BroadcastScalarToVector256の実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

And(30)

Min(30)

BlendVariable(30)

LoadVector256(30)

LoadVector128(30)

Add(27)

Or(27)

Divide(24)

Permute(24)

Ceiling(24)

Compare(23)

AddSubtract(22)

AndNot(20)

LoadAlignedVector256(20)

ConvertToVector256Single(20)

DuplicateEvenIndexed(20)

CompareLessThanOrEqual(18)

DuplicateOddIndexed(17)

GetLowerHalf(17)

CompareGreaterThan(16)

CompareGreaterThanOrEqual(15)

CompareLessThan(15)

MoveMask(15)

ConvertToVector256Double(14)

CompareNotEqual(14)

HorizontalAdd(14)

CompareOrdered(14)

CompareUnordered(13)

Floor(13)

Multiply(13)

CompareNotGreaterThan(13)

CompareEqual(12)

LoadAlignedVector128(12)

Extract(12)

ConvertToVector256Int32(12)

CompareNotGreaterThanOrEqual(12)

CompareNotLessThan(12)

ExtendToVector256(11)

CompareNotLessThanOrEqual(11)

MaskLoad(10)

Max(10)

Insert(10)

ConvertToVector128Single(10)

MaskStore(10)

ConvertToVector128Int32WithTruncation(9)

ConvertToVector128Int32(8)

LoadDquVector256(8)

ConvertToVector256Int32WithTruncation(8)

ExtractVector128(7)

Blend(6)

コード例 #1

ファイルを表示

ファイル: AVX.cs プロジェクト: linhdh/CPU-Benchmark

        /// <summary>
        /// Multiplies every element of <paramref name="dst"/> in place by the first element of
        /// <paramref name="scalar"/>, using unaligned 256-bit AVX loads and stores for whole groups
        /// of eight floats and a scalar loop for the remaining elements.
        /// </summary>
        /// <param name="scalar">Span whose first element is the multiplier; must not be empty.</param>
        /// <param name="dst">Values that are scaled in place. May be empty.</param>
        /// <exception cref="ArgumentException"><paramref name="scalar"/> is empty.</exception>
        private unsafe void MultiplyScalarU(Span<float> scalar, Span<float> dst)
        {
            // Pinning an empty span yields a null pointer, which the broadcast below would dereference.
            if (scalar.IsEmpty)
            {
                throw new ArgumentException("At least one element is required.", nameof(scalar));
            }

            fixed (float* pdst = dst)
            fixed (float* psrc = scalar)
            {
                float* pDstEnd = pdst + dst.Length;
                float* pDstCurrent = pdst;

                // Replicate the multiplier into all eight lanes once, outside the loop.
                Vector256<float> scalarVector256 = Avx.BroadcastScalarToVector256(psrc);

                // Main loop: eight floats per iteration.
                while (pDstCurrent + 8 <= pDstEnd)
                {
                    Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
                    dstVector = Avx.Multiply(dstVector, scalarVector256);
                    Avx.Store(pDstCurrent, dstVector);

                    pDstCurrent += 8;
                }

                // Tail: the up-to-seven elements that do not fill a whole vector would otherwise
                // be left unscaled.
                float factor = *psrc;
                while (pDstCurrent < pDstEnd)
                {
                    *pDstCurrent *= factor;
                    pDstCurrent++;
                }
            }
        }

コード例 #2

ファイルを表示

ファイル: AvxIntrinsics.cs プロジェクト: Trading-Lab/Microsoft-Machine-Learning

        // This function implements Algorithm 2 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
        // Calculate the stochastic gradient and update the model.
        //
        // For each of the `count` entries i (field fieldIndices[i], feature index featureIndices[i],
        // value featureValues[i]):
        //   1. the linear weight of that feature receives a scalar ADAGRAD step, and
        //   2. each of its `fieldCount` latent vectors receives a vectorized ADAGRAD step,
        //      eight floats at a time.
        // linearWeights, latentWeights and both accumulated-squared-gradient buffers are updated in
        // place; latentSum is only read. Only whole groups of eight latent components are processed
        // (k + 8 <= latentDim); any remainder is left untouched.
        public static unsafe void CalculateGradientAndUpdate(int* fieldIndices, int* featureIndices, float* featureValues, float* latentSum, float* linearWeights,
                                                             float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads, float lambdaLinear, float lambdaLatent, float learningRate,
                                                             int fieldCount, int latentDim, float weight, int count, float slope)
        {
            Contracts.Assert(Avx.IsSupported);

            int m = fieldCount;
            int d = latentDim;
            int* pf = fieldIndices;
            int* pi = featureIndices;
            float* px = featureValues;
            float* pq = latentSum;
            float* pw = linearWeights;
            float* pv = latentWeights;
            float* phw = linearAccumulatedSquaredGrads;
            float* phv = latentAccumulatedSquaredGrads;

            // Broadcast the scalar hyper-parameters once so they can be used lane-wise.
            Vector256<float> wei = Vector256.Create(weight);
            Vector256<float> s = Vector256.Create(slope);
            Vector256<float> lr = Vector256.Create(learningRate);
            Vector256<float> lambdav = Vector256.Create(lambdaLatent);

            for (int i = 0; i < count; i++)
            {
                int f = pf[i];
                int j = pi[i];

                // Calculate gradient of linear term w_j.
                float g = weight * (lambdaLinear * pw[j] + slope * px[i]);

                // Accumulate the gradient of the linear term.
                phw[j] += g * g;

                // Perform ADAGRAD update rule to adjust linear term.
                pw[j] -= learningRate / MathF.Sqrt(phw[j]) * g;

                // Update latent term, v_j,f', f'=1,...,m.
                Vector256<float> x = Avx.BroadcastScalarToVector256(px + i);

                // slope * x does not depend on f', so it is computed once per feature.
                Vector256<float> sx = Avx.Multiply(s, x);

                // Start of feature j's latent vectors. Computed in 64-bit so that a large feature
                // index cannot overflow the 32-bit product before it is added to the pointer.
                long featureOffset = (long)j * m * d;

                for (int fprime = 0; fprime < m; fprime++)
                {
                    float* vjfprime = pv + featureOffset + fprime * d;
                    float* hvjfprime = phv + featureOffset + fprime * d;
                    float* qfprimef = pq + fprime * m * d + f * d;

                    for (int k = 0; k + 8 <= d; k += 8)
                    {
                        Vector256<float> v = Avx.LoadVector256(vjfprime + k);
                        Vector256<float> q = Avx.LoadVector256(qfprimef + k);

                        // Calculate L2-norm regularization's gradient.
                        Vector256<float> gLatent = Avx.Multiply(lambdav, v);

                        Vector256<float> tmp = q;

                        // Calculate loss function's gradient.
                        // For the feature's own field the helper result replaces q
                        // (presumably q - v * x; confirm against MultiplyAddNegated).
                        if (fprime == f)
                        {
                            tmp = MultiplyAddNegated(v, x, q);
                        }
                        gLatent = MultiplyAdd(sx, tmp, gLatent);
                        gLatent = Avx.Multiply(wei, gLatent);

                        // Accumulate the gradient of latent vectors.
                        Vector256<float> h = MultiplyAdd(gLatent, gLatent, Avx.LoadVector256(hvjfprime + k));

                        // Perform ADAGRAD update rule to adjust latent vector.
                        v = MultiplyAddNegated(lr, Avx.Multiply(Avx.ReciprocalSqrt(h), gLatent), v);
                        Avx.Store(vjfprime + k, v);
                        Avx.Store(hvjfprime + k, h);
                    }
                }
            }
        }

コード例 #3

ファイルを表示

ファイル: AvxIntrinsics.cs プロジェクト: Trading-Lab/Microsoft-Machine-Learning

        // This function implements Algorithm 1 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf.
        // It writes to *response the sum of a linear part and a latent part of the field-aware
        // factorization output.
        // The linear part is the inner product of linearWeights and featureValues over the given entries.
        // The latent part is built from the per-field-pair sums stored in latentSum, which this
        // function clears and refills. Only whole groups of eight latent components are processed.
        public static unsafe void CalculateIntermediateVariables(int* fieldIndices, int* featureIndices, float* featureValues,
                                                                 float* linearWeights, float* latentWeights, float* latentSum, float* response, int fieldCount, int latentDim, int count)
        {
            Contracts.Assert(Avx.IsSupported);

            float linearPart = 0;

            // Zero the fieldCount x fieldCount x latentDim buffer of latent sums.
            Unsafe.InitBlock(latentSum, 0, (uint)(fieldCount * fieldCount * latentDim * sizeof(float)));

            // interField collects the f != f' products; halfTerms collects the terms that are
            // scaled by _point5 at the end.
            Vector256<float> interField = Vector256<float>.Zero;
            Vector256<float> halfTerms = Vector256<float>.Zero;

            for (int entry = 0; entry < count; entry++)
            {
                int field = fieldIndices[entry];
                int feature = featureIndices[entry];
                linearPart += linearWeights[feature] * featureValues[entry];

                Vector256<float> value = Avx.BroadcastScalarToVector256(featureValues + entry);
                Vector256<float> valueSquared = Avx.Multiply(value, value);

                // Offsets (in floats) of this feature's latent vectors and of this field's row of sums.
                int featureBase = feature * fieldCount * latentDim;
                int fieldBase = field * fieldCount * latentDim;

                // halfTerms -= <v_j,f, v_j,f> * x * x, using the feature's latent vector in its own field.
                float* ownVector = latentWeights + (featureBase + field * latentDim);
                for (int lane = 0; lane + 8 <= latentDim; lane += 8)
                {
                    Vector256<float> own = Avx.LoadVector256(ownVector + lane);
                    halfTerms = MultiplyAddNegated(Avx.Multiply(own, own), valueSquared, halfTerms);
                }

                // q_f,f' += v_j,f' * x for every field f'.
                for (int other = 0; other < fieldCount; other++)
                {
                    float* weights = latentWeights + (featureBase + other * latentDim);
                    float* sums = latentSum + (fieldBase + other * latentDim);

                    for (int lane = 0; lane + 8 <= latentDim; lane += 8)
                    {
                        Vector256<float> w = Avx.LoadVector256(weights + lane);
                        Vector256<float> running = Avx.LoadVector256(sums + lane);
                        Avx.Store(sums + lane, MultiplyAdd(w, value, running));
                    }
                }
            }

            for (int field = 0; field < fieldCount; field++)
            {
                float* fieldRow = latentSum + field * fieldCount * latentDim;

                // halfTerms += <q_f,f, q_f,f> (intra-field interactions).
                float* diagonal = fieldRow + field * latentDim;
                for (int lane = 0; lane + 8 <= latentDim; lane += 8)
                {
                    Vector256<float> qff = Avx.LoadVector256(diagonal + lane);
                    halfTerms = MultiplyAdd(qff, qff, halfTerms);
                }

                // interField += <q_f,f', q_f',f>, f != f'.
                // This loop handles inter-field interactions; each unordered pair is visited once.
                for (int other = field + 1; other < fieldCount; other++)
                {
                    float* forward = fieldRow + other * latentDim;
                    float* backward = latentSum + other * fieldCount * latentDim + field * latentDim;
                    for (int lane = 0; lane + 8 <= latentDim; lane += 8)
                    {
                        Vector256<float> qForward = Avx.LoadVector256(forward + lane);
                        Vector256<float> qBackward = Avx.LoadVector256(backward + lane);
                        interField = MultiplyAdd(qForward, qBackward, interField);
                    }
                }
            }

            // Combine both accumulators, then reduce the eight lanes to a single value:
            // add the swapped 128-bit halves, followed by two horizontal adds.
            Vector256<float> combined = MultiplyAdd(_point5, halfTerms, interField);
            Vector256<float> folded = Avx.Add(combined, Avx.Permute2x128(combined, combined, 1));
            folded = Avx.HorizontalAdd(folded, folded);
            folded = Avx.HorizontalAdd(folded, folded);

            float latentPart = 0;
            Sse.StoreScalar(&latentPart, folded.GetLower()); // The lowest slot is the response value.
            *response = linearPart + latentPart;
        }

コード例 #4

ファイルを表示

        /// <summary>
        /// Accumulates the terms x^-(2k+1) / (2k+1) for k = 0, 1, 2, ... with AVX, four terms per
        /// iteration, and returns their sum. <c>N</c> is set to the number of terms processed and
        /// <c>Status</c> to the outcome.
        /// </summary>
        /// <param name="x">Value whose odd negative powers form the numerators.</param>
        /// <param name="stepThreshold">Iteration stops once the absolute value of the last term of
        /// the current group of four falls below this value.</param>
        /// <param name="maxN">Limit on <c>N</c>; a new group of four is only started while
        /// <c>N</c> is below it.</param>
        /// <returns>The accumulated sum, or <see cref="double.NaN"/> when AVX is not supported.</returns>
        protected override unsafe double CalculateImpl(double x, double stepThreshold, int maxN)
        {
            if (!Avx.IsSupported)
            {
                Status = TaylorSeriesStatus.NotSupported;
                return double.NaN;
            }

            // Number of doubles in a 256-bit vector.
            const int lanes = 256 / 8 / sizeof(double);

            // (8, 8, 8, 8): each iteration advances every denominator by eight.
            double denominatorIncrement = 8.0;
            Vector256<double> denominatorStep = Avx.BroadcastScalarToVector256(&denominatorIncrement);

            // (x^8, x^8, x^8, x^8), obtained by squaring the broadcast x three times.
            Vector256<double> powerStep = Avx.BroadcastScalarToVector256(&x);
            for (int squaring = 0; squaring < 3; squaring++)
            {
                powerStep = Avx.Multiply(powerStep, powerStep);
            }

            // (x^-1, x^-3, x^-5, x^-7)
            double* oddPowers = stackalloc double[lanes];
            double currentPower = 1 / x;
            for (int lane = 0; lane < lanes; lane++)
            {
                oddPowers[lane] = currentPower;
                currentPower /= x * x;
            }
            Vector256<double> numerators = Avx.LoadVector256(oddPowers);

            // (1, 3, 5, 7)
            double* oddNumbers = stackalloc double[lanes] { 1, 3, 5, 7 };
            Vector256<double> denominators = Avx.LoadVector256(oddNumbers);

            // Four independent partial sums, one per lane.
            Vector256<double> partialSums = Vector256<double>.Zero;

            for (N = 0; N < maxN;)
            {
                Vector256<double> terms = Avx.Divide(numerators, denominators);
                partialSums = Avx.Add(partialSums, terms);
                N += lanes;

                // The highest lane holds the last term of this group.
                if (Math.Abs(terms.GetElement(lanes - 1)) < stepThreshold)
                {
                    break;
                }

                // Move on to the next four terms: divide the powers by x^8, add 8 to the divisors.
                numerators = Avx.Divide(numerators, powerStep);
                denominators = Avx.Add(denominators, denominatorStep);
            }

            Status = N < maxN ? TaylorSeriesStatus.Success : TaylorSeriesStatus.TooManyIterations;

            // Add the four lanes from lowest to highest.
            return partialSums.GetElement(0) + partialSums.GetElement(1) + partialSums.GetElement(2) + partialSums.GetElement(3);
        }
    }