Exemplo n.º 1
0
        /// <summary>
        /// Scenario: pins the <c>_clsVar1</c> and <c>_clsVar2</c> fields, loads each of them through its
        /// pointer with <c>Avx.LoadVector256</c>, computes <c>Avx.Max</c> of the two loaded vectors,
        /// writes the result to the data table's output buffer and validates it against the fields.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector256<Double>* firstPtr = &_clsVar1)
            fixed (Vector256<Double>* secondPtr = &_clsVar2)
            {
                // Operands are loaded in the same left-to-right order as the arguments of Max.
                Vector256<Double> first = Avx.LoadVector256((Double*)firstPtr);
                Vector256<Double> second = Avx.LoadVector256((Double*)secondPtr);
                Vector256<Double> maximum = Avx.Max(first, second);

                Unsafe.Write(_dataTable.outArrayPtr, maximum);
                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes, for every float in the input buffer, the square root of that value clamped below
        /// at zero to the corresponding position of the output buffer.
        /// </summary>
        /// <param name="ipstart">Start of the input buffer; read as a sequence of floats.</param>
        /// <param name="opstart">Start of the output buffer; written as a sequence of floats.</param>
        /// <param name="cb">Length of the input buffer in bytes.</param>
        unsafe private static void greyLinearToGreyFloat(byte *ipstart, byte *opstart, int cb)
        {
            float* src = (float*)ipstart;
            float* srcEnd = (float*)(ipstart + cb);
            float* dst = (float*)opstart;

#if HWINTRINSICS
            if (Avx.IsSupported)
            {
                var floor256 = Vector256<float>.Zero;

                // Highest address from which a whole 256-bit vector can still be read.
                float* lastBlock256 = srcEnd - Vector256<float>.Count;
                for (; src <= lastBlock256; src += Vector256<float>.Count, dst += Vector256<float>.Count)
                {
                    var clamped = Avx.Max(floor256, Avx.LoadVector256(src));
                    Avx.Store(dst, Avx.Sqrt(clamped));
                }
            }
            else
#endif
            {
                var floorV = Vector<float>.Zero;

                // Highest address from which a whole Vector<float> can still be read.
                float* lastBlockV = srcEnd - VectorF.Count;
                for (; src <= lastBlockV; src += VectorF.Count, dst += VectorF.Count)
                {
                    var clamped = Vector.Max(Unsafe.ReadUnaligned<VectorF>(src), floorV);
                    Unsafe.WriteUnaligned(dst, Vector.SquareRoot(clamped));
                }
            }

            // Whatever did not fill a whole vector is handled one float at a time.
            float scalarFloor = Vector4.Zero.X;
            while (src < srcEnd)
            {
                *dst = MaxF(*src, scalarFloor).Sqrt();
                src++;
                dst++;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Scenario: creates a fresh <c>SimpleBinaryOpTest__MaxSingle</c>, pins its <c>_fld1</c> and
        /// <c>_fld2</c> fields, loads each through its pointer with <c>Avx.LoadVector256</c>, computes
        /// <c>Avx.Max</c> of the two loaded vectors, writes the result to the data table's output
        /// buffer and validates it against the fields of the local instance.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var instance = new SimpleBinaryOpTest__MaxSingle();

            fixed (Vector256<Single>* firstPtr = &instance._fld1)
            fixed (Vector256<Single>* secondPtr = &instance._fld2)
            {
                // Operands are loaded in the same left-to-right order as the arguments of Max.
                Vector256<Single> first = Avx.LoadVector256((Single*)firstPtr);
                Vector256<Single> second = Avx.LoadVector256((Single*)secondPtr);
                Vector256<Single> maximum = Avx.Max(first, second);

                Unsafe.Write(_dataTable.outArrayPtr, maximum);
                ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
            }
        }
Exemplo n.º 4
0
    /// <summary>
    /// For every element of <paramref name="x"/>, linearly interpolates table <paramref name="A"/>
    /// and table <paramref name="B"/> at that position and returns the weighted blend
    /// <c>(1 - weightB) * interpA + weightB * interpB</c>.
    /// </summary>
    /// <param name="x">Positions to evaluate.</param>
    /// <param name="A">First table; its entries are treated as equally spaced samples over [<paramref name="minXA"/>, <paramref name="maxXA"/>].</param>
    /// <param name="minXA">Position associated with <c>A[0]</c>.</param>
    /// <param name="maxXA">Position associated with the last entry of <paramref name="A"/>.</param>
    /// <param name="B">Second table; its entries are treated as equally spaced samples over [<paramref name="minXB"/>, <paramref name="maxXB"/>].</param>
    /// <param name="minXB">Position associated with <c>B[0]</c>.</param>
    /// <param name="maxXB">Position associated with the last entry of <paramref name="B"/>.</param>
    /// <param name="weightB">Weight of the <paramref name="B"/> interpolation; <paramref name="A"/> gets <c>1 - weightB</c>.</param>
    /// <returns>
    /// A new array of <c>outputVectorSize</c> elements. The first <c>min(x.Length, outputVectorSize)</c>
    /// entries hold the blended values; any remaining entries keep their default value.
    /// </returns>
    /// <remarks>
    /// Uses AVX, AVX2 (gather) and SSE4.1 intrinsics without checking <c>IsSupported</c>;
    /// presumably the caller has verified hardware support - TODO confirm.
    /// Taking the address of element 0 throws <see cref="System.IndexOutOfRangeException"/> when
    /// <paramref name="x"/>, <paramref name="A"/>, <paramref name="B"/> or the output array is empty.
    /// A table with a single entry makes the sample spacing zero and its reciprocal non-finite.
    /// </remarks>
    private static unsafe double[] BilinearInterpol_AVX(
        double[] x,
        double[] A,
        double minXA,
        double maxXA,
        double[] B,
        double minXB,
        double maxXB,
        double weightB)
    {
        double[] z = new double[outputVectorSize];

        int lanes = Vector256<double>.Count;

        // Never read more inputs than exist, and never write more outputs than z can hold.
        int count = x.Length < z.Length ? x.Length : z.Length;

        // Staging buffers for the final group when fewer than `lanes` elements remain, so the
        // full-width load/store below never touches memory outside x or z.
        double* tailIn  = stackalloc double[Vector256<double>.Count];
        double* tailOut = stackalloc double[Vector256<double>.Count];

        fixed (double* pX = &x[0], pA = &A[0], pB = &B[0], pZ = &z[0])
        {
            Vector256<double> vWeightB = Vector256.Create(weightB);
            Vector256<double> vWeightA = Vector256.Create(1 - weightB);

            Vector256<double> vMinXA = Vector256.Create(minXA);
            Vector256<double> vMaxXA = Vector256.Create(maxXA);
            Vector256<double> vMinXB = Vector256.Create(minXB);
            Vector256<double> vMaxXB = Vector256.Create(maxXB);

            // Spacing between consecutive table samples, and its reciprocal.
            double deltaA = (maxXA - minXA) / (double)(A.Length - 1);
            double deltaB = (maxXB - minXB) / (double)(B.Length - 1);
            Vector256<double> vDeltaA = Vector256.Create(deltaA);
            Vector256<double> vDeltaB = Vector256.Create(deltaB);

            double invDeltaA = 1.0 / deltaA;
            double invDeltaB = 1.0 / deltaB;
            Vector256<double> vInvDeltaA = Vector256.Create(invDeltaA);
            Vector256<double> vInvDeltaB = Vector256.Create(invDeltaB);

            Vector128<int> ALengthMinusOne = Vector128.Create(A.Length - 1);
            Vector128<int> BLengthMinusOne = Vector128.Create(B.Length - 1);
            Vector128<int> One             = Vector128.Create(1);

            for (var i = 0; i < count; i += lanes)
            {
                int remaining = count - i;
                bool isPartial = remaining < lanes;

                double* src = pX + i;
                double* dst = pZ + i;

                if (isPartial)
                {
                    // Stage the leftover inputs. Unused lanes get 0.0; their table indices are
                    // clamped into range like any other lane and their results are discarded.
                    for (int lane = 0; lane < lanes; lane++)
                    {
                        tailIn[lane] = lane < remaining ? pX[i + lane] : 0.0;
                    }

                    src = tailIn;
                    dst = tailOut;
                }

                Vector256<double> currentX = Avx.LoadVector256(src);

                // Index of the table sample at or below x: truncate (x - min) / delta to int and
                // clamp it to [0, A.Length - 1]; the upper neighbour is clamped the same way.
                Vector256<double> aDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXA), vInvDeltaA);
                Vector128<int>    a       = Avx.ConvertToVector128Int32WithTruncation(aDouble);
                a = Sse41.Min(Sse41.Max(a, Vector128<int>.Zero), ALengthMinusOne);
                Vector128<int> aPlusOne = Sse41.Min(Sse2.Add(a, One), ALengthMinusOne);

                // Position of sample a, converted back from int to double.
                Vector256<double> xA = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(a), vDeltaA), vMinXA);

                // Fraction of the way from sample a to sample a + 1, with x clamped to [minXA, maxXA].
                Vector256<double> currentXNormA = Avx.Max(vMinXA, Avx.Min(currentX, vMaxXA));
                Vector256<double> lambdaA       = Avx.Multiply(Avx.Subtract(currentXNormA, xA), vInvDeltaA);

                // Fetch both neighbouring samples; the scale of 8 is the size of a double in bytes.
                Vector256<double> AVector        = Avx2.GatherVector256(pA, a, 8);
                Vector256<double> AVectorPlusOne = Avx2.GatherVector256(pA, aPlusOne, 8);

                // Same steps for table B.
                Vector256<double> bDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXB), vInvDeltaB);
                Vector128<int>    b       = Avx.ConvertToVector128Int32WithTruncation(bDouble);
                b = Sse41.Min(Sse41.Max(b, Vector128<int>.Zero), BLengthMinusOne);
                Vector128<int> bPlusOne = Sse41.Min(Sse2.Add(b, One), BLengthMinusOne);

                Vector256<double> xB            = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(b), vDeltaB), vMinXB);
                Vector256<double> currentXNormB = Avx.Max(vMinXB, Avx.Min(currentX, vMaxXB));
                Vector256<double> lambdaB       = Avx.Multiply(Avx.Subtract(currentXNormB, xB), vInvDeltaB);

                Vector256<double> BVector        = Avx2.GatherVector256(pB, b, 8);
                Vector256<double> BVectorPlusOne = Avx2.GatherVector256(pB, bPlusOne, 8);

                // weightA * (A[a] + lambdaA * (A[a + 1] - A[a])) + weightB * (same for B).
                Vector256<double> newZ = Avx.Add(Avx.Multiply(vWeightA, Avx.Add(AVector, Avx.Multiply(lambdaA, Avx.Subtract(AVectorPlusOne, AVector)))),
                                                 Avx.Multiply(vWeightB, Avx.Add(BVector, Avx.Multiply(lambdaB, Avx.Subtract(BVectorPlusOne, BVector)))));
                Avx.Store(dst, newZ);

                if (isPartial)
                {
                    // Copy back only the lanes that correspond to real inputs.
                    for (int lane = 0; lane < remaining; lane++)
                    {
                        pZ[i + lane] = tailOut[lane];
                    }
                }
            }
        }

        return z;
    }
Exemplo n.º 5
0
 /// <summary>Returns the result of <c>Avx.Max</c> applied to <paramref name="a"/> and <paramref name="b"/>, in that operand order.</summary>
 public static f32 Max_f32(f32 a, f32 b) => Avx.Max(a, b);
Exemplo n.º 6
0
            /// <summary>
            /// Converts a buffer of 4-float groups in place. In each group the first three values are
            /// divided by the fourth, scaled by <paramref name="lutmax"/>, mapped through the lookup
            /// table with linear interpolation between adjacent entries, and multiplied by the fourth
            /// value again.
            /// </summary>
            /// <param name="ipstart">Start of the buffer; must equal <paramref name="opstart"/> (asserted in debug builds).</param>
            /// <param name="opstart">Output start; only used in the assertion because the conversion is in place.</param>
            /// <param name="lutstart">Lookup table. Entries at index and index + 1 are read for indices up to <paramref name="lutmax"/>.</param>
            /// <param name="lutmax">Largest table index the scaled values are clamped to.</param>
            /// <param name="cb">Length of the buffer in bytes.</param>
            unsafe public static void ConvertFloat3A(byte *ipstart, byte *opstart, float *lutstart, int lutmax, int cb)
            {
                Debug.Assert(ipstart == opstart);

                float* px = (float*)ipstart;
                float* pxEnd = (float*)(ipstart + cb);
                float* lut = lutstart;

#if HWINTRINSICS
                if (Avx2.IsSupported)
                {
                    // Gather mask taken from HWIntrinsics.GatherMask3x; judging by the name it enables
                    // three of every four lanes - TODO confirm. Masked-off lanes receive 1.0.
                    var gatherMask = Avx.BroadcastVector128ToVector256((float*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.GatherMask3x)));
                    var lutMax256 = Vector256.Create((float)lutmax);
                    var zero256 = Vector256<float>.Zero;
                    var oneF256 = Vector256.Create(1f);
                    var oneI256 = Vector256.Create(1);

                    // Highest address from which a whole 256-bit vector can still be read.
                    float* lastBlock = pxEnd - Vector256<float>.Count;
                    for (; px <= lastBlock; px += Vector256<float>.Count)
                    {
                        var value = Avx.Max(zero256, Avx.LoadVector256(px));
                        var divisor = Avx.Shuffle(value, value, HWIntrinsics.ShuffleMaskAlpha);

                        // value * (lutmax / divisor), using the approximate reciprocal, capped at lutmax.
                        var scale = Avx.Multiply(lutMax256, Avx.Reciprocal(divisor));
                        value = Avx.Min(Avx.Multiply(value, scale), lutMax256);

                        var index = Avx.ConvertToVector256Int32WithTruncation(value);
                        var indexAsFloat = Avx.ConvertToVector256Single(index);

                        var lower = Avx2.GatherMaskVector256(oneF256, lut, index, gatherMask, sizeof(float));
                        var upper = Avx2.GatherMaskVector256(oneF256, lut, Avx2.Add(index, oneI256), gatherMask, sizeof(float));

                        value = HWIntrinsics.Lerp(lower, upper, Avx.Subtract(value, indexAsFloat));
                        value = Avx.Multiply(value, divisor);

                        Avx.Store(px, value);
                    }
                }
#endif
                {
                    // Runs unconditionally: handles everything when the vector path is unavailable
                    // and whatever the vector path left over otherwise.
                    var lutMax4 = new Vector4(lutmax);
                    var zero4 = Vector4.Zero;
                    float minFourth = new Vector4(1 / 1024f).X;

                    for (; px < pxEnd; px += 4)
                    {
                        var group = Unsafe.ReadUnaligned<Vector4>(px);
                        float fourth = group.W;

                        if (fourth < minFourth)
                        {
                            // Fourth component below 1/1024: the whole group is zeroed.
                            Unsafe.WriteUnaligned(px, zero4);
                            continue;
                        }

                        group = (group * lutMax4 / fourth).Clamp(zero4, lutMax4);

                        float c0 = group.X;
                        float c1 = group.Y;
                        float c2 = group.Z;

                        uint k0 = (uint)c0;
                        uint k1 = (uint)c1;
                        uint k2 = (uint)c2;

                        // Only the first three components are rewritten; px[3] is left as it was.
                        px[0] = Lerp(lut[k0], lut[k0 + 1], c0 - (int)k0) * fourth;
                        px[1] = Lerp(lut[k1], lut[k1 + 1], c1 - (int)k1) * fourth;
                        px[2] = Lerp(lut[k2], lut[k2 + 1], c2 - (int)k2) * fourth;
                    }
                }
            }
Exemplo n.º 7
0
            /// <summary>
            /// Converts a buffer of floats in place: each value is multiplied by
            /// <paramref name="lutmax"/>, clamped to [0, lutmax], and replaced by the linear
            /// interpolation between the two lookup-table entries surrounding it.
            /// </summary>
            /// <param name="ipstart">Start of the buffer; must equal <paramref name="opstart"/> (asserted in debug builds).</param>
            /// <param name="opstart">Output start; only used in the assertion because the conversion is in place.</param>
            /// <param name="lutstart">Lookup table. Entries at index and index + 1 are read for indices up to <paramref name="lutmax"/>.</param>
            /// <param name="lutmax">Scale factor and largest table index.</param>
            /// <param name="cb">Length of the buffer in bytes.</param>
            unsafe public static void ConvertFloat(byte *ipstart, byte *opstart, float *lutstart, int lutmax, int cb)
            {
                Debug.Assert(ipstart == opstart);

                float* cur = (float*)ipstart;
                float* end = (float*)(ipstart + cb);
                float* lut = lutstart;

#if HWINTRINSICS
                if (Avx2.IsSupported)
                {
                    var upper256 = Vector256.Create((float)lutmax);
                    var lower256 = Vector256<float>.Zero;
                    var one256 = Vector256.Create(1);

                    // Highest address from which a whole 256-bit vector can still be read.
                    float* lastBlock256 = end - Vector256<float>.Count;
                    for (; cur <= lastBlock256; cur += Vector256<float>.Count)
                    {
                        var scaled = Avx.Multiply(upper256, Avx.LoadVector256(cur));
                        scaled = Avx.Min(Avx.Max(lower256, scaled), upper256);

                        var index = Avx.ConvertToVector256Int32WithTruncation(scaled);
                        var indexAsFloat = Avx.ConvertToVector256Single(index);

                        var below = Avx2.GatherVector256(lut, index, sizeof(float));
                        var above = Avx2.GatherVector256(lut, Avx2.Add(index, one256), sizeof(float));

                        Avx.Store(cur, HWIntrinsics.Lerp(below, above, Avx.Subtract(scaled, indexAsFloat)));
                    }
                }
                else
#endif
                {
                    var upper4 = new Vector4(lutmax);
                    var lower4 = Vector4.Zero;

                    // Highest address from which four floats can still be read.
                    float* lastBlock4 = end - 4;
                    for (; cur <= lastBlock4; cur += 4)
                    {
                        var scaled4 = (Unsafe.ReadUnaligned<Vector4>(cur) * upper4).Clamp(lower4, upper4);

                        float c0 = scaled4.X;
                        float c1 = scaled4.Y;
                        float c2 = scaled4.Z;
                        float c3 = scaled4.W;

                        uint k0 = (uint)c0;
                        uint k1 = (uint)c1;
                        uint k2 = (uint)c2;
                        uint k3 = (uint)c3;

                        cur[0] = Lerp(lut[k0], lut[k0 + 1], c0 - (int)k0);
                        cur[1] = Lerp(lut[k1], lut[k1 + 1], c1 - (int)k1);
                        cur[2] = Lerp(lut[k2], lut[k2 + 1], c2 - (int)k2);
                        cur[3] = Lerp(lut[k3], lut[k3 + 1], c3 - (int)k3);
                    }
                }

                // Values that did not fill a whole vector are converted one at a time. The bounds are
                // the same numbers the vector paths use: zero and lutmax as a float.
                float floor = 0f;
                float ceiling = lutmax;
                while (cur < end)
                {
                    float f = (*cur * ceiling).Clamp(floor, ceiling);
                    uint i = (uint)f;

                    *cur++ = Lerp(lut[i], lut[i + 1], f - i);
                }
            }
Exemplo n.º 8
0
            /// <summary>
            /// Converts a line of 4-float groups to 4-byte groups. In each group the first three values
            /// are divided by the fourth and all four are scaled to the byte range. A group whose
            /// fourth value is below the minimum threshold (0.5 / 255) is written as four zero bytes.
            /// </summary>
            /// <param name="ipstart">Start of the input buffer; read as floats.</param>
            /// <param name="opstart">Start of the output buffer; one byte is written per input float.</param>
            /// <param name="cb">Length of the input buffer in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                if (Avx2.IsSupported)
                {
                    var vzero  = Vector256 <float> .Zero;
                    var vmin   = Vector256.Create(0.5f / byte.MaxValue);
                    var vscale = Vector256.Create((float)byte.MaxValue);

                    // Permutation indices applied to the packed bytes before the store (see below).
                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));

                    // Each iteration consumes Vector256<byte>.Count floats (four float vectors) and
                    // produces the same number of bytes, so stop that many floats before the end.
                    ipe -= Vector256 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Avx.LoadVector256(ip);
                        var vf1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var vf2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        var vf3 = Avx.LoadVector256(ip + Vector256 <float> .Count * 3);
                        ip += Vector256 <byte> .Count;

                        // Presumably replicates each group's fourth value across the group's lanes
                        // (inferred from the ShuffleMaskAlpha name) - TODO confirm.
                        var vfa0 = Avx.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Avx.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Avx.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Avx.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Raise the divisor to at least vmin so the reciprocal below stays finite.
                        vfa0 = Avx.Max(vfa0, vmin);
                        vfa1 = Avx.Max(vfa1, vmin);
                        vfa2 = Avx.Max(vfa2, vmin);
                        vfa3 = Avx.Max(vfa3, vmin);

                        // Divide by the shuffled value using the approximate reciprocal.
                        vf0 = Avx.Multiply(vf0, Avx.Reciprocal(vfa0));
                        vf1 = Avx.Multiply(vf1, Avx.Reciprocal(vfa1));
                        vf2 = Avx.Multiply(vf2, Avx.Reciprocal(vfa2));
                        vf3 = Avx.Multiply(vf3, Avx.Reciprocal(vfa3));

                        // Take the lanes selected by BlendMaskAlpha from the (clamped) divisor vector
                        // instead of the divided result.
                        vf0 = Avx.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Avx.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Avx.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Avx.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Lanes whose divisor equals vmin after the clamp (i.e. was at or below the
                        // threshold) are replaced with zero.
                        vf0 = Avx.BlendVariable(vf0, vzero, HWIntrinsics.AvxCompareEqual(vfa0, vmin));
                        vf1 = Avx.BlendVariable(vf1, vzero, HWIntrinsics.AvxCompareEqual(vfa1, vmin));
                        vf2 = Avx.BlendVariable(vf2, vzero, HWIntrinsics.AvxCompareEqual(vfa2, vmin));
                        vf3 = Avx.BlendVariable(vf3, vzero, HWIntrinsics.AvxCompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Avx.Multiply(vf0, vscale);
                        vf1 = Avx.Multiply(vf1, vscale);
                        vf2 = Avx.Multiply(vf2, vscale);
                        vf3 = Avx.Multiply(vf3, vscale);

                        // Non-truncating float -> int32 conversion.
                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        // Reorder the 32-bit elements; presumably this undoes the per-128-bit-lane
                        // interleaving left by the two pack steps - TODO confirm against the mask data.
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count;
                    }
                    // Restore the true end pointer for the scalar loop at the bottom.
                    ipe += Vector256 <byte> .Count;
                }
                else if (Sse41.IsSupported)
                {
                    // Same pipeline as above with 128-bit vectors; no final permute is applied here.
                    var vzero  = Vector128 <float> .Zero;
                    var vmin   = Vector128.Create(0.5f / byte.MaxValue);
                    var vscale = Vector128.Create((float)byte.MaxValue);

                    // Each iteration consumes Vector128<byte>.Count floats (four float vectors).
                    ipe -= Vector128 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Sse.LoadVector128(ip);
                        var vf1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                        var vf2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                        var vf3 = Sse.LoadVector128(ip + Vector128 <float> .Count * 3);
                        ip += Vector128 <byte> .Count;

                        var vfa0 = Sse.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Sse.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Sse.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Sse.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Raise the divisor to at least vmin so the reciprocal below stays finite.
                        vfa0 = Sse.Max(vfa0, vmin);
                        vfa1 = Sse.Max(vfa1, vmin);
                        vfa2 = Sse.Max(vfa2, vmin);
                        vfa3 = Sse.Max(vfa3, vmin);

                        // Divide by the shuffled value using the approximate reciprocal.
                        vf0 = Sse.Multiply(vf0, Sse.Reciprocal(vfa0));
                        vf1 = Sse.Multiply(vf1, Sse.Reciprocal(vfa1));
                        vf2 = Sse.Multiply(vf2, Sse.Reciprocal(vfa2));
                        vf3 = Sse.Multiply(vf3, Sse.Reciprocal(vfa3));

                        // Take the lanes selected by BlendMaskAlpha from the (clamped) divisor vector.
                        vf0 = Sse41.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Sse41.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Sse41.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Sse41.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Lanes whose divisor equals vmin after the clamp are replaced with zero.
                        vf0 = Sse41.BlendVariable(vf0, vzero, Sse.CompareEqual(vfa0, vmin));
                        vf1 = Sse41.BlendVariable(vf1, vzero, Sse.CompareEqual(vfa1, vmin));
                        vf2 = Sse41.BlendVariable(vf2, vzero, Sse.CompareEqual(vfa2, vmin));
                        vf3 = Sse41.BlendVariable(vf3, vzero, Sse.CompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Sse.Multiply(vf0, vscale);
                        vf1 = Sse.Multiply(vf1, vscale);
                        vf2 = Sse.Multiply(vf2, vscale);
                        vf3 = Sse.Multiply(vf3, vscale);

                        // Non-truncating float -> int32 conversion.
                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count;
                    }
                    // Restore the true end pointer for the scalar loop below.
                    ipe += Vector128 <byte> .Count;
                }
#endif

                // Scalar loop: processes everything when no vector path ran, and otherwise the groups
                // the vector path left over. fmax = 255, fround = 0.5, fmin = 0.5 / 255.
                float fmax = new Vector4(byte.MaxValue).X, fround = new Vector4(0.5f).X, fmin = fround / fmax;

                while (ip < ipe)
                {
                    float f3 = ip[3];
                    if (f3 < fmin)
                    {
                        // Fourth value below the threshold: write all four output bytes as zero.
                        *(uint *)op = 0;
                    }
                    else
                    {
                        // Dividing by f3 and scaling by 255 are folded into one factor; adding 0.5
                        // before the int cast rounds to nearest for non-negative values.
                        float f3i = fmax / f3;
                        byte  o0  = ClampToByte((int)(ip[0] * f3i + fround));
                        byte  o1  = ClampToByte((int)(ip[1] * f3i + fround));
                        byte  o2  = ClampToByte((int)(ip[2] * f3i + fround));
                        byte  o3  = ClampToByte((int)(f3 * fmax + fround));
                        op[0] = o0;
                        op[1] = o1;
                        op[2] = o2;
                        op[3] = o3;
                    }

                    ip += 4;
                    op += 4;
                }
            }
Exemplo n.º 9
0
        /// <summary>
        /// Sharpens one line of float samples: the difference between the <paramref name="ystart"/>
        /// and <paramref name="bstart"/> buffers, scaled by <paramref name="amt"/>, is added to the
        /// samples read from <paramref name="cstart"/> and the result is written to
        /// <paramref name="ostart"/>.
        /// </summary>
        /// <param name="cstart">Source samples (floats); reading starts at element <c>ox * channels</c>.</param>
        /// <param name="ystart">First difference operand (floats); reading starts at element <paramref name="ox"/>.</param>
        /// <param name="bstart">Second difference operand (floats); read from its start.</param>
        /// <param name="ostart">Output buffer (floats); written from its start.</param>
        /// <param name="ox">Offset applied to <paramref name="cstart"/> (times <c>channels</c>) and <paramref name="ystart"/>.</param>
        /// <param name="ow">Width to process; <c>ow * channels</c> source samples are consumed.</param>
        /// <param name="amt">Multiplier applied to the difference.</param>
        /// <param name="thresh">When greater than zero, differences whose absolute value does not exceed it are not applied.</param>
        /// <param name="gamma">
        /// When true, the difference is added to the square root of the sample (clamped at zero)
        /// and the sum is clamped at zero and squared again.
        /// </param>
        /// <remarks>
        /// The vector paths use the approximate reciprocal square root, so their results can differ
        /// slightly from the scalar path. NOTE(review): in the vector gamma path a sample of exactly
        /// zero appears to go through 0 * (1 / sqrt(0)), which is not a number, and then be clamped
        /// back to zero regardless of the difference - confirm whether that is intended.
        /// </remarks>
        unsafe void IConvolver.SharpenLine(byte *cstart, byte *ystart, byte *bstart, byte *ostart, int ox, int ow, float amt, float thresh, bool gamma)
        {
            float* ip = (float*)cstart + (uint)ox * channels, yp = (float*)ystart + (uint)ox, bp = (float*)bstart, op = (float*)ostart;
            float* ipe = ip + (uint)ow * channels;

            bool threshold = thresh > 0f;

            if (Avx.IsSupported && ip <= ipe - VectorAvx.Count)
            {
                // With thresholding disabled the compare value is unused; -1 is just a placeholder.
                var vthresh = Vector256.Create(threshold ? thresh : -1f);
                var vmsk    = Vector256.Create(0x7fffffff).AsSingle(); // clears the sign bit => |x|
                var vamt    = Vector256.Create(amt);
                var vmin    = VectorAvx.Zero;

                // Stop one full vector before the end; restored after the loop for the scalar tail.
                ipe -= VectorAvx.Count;
                do
                {
                    var vd = Avx.Subtract(Avx.LoadVector256(yp), Avx.LoadVector256(bp));
                    yp += VectorAvx.Count;
                    bp += VectorAvx.Count;

                    if (threshold)
                    {
                        // Zero out differences whose magnitude does not exceed the threshold.
                        var sm = HWIntrinsics.AvxCompareGreaterThan(Avx.And(vd, vmsk), vthresh);
                        vd = Avx.And(vd, sm);
                    }
                    vd = Avx.Multiply(vd, vamt);

                    var v0 = Avx.LoadVector256(ip);
                    ip += VectorAvx.Count;

                    if (gamma)
                    {
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, Avx.ReciprocalSqrt(v0)); // x * (1 / sqrt(x)) ~= sqrt(x)
                        v0 = Avx.Add(v0, vd);
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Avx.Add(v0, vd);
                    }

                    Avx.Store(op, v0);
                    op += VectorAvx.Count;
                } while (ip <= ipe);
                ipe += VectorAvx.Count;
            }
            // Guarded like the AVX branch above: without SSE support the intrinsics below would
            // throw, so fall through to the scalar loop instead.
            else if (Sse.IsSupported && ip <= ipe - VectorSse.Count)
            {
                var vthresh = Vector128.Create(threshold ? thresh : -1f);
                var vmsk    = Vector128.Create(0x7fffffff).AsSingle(); // clears the sign bit => |x|
                var vamt    = Vector128.Create(amt);
                var vmin    = VectorSse.Zero;

                // Stop one full vector before the end; restored after the loop for the scalar tail.
                ipe -= VectorSse.Count;
                do
                {
                    var vd = Sse.Subtract(Sse.LoadVector128(yp), Sse.LoadVector128(bp));
                    yp += VectorSse.Count;
                    bp += VectorSse.Count;

                    if (threshold)
                    {
                        // Zero out differences whose magnitude does not exceed the threshold.
                        var sm = Sse.CompareGreaterThan(Sse.And(vd, vmsk), vthresh);
                        vd = Sse.And(vd, sm);
                    }
                    vd = Sse.Multiply(vd, vamt);

                    var v0 = Sse.LoadVector128(ip);
                    ip += VectorSse.Count;

                    if (gamma)
                    {
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, Sse.ReciprocalSqrt(v0)); // x * (1 / sqrt(x)) ~= sqrt(x)
                        v0 = Sse.Add(v0, vd);
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Sse.Add(v0, vd);
                    }

                    Sse.Store(op, v0);
                    op += VectorSse.Count;
                } while (ip <= ipe);
                ipe += VectorSse.Count;
            }

            float fmin = VectorSse.Zero.ToScalar();

            // Scalar loop: the whole line when no vector path ran, otherwise the leftover samples.
            while (ip < ipe)
            {
                float dif = *yp++ - *bp++;
                float c0  = *ip++;

                // Below-threshold differences leave the sample untouched (copied through as-is).
                if (!threshold || Math.Abs(dif) > thresh)
                {
                    dif *= amt;

                    if (gamma)
                    {
                        c0  = MathUtil.MaxF(c0, fmin).Sqrt();
                        c0  = MathUtil.MaxF(c0 + dif, fmin);
                        c0 *= c0;
                    }
                    else
                    {
                        c0 += dif;
                    }
                }

                *op++ = c0;
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Returns the largest element of <paramref name="matrix"/>, using AVX or SSE vector maxima
        /// when available and a scalar loop for whatever is left.
        /// </summary>
        /// <param name="matrix">Matrix whose backing array (<c>GetArray()</c>) is scanned over <c>matrix.Length</c> elements.</param>
        /// <returns>The maximum of the scanned elements.</returns>
        /// <exception cref="System.IndexOutOfRangeException">
        /// <c>matrix.Length</c> is zero (the first element is read unconditionally as the initial maximum).
        /// </exception>
        public static unsafe float Max(this Matrix <float> matrix)
        {
            var i = 0;

            fixed (float* ptr = matrix.GetArray())
            {
                var span      = new Span<float>(ptr, matrix.Length);
                var maxScalar = span[0];

                if (Avx.IsSupported)
                {
                    // Seed every lane of the running maximum with the first element.
                    var maxValues = stackalloc float[8]
                    {
                        span[0],
                        span[0],
                        span[0],
                        span[0],
                        span[0],
                        span[0],
                        span[0],
                        span[0]
                    };
                    var max = Avx.LoadVector256(maxValues);

                    // "<=" so that the final full group of 8 is also handled by the vector loop
                    // instead of always being left to the scalar loop below.
                    while (i <= span.Length - 8)
                    {
                        var vector256 = Avx.LoadVector256(ptr + i);
                        max = Avx.Max(vector256, max);
                        i  += 8;
                    }

                    // Reduce the 8 lanes to a single value.
                    maxScalar = max.MaxVector256(8);
                }
                else if (Sse.IsSupported)
                {
                    // Seed every lane of the running maximum with the first element.
                    var maxValues = stackalloc float[4]
                    {
                        span[0],
                        span[0],
                        span[0],
                        span[0]
                    };
                    var max = Sse.LoadVector128(maxValues);

                    // "<=" so that the final full group of 4 is also handled by the vector loop.
                    while (i <= span.Length - 4)
                    {
                        var vector128 = Sse.LoadVector128(ptr + i);
                        max = Sse.Max(vector128, max);
                        i  += 4;
                    }

                    // Reduce the 4 lanes to a single value.
                    maxScalar = max.MaxVector128(4);
                }

                // Remaining elements (or all of them when no vector path ran).
                while (i < span.Length)
                {
                    if (maxScalar < span[i])
                    {
                        maxScalar = span[i];
                    }
                    i++;
                }

                return maxScalar;
            }
        }