Exemplo n.º 1
0
        static unsafe int Main(string[] args)
        {
            int testResult = Pass;

            if (Sse3.IsSupported)
            {
                using (TestTable <float> floatTable = new TestTable <float>(new float[4] {
                    1, -5, 100, 0
                }, new float[4]))
                {
                    var vf1 = Sse.LoadVector128((float *)(floatTable.inArrayPtr));
                    var vf2 = Sse3.MoveHighAndDuplicate(vf1);
                    Unsafe.Write(floatTable.outArrayPtr, vf2);

                    if (BitConverter.SingleToInt32Bits(floatTable.inArray[1]) != BitConverter.SingleToInt32Bits(floatTable.outArray[0]) ||
                        BitConverter.SingleToInt32Bits(floatTable.inArray[1]) != BitConverter.SingleToInt32Bits(floatTable.outArray[1]) ||
                        BitConverter.SingleToInt32Bits(floatTable.inArray[3]) != BitConverter.SingleToInt32Bits(floatTable.outArray[2]) ||
                        BitConverter.SingleToInt32Bits(floatTable.inArray[3]) != BitConverter.SingleToInt32Bits(floatTable.outArray[3]))
                    {
                        Console.WriteLine("Sse3 MoveHighAndDuplicate failed on float:");
                        foreach (var item in floatTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }
                }
            }

            return(testResult);
        }
Exemplo n.º 2
0
    static int Main()
    {
        s_success = true;

        // We expect the AOT compiler generated HW intrinsics with the following characteristics:
        //
        // * TRUE = IsSupported assumed to be true, no runtime check
        // * NULL = IsSupported is a runtime check, code should be behind the check or bad things happen
        // * FALSE = IsSupported assumed to be false, no runtime check, PlatformNotSupportedException if used
        //
        // The test is compiled with multiple defines to test this.

#if BASELINE_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 16;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = null;
        bool?AesLzPcl           = null;
        bool?Sse4142            = null;
        bool?PopCnt             = null;
        bool?Avx12    = false;
        bool?FmaBmi12 = false;
        bool?Avxvnni  = false;
#elif NON_VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 16;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = true;
        bool?AesLzPcl           = null;
        bool?Sse4142            = true;
        bool?PopCnt             = null;
        bool?Avx12    = false;
        bool?FmaBmi12 = false;
        bool?Avxvnni  = false;
#elif VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 32;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = true;
        bool?AesLzPcl           = null;
        bool?Sse4142            = true;
        bool?PopCnt             = null;
        bool?Avx12    = true;
        bool?FmaBmi12 = null;
        bool?Avxvnni  = null;
#else
#error Who dis?
#endif

        if (vectorsAccelerated != Vector.IsHardwareAccelerated)
        {
            throw new Exception($"Vectors HW acceleration state unexpected - expected {vectorsAccelerated}, got {Vector.IsHardwareAccelerated}");
        }

        if (byteVectorLength != Vector <byte> .Count)
        {
            throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector<byte>.Count}");
        }

        Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128 <float> .Zero) == 0);

        Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128 <ushort> .Zero, 0) == 0);
        Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128 <double> .Zero) == 0);

        Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null);

        Check("Ssse3", Sse3Group, &Ssse3IsSupported, Ssse3.IsSupported, () => Ssse3.Abs(Vector128 <short> .Zero).Equals(Vector128 <ushort> .Zero));
        Check("Ssse3.X64", Sse3Group, &Ssse3X64IsSupported, Ssse3.X64.IsSupported, null);

        Check("Sse41", Sse4142, &Sse41IsSupported, Sse41.IsSupported, () => Sse41.Max(Vector128 <int> .Zero, Vector128 <int> .Zero).Equals(Vector128 <int> .Zero));
        Check("Sse41.X64", Sse4142, &Sse41X64IsSupported, Sse41.X64.IsSupported, () => Sse41.X64.Extract(Vector128 <long> .Zero, 0) == 0);

        Check("Sse42", Sse4142, &Sse42IsSupported, Sse42.IsSupported, () => Sse42.Crc32(0, 0) == 0);
        Check("Sse42.X64", Sse4142, &Sse42X64IsSupported, Sse42.X64.IsSupported, () => Sse42.X64.Crc32(0, 0) == 0);

        Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, () => Aes.KeygenAssist(Vector128 <byte> .Zero, 0).Equals(Vector128.Create((byte)99)));
        Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null);

        Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256 <double> .Zero, Vector256 <double> .Zero).Equals(Vector256 <double> .Zero));
        Check("Avx.X64", Avx12, &AvxX64IsSupported, Avx.X64.IsSupported, null);

        Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256 <int> .Zero).Equals(Vector256 <uint> .Zero));
        Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null);

        Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0);
        Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0);

        Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0);
        Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0);

        Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128 <float> .Zero, Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null);

        Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32);
        Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64);

        Check("Pclmulqdq", AesLzPcl, &PclmulqdqIsSupported, Pclmulqdq.IsSupported, () => Pclmulqdq.CarrylessMultiply(Vector128 <long> .Zero, Vector128 <long> .Zero, 0).Equals(Vector128 <long> .Zero));
        Check("Pclmulqdq.X64", AesLzPcl, &PclmulqdqX64IsSupported, Pclmulqdq.X64.IsSupported, null);

        Check("Popcnt", PopCnt, &PopcntIsSupported, Popcnt.IsSupported, () => Popcnt.PopCount(0) == 0);
        Check("Popcnt.X64", PopCnt, &PopcntX64IsSupported, Popcnt.X64.IsSupported, () => Popcnt.X64.PopCount(0) == 0);

        Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128 <int> .Zero, Vector128 <byte> .Zero, Vector128 <sbyte> .Zero).Equals(Vector128 <int> .Zero));
        Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null);

        return(s_success ? 100 : 1);
    }
Exemplo n.º 3
0
 public static Vector128 <float> _mm_movehdup_ps(Vector128 <float> source)
 {
     return(Sse3.MoveHighAndDuplicate(source));
 }
Exemplo n.º 4
0
 public static __m128 _mm_movehdup_ps(__m128 a) => Sse3.MoveHighAndDuplicate(a);
Exemplo n.º 5
0
        unsafe void IConvolver.ConvolveSourceLine(byte *istart, byte *tstart, int cb, byte *mapxstart, int smapx, int smapy)
        {
            float *tp = (float *)tstart, tpe = (float *)(tstart + cb);
            float *pmapx   = (float *)mapxstart;
            int    kstride = smapx * channels;
            int    tstride = smapy * 4;
            int    vcnt    = smapx / Vector128 <float> .Count;

            while (tp < tpe)
            {
                int ix   = *(int *)pmapx++;
                int lcnt = vcnt;

                float *ip = (float *)istart + ix * channels;
                float *mp = pmapx;
                pmapx += kstride;

                Vector128 <float> av0, av1, av2;

                if (Avx.IsSupported && lcnt >= 2)
                {
                    Vector256 <float> ax0 = Vector256 <float> .Zero, ax1 = ax0, ax2 = ax0;

                    for (; lcnt >= 2; lcnt -= 2)
                    {
                        var iv0 = Avx.LoadVector256(ip);
                        var iv1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var iv2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        ip += Vector256 <int> .Count * channels;

                        if (Fma.IsSupported)
                        {
                            ax0 = Fma.MultiplyAdd(Avx.LoadVector256(mp), iv0, ax0);
                            ax1 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count), iv1, ax1);
                            ax2 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count * 2), iv2, ax2);
                        }
                        else
                        {
                            ax0 = Avx.Add(ax0, Avx.Multiply(iv0, Avx.LoadVector256(mp)));
                            ax1 = Avx.Add(ax1, Avx.Multiply(iv1, Avx.LoadVector256(mp + Vector256 <float> .Count)));
                            ax2 = Avx.Add(ax2, Avx.Multiply(iv2, Avx.LoadVector256(mp + Vector256 <float> .Count * 2)));
                        }
                        mp += Vector256 <float> .Count * channels;
                    }

                    av0 = Sse.Add(ax0.GetLower(), ax1.GetUpper());
                    av1 = Sse.Add(ax0.GetUpper(), ax2.GetLower());
                    av2 = Sse.Add(ax1.GetLower(), ax2.GetUpper());
                }
                else
                {
                    av0 = av1 = av2 = Vector128 <float> .Zero;
                }

                for (; lcnt != 0; lcnt--)
                {
                    var iv0 = Sse.LoadVector128(ip);
                    var iv1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                    var iv2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                    ip += Vector128 <float> .Count * channels;

                    if (Fma.IsSupported)
                    {
                        av0 = Fma.MultiplyAdd(Sse.LoadVector128(mp), iv0, av0);
                        av1 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count), iv1, av1);
                        av2 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count * 2), iv2, av2);
                    }
                    else
                    {
                        av0 = Sse.Add(av0, Sse.Multiply(iv0, Sse.LoadVector128(mp)));
                        av1 = Sse.Add(av1, Sse.Multiply(iv1, Sse.LoadVector128(mp + Vector128 <float> .Count)));
                        av2 = Sse.Add(av2, Sse.Multiply(iv2, Sse.LoadVector128(mp + Vector128 <float> .Count * 2)));
                    }
                    mp += Vector128 <float> .Count * channels;
                }

                var avs0 = Sse.Add(Sse.Add(
                                       Sse.Shuffle(av0, av0, 0b_00_10_01_11),
                                       Sse.Shuffle(av1, av1, 0b_00_01_11_10)),
                                   Sse.Shuffle(av2, av2, 0b_00_11_10_01)
                                   );
                var avs1 = Sse3.IsSupported ?
                           Sse3.MoveHighAndDuplicate(avs0) :
                           Sse.Shuffle(avs0, avs0, 0b_11_11_01_01);
                var avs2 = Sse.UnpackHigh(avs0, avs0);

                tp[0] = Sse.AddScalar(av0, avs0).ToScalar();
                tp[1] = Sse.AddScalar(av1, avs1).ToScalar();
                tp[2] = Sse.AddScalar(av2, avs2).ToScalar();
                tp   += tstride;
            }
        }