Beispiel #1
0
            /// <summary>
            /// Widens the first four unsigned 16-bit elements of <paramref name="a"/> to 32-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the four ushort elements starting at UShort0 are read.</param>
            /// <returns>A vector whose four int elements (starting at SInt0) hold the zero-extended inputs.</returns>
            public static v128 cvtepu16_epi32(v128 a)
            {
                v128 widened = default(v128);
                ushort* source = &a.UShort0;
                int* target = &widened.SInt0;

                for (int lane = 0; lane < 4; ++lane)
                {
                    // ushort -> int is an implicit widening conversion, so the value is preserved unchanged.
                    target[lane] = source[lane];
                }

                return widened;
            }
Beispiel #2
0
            /// <summary>
            /// Widens the first two unsigned 32-bit elements of <paramref name="a"/> to 64-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the two uint elements starting at UInt0 are read.</param>
            /// <returns>A vector whose two long elements (starting at SLong0) hold the zero-extended inputs.</returns>
            public static v128 cvtepu32_epi64(v128 a)
            {
                v128 widened = default(v128);
                uint* source = &a.UInt0;
                long* target = &widened.SLong0;

                // uint -> long is a lossless implicit conversion; the result is never negative.
                target[0] = source[0];
                target[1] = source[1];

                return widened;
            }
Beispiel #3
0
            /// <summary>
            /// Widens the first four unsigned 8-bit elements of <paramref name="a"/> to 32-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the four bytes starting at Byte0 are read.</param>
            /// <returns>A vector whose four int elements (starting at SInt0) hold the zero-extended inputs.</returns>
            public static v128 cvtepu8_epi32(v128 a)
            {
                v128 widened = default(v128);
                byte* source = &a.Byte0;
                int* target = &widened.SInt0;

                // Walk both cursors forward in lock-step, one element per iteration.
                int remaining = 4;
                while (remaining-- > 0)
                {
                    *target++ = *source++;
                }

                return widened;
            }
Beispiel #4
0
            /// <summary>
            /// Widens the first two unsigned 8-bit elements of <paramref name="a"/> to 64-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the two bytes starting at Byte0 are read.</param>
            /// <returns>A vector whose two long elements (starting at SLong0) hold the zero-extended inputs.</returns>
            public static v128 cvtepu8_epi64(v128 a)
            {
                v128 widened = default(v128);
                byte* source = &a.Byte0;
                long* target = &widened.SLong0;

                // byte -> long is an implicit widening conversion; only two elements are produced.
                target[0] = source[0];
                target[1] = source[1];

                return widened;
            }
Beispiel #5
0
            /// <summary>
            /// Widens the first eight unsigned 8-bit elements of <paramref name="a"/> to 16-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the eight bytes starting at Byte0 are read.</param>
            /// <returns>A vector whose eight short elements (starting at SShort0) hold the zero-extended inputs.</returns>
            public static v128 cvtepu8_epi16(v128 a)
            {
                const int LaneCount = 8;

                v128 widened = default(v128);
                byte* source = &a.Byte0;
                short* target = &widened.SShort0;

                for (int lane = 0; lane < LaneCount; ++lane)
                {
                    // Every byte value fits in a short, so the implicit conversion cannot lose data.
                    target[lane] = source[lane];
                }

                return widened;
            }
Beispiel #6
0
            /// <summary>
            /// Widens the first two signed 16-bit elements of <paramref name="a"/> to 64-bit integers.
            /// </summary>
            /// <param name="a">Source vector; the two short elements starting at SShort0 are read.</param>
            /// <returns>A vector whose two long elements (starting at SLong0) hold the sign-extended inputs.</returns>
            public static v128 cvtepi16_epi64(v128 a)
            {
                v128 widened = default(v128);
                short* source = &a.SShort0;
                long* target = &widened.SLong0;

                // short -> long keeps the sign, so negative inputs stay negative.
                target[0] = source[0];
                target[1] = source[1];

                return widened;
            }
Beispiel #7
0
            /// <summary>
            /// Computes the absolute value of each of the four signed 32-bit elements of <paramref name="a"/>.
            /// </summary>
            /// <param name="a">Source vector; the four int elements starting at SInt0 are read.</param>
            /// <returns>A vector whose four uint elements (starting at UInt0) hold the magnitudes.</returns>
            public static v128 abs_epi32(v128 a)
            {
                v128 magnitudes = default(v128);
                int* source = &a.SInt0;
                uint* target = &magnitudes.UInt0;

                for (int lane = 0; lane < 4; ++lane)
                {
                    // Widen to long first so that int.MinValue has a representable absolute value.
                    long wide = source[lane];
                    target[lane] = (uint)Math.Abs(wide);
                }

                return magnitudes;
            }
Beispiel #8
0
            /// <summary>
            /// Computes the absolute value of each of the eight signed 16-bit elements of <paramref name="a"/>.
            /// </summary>
            /// <param name="a">Source vector; the eight short elements starting at SShort0 are read.</param>
            /// <returns>A vector whose eight ushort elements (starting at UShort0) hold the magnitudes.</returns>
            public static v128 abs_epi16(v128 a)
            {
                v128 magnitudes = default(v128);
                short* source = &a.SShort0;
                ushort* target = &magnitudes.UShort0;

                for (int lane = 0; lane < 8; ++lane)
                {
                    // Math.Abs runs on int, so short.MinValue (-32768) yields 32768, which fits in a ushort.
                    int wide = source[lane];
                    target[lane] = (ushort)Math.Abs(wide);
                }

                return magnitudes;
            }
Beispiel #9
0
            /// <summary>
            /// Computes the absolute value of each of the sixteen signed 8-bit elements of <paramref name="a"/>.
            /// </summary>
            /// <param name="a">Source vector; the sixteen sbyte elements starting at SByte0 are read.</param>
            /// <returns>A vector whose sixteen bytes (starting at Byte0) hold the magnitudes.</returns>
            public static v128 abs_epi8(v128 a)
            {
                v128 magnitudes = default(v128);
                sbyte* source = &a.SByte0;
                byte* target = &magnitudes.Byte0;

                for (int lane = 0; lane < 16; ++lane)
                {
                    // Math.Abs runs on int, so sbyte.MinValue (-128) yields 128, which fits in a byte.
                    int wide = source[lane];
                    target[lane] = (byte)Math.Abs(wide);
                }

                return magnitudes;
            }
            /// <summary>
            /// Element-wise minimum of four unsigned 32-bit elements.
            /// </summary>
            /// <param name="a">First operand; the four uint elements starting at UInt0 are read.</param>
            /// <param name="b">Second operand; the four uint elements starting at UInt0 are read.</param>
            /// <returns>A vector whose four uint elements hold the smaller value of each pair.</returns>
            public static v128 min_epu32(v128 a, v128 b)
            {
                v128 result = default(v128);
                uint* lhs = &a.UInt0;
                uint* rhs = &b.UInt0;
                uint* target = &result.UInt0;

                for (int lane = 0; lane < 4; ++lane)
                {
                    uint x = lhs[lane];
                    uint y = rhs[lane];
                    target[lane] = x < y ? x : y;
                }

                return result;
            }
            /// <summary>
            /// Element-wise minimum of sixteen signed 8-bit elements.
            /// </summary>
            /// <param name="a">First operand; the sixteen sbyte elements starting at SByte0 are read.</param>
            /// <param name="b">Second operand; the sixteen sbyte elements starting at SByte0 are read.</param>
            /// <returns>A vector whose sixteen sbyte elements hold the smaller value of each pair.</returns>
            public static v128 min_epi8(v128 a, v128 b)
            {
                v128 result = default(v128);
                sbyte* lhs = &a.SByte0;
                sbyte* rhs = &b.SByte0;
                sbyte* target = &result.SByte0;

                for (int lane = 0; lane < 16; ++lane)
                {
                    sbyte x = lhs[lane];
                    sbyte y = rhs[lane];
                    target[lane] = x < y ? x : y;
                }

                return result;
            }
            /// <summary>
            /// Element-wise product of four signed 32-bit elements, stored back as 32-bit values.
            /// </summary>
            /// <param name="a">First operand; the four int elements starting at SInt0 are read.</param>
            /// <param name="b">Second operand; the four int elements starting at SInt0 are read.</param>
            /// <returns>A vector whose four int elements hold the int-typed product of each pair.</returns>
            public static v128 mullo_epi32(v128 a, v128 b)
            {
                v128 products = default(v128);
                int* lhs = &a.SInt0;
                int* rhs = &b.SInt0;
                int* target = &products.SInt0;

                int lane = 0;
                while (lane < 4)
                {
                    // int * int stays int, so only the low 32 bits of the full product are kept.
                    target[lane] = lhs[lane] * rhs[lane];
                    ++lane;
                }

                return products;
            }
            /// <summary>
            /// Element-wise maximum of four signed 32-bit elements.
            /// </summary>
            /// <param name="a">First operand; the four int elements starting at SInt0 are read.</param>
            /// <param name="b">Second operand; the four int elements starting at SInt0 are read.</param>
            /// <returns>A vector whose four int elements hold the larger value of each pair.</returns>
            public static v128 max_epi32(v128 a, v128 b)
            {
                v128 result = default(v128);
                int* lhs = &a.SInt0;
                int* rhs = &b.SInt0;
                int* target = &result.SInt0;

                for (int lane = 0; lane < 4; ++lane)
                {
                    int x = lhs[lane];
                    int y = rhs[lane];
                    target[lane] = x > y ? x : y;
                }

                return result;
            }
            /// <summary>
            /// Conditional two-element double-precision dot product controlled by <paramref name="imm8"/>.
            /// </summary>
            /// <param name="a">First operand; Double0 and Double1 are read.</param>
            /// <param name="b">Second operand; Double0 and Double1 are read.</param>
            /// <param name="imm8">
            /// Bits 4 and 5 select which element products take part in the sum (an unselected product counts as 0);
            /// bits 0 and 1 select which result elements receive the sum (unselected elements are set to 0).
            /// </param>
            /// <returns>A vector holding the sum in every element enabled by the low two bits of imm8.</returns>
            public static v128 dp_pd(v128 a, v128 b, int imm8)
            {
                double lowProduct = 0.0;
                if ((imm8 & 0x10) != 0)
                {
                    lowProduct = a.Double0 * b.Double0;
                }

                double highProduct = 0.0;
                if ((imm8 & 0x20) != 0)
                {
                    highProduct = a.Double1 * b.Double1;
                }

                double dot = lowProduct + highProduct;

                v128 result = default(v128);

                if ((imm8 & 1) != 0)
                {
                    result.Double0 = dot;
                }
                else
                {
                    result.Double0 = 0.0;
                }

                if ((imm8 & 2) != 0)
                {
                    result.Double1 = dot;
                }
                else
                {
                    result.Double1 = 0.0;
                }

                return result;
            }
            /// <summary>
            /// Element-wise minimum of eight unsigned 16-bit elements.
            /// </summary>
            /// <param name="a">First operand; the eight ushort elements starting at UShort0 are read.</param>
            /// <param name="b">Second operand; the eight ushort elements starting at UShort0 are read.</param>
            /// <returns>A vector whose eight ushort elements hold the smaller value of each pair.</returns>
            public static v128 min_epu16(v128 a, v128 b)
            {
                v128 result = default(v128);
                ushort* lhs = &a.UShort0;
                ushort* rhs = &b.UShort0;
                ushort* target = &result.UShort0;

                for (int lane = 0; lane < 8; ++lane)
                {
                    ushort x = lhs[lane];
                    ushort y = rhs[lane];
                    target[lane] = x < y ? x : y;
                }

                return result;
            }
            /// <summary>
            /// Horizontal subtraction of adjacent signed 16-bit pairs, passed through Saturate_To_Int16.
            /// </summary>
            /// <param name="a">Supplies the four pairs whose differences fill result elements 0..3.</param>
            /// <param name="b">Supplies the four pairs whose differences fill result elements 4..7.</param>
            /// <returns>
            /// A vector of eight short elements; each is (even element - odd element) of one input pair after
            /// Saturate_To_Int16 (defined elsewhere; presumably clamps to the short range).
            /// </returns>
            public static v128 hsubs_epi16(v128 a, v128 b)
            {
                v128 result = default(v128);
                short* first = &a.SShort0;
                short* second = &b.SShort0;
                short* target = &result.SShort0;

                for (int pair = 0; pair < 4; ++pair)
                {
                    int even = 2 * pair;
                    int odd = even + 1;

                    // The subtraction is carried out in int, so it cannot wrap before saturation.
                    target[pair] = Saturate_To_Int16(first[even] - first[odd]);
                    target[pair + 4] = Saturate_To_Int16(second[even] - second[odd]);
                }

                return result;
            }
            /// <summary>
            /// Packs eight signed 32-bit elements (four from each input) into eight unsigned 16-bit elements.
            /// </summary>
            /// <param name="a">Supplies SInt0..SInt3, which become result elements UShort0..UShort3.</param>
            /// <param name="b">Supplies SInt0..SInt3, which become result elements UShort4..UShort7.</param>
            /// <returns>
            /// A vector of eight ushort elements, each produced by Saturate_To_UnsignedInt16 (defined elsewhere;
            /// presumably clamps to the ushort range).
            /// </returns>
            public static v128 packus_epi32(v128 a, v128 b)
            {
                v128 packed = default(v128);
                int* first = &a.SInt0;
                int* second = &b.SInt0;
                ushort* target = &packed.UShort0;

                // Elements are addressed through pointers from the first field, as elsewhere in this file.
                for (int lane = 0; lane < 4; ++lane)
                {
                    target[lane] = Saturate_To_UnsignedInt16(first[lane]);
                }

                for (int lane = 0; lane < 4; ++lane)
                {
                    target[lane + 4] = Saturate_To_UnsignedInt16(second[lane]);
                }

                return packed;
            }
            /// <summary>
            /// Copies one float element of <paramref name="b"/> into a copy of <paramref name="a"/>, then zeroes
            /// the elements selected by a mask.
            /// </summary>
            /// <param name="a">Vector providing the initial contents of the result.</param>
            /// <param name="b">Vector from which a single float element is taken.</param>
            /// <param name="imm8">
            /// Bits 6..7: index of the element read from b. Bits 4..5: index of the element overwritten in the
            /// result. Bits 0..3: zero mask; result element i is set to 0 when bit i is set.
            /// </param>
            /// <returns>The modified copy of a.</returns>
            public static v128 insert_ps(v128 a, v128 b, int imm8)
            {
                v128 result = a;
                float* lanes = &result.Float0;
                float* donor = &b.Float0;

                int sourceIndex = (imm8 >> 6) & 3;
                int targetIndex = (imm8 >> 4) & 3;
                lanes[targetIndex] = donor[sourceIndex];

                // The zero mask is applied after the insertion, so it can clear the inserted element too.
                for (int lane = 0; lane < 4; ++lane)
                {
                    bool clear = ((imm8 >> lane) & 1) != 0;
                    if (clear)
                    {
                        lanes[lane] = 0.0f;
                    }
                }

                return result;
            }
Beispiel #19
0
            /// <summary>
            /// Rewrites the four row vectors in place via two rounds of shuffle_ps calls.
            /// </summary>
            /// <remarks>
            /// NOTE(review): the name and the shuffle constants suggest a 4x4 single-precision transpose, but
            /// that depends on the semantics of shuffle_ps, which is defined elsewhere - confirm.
            /// </remarks>
            /// <param name="row0">First row; read, then overwritten.</param>
            /// <param name="row1">Second row; read, then overwritten.</param>
            /// <param name="row2">Third row; read, then overwritten.</param>
            /// <param name="row3">Fourth row; read, then overwritten.</param>
            public static void TRANSPOSE4_PS(ref v128 row0, ref v128 row1, ref v128 row2, ref v128 row3)
            {
                // First round: every input row is read here, before any of them is overwritten.
                v128 mix01Low = shuffle_ps(row0, row1, 0x44);
                v128 mix01High = shuffle_ps(row0, row1, 0xEE);
                v128 mix23Low = shuffle_ps(row2, row3, 0x44);
                v128 mix23High = shuffle_ps(row2, row3, 0xEE);

                // Second round: combine the intermediates into the final rows.
                row0 = shuffle_ps(mix01Low, mix23Low, 0x88);
                row1 = shuffle_ps(mix01Low, mix23Low, 0xDD);
                row2 = shuffle_ps(mix01High, mix23High, 0x88);
                row3 = shuffle_ps(mix01High, mix23High, 0xDD);
            }
            /// <summary>
            /// Multiplies unsigned bytes of <paramref name="a"/> by signed bytes of <paramref name="b"/> and adds
            /// adjacent products into eight 16-bit results.
            /// </summary>
            /// <param name="a">Operand read as sixteen unsigned bytes starting at Byte0.</param>
            /// <param name="b">Operand read as sixteen signed bytes starting at SByte0.</param>
            /// <returns>
            /// A vector of eight short elements; each is the sum of two adjacent products after Saturate_To_Int16
            /// (defined elsewhere; presumably clamps to the short range).
            /// </returns>
            public static v128 maddubs_epi16(v128 a, v128 b)
            {
                v128 result = default(v128);
                byte* unsignedBytes = &a.Byte0;
                sbyte* signedBytes = &b.SByte0;
                short* target = &result.SShort0;

                for (int pair = 0; pair < 8; ++pair)
                {
                    int even = 2 * pair;
                    int odd = even + 1;

                    // Products and their sum are computed in int, so nothing wraps before saturation.
                    int oddProduct = unsignedBytes[odd] * signedBytes[odd];
                    int evenProduct = unsignedBytes[even] * signedBytes[even];
                    target[pair] = Saturate_To_Int16(oddProduct + evenProduct);
                }

                return result;
            }
            /// <summary>
            /// Horizontal addition of adjacent signed 16-bit pairs, truncated to 16 bits.
            /// </summary>
            /// <param name="a">Supplies the four pairs whose sums fill result elements 0..3.</param>
            /// <param name="b">Supplies the four pairs whose sums fill result elements 4..7.</param>
            /// <returns>A vector of eight short elements holding the pair sums cast back to short.</returns>
            public static v128 hadd_epi16(v128 a, v128 b)
            {
                v128 result = default(v128);
                short* first = &a.SShort0;
                short* second = &b.SShort0;
                short* target = &result.SShort0;

                for (int pair = 0; pair < 4; ++pair)
                {
                    int even = 2 * pair;
                    int odd = even + 1;

                    // The sum is formed in int and then cast to short; no saturation is applied.
                    target[pair] = (short)(first[odd] + first[even]);
                    target[pair + 4] = (short)(second[odd] + second[even]);
                }

                return result;
            }
            /// <summary>
            /// Conditional four-element single-precision dot product controlled by <paramref name="imm8"/>.
            /// </summary>
            /// <param name="a">First operand; Float0..Float3 are read.</param>
            /// <param name="b">Second operand; Float0..Float3 are read.</param>
            /// <param name="imm8">
            /// Bits 4..7 select which element products take part in the sum (an unselected product counts as 0);
            /// bits 0..3 select which result elements receive the sum (unselected elements are set to 0).
            /// </param>
            /// <returns>A vector holding the sum in every element enabled by the low four bits of imm8.</returns>
            public static v128 dp_ps(v128 a, v128 b, int imm8)
            {
                // Explicit float casts force every intermediate to be rounded to single precision.
                float t0 = (imm8 & 0x10) != 0 ? (float)(a.Float0 * b.Float0) : 0.0f;
                float t1 = (imm8 & 0x20) != 0 ? (float)(a.Float1 * b.Float1) : 0.0f;
                float t2 = (imm8 & 0x40) != 0 ? (float)(a.Float2 * b.Float2) : 0.0f;
                float t3 = (imm8 & 0x80) != 0 ? (float)(a.Float3 * b.Float3) : 0.0f;

                // Add pairwise, (t0 + t1) + (t2 + t3), as in the documented DPPS operation.
                // A left-to-right chain ((t0 + t1) + t2) + t3 can round differently.
                float lowPair = (float)(t0 + t1);
                float highPair = (float)(t2 + t3);
                float sum = (float)(lowPair + highPair);

                v128 dst = default(v128);

                dst.Float0 = (imm8 & 1) != 0 ? sum : 0.0f;
                dst.Float1 = (imm8 & 2) != 0 ? sum : 0.0f;
                dst.Float2 = (imm8 & 4) != 0 ? sum : 0.0f;
                dst.Float3 = (imm8 & 8) != 0 ? sum : 0.0f;

                return dst;
            }
            /// <summary>
            /// Multiplies eight signed 16-bit pairs and keeps a rounded, scaled 16-bit slice of each product.
            /// </summary>
            /// <param name="a">First operand; the eight short elements starting at SShort0 are read.</param>
            /// <param name="b">Second operand; the eight short elements starting at SShort0 are read.</param>
            /// <returns>
            /// A vector of eight short elements, each equal to (((a * b) >> 14) + 1) >> 1 cast to short.
            /// </returns>
            public static v128 mulhrs_epi16(v128 a, v128 b)
            {
                v128 result = default(v128);
                short* lhs = &a.SShort0;
                short* rhs = &b.SShort0;
                short* target = &result.SShort0;

                for (int lane = 0; lane < 8; ++lane)
                {
                    int product = lhs[lane] * rhs[lane];

                    // Shift by 14, add the rounding bit, then shift once more.
                    int rounded = ((product >> 14) + 1) >> 1;
                    target[lane] = (short)rounded;
                }

                return result;
            }
            /// <summary>
            /// Computes eight sums of absolute differences between sliding 4-byte windows of
            /// <paramref name="a"/> and one fixed 4-byte group of <paramref name="b"/>.
            /// </summary>
            /// <param name="a">Source of the sliding windows.</param>
            /// <param name="b">Source of the fixed 4-byte reference group.</param>
            /// <param name="imm8">
            /// Bit 2 selects the starting byte of the windows in a (offset 0 or 4); bits 0..1 select the
            /// reference group in b (offset 0, 4, 8 or 12).
            /// </param>
            /// <returns>
            /// A vector of eight ushort elements; element j is the sum of |window[j + k] - reference[k]| for
            /// k = 0..3.
            /// </returns>
            public static v128 mpsadbw_epu8(v128 a, v128 b, int imm8)
            {
                v128 result = default(v128);
                ushort* target = &result.UShort0;

                int windowOffset = ((imm8 >> 2) & 1) * 4;
                int referenceOffset = (imm8 & 3) * 4;
                byte* window = &a.Byte0 + windowOffset;
                byte* reference = &b.Byte0 + referenceOffset;

                for (int start = 0; start < 8; ++start)
                {
                    // Each difference is at most 255, so the int accumulator cannot overflow.
                    int total = 0;
                    for (int k = 0; k < 4; ++k)
                    {
                        total += Math.Abs(window[start + k] - reference[k]);
                    }

                    target[start] = (ushort)total;
                }

                return result;
            }
            /// <summary>
            /// Byte shuffle: builds each result byte from <paramref name="a"/> under control of the
            /// corresponding byte in <paramref name="b"/>.
            /// </summary>
            /// <param name="a">Vector supplying the data bytes.</param>
            /// <param name="b">
            /// Control vector. If bit 7 of a control byte is set, the result byte is 0; otherwise the low four
            /// bits are the index of the byte taken from a.
            /// </param>
            /// <returns>A vector of sixteen shuffled bytes.</returns>
            public static v128 shuffle_epi8(v128 a, v128 b)
            {
                v128 shuffled = default(v128);
                byte* data = &a.Byte0;
                byte* control = &b.Byte0;
                byte* target = &shuffled.Byte0;

                for (int lane = 0; lane < 16; ++lane)
                {
                    byte selector = control[lane];
                    bool zeroOut = (selector & 0x80) != 0;
                    target[lane] = zeroOut ? (byte)0x00 : data[selector & 15];
                }

                return shuffled;
            }
            /// <summary>
            /// Selects each of eight 16-bit elements from <paramref name="a"/> or <paramref name="b"/>
            /// according to a bit mask.
            /// </summary>
            /// <param name="a">Element source used when the mask bit is clear.</param>
            /// <param name="b">Element source used when the mask bit is set.</param>
            /// <param name="imm8">Bit j controls element j (j = 0..7).</param>
            /// <returns>The blended vector.</returns>
            public static v128 blend_epi16(v128 a, v128 b, int imm8)
            {
                v128 blended = default(v128);
                short* whenClear = &a.SShort0;
                short* whenSet = &b.SShort0;
                short* target = &blended.SShort0;

                for (int lane = 0; lane < 8; ++lane)
                {
                    bool takeSecond = ((imm8 >> lane) & 1) != 0;
                    short* chosen = takeSecond ? whenSet : whenClear;
                    target[lane] = chosen[lane];
                }

                return blended;
            }
            /// <summary>
            /// Finds the smallest of the eight unsigned 16-bit elements of <paramref name="a"/> and its index.
            /// </summary>
            /// <param name="a">Vector to search; the eight ushort elements starting at UShort0 are read.</param>
            /// <returns>
            /// A vector with the minimum in UShort0 and its index in UShort1; everything else is zero. When the
            /// minimum occurs more than once, the lowest index is reported.
            /// </returns>
            public static v128 minpos_epu16(v128 a)
            {
                ushort* lanes = &a.UShort0;
                ushort smallest = a.UShort0;
                int position = 0;

                for (int lane = 1; lane < 8; ++lane)
                {
                    ushort candidate = lanes[lane];

                    // Strict comparison: ties keep the earlier index.
                    if (candidate >= smallest)
                    {
                        continue;
                    }

                    smallest = candidate;
                    position = lane;
                }

                v128 result = default(v128);
                result.UShort0 = smallest;
                result.UShort1 = (ushort)position;

                return result;
            }
            /// <summary>
            /// Selects each of four float elements from <paramref name="a"/> or <paramref name="b"/>
            /// according to a bit mask.
            /// </summary>
            /// <param name="a">Element source used when the mask bit is clear.</param>
            /// <param name="b">Element source used when the mask bit is set.</param>
            /// <param name="imm8">Bit j controls element j (j = 0..3).</param>
            /// <returns>The blended vector.</returns>
            public static v128 blend_ps(v128 a, v128 b, int imm8)
            {
                v128 blended = default(v128);
                float* whenClear = &a.Float0;
                float* whenSet = &b.Float0;
                float* target = &blended.Float0;

                for (int lane = 0; lane < 4; ++lane)
                {
                    int bit = 1 << lane;
                    float* chosen = (imm8 & bit) != 0 ? whenSet : whenClear;
                    target[lane] = chosen[lane];
                }

                return blended;
            }
            /// <summary>
            /// Selects each of two double elements from <paramref name="a"/> or <paramref name="b"/>
            /// according to a bit mask.
            /// </summary>
            /// <param name="a">Element source used when the mask bit is clear.</param>
            /// <param name="b">Element source used when the mask bit is set.</param>
            /// <param name="imm8">Bit j controls element j (j = 0..1).</param>
            /// <returns>The blended vector.</returns>
            public static v128 blend_pd(v128 a, v128 b, int imm8)
            {
                v128 blended = default(v128);
                double* whenClear = &a.Double0;
                double* whenSet = &b.Double0;
                double* target = &blended.Double0;

                for (int lane = 0; lane < 2; ++lane)
                {
                    int bit = 1 << lane;
                    double* chosen = (imm8 & bit) != 0 ? whenSet : whenClear;
                    target[lane] = chosen[lane];
                }

                return blended;
            }
            /// <summary>
            /// Selects each of two double elements from <paramref name="a"/> or <paramref name="b"/>
            /// according to a per-element mask vector.
            /// </summary>
            /// <param name="a">Element source used when the mask element is non-negative.</param>
            /// <param name="b">Element source used when the mask element is negative.</param>
            /// <param name="mask">
            /// Read as two signed 64-bit elements starting at SLong0; only the sign of each one matters.
            /// </param>
            /// <returns>The blended vector.</returns>
            public static v128 blendv_pd(v128 a, v128 b, v128 mask)
            {
                v128 blended = default(v128);
                double* whenClear = &a.Double0;
                double* whenSet = &b.Double0;
                long* selector = &mask.SLong0;
                double* target = &blended.Double0;

                for (int lane = 0; lane < 2; ++lane)
                {
                    // A negative signed value means the top bit of the 64-bit mask element is set.
                    double* chosen = selector[lane] < 0 ? whenSet : whenClear;
                    target[lane] = chosen[lane];
                }

                return blended;
            }