Beispiel #1
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x345.x, x345.y, x345.z, x67.x, x67.y).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x345">Source of lanes 3 through 5.</param>
        /// <param name="x67">Source of lanes 6 and 7.</param>
        public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // One mask bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int Lanes3To5 = 0b0011_1000;
                const int Lanes6To7 = 0b1100_0000;

                // Byte-shift the trailing pieces up to the lane offsets where they belong.
                v128 shifted345 = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
                v128 shifted67  = Sse2.bslli_si128(x67, 6 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    v128 lowerSix = Sse4_1.blend_epi16(x012, shifted345, Lanes3To5);

                    this = Sse4_1.blend_epi16(lowerSix, shifted67, Lanes6To7);
                }
                else
                {
                    // Without SSE4.1 the project's SSE2 blend helper is used instead.
                    v128 lowerSix = Mask.BlendEpi16_SSE2(x012, shifted345, Lanes3To5);

                    this = Mask.BlendEpi16_SSE2(lowerSix, shifted67, Lanes6To7);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x345.x;
            x4 = x345.y;
            x5 = x345.z;
            x6 = x67.x;
            x7 = x67.y;
        }
Beispiel #2
0
        /// <summary>
        /// Shifts each 64-bit lane of <paramref name="x"/> right by <paramref name="n"/>,
        /// assembling the result from separately computed upper and lower 32-bit halves.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>shra_int</c> and <c>shrl_long</c> are defined elsewhere; judging by
        /// their names they are a 32-bit arithmetic and a 64-bit logical right shift - confirm.
        /// Both helpers are invoked before the CPU feature check below.
        /// </remarks>
        /// <param name="x">Vector to shift.</param>
        /// <param name="n">Shift amount; values up to 32 and values above 32 take different paths.</param>
        /// <returns>The blended vector: upper 32 bits of each 64-bit lane from one partial result, lower 32 bits from the other.</returns>
        /// <exception cref="CPUFeatureCheckException">Neither SSE4.1 nor SSE2 is supported.</exception>
        internal static long2 shra_long(long2 x, int n)
        {
            // Set bits pick 16-bit lanes 2, 3, 6 and 7, i.e. the upper 32 bits of each 64-bit lane.
            const int UpperDwords = 0b1100_1100;

            v128 upperHalves;
            v128 lowerHalves;

            if (n > 32)
            {
                // Upper halves are shifted by 31; the lower halves come from the upper
                // 32 bits shifted by the remaining amount and then moved down 32 bits.
                upperHalves = shra_int(x, 31);

                v128 remainderShifted = shra_int(x, n - 32);
                lowerHalves = shrl_long(remainderShifted, 32);
            }
            else
            {
                upperHalves = shra_int(x, n);
                lowerHalves = shrl_long(x, n);
            }

            if (Sse4_1.IsSse41Supported)
            {
                return Sse4_1.blend_epi16(lowerHalves, upperHalves, UpperDwords);
            }

            if (Sse2.IsSse2Supported)
            {
                return Mask.BlendEpi16_SSE2(lowerHalves, upperHalves, UpperDwords);
            }

            throw new CPUFeatureCheckException();
        }
Beispiel #3
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x01.x, x01.y, x234.x, x234.y, x234.z, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x01">Source of lanes 0 and 1.</param>
        /// <param name="x234">Source of lanes 2 through 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // One mask bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int Lanes5To7 = 0b1110_0000;
                const int Lanes2To7 = 0b1111_1100;

                // Byte-shift the trailing pieces up to the lane offsets where they belong.
                v128 shifted234 = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
                v128 shifted567 = Sse2.bslli_si128(x567, 5 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    // First merge the two shifted pieces, then place x01 beneath them.
                    v128 upperSix = Sse4_1.blend_epi16(shifted234, shifted567, Lanes5To7);

                    this = Sse4_1.blend_epi16(x01, upperSix, Lanes2To7);
                }
                else
                {
                    // Without SSE4.1 the project's SSE2 blend helper is used instead.
                    v128 upperSix = Mask.BlendEpi16_SSE2(shifted234, shifted567, Lanes5To7);

                    this = Mask.BlendEpi16_SSE2(x01, upperSix, Lanes2To7);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x01.x;
            x1 = x01.y;
            x2 = x234.x;
            x3 = x234.y;
            x4 = x234.z;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Beispiel #4
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x34.x, x34.y, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x34">Source of lanes 3 and 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // One mask bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int Lanes2To4 = 0b0001_1100;
                const int Lanes3To7 = 0b1111_1000;

                // Step 1: put x567 directly behind x34, giving (x34, x567) in lanes 0..4.
                v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    tail = Sse4_1.blend_epi16(x34, tail, Lanes2To4);
                }
                else
                {
                    tail = Mask.BlendEpi16_SSE2(x34, tail, Lanes2To4);
                }

                // Step 2: move that five-lane run up by three lanes so it sits behind x012.
                tail = Sse2.bslli_si128(tail, 3 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    this = Sse4_1.blend_epi16(x012, tail, Lanes3To7);
                }
                else
                {
                    this = Mask.BlendEpi16_SSE2(x012, tail, Lanes3To7);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x34.x;
            x4 = x34.y;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Beispiel #5
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x34.x, x34.y, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x34">Source of lanes 3 and 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public byte8(byte3 x012, byte2 x34, byte3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend mask: set bits 1 and 2 cover bytes 2..5 of the second operand.
                const int Bytes2To5 = 0b0110;

                // Step 1: put x567 directly behind x34, giving (x34, x567) in bytes 0..4.
                v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(byte));

                if (Sse4_1.IsSse41Supported)
                {
                    tail = Sse4_1.blend_epi16(x34, tail, Bytes2To5);
                }
                else
                {
                    tail = Mask.BlendEpi16_SSE2(x34, tail, Bytes2To5);
                }

                // Step 2: move that run up by three bytes so it sits behind x012.
                tail = Sse2.bslli_si128(tail, 3 * sizeof(byte));

                // Per-byte selector: 255 marks the lanes (3..7) taken from the shifted tail.
                byte8 takeFromTail = new byte8(0, 0, 0, 255, 255, 255, 255, 255);

                this = Mask.BlendV(x012, tail, takeFromTail);

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x34.x;
            x4 = x34.y;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Beispiel #6
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x345.x, x345.y, x345.z, x67.x, x67.y).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x345">Source of lanes 3 through 5.</param>
        /// <param name="x67">Source of lanes 6 and 7.</param>
        public byte8(byte3 x012, byte3 x345, byte2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend mask: set bit 3 covers bytes 6 and 7 of the second operand.
                const int Bytes6To7 = 0b1000;

                // Byte-shift the trailing pieces up to the lane offsets where they belong.
                v128 shifted345 = Sse2.bslli_si128(x345, 3 * sizeof(byte));
                v128 shifted67  = Sse2.bslli_si128(x67, 6 * sizeof(byte));

                // Per-byte selector: 255 marks the lanes (3..5) taken from the shifted x345.
                byte8 takeFrom345 = new byte8(0, 0, 0, 255, 255, 255, 0, 0);

                v128 lowerSix = Mask.BlendV(x012, shifted345, takeFrom345);

                if (Sse4_1.IsSse41Supported)
                {
                    this = Sse4_1.blend_epi16(lowerSix, shifted67, Bytes6To7);
                }
                else
                {
                    this = Mask.BlendEpi16_SSE2(lowerSix, shifted67, Bytes6To7);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x345.x;
            x4 = x345.y;
            x5 = x345.z;
            x6 = x67.x;
            x7 = x67.y;
        }
Beispiel #7
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x01.x, x01.y, x234.x, x234.y, x234.z, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x01">Source of lanes 0 and 1.</param>
        /// <param name="x234">Source of lanes 2 through 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public byte8(byte2 x01, byte3 x234, byte3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend mask: set bits 1..3 cover bytes 2..7 of the second operand.
                const int Bytes2To7 = 0b1110;

                // Byte-shift the trailing pieces up to the lane offsets where they belong.
                v128 shifted234 = Sse2.bslli_si128(x234, 2 * sizeof(byte));
                v128 shifted567 = Sse2.bslli_si128(x567, 5 * sizeof(byte));

                // Per-byte selector: 255 marks the lanes (5..7) taken from the shifted x567.
                byte8 takeFrom567 = new byte8(0, 0, 0, 0, 0, 255, 255, 255);

                v128 upperSix = Mask.BlendV(shifted234, shifted567, takeFrom567);

                if (Sse4_1.IsSse41Supported)
                {
                    this = Sse4_1.blend_epi16(x01, upperSix, Bytes2To7);
                }
                else
                {
                    this = Mask.BlendEpi16_SSE2(x01, upperSix, Bytes2To7);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x01.x;
            x1 = x01.y;
            x2 = x234.x;
            x3 = x234.y;
            x4 = x234.z;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Beispiel #8
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x345.x, x345.y, x345.z, x67.x, x67.y).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x345">Source of lanes 3 through 5.</param>
        /// <param name="x67">Source of lanes 6 and 7.</param>
        public int8(int3 x012, int3 x345, int2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend mask: set bits 6 and 7 cover the topmost 32-bit lane.
                const int TopDword = 0b1100_0000;

                // Reinterpret each argument's storage as a 128-bit value.
                // NOTE(review): these loads span 16 bytes regardless of the argument's own size - confirm this is safe.
                v128 raw012 = *(v128 *)&x012;
                v128 raw345 = *(v128 *)&x345;
                v128 raw67  = *(v128 *)&x67;

                // Lower half: x345.x moved into the top lane, blended over x012.
                v128 x3OnTop = Sse2.bslli_si128(raw345, 3 * sizeof(int));
                v128 lowerHalf;

                if (Sse4_1.IsSse41Supported)
                {
                    lowerHalf = Sse4_1.blend_epi16(raw012, x3OnTop, TopDword);
                }
                else
                {
                    lowerHalf = Mask.BlendEpi16_SSE2(raw012, x3OnTop, TopDword);
                }

                // Upper half: (x345.y, x345.z) dropped to the bottom, then paired with x67.
                v128 x45AtBottom = Sse2.bsrli_si128(raw345, sizeof(int));
                v128 upperHalf   = Sse2.unpacklo_epi64(x45AtBottom, raw67);

                this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);

                return;
            }

            // Scalar path: assemble the two four-lane halves component-wise.
            int4 firstFour = new int4(x012, x345.x);
            int4 lastFour  = new int4(x345.yz, x67);

            this = new int8
            {
                _v4_0 = firstFour,
                _v4_4 = lastFour
            };
        }
Beispiel #9
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x01.x, x01.y, x234.x, x234.y, x234.z, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x01">Source of lanes 0 and 1.</param>
        /// <param name="x234">Source of lanes 2 through 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public int8(int2 x01, int3 x234, int3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend mask: set bits 2..7 cover the upper three 32-bit lanes.
                const int UpperThreeDwords = 0b1111_1100;

                // Reinterpret each argument's storage as a 128-bit value.
                // NOTE(review): these loads span 16 bytes regardless of the argument's own size - confirm this is safe.
                v128 raw01  = *(v128 *)&x01;
                v128 raw234 = *(v128 *)&x234;
                v128 raw567 = *(v128 *)&x567;

                // Lower half: x01 followed by (x234.x, x234.y).
                v128 lowerHalf = Sse2.unpacklo_epi64(raw01, raw234);

                // Upper half: x234.z dropped into lane 0, x567 lifted into lanes 1..3.
                v128 x4AtBottom = Sse2.bsrli_si128(raw234, 2 * sizeof(int));
                v128 x567Lifted = Sse2.bslli_si128(raw567, sizeof(int));
                v128 upperHalf;

                if (Sse4_1.IsSse41Supported)
                {
                    upperHalf = Sse4_1.blend_epi16(x4AtBottom, x567Lifted, UpperThreeDwords);
                }
                else
                {
                    upperHalf = Mask.BlendEpi16_SSE2(x4AtBottom, x567Lifted, UpperThreeDwords);
                }

                this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);

                return;
            }

            // Scalar path: assemble the two four-lane halves component-wise.
            int4 firstFour = new int4(x01, x234.xy);
            int4 lastFour  = new int4(x234.z, x567);

            this = new int8
            {
                _v4_0 = firstFour,
                _v4_4 = lastFour
            };
        }
Beispiel #10
0
 /// <summary>
 /// Subtracts in the first lane and adds in the second:
 /// returns (a.x - b.x, a.y + b.y).
 /// </summary>
 /// <param name="a">Left operand.</param>
 /// <param name="b">Right operand; its x component is subtracted, its y component added.</param>
 /// <returns>The vector (a.x - b.x, a.y + b.y).</returns>
 public static long2 subadd(long2 a, long2 b)
 {
     if (Sse2.IsSse2Supported)
     {
         // Set bits 0..3 cover the low 64-bit lane, which is taken from the negated copy.
         const int LowQword = 0b0000_1111;

         var negatedB = -b;
         v128 signedB;

         if (Sse4_1.IsSse41Supported)
         {
             signedB = Sse4_1.blend_epi16(b, negatedB, LowQword);
         }
         else
         {
             signedB = Mask.BlendEpi16_SSE2(b, negatedB, LowQword);
         }

         // Adding (-b.x, b.y) performs the subtract/add pair in a single vector add.
         return a + signedB;
     }

     return new long2(a.x - b.x, a.y + b.y);
 }
Beispiel #11
0
 /// <summary>
 /// Adds in the first lane and subtracts in the second:
 /// returns (a.x + b.x, a.y - b.y).
 /// </summary>
 /// <param name="a">Left operand.</param>
 /// <param name="b">Right operand; its x component is added, its y component subtracted.</param>
 /// <returns>The vector (a.x + b.x, a.y - b.y).</returns>
 public static ulong2 addsub(ulong2 a, ulong2 b)
 {
     if (Sse2.IsSse2Supported)
     {
         // Set bits 4..7 cover the high 64-bit lane, which is taken from the negated copy.
         const int HighQword = 0b1111_0000;

         // Negation is spelled as a subtraction from an all-zero vector.
         var negatedB = default(v128) - b;
         v128 signedB;

         if (Sse4_1.IsSse41Supported)
         {
             signedB = Sse4_1.blend_epi16(b, negatedB, HighQword);
         }
         else
         {
             signedB = Mask.BlendEpi16_SSE2(b, negatedB, HighQword);
         }

         // Adding (b.x, -b.y) performs the add/subtract pair in a single vector add.
         return a + signedB;
     }

     return new ulong2(a.x + b.x, a.y - b.y);
 }
Beispiel #12
0
        /// <summary>
        /// Builds a vector whose eight lanes are, in order,
        /// (x012.x, x012.y, x012.z, x34.x, x34.y, x567.x, x567.y, x567.z).
        /// </summary>
        /// <param name="x012">Source of lanes 0 through 2.</param>
        /// <param name="x34">Source of lanes 3 and 4.</param>
        /// <param name="x567">Source of lanes 5 through 7.</param>
        public int8(int3 x012, int2 x34, int3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // 16-bit blend masks: one bit per 16-bit lane, set bits take from the second operand.
                const int TopDword         = 0b1100_0000;
                const int UpperThreeDwords = 0b1111_1100;

                // Reinterpret each argument's storage as a 128-bit value.
                // NOTE(review): these loads span 16 bytes regardless of the argument's own size - confirm this is safe.
                v128 raw012 = *(v128 *)&x012;
                v128 raw34  = *(v128 *)&x34;
                v128 raw567 = *(v128 *)&x567;

                // x34.x goes to the top lane of the lower half; x34.y to lane 0 of the upper half.
                v128 x3OnTop    = Sse2.bslli_si128(raw34, 3 * sizeof(int));
                v128 x4AtBottom = Sse2.bsrli_si128(raw34, sizeof(int));
                v128 x567Lifted = Sse2.bslli_si128(raw567, sizeof(int));

                v128 lowerHalf;
                v128 upperHalf;

                if (Sse4_1.IsSse41Supported)
                {
                    lowerHalf = Sse4_1.blend_epi16(raw012, x3OnTop, TopDword);
                    upperHalf = Sse4_1.blend_epi16(x4AtBottom, x567Lifted, UpperThreeDwords);
                }
                else
                {
                    lowerHalf = Mask.BlendEpi16_SSE2(raw012, x3OnTop, TopDword);
                    upperHalf = Mask.BlendEpi16_SSE2(x4AtBottom, x567Lifted, UpperThreeDwords);
                }

                this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);

                return;
            }

            // Scalar path: assemble the two four-lane halves component-wise.
            int4 firstFour = new int4(x012, x34.x);
            int4 lastFour  = new int4(x34.y, x567);

            this = new int8
            {
                _v4_0 = firstFour,
                _v4_4 = lastFour
            };
        }