/// <summary>
/// Builds a <see cref="ushort8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-5 from <paramref name="x345"/> and elements 6-7 from <paramref name="x67"/>.
/// </summary>
public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 selectors: a set bit i takes 16-bit element i from the second operand.
        const int TakeElements345 = 0b0011_1000;
        const int TakeElements67 = 0b1100_0000;

        // Byte-shift each trailing group up to the slot it occupies in the result.
        v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
        v128 upper = Sse2.bslli_si128(x67, 6 * sizeof(ushort));

        if (Sse4_1.IsSse41Supported)
        {
            v128 firstSix = Sse4_1.blend_epi16(x012, middle, TakeElements345);
            this = Sse4_1.blend_epi16(firstSix, upper, TakeElements67);
        }
        else
        {
            v128 firstSix = Mask.BlendEpi16_SSE2(x012, middle, TakeElements345);
            this = Mask.BlendEpi16_SSE2(firstSix, upper, TakeElements67);
        }
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x012.x;
        x1 = x012.y;
        x2 = x012.z;

        x3 = x345.x;
        x4 = x345.y;
        x5 = x345.z;

        x6 = x67.x;
        x7 = x67.y;
    }
}
/// <summary>
/// Combines a 32-bit-wise shift and a 64-bit-wise shift of <paramref name="x"/> into one result:
/// the upper 32 bits of each 64-bit element are taken from the <c>shra_int</c> result and the
/// lower 32 bits from the <c>shrl_long</c> result.
/// NOTE(review): judging by the helper names this implements an arithmetic right shift of each
/// 64-bit element by <paramref name="n"/> bits - confirm against shra_int / shrl_long.
/// </summary>
/// <param name="x">The vector to shift.</param>
/// <param name="n">The shift amount; values above 32 take the separate wide-shift path.</param>
/// <exception cref="CPUFeatureCheckException">Thrown when neither SSE4.1 nor SSE2 is supported.</exception>
internal static long2 shra_long(long2 x, int n)
{
    // A set bit i takes 16-bit element i from the second operand: elements 2,3 and 6,7,
    // i.e. the upper 32 bits of each 64-bit element.
    const int UpperDwords = 0b1100_1100;

    v128 upperSource;
    v128 lowerSource;

    if (n > 32)
    {
        // Wide shift: the upper half comes from a shift by 31, the lower half from the
        // (n - 32) 32-bit shift moved down by 32 bits.
        upperSource = shra_int(x, 31);
        lowerSource = shra_int(x, n - 32);
        lowerSource = shrl_long(lowerSource, 32);
    }
    else
    {
        upperSource = shra_int(x, n);
        lowerSource = shrl_long(x, n);
    }

    if (Sse4_1.IsSse41Supported)
    {
        return Sse4_1.blend_epi16(lowerSource, upperSource, UpperDwords);
    }

    if (Sse2.IsSse2Supported)
    {
        return Mask.BlendEpi16_SSE2(lowerSource, upperSource, UpperDwords);
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Builds a <see cref="ushort8"/> whose elements 0-1 come from <paramref name="x01"/>,
/// elements 2-4 from <paramref name="x234"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 selectors: a set bit i takes 16-bit element i from the second operand.
        const int TakeElements567 = 0b1110_0000;
        const int TakeElements2To7 = 0b1111_1100;

        // Move each trailing group to its destination element offset.
        v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
        v128 upper = Sse2.bslli_si128(x567, 5 * sizeof(ushort));

        if (Sse4_1.IsSse41Supported)
        {
            v128 lastSix = Sse4_1.blend_epi16(middle, upper, TakeElements567);
            this = Sse4_1.blend_epi16(x01, lastSix, TakeElements2To7);
        }
        else
        {
            v128 lastSix = Mask.BlendEpi16_SSE2(middle, upper, TakeElements567);
            this = Mask.BlendEpi16_SSE2(x01, lastSix, TakeElements2To7);
        }
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x01.x;
        x1 = x01.y;

        x2 = x234.x;
        x3 = x234.y;
        x4 = x234.z;

        x5 = x567.x;
        x6 = x567.y;
        x7 = x567.z;
    }
}
/// <summary>
/// Builds a <see cref="ushort8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-4 from <paramref name="x34"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 selectors: a set bit i takes 16-bit element i from the second operand.
        const int TakeElements234 = 0b0001_1100;
        const int TakeElements3To7 = 0b1111_1000;

        // First pack x34 and x567 next to each other starting at element 0 ...
        v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(ushort));

        if (Sse4_1.IsSse41Supported)
        {
            tail = Sse4_1.blend_epi16(x34, tail, TakeElements234);
            // ... then slide the packed five elements up behind x012.
            tail = Sse2.bslli_si128(tail, 3 * sizeof(ushort));
            this = Sse4_1.blend_epi16(x012, tail, TakeElements3To7);
        }
        else
        {
            tail = Mask.BlendEpi16_SSE2(x34, tail, TakeElements234);
            // ... then slide the packed five elements up behind x012.
            tail = Sse2.bslli_si128(tail, 3 * sizeof(ushort));
            this = Mask.BlendEpi16_SSE2(x012, tail, TakeElements3To7);
        }
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x012.x;
        x1 = x012.y;
        x2 = x012.z;

        x3 = x34.x;
        x4 = x34.y;

        x5 = x567.x;
        x6 = x567.y;
        x7 = x567.z;
    }
}
/// <summary>
/// Builds a <see cref="byte8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-4 from <paramref name="x34"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public byte8(byte3 x012, byte2 x34, byte3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 works on 16-bit elements: elements 1 and 2 cover bytes 2-5.
        const int TakeWords1And2 = 0b0110;

        // Pack x34 (bytes 0-1) and x567 (bytes 2-4) side by side.
        v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(byte));

        if (Sse4_1.IsSse41Supported)
        {
            tail = Sse4_1.blend_epi16(x34, tail, TakeWords1And2);
        }
        else
        {
            tail = Mask.BlendEpi16_SSE2(x34, tail, TakeWords1And2);
        }

        // Slide the packed five bytes up behind x012; the byte mask below marks bytes 3-7.
        tail = Sse2.bslli_si128(tail, 3 * sizeof(byte));

        this = Mask.BlendV(x012, tail, new byte8(0, 0, 0, 255, 255, 255, 255, 255));
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x012.x;
        x1 = x012.y;
        x2 = x012.z;

        x3 = x34.x;
        x4 = x34.y;

        x5 = x567.x;
        x6 = x567.y;
        x7 = x567.z;
    }
}
/// <summary>
/// Builds a <see cref="byte8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-5 from <paramref name="x345"/> and elements 6-7 from <paramref name="x67"/>.
/// </summary>
public byte8(byte3 x012, byte3 x345, byte2 x67)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 works on 16-bit elements: element 3 covers bytes 6-7.
        const int TakeWord3 = 0b1000;

        // Move each trailing group to its destination byte offset.
        v128 upper = Sse2.bslli_si128(x67, 6 * sizeof(byte));
        v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(byte));

        // The byte mask marks bytes 3-5, the slots the shifted x345 occupies.
        v128 firstSix = Mask.BlendV(x012, middle, new byte8(0, 0, 0, 255, 255, 255, 0, 0));

        if (Sse4_1.IsSse41Supported)
        {
            this = Sse4_1.blend_epi16(firstSix, upper, TakeWord3);
        }
        else
        {
            this = Mask.BlendEpi16_SSE2(firstSix, upper, TakeWord3);
        }
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x012.x;
        x1 = x012.y;
        x2 = x012.z;

        x3 = x345.x;
        x4 = x345.y;
        x5 = x345.z;

        x6 = x67.x;
        x7 = x67.y;
    }
}
/// <summary>
/// Builds a <see cref="byte8"/> whose elements 0-1 come from <paramref name="x01"/>,
/// elements 2-4 from <paramref name="x234"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public byte8(byte2 x01, byte3 x234, byte3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 works on 16-bit elements: elements 1-3 cover bytes 2-7.
        const int TakeWords1To3 = 0b1110;

        // Move each trailing group to its destination byte offset.
        v128 upper = Sse2.bslli_si128(x567, 5 * sizeof(byte));
        v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(byte));

        // The byte mask marks bytes 5-7, the slots the shifted x567 occupies.
        v128 lastSix = Mask.BlendV(middle, upper, new byte8(0, 0, 0, 0, 0, 255, 255, 255));

        if (Sse4_1.IsSse41Supported)
        {
            this = Sse4_1.blend_epi16(x01, lastSix, TakeWords1To3);
        }
        else
        {
            this = Mask.BlendEpi16_SSE2(x01, lastSix, TakeWords1To3);
        }
    }
    else
    {
        // Scalar path: copy component by component.
        x0 = x01.x;
        x1 = x01.y;

        x2 = x234.x;
        x3 = x234.y;
        x4 = x234.z;

        x5 = x567.x;
        x6 = x567.y;
        x7 = x567.z;
    }
}
/// <summary>
/// Builds an <see cref="int8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-5 from <paramref name="x345"/> and elements 6-7 from <paramref name="x67"/>.
/// </summary>
public int8(int3 x012, int3 x345, int2 x67)
{
    if (Sse2.IsSse2Supported)
    {
        // A set bit i takes 16-bit element i from the second operand: elements 6-7 form the fourth int.
        const int TakeFourthInt = 0b1100_0000;

        // Upper half: drop x345.x so that y,z sit in the low 64 bits, then append x67.
        v128 yzOf345 = Sse2.bsrli_si128(*(v128 *)&x345, sizeof(int));
        v128 upperHalf = Sse2.unpacklo_epi64(yzOf345, *(v128 *)&x67);

        // Lower half: x345.x is moved into the fourth int slot and merged behind x012.
        v128 xOf345InSlot3 = Sse2.bslli_si128(*(v128 *)&x345, 3 * sizeof(int));
        v128 lowerHalf;

        if (Sse4_1.IsSse41Supported)
        {
            lowerHalf = Sse4_1.blend_epi16(*(v128 *)&x012, xOf345InSlot3, TakeFourthInt);
        }
        else
        {
            lowerHalf = Mask.BlendEpi16_SSE2(*(v128 *)&x012, xOf345InSlot3, TakeFourthInt);
        }

        this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);
    }
    else
    {
        // Scalar path: assemble the two int4 halves directly.
        this = new int8
        {
            _v4_0 = new int4(x012, x345.x),
            _v4_4 = new int4(x345.yz, x67)
        };
    }
}
/// <summary>
/// Builds an <see cref="int8"/> whose elements 0-1 come from <paramref name="x01"/>,
/// elements 2-4 from <paramref name="x234"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public int8(int2 x01, int3 x234, int3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // A set bit i takes 16-bit element i from the second operand: elements 2-7 form ints 1-3.
        const int TakeInts1To3 = 0b1111_1100;

        // Lower half: x01 followed by x234.xy (low 64 bits of each operand).
        v128 lowerHalf = Sse2.unpacklo_epi64(*(v128 *)&x01, *(v128 *)&x234);

        // Upper half: x234.z moved down to int 0, x567 moved up to ints 1-3.
        v128 zOf234 = Sse2.bsrli_si128(*(v128 *)&x234, 2 * sizeof(int));
        v128 upperHalf = Sse2.bslli_si128(*(v128 *)&x567, sizeof(int));

        if (Sse4_1.IsSse41Supported)
        {
            upperHalf = Sse4_1.blend_epi16(zOf234, upperHalf, TakeInts1To3);
        }
        else
        {
            upperHalf = Mask.BlendEpi16_SSE2(zOf234, upperHalf, TakeInts1To3);
        }

        this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);
    }
    else
    {
        // Scalar path: assemble the two int4 halves directly.
        this = new int8
        {
            _v4_0 = new int4(x01, x234.xy),
            _v4_4 = new int4(x234.z, x567)
        };
    }
}
/// <summary>
/// Returns <c>(a.x - b.x, a.y + b.y)</c>.
/// </summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The operand subtracted from the x component and added to the y component.</param>
public static long2 subadd(long2 a, long2 b)
{
    if (Sse2.IsSse2Supported)
    {
        // A set bit i takes 16-bit element i from the second operand: elements 0-3 are the x component.
        const int NegateX = 0b0000_1111;

        // Adding a vector holding (-b.x, b.y) yields the subtract/add pair in one addition.
        var negatedB = -b;
        v128 signedB;

        if (Sse4_1.IsSse41Supported)
        {
            signedB = Sse4_1.blend_epi16(b, negatedB, NegateX);
        }
        else
        {
            signedB = Mask.BlendEpi16_SSE2(b, negatedB, NegateX);
        }

        return a + signedB;
    }
    else
    {
        long first = a.x - b.x;
        long second = a.y + b.y;

        return new long2(first, second);
    }
}
/// <summary>
/// Returns <c>(a.x + b.x, a.y - b.y)</c>.
/// </summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The operand added to the x component and subtracted from the y component.</param>
public static ulong2 addsub(ulong2 a, ulong2 b)
{
    if (Sse2.IsSse2Supported)
    {
        // A set bit i takes 16-bit element i from the second operand: elements 4-7 are the y component.
        const int NegateY = 0b1111_0000;

        // Adding a vector holding (b.x, 0 - b.y) yields the add/subtract pair in one addition.
        var negatedB = default(v128) - b;
        v128 signedB;

        if (Sse4_1.IsSse41Supported)
        {
            signedB = Sse4_1.blend_epi16(b, negatedB, NegateY);
        }
        else
        {
            signedB = Mask.BlendEpi16_SSE2(b, negatedB, NegateY);
        }

        return a + signedB;
    }
    else
    {
        return new ulong2(a.x + b.x, a.y - b.y);
    }
}
/// <summary>
/// Builds an <see cref="int8"/> whose elements 0-2 come from <paramref name="x012"/>,
/// elements 3-4 from <paramref name="x34"/> and elements 5-7 from <paramref name="x567"/>.
/// </summary>
public int8(int3 x012, int2 x34, int3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // blend_epi16 selectors: a set bit i takes 16-bit element i from the second operand.
        const int TakeFourthInt = 0b1100_0000;   // 16-bit elements 6-7 -> int 3
        const int TakeInts1To3 = 0b1111_1100;    // 16-bit elements 2-7 -> ints 1-3

        // x34.x moved into int slot 3 (joins x012), x34.y moved down to int slot 0 (precedes x567).
        v128 xOf34InSlot3 = Sse2.bslli_si128(*(v128 *)&x34, 3 * sizeof(int));
        v128 yOf34InSlot0 = Sse2.bsrli_si128(*(v128 *)&x34, sizeof(int));

        // x567 moved up to int slots 1-3.
        v128 shifted567 = Sse2.bslli_si128(*(v128 *)&x567, sizeof(int));

        v128 lowerHalf;
        v128 upperHalf;

        if (Sse4_1.IsSse41Supported)
        {
            lowerHalf = Sse4_1.blend_epi16(*(v128 *)&x012, xOf34InSlot3, TakeFourthInt);
            upperHalf = Sse4_1.blend_epi16(yOf34InSlot0, shifted567, TakeInts1To3);
        }
        else
        {
            lowerHalf = Mask.BlendEpi16_SSE2(*(v128 *)&x012, xOf34InSlot3, TakeFourthInt);
            upperHalf = Mask.BlendEpi16_SSE2(yOf34InSlot0, shifted567, TakeInts1To3);
        }

        this = new int8(*(int4 *)&lowerHalf, *(int4 *)&upperHalf);
    }
    else
    {
        // Scalar path: assemble the two int4 halves directly.
        this = new int8
        {
            _v4_0 = new int4(x012, x34.x),
            _v4_4 = new int4(x34.y, x567)
        };
    }
}