/// <summary>
/// Returns the sum of all 32 components of <paramref name="x"/>.
/// </summary>
/// <param name="x">The vector whose components are added together.</param>
/// <returns>The horizontal sum as an <see cref="int"/>.</returns>
public static int csum(sbyte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        // Convert both 16-element halves to short lanes, then add them lane-wise.
        short16 halves = (short16)x.v16_0 + (short16)x.v16_16;
        short8 acc = halves.v8_0 + halves.v8_8;

        // Fold 8 -> 4 -> 2 -> 1 lanes; the final total ends up in lane 0.
        acc += Sse2.unpackhi_epi64(acc, acc);
        acc += Sse2.shufflelo_epi16(acc, Sse.SHUFFLE(0, 1, 2, 3));
        v128 total = Sse2.add_epi16(acc, Sse2.shufflelo_epi16(acc, Sse.SHUFFLE(0, 0, 0, 1)));

        return total.SShort0;
    }

    if (Sse2.IsSse2Supported)
    {
        // Same reduction, starting from the four 8-element quarters.
        short8 acc = ((short8)x.v8_0 + (short8)x.v8_8) + ((short8)x.v8_16 + (short8)x.v8_24);

        acc += Sse2.unpackhi_epi64(acc, acc);
        acc += Sse2.shufflelo_epi16(acc, Sse.SHUFFLE(0, 1, 2, 3));
        v128 total = Sse2.add_epi16(acc, Sse2.shufflelo_epi16(acc, Sse.SHUFFLE(0, 0, 0, 1)));

        return total.SShort0;
    }

    // Scalar fallback: balanced tree of additions over groups of eight components.
    int sum0 = ((x.x0 + x.x1) + (x.x2 + x.x3)) + ((x.x4 + x.x5) + (x.x6 + x.x7));
    int sum1 = ((x.x8 + x.x9) + (x.x10 + x.x11)) + ((x.x12 + x.x13) + (x.x14 + x.x15));
    int sum2 = ((x.x16 + x.x17) + (x.x18 + x.x19)) + ((x.x20 + x.x21) + (x.x22 + x.x23));
    int sum3 = ((x.x24 + x.x25) + (x.x26 + x.x27)) + ((x.x28 + x.x29) + (x.x30 + x.x31));

    return (sum0 + sum1) + (sum2 + sum3);
}
/// <summary>
/// Divides every component of <paramref name="left"/> by the scalar <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The divisor applied to every component.</param>
/// <returns>A matrix holding the component-wise quotients.</returns>
public static short2x4 operator /(short2x4 left, short right)
{
    // The packed paths are only used when the divisor is not a compile-time constant;
    // constant divisors fall through to the per-column division at the bottom.
    if (!Constant.IsConstantExpression(right))
    {
        if (Avx2.IsAvx2Supported)
        {
            // All four short2 columns fit into one short8, so a single division suffices.
            short8 quotients = new short8(left.c0, left.c1, left.c2, left.c3) / right;

            return new short2x4(quotients.v2_0, quotients.v2_2, quotients.v2_4, quotients.v2_6);
        }

        if (Sse2.IsSse2Supported)
        {
            // Two columns per short4, sharing one broadcast divisor.
            short4 divisor = right;
            short4 lower = new short4(left.c0, left.c1) / divisor;
            short4 upper = new short4(left.c2, left.c3) / divisor;

            return new short2x4(lower.xy, lower.zw, upper.xy, upper.zw);
        }
    }

    return new short2x4(left.c0 / right,
                        left.c1 / right,
                        left.c2 / right,
                        left.c3 / right);
}
/// <summary>
/// Computes the remainder of every component of <paramref name="left"/> divided by the scalar <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The divisor applied to every component.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short2x4 operator %(short2x4 left, short right)
{
    // The packed paths are only used when the divisor is not a compile-time constant;
    // constant divisors fall through to the per-column remainder at the bottom.
    if (!Constant.IsConstantExpression(right))
    {
        if (Avx2.IsAvx2Supported)
        {
            // All four short2 columns fit into one short8, so a single operation suffices.
            short8 remainders = new short8(left.c0, left.c1, left.c2, left.c3) % right;

            return new short2x4(remainders.v2_0, remainders.v2_2, remainders.v2_4, remainders.v2_6);
        }

        if (Sse2.IsSse2Supported)
        {
            // Two columns per short4, sharing one broadcast divisor.
            short4 divisor = right;
            short4 lower = new short4(left.c0, left.c1) % divisor;
            short4 upper = new short4(left.c2, left.c3) % divisor;

            return new short2x4(lower.xy, lower.zw, upper.xy, upper.zw);
        }
    }

    return new short2x4(left.c0 % right,
                        left.c1 % right,
                        left.c2 % right,
                        left.c3 % right);
}
/// <summary>
/// Computes, per component, <c>(|x| / gcd(|x|, |y|)) * |y|</c>.
/// </summary>
/// <param name="x">First operand; its absolute value is used.</param>
/// <param name="y">Second operand; its absolute value is used.</param>
/// <returns>The component-wise result as unsigned 16-bit values.</returns>
public static ushort8 lcm(short8 x, short8 y)
{
    ushort8 magnitudeX = (ushort8)abs(x);
    ushort8 magnitudeY = (ushort8)abs(y);

    // Divide before multiplying, exactly as the formula above is written.
    return (magnitudeX / gcd(magnitudeX, magnitudeY)) * magnitudeY;
}
/// <summary>
/// Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short3x4 operator %(short3x4 left, short3x4 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // Pair up columns: the low 64 bits of two columns are concatenated into one short8,
        // so each column occupies lanes 0-2 and 4-6 while lanes 3 and 7 are padding.
        short8 dividend_lo = Sse2.unpacklo_epi64(left.c0, left.c1);
        short8 dividend_hi = Sse2.unpacklo_epi64(left.c2, left.c3);
        short8 divisor_lo = Sse2.unpacklo_epi64(right.c0, right.c1);
        short8 divisor_hi = Sse2.unpacklo_epi64(right.c2, right.c3);

        // Preprocessor directives must start their own line; the padding-lane patch below is
        // compiled in DEBUG builds only.
        // NOTE(review): presumably this keeps the unused lanes from tripping division-by-zero
        // checks in debug builds - confirm against the short8 % implementation.
#if DEBUG
        divisor_lo.x3 = 1;
        divisor_lo.x7 = 1;
        divisor_hi.x3 = 1;
        divisor_hi.x7 = 1;
#endif

        short8 rem_lo = dividend_lo % divisor_lo;
        short8 rem_hi = dividend_hi % divisor_hi;

        // Only the three meaningful lanes of each half are read back.
        return new short3x4(rem_lo.v3_0, rem_lo.v3_4, rem_hi.v3_0, rem_hi.v3_4);
    }
    else
    {
        return new short3x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
    }
}
/// <summary>
/// Copies the eight components of <paramref name="v"/> into this proxy's fields.
/// </summary>
/// <param name="v">The vector whose components are copied.</param>
public DebuggerProxy(short8 v)
{
    this.x0 = v.x0;
    this.x1 = v.x1;
    this.x2 = v.x2;
    this.x3 = v.x3;
    this.x4 = v.x4;
    this.x5 = v.x5;
    this.x6 = v.x6;
    this.x7 = v.x7;
}
/// <summary>
/// Returns the component-wise maximum of <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>A vector whose components are the larger of the corresponding inputs.</returns>
public static short8 max(short8 a, short8 b)
{
    if (Sse2.IsSse2Supported)
    {
        return Sse2.max_epi16(a, b);
    }

    // Scalar fallback, one component at a time.
    short m0 = (short)math.max(a.x0, b.x0);
    short m1 = (short)math.max(a.x1, b.x1);
    short m2 = (short)math.max(a.x2, b.x2);
    short m3 = (short)math.max(a.x3, b.x3);
    short m4 = (short)math.max(a.x4, b.x4);
    short m5 = (short)math.max(a.x5, b.x5);
    short m6 = (short)math.max(a.x6, b.x6);
    short m7 = (short)math.max(a.x7, b.x7);

    return new short8(m0, m1, m2, m3, m4, m5, m6, m7);
}
/// <summary>
/// Returns the sign of each component of <paramref name="x"/>:
/// -1 for negative, 0 for zero and 1 for positive values.
/// </summary>
/// <param name="x">The input vector.</param>
/// <returns>A vector of component-wise signs.</returns>
public static short8 sign(short8 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        // Applying the sign of x to a vector of ones yields -1, 0 or 1 per lane.
        return Ssse3.sign_epi16(new short8(1), x);
    }

    // Arithmetic shift: all bits set (-1) for negative lanes, 0 otherwise.
    short8 negativeMask = x >> 15;
    // Logical shift of the negated value: 1 when -x has its top bit set, 0 otherwise.
    short8 positiveBit = (short8)((ushort8)(-x) >> 15);

    return negativeMask | positiveBit;
}
/// <summary>
/// Returns <see langword="true"/> when all eight components of <paramref name="c"/> hold the same value.
/// </summary>
/// <param name="c">The vector to inspect.</param>
/// <returns>Whether every component equals the first one.</returns>
public static bool all_eq(short8 c)
{
    if (Ssse3.IsSsse3Supported)
    {
        // Byte indices (0, 1) repeated: replicate the first 16-bit lane into every lane.
        v128 firstLaneIndices = new v128(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
        short8 splat = (short8)Ssse3.shuffle_epi8(c, firstLaneIndices);

        return splat.Equals(c);
    }

    // Non-short-circuiting '&' is kept on purpose so that all comparisons are evaluated.
    bool lowerGroup = (c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4);
    bool upperGroup = (c.x0 == c.x5 & c.x0 == c.x6) & c.x0 == c.x7;

    return lowerGroup & upperGroup;
}
/// <summary>
/// Returns <see langword="true"/> when at least one component of <paramref name="x"/> is non-zero.
/// </summary>
/// <param name="x">The vector to inspect.</param>
/// <returns>Whether any component differs from zero.</returns>
public static bool any(short8 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Lanes equal to zero become all-ones; if the byte mask is not completely set,
        // at least one lane was non-zero.
        v128 zeroLanes = Sse2.cmpeq_epi16(x, default(v128));

        return bitmask32(8 * sizeof(short)) != Sse2.movemask_epi8(zeroLanes);
    }

    return any(x != 0);
}
/// <summary>
/// Returns <paramref name="x"/> viewed as a <c>half8</c>.
/// </summary>
/// <param name="x">The vector to reinterpret.</param>
/// <returns>The <c>half8</c> view of the input.</returns>
public static half8 ashalf(short8 x)
{
    if (Sse.IsSseSupported)
    {
        // Route through the 128-bit register type.
        return (v128)x;
    }

    // Fallback: reinterpret the memory of the argument through a pointer cast.
    return *(half8 *)&x;
}
/// <summary>
/// Returns <see langword="true"/> when every component of <paramref name="x"/> is non-zero.
/// </summary>
/// <param name="x">The vector to inspect.</param>
/// <returns>Whether all components differ from zero.</returns>
public static bool all(short8 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Lanes equal to zero become all-ones; an empty byte mask means no lane was zero.
        v128 zeroLanes = Sse2.cmpeq_epi16(x, default(v128));

        return 0 == Sse2.movemask_epi8(zeroLanes);
    }

    return all(x != 0);
}
/// <summary>
/// Subtracts the even-indexed components of <paramref name="b"/> from <paramref name="a"/>
/// and adds the odd-indexed ones.
/// </summary>
/// <param name="a">The left operand.</param>
/// <param name="b">The operand that is alternately subtracted and added.</param>
/// <returns>The alternating difference/sum vector.</returns>
public static short8 subadd(short8 a, short8 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // ushort.MaxValue has its sign bit set as a 16-bit lane, so sign_epi16 negates
        // the even lanes of b and leaves the odd lanes unchanged.
        ushort8 alternatingSign = new ushort8(ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1);

        return a + Ssse3.sign_epi16(b, alternatingSign);
    }

    // Fallback: subtract b on even lanes and -b (i.e. add b) on odd lanes.
    bool8 oddLanes = new bool8(false, true, false, true, false, true, false, true);

    return a - select(b, -b, oddLanes);
}
/// <summary>
/// Gathers the bytes at even byte offsets of <paramref name="x"/> (offsets 0, 2, ..., 14)
/// into the first eight bytes of the result.
/// </summary>
/// <param name="x">The vector of 16-bit values to narrow.</param>
/// <returns>The shuffled 128-bit register.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
internal static v128 ShortToByte(short8 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        byte8 evenByteIndices = new byte8(0, 2, 4, 6, 8, 10, 12, 14);

        return Ssse3.shuffle_epi8(x, evenByteIndices);
    }

    // There is no fallback implementation for this helper.
    throw new CPUFeatureCheckException();
}
/// <summary>
/// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>
/// and also outputs the remainders.
/// </summary>
/// <param name="dividend">The vector supplying the dividends.</param>
/// <param name="divisor">The divisor applied to every component.</param>
/// <param name="remainder">Receives the component-wise remainders.</param>
/// <returns>The component-wise quotients.</returns>
public static short8 divrem(short8 dividend, short divisor, out short8 remainder)
{
    if (!Constant.IsConstantExpression(divisor))
    {
        // Non-constant divisor: broadcast it and defer to the vector overload.
        return divrem(dividend, (short8)divisor, out remainder);
    }

    // Compile-time constant divisor: use the scalar-divisor operators directly.
    remainder = dividend % divisor;

    return dividend / divisor;
}
/// <summary>
/// Tests, per component, whether <paramref name="x"/> is a power of two.
/// </summary>
/// <param name="x">The vector to test.</param>
/// <returns>A vector of booleans, one per component.</returns>
public static bool8 ispow2(short8 x)
{
    if (Sse2.IsSse2Supported)
    {
        // A lane qualifies when it is greater than zero and x & (x - 1) is zero.
        v128 isPositive = Sse2.cmpgt_epi16(x, default(v128));
        v128 hasNoLowerBits = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

        // Reduce the all-ones lane masks to 0 / 1 before narrowing to bytes.
        return (v128)(byte8)(new short8(1) & Sse2.and_si128(isPositive, hasNoLowerBits));
    }

    return new bool8(math.ispow2(x.x0),
                     math.ispow2(x.x1),
                     math.ispow2(x.x2),
                     math.ispow2(x.x3),
                     math.ispow2(x.x4),
                     math.ispow2(x.x5),
                     math.ispow2(x.x6),
                     math.ispow2(x.x7));
}
/// <summary>
/// Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short4x2 operator %(short4x2 left, short4x2 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // Both columns fit into a single short8 on each side.
        short8 dividends = new short8(left.c0, left.c1);
        short8 divisors = new short8(right.c0, right.c1);
        short8 remainders = dividends % divisors;

        return new short4x2(remainders.v4_0, remainders.v4_4);
    }

    return new short4x2(left.c0 % right.c0, left.c1 % right.c1);
}
/// <summary>
/// Rotates the bits of each component of <paramref name="x"/> to the left by the
/// corresponding component of <paramref name="n"/>.
/// </summary>
/// <param name="x">The values to rotate.</param>
/// <param name="n">The per-component rotation amounts.</param>
/// <returns>The rotated values.</returns>
public static short8 rol(short8 x, short8 n)
{
    if (Sse2.IsSse2Supported)
    {
        // Only the low four bits of the rotation amount are used.
        n &= 15;

        ushort8 bits = (ushort8)x;

        // Combine the left shift with a logical right shift by the complementary amount.
        return (short8)(shl(bits, (ushort8)n) | shrl(bits, (ushort8)(-n & 15)));
    }

    return new short8(rol(x.x0, n.x0),
                      rol(x.x1, n.x1),
                      rol(x.x2, n.x2),
                      rol(x.x3, n.x3),
                      rol(x.x4, n.x4),
                      rol(x.x5, n.x5),
                      rol(x.x6, n.x6),
                      rol(x.x7, n.x7));
}
/// <summary>
/// Computes the component-wise quotient of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise quotients.</returns>
public static short4x2 operator /(short4x2 left, short4x2 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // Both columns fit into a single short8 on each side.
        short8 dividends = new short8(left.c0, left.c1);
        short8 divisors = new short8(right.c0, right.c1);
        short8 quotients = dividends / divisors;

        return new short4x2(quotients.v4_0, quotients.v4_4);
    }

    return new short4x2(left.c0 / right.c0, left.c1 / right.c1);
}
/// <summary>
/// Converts each component of <paramref name="x"/> to a boolean after clamping it to [0, 1].
/// </summary>
/// <param name="x">The vector to convert.</param>
/// <returns>A vector of booleans, one per component.</returns>
public static bool8 toboolsafe(short8 x)
{
    // After clamping, every component is either 0 or 1 and can be narrowed to a byte.
    byte8 zeroOrOne = (byte8)clamp(x, 0, 1);

    if (Sse2.IsSse2Supported)
    {
        return (v128)zeroOrOne;
    }

    // Fallback: reinterpret the bytes as booleans through a pointer cast.
    return *(bool8 *)&zeroOrOne;
}
/// <summary>
/// Tests, per component, whether <paramref name="dividend"/> is evenly divisible by <paramref name="divisor"/>.
/// </summary>
/// <param name="dividend">The values to test.</param>
/// <param name="divisor">The divisors; each component is asserted to be non-zero.</param>
/// <returns>A vector of booleans that is true where the remainder is zero.</returns>
public static bool8 isdivisible(short8 dividend, short8 divisor)
{
    // Validate every divisor component individually.
    Assert.AreNotEqual(0, divisor.x0);
    Assert.AreNotEqual(0, divisor.x1);
    Assert.AreNotEqual(0, divisor.x2);
    Assert.AreNotEqual(0, divisor.x3);
    Assert.AreNotEqual(0, divisor.x4);
    Assert.AreNotEqual(0, divisor.x5);
    Assert.AreNotEqual(0, divisor.x6);
    Assert.AreNotEqual(0, divisor.x7);

    bool8 hasZeroRemainder = dividend % divisor == 0;

    return hasZeroRemainder;
}
/// <summary>
/// Produces eight bytes by multiplying each component of <paramref name="max"/> with a fresh
/// <c>NextState()</c> value and keeping the high byte of every 16-bit product.
/// </summary>
/// <param name="max">The per-component multipliers.</param>
/// <returns>The high bytes of the eight products.</returns>
public byte8 NextByte8(byte8 max)
{
    // NextState() is called exactly eight times, in component order.
    short8 scaled = (short8)max * new short8(NextState(),
                                             NextState(),
                                             NextState(),
                                             NextState(),
                                             NextState(),
                                             NextState(),
                                             NextState(),
                                             NextState());

    if (Ssse3.IsSsse3Supported)
    {
        // Odd byte offsets select the high byte of each 16-bit lane.
        byte8 highByteIndices = new byte8(1, 3, 5, 7, 9, 11, 13, 15);

        return Ssse3.shuffle_epi8(scaled, highByteIndices);
    }

    return (byte8)(scaled >> 8);
}
/// <summary>
/// Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short4x3 operator %(short4x3 left, short4x3 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // The first two columns are processed together; the third one on its own.
        short8 dividends = new short8(left.c0, left.c1);
        short8 divisors = new short8(right.c0, right.c1);
        short8 remainders = dividends % divisors;

        return new short4x3(remainders.v4_0, remainders.v4_4, left.c2 % right.c2);
    }

    return new short4x3(left.c0 % right.c0,
                        left.c1 % right.c1,
                        left.c2 % right.c2);
}
/// <summary>
/// Computes the component-wise quotient of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise quotients.</returns>
public static short4x3 operator /(short4x3 left, short4x3 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // The first two columns are processed together; the third one on its own.
        short8 dividends = new short8(left.c0, left.c1);
        short8 divisors = new short8(right.c0, right.c1);
        short8 quotients = dividends / divisors;

        return new short4x3(quotients.v4_0, quotients.v4_4, left.c2 / right.c2);
    }

    return new short4x3(left.c0 / right.c0,
                        left.c1 / right.c1,
                        left.c2 / right.c2);
}
/// <summary>
/// Computes the component-wise quotient of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise quotients.</returns>
public static short4x4 operator /(short4x4 left, short4x4 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // Columns are processed in pairs: (c0, c1) and (c2, c3).
        short8 firstPair = new short8(left.c0, left.c1) / new short8(right.c0, right.c1);
        short8 secondPair = new short8(left.c2, left.c3) / new short8(right.c2, right.c3);

        return new short4x4(firstPair.v4_0, firstPair.v4_4, secondPair.v4_0, secondPair.v4_4);
    }

    return new short4x4(left.c0 / right.c0,
                        left.c1 / right.c1,
                        left.c2 / right.c2,
                        left.c3 / right.c3);
}
/// <summary>
/// Returns the product of all eight components of <paramref name="x"/>, computed with 32-bit multiplications.
/// </summary>
/// <param name="x">The vector whose components are multiplied together.</param>
/// <returns>The horizontal product as an <see cref="int"/>.</returns>
public static int cprod(short8 x)
{
    if (Avx2.IsAvx2Supported)
    {
        // Multiply x (converted to int8) by a copy with its 32-bit groups reversed,
        // then keep the low 128 bits: four products that together cover all eight components.
        v128 reversedGroups = Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 1, 2, 3));
        v128 partial = Avx.mm256_castsi256_si128((int8)x * (int8)(short8)reversedGroups);

        // 4 -> 2 products.
        partial = Sse4_1.mullo_epi32(partial, Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3)));

        // 2 -> 1: this 16-bit shuffle moves 32-bit lane 1 into lane 0.
        v128 total = Sse4_1.mullo_epi32(partial, Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2)));

        return total.SInt0;
    }

    // Fallback: multiply the two halves as int4 and reduce with the int4 overload.
    return cprod((int4)x.v4_0 * (int4)x.v4_4);
}
/// <summary>
/// Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The matrix supplying the divisors.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short4x4 operator %(short4x4 left, short4x4 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // Columns are processed in pairs: (c0, c1) and (c2, c3).
        short8 firstPair = new short8(left.c0, left.c1) % new short8(right.c0, right.c1);
        short8 secondPair = new short8(left.c2, left.c3) % new short8(right.c2, right.c3);

        return new short4x4(firstPair.v4_0, firstPair.v4_4, secondPair.v4_0, secondPair.v4_4);
    }

    return new short4x4(left.c0 % right.c0,
                        left.c1 % right.c1,
                        left.c2 % right.c2,
                        left.c3 % right.c3);
}
/// <summary>
/// Computes the remainder of every component of <paramref name="left"/> divided by the scalar <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The divisor applied to every component.</param>
/// <returns>A matrix holding the component-wise remainders.</returns>
public static short4x3 operator %(short4x3 left, short right)
{
    // The packed path is only used when the divisor is not a compile-time constant.
    if (Avx2.IsAvx2Supported && !Constant.IsConstantExpression(right))
    {
        // The first two columns are processed together; the third one on its own.
        short8 remainders = new short8(left.c0, left.c1) % right;

        return new short4x3(remainders.v4_0, remainders.v4_4, left.c2 % right);
    }

    return new short4x3(left.c0 % right,
                        left.c1 % right,
                        left.c2 % right);
}
/// <summary>
/// Divides every component of <paramref name="left"/> by the scalar <paramref name="right"/>.
/// </summary>
/// <param name="left">The matrix supplying the dividends.</param>
/// <param name="right">The divisor applied to every component.</param>
/// <returns>A matrix holding the component-wise quotients.</returns>
public static short4x3 operator /(short4x3 left, short right)
{
    // The packed path is only used when the divisor is not a compile-time constant.
    if (Avx2.IsAvx2Supported && !Constant.IsConstantExpression(right))
    {
        // The first two columns are processed together; the third one on its own.
        short8 quotients = new short8(left.c0, left.c1) / right;

        return new short4x3(quotients.v4_0, quotients.v4_4, left.c2 / right);
    }

    return new short4x3(left.c0 / right,
                        left.c1 / right,
                        left.c2 / right);
}
/// <summary>
/// Returns the dot product of <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The sum of the eight component-wise products as an <see cref="int"/>.</returns>
public static int dot(short8 a, short8 b)
{
    if (Sse2.IsSse2Supported)
    {
        // madd multiplies 16-bit lanes and adds adjacent pairs, leaving four 32-bit sums.
        // They are kept in a short8 local, the same type the original reused for them.
        short8 pairSums = Sse2.madd_epi16(a, b);

        // 4 -> 2 sums.
        pairSums = Sse2.add_epi32(pairSums, Sse2.shuffle_epi32(pairSums, Sse.SHUFFLE(0, 1, 2, 3)));

        // 2 -> 1: this 16-bit shuffle moves 32-bit lane 1 into lane 0.
        v128 total = Sse2.add_epi32(pairSums, Sse2.shufflelo_epi16(pairSums, Sse.SHUFFLE(0, 0, 3, 2)));

        return total.SInt0;
    }

    // Scalar fallback: balanced tree of additions over the eight products.
    int lowerHalf = ((a.x0 * b.x0) + (a.x1 * b.x1)) + ((a.x2 * b.x2) + (a.x3 * b.x3));
    int upperHalf = ((a.x4 * b.x4) + (a.x5 * b.x5)) + ((a.x6 * b.x6) + (a.x7 * b.x7));

    return lowerHalf + upperHalf;
}