/// <summary>
/// Negates the lanes of <paramref name="x"/> whose matching flag in <paramref name="p"/> is set; the remaining lanes are returned unchanged.
/// </summary>
/// <param name="x">Vector whose lanes are conditionally negated.</param>
/// <param name="p">Per-lane flags; every flag is validated with Assert.IsSafeBoolean.</param>
/// <returns>The conditionally negated vector.</returns>
public static short16 negate(short16 x, bool16 p)
{
    Assert.IsSafeBoolean(p.x0);
    Assert.IsSafeBoolean(p.x1);
    Assert.IsSafeBoolean(p.x2);
    Assert.IsSafeBoolean(p.x3);
    Assert.IsSafeBoolean(p.x4);
    Assert.IsSafeBoolean(p.x5);
    Assert.IsSafeBoolean(p.x6);
    Assert.IsSafeBoolean(p.x7);
    Assert.IsSafeBoolean(p.x8);
    Assert.IsSafeBoolean(p.x9);
    Assert.IsSafeBoolean(p.x10);
    Assert.IsSafeBoolean(p.x11);
    Assert.IsSafeBoolean(p.x12);
    Assert.IsSafeBoolean(p.x13);
    Assert.IsSafeBoolean(p.x14);
    Assert.IsSafeBoolean(p.x15);

    if (!Avx2.IsAvx2Supported)
    {
        // No 256-bit path: process the two 8-lane halves separately.
        return new short16(negate(x.v8_0, p.v8_0), negate(x.v8_8, p.v8_8));
    }

    // Flag bytes greater than zero become all-ones bytes.
    // NOTE(review): the sbyte16 -> short16 conversion is assumed to sign-extend, giving 0 / -1 per lane.
    short16 negateMask = (sbyte16)Sse2.cmpgt_epi8(p, default(v128));

    // (x ^ -1) - (-1) == -x   and   (x ^ 0) - 0 == x
    return (x ^ negateMask) - negateMask;
}
/// <summary>
/// Compares every lane of maxmath.sign with a scalar reference (0 for zero, -1 for negative, 1 for positive)
/// for each entry of the Short16 left-hand-side test data.
/// </summary>
public static void Short16()
{
    bool allLanesMatch = true;

    for (int i = 0; i < Tests.Short16.NUM_TESTS; i++)
    {
        var input = Tests.Short16.TestData_LHS[i];
        short16 actual = maxmath.sign(input);

        allLanesMatch &= actual.x0 == ((input.x0 == 0) ? 0 : ((input.x0 < 0) ? -1 : 1));
        allLanesMatch &= actual.x1 == ((input.x1 == 0) ? 0 : ((input.x1 < 0) ? -1 : 1));
        allLanesMatch &= actual.x2 == ((input.x2 == 0) ? 0 : ((input.x2 < 0) ? -1 : 1));
        allLanesMatch &= actual.x3 == ((input.x3 == 0) ? 0 : ((input.x3 < 0) ? -1 : 1));
        allLanesMatch &= actual.x4 == ((input.x4 == 0) ? 0 : ((input.x4 < 0) ? -1 : 1));
        allLanesMatch &= actual.x5 == ((input.x5 == 0) ? 0 : ((input.x5 < 0) ? -1 : 1));
        allLanesMatch &= actual.x6 == ((input.x6 == 0) ? 0 : ((input.x6 < 0) ? -1 : 1));
        allLanesMatch &= actual.x7 == ((input.x7 == 0) ? 0 : ((input.x7 < 0) ? -1 : 1));
        allLanesMatch &= actual.x8 == ((input.x8 == 0) ? 0 : ((input.x8 < 0) ? -1 : 1));
        allLanesMatch &= actual.x9 == ((input.x9 == 0) ? 0 : ((input.x9 < 0) ? -1 : 1));
        allLanesMatch &= actual.x10 == ((input.x10 == 0) ? 0 : ((input.x10 < 0) ? -1 : 1));
        allLanesMatch &= actual.x11 == ((input.x11 == 0) ? 0 : ((input.x11 < 0) ? -1 : 1));
        allLanesMatch &= actual.x12 == ((input.x12 == 0) ? 0 : ((input.x12 < 0) ? -1 : 1));
        allLanesMatch &= actual.x13 == ((input.x13 == 0) ? 0 : ((input.x13 < 0) ? -1 : 1));
        allLanesMatch &= actual.x14 == ((input.x14 == 0) ? 0 : ((input.x14 < 0) ? -1 : 1));
        allLanesMatch &= actual.x15 == ((input.x15 == 0) ? 0 : ((input.x15 < 0) ? -1 : 1));
    }

    Assert.AreEqual(true, allLanesMatch);
}
/// <summary>
/// Produces sixteen bytes, each computed as <paramref name="min"/> plus the high byte of (max - min) multiplied by a fresh NextState() value.
/// </summary>
/// <param name="min">Per-lane lower bound.</param>
/// <param name="max">Per-lane upper bound; asserted to be not smaller than <paramref name="min"/> in every lane.</param>
/// <returns>The generated vector.</returns>
public byte16 NextByte16(byte16 min, byte16 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);

    if (!Avx2.IsAvx2Supported)
    {
        // Without AVX2 the two 8-lane halves are generated independently.
        return new byte16(NextByte8(min.v8_0, max.v8_0), NextByte8(min.v8_8, max.v8_8));
    }

    // Widen the range to 16 bits and scale each lane by its own state value.
    short16 range = (short16)(max - min);
    short16 states = new short16(NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState());
    short16 products = range * states;

    // Gather the odd (high) byte of every 16-bit product into the low 8 bytes of each 128-bit half.
    products = Avx2.mm256_shuffle_epi8(products, new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                                         1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0));

    // Bring the two 8-byte groups (64-bit elements 0 and 2) together in the low 128 bits.
    return min + Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(products, Sse.SHUFFLE(0, 0, 2, 0)));
}
/// <summary>
/// Verifies the vector overload of maxmath.lzcnt against the scalar overload applied lane by lane,
/// using 64 random vectors from a Random16 seeded with 135.
/// </summary>
public static void short16()
{
    Random16 rng = new Random16(135);

    for (int i = 0; i < 64; i++)
    {
        short16 x = rng.NextShort16();

        // Scalar reference, one lane at a time.
        short16 expected = new short16((short)maxmath.lzcnt(x.x0),
                                       (short)maxmath.lzcnt(x.x1),
                                       (short)maxmath.lzcnt(x.x2),
                                       (short)maxmath.lzcnt(x.x3),
                                       (short)maxmath.lzcnt(x.x4),
                                       (short)maxmath.lzcnt(x.x5),
                                       (short)maxmath.lzcnt(x.x6),
                                       (short)maxmath.lzcnt(x.x7),
                                       (short)maxmath.lzcnt(x.x8),
                                       (short)maxmath.lzcnt(x.x9),
                                       (short)maxmath.lzcnt(x.x10),
                                       (short)maxmath.lzcnt(x.x11),
                                       (short)maxmath.lzcnt(x.x12),
                                       (short)maxmath.lzcnt(x.x13),
                                       (short)maxmath.lzcnt(x.x14),
                                       (short)maxmath.lzcnt(x.x15));
        var actual = maxmath.lzcnt(x);

        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Verifies maxmath.intpow on short16 bases and ushort16 exponents against the scalar _intpow helper applied per lane,
/// using 64 random base/exponent pairs from a Random16 seeded with 135.
/// </summary>
public static void short16()
{
    Random16 rng = new Random16(135);

    for (int i = 0; i < 64; i++)
    {
        short16 x = rng.NextShort16();
        ushort16 n = rng.NextUShort16();

        // Scalar reference, truncated to 16 bits per lane.
        short16 expected = new short16((short)_intpow(x.x0, n.x0),
                                       (short)_intpow(x.x1, n.x1),
                                       (short)_intpow(x.x2, n.x2),
                                       (short)_intpow(x.x3, n.x3),
                                       (short)_intpow(x.x4, n.x4),
                                       (short)_intpow(x.x5, n.x5),
                                       (short)_intpow(x.x6, n.x6),
                                       (short)_intpow(x.x7, n.x7),
                                       (short)_intpow(x.x8, n.x8),
                                       (short)_intpow(x.x9, n.x9),
                                       (short)_intpow(x.x10, n.x10),
                                       (short)_intpow(x.x11, n.x11),
                                       (short)_intpow(x.x12, n.x12),
                                       (short)_intpow(x.x13, n.x13),
                                       (short)_intpow(x.x14, n.x14),
                                       (short)_intpow(x.x15, n.x15));
        var actual = maxmath.intpow(x, n);

        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Computes, per lane, (|x| / gcd(|x|, |y|)) * |y| on the unsigned magnitudes of the inputs.
/// </summary>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>The per-lane least common multiple as unsigned 16-bit values.</returns>
public static ushort16 lcm(short16 x, short16 y)
{
    ushort16 magnitudeX = (ushort16)abs(x);
    ushort16 magnitudeY = (ushort16)abs(y);
    var commonDivisor = gcd(magnitudeX, magnitudeY);

    // Divide before multiplying, exactly as the formula above states.
    return (magnitudeX / commonDivisor) * magnitudeY;
}
/// <summary>
/// Returns the sum of all 32 lanes of <paramref name="x"/>.
/// </summary>
/// <param name="x">Vector whose lanes are summed.</param>
/// <returns>The horizontal sum; the SIMD paths accumulate in 16-bit lanes before returning.</returns>
public static int csum(sbyte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        // 32 bytes -> 16 shorts -> 8 shorts.
        short16 widened = (short16)x.v16_0 + (short16)x.v16_16;
        short8 partial = widened.v8_0 + widened.v8_8;

        // Fold the upper four lanes onto the lower four, then reverse-add the lower four.
        partial += Sse2.unpackhi_epi64(partial, partial);
        partial += Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 1, 2, 3));

        // Lane 0 + lane 1 now covers every input lane.
        v128 total = Sse2.add_epi16(partial, Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 0, 1)));
        return total.SShort0;
    }

    if (Sse2.IsSse2Supported)
    {
        // Widen the four 8-lane quarters and add them pairwise down to 8 shorts.
        short8 partial = ((short8)x.v8_0 + (short8)x.v8_8) + ((short8)x.v8_16 + (short8)x.v8_24);

        partial += Sse2.unpackhi_epi64(partial, partial);
        partial += Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 1, 2, 3));

        v128 total = Sse2.add_epi16(partial, Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 0, 1)));
        return total.SShort0;
    }

    // Scalar fallback: balanced pairwise addition tree.
    return ((((x.x0 + x.x1) + (x.x2 + x.x3)) + ((x.x4 + x.x5) + (x.x6 + x.x7)))
          + (((x.x8 + x.x9) + (x.x10 + x.x11)) + ((x.x12 + x.x13) + (x.x14 + x.x15))))
         + ((((x.x16 + x.x17) + (x.x18 + x.x19)) + ((x.x20 + x.x21) + (x.x22 + x.x23)))
          + (((x.x24 + x.x25) + (x.x26 + x.x27)) + ((x.x28 + x.x29) + (x.x30 + x.x31))));
}
/// <summary>
/// Verifies maxmath.gcd on short16 operands against the scalar _gcd helper applied per lane,
/// using 64 random operand pairs from a Random16 seeded with 135.
/// </summary>
public static void short16()
{
    Random16 rng = new Random16(135);

    for (int i = 0; i < 64; i++)
    {
        short16 x = rng.NextShort16();
        short16 y = rng.NextShort16();

        // Scalar reference, converted to unsigned 16-bit per lane.
        ushort16 expected = new ushort16((ushort)_gcd(x.x0, y.x0),
                                         (ushort)_gcd(x.x1, y.x1),
                                         (ushort)_gcd(x.x2, y.x2),
                                         (ushort)_gcd(x.x3, y.x3),
                                         (ushort)_gcd(x.x4, y.x4),
                                         (ushort)_gcd(x.x5, y.x5),
                                         (ushort)_gcd(x.x6, y.x6),
                                         (ushort)_gcd(x.x7, y.x7),
                                         (ushort)_gcd(x.x8, y.x8),
                                         (ushort)_gcd(x.x9, y.x9),
                                         (ushort)_gcd(x.x10, y.x10),
                                         (ushort)_gcd(x.x11, y.x11),
                                         (ushort)_gcd(x.x12, y.x12),
                                         (ushort)_gcd(x.x13, y.x13),
                                         (ushort)_gcd(x.x14, y.x14),
                                         (ushort)_gcd(x.x15, y.x15));
        var actual = maxmath.gcd(x, y);

        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Compares every lane of maxmath.avg with the scalar reference ((sum > 0 ? 1 : -1) + sum) / 2
/// for each left/right pair of the Short16 test data.
/// </summary>
public static void Short16x2()
{
    bool allLanesMatch = true;

    for (int i = 0; i < Tests.Short16.NUM_TESTS; i++)
    {
        var lhs = Tests.Short16.TestData_LHS[i];
        var rhs = Tests.Short16.TestData_RHS[i];
        short16 actual = maxmath.avg(lhs, rhs);

        allLanesMatch &= actual.x0 == (((lhs.x0 + rhs.x0) > 0 ? 1 : -1) + lhs.x0 + rhs.x0) / 2;
        allLanesMatch &= actual.x1 == (((lhs.x1 + rhs.x1) > 0 ? 1 : -1) + lhs.x1 + rhs.x1) / 2;
        allLanesMatch &= actual.x2 == (((lhs.x2 + rhs.x2) > 0 ? 1 : -1) + lhs.x2 + rhs.x2) / 2;
        allLanesMatch &= actual.x3 == (((lhs.x3 + rhs.x3) > 0 ? 1 : -1) + lhs.x3 + rhs.x3) / 2;
        allLanesMatch &= actual.x4 == (((lhs.x4 + rhs.x4) > 0 ? 1 : -1) + lhs.x4 + rhs.x4) / 2;
        allLanesMatch &= actual.x5 == (((lhs.x5 + rhs.x5) > 0 ? 1 : -1) + lhs.x5 + rhs.x5) / 2;
        allLanesMatch &= actual.x6 == (((lhs.x6 + rhs.x6) > 0 ? 1 : -1) + lhs.x6 + rhs.x6) / 2;
        allLanesMatch &= actual.x7 == (((lhs.x7 + rhs.x7) > 0 ? 1 : -1) + lhs.x7 + rhs.x7) / 2;
        allLanesMatch &= actual.x8 == (((lhs.x8 + rhs.x8) > 0 ? 1 : -1) + lhs.x8 + rhs.x8) / 2;
        allLanesMatch &= actual.x9 == (((lhs.x9 + rhs.x9) > 0 ? 1 : -1) + lhs.x9 + rhs.x9) / 2;
        allLanesMatch &= actual.x10 == (((lhs.x10 + rhs.x10) > 0 ? 1 : -1) + lhs.x10 + rhs.x10) / 2;
        allLanesMatch &= actual.x11 == (((lhs.x11 + rhs.x11) > 0 ? 1 : -1) + lhs.x11 + rhs.x11) / 2;
        allLanesMatch &= actual.x12 == (((lhs.x12 + rhs.x12) > 0 ? 1 : -1) + lhs.x12 + rhs.x12) / 2;
        allLanesMatch &= actual.x13 == (((lhs.x13 + rhs.x13) > 0 ? 1 : -1) + lhs.x13 + rhs.x13) / 2;
        allLanesMatch &= actual.x14 == (((lhs.x14 + rhs.x14) > 0 ? 1 : -1) + lhs.x14 + rhs.x14) / 2;
        allLanesMatch &= actual.x15 == (((lhs.x15 + rhs.x15) > 0 ? 1 : -1) + lhs.x15 + rhs.x15) / 2;
    }

    Assert.AreEqual(true, allLanesMatch);
}
/// <summary>
/// Produces sixteen 16-bit values, each computed as <paramref name="min"/> plus the upper 16 bits of (max - min) multiplied by a fresh NextState() value.
/// </summary>
/// <param name="min">Per-lane lower bound.</param>
/// <param name="max">Per-lane upper bound; asserted to be not smaller than <paramref name="min"/> in every lane.</param>
/// <returns>The generated vector.</returns>
public short16 NextShort16(short16 min, short16 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);

    if (Avx2.IsAvx2Supported)
    {
        // The (short) casts only reinterpret the state bits for the short16 constructor.
        short16 states = new short16((short)NextState(), (short)NextState(), (short)NextState(), (short)NextState(),
                                     (short)NextState(), (short)NextState(), (short)NextState(), (short)NextState(),
                                     (short)NextState(), (short)NextState(), (short)NextState(), (short)NextState(),
                                     (short)NextState(), (short)NextState(), (short)NextState(), (short)NextState());

        // Both factors are unsigned 16-bit quantities: the range (max - min) may exceed short.MaxValue and the
        // state may have its top bit set. The UNSIGNED high multiply keeps the offset in [0, max - min);
        // the signed variant would produce negative offsets and results below min.
        return min + Avx2.mm256_mulhi_epu16(max - min, states);
    }
    else
    {
        // Without AVX2 the two 8-lane halves are generated independently.
        return new short16(NextShort8(min.v8_0, max.v8_0), NextShort8(min.v8_8, max.v8_8));
    }
}
/// <summary>
/// Produces thirty-two bytes, each computed as <paramref name="min"/> plus the high byte of (max - min) multiplied by a fresh NextState() value.
/// </summary>
/// <param name="min">Per-lane lower bound.</param>
/// <param name="max">Per-lane upper bound; asserted to be not smaller than <paramref name="min"/> in every lane.</param>
/// <returns>The generated vector.</returns>
public byte32 NextByte32(byte32 min, byte32 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);
    Assert.IsNotSmaller(max.x16, min.x16);
    Assert.IsNotSmaller(max.x17, min.x17);
    Assert.IsNotSmaller(max.x18, min.x18);
    Assert.IsNotSmaller(max.x19, min.x19);
    Assert.IsNotSmaller(max.x20, min.x20);
    Assert.IsNotSmaller(max.x21, min.x21);
    Assert.IsNotSmaller(max.x22, min.x22);
    Assert.IsNotSmaller(max.x23, min.x23);
    Assert.IsNotSmaller(max.x24, min.x24);
    Assert.IsNotSmaller(max.x25, min.x25);
    Assert.IsNotSmaller(max.x26, min.x26);
    Assert.IsNotSmaller(max.x27, min.x27);
    Assert.IsNotSmaller(max.x28, min.x28);
    Assert.IsNotSmaller(max.x29, min.x29);
    Assert.IsNotSmaller(max.x30, min.x30);
    Assert.IsNotSmaller(max.x31, min.x31);

    if (!Avx2.IsAvx2Supported)
    {
        // Without AVX2 the four 8-lane quarters are generated independently.
        return new byte32(NextByte8(min.v8_0, max.v8_0),
                          NextByte8(min.v8_8, max.v8_8),
                          NextByte8(min.v8_16, max.v8_16),
                          NextByte8(min.v8_24, max.v8_24));
    }

    byte32 range = max - min;

    // Widen each 16-lane half to 16 bits and scale every lane by its own state value (low half first).
    short16 lowProducts = (short16)range.v16_0 * new short16(NextState(), NextState(), NextState(), NextState(),
                                                             NextState(), NextState(), NextState(), NextState(),
                                                             NextState(), NextState(), NextState(), NextState(),
                                                             NextState(), NextState(), NextState(), NextState());
    short16 highProducts = (short16)range.v16_16 * new short16(NextState(), NextState(), NextState(), NextState(),
                                                               NextState(), NextState(), NextState(), NextState(),
                                                               NextState(), NextState(), NextState(), NextState(),
                                                               NextState(), NextState(), NextState(), NextState());

    // Gather the odd (high) byte of every 16-bit product into the low 8 bytes of each 128-bit half.
    lowProducts = Avx2.mm256_shuffle_epi8(lowProducts, new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                                               1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0));
    highProducts = Avx2.mm256_shuffle_epi8(highProducts, new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                                                 1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0));

    // Bring 64-bit elements 0 and 2 together in the low 128 bits of each intermediate.
    return min + new byte32(Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(lowProducts, Sse.SHUFFLE(0, 0, 2, 0))),
                            Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(highProducts, Sse.SHUFFLE(0, 0, 2, 0))));
}
/// <summary>
/// Alternately subtracts and adds the lanes of <paramref name="b"/> to <paramref name="a"/>: even lanes yield a - b, odd lanes yield a + b.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand.</param>
/// <returns>The alternating difference/sum vector.</returns>
public static short16 subadd(short16 a, short16 b)
{
    if (!Avx2.IsAvx2Supported)
    {
        return new short16(subadd(a.v8_0, b.v8_0), subadd(a.v8_8, b.v8_8));
    }

    // Sign control per lane: ushort.MaxValue (negative as a signed 16-bit value) negates b, 1 keeps it.
    v256 alternatingSigns = new v256(ushort.MaxValue, 1, ushort.MaxValue, 1,
                                     ushort.MaxValue, 1, ushort.MaxValue, 1,
                                     ushort.MaxValue, 1, ushort.MaxValue, 1,
                                     ushort.MaxValue, 1, ushort.MaxValue, 1);

    return a + Avx2.mm256_sign_epi16(b, alternatingSigns);
}
/// <summary>
/// Returns true when every lane of <paramref name="c"/> holds the same value.
/// </summary>
/// <param name="c">Vector to inspect.</param>
/// <returns>Whether all sixteen lanes are equal.</returns>
public static bool all_eq(short16 c)
{
    if (Avx2.IsAvx2Supported)
    {
        // Broadcast the first lane across the vector and compare against the original.
        short16 firstLaneEverywhere = Avx2.mm256_broadcastw_epi16(Avx.mm256_castsi256_si128(c));
        return firstLaneEverywhere.Equals(c);
    }

    // Each half must be uniform and the halves must match each other.
    bool lowUniform = all_eq(c.v8_0);
    bool highUniform = all_eq(c.v8_8);
    bool halvesMatch = c.v8_0.Equals(c.v8_8);

    return lowUniform & highUniform & halvesMatch;
}
/// <summary>
/// Computes the dot product of two 16-lane vectors of 16-bit integers.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The sum of the per-lane products.</returns>
public static int dot(short16 a, short16 b)
{
    if (!Avx2.IsAvx2Supported)
    {
        return dot(a.v8_0, b.v8_0) + dot(a.v8_8, b.v8_8);
    }

    // The multiply-add intrinsic yields eight 32-bit partial sums; add them horizontally.
    int8 pairSums = (int8)Avx2.mm256_madd_epi16(a, b);
    return csum(pairSums);
}
/// <summary>
/// Returns the per-lane absolute value of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>The vector of absolute values.</returns>
public static short16 abs(short16 x)
{
    if (!Avx2.IsAvx2Supported)
    {
        // Process the two 8-lane halves separately.
        return new short16(abs(x.v8_0), abs(x.v8_8));
    }

    return Avx2.mm256_abs_epi16(x);
}
/// <summary>
/// Returns the per-lane maximum of <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The vector of per-lane maxima.</returns>
public static short16 max(short16 a, short16 b)
{
    if (!Avx2.IsAvx2Supported)
    {
        // Process the two 8-lane halves separately.
        return new short16(max(a.v8_0, b.v8_0), max(a.v8_8, b.v8_8));
    }

    return Avx2.mm256_max_epi16(a, b);
}
/// <summary>
/// Returns the per-lane sign of <paramref name="x"/>: -1 for negative lanes, 0 for zero lanes and 1 for positive lanes.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>The vector of signs.</returns>
public static short16 sign(short16 x)
{
    if (!Avx2.IsAvx2Supported)
    {
        return new short16(sign(x.v8_0), sign(x.v8_8));
    }

    // Applying the sign of x to a vector of ones gives -1, 0 or 1 per lane.
    short16 ones = new short16(1);
    return Avx2.mm256_sign_epi16(ones, x);
}
/// <summary>
/// Returns the per-lane negative absolute value of <paramref name="x"/>, i.e. 0 - |x|.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>The vector of negated absolute values.</returns>
public static short16 nabs(short16 x)
{
    if (Avx2.IsAvx2Supported)
    {
        return Avx2.mm256_sub_epi16(default(v256), Avx2.mm256_abs_epi16(x));
    }
    else
    {
        // Both halves must go through nabs; using abs for the low half would return
        // positive values in lanes 0..7 and disagree with the AVX2 path.
        return new short16(nabs(x.v8_0), nabs(x.v8_8));
    }
}
/// <summary>
/// Returns true when every lane of <paramref name="x"/> is non-zero.
/// </summary>
/// <param name="x">Vector to inspect.</param>
/// <returns>Whether no lane equals zero.</returns>
public static bool all(short16 x)
{
    if (!Avx2.IsAvx2Supported)
    {
        return all(x.v8_0) & all(x.v8_8);
    }

    // A set bit in the mask marks a byte belonging to a lane that compared equal to zero.
    int zeroLaneBits = Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi16(x, default(v256)));
    return 0 == zeroLaneBits;
}
/// <summary>
/// Tests each lane of <paramref name="x"/> for being a power of two: the lane must be greater than zero and (x and (x - 1)) must be zero.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>Per-lane flags, masked down to the value 1 where the test passes.</returns>
public static bool16 ispow2(short16 x)
{
    if (!Avx2.IsAvx2Supported)
    {
        return new bool16(ispow2(x.v8_0), ispow2(x.v8_8));
    }

    v256 isPositive = Avx2.mm256_cmpgt_epi16(x, default(v256));
    v256 hasSingleBit = Avx2.mm256_cmpeq_epi16(default(v256), x & (x - 1));
    short16 laneMask = (short16)Avx2.mm256_and_si256(isPositive, hasSingleBit);

    // Narrow the 16-bit masks to bytes and keep only the lowest bit of each.
    return (v128)(new byte16(1) & (byte16)laneMask);
}
/// <summary>
/// Divides every lane of <paramref name="dividend"/> by the scalar <paramref name="divisor"/> and also returns the remainder.
/// </summary>
/// <param name="dividend">Vector of dividends.</param>
/// <param name="divisor">Scalar divisor shared by all lanes.</param>
/// <param name="remainder">Receives the per-lane remainder.</param>
/// <returns>The per-lane quotient.</returns>
public static short16 divrem(short16 dividend, short divisor, out short16 remainder)
{
    if (!Constant.IsConstantExpression(divisor))
    {
        // Non-constant divisor: broadcast it and use the vector overload.
        return divrem(dividend, (short16)divisor, out remainder);
    }

    // Constant divisor: use the scalar-divisor operators directly.
    remainder = dividend % divisor;
    return dividend / divisor;
}
/// <summary>
/// Converts each lane of <paramref name="x"/> to a boolean after clamping it to the range [0, 1].
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>Per-lane flags built from the clamped, byte-narrowed lanes.</returns>
public static bool16 toboolsafe(short16 x)
{
    // Clamp to 0 or 1 and narrow to one byte per lane.
    byte16 zeroOrOne = (byte16)clamp(x, 0, 1);

    if (Sse2.IsSse2Supported)
    {
        return (v128)zeroOrOne;
    }

    // No SSE2: reinterpret the sixteen bytes in place as sixteen booleans.
    return *(bool16 *)&zeroOrOne;
}
/// <summary>
/// Compares two vectors of unsigned 16-bit integers lane by lane.
/// </summary>
/// <param name="x">Left operand.</param>
/// <param name="y">Right operand.</param>
/// <returns>Per lane: 1 where x is greater than y, -1 where y is greater than x, otherwise 0.</returns>
public static short16 compareto(ushort16 x, ushort16 y)
{
    if (!Avx2.IsAvx2Supported)
    {
        return new short16(compareto(x.v8_0, y.v8_0), compareto(x.v8_8, y.v8_8));
    }

    // NOTE(review): the masks are assumed to be -1 where the comparison holds and 0 elsewhere - confirm in Operator.
    short16 xIsLarger = Operator.greater_mask_ushort(x, y);
    short16 yIsLarger = Operator.greater_mask_ushort(y, x);

    // 0 - (-1) contributes +1 for "x greater"; adding the other mask contributes -1 for "y greater".
    return (0 - xIsLarger) + yIsLarger;
}
/// <summary>
/// Compares two vectors of signed 16-bit integers lane by lane.
/// </summary>
/// <param name="x">Left operand.</param>
/// <param name="y">Right operand.</param>
/// <returns>Per lane: 1 where x is greater than y, -1 where y is greater than x, otherwise 0.</returns>
public static short16 compareto(short16 x, short16 y)
{
    if (!Avx2.IsAvx2Supported)
    {
        return new short16(compareto(x.v8_0, y.v8_0), compareto(x.v8_8, y.v8_8));
    }

    // The compare intrinsic yields all-ones (-1) lanes where the first operand is greater.
    short16 xIsLarger = Avx2.mm256_cmpgt_epi16(x, y);
    short16 yIsLarger = Avx2.mm256_cmpgt_epi16(y, x);

    // 0 - (-1) contributes +1 for "x greater"; adding the other mask contributes -1 for "y greater".
    return (0 - xIsLarger) + yIsLarger;
}
/// <summary>
/// Divides two 16-lane vectors of 16-bit integers by routing each 8-lane half through vdiv_short_AVX and packing the results back to 16 bits.
/// </summary>
/// <param name="dividend">Vector of dividends.</param>
/// <param name="divisor">Vector of divisors.</param>
/// <returns>The per-lane quotient.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static short16 vdiv_short(short16 dividend, short16 divisor)
{
    if (!Avx2.IsAvx2Supported)
    {
        throw new CPUFeatureCheckException();
    }

    float8 lowQuotients = vdiv_short_AVX(dividend.v8_0, divisor.v8_0);
    float8 highQuotients = vdiv_short_AVX(dividend.v8_8, divisor.v8_8);

    // The pack works per 128-bit half and so interleaves the inputs as lo0-3, hi0-3, lo4-7, hi4-7.
    v256 interleaved = Avx2.mm256_packs_epi32((int8)lowQuotients, (int8)highQuotients);

    // Swap the two middle 64-bit elements to restore lane order.
    return Avx2.mm256_permute4x64_epi64(interleaved, Sse.SHUFFLE(3, 1, 2, 0));
}
/// <summary>
/// Checks maxmath.negate against maxmath.select(x, -x, mask) for every entry of the Short16 left-hand-side test data,
/// using random masks from a Random64 seeded with 47.
/// </summary>
public static void Short16()
{
    bool everythingMatches = true;
    Random64 rng = new Random64(47);

    for (int i = 0; i < Tests.Short16.NUM_TESTS; i++)
    {
        var input = Tests.Short16.TestData_LHS[i];
        bool16 mask = rng.NextBool16();

        short16 actual = maxmath.negate(input, mask);
        everythingMatches &= maxmath.all(actual == maxmath.select(input, -input, mask));
    }

    Assert.AreEqual(true, everythingMatches);
}
/// <summary>
/// Computes the per-lane average of two vectors of signed bytes: the sum is formed in 16-bit lanes,
/// incremented by one where it is positive, and then shifted right by one bit.
/// </summary>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>The vector of per-lane averages.</returns>
public static sbyte16 avg(sbyte16 x, sbyte16 y)
{
    if (Avx2.IsAvx2Supported)
    {
        // Widen to 16 bits so the intermediate sum cannot overflow.
        short16 result = ((short16)x + (short16)y);

        // if the intermediate sum is positive add 1
        // (the compare yields -1 in positive lanes, so subtracting it adds one)
        result -= Avx2.mm256_cmpgt_epi16(result, default(v256));

        return (sbyte16)(result >> 1);
    }
    else
    {
        // Process the two 8-lane halves separately.
        return new sbyte16(avg(x.v8_0, y.v8_0), avg(x.v8_8, y.v8_8));
    }
}
/// <summary>
/// Divides two vectors of 16-bit integers lane by lane and also returns the remainder.
/// </summary>
/// <param name="dividend">Vector of dividends.</param>
/// <param name="divisor">Vector of divisors.</param>
/// <param name="remainder">Receives the per-lane remainder.</param>
/// <returns>The per-lane quotient.</returns>
public static short16 divrem(short16 dividend, short16 divisor, out short16 remainder)
{
    if (!Sse2.IsSse2Supported)
    {
        remainder = dividend % divisor;
        return dividend / divisor;
    }

    // Derive the remainder from the quotient instead of performing a second division.
    short16 result = dividend / divisor;
    remainder = dividend - (result * divisor);

    return result;
}
/// <summary>
/// Produces sixteen bytes, each computed as the high byte of <paramref name="max"/> multiplied by a fresh NextState() value.
/// </summary>
/// <param name="max">Per-lane upper bound.</param>
/// <returns>The generated vector.</returns>
public byte16 NextByte16(byte16 max)
{
    if (!Avx2.IsAvx2Supported)
    {
        // Without AVX2 the two 8-lane halves are generated independently.
        return new byte16(NextByte8(max.v8_0), NextByte8(max.v8_8));
    }

    // Widen the bound to 16 bits and scale each lane by its own state value.
    short16 bound = (short16)max;
    short16 states = new short16(NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState(),
                                 NextState(), NextState(), NextState(), NextState());
    short16 products = bound * states;

    // Gather the odd (high) byte of every 16-bit product into the low 8 bytes of each 128-bit half.
    products = Avx2.mm256_shuffle_epi8(products, new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                                         1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0));

    // Bring the two 8-byte groups (64-bit elements 0 and 2) together in the low 128 bits.
    return Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(products, Sse.SHUFFLE(0, 0, 2, 0)));
}
/// <summary>
/// Expands the low 16 bits of <paramref name="mask"/> into a 16-lane vector: lane i becomes all-ones (-1)
/// when bit i of the mask is set and 0 otherwise.
/// </summary>
/// <param name="mask">Bit mask; bit i controls lane i.</param>
/// <returns>The expanded lane mask.</returns>
internal static short16 Short16FromInt(int mask)
{
    if (Avx2.IsAvx2Supported)
    {
        int8 broadcast = mask;

        // Move bit i of the mask into the sign bit of 32-bit lane i (bits 0..7, then bits 8..15).
        int8 shiftBoolsToSignBit_Lo = maxmath.shl(broadcast, new int8(31, 30, 29, 28, 27, 26, 25, 24));
        int8 shiftBoolsToSignBit_Hi = maxmath.shl(broadcast, new int8(23, 22, 21, 20, 19, 18, 17, 16));

        // The operands hold 32-bit lanes, so they must be narrowed with the 32 -> 16 bit saturating pack;
        // signed saturation preserves the sign bit of every lane. (The 16 -> 8 bit pack would split each
        // 32-bit lane into two halves and produce bytes in the wrong layout.)
        // The pack works per 128-bit half, so the middle 64-bit elements are swapped to restore lane order.
        short16 signSaturated = Avx2.mm256_permute4x64_epi64(Avx2.mm256_packs_epi32(shiftBoolsToSignBit_Lo, shiftBoolsToSignBit_Hi), Sse.SHUFFLE(3, 1, 2, 0));

        // Arithmetic shift replicates the sign bit across the whole lane.
        return signSaturated >> 15;
    }
    else
    {
        // Scalar construction: shift bit i into bit 15 of lane i, then replicate it with the shift.
        return new short16((short)(mask << 15),
                           (short)(mask << 14),
                           (short)(mask << 13),
                           (short)(mask << 12),
                           (short)(mask << 11),
                           (short)(mask << 10),
                           (short)(mask << 9),
                           (short)(mask << 8),
                           (short)(mask << 7),
                           (short)(mask << 6),
                           (short)(mask << 5),
                           (short)(mask << 4),
                           (short)(mask << 3),
                           (short)(mask << 2),
                           (short)(mask << 1),
                           (short)mask) >> 15;
    }
}