/// <summary>
/// Computes the per-lane remainder of <paramref name="dividend"/> divided by
/// <paramref name="divisor"/> with a bit-by-bit shift-and-subtract division.
/// The eight byte lanes are widened to 16-bit lanes for the computation.
/// </summary>
/// <param name="dividend">Values to divide.</param>
/// <param name="divisor">Values to divide by; every lane is asserted to be non-zero.</param>
/// <returns>The 16-bit remainders cast back to <c>byte8</c>.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static byte8 vrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);

    if (Sse2.IsSse2Supported)
    {
        ushort8 remainders = ushort8.zero;
        ushort8 divisorCast = divisor;
        ushort8 dividendCast = dividend;

        // Most significant dividend bit: the remainder is still zero, so no shift is needed.
        remainders |= (new ushort8(1) & (dividendCast >> 7));
        // min(divisor, remainder) == divisor  <=>  divisor <= remainder.
        v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        // NOTE(review): Mask.BlendV is assumed to pick divisorCast where the mask is set and zero elsewhere.
        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);

        for (int i = 6; i > 0; i--)
        {
            remainders <<= 1;
            remainders |= (new ushort8(1) & (dividendCast >> i));

            // The lanes are 16 bits wide, so compare them as 16-bit values like the
            // steps before and after the loop do. The previous 8-bit compare only gave
            // the same result because the high byte of every divisor lane is zero.
            subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
            remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
        }

        // Least significant dividend bit.
        remainders <<= 1;
        remainders |= new ushort8(1) & dividendCast;
        subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);

        return (byte8)remainders;
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Tests each component of <paramref name="x"/> for being a power of two.
/// The SSE2 path requires the component to be greater than zero and
/// <c>x &amp; (x - 1)</c> to be zero.
/// </summary>
public static bool4 ispow2(short4 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 isPositive = Sse2.cmpgt_epi16(x, default(v128));
        v128 hasSingleBit = Sse2.cmpeq_epi16(default(v128), x & (x - 1));
        v128 bothHold = Sse2.and_si128(isPositive, hasSingleBit);

        // Reduce the all-ones lane masks to 1, then narrow to one byte per component.
        v128 result = (byte4)(new short4(1) & bothHold);

        return *(bool4 *)&result;
    }

    return new bool4(math.ispow2(x.x),
                     math.ispow2(x.y),
                     math.ispow2(x.z),
                     math.ispow2(x.w));
}
/// <summary>
/// Tests each component of <paramref name="x"/> for being a power of two.
/// The SSE2 path requires the component to be greater than zero (unsigned compare)
/// and <c>x &amp; (x - 1)</c> to be zero.
/// </summary>
public static bool4 ispow2(ushort4 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 isNonZero = Operator.greater_mask_ushort(x, default(v128));
        v128 hasSingleBit = Sse2.cmpeq_epi16(default(v128), x & (x - 1));
        v128 bothHold = Sse2.and_si128(isNonZero, hasSingleBit);

        // Reduce the all-ones lane masks to 1, then narrow to one byte per component.
        v128 result = (byte4)(new ushort4(1) & bothHold);

        return *(bool4 *)&result;
    }

    return new bool4(math.ispow2((uint)x.x),
                     math.ispow2((uint)x.y),
                     math.ispow2((uint)x.z),
                     math.ispow2((uint)x.w));
}
/// <summary>
/// Adds <paramref name="b"/> to <paramref name="a"/> in components x and z and
/// subtracts it in component y.
/// </summary>
public static int3 addsub(int3 a, int3 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // sign_epi32 keeps lanes paired with a positive value and negates lanes paired with a negative one.
        v128 lanePattern = new v128(1, uint.MaxValue, 1, uint.MaxValue);
        v128 signedB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

        return a + *(int3 *)&signedB;
    }
    else
    {
        bool3 negateLane = new bool3(false, true, false);

        return a + math.select(b, -b, negateLane);
    }
}
/// <summary>
/// Adds <paramref name="b"/> to <paramref name="a"/> in components x and z and
/// subtracts it in components y and w.
/// </summary>
public static uint4 addsub(uint4 a, uint4 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // sign_epi32 keeps lanes paired with a positive value and negates lanes paired with a negative one.
        v128 lanePattern = new v128(1, uint.MaxValue, 1, uint.MaxValue);
        v128 signedB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

        return a + *(uint4 *)&signedB;
    }
    else
    {
        uint4 negatedB = (uint4)(-(int4)b);
        bool4 negateLane = new bool4(false, true, false, true);

        return a + math.select(b, negatedB, negateLane);
    }
}
/// <summary>
/// Hashes a 256-bit vector by XOR-ing its lower 128 bits with the lowest 64 bits of
/// its upper half (duplicated into both 64-bit lanes) and passing the result to <c>v64</c>.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static int v192(v256 x)
{
    if (Avx2.IsAvx2Supported)
    {
        v128 upperHalf = Avx2.mm256_extracti128_si256(x, 1);
        v128 hi = ((long2)upperHalf).xx;
        v128 lo = Avx.mm256_castsi256_si128(x);

        return v64(Sse2.xor_si128(lo, hi));
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Hashes a 128-bit vector by XOR-folding it and returning the lowest 32 bits as a signed int.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static int v128(v128 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Fold the upper 64 bits onto the lower 64 bits.
        v128 swappedHalves = Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 0, 3, 2));
        v128 folded64 = Sse2.xor_si128(x, swappedHalves);

        // Fold bits 32..63 onto bits 0..31.
        v128 shiftedWords = Sse2.shufflelo_epi16(folded64, Sse.SHUFFLE(0, 0, 3, 2));
        v128 folded32 = Sse2.xor_si128(folded64, shiftedWords);

        return folded32.SInt0;
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Returns the sign of each component of <paramref name="x"/>:
/// -1 for negative, 0 for zero and 1 for positive values.
/// </summary>
public static int4 sign(int4 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        // Applying the sign of x to a vector of ones yields -1, 0 or 1 per lane.
        v128 ones = new v128(1);
        v128 signs = Ssse3.sign_epi32(ones, *(v128 *)&x);

        return *(int4 *)&signs;
    }
    else
    {
        // x >> 31 is -1 for negative lanes; (uint)(-x) >> 31 is 1 for positive lanes.
        int4 negativePart = x >> 31;
        int4 positivePart = (int4)((uint4)(-x) >> 31);

        return negativePart | positivePart;
    }
}
/// <summary>
/// Returns the negative absolute value of each component of <paramref name="x"/>.
/// The SSE2 path sets the sign bit of every lane.
/// </summary>
public static double2 nabs(double2 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 signBits = new v128(1L << 63);
        v128 negated = Sse2.or_pd(*(v128 *)&x, signBits);

        return *(double2 *)&negated;
    }

    return new double2(nabs(x.x), nabs(x.y));
}
/// <summary>
/// Tests each component of <paramref name="x"/> for being a power of two.
/// The SSE4.2 path requires the component to be greater than zero and
/// <c>x &amp; (x - 1)</c> to be zero.
/// </summary>
public static bool2 ispow2(long2 x)
{
    if (Sse4_2.IsSse42Supported)
    {
        v128 isPositive = Operator.greater_mask_long(x, default(v128));
        v128 hasSingleBit = Operator.equals_mask_long(default(v128), x & (x - 1));
        v128 bothHold = Sse2.and_si128(isPositive, hasSingleBit);

        // Reduce the all-ones lane masks to 1, then narrow to one byte per component.
        v128 result = (byte2)(new long2(1) & bothHold);

        return *(bool2 *)&result;
    }

    return new bool2(ispow2(x.x), ispow2(x.y));
}
/// <summary>
/// Converts a comparison mask with two 16-bit lanes into a <c>bool2</c> that is
/// true where the mask lane is NOT set.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
private static bool2 TestIsFalse(v128 input)
{
    if (Sse2.IsSse2Supported)
    {
        // Narrow each 16-bit lane to a byte, then compute (~mask & 1) per byte.
        v128 narrowedMask = (byte2)(ushort2)input;
        v128 onePerByte = new ushort2(0x0101);
        v128 inverted = Sse2.andnot_si128(narrowedMask, onePerByte);

        return *(bool2 *)&inverted;
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Packs the lowest 16 bits of each of the two 64-bit lanes of <paramref name="x"/>
/// into the first two 16-bit lanes of the result.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static v128 Long2To_U_Short2_SSE2(v128 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Bring the second 64-bit lane down to the bottom, then interleave 16-bit words.
        v128 secondLane = Sse2.bsrli_si128(x, 1 * sizeof(long));

        return Sse2.unpacklo_epi16(x, secondLane);
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Converts a comparison mask into a <c>bool2</c> built from the most significant
/// bits of bytes 0 and 8 of <paramref name="input"/>.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
private static bool2 TestIsTrue(v128 input)
{
    if (Sse2.IsSse2Supported)
    {
        // Bits 0 and 8 of the byte mask land in the two lowest bytes of the int.
        int byteMask = Sse2.movemask_epi8(input);
        int packedBools = 0x0101 & byteMask;

        return *(bool2 *)&packedBools;
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Returns <c>left &amp; ~right</c> for each component.
/// </summary>
public static int4 andnot(int4 left, int4 right)
{
    if (Sse2.IsSse2Supported)
    {
        // andnot_si128 inverts its FIRST operand, hence the swapped argument order.
        v128 masked = Sse2.andnot_si128(*(v128 *)&right, *(v128 *)&left);

        return *(int4 *)&masked;
    }

    return left & ~right;
}
/// <summary>
/// Subtracts <paramref name="b"/> from <paramref name="a"/> in components x and z and
/// adds it in components y and w.
/// </summary>
public static int4 subadd(int4 a, int4 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // sign_epi32 negates lanes paired with a negative value and keeps lanes paired with a positive one.
        v128 lanePattern = new v128(uint.MaxValue, 1, uint.MaxValue, 1);
        v128 signedB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

        return a + *(int4 *)&signedB;
    }
    else
    {
        bool4 negateLane = new bool4(false, true, false, true);

        return a - math.select(b, -b, negateLane);
    }
}
/// <summary>
/// Builds a multi-line message showing the expected and the actual 128-bit vector.
/// </summary>
/// <param name="expected">The vector that was expected.</param>
/// <param name="result">The vector that was actually produced.</param>
/// <returns>The formatted message.</returns>
private unsafe static string FormatVectorFailure128(v128 expected, v128 result)
{
    StringBuilder message = new StringBuilder();

    message.AppendLine("128-bit vectors differ!")
           .AppendLine("Expected:");
    FormatVector(message, (void *)&expected, 16);
    message.AppendLine();

    message.AppendLine("But was :");
    FormatVector(message, (void *)&result, 16);
    message.AppendLine();

    return message.ToString();
}
/// <summary>
/// Subtracts <paramref name="b"/> from <paramref name="a"/> in component x and
/// adds it in component y.
/// </summary>
public static double2 subadd(double2 a, double2 b)
{
    if (Sse3.IsSse3Supported)
    {
        v128 combined = Sse3.addsub_pd(*(v128 *)&a, *(v128 *)&b);

        return *(double2 *)&combined;
    }
    else
    {
        bool2 negateLane = new bool2(false, true);

        return a - math.select(b, -b, negateLane);
    }
}
/// <summary>
/// Subtracts <paramref name="b"/> from <paramref name="a"/> in components x and z and
/// adds it in components y and w.
/// </summary>
public static float4 subadd(float4 a, float4 b)
{
    if (Sse3.IsSse3Supported)
    {
        v128 combined = Sse3.addsub_ps(*(v128 *)&a, *(v128 *)&b);

        return *(float4 *)&combined;
    }
    else
    {
        bool4 negateLane = new bool4(false, true, false, true);

        return a - math.select(b, -b, negateLane);
    }
}
/// <summary>
/// Subtracts <paramref name="b"/> from <paramref name="a"/> in components x and z and
/// adds it in component y.
/// </summary>
public static uint3 subadd(uint3 a, uint3 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // sign_epi32 negates lanes paired with a negative value and keeps lanes paired with a positive one.
        v128 lanePattern = new v128(uint.MaxValue, 1, uint.MaxValue, 1);
        v128 signedB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

        return a + *(uint3 *)&signedB;
    }
    else
    {
        uint3 negatedB = (uint3)(-(int3)b);
        bool3 negateLane = new bool3(false, true, false);

        return a - math.select(b, negatedB, negateLane);
    }
}
/// <summary>
/// Converts a comparison mask into a <c>bool2</c> derived from the most significant
/// bits of bytes 0 and 8 of <paramref name="input"/>, combined with 0x0101 via
/// <c>maxmath.andnot</c>.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
private static bool2 TestIsFalse(v128 input)
{
    if (Sse2.IsSse2Supported)
    {
        int byteMask = Sse2.movemask_epi8(input);
        // NOTE(review): assumes the scalar andnot overload computes left & ~right — confirm.
        int packedBools = maxmath.andnot(0x0101, byteMask);

        return *(bool2 *)&packedBools;
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Subtracts <paramref name="b"/> from <paramref name="a"/> in component x and
/// adds it in component y.
/// </summary>
public static uint2 subadd(uint2 a, uint2 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // Only the two lowest lanes carry data; the upper lanes of the pattern are zero.
        v128 lanePattern = new v128(uint.MaxValue, 1, 0, 0);
        v128 signedB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

        return a + *(uint2 *)&signedB;
    }
    else
    {
        uint2 negatedB = (uint2)(-(int2)b);
        bool2 negateLane = new bool2(false, true);

        return a - math.select(b, negatedB, negateLane);
    }
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise.
/// </summary>
/// <remarks>
/// The SSE path multiplies by <c>Sse.rcp_ps(divisor)</c>. Intel documents RCPPS as an
/// approximate reciprocal, so that path is not guaranteed to be bit-identical to the
/// exact division used by the fallback.
/// </remarks>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators.</param>
/// <returns>The component-wise quotient.</returns>
public static float4 div(float4 dividend, float4 divisor)
{
    if (Sse.IsSseSupported)
    {
        // Fixed: the address-of expression had been corrupted to "÷nd" (HTML-entity
        // mangling of "&dividend"), which does not compile.
        v128 temp = Sse.mul_ps(*(v128 *)&dividend, Sse.rcp_ps(*(v128 *)&divisor));

        return *(float4 *)&temp;
    }
    else
    {
        return dividend / divisor;
    }
}
/// <summary>
/// Packs the lowest byte of each of the first two 32-bit lanes of <paramref name="x"/>
/// into the first two bytes of the result.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static v128 Int2To_S_Byte2_SSE2(v128 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Bring the second 32-bit lane down to the bottom, then interleave bytes.
        v128 secondLane = Sse2.bsrli_si128(x, 1 * sizeof(int));

        return Sse2.unpacklo_epi8(x, secondLane);
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise and
/// also produces the remainder as <c>dividend - quotient * divisor</c>.
/// </summary>
/// <param name="dividend">Values to divide.</param>
/// <param name="divisor">Values to divide by; every component is asserted to be non-zero.</param>
/// <param name="remainder">Receives the component-wise remainder.</param>
/// <returns>The component-wise quotient.</returns>
internal static sbyte2 vdivrem_sbyte(sbyte2 dividend, sbyte2 divisor, out sbyte2 remainder)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);

    int2 wideDividend = dividend;
    int2 wideDivisor = divisor;

    // The helper returns its quotient as float lanes inside a v128.
    v128 floatQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
    int2 quotient = (int2)(*(float2 *)&floatQuotient);

    remainder = (sbyte2)(wideDividend - quotient * wideDivisor);

    return (sbyte2)quotient;
}
/// <summary>
/// Computes the component-wise remainder of <paramref name="dividend"/> divided by
/// <paramref name="divisor"/> as <c>dividend - quotient * divisor</c>.
/// </summary>
/// <param name="dividend">Values to divide.</param>
/// <param name="divisor">Values to divide by; every component is asserted to be non-zero.</param>
/// <returns>The component-wise remainder.</returns>
internal static sbyte4 vrem_sbyte(sbyte4 dividend, sbyte4 divisor)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);
    Assert.AreNotEqual(divisor.z, 0);
    Assert.AreNotEqual(divisor.w, 0);

    int4 wideDividend = dividend;
    int4 wideDivisor = divisor;

    // The helper returns its quotient as float lanes inside a v128.
    v128 floatQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
    int4 quotient = (int4)(*(float4 *)&floatQuotient);

    return (sbyte4)(wideDividend - (quotient * wideDivisor));
}
/// <summary>
/// Returns the negative absolute value of each component of <paramref name="x"/>.
/// The SSE path sets the sign bit of every lane.
/// </summary>
public static float4 nabs(float4 x)
{
    if (Sse.IsSseSupported)
    {
        v128 signBits = new v128(1 << 31);
        v128 negated = Sse.or_ps(*(v128 *)&x, signBits);

        return *(float4 *)&negated;
    }

    return new float4(nabs(x.x),
                      nabs(x.y),
                      nabs(x.z),
                      nabs(x.w));
}
/// <summary>
/// Produces a per-lane mask for the unsigned 32-bit comparison <c>left &gt; right</c>.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static v128 greater_mask_uint(v128 left, v128 right)
{
    if (Sse2.IsSse2Supported)
    {
        // Flipping the sign bit of both operands lets the signed compare order them as unsigned values.
        v128 signBit = new v128(1 << 31);
        v128 biasedLeft = Sse2.xor_si128(left, signBit);
        v128 biasedRight = Sse2.xor_si128(right, signBit);

        return Sse2.cmpgt_epi32(biasedLeft, biasedRight);
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Builds, for each of the 16 byte lanes, a mask with <paramref name="numBits"/>
/// consecutive bits set, starting at bit position <paramref name="index"/>.
/// </summary>
/// <param name="numBits">Number of bits to set per lane; asserted to lie in [0, 8 - index].</param>
/// <param name="index">Position of the lowest set bit per lane; asserted to lie in [0, 8].</param>
/// <returns>The per-lane bit masks.</returns>
public static byte16 bitmask8(byte16 numBits, byte16 index = default(byte16))
{
    Assert.IsBetween(index.x0, 0u, 8u);
    Assert.IsBetween(index.x1, 0u, 8u);
    Assert.IsBetween(index.x2, 0u, 8u);
    Assert.IsBetween(index.x3, 0u, 8u);
    Assert.IsBetween(index.x4, 0u, 8u);
    Assert.IsBetween(index.x5, 0u, 8u);
    Assert.IsBetween(index.x6, 0u, 8u);
    Assert.IsBetween(index.x7, 0u, 8u);
    Assert.IsBetween(index.x8, 0u, 8u);
    Assert.IsBetween(index.x9, 0u, 8u);
    Assert.IsBetween(index.x10, 0u, 8u);
    Assert.IsBetween(index.x11, 0u, 8u);
    Assert.IsBetween(index.x12, 0u, 8u);
    Assert.IsBetween(index.x13, 0u, 8u);
    Assert.IsBetween(index.x14, 0u, 8u);
    Assert.IsBetween(index.x15, 0u, 8u);
    Assert.IsBetween(numBits.x0, 0u, 8u - index.x0);
    Assert.IsBetween(numBits.x1, 0u, 8u - index.x1);
    Assert.IsBetween(numBits.x2, 0u, 8u - index.x2);
    Assert.IsBetween(numBits.x3, 0u, 8u - index.x3);
    Assert.IsBetween(numBits.x4, 0u, 8u - index.x4);
    Assert.IsBetween(numBits.x5, 0u, 8u - index.x5);
    Assert.IsBetween(numBits.x6, 0u, 8u - index.x6);
    Assert.IsBetween(numBits.x7, 0u, 8u - index.x7);
    Assert.IsBetween(numBits.x8, 0u, 8u - index.x8);
    Assert.IsBetween(numBits.x9, 0u, 8u - index.x9);
    Assert.IsBetween(numBits.x10, 0u, 8u - index.x10);
    Assert.IsBetween(numBits.x11, 0u, 8u - index.x11);
    Assert.IsBetween(numBits.x12, 0u, 8u - index.x12);
    Assert.IsBetween(numBits.x13, 0u, 8u - index.x13);
    Assert.IsBetween(numBits.x14, 0u, 8u - index.x14);
    Assert.IsBetween(numBits.x15, 0u, 8u - index.x15);

    if (Sse2.IsSse2Supported)
    {
        // The line comment below must stay on its own line. When it shared a physical
        // line with the code, it commented out the rest of the method.

        // mask
        index = shl(byte.MaxValue, index);

        // Lanes asking for all 8 bits are forced to all-ones by OR-ing in this compare
        // mask. NOTE(review): presumably because shl by 8 is not reliable for byte lanes — confirm.
        v128 isMaxBitsMask = Sse2.cmpeq_epi8(numBits, new byte16(8));

        // Keep the bits at or above the start position that are below start + numBits.
        return isMaxBitsMask | andnot(index, shl(index, numBits));
    }
    else
    {
        return new byte16(bitmask8(numBits.v8_0, index.v8_0), bitmask8(numBits.v8_8, index.v8_8));
    }
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise and
/// also produces the remainder as <c>dividend - quotient * divisor</c>.
/// </summary>
/// <param name="dividend">Values to divide.</param>
/// <param name="divisor">Values to divide by; every component is asserted to be non-zero.</param>
/// <param name="remainder">Receives the component-wise remainder.</param>
/// <returns>The component-wise quotient.</returns>
internal static byte3 vdivrem_byte(byte3 dividend, byte3 divisor, out byte3 remainder)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);
    Assert.AreNotEqual(divisor.z, 0);

    int3 wideDividend = dividend;
    int3 wideDivisor = divisor;

    // The helper returns its quotient as float lanes inside a v128.
    v128 floatQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
    int3 quotient = (int3)(*(float3 *)&floatQuotient);

    remainder = (byte3)(wideDividend - quotient * wideDivisor);

    return (byte3)quotient;
}
/// <summary>
/// Checks that converting every <c>TestData_LHS</c> entry to <c>v128</c> places its
/// <c>x</c> and <c>y</c> components in <c>Byte0</c> and <c>Byte1</c>.
/// </summary>
public static void Cast_ToV128()
{
    bool allMatch = true;

    for (int index = 0; index < NUM_TESTS; index++)
    {
        v128 converted = TestData_LHS[index];

        bool lanesMatch = converted.Byte0 == TestData_LHS[index].x & converted.Byte1 == TestData_LHS[index].y;
        allMatch &= lanesMatch;
    }

    Assert.AreEqual(true, allMatch);
}