/// <summary>
/// Componentwise multiply of <paramref name="a"/> and <paramref name="b"/>, followed by an
/// alternating subtract/add of <paramref name="c"/>: the x and z lanes yield a * b - c,
/// the y and w lanes yield a * b + c.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Value subtracted in lanes x/z and added in lanes y/w.</param>
/// <returns>(a.x * b.x - c.x, a.y * b.y + c.y, a.z * b.z - c.z, a.w * b.w + c.w).</returns>
public static float4 msubadd(float4 a, float4 b, float4 c)
{
    // Preferred path: a single fused multiply / alternating add-sub instruction.
    if (Fma.IsFmaSupported)
    {
        v128 fused = Fma.fmaddsub_ps(*(v128*)&a, *(v128*)&b, *(v128*)&c);

        return *(float4*)&fused;
    }

    // SSE path: flip the sign bit of c in lanes 0 and 2 so that a plain
    // multiply-add subtracts in those lanes and adds in the others.
    if (Sse.IsSseSupported)
    {
        v128 signMask = new v128(1 << 31, 0, 1 << 31, 0);
        v128 flipped = Sse.xor_ps(*(v128*)&c, signMask);

        return math.mad(a, b, *(float4*)&flipped);
    }

    // Portable scalar fallback.
    return new float4(
        a.x * b.x - c.x,
        a.y * b.y + c.y,
        a.z * b.z - c.z,
        a.w * b.w + c.w);
}
/// <summary>
/// Componentwise multiply of <paramref name="a"/> and <paramref name="b"/>, followed by an
/// alternating subtract/add of <paramref name="c"/>: the x lane yields a * b - c,
/// the y lane yields a * b + c.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Value subtracted in lane x and added in lane y.</param>
/// <returns>(a.x * b.x - c.x, a.y * b.y + c.y).</returns>
public static float2 msubadd(float2 a, float2 b, float2 c)
{
    if (Fma.IsFmaSupported)
    {
        // A float2 is only 8 bytes wide; reinterpreting its address as a 16-byte v128
        // would read past the end of the argument. Widen explicitly with zeroed
        // upper lanes instead (their results are discarded below).
        v128 va = new v128(a.x, a.y, 0f, 0f);
        v128 vb = new v128(b.x, b.y, 0f, 0f);
        v128 vc = new v128(c.x, c.y, 0f, 0f);

        v128 temp = Fma.fmaddsub_ps(va, vb, vc);

        // Narrowing v128 -> float2 only reads the low 8 bytes and is in bounds.
        return *(float2*)&temp;
    }
    else if (Sse.IsSseSupported)
    {
        // Same widening as above to avoid the out-of-bounds 16-byte load from c.
        v128 vc = new v128(c.x, c.y, 0f, 0f);

        // Flip the sign bit of c.x only, so the multiply-add subtracts in lane x
        // and adds in lane y.
        v128 negate = Sse.xor_ps(vc, new v128(1 << 31, 0, 0, 0));

        return math.mad(a, b, *(float2*)&negate);
    }
    else
    {
        return new float2(a.x * b.x - c.x, a.y * b.y + c.y);
    }
}
/// <summary>
/// Componentwise division of <paramref name="a"/> by <paramref name="b"/>, followed by an
/// alternating subtract/add of <paramref name="c"/>: the x and z lanes yield a / b - c,
/// the y lane yields a / b + c.
/// </summary>
/// <param name="a">Dividend.</param>
/// <param name="b">Divisor.</param>
/// <param name="c">Value subtracted in lanes x/z and added in lane y.</param>
/// <returns>(a.x / b.x - c.x, a.y / b.y + c.y, a.z / b.z - c.z).</returns>
public static float3 dsubadd(float3 a, float3 b, float3 c)
{
    if (Fma.IsFmaSupported)
    {
        // A float3 is only 12 bytes wide; reinterpreting its address as a 16-byte v128
        // would read past the end of the argument. Widen explicitly instead. The unused
        // fourth lane of the divisor is padded with 1 so it never divides by zero.
        v128 va = new v128(a.x, a.y, a.z, 0f);
        v128 vb = new v128(b.x, b.y, b.z, 1f);
        v128 vc = new v128(c.x, c.y, c.z, 0f);

        // Use a full-precision reciprocal (1 / b) rather than the approximate
        // Sse.rcp_ps, so this path matches the precision of the SSE path below,
        // which uses math.rcp.
        v128 reciprocal = Sse.div_ps(new v128(1f, 1f, 1f, 1f), vb);

        v128 temp = Fma.fmaddsub_ps(va, reciprocal, vc);

        // Narrowing v128 -> float3 only reads the low 12 bytes and is in bounds.
        return *(float3*)&temp;
    }
    else if (Sse.IsSseSupported)
    {
        b = math.rcp(b);

        // Same widening as above to avoid the out-of-bounds 16-byte load from c.
        v128 vc = new v128(c.x, c.y, c.z, 0f);

        // Flip the sign bit of c in lanes 0 and 2, so the multiply-add subtracts
        // in lanes x/z and adds in lane y.
        v128 negate = Sse.xor_ps(vc, new v128(1 << 31, 0, 1 << 31, 0));

        return math.mad(a, b, *(float3*)&negate);
    }
    else
    {
        return new float3(a.x / b.x - c.x, a.y / b.y + c.y, a.z / b.z - c.z);
    }
}