/// <summary>
/// Computes an approximate floating-point quotient <paramref name="dividend"/> / <paramref name="divisor"/>
/// for each lane using the hardware reciprocal estimate followed by one correction step:
/// <c>(PRECISION_ADJUSTMENT_FACTOR - rcp * divisor) * rcp * dividend</c>.
/// </summary>
/// <param name="dividend">Numerators; converted to <c>float2</c> before the division.</param>
/// <param name="divisor">Denominators; converted to <c>float2</c>. No zero check is performed here.</param>
/// <returns>
/// A <c>v128</c> holding the quotients as single-precision floats. The result is NOT converted back
/// to an integer type by this method.
/// </returns>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static v128 vdiv_byte_quotient(int2 dividend, int2 divisor)
{
    if (Sse2.IsSse2Supported)
    {
        float2 dividend_f32 = dividend;
        float2 divisor_f32 = divisor;

        // NOTE(review): the float2 locals are reinterpreted as v128 below; if float2 is 8 bytes and
        // v128 is 16 bytes, the upper lanes are read from whatever follows the local - presumably
        // callers only consume the two low lanes. Confirm.
        v128 divisor_f32_rcp = Sse.rcp_ps(*(v128 *)&divisor_f32);
        v128 precisionLossCompensation;

        // Correction term: PRECISION_ADJUSTMENT_FACTOR - (rcp * divisor).
        // PRECISION_ADJUSTMENT_FACTOR is used as a raw bit pattern (broadcast into a v128 here,
        // reinterpreted through math.asfloat in the fallback).
        if (Fma.IsFmaSupported)
        {
            precisionLossCompensation = Fma.fnmadd_ps(divisor_f32_rcp, *(v128 *)&divisor_f32, new v128(PRECISION_ADJUSTMENT_FACTOR));
        }
        else
        {
            float2 temp = math.mad(-(*(float2 *)&divisor_f32_rcp), divisor_f32, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));
            precisionLossCompensation = *(v128 *)&temp;
        }

        // Refined reciprocal = correction * rcp; quotient = refined reciprocal * dividend.
        precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, divisor_f32_rcp);
        precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, *(v128 *)&dividend_f32);

        return precisionLossCompensation;
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Divides each lane of <paramref name="dividend"/> by the matching lane of <paramref name="divisor"/>
/// using single-precision arithmetic: a hardware reciprocal estimate followed by one correction step,
/// <c>(PRECISION_ADJUSTMENT_FACTOR - rcp * divisor) * rcp * dividend</c>, then a cast back to
/// <c>ushort4</c>.
/// </summary>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators; every component is asserted to be non-zero.</param>
/// <returns>The per-lane quotients, obtained by casting the float results to <c>ushort4</c>.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static ushort4 vdiv_ushort(ushort4 dividend, ushort4 divisor)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);
    Assert.AreNotEqual(divisor.z, 0);
    Assert.AreNotEqual(divisor.w, 0);

    if (Sse2.IsSse2Supported)
    {
        float4 dividend_f32 = dividend;
        float4 divisor_f32 = divisor;

        v128 divisor_f32_rcp = Sse.rcp_ps(*(v128 *)&divisor_f32);
        v128 precisionLossCompensation;

        // Correction term: PRECISION_ADJUSTMENT_FACTOR - (rcp * divisor).
        // PRECISION_ADJUSTMENT_FACTOR is used as a raw bit pattern (broadcast into a v128 here,
        // reinterpreted through math.asfloat in the fallback).
        if (Fma.IsFmaSupported)
        {
            precisionLossCompensation = Fma.fnmadd_ps(divisor_f32_rcp, *(v128 *)&divisor_f32, new v128(PRECISION_ADJUSTMENT_FACTOR));
        }
        else
        {
            float4 temp = math.mad(*(float4 *)&divisor_f32_rcp, -divisor_f32, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));
            precisionLossCompensation = *(v128 *)&temp;
        }

        // Refined reciprocal = correction * rcp; quotient = refined reciprocal * dividend.
        precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, divisor_f32_rcp);
        precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, *(v128 *)&dividend_f32);

        return (ushort4)(*(float4 *)&precisionLossCompensation);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}