Ejemplo n.º 1
0
        /// <summary>
        ///     Computes an approximate single-precision quotient of <paramref name="dividend"/> by
        ///     <paramref name="divisor"/> for two lanes at once, using a hardware reciprocal estimate
        ///     followed by one correction step.
        /// </summary>
        /// <param name="dividend"> The two numerators. </param>
        /// <param name="divisor"> The two denominators. </param>
        /// <returns>
        ///     A <c>v128</c> whose float lanes 0 and 1 hold the quotients. Lanes 2 and 3 are padding
        ///     and carry no meaningful data.
        /// </returns>
        /// <exception cref="CPUFeatureCheckException"> Thrown when SSE2 is not supported. </exception>
        internal static v128 vdiv_byte_quotient(int2 dividend, int2 divisor)
        {
            if (Sse2.IsSse2Supported)
            {
                float2 dividend_f32 = dividend;
                float2 divisor_f32  = divisor;

                // A float2 is only 8 bytes wide; reinterpreting its address as a 16-byte v128 would
                // read past the end of the local. Pad to four lanes first. The padding divisor lanes
                // are 1 so that the reciprocal estimate in those lanes stays finite.
                float4 dividend_f32x4 = new float4(dividend_f32, 0f, 0f);
                float4 divisor_f32x4  = new float4(divisor_f32, 1f, 1f);

                // Fast, low-precision estimate of 1 / divisor.
                v128 divisor_f32_rcp = Sse.rcp_ps(*(v128 *)&divisor_f32x4);


                v128 precisionLossCompensation;

                // Both branches evaluate PRECISION_ADJUSTMENT_FACTOR - (rcp * divisor).
                // NOTE(review): this has the shape of a Newton-Raphson refinement term; the value of
                // PRECISION_ADJUSTMENT_FACTOR is defined outside this block - confirm.
                if (Fma.IsFmaSupported)
                {
                    precisionLossCompensation = Fma.fnmadd_ps(divisor_f32_rcp, *(v128 *)&divisor_f32x4, new v128(PRECISION_ADJUSTMENT_FACTOR));
                }
                else
                {
                    float4 temp = math.mad(-(*(float4 *)&divisor_f32_rcp), divisor_f32x4, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));

                    precisionLossCompensation = *(v128 *)&temp;
                }

                // Apply the correction to the reciprocal estimate, then scale by the dividend.
                precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, divisor_f32_rcp);
                precisionLossCompensation = Sse.mul_ps(precisionLossCompensation, *(v128 *)&dividend_f32x4);

                return(precisionLossCompensation);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Divides each lane of <paramref name="dividend"/> by the matching lane of
        ///     <paramref name="divisor"/>. The division is carried out in single-precision floats:
        ///     a hardware reciprocal estimate of the divisor is corrected once and multiplied by
        ///     the dividend, and the result is converted back to <c>ushort4</c>.
        /// </summary>
        /// <param name="dividend"> The four numerators. </param>
        /// <param name="divisor"> The four denominators; each lane is asserted to be non-zero. </param>
        /// <returns> The four quotients after conversion from float back to <c>ushort4</c>. </returns>
        /// <exception cref="CPUFeatureCheckException"> Thrown when SSE2 is not supported. </exception>
        internal static ushort4 vdiv_ushort(ushort4 dividend, ushort4 divisor)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);
            Assert.AreNotEqual(divisor.z, 0);
            Assert.AreNotEqual(divisor.w, 0);

            if (!Sse2.IsSse2Supported)
            {
                throw new CPUFeatureCheckException();
            }

            float4 numerator   = dividend;
            float4 denominator = divisor;

            // Fast, low-precision estimate of 1 / denominator.
            v128 reciprocal = Sse.rcp_ps(*(v128 *)&denominator);

            // Both paths evaluate PRECISION_ADJUSTMENT_FACTOR - (reciprocal * denominator).
            v128 correction;

            if (Fma.IsFmaSupported)
            {
                correction = Fma.fnmadd_ps(reciprocal, *(v128 *)&denominator, new v128(PRECISION_ADJUSTMENT_FACTOR));
            }
            else
            {
                float4 unfused = math.mad(*(float4 *)&reciprocal, -denominator, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));

                correction = *(v128 *)&unfused;
            }

            // Apply the correction to the reciprocal estimate, then scale by the numerator.
            v128 refinedReciprocal = Sse.mul_ps(correction, reciprocal);
            v128 quotient          = Sse.mul_ps(refinedReciprocal, *(v128 *)&numerator);

            return (ushort4)(*(float4 *)&quotient);
        }