コード例 #1
0
        /// <summary>
        /// Expands a raw IEEE 754 half-precision (binary16) bit pattern into a
        /// single-precision value.
        /// </summary>
        /// <param name="h">
        /// The 16 half-precision bits: sign (bit 15), exponent (bits 14-10),
        /// significand (bits 9-0).
        /// </param>
        /// <returns>
        /// The float read back from the assembled 32-bit pattern. Signed zeros,
        /// subnormals, normal numbers and inf/NaN are each handled by their own branch.
        /// </returns>
        private static float float16_to_float32(ushort h)
        {
            UInt32 signBits     = ((UInt32)h & 0x8000u) << 16;
            UInt32 halfExponent = (UInt32)h & 0x7c00u;
            UInt32 halfMantissa = (UInt32)h & 0x03ffu;
            UInt32 bits;

            if (halfExponent == 0x7c00u)
            {
                /* inf or NaN: all-ones exponent plus a copy of the significand */
                bits = signBits + 0x7f800000u + (halfMantissa << 13);
            }
            else if (halfExponent != 0)
            {
                /* normalized: re-bias the exponent (127 - 15 = 0x70, i.e. 0x1c000 at */
                /* half-precision bit positions) and move everything into place */
                bits = signBits + ((((UInt32)h & 0x7fffu) + 0x1c000u) << 13);
            }
            else if (halfMantissa == 0)
            {
                /* signed zero: only the sign bit survives */
                bits = signBits;
            }
            else
            {
                /* subnormal: shift the significand left until its leading 1 reaches */
                /* bit 10 (the implicit-one position), counting the extra shifts so */
                /* the exponent can be lowered by the same amount */
                UInt32 extraShifts = 0;

                halfMantissa <<= 1;
                while ((halfMantissa & 0x0400u) == 0)
                {
                    halfMantissa <<= 1;
                    extraShifts++;
                }

                UInt32 exponentBits    = (127u - 15u - extraShifts) << 23;
                UInt32 significandBits = (halfMantissa & 0x03ffu) << 13;
                bits = signBits + exponentBits + significandBits;
            }

            /* fp32 is declared elsewhere; it is used here as in the original code: */
            /* assigned from a uint bit pattern and read back through its float member */
            fp32 converted = bits;
            return converted.f;
        }
コード例 #2
0
        /// <summary>
        /// Converts a single-precision value into a raw IEEE 754 half-precision
        /// (binary16) bit pattern.
        /// </summary>
        /// <param name="value">The float to convert.</param>
        /// <returns>
        /// The 16 half-precision bits. Infinity yields 0x7C00 and NaN yields 0x7FFF;
        /// finite values whose scaled result exceeds the half range are clamped to
        /// infinity. The input's sign bit is copied into bit 15 in every case.
        /// </returns>
        /// <remarks>
        /// NOTE(review): fp32 is declared elsewhere; this code assumes its <c>u</c>
        /// (uint) and <c>f</c> (float) members view the same 32 bits - confirm
        /// against the type's definition.
        /// </remarks>
        private static ushort make_float16(float value)
        {
            fp32 f32infty   = 255U << 23;   /* exponent field all ones: float infinity bits */
            fp32 f16infty   = 31U << 23;    /* half infinity bits, still 13 bits to the left */
            fp32 magic      = 15U << 23;    /* exponent field 15: as a float this is 2^-112 */
            uint sign_mask  = 0x80000000U;
            uint round_mask = ~0xFFFU;

            fp32   @in  = 0;
            ushort @out = 0;

            @in.f = value;

            /* Split off the sign so the rest of the routine works on the magnitude. */
            uint sign = @in.u & sign_mask;

            @in.u ^= sign;

            if (@in.u >= f32infty.u)
            {
                /* Exponent all ones: a non-zero significand means NaN, otherwise infinity. */
                @out = (@in.u > f32infty.u) ? (ushort)0x7FFFU : (ushort)0x7C00U;
            }
            else
            {
                /* Drop the low 12 bits, then let the float multiply re-bias the exponent. */
                @in.u &= round_mask;
                @in.f *= magic.f;

                /* Subtracting 0xFFFFF000 modulo 2^32 adds 0x1000, i.e. half of the 13 */
                /* bits discarded by the shift below. The wraparound is intentional, so */
                /* it is marked unchecked: in an overflow-checked build the plain */
                /* subtraction would throw OverflowException for every finite input. */
                @in.u = unchecked(@in.u - round_mask);

                /* Clamp anything that overflowed the half range to infinity. */
                if (@in.u > f16infty.u)
                {
                    @in.u = f16infty.u;
                }

                @out = (ushort)(@in.u >> 13);
            }

            /* Put the sign back at bit 15 of the half. */
            @out |= (ushort)(sign >> 16);

            return(@out);
        }