/// <summary>
/// Converts a single-precision value to <see cref="half"/>.
/// The raw bit pattern comes from <c>__internal_float2half</c>; this method then
/// applies the final rounding step based on the remainder that helper reports.
/// </summary>
/// <param name="a">The value to convert.</param>
/// <returns>A <see cref="half"/> whose <c>x</c> field holds the rounded bit pattern.</returns>
private static half __float2half(float a)
{
    // Both are filled in by the helper; sign is not used afterwards in this method.
    uint sign = 0;
    uint remainder = 0;

    half result = new half();
    result.x = __internal_float2half(a, ref sign, ref remainder);

    // Bump the result by one when the remainder is above 0x80000000, or when it is
    // exactly 0x80000000 and the lowest bit of the result is set.
    // NOTE(review): this is round-to-nearest-even provided the remainder holds the
    // discarded bits left-aligned in 32 bits - confirm against __internal_float2half.
    bool aboveMidpoint = remainder > 0x80000000U;
    bool midpointAndOdd = (remainder == 0x80000000U) && ((result.x & 0x1U) != 0U);
    if (aboveMidpoint || midpointAndOdd)
    {
        result.x++;
    }

    return result;
}
/// <summary>
/// Extern declaration of <c>cublasHgemmStridedBatched</c>; the body is supplied externally.
/// The scalars <paramref name="alpha"/> and <paramref name="beta"/> are passed by reference,
/// the three matrices are passed as device pointers with a leading dimension and a 64-bit
/// stride each, and <paramref name="batchCount"/> is passed last.
/// NOTE(review): presumably binds the native cuBLAS routine of the same name
/// (half-precision strided batched GEMM) - confirm against the cuBLAS documentation.
/// </summary>
public static extern CublasStatus cublasHgemmStridedBatched(
    CudaBlasHandle handle,
    Operation transa,
    Operation transb,
    int m,
    int n,
    int k,
    ref half alpha,     // host or device pointer
    CUdeviceptr A,
    int lda,
    long strideA,       // purposely signed
    CUdeviceptr B,
    int ldb,
    long strideB,
    ref half beta,      // host or device pointer
    CUdeviceptr C,
    int ldc,
    long strideC,
    int batchCount);
/// <summary>
/// Extern declaration of <c>cublasHgemm</c>; the body is supplied externally.
/// The scalars <paramref name="alpha"/> and <paramref name="beta"/> are passed by reference,
/// and the three matrices are passed as device pointers, each with a leading dimension.
/// NOTE(review): presumably binds the native cuBLAS routine of the same name
/// (half-precision GEMM) - confirm against the cuBLAS documentation.
/// </summary>
public static extern CublasStatus cublasHgemm(
    CudaBlasHandle handle,
    Operation transa,
    Operation transb,
    int m,
    int n,
    int k,
    ref half alpha,     // host or device pointer
    CUdeviceptr A,
    int lda,
    CUdeviceptr B,
    int ldb,
    ref half beta,      // host or device pointer
    CUdeviceptr C,
    int ldc);
/// <summary>
/// Converts a double-precision value to <see cref="half"/>.
/// The value is first rounded to 11 bits of precision in double arithmetic, then
/// converted to float and handed to <c>__float2half</c>. Pre-rounding to 11 bits
/// avoids an additional rounding inside the float-to-half conversion.
/// </summary>
/// <param name="x">The value to convert.</param>
/// <returns>The result of <c>__float2half</c> applied to the pre-rounded value.</returns>
private static half __double2half(double x)
{
    // Reinterpret the double as its raw 64-bit pattern (no allocation needed).
    // unchecked: the sign bit must survive the long -> ulong cast even in a checked build.
    ulong bits = unchecked((ulong)BitConverter.DoubleToInt64Bits(x));
    ulong absBits = bits & 0x7fffffffffffffffUL;

    if ((absBits >= 0x40f0000000000000UL) || (absBits <= 0x3e60000000000000UL))
    {
        // |x| >= 2^16 or NaN or |x| <= 2^(-25)
        // double-rounding is not a problem
        return __float2half((float)x);
    }

    // here 2^(-25) < |x| < 2^16
    // prepare shifter value such that x + shifter
    // done in double precision performs round-to-nearest-even
    // and (x + shifter) - shifter results in x rounded to
    // 11 bits of precision. Shifter needs to have exponent of
    // x plus 53 - 11 = 42 and a leading bit in mantissa to guard
    // against negative values.
    // So need to have |x| capped to avoid overflow in exponent.
    // For inputs that are smaller than half precision minnorm
    // we prepare fixed shifter exponent.
    ulong shifterBits = bits & 0x7ff0000000000000UL;
    if (absBits >= 0x3f10000000000000UL)
    {
        // |x| >= 2^(-14)
        // add 42 to exponent bits
        shifterBits += 42UL << 52;
    }
    else
    {
        // 2^(-25) < |x| < 2^(-14), potentially results in denormal
        // set exponent bits to 42 - 14 + bias
        shifterBits = (42UL - 14 + 1023) << 52;
    }

    // set leading mantissa bit to protect against negative inputs
    shifterBits |= 1UL << 51;

    double shifter = BitConverter.Int64BitsToDouble(unchecked((long)shifterBits));

    // The explicit (double) casts ask for each intermediate to be rounded to exactly
    // double precision, so that adding and then subtracting the shifter really
    // performs the rounding step instead of being evaluated at a wider precision.
    double shifted = (double)(x + shifter);
    double rounded = (double)(shifted - shifter);

    return __float2half((float)rounded);
}
/// <summary>
/// Copy constructor: initializes this instance with the raw value <c>x</c> of <paramref name="h16"/>.
/// </summary>
/// <param name="h16">The instance whose <c>x</c> field is copied.</param>
public half(half h16)
{
    this.x = h16.x;
}