/// <summary>
/// Converts a single-precision value to <c>half</c>.
/// The conversion itself is delegated to <c>__internal_float2half</c>; this method
/// only applies the final rounding adjustment based on the remainder it reports.
/// </summary>
/// <param name="a">The value to convert.</param>
/// <returns>A <c>half</c> whose <c>x</c> field holds the adjusted result.</returns>
private static half __float2half(float a)
        {
            uint signBits  = 0;
            uint leftover  = 0;
            half converted = new half();

            converted.x = __internal_float2half(a, ref signBits, ref leftover);

            // NOTE(review): leftover is presumably the discarded mantissa bits,
            // left-aligned so that 0x80000000 means "exactly halfway" - confirm
            // against __internal_float2half.
            bool aboveHalfway = leftover > 0x80000000U;
            bool tieOnOddLsb  = (leftover == 0x80000000U) && ((converted.x & 0x1U) != 0U);

            // Bump the result when the remainder is above the halfway mark, or exactly
            // at it while the lowest result bit is set (ties go to the even value).
            if (aboveHalfway || tieOnOddLsb)
            {
                converted.x++;
            }

            return(converted);
        }
Ejemplo n.º 2
0
        // Extern declaration of cublasHgemmStridedBatched; the implementation is supplied
        // externally (presumably a P/Invoke binding to the native cuBLAS function of the
        // same name - the import attribute is not visible in this excerpt).
        // NOTE(review): per the cuBLAS documentation this is the half-precision strided
        // batched GEMM, i.e. for each i in [0, batchCount):
        //     C_i = alpha * op(A_i) * op(B_i) + beta * C_i
        // where transa/transb select op(), m/n/k are the matrix dimensions, lda/ldb/ldc
        // are leading dimensions, and strideA/strideB/strideC are the element offsets
        // between consecutive matrices of the batch - confirm against the cuBLAS reference.
        // alpha and beta are passed by reference; A, B and C are device pointers.
        public static extern CublasStatus cublasHgemmStridedBatched(CudaBlasHandle handle,
																 Operation transa,
																 Operation transb,
																 int m,
																 int n,
																 int k,
																 ref half alpha,  // host or device pointer
																 CUdeviceptr A,
																 int lda,
																 long strideA,   // purposely signed 
																 CUdeviceptr B,
																 int ldb,
																 long strideB,
																 ref half beta,   // host or device pointer
																 CUdeviceptr C,
																 int ldc,
																 long strideC,
																 int batchCount);
Ejemplo n.º 3
0
 // Extern declaration of cublasHgemm; the implementation is supplied externally
 // (presumably a P/Invoke binding to the native cuBLAS function of the same name -
 // the import attribute is not visible in this excerpt).
 // NOTE(review): per the cuBLAS documentation this is the half-precision GEMM,
 //     C = alpha * op(A) * op(B) + beta * C
 // where transa/transb select op(), m/n/k are the matrix dimensions and lda/ldb/ldc
 // are the leading dimensions of A, B and C - confirm against the cuBLAS reference.
 // alpha and beta are passed by reference; A, B and C are device pointers.
 public static extern CublasStatus cublasHgemm(CudaBlasHandle handle, 
                                               Operation transa,
                                               Operation transb, 
                                               int m,
                                               int n,
                                               int k,
                                               ref half alpha, /* host or device pointer */  
                                               CUdeviceptr A, 
                                               int lda,
                                               CUdeviceptr B,
                                               int ldb, 
                                               ref half beta, /* host or device pointer */  
                                               CUdeviceptr C,
                                               int ldc);
        /// <summary>
        /// Converts a double-precision value to <c>half</c>.
        /// The value is first rounded to 11 bits of precision in double arithmetic,
        /// then narrowed to float and passed to <c>__float2half</c>, so that the
        /// float-to-half step does not have to round a second time.
        /// </summary>
        /// <param name="x">The value to convert.</param>
        /// <returns>The result of <c>__float2half</c> applied to the pre-rounded value.</returns>
        private static half __double2half(double x)
        {
            // Reinterpret x as its 64-bit pattern. unchecked: the sign bit makes the
            // signed value negative, which must not throw in a checked build.
            ulong xBits = unchecked((ulong)BitConverter.DoubleToInt64Bits(x));
            ulong absx  = xBits & 0x7fffffffffffffffUL;

            if ((absx >= 0x40f0000000000000UL) || (absx <= 0x3e60000000000000UL))
            {
                // |x| >= 2^16 or NaN or |x| <= 2^(-25)
                // double-rounding is not a problem
                return(__float2half((float)x));
            }

            // here 2^(-25) < |x| < 2^16
            // prepare shifter value such that x + shifter
            // done in double precision performs round-to-nearest-even
            // and (x + shifter) - shifter results in x rounded to
            // 11 bits of precision. Shifter needs to have exponent of
            // x plus 53 - 11 = 42 and a leading bit in mantissa to guard
            // against negative values.
            // So need to have |x| capped to avoid overflow in exponent.
            // For inputs that are smaller than half precision minnorm
            // we prepare fixed shifter exponent.
            ulong shifterBits;

            if (absx >= 0x3f10000000000000UL)
            {               // |x| >= 2^(-14)
                // take the exponent bits of x and add 42 to them
                shifterBits = (xBits & 0x7ff0000000000000UL) + (42ul << 52);
            }
            else
            {               // 2^(-25) < |x| < 2^(-14), potentially results in denormal
                // set exponent bits to 42 - 14 + bias
                shifterBits = ((42ul - 14 + 1023) << 52);
            }
            // set leading mantissa bit to protect against negative inputs
            shifterBits |= 1ul << 51;

            double shifter = BitConverter.Int64BitsToDouble(unchecked((long)shifterBits));

            // Round-trip the sum through its 64-bit pattern so that the subtraction
            // below operates on a value that really has been stored as a 64-bit double,
            // instead of on an intermediate that x + shifter - shifter could be folded
            // into or that might be carried at higher precision.
            ulong sumBits = unchecked((ulong)BitConverter.DoubleToInt64Bits(x + shifter));

            // the value is positive, so this operation doesn't change anything
            sumBits &= 0x7fffffffffffffffUL;

            double xShiftRound = BitConverter.Int64BitsToDouble(unchecked((long)sumBits));

            double xRounded = xShiftRound - shifter;
            float  xRndFlt  = (float)xRounded;
            half   res      = __float2half(xRndFlt);

            return(res);
        }
 /// <summary>
 /// Copy constructor: initializes this instance's <c>x</c> field from
 /// the <c>x</c> field of <paramref name="h16"/>.
 /// </summary>
 /// <param name="h16">The value whose <c>x</c> field is copied.</param>
 public half(half h16)
 {
     this.x = h16.x;
 }