// Sizes the int tensor `output` for a row-wise encoding of the 2-D float
// tensor `input`, then hands both data pointers to encode_rows_cpu_kernel.
//
// input  - float tensor; its sizes along dimensions 0 and 1 are read as m and n.
// output - int tensor; resized to m x l, where l = 1 + (n - 1) / matmul.ENCODE_BIT.
//
// NOTE(review): presumably the kernel packs ENCODE_BIT input values of a row
// into each output int - confirm against encode_rows_cpu_kernel.
public static void encode_rows_cpu(IntPtr input, IntPtr output)
{
    var m = (int)THWrapper.THFloatTensor_size(input, 0);
    var n = (int)THWrapper.THFloatTensor_size(input, 1);

    // Number of output ints per row: n / ENCODE_BIT rounded up for n >= 1.
    int l = 1 + (n - 1) / matmul.ENCODE_BIT;

    // The data pointer of `output` is fetched only after the resize.
    THWrapper.THIntTensor_resize2d(output, m, l);

    var a = THWrapper.THFloatTensor_data(input);
    var b = THWrapper.THIntTensor_data(output);

    encode_rows_cpu_kernel(a, b, m, n);
}
// Column-wise counterpart of the row encoder: sizes the int tensor `output`
// and passes both data pointers to encode_cols_cpu_kernel.
//
// input  - float tensor; its sizes along dimensions 0 and 1 are read as the
//          row count and the column count.
// output - int tensor; resized to packedRows x columnCount, where
//          packedRows = 1 + (rowCount - 1) / matmul.ENCODE_BIT.
public static void encode_cols_cpu(IntPtr input, IntPtr output)
{
    int rowCount = (int)THWrapper.THFloatTensor_size(input, 0);
    int columnCount = (int)THWrapper.THFloatTensor_size(input, 1);

    // rowCount / ENCODE_BIT rounded up for rowCount >= 1.
    int packedRows = 1 + (rowCount - 1) / matmul.ENCODE_BIT;
    THWrapper.THIntTensor_resize2d(output, packedRows, columnCount);

    // Data pointers are fetched after the resize, input first, then output.
    encode_cols_cpu_kernel(
        THWrapper.THFloatTensor_data(input),
        THWrapper.THIntTensor_data(output),
        rowCount,
        columnCount);
}
// Runs matmul.fpdgemm_nn on the encoded int tensors `a` and `b`, writing into
// the float tensor `c`, and then optionally scales every row of the result by
// the matching entry of `alphas` using 8-fractional-bit fixed-point arithmetic.
//
// a, b   - int tensors whose data pointers are passed to fpdgemm_nn.
// c      - float result tensor; resized to m x k when it is not 2-D or holds
//          fewer than m * k elements.
// m, k   - row and column count of the result.
// nn     - inner dimension before encoding; the encoded length is
//          n = 1 + (nn - 1) / matmul.ENCODE_BIT.
// transb - non-zero selects strides (1, n) for b, zero selects (k, 1).
// beta   - forwarded unchanged to fpdgemm_nn.
// alpha  - used only as a flag: non-zero enables the per-row scaling pass.
// alphas - float tensor read at index i for row i during the scaling pass.
public static void fpbinary_gemm_cpu(IntPtr a, IntPtr b, IntPtr c, int m, int nn, int k, int transb, int beta, int alpha, IntPtr alphas)
{
    if (THWrapper.THFloatTensor_nDimension(c) != 2
        || THWrapper.THFloatTensor_size(c, 0) * THWrapper.THFloatTensor_size(c, 1) < m * k)
    {
        THWrapper.THFloatTensor_resize2d(c, m, k);
    }

    // Data pointers are taken after the potential resize of c.
    var A = THWrapper.THIntTensor_data(a);
    var B = THWrapper.THIntTensor_data(b);
    var C = THWrapper.THFloatTensor_data(c);
    var D = THWrapper.THFloatTensor_data(alphas);

    int n = 1 + (nn - 1) / matmul.ENCODE_BIT;
    int brow = transb != 0 ? 1 : k;
    int bcol = transb != 0 ? n : 1;

    matmul.fpdgemm_nn(m, k, nn, A, n, 1, B, brow, bcol, C, k, 1, beta);

    if (alpha == 0)
    {
        return;
    }

    for (int i = 0; i < m; i++)
    {
        // The row scale does not depend on j, so it is read and quantised once per row.
        var rowScale = matmul.GetFloat(D, i);
        var rowScaleQ8 = (short)(rowScale * 256);

        for (int j = 0; j < k; j++)
        {
            int index = i * k + j;

            var value = matmul.GetFloat(C, index);
            short valueQ8 = (short)(value * 256);

            // Product of two values scaled by 256, shifted back by 8 bits and
            // truncated to 16 bits.
            var productQ8 = (short)((int)(valueQ8 * rowScaleQ8) >> 8);

            // NOTE(review): the value stored is the raw fixed-point product, i.e.
            // still scaled by 256. The previous revision also computed
            // productQ8 / 256f and value * rowScale but never stored either.
            // Confirm that consumers of c expect the scaled value.
            matmul.SetFloat(C, index, productQ8);
        }
    }
}
// Copies sliding-window patches of the float tensor `input` into the float
// tensor `columns`, zero-filling positions that fall into the padding.
//
// For every k in [0, nInputPlane * kH * kW) the index is split into an input
// plane (nip) and a kernel position (kh, kw). One block of
// outputHeight * outputWidth floats is then written to `columns`, read from
// plane nip of `input` with strides dH / dW and offsets -padH / -padW.
//
// As in the reference implementation this assumes that kH * kW fits in an int.
// All element and byte offsets are computed in 64-bit arithmetic so that
// buffers of 2 GiB and more are addressed correctly.
public static void THNN_unfolded_copy(
    IntPtr columns, IntPtr input,
    int kW, int kH,
    int dW, int dH,
    int padW, int padH,
    int nInputPlane,
    int inputWidth, int inputHeight,
    int outputWidth, int outputHeight)
{
    var input_data = THWrapper.THFloatTensor_data(input);
    var columns_data = THWrapper.THFloatTensor_data(columns);

    long kernelArea = (long)kH * kW;
    long outputArea = (long)outputHeight * outputWidth;
    long inputArea = (long)inputHeight * inputWidth;
    long blockCount = nInputPlane * kernelArea;

    for (long k = 0; k < blockCount; k++)
    {
        long nip = k / kernelArea;
        long rest = k % kernelArea;
        long kh = rest / kW;
        long kw = rest % kW;

        // k == nip * kH * kW + kh * kW + kw, so the destination block offset
        // nip * (kH * kW * outArea) + kh * (kW * outArea) + kw * outArea
        // collapses to k * outArea.
        IntPtr dst = THNN_float_offset(columns_data, k * outputArea);
        IntPtr src = THNN_float_offset(input_data, nip * inputArea);

        if (padW > 0 || padH > 0)
        {
            for (int y = 0; y < outputHeight; y++)
            {
                long iy = (long)y * dH - padH + kh;
                IntPtr dstRow = THNN_float_offset(dst, (long)y * outputWidth);

                if (iy < 0 || iy >= inputHeight)
                {
                    // The whole output row lies in the vertical padding.
                    memset(dstRow, 0, sizeof(float) * outputWidth);
                    continue;
                }

                IntPtr srcRow = THNN_float_offset(src, iy * inputWidth);

                if (dW == 1)
                {
                    // Unit stride: zero the left/right padding and copy the
                    // contiguous middle part in one go.
                    long ix = 0 - padW + kw;
                    long lpad = Math.Max(0, padW - kw);
                    long rpad = Math.Max(0, padW - (kW - kw - 1));

                    if (outputWidth - rpad - lpad <= 0)
                    {
                        memset(dstRow, 0, sizeof(float) * outputWidth);
                    }
                    else
                    {
                        if (lpad > 0)
                        {
                            memset(dstRow, 0, sizeof(float) * lpad);
                        }

                        memcpy(
                            THNN_float_offset(dstRow, lpad),
                            THNN_float_offset(srcRow, ix + lpad),
                            sizeof(float) * (outputWidth - rpad - lpad));

                        if (rpad > 0)
                        {
                            memset(THNN_float_offset(dstRow, outputWidth - rpad), 0, sizeof(float) * rpad);
                        }
                    }
                }
                else
                {
                    // General stride: handle one element at a time.
                    for (int x = 0; x < outputWidth; x++)
                    {
                        long ix = (long)x * dW - padW + kw;
                        IntPtr dstCell = THNN_float_offset(dstRow, x);

                        if (ix < 0 || ix >= inputWidth)
                        {
                            memset(dstCell, 0, sizeof(float) * 1);
                        }
                        else
                        {
                            memcpy(dstCell, THNN_float_offset(srcRow, ix), sizeof(float) * (1));
                        }
                    }
                }
            }
        }
        else
        {
            for (int y = 0; y < outputHeight; y++)
            {
                long iy = (long)y * dH + kh;
                long ix = 0 + kw;

                IntPtr dstRow = THNN_float_offset(dst, (long)y * outputWidth);
                IntPtr srcRow = THNN_float_offset(src, iy * inputWidth + ix);

                if (dW == 1)
                {
                    memcpy(dstRow, srcRow, sizeof(float) * outputWidth);
                }
                else
                {
                    for (int x = 0; x < outputWidth; x++)
                    {
                        memcpy(
                            THNN_float_offset(dstRow, x),
                            THNN_float_offset(srcRow, (long)x * dW),
                            sizeof(float) * (1));
                    }
                }
            }
        }
    }
}

// Returns `origin` advanced by `elementCount` floats, computed in 64-bit arithmetic.
private static IntPtr THNN_float_offset(IntPtr origin, long elementCount)
{
    long address = origin.ToInt64() + elementCount * sizeof(float);

    // In a 32-bit process keep wrap-around pointer semantics instead of letting
    // the IntPtr(long) constructor throw for addresses above 2 GiB.
    return IntPtr.Size == 8 ? new IntPtr(address) : new IntPtr(unchecked((int)address));
}