/// <summary>
/// Runs the native binary spatial convolution
/// (<c>THNN_Bin_SpatialConvolutionMM_updateOutput</c>) and copies the result
/// into a managed <see cref="InternalArray"/>.
/// </summary>
/// <param name="input">Input activations; converted to a TH tensor for the call.</param>
/// <param name="weight">Convolution weights; converted to a TH tensor for the call.</param>
/// <param name="bias">Optional bias. When null, a freshly created empty tensor is passed instead.</param>
/// <param name="alpha">Scaling factors forwarded to the native kernel.</param>
/// <param name="kernel_size">Kernel size; elements 0 and 1 are forwarded in that order.</param>
/// <param name="stride">Stride; elements 0 and 1 are forwarded in that order.</param>
/// <param name="padding">Padding; elements 0 and 1 are forwarded in that order.</param>
/// <returns>A managed copy of the kernel's output tensor.</returns>
internal static InternalArray bin_conv2d(InternalArray input, InternalArray weight, InternalArray bias, InternalArray alpha, int[] kernel_size, int[] stride, int[] padding)
{
    // Handles start out as Zero so the finally block can tell which ones were
    // actually created if something throws half-way through.
    IntPtr col_tensor = IntPtr.Zero;
    IntPtr output = IntPtr.Zero;
    IntPtr _alpha = IntPtr.Zero;
    IntPtr _input = IntPtr.Zero;
    IntPtr _bias = IntPtr.Zero;

    try
    {
        col_tensor = THWrapper.THFloatTensor_new();
        output = THWrapper.THFloatTensor_new();
        _alpha = alpha.ToTHTensor();
        _input = input.ToTHTensor();

        // NOTE(review): the weight tensor is never released here (same as before
        // this change). It is unclear from this method whether ToTHTensor() yields
        // an int or a float tensor for weights, so the matching free call must be
        // confirmed before adding one.
        var _weight = weight.ToTHTensor();

        if (bias == null)
        {
            _bias = THWrapper.THFloatTensor_new();
        }
        else
        {
            _bias = bias.ToTHTensor();
        }

        binop.THNN_Bin_SpatialConvolutionMM_updateOutput(
            _input, output, _weight, _bias, col_tensor, _alpha,
            kernel_size[0], kernel_size[1],
            stride[0], stride[1],
            padding[0], padding[1]);

        // Copy the result out before the native output tensor is released below.
        return InternalArray.FromTHFloatTensor(output);
    }
    finally
    {
        // Release every native tensor that was created, on success and on failure.
        if (col_tensor != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(col_tensor);
        }
        if (output != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(output);
        }
        if (_bias != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_bias);
        }
        if (_input != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_input);
        }
        if (_alpha != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_alpha);
        }
    }
}
/// <summary>
/// Fixed-point variant of the binary GEMM: multiplies the bit-packed matrices
/// <paramref name="a"/> and <paramref name="b"/> into the float tensor
/// <paramref name="c"/> via <c>matmul.fpdgemm_nn</c>, then optionally scales each
/// output row by its alpha factor using 8.8 fixed-point arithmetic.
/// </summary>
/// <param name="a">Int tensor holding the packed left operand.</param>
/// <param name="b">Int tensor holding the packed right operand.</param>
/// <param name="c">Float output tensor; resized to m x k when it is not 2-D or has fewer than m*k elements.</param>
/// <param name="m">Number of output rows.</param>
/// <param name="nn">Inner dimension before packing; the packed width is 1 + (nn - 1) / matmul.ENCODE_BIT.</param>
/// <param name="k">Number of output columns.</param>
/// <param name="transb">Non-zero selects strides (1, n) for <paramref name="b"/>; zero selects (k, 1).</param>
/// <param name="beta">Forwarded unchanged to <c>matmul.fpdgemm_nn</c>.</param>
/// <param name="alpha">Non-zero enables the per-row alpha scaling pass.</param>
/// <param name="alphas">Float tensor with one scaling factor per output row; only read when <paramref name="alpha"/> is non-zero.</param>
public static void fpbinary_gemm_cpu(IntPtr a, IntPtr b, IntPtr c, int m, int nn, int k, int transb, int beta, int alpha, IntPtr alphas)
{
    bool needsResize =
        THWrapper.THFloatTensor_nDimension(c) != 2 ||
        THWrapper.THFloatTensor_size(c, 0) * THWrapper.THFloatTensor_size(c, 1) < m * k;
    if (needsResize)
    {
        THWrapper.THFloatTensor_resize2d(c, m, k);
    }

    // Data pointers are taken after the resize so C refers to the final storage.
    var A = THWrapper.THIntTensor_data(a);
    var B = THWrapper.THIntTensor_data(b);
    var C = THWrapper.THFloatTensor_data(c);

    int n = 1 + (nn - 1) / matmul.ENCODE_BIT;
    int brow = transb != 0 ? 1 : k;
    int bcol = transb != 0 ? n : 1;

    matmul.fpdgemm_nn(m, k, nn, A, n, 1, B, brow, bcol, C, k, 1, beta);

    if (alpha == 0)
    {
        return;
    }

    var D = THWrapper.THFloatTensor_data(alphas);
    for (int i = 0; i < m; i++)
    {
        // The row's alpha factor is constant across the row: quantise it once.
        var rowAlpha = matmul.GetFloat(D, i);
        var rowAlphaQ = (short)(rowAlpha * 256);

        for (int j = 0; j < k; j++)
        {
            int idx = i * k + j;
            var value = matmul.GetFloat(C, idx);
            short valueQ = (short)(value * 256);

            // 8.8 x 8.8 product, shifted back down to 8.8.
            var product = (short)((int)(valueQ * rowAlphaQ) >> 8);

            // The raw fixed-point value (not divided by 256) is stored on purpose;
            // this replaces the float form C[idx] = C[idx] * alphas[i].
            matmul.SetFloat(C, idx, product);
        }
    }
}
/// <summary>
/// Fixed-point counterpart of the binary 2-D convolution: feeds the quantised
/// input values to <c>THNN_Bin_SpatialConvolutionMM_updateOutput</c> and returns
/// the result with its values stored in <c>QIntData</c>.
/// </summary>
/// <param name="input">Input whose <c>QIntData</c> values are used (copied into a float array of the same shape).</param>
/// <param name="weight">Convolution weights; converted to a TH tensor for the call.</param>
/// <param name="bias">Optional bias. When null, a freshly created empty tensor is passed instead.</param>
/// <param name="alpha">Scaling factors forwarded to the native kernel.</param>
/// <param name="kernel_size">Kernel size; elements 0 and 1 are forwarded in that order.</param>
/// <param name="stride">Stride; elements 0 and 1 are forwarded in that order.</param>
/// <param name="padding">Padding; elements 0 and 1 are forwarded in that order.</param>
/// <returns>An array whose <c>QIntData</c> holds the kernel output cast to short and whose <c>Data</c> is null.</returns>
internal static InternalArray fpbin_conv2d(InternalArray input, InternalArray weight, InternalArray bias, InternalArray alpha, int[] kernel_size, int[] stride, int[] padding)
{
    // Handles start out as Zero so the finally block can tell which ones were
    // actually created if something throws half-way through.
    IntPtr col_tensor = IntPtr.Zero;
    IntPtr output = IntPtr.Zero;
    IntPtr _alpha = IntPtr.Zero;
    IntPtr _input = IntPtr.Zero;
    IntPtr _bias = IntPtr.Zero;

    InternalArray ret;
    try
    {
        col_tensor = THWrapper.THFloatTensor_new();
        output = THWrapper.THFloatTensor_new();
        _alpha = alpha.ToTHTensor();

        // The native side takes a float tensor, so the quantised values are
        // widened into a float array of the same shape.
        var cln = new InternalArray(input.Shape);
        for (int i = 0; i < cln.Data.Length; i++)
        {
            cln.Data[i] = input.QIntData[i];
        }
        _input = cln.ToTHTensor();

        // NOTE(review): the weight tensor is never released here (same as before
        // this change). It is unclear from this method whether ToTHTensor() yields
        // an int or a float tensor for weights, so the matching free call must be
        // confirmed before adding one.
        var _weight = weight.ToTHTensor();

        if (bias == null)
        {
            _bias = THWrapper.THFloatTensor_new();
        }
        else
        {
            _bias = bias.ToTHTensor();
        }

        // NOTE(review): the trailing 'true' presumably selects the fixed-point
        // code path of the kernel - confirm against its declaration.
        binop.THNN_Bin_SpatialConvolutionMM_updateOutput(
            _input, output, _weight, _bias, col_tensor, _alpha,
            kernel_size[0], kernel_size[1],
            stride[0], stride[1],
            padding[0], padding[1],
            true);

        // Copy the result out before the native output tensor is released below.
        ret = InternalArray.FromTHFloatTensor(output);
    }
    finally
    {
        // Release every native tensor that was created, on success and on failure.
        if (col_tensor != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(col_tensor);
        }
        if (output != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(output);
        }
        if (_bias != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_bias);
        }
        if (_input != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_input);
        }
        if (_alpha != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_alpha);
        }
    }

    // The kernel output is moved into the quantised buffer as-is (plain cast,
    // no rescaling) and the float buffer is dropped.
    ret.QIntData = new short[ret.Data.Length];
    for (int i = 0; i < ret.Data.Length; i++)
    {
        ret.QIntData[i] = (short)ret.Data[i];
    }
    ret.Data = null;

    return ret;
}
/// <summary>
/// Fixed-point binary linear layer. Mirrors the reference implementation:
/// <code>
/// binop.encode_rows_cpu(input.data, bin_input)
/// binop.binary_gemm_cpu(bin_input, weight.data, output.data, m, n, k, 1, 0, 0, alpha.data)
/// output.data.mul_(alpha.data.t().expand(output.shape))
/// </code>
/// with the alpha multiplication carried out in 8.8 fixed point.
/// </summary>
/// <param name="input">2-D input (m x n) whose <c>QIntData</c> values are used.</param>
/// <param name="weight">Weights; <c>weight.Shape[0]</c> is taken as the output width k.</param>
/// <param name="bias">Must be null; adding a bias is not implemented.</param>
/// <param name="alpha">Scaling factors, transposed and broadcast over the output.</param>
/// <returns>An array whose <c>QIntData</c> holds the scaled result and whose <c>Data</c> is null.</returns>
/// <exception cref="NotImplementedException">Thrown when <paramref name="bias"/> is not null.</exception>
internal static InternalArray fpbin_linear(InternalArray input, InternalArray weight, InternalArray bias, InternalArray alpha)
{
    // Reject the unsupported case up front, before any native tensor is
    // allocated, so nothing is leaked on this path.
    if (bias != null)
    {
        throw new NotImplementedException();
    }

    var m = input.Shape[0];
    var n = input.Shape[1];
    var k = weight.Shape[0];

    // Handles start out as Zero so the finally block can tell which ones were
    // actually created if something throws half-way through.
    IntPtr _input = IntPtr.Zero;
    IntPtr bin_input = IntPtr.Zero;
    IntPtr _alpha = IntPtr.Zero;
    IntPtr _output = IntPtr.Zero;

    InternalArray output;
    try
    {
        // The native side takes a float tensor, so the quantised values are
        // widened into a float array of the same shape.
        var cln = new InternalArray(input.Shape);
        for (int i = 0; i < cln.Data.Length; i++)
        {
            cln.Data[i] = input.QIntData[i];
        }
        _input = cln.ToTHTensor();

        bin_input = THWrapper.THIntTensor_new();
        encode_rows_cpu(_input, bin_input);

        _alpha = alpha.ToTHTensor();

        // NOTE(review): the weight tensor is never released here (same as before
        // this change); confirm who owns it and which free call matches its type.
        var _weight = weight.ToTHTensor();

        _output = THWrapper.THFloatTensor_new();

        // alpha == 0 disables the per-row scaling inside the GEMM; the scaling is
        // applied per output column further down instead.
        binop.fpbinary_gemm_cpu(bin_input, _weight, _output, m, n, k, 1, 0, 0, _alpha);

        output = InternalArray.FromTHFloatTensor(_output);
    }
    finally
    {
        if (_input != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_input);
        }
        if (bin_input != IntPtr.Zero)
        {
            THWrapper.THIntTensor_free(bin_input);
        }
        if (_alpha != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_alpha);
        }
        if (_output != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_output);
        }
    }

    // Quantise the transposed alpha factors to 8.8 fixed point.
    var alphaData = alpha.Transpose2D().Data;
    var alphaQ = new short[alphaData.Length];
    for (int i = 0; i < alphaData.Length; i++)
    {
        alphaQ[i] = (short)(alphaData[i] * 256);
    }

    // Quantise the GEMM result to 8.8 fixed point and drop the float buffer.
    output.QIntData = new short[output.Data.Length];
    for (int i = 0; i < output.QIntData.Length; i++)
    {
        output.QIntData[i] = (short)(output.Data[i] * 256);
    }
    output.Data = null;

    // Equivalent of mul_(alpha.t().expand(output.shape)): the alpha row is
    // repeated for every output row, so the factor index wraps around. For a
    // single-row output this is the same as a plain element-wise product.
    if (alphaQ.Length > 0)
    {
        for (int i = 0; i < output.QIntData.Length; i++)
        {
            var scaled = (short)((int)(output.QIntData[i] * alphaQ[i % alphaQ.Length]) >> 8);
            output.QIntData[i] = scaled;
        }
    }

    return output;
}
/// <summary>
/// Binary linear layer. Mirrors the reference implementation:
/// <code>
/// binop.encode_rows_cpu(input.data, bin_input)
/// binop.binary_gemm_cpu(bin_input, weight.data, output.data, m, n, k, 1, 0, 0, alpha.data)
/// output.data.mul_(alpha.data.t().expand(output.shape))
/// </code>
/// </summary>
/// <param name="input">2-D input (m x n).</param>
/// <param name="weight">Weights; <c>weight.Shape[0]</c> is taken as the output width k.</param>
/// <param name="bias">Currently ignored (see the note in the body).</param>
/// <param name="alpha">Scaling factors, transposed and broadcast over the output.</param>
/// <returns>The GEMM result multiplied by the broadcast alpha factors.</returns>
internal static InternalArray bin_linear(InternalArray input, InternalArray weight, InternalArray bias, InternalArray alpha)
{
    var m = input.Shape[0];
    var n = input.Shape[1];
    var k = weight.Shape[0];

    // Handles start out as Zero so the finally block can tell which ones were
    // actually created if something throws half-way through.
    IntPtr _input = IntPtr.Zero;
    IntPtr bin_input = IntPtr.Zero;
    IntPtr _alpha = IntPtr.Zero;
    IntPtr _output = IntPtr.Zero;

    InternalArray output;
    try
    {
        _input = input.ToTHTensor();

        bin_input = THWrapper.THIntTensor_new();
        encode_rows_cpu(_input, bin_input);

        _alpha = alpha.ToTHTensor();

        // NOTE(review): the weight tensor is never released here (same as before
        // this change); confirm who owns it and which free call matches its type.
        var _weight = weight.ToTHTensor();

        _output = THWrapper.THFloatTensor_new();

        // alpha == 0 disables the per-row scaling inside the GEMM; the scaling is
        // applied per output column further down instead.
        binop.binary_gemm_cpu(bin_input, _weight, _output, m, n, k, 1, 0, 0, _alpha);

        output = InternalArray.FromTHFloatTensor(_output);
    }
    finally
    {
        if (_input != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_input);
        }
        if (bin_input != IntPtr.Zero)
        {
            THWrapper.THIntTensor_free(bin_input);
        }
        if (_alpha != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_alpha);
        }
        if (_output != IntPtr.Zero)
        {
            THWrapper.THFloatTensor_free(_output);
        }
    }

    // NOTE(review): a non-null bias is silently ignored, exactly as before. The
    // reference adds bias.expand(output.shape) at this point; decide whether to
    // implement that or to reject a bias explicitly.

    // Equivalent of mul_(alpha.t().expand(output.shape)): the alpha row is
    // repeated for every output row, so the factor index wraps around. For a
    // single-row output this is the same as a plain element-wise product.
    var scale = alpha.Transpose2D().Data;
    if (scale.Length > 0)
    {
        for (int i = 0; i < output.Data.Length; i++)
        {
            output.Data[i] *= scale[i % scale.Length];
        }
    }

    return output;
}