예제 #1
0
파일: xnor.cs 프로젝트: fel88/Xnor
        /// <summary>
        /// Resizes <paramref name="output"/> to m x l and runs <c>encode_rows_cpu_kernel</c> on the raw
        /// data pointers, where m and n are dimensions 0 and 1 of <paramref name="input"/> and
        /// l = 1 + (n - 1) / matmul.ENCODE_BIT.
        /// </summary>
        /// <param name="input">Handle handed to the THFloatTensor_* accessors (float data).</param>
        /// <param name="output">Handle handed to the THIntTensor_* accessors (int data); resized here.</param>
        public static void encode_rows_cpu(IntPtr input, IntPtr output)
        {
            var m = (int)THWrapper.THFloatTensor_size(input, 0);
            var n = (int)THWrapper.THFloatTensor_size(input, 1);

            // Output column count: equals ceil(n / ENCODE_BIT) for n >= 1 and a positive ENCODE_BIT.
            int l = 1 + (n - 1) / matmul.ENCODE_BIT;

            THWrapper.THIntTensor_resize2d(output, m, l);

            // The data pointers are read after the resize, as in the reference C code.
            // NOTE(review): presumably because resizing can change the output's storage - confirm.
            var a = THWrapper.THFloatTensor_data(input);
            var b = THWrapper.THIntTensor_data(output);

            encode_rows_cpu_kernel(a, b, m, n);
        }
예제 #2
0
파일: xnor.cs 프로젝트: fel88/Xnor
        /// <summary>
        /// Column-wise counterpart of the row encoder: reads the n x k size of <paramref name="input"/>,
        /// resizes <paramref name="output"/> to (1 + (n - 1) / matmul.ENCODE_BIT) x k and hands both raw
        /// data pointers to <c>encode_cols_cpu_kernel</c>.
        /// </summary>
        /// <param name="input">Handle handed to the THFloatTensor_* accessors (float data).</param>
        /// <param name="output">Handle handed to the THIntTensor_* accessors (int data); resized here.</param>
        public static void encode_cols_cpu(IntPtr input, IntPtr output)
        {
            // Source dimensions.
            var rowCount = (int)THWrapper.THFloatTensor_size(input, 0);
            var colCount = (int)THWrapper.THFloatTensor_size(input, 1);

            // Row count of the encoded output.
            int encodedRows = (rowCount - 1) / matmul.ENCODE_BIT + 1;
            THWrapper.THIntTensor_resize2d(output, encodedRows, colCount);

            // Raw buffers are fetched only once the output has its final size.
            var source = THWrapper.THFloatTensor_data(input);
            var target = THWrapper.THIntTensor_data(output);

            encode_cols_cpu_kernel(source, target, rowCount, colCount);
        }
예제 #3
0
파일: xnor.cs 프로젝트: fel88/Xnor
        /// <summary>
        /// Calls <c>matmul.fpdgemm_nn</c> on the data buffers of <paramref name="a"/> and <paramref name="b"/>,
        /// writing into <paramref name="c"/>, and then (when <paramref name="alpha"/> is non-zero) multiplies
        /// every element of the m x k result by the per-row factor read from <paramref name="alphas"/> using
        /// 16-bit fixed-point arithmetic with 8 fractional bits.
        /// </summary>
        /// <param name="a">Handle handed to THIntTensor_data; left operand buffer.</param>
        /// <param name="b">Handle handed to THIntTensor_data; right operand buffer.</param>
        /// <param name="c">Handle handed to the THFloatTensor_* accessors; resized to m x k when it is not 2-D or is too small.</param>
        /// <param name="m">Row count of the result.</param>
        /// <param name="nn">Forwarded to fpdgemm_nn; also used to derive the stride n = 1 + (nn - 1) / matmul.ENCODE_BIT.</param>
        /// <param name="k">Column count of the result.</param>
        /// <param name="transb">Non-zero selects strides (1, n) for b, zero selects (k, 1).</param>
        /// <param name="beta">Forwarded unchanged to fpdgemm_nn.</param>
        /// <param name="alpha">Only tested against zero: non-zero enables the per-row scaling pass.</param>
        /// <param name="alphas">Handle handed to THFloatTensor_data; element i scales row i of the result.</param>
        public static void fpbinary_gemm_cpu(IntPtr a, IntPtr b, IntPtr c, int m, int nn, int k, int transb, int beta, int alpha, IntPtr alphas)
        {
            // The required element count is evaluated in 64-bit so that a large m * k cannot wrap
            // around in 32-bit arithmetic and wrongly skip the resize.
            if (THWrapper.THFloatTensor_nDimension(c) != 2 || THWrapper.THFloatTensor_size(c, 0) * THWrapper.THFloatTensor_size(c, 1) < (long)m * k)
            {
                THWrapper.THFloatTensor_resize2d(c, m, k);
            }

            // Data pointers are taken after the potential resize of c.
            var A = THWrapper.THIntTensor_data(a);
            var B = THWrapper.THIntTensor_data(b);
            var C = THWrapper.THFloatTensor_data(c);
            var D = THWrapper.THFloatTensor_data(alphas);

            int n    = 1 + (nn - 1) / matmul.ENCODE_BIT;
            int brow = transb != 0 ? 1 : k;
            int bcol = transb != 0 ? n : 1;

            matmul.fpdgemm_nn(m, k, nn, A, n, 1, B, brow, bcol, C, k, 1, beta);

            if (alpha != 0)
            {
                for (int i = 0; i < m; i++)
                {
                    for (int j = 0; j < k; j++)
                    {
                        // Quantize both factors to fixed point (value * 256, truncated to short).
                        var   aa1 = matmul.GetFloat(C, i * k + j);
                        short aq1 = (short)(aa1 * 256);

                        var aa2 = matmul.GetFloat(D, i);
                        var aq2 = (short)(aa2 * 256);

                        // Fixed-point product: the 32-bit product is shifted back by 8 bits.
                        var val4 = (short)((int)(aq1 * aq2) >> 8);

                        // NOTE(review): the raw fixed-point value is stored, i.e. 256 times the
                        // real-valued product aa1 * aa2 (up to quantization). It is never divided
                        // back by 256 - confirm that downstream code expects this scale.
                        matmul.SetFloat(C, i * k + j, val4);
                    }
                }
            }
        }
예제 #4
0
파일: xnor.cs 프로젝트: fel88/Xnor
        /// <summary>
        /// Fills <paramref name="columns"/> from <paramref name="input"/>. For every combination of input
        /// plane and kernel position (kh, kw) it writes one outputHeight x outputWidth block of floats.
        /// Each destination element (y, x) is taken from input position
        /// (y * dH - padH + kh, x * dW - padW + kw) of that plane, or set to zero when the position falls
        /// outside the input.
        /// </summary>
        /// <param name="columns">Handle handed to THFloatTensor_data; destination buffer.</param>
        /// <param name="input">Handle handed to THFloatTensor_data; source buffer.</param>
        /// <param name="kW">Kernel width.</param>
        /// <param name="kH">Kernel height.</param>
        /// <param name="dW">Horizontal step between consecutive output columns.</param>
        /// <param name="dH">Vertical step between consecutive output rows.</param>
        /// <param name="padW">Horizontal zero padding.</param>
        /// <param name="padH">Vertical zero padding.</param>
        /// <param name="nInputPlane">Number of input planes.</param>
        /// <param name="inputWidth">Width of one input plane, in elements.</param>
        /// <param name="inputHeight">Height of one input plane, in elements.</param>
        /// <param name="outputWidth">Width of one destination block, in elements.</param>
        /// <param name="outputHeight">Height of one destination block, in elements.</param>
        public static void THNN_unfolded_copy(
            IntPtr columns,
            IntPtr input,
            int kW, int kH,
            int dW, int dH,
            int padW, int padH,
            int nInputPlane,
            int inputWidth, int inputHeight,
            int outputWidth, int outputHeight)
        {
            // This function assumes that
            // kH*kW does not overflow an int
            // nInputPlane*kH*kW does not overflow an Int64
            // outputHeight*dH does not overflow an Int64
            // outputWidth*dW does not overflow an Int64

            var input_data   = THWrapper.THFloatTensor_data(input);
            var columns_data = THWrapper.THFloatTensor_data(columns);

            // Plane sizes and the loop bound are kept in 64-bit so that element offsets into large
            // buffers are not truncated to 32 bits.
            Int64 outputPlane = (Int64)outputHeight * outputWidth;
            Int64 inputPlane  = (Int64)inputHeight * inputWidth;
            Int64 total       = (Int64)nInputPlane * kH * kW;

            for (Int64 k = 0; k < total; k++)
            {
                // Decompose k into (input plane, kernel row, kernel column).
                Int64 nip  = k / (kH * kW);
                Int64 rest = k % (kH * kW);
                Int64 kh   = rest / kW;
                Int64 kw   = rest % kW;
                int   x, y;
                Int64 ix, iy;

                // Start of the destination block for (nip, kh, kw) and of the source plane nip.
                IntPtr dst = unfolded_copy_offset(columns_data, nip * (kH * kW * outputPlane) + kh * (kW * outputPlane) + kw * outputPlane);
                IntPtr src = unfolded_copy_offset(input_data, nip * inputPlane);

                if (padW > 0 || padH > 0)
                {
                    Int64 lpad, rpad;
                    for (y = 0; y < outputHeight; y++)
                    {
                        iy = (Int64)y * dH - padH + kh;
                        if (iy < 0 || iy >= inputHeight)
                        {
                            // The whole source row lies in the vertical padding.
                            memset(unfolded_copy_offset(dst, (Int64)y * outputWidth), 0, sizeof(float) * outputWidth);
                        }
                        else
                        {
                            if (dW == 1)
                            {
                                // Unit step: zero the padded ends and copy the valid middle in one go.
                                ix   = 0 - padW + kw;
                                lpad = Math.Max(0, padW - kw);
                                rpad = Math.Max(0, padW - (kW - kw - 1));
                                if (outputWidth - rpad - lpad <= 0)
                                {
                                    memset(unfolded_copy_offset(dst, (Int64)y * outputWidth), 0, sizeof(float) * outputWidth);
                                }
                                else
                                {
                                    if (lpad > 0)
                                    {
                                        memset(unfolded_copy_offset(dst, (Int64)y * outputWidth), 0, sizeof(float) * lpad);
                                    }
                                    memcpy(unfolded_copy_offset(dst, (Int64)y * outputWidth + lpad), unfolded_copy_offset(src, iy * inputWidth + ix + lpad), sizeof(float) * (outputWidth - rpad - lpad));
                                    if (rpad > 0)
                                    {
                                        memset(unfolded_copy_offset(dst, (Int64)y * outputWidth + outputWidth - rpad), 0, sizeof(float) * rpad);
                                    }
                                }
                            }
                            else
                            {
                                // General step: handle one element at a time.
                                for (x = 0; x < outputWidth; x++)
                                {
                                    ix = (Int64)x * dW - padW + kw;
                                    if (ix < 0 || ix >= inputWidth)
                                    {
                                        memset(unfolded_copy_offset(dst, (Int64)y * outputWidth + x), 0, sizeof(float) * 1);
                                    }
                                    else
                                    {
                                        memcpy(unfolded_copy_offset(dst, (Int64)y * outputWidth + x), unfolded_copy_offset(src, iy * inputWidth + ix), sizeof(float) * 1);
                                    }
                                }
                            }
                        }
                    }
                }
                else
                {
                    // No padding: every source position is read without a bounds check.
                    for (y = 0; y < outputHeight; y++)
                    {
                        iy = (Int64)y * dH + kh;
                        ix = 0 + kw;
                        if (dW == 1)
                        {
                            memcpy(unfolded_copy_offset(dst, (Int64)y * outputWidth), unfolded_copy_offset(src, iy * inputWidth + ix), sizeof(float) * outputWidth);
                        }
                        else
                        {
                            for (x = 0; x < outputWidth; x++)
                            {
                                memcpy(unfolded_copy_offset(dst, (Int64)y * outputWidth + x), unfolded_copy_offset(src, iy * inputWidth + ix + (Int64)x * dW), sizeof(float) * 1);
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Returns <paramref name="basePtr"/> advanced by <paramref name="floatCount"/> float elements,
        /// with the byte offset computed in 64-bit arithmetic.
        /// </summary>
        private static IntPtr unfolded_copy_offset(IntPtr basePtr, Int64 floatCount)
        {
            return new IntPtr(basePtr.ToInt64() + sizeof(float) * floatCount);
        }