예제 #1
0
        /// <summary>
        /// Entry point of the naive matrix-multiplication sample. Builds two matrices, then runs
        /// <c>ComputeRowsOfProduct</c> ten times through the wrapper created by <c>HybRunner.Cuda()</c>
        /// and ten times on the CPU, one result row per <c>Parallel.For</c> iteration.
        /// </summary>
        /// <param name="args">
        /// Empty to use 512 for every dimension, otherwise heightA, widthA, heightB, widthB (in that order).
        /// Arguments after the fourth are ignored.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Between one and three dimensions were supplied, or widthA differs from heightB.
        /// </exception>
        static void Main(string[] args)
        {
            const int redo             = 10;
            const int expectedArgCount = 4;

            if (args.Length == 0)
            {
                args = new string[] { "512", "512", "512", "512" };
            }
            else if (args.Length < expectedArgCount)
            {
                // Fail with a readable message instead of an IndexOutOfRangeException further down.
                throw new ArgumentException("invalid data -- expected 4 arguments: heightA widthA heightB widthB");
            }

            int heightA = Convert.ToInt32(args[0]);
            int widthA  = Convert.ToInt32(args[1]);
            int heightB = Convert.ToInt32(args[2]);
            int widthB  = Convert.ToInt32(args[3]);

            // The product A x B only exists when the inner dimensions agree.
            if (widthA != heightB)
            {
                throw new ArgumentException("invalid data -- incompatible matrices");
            }

            Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

            // NaiveMatrix takes (width, height) in that order.
            NaiveMatrix matrixA = new NaiveMatrix(widthA, heightA);
            NaiveMatrix matrixB = new NaiveMatrix(widthB, heightB);

            // Both results have A's row count and B's column count.
            NaiveMatrix res_net  = new NaiveMatrix(widthB, heightA);
            NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

            matrixA.FillMatrix();
            matrixB.FillMatrix();

            #region CUDA

            HybRunner runner  = HybRunner.Cuda().SetDistrib(4, 5, 8, 32, 32, 0);
            dynamic   wrapper = runner.Wrap(new Program());

            // A single wrapped call covers every row of the result: [0, Height).
            for (int i = 0; i < redo; ++i)
            {
                wrapper.ComputeRowsOfProduct(res_cuda, matrixA, matrixB, 0, res_cuda.Height);
            }
            #endregion

            #region C#

            // CPU path: each parallel iteration computes exactly one row, [line, line + 1).
            for (int i = 0; i < redo; ++i)
            {
                Parallel.For(0, res_net.Height, (line) =>
                {
                    ComputeRowsOfProduct(res_net, matrixA, matrixB, line, line + 1);
                });
            }
            #endregion

            Console.Out.WriteLine("DONE");
        }
예제 #2
0
 /// <summary>
 /// Writes the matrix to the console, one row per line, with every element followed by ", ".
 /// </summary>
 /// <param name="M">Matrix to print; elements are read at index <c>row * M.Width + column</c>.</param>
 public static void displayMatrix(NaiveMatrix M)
 {
     // Rows are bounded by Height and columns by Width, so non-square matrices
     // print every row exactly once and never index past the last row.
     for (int row = 0; row < M.Height; ++row)
     {
         for (int col = 0; col < M.Width; ++col)
         {
             Console.Write(M[row * M.Width + col] + ", ");
         }
         Console.WriteLine();
     }
 }
예제 #3
0
 /// <summary>
 /// Sequential N x N matrix product used as a reference result: <c>res = A * B</c>.
 /// All three matrices are accessed through <c>Values</c> at index <c>row * N + column</c>.
 /// </summary>
 /// <param name="A">Left operand.</param>
 /// <param name="B">Right operand.</param>
 /// <param name="res">Receives the product; every one of its N * N elements is overwritten.</param>
 /// <param name="N">Number of rows and columns of each matrix.</param>
 public static void reference(NaiveMatrix A, NaiveMatrix B, NaiveMatrix res, int N)
 {
     for (int row = 0; row < N; ++row)
     {
         // Offset of the first element of this row, shared by A and res.
         int rowStart = row * N;

         for (int col = 0; col < N; ++col)
         {
             // Dot product of row `row` of A with column `col` of B,
             // summed in increasing order of the inner index.
             float dot = 0.0F;
             for (int inner = 0; inner < N; ++inner)
             {
                 dot += A.Values[rowStart + inner] * B.Values[inner * N + col];
             }

             res.Values[rowStart + col] = dot;
         }
     }
 }
예제 #4
0
 /// <summary>
 /// CPU reference product <c>result = A * B</c>, with one <c>Parallel.For</c> iteration per row of A.
 /// Elements are addressed through the matrix indexer at <c>width * row + column</c>.
 /// </summary>
 /// <param name="result">Receives the product; written at <c>B.Width * row + column</c>.</param>
 /// <param name="A">Left operand; its Height gives the row count, its Width the inner dimension.</param>
 /// <param name="B">Right operand; its Width gives the column count of the result.</param>
 public static void Reference(NaiveMatrix result, NaiveMatrix A, NaiveMatrix B)
 {
     Parallel.For(0, A.Height, (row) =>
     {
         // Read the widths once per row instead of on every inner iteration.
         int columns = B.Width;
         int inner   = A.Width;

         for (int col = 0; col < columns; ++col)
         {
             // Dot product of row `row` of A with column `col` of B.
             float sum = 0.0F;
             for (int k = 0; k < inner; ++k)
             {
                 sum += A[inner * row + k] * B[columns * k + col];
             }

             result[columns * row + col] = sum;
         }
     });
 }
예제 #5
0
        /// <summary>
        /// Entry point of the shared-memory matrix-multiplication sample. Builds two matrices, runs
        /// <c>Multiply</c> ten times through the wrapper created by <c>HybRunner.Cuda("SharedMatrix_CUDA.dll")</c>,
        /// then computes the same product once on the CPU with <c>Reference</c>.
        /// </summary>
        /// <param name="args">
        /// Empty to use 512 for every dimension, otherwise heightA, widthA, heightB, widthB (in that order).
        /// Arguments after the fourth are ignored.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Between one and three dimensions were supplied, or widthA differs from heightB.
        /// </exception>
        static void Main(string[] args)
        {
            const int redo             = 10;
            const int expectedArgCount = 4;

            if (args.Length == 0)
            {
                args = new string[] { "512", "512", "512", "512" };
            }
            else if (args.Length < expectedArgCount)
            {
                // Fail with a readable message instead of an IndexOutOfRangeException further down.
                throw new ArgumentException("invalid data -- expected 4 arguments: heightA widthA heightB widthB");
            }

            int heightA = Convert.ToInt32(args[0]);
            int widthA  = Convert.ToInt32(args[1]);
            int heightB = Convert.ToInt32(args[2]);
            int widthB  = Convert.ToInt32(args[3]);

            // The product A x B only exists when the inner dimensions agree.
            if (widthA != heightB)
            {
                throw new ArgumentException("invalid data -- incompatible matrices");
            }

            Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

            // NaiveMatrix takes (width, height) in that order.
            NaiveMatrix matrixA = new NaiveMatrix(widthA, heightA);
            NaiveMatrix matrixB = new NaiveMatrix(widthB, heightB);

            // Both results have A's row count and B's column count.
            NaiveMatrix res_net  = new NaiveMatrix(widthB, heightA);
            NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

            matrixA.FillMatrix();
            matrixB.FillMatrix();

            #region CUDA

            HybRunner runner  = HybRunner.Cuda("SharedMatrix_CUDA.dll").SetDistrib(4, 5, 32, 32, 1, 1024 * 2 * 8);
            dynamic   wrapper = runner.Wrap(new Program());

            // NOTE(review): Multiply receives a single size (matrixA.Width) and appears to index all
            // three matrices with it, so this path looks valid only for square matrices of equal
            // size -- confirm before running with non-square arguments.
            for (int i = 0; i < redo; ++i)
            {
                wrapper.Multiply(res_cuda, matrixA, matrixB, matrixA.Width);
            }
            #endregion

            #region C#
            Reference(res_net, matrixA, matrixB);
            #endregion

            Console.Out.WriteLine("DONE");
        }
예제 #6
0
        /// <summary>
        /// Computes rows <c>[lineFrom, lineTo)</c> of <c>resultMatrix = matrixA * matrixB</c>.
        /// Rows start at <c>lineFrom + threadIdx.y + blockIdx.y * blockDim.y</c> and advance by
        /// <c>blockDim.y * gridDim.y</c>; columns start at <c>threadIdx.x + blockIdx.x * blockDim.x</c>
        /// and advance by <c>blockDim.x * gridDim.x</c>.
        /// </summary>
        /// <param name="resultMatrix">Receives the product; written at <c>row * matrixB.Width + column</c>.</param>
        /// <param name="matrixA">Left operand; its Width is the inner (shared) dimension.</param>
        /// <param name="matrixB">Right operand; its Width is the column count of the result.</param>
        /// <param name="lineFrom">First result row to compute (inclusive).</param>
        /// <param name="lineTo">Last result row to compute (exclusive).</param>
        public static void ComputeRowsOfProduct(NaiveMatrix resultMatrix, NaiveMatrix matrixA, NaiveMatrix matrixB, int lineFrom, int lineTo)
        {
            int commonSize = matrixA.Width;
            int bWidth     = matrixB.Width;

            for (int i = lineFrom + threadIdx.y + blockIdx.y * blockDim.y; i < lineTo; i += blockDim.y * gridDim.y)
            {
                for (int j = threadIdx.x + blockIdx.x * blockDim.x; j < bWidth; j += blockDim.x * gridDim.x)
                {
                    // Accumulate in a local and store once, rather than reading and writing
                    // the result element on every iteration of the inner loop.
                    float accum = 0.0f;

                    for (int k = 0; k < commonSize; ++k)
                    {
                        accum += matrixA[i * commonSize + k] * matrixB[k * bWidth + j];
                    }

                    resultMatrix[i * bWidth + j] = accum;
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Entry point of the cuBLAS sample. Multiplies two 1024 x 1024 matrices with
        /// <c>cublasSgemm</c>, recomputes the product with <c>reference</c>, and compares the two
        /// results element by element. Exits the process with code 1 on the first element whose
        /// absolute difference is at least 1.0E-3.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const int N = 1024;

            Console.WriteLine("Execution cublas matrix mul with sizes ({0}, {1}) x ({2}, {3})", N, N, N, N);

            NaiveMatrix matrixA = new NaiveMatrix(N, N);
            NaiveMatrix matrixB = new NaiveMatrix(N, N);

            NaiveMatrix res     = new NaiveMatrix(N, N);
            NaiveMatrix res_net = new NaiveMatrix(N, N);

            matrixA.FillMatrix();
            matrixB.FillMatrix();

            // Scaling factors passed to the gemm call.
            float alpha = 1.0f;
            float beta  = 0.0f;

            cublas         cublas = new cublas();
            cublasHandle_t handle;

            cublas.Create(out handle);

            try
            {
                cublasOperation_t transA = cublasOperation_t.CUBLAS_OP_N;
                cublasOperation_t transB = cublasOperation_t.CUBLAS_OP_N;

                // NOTE(review): cuBLAS documents column-major storage, while reference() indexes
                // Values as row * N + column. Unless cublasSgemm here compensates, passing A then B
                // with OP_N may yield B * A from the row-major point of view -- confirm operand order.
                cublasSgemm(handle, transA, transB, N, N, N, &alpha, matrixA.Values, N, matrixB.Values, N, &beta, res.Values, N);
            }
            finally
            {
                // Release the handle even when the gemm call throws.
                cublas.Destroy(handle);
            }

            reference(matrixA, matrixB, res_net, N);

            for (int i = 0; i < N * N; ++i)
            {
                if (Math.Abs(res[i] - res_net[i]) >= 1.0E-3)
                {
                    Console.WriteLine("Error at {0}, expected {1}, got {2}", i, res_net[i], res[i]);
                    Environment.Exit(1);
                }
            }

            Console.Out.WriteLine("DONE");
        }
예제 #8
0
        /// <summary>
        /// Tiled product <c>result = A * B</c> for <c>size x size</c> matrices addressed at
        /// <c>row * size + column</c>. Each (by, bx) pair handles one tile of the result; for every
        /// tile along the inner dimension the threads copy one element of A and one of B into
        /// <c>cacheA</c> / <c>cacheB</c>, then accumulate the partial dot product from those caches.
        /// </summary>
        /// <remarks>
        /// Only <c>size / blockDim</c> full tiles are visited, so rows and columns beyond the last
        /// full tile are left untouched when <c>size</c> is not a multiple of the block dimensions.
        /// The tile indexing (row index <c>ty</c> used for B, inner loop bounded by <c>blockDim.x</c>)
        /// appears to assume <c>blockDim.x == blockDim.y</c> -- TODO confirm with the launch configuration.
        /// </remarks>
        /// <param name="result">Receives the product.</param>
        /// <param name="A">Left operand.</param>
        /// <param name="B">Right operand.</param>
        /// <param name="size">Number of rows and columns used to index all three matrices.</param>
        public static void Multiply(NaiveMatrix result, NaiveMatrix A, NaiveMatrix B, int size)
        {
            // Presumably hands out block-shared buffers (going by the allocator's name); one tile each.
            SharedMemoryAllocator <float> allocator = new SharedMemoryAllocator <float>();

            float[] cacheA = allocator.allocate(blockDim.y * blockDim.x);
            float[] cacheB = allocator.allocate(blockDim.y * blockDim.x);

            for (int by = blockIdx.y; by < size / blockDim.y; by += gridDim.y)
            {
                for (int bx = blockIdx.x; bx < size / blockDim.x; bx += gridDim.x)
                {
                    int tx = threadIdx.x, ty = threadIdx.y;

                    // The loop bounds guarantee i < size and j < size (by and bx stay below
                    // size / blockDim), so no bounds guard is needed here -- and none should be
                    // added as an early return ahead of the SyncThreads() calls below.
                    int i = by * blockDim.y + ty;
                    int j = bx * blockDim.x + tx;

                    float Pvalue = 0;
                    for (int blockIdread = 0; blockIdread < size / blockDim.x; ++blockIdread)
                    {
                        // Tiles are stored with a row stride of blockDim.x, the same stride
                        // the accumulation loop below uses to read them back.
                        cacheA[ty * blockDim.x + tx] = A[i * size + (blockIdread * blockDim.x + tx)];
                        cacheB[ty * blockDim.x + tx] = B[(blockIdread * blockDim.x + ty) * size + j];

                        // Separates the tile load from the reads that follow.
                        SyncThreads();

                        for (int k = 0; k < blockDim.x; ++k)
                        {
                            Pvalue += cacheA[ty * blockDim.x + k] * cacheB[k * blockDim.x + tx];
                        }

                        // Separates these reads from the next iteration's tile load.
                        SyncThreads();
                    }

                    result[i * size + j] = Pvalue;
                }
            }
        }