예제 #1
0
        /// <summary>
        /// Iteratively solves A*X = B with the conjugate gradient method, updating X in place.
        /// </summary>
        /// <param name="X">Initial guess on entry; overwritten with the computed solution.</param>
        /// <param name="A">System matrix. NOTE(review): conjugate gradient presumes a symmetric
        /// positive definite matrix; nothing here verifies that.</param>
        /// <param name="B">Right-hand side; its Count defines the problem size N.</param>
        /// <param name="maxiter">Upper bound on the number of iterations.</param>
        /// <param name="eps">Tolerance; iteration stops once &lt;R|R&gt; drops below eps*eps.</param>
        public static void ConjugateGradient(FloatResidentArray X, SparseMatrix A, FloatResidentArray B, int maxiter, float eps)
        {
            int N = (int)B.Count;
            FloatResidentArray R  = new FloatResidentArray(N);
            FloatResidentArray P  = new FloatResidentArray(N);
            FloatResidentArray AP = new FloatResidentArray(N);

            A.RefreshDevice();
            X.RefreshDevice();
            B.RefreshDevice();

            wrapper.Fmsub(R, B, A, X, N);                       // R = B - A*X
            wrapper.Copy(P, R, N);                              // P = R

            float threshold = eps * eps;
            float r         = ScalarProd(R, R, N);              // <R|R> of the current residual
            int   k         = 0;

            // Testing the residual before the first iteration avoids alpha = 0/0 = NaN
            // when the initial guess already satisfies the system.
            while (k < maxiter && r >= threshold)
            {
                wrapper.Multiply(AP, A, P, N);                  // AP = A*P
                float pAP = ScalarProd(P, AP, N);               // <P|AP>
                if (pAP == 0.0F)
                {
                    // Breakdown: the step length is undefined, so keep the current X.
                    break;
                }

                float alpha = r / pAP;                          // alpha = <R|R> / <P|AP>
                wrapper.Saxpy(X, X, alpha, P, N);               // X = X + alpha*P
                wrapper.Saxpy(R, R, -alpha, AP, N);             // R = R - alpha*AP
                float rr = ScalarProd(R, R, N);                 // <R|R> of the updated residual

                // Negated comparison so that a NaN residual also stops the iteration.
                if (!(rr >= threshold))
                {
                    break;
                }

                float beta = rr / r;
                wrapper.Saxpy(P, R, beta, P, N);                // P = R + beta*P
                r = rr;                                         // reuse instead of recomputing <R|R>
                ++k;
            }

            X.RefreshHost();
        }
        /// <summary>
        /// Entry point: fills a resident array with ones, invokes the wrapped
        /// <c>Total</c> entry on it and prints the first element of the result buffer.
        /// </summary>
        static void Main(string[] args)
        {
            const int N = 1024 * 1024 * 32;

            FloatResidentArray arr = new FloatResidentArray(N);
            float[] res = new float[1];

            // Initialise every element to 1 on the host side.
            int idx = 0;
            while (idx < N)
            {
                arr[idx] = 1.0F;
                ++idx;
            }

            arr.RefreshDevice();

            var runner = HybRunner.Cuda();

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            // NOTE(review): arguments presumably are grid x/y, block x/y/z and the
            // shared-memory size in bytes; confirm against the HybRunner API.
            var gridX = 16 * prop.multiProcessorCount;
            const int sharedBytes = 128 * sizeof(float);
            runner.SetDistrib(gridX, 1, 128, 1, 1, sharedBytes);

            var wrapped = runner.Wrap(new Program());
            runner.saveAssembly();

            // Check the status returned by the call, then the one from the synchronisation.
            cudaError_t launchStatus = (cudaError_t)(int)wrapped.Total(arr, N, res);
            cuda.ERROR_CHECK(launchStatus);
            cuda.ERROR_CHECK(cuda.DeviceSynchronize());

            Console.WriteLine(res[0]);
        }
예제 #3
0
 /// <summary>
 /// Forwards RefreshDevice to each of this object's three members:
 /// indices, data and rows, in that order.
 /// NOTE(review): presumably this uploads the matrix storage to the GPU; confirm
 /// against the member types, which are not visible here.
 /// </summary>
 public void RefreshDevice()
 {
     this.indices.RefreshDevice();
     this.data.RefreshDevice();
     this.rows.RefreshDevice();
 }