예제 #1
0
        /// <summary>
        /// Calls <c>opcuda_cublas_init</c> and uploads a single-precision copy of
        /// <c>grid.host_d_invm</c> into a newly created <c>_device_invm</c> device buffer.
        /// </summary>
        /// <exception cref="ExecutionEngineException">
        /// Thrown when <c>opcuda_cublas_init</c> returns a non-zero status.
        /// </exception>
        public void device_grid_init()
        {
            if (opcuda_cublas_init() != 0)
            {
                throw new ExecutionEngineException();
            }

            int count = grid.host_d_invm.Length;

            // NOTE(review): the first constructor argument here is count * sizeof(float),
            // not the plain element count - confirm that this size is intended.
            this._device_invm = new CArray(count * sizeof(float), EType.float_t,
                                           EMemorySpace.device, this, "_device_invm");

            // Narrow every host value to float before the upload.
            float[] singles = new float[count];
            int idx = 0;
            while (idx < count)
            {
                singles[idx] = (float)grid.host_d_invm[idx];
                idx++;
            }

            CArray.copy(ref _device_invm, singles);
        }
예제 #2
0
파일: CArray.cs 프로젝트: abisani/OPLib
 /// <summary>
 /// Creates a CArray that points into an existing buffer, starting
 /// <paramref name="offset_in_bytes"/> bytes past <c>buf._ptr</c>.
 /// </summary>
 /// <param name="buf">Buffer that supplies the base pointer and the memory side.</param>
 /// <param name="offset_in_bytes">Offset added to <c>buf._ptr</c>.</param>
 /// <param name="sz">Value stored in <c>_sz</c>; must not exceed <c>buf._sz - offset_in_bytes</c>.</param>
 /// <param name="model">Model stored in <c>_model</c>.</param>
 /// <param name="name">Name stored in <c>_name</c>.</param>
 /// <exception cref="System.Exception">
 /// Thrown when <paramref name="buf"/> is null or the requested region does not fit.
 /// </exception>
 public CArray(CArray buf, uint offset_in_bytes, int sz, CModel model, string name)
 {
     if (buf == null || sz > buf._sz - offset_in_bytes)
     {
         throw new System.Exception();
     }

     // Identity.
     _name = name;
     _model = model;

     // Location: same side as the parent, pointer shifted by the byte offset.
     _side = buf._side;
     _ptr = buf._ptr + offset_in_bytes;
     _sz = sz;
     _length = 0;

     // This instance starts with no sub-allocation bookkeeping of its own.
     _suballocated = false;
     _internal_buffer_pointers = null;
     _number_of_internal_buffers = 0;
 }
예제 #3
0
 /// <summary>
 /// Checks the operand lengths, forwards the three pointers to <c>opcuda_sdot2</c>
 /// and adds <c>m * n</c> to the <c>_gpu_nflops</c> counter.
 /// </summary>
 /// <param name="a">First operand; its length must equal <c>m * n</c>.</param>
 /// <param name="b">Second operand; its length must equal <c>m * n</c>.</param>
 /// <param name="c">Result buffer; its length must equal <c>m</c>.</param>
 /// <param name="m">Passed through to <c>opcuda_sdot2</c>.</param>
 /// <param name="n">Passed through to <c>opcuda_sdot2</c>.</param>
 /// <exception cref="System.Exception">Thrown when any length check fails.</exception>
 public void device_sdot2(CArray a, CArray b, ref CArray c, int m, int n)
 {
     int expected = m * n;

     bool lengthsMatch = a.length == expected
                         && b.length == expected
                         && c.length == m;
     if (!lengthsMatch)
     {
         throw new System.Exception();
     }

     opcuda_sdot2(a.ptr, b.ptr, c.ptr, m, n);
     _gpu_nflops += (double)m * n;
 }
예제 #4
0
        /// <summary>
        /// Runs <c>mcplan._nbatches</c> batches through <c>opcuda_mc1f</c> and hands the pinned
        /// host buffer <c>mcbuf.host_y_sk_th</c> to <paramref name="evaluator"/> after each batch.
        /// </summary>
        /// <param name="payoff_a">Not referenced anywhere in this method body.</param>
        /// <param name="evaluator">Its <c>eval</c> method is called once per batch with the batch index.</param>
        /// <exception cref="System.Exception">
        /// Thrown when <c>mcplan._nscen_per_batch</c> is not a multiple of 4096, or when
        /// <c>opcuda_mc1f</c> returns a status greater than zero.
        /// </exception>
        public void device_mc_run1f(double[] payoff_a, CMCEvaluator evaluator)
        {
            if (mcplan._nscen_per_batch % 4096 != 0) throw new System.Exception();
            // Device buffer of shorts with nk * nscen_per_batch elements, allocated once and reused by every batch.
            CArray device_y_sk = new CArray(mcplan.nk * mcplan._nscen_per_batch, EType.short_t, EMemorySpace.device, this, "_device_volatile_buf");

            for (int b = 0; b < mcplan._nbatches; b++)
            {
                unsafe
                {
                    // Pin the host array so its address stays valid during the native call and the evaluation.
                    fixed (short* yhost_sk = &mcbuf.host_y_sk_th[0])
                    {
                        // NOTE(review): the pinned pointer is narrowed to uint for the native call;
                        // presumably this assumes a 32-bit process - confirm before a 64-bit build.
                        int status = opcuda_mc1f(mcbuf._device_rgstatus.ptr, grid.y0,
                                                   device_y_sk.ptr,
                                                   mcplan._nscen_per_batch, mcplan.nk, grid.d,
                                                   mcbuf._device_ctpk_yy_m.ptr, mcbuf._device_m_k.ptr,
                                                   (uint)yhost_sk);

                        // Only positive status values are treated as failures here.
                        if (status > 0) throw new System.Exception();
                        device_thread_synchronize();

                        evaluator.eval(yhost_sk, 0, b);
                    }
                }
            }
        }
예제 #5
0
파일: CArray.cs 프로젝트: abisani/OPLib
 /// <summary>
 /// Releases the most recent sub-allocation of <paramref name="buf"/>: rewinds
 /// <c>buf._offset</c> by <c>target._sz</c>, clears the last bookkeeping slot and sets
 /// <paramref name="target"/> to null.
 /// </summary>
 /// <param name="target">Sub-allocated array to release; must be the last one taken from <paramref name="buf"/>.</param>
 /// <param name="buf">Parent buffer that owns the bookkeeping tables.</param>
 /// <exception cref="System.Exception">
 /// Thrown when an argument is null, when <paramref name="buf"/> has no recorded
 /// sub-allocations, or when <paramref name="target"/> is not the last recorded one.
 /// </exception>
 public static void subfree(ref CArray target, CArray buf)
 {
     if (target == null) throw new System.Exception();
     if (buf == null) throw new System.Exception();

     // Without any recorded sub-allocation the bookkeeping arrays are null or would be
     // indexed at -1; report that like every other validation failure instead.
     if (buf._internal_buffer_pointers == null || buf._number_of_internal_buffers <= 0)
         throw new System.Exception();

     // The target must end exactly at the parent's current allocation offset.
     if (target.ptr + target.sz != buf.ptr + buf._offset) throw new System.Exception();

     int last = buf._number_of_internal_buffers - 1;

     // The table stores pointers narrowed to int, so narrow the target pointer the same
     // way; otherwise an address above int.MaxValue could never compare equal.
     if (unchecked((int)target.ptr) != buf._internal_buffer_pointers[last])
         throw new System.Exception();

     buf._offset -= target._sz;
     buf._number_of_internal_buffers = last;
     buf._internal_buffer_pointers[last] = -1;
     buf._internal_buffer_names[last] = null;
     target = null;
 }
예제 #6
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Carves a new CArray of <paramref name="n"/> elements out of <paramref name="buf"/>,
        /// starting at <c>buf._ptr + buf._offset</c>, and records it in the parent's bookkeeping tables.
        /// </summary>
        /// <param name="target">Receives the new array; must be null on entry.</param>
        /// <param name="buf">Parent buffer to carve from.</param>
        /// <param name="n">Stored as the new array's length; its byte size is <c>round(n, type) * sizeof_type(type)</c>.</param>
        /// <param name="type">Element type stored on the new array.</param>
        /// <param name="side">Memory side stored on the new array.</param>
        /// <param name="name">Name stored on the new array and in the parent's name table.</param>
        /// <exception cref="System.Exception">
        /// Thrown when <paramref name="target"/> is not null, <paramref name="buf"/> is null, the
        /// bookkeeping tables are full, the parent has too little room, or the resulting pointer
        /// is not a multiple of 128.
        /// </exception>
        public static void suballoc(ref CArray target, CArray buf, int n, EType type, EMemorySpace side, string name)
        {
            const int MaxSubBuffers = 1000;

            if (target != null) throw new System.Exception();
            // Validate the parent before its fields are touched.
            if (buf == null) throw new System.Exception();

            if (buf._internal_buffer_pointers == null)
            {
                buf._internal_buffer_pointers = new int[MaxSubBuffers];
                for (int i = 0; i < MaxSubBuffers; i++)
                {
                    buf._internal_buffer_pointers[i] = -1;   // -1 marks an unused slot
                }
            }

            if (buf._internal_buffer_names == null)
            {
                // New string arrays already hold null in every element.
                buf._internal_buffer_names = new string[MaxSubBuffers];
            }

            // Refuse to write past the end of either bookkeeping table.
            int slot = buf._number_of_internal_buffers;
            if (slot >= buf._internal_buffer_pointers.Length || slot >= buf._internal_buffer_names.Length)
                throw new System.Exception();

            // Build the new array in a local so that a failed check leaves both the caller's
            // variable and the parent's bookkeeping untouched.
            CArray sub = new CArray(buf.model);
            sub._sz = round(n, type) * sizeof_type(type);
            if (buf.sz < buf._offset + sub._sz) throw new System.Exception();

            sub._suballocated = true;
            sub._ptr = (uint)(buf._ptr + buf._offset);
            sub._name = name;
            sub._side = side;
            sub._type = type;
            sub._length = n;

            // The resulting pointer must be a multiple of 128.
            if (sub.ptr - (sub.ptr / 128) * 128 > 0) throw new System.Exception();

            // Commit: record the sub-allocation and advance the parent's offset.
            buf._internal_buffer_pointers[slot] = (int)sub._ptr;
            buf._internal_buffer_names[slot] = sub._name;
            buf._number_of_internal_buffers = slot + 1;
            buf._offset += sub._sz;
            target = sub;
        }
예제 #7
0
파일: CArray.cs 프로젝트: abisani/OPLib
 /// <summary>
 /// Writes the single-precision value of <paramref name="c"/> into <paramref name="buf"/>.
 /// A device-side buffer is handled by <c>opcuda_ssetone</c>; any other buffer is written
 /// directly at element <paramref name="i"/> through its pointer.
 /// </summary>
 /// <param name="buf">Buffer to modify.</param>
 /// <param name="c">Value to store, narrowed to float.</param>
 /// <param name="i">Element index used on the non-device path.</param>
 public static void setone(ref CArray buf, double c, int i)
 {
     float value = (float)c;

     if (buf.side != EMemorySpace.device)
     {
         unsafe
         {
             float* data = (float*)buf.ptr;
             data[i] = value;
         }
         return;
     }

     // NOTE(review): this path passes the literal 1 as the last argument and never
     // forwards i - confirm against the opcuda_ssetone signature.
     opcuda_ssetone(buf.ptr, buf.length, value, 1);
 }
예제 #8
0
        /// <summary>
        /// Resets <c>_mcbuf</c> and <c>_mcplan</c>, records the batch layout, uploads
        /// <c>grid.y0</c> to the device when <c>grid.fpu</c> is the device, and then delegates
        /// to the <c>make_mc_plan(t_k)</c> overload.
        /// </summary>
        /// <param name="nscen_per_batch">Stored in <c>_mcplan._nscen_per_batch</c>.</param>
        /// <param name="nbatches">Stored in <c>_mcplan._nbatches</c>.</param>
        /// <param name="t_k">Forwarded unchanged to the single-argument overload.</param>
        public void make_mc_plan(int nscen_per_batch, int nbatches, DateTime[] t_k)
        {
            _mcbuf = new SMCBuffers();

            _mcplan = new SMCPlan();
            _mcplan._nbatches = nbatches;
            _mcplan._nscen_per_batch = nscen_per_batch;
            // Total scenario count across all batches.
            _mcplan._nscen = _mcplan._nscen_per_batch * _mcplan._nbatches;

            bool runsOnDevice = grid.fpu == EFloatingPointUnit.device;
            if (runsOnDevice)
            {
                _device_y0 = new CArray(2, EType.short_t, EMemorySpace.device, this, "_device_y0");
                short[] start = new short[] { (short)grid.y0 };
                CArray.copy(ref _device_y0, start);
            }

            make_mc_plan(t_k);
        }
예제 #9
0
        /// <summary>
        /// Loads <c>MersenneTwister.dat</c> into the GPU through <c>opcuda_mc_load_mt_gpu</c>,
        /// generates one random seed per generator reported by <c>opcuda_mc_nrng</c>, allocates
        /// <c>mcbuf._device_rgstatus</c> and seeds it with <c>opcuda_mc_setseed</c>.
        /// </summary>
        /// <returns>Always 0; failures are reported by exceptions.</returns>
        /// <exception cref="System.Exception">
        /// Thrown when the data file is empty or <c>opcuda_mc_load_mt_gpu</c> returns non-zero.
        /// I/O exceptions from reading the file propagate unchanged.
        /// </exception>
        public int device_mc_init()
        {
            // ReadAllBytes reads the complete file and closes the handle; the previous
            // FileStream was never disposed and a short Read() went unnoticed.
            byte[] MT = File.ReadAllBytes("MersenneTwister.dat");
            if (MT.Length == 0) throw new System.Exception();
            long mtLength = MT.Length;

            unsafe
            {
                fixed (byte* MTp = &MT[0])
                {
                    int status = opcuda_mc_load_mt_gpu(MTp, mtLength);
                    if (status != 0) throw new System.Exception();
                }
            }

            Random rand = new Random();
            int nrng = opcuda_mc_nrng();

            // One int seed per generator, written straight into the host buffer.
            CArray host_seed_rg = new CArray(nrng, EType.int_t, EMemorySpace.host, this, "host_seed_rg");
            unsafe
            {
                int* seed_rg = (int*)host_seed_rg.hptr;
                for (int rg = 0; rg < nrng; rg++)
                {
                    seed_rg[rg] = (int)(rand.NextDouble() * int.MaxValue);
                }
            }

            mcbuf._device_rgstatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, this, "mcbuf._device_rgstatus");

            unsafe
            {
                opcuda_mc_setseed(host_seed_rg.hptr, mcbuf._device_rgstatus.ptr);
            }
            return 0;
        }
예제 #10
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Uploads a host <c>short[]</c> into <paramref name="destination"/> with
        /// <c>opcuda_memcpy_h2d</c>, re-allocating the destination first when its current
        /// length is smaller than the source.
        /// </summary>
        /// <param name="destination">Target array; its type must be <c>EType.short_t</c>.</param>
        /// <param name="source">Host values to upload.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the destination type is not <c>short_t</c> or <c>opcuda_get_status</c>
        /// returns non-zero after the copy.
        /// </exception>
        public static void copy(ref CArray destination, short[] source)
        {
            if (destination.type != EType.short_t)
            {
                throw new System.Exception();
            }

            int count = source.Length;
            if (destination.length < count)
            {
                destination.alloc(count, EType.short_t, EMemorySpace.device, destination._model);
            }
            destination._length = count;

            unsafe
            {
                // Pin the managed array for the duration of the native copy.
                fixed (short* pinned = &source[0])
                {
                    uint byteCount = (uint)(source.Length * sizeof(short));
                    opcuda_memcpy_h2d(destination.ptr, (IntPtr)pinned, byteCount);
                }
            }

            if (opcuda_get_status() != 0)
            {
                throw new System.Exception();
            }
        }
예제 #11
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Downloads <paramref name="source"/> into a host <c>double[]</c>. A <c>double_t</c>
        /// source is copied directly; a <c>float_t</c> source is first copied into the host
        /// staging buffer <paramref name="buf"/> and then widened element by element.
        /// </summary>
        /// <param name="destination">Receives the values; set to null for a null source and to an empty array for an empty source.</param>
        /// <param name="source">Array to read from.</param>
        /// <param name="buf">Host staging buffer, required for a <c>float_t</c> source; must be <c>float_t</c> and at least as long as the source.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the staging buffer is missing or unsuitable, when the source type is
        /// neither <c>double_t</c> nor <c>float_t</c>, or when <c>opcuda_get_status</c> returns non-zero.
        /// </exception>
        public static void copy(ref double[] destination, CArray source, CArray buf)
        {
            // Handle a null source before any member of it is read.
            if (source == null)
            {
                destination = null;
                return;
            }

            // A staging buffer, when supplied for a device source, must live on the host.
            // Its presence is only enforced in the float branch, which is the one that uses it.
            if (source.side == EMemorySpace.device && buf != null && buf.side != EMemorySpace.host)
            {
                throw new System.Exception();
            }

            if (source.length == 0)
            {
                destination = new double[0];
                return;
            }

            if (source.type == EType.double_t)
            {
                alloc(ref destination, source.length);
                unsafe
                {
                    fixed (double* destinationp = &destination[0])
                    {
                        // Device-to-host: the managed array is the receiver. The earlier
                        // host-to-device call pushed the fresh array over the source data.
                        opcuda_memcpy_d2h(source.ptr, (IntPtr)destinationp, (uint)(source.length * CArray.sizeof_type(source.type)));
                    }
                }
                if (opcuda_get_status() != 0) throw new System.Exception();
                return;
            }

            if (source.type == EType.float_t)
            {
                if (buf == null) throw new System.Exception();
                if (buf.type != EType.float_t) throw new System.Exception();
                if (buf.length < source.length) throw new System.Exception();

                alloc(ref destination, source.length);

                unsafe
                {
                    opcuda_memcpy_d2h(source.ptr, buf.hptr, (uint)(source.length * CArray.sizeof_type(source.type)));

                    // Check the transfer before the staged data is consumed.
                    if (opcuda_get_status() != 0) throw new System.Exception();

                    // Read back from the same host pointer the copy wrote to.
                    float* bufp = (float*)buf.hptr;
                    for (int a = 0; a < source.length; a++)
                    {
                        destination[a] = (double)bufp[a];
                    }
                }
                return;
            }

            throw new System.Exception();
        }
예제 #12
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Downloads an <c>int_t</c> array into a host <c>int[]</c> with <c>opcuda_memcpy_d2h</c>.
        /// The destination is (re)created when it is null or shorter than the source.
        /// </summary>
        /// <param name="destination">Receives the values; may be null on entry.</param>
        /// <param name="source">Array to read from; its type must be <c>EType.int_t</c>.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the source type is not <c>int_t</c> or <c>opcuda_get_status</c> returns non-zero.
        /// </exception>
        public static void copy(ref int[] destination, CArray source)
        {
            if (source.type != EType.int_t) throw new System.Exception();

            // A null destination is treated like one that is too short.
            if (destination == null || destination.Length < source.length)
            {
                destination = new int[source.length];
            }

            // Nothing to transfer for an empty source; this also avoids taking the
            // address of element 0 of an empty array.
            if (source.length > 0)
            {
                unsafe
                {
                    fixed (int* destinationp = &destination[0])
                    {
                        opcuda_memcpy_d2h(source.ptr, (IntPtr)destinationp, (uint)(source.Size_of_one * source.length));
                    }
                }
            }

            int status = opcuda_get_status();
            if (status != 0) throw new System.Exception();
        }
예제 #13
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Copies <paramref name="source"/> into <paramref name="destination"/>, choosing the
        /// transfer routine from the memory sides of both arrays. The destination takes the
        /// source's type and is allocated for the source's length first.
        /// </summary>
        /// <param name="destination">Target array; must belong to the same model as the source.</param>
        /// <param name="source">Array to copy from.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the models differ, when both arrays are on the host, or when
        /// <c>opcuda_get_status</c> returns non-zero afterwards.
        /// </exception>
        public static void copy(ref CArray destination, CArray source)
        {
            if (source._model != destination._model)
            {
                throw new System.Exception();
            }

            destination._type = source.type;
            destination.alloc(source.length, source.type, destination.side, source._model);

            EMemorySpace srcSide = source.side;
            EMemorySpace dstSide = destination.side;

            if (srcSide == EMemorySpace.device && dstSide == EMemorySpace.host)
            {
                opcuda_memcpy_d2h(source.ptr, destination.hptr, (uint)(source.Size_of_one * source.length));
            }
            else if (srcSide == EMemorySpace.device && dstSide == EMemorySpace.device)
            {
                // 4-byte elements use opcuda_scopy1 with an element count; everything else
                // is a byte-wise device-to-device copy.
                if (source.Size_of_one != 4)
                {
                    opcuda_memcpy_d2d(destination.ptr, source.ptr, (uint)(source.Size_of_one * source.length));
                }
                else
                {
                    opcuda_scopy1(destination.ptr, source.ptr, (uint)(source.length));
                }
            }
            else if (srcSide == EMemorySpace.host && dstSide == EMemorySpace.device)
            {
                opcuda_memcpy_h2d(destination.ptr, source.hptr, (uint)(source.Size_of_one * source.length));
            }
            else if (srcSide == EMemorySpace.host && dstSide == EMemorySpace.host)
            {
                throw new System.Exception();
            }

            if (opcuda_get_status() != 0)
            {
                throw new System.Exception();
            }
        }
예제 #14
0
파일: CArray.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Ensures <paramref name="buf"/> can hold <paramref name="n"/> elements of
        /// <paramref name="type"/>: creates a new CArray when it is null or too small and
        /// otherwise leaves it as it is.
        /// </summary>
        /// <param name="buf">Array to create or replace.</param>
        /// <param name="n">Requested element count.</param>
        /// <param name="type">Requested element type.</param>
        /// <param name="side">Memory side for a newly created array.</param>
        /// <param name="model">Owning model; must equal the existing array's model.</param>
        /// <param name="name">Name for a newly created array.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the existing array is sub-allocated or belongs to a different model.
        /// </exception>
        public static void alloc(ref CArray buf, int n, EType type, EMemorySpace side, CModel model, string name)
        {
            if (buf != null)
            {
                if (buf._suballocated)
                {
                    throw new System.Exception();
                }

                // Evaluate the size test first, then the model test that applies either way.
                bool tooSmall = buf.sz < n * sizeof_type(type);
                if (model != buf._model)
                {
                    throw new System.Exception();
                }

                if (!tooSmall)
                {
                    return;
                }
            }

            buf = new CArray(n, type, side, model, name);
        }
예제 #15
0
파일: Program.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Times <c>opcuda_mt_benchmark</c> on device <paramref name="dev"/> twice - once with a
        /// device-to-host copy after every batch and once without - and prints both rates.
        /// </summary>
        /// <param name="dev">Device index passed to <c>opcuda_set_device</c>.</param>
        /// <exception cref="ExecutionEngineException">
        /// Thrown when <c>opcuda_cublas_init</c> or <c>opcuda_shutdown</c> returns non-zero.
        /// </exception>
        /// <exception cref="System.Exception">
        /// Thrown when the data file is empty or <c>opcuda_mc_load_mt_gpu</c> returns non-zero.
        /// </exception>
        public static void run_benchmark_gpu_mt(uint dev)
        {
            Console.WriteLine("running the Mersenne Twister benchmark on device " + dev);

            int nscen_per_batch = 4096 * 250;
            int nbatches = 200;

            int status = opcuda_cublas_init();
            if (status != 0) throw new ExecutionEngineException();
            opcuda_set_device(dev);

            // ReadAllBytes reads the whole file and closes the handle; the previous
            // FileStream was never disposed and a short Read() went unnoticed.
            byte[] MT;
            try
            {
                MT = File.ReadAllBytes("MersenneTwister.dat");
            }
            catch
            {
                Console.WriteLine("device_mc_init() failed on device " + dev + ". Aborting");
                return;
            }
            if (MT.Length == 0) throw new System.Exception();
            long mtLength = MT.Length;

            unsafe
            {
                fixed (byte* MTp = &MT[0])
                {
                    status = opcuda_mc_load_mt_gpu(MTp, mtLength);
                    if (status != 0) throw new System.Exception();
                }
            }

            Random rand = new Random();
            int nrng = opcuda_mc_nrng();

            // One int seed per generator, written straight into the host buffer.
            CArray host_seed_rg = new CArray(nrng, EType.int_t, EMemorySpace.host, null, "host_seed_rg");
            unsafe
            {
                int* seed_rg = (int*)host_seed_rg.hptr;
                for (int rg = 0; rg < nrng; rg++)
                {
                    seed_rg[rg] = (int)(rand.NextDouble() * int.MaxValue);
                }
            }

            CArray device_rgstatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, null, "mcbuf._device_rgstatus");
            unsafe
            {
                opcuda_mc_setseed(host_seed_rg.hptr, device_rgstatus.ptr);
            }

            CArray device_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.device, null, "device_unif_s");
            CArray host_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.host, null, "host_unif_s");

            // Pass 1: generation plus a copy back to the host after every batch.
            CStopWatch sw = new CStopWatch();
            sw.Reset();
            unsafe
            {
                for (int b = 0; b < nbatches; b++)
                {
                    opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);
                    // Both buffers are float_t, so the byte count uses sizeof(float);
                    // sizeof(short) transferred only half of each batch.
                    opcuda_memcpy_d2h(device_unif_s.ptr, host_unif_s.hptr, (uint)(sizeof(float) * host_unif_s.length));
                }
            }
            opcuda_thread_synchronize();
            double time = sw.Peek();

            double nevals = (double)nbatches * (double)nscen_per_batch;
            double milion_evals_per_second = nevals / (1000000 * time);
            // NOTE(review): shutdown is called here although pass 2 below still uses the
            // device buffers - confirm what opcuda_shutdown releases.
            status = opcuda_shutdown();
            if (status != 0) throw new ExecutionEngineException();
            Console.WriteLine("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");

            // Pass 2: generation only.
            sw.Reset();
            unsafe
            {
                for (int b = 0; b < nbatches; b++)
                {
                    opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);
                }
            }
            opcuda_thread_synchronize();
            time = sw.Peek();

            nevals = (double)nbatches * (double)nscen_per_batch;
            milion_evals_per_second = nevals / (1000000 * time);
            status = opcuda_shutdown();
            if (status != 0) throw new ExecutionEngineException();
            Console.WriteLine("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");
        }
예제 #16
0
 /// <summary>
 /// Forwards to <c>opcuda_ssetall</c> with the buffer's pointer and length, the value
 /// narrowed to float, and 1 as the last argument.
 /// </summary>
 /// <param name="buf">Buffer whose pointer and length are passed on.</param>
 /// <param name="c">Value to write, narrowed to float.</param>
 public void device_setall(ref CArray buf, double c)
 {
     float fill = (float)c;
     opcuda_ssetall(buf.ptr, buf.length, fill, 1);
 }
예제 #17
0
파일: CArray.cs 프로젝트: abisani/OPLib
 /// <summary>
 /// Downloads <paramref name="a"/> and returns the largest absolute difference between
 /// its elements and those of <paramref name="b"/>, over the first <c>b.Length</c> positions.
 /// </summary>
 /// <param name="a">Array to compare; its type must be <c>EType.int_t</c>.</param>
 /// <param name="b">Reference values.</param>
 /// <param name="maxerror">Receives the largest absolute difference (0 when none is positive).</param>
 /// <param name="imax">Receives the index of the largest difference, or -1 when none is positive.</param>
 /// <returns>The same value as <paramref name="maxerror"/>.</returns>
 /// <exception cref="System.Exception">Thrown when <paramref name="a"/> is not <c>int_t</c>.</exception>
 public static double mismatch(CArray a, int[] b, ref double maxerror, ref int imax)
 {
     if (a.type != EType.int_t) throw new System.Exception();

     imax = -1;
     int[] a1 = new int[a.length];
     copy(ref a1, a);
     maxerror = 0;

     for (int i = 0; i < b.Length; i++)
     {
         // Subtract in 64 bits: an int difference can wrap around, and
         // Math.Abs(int.MinValue) throws OverflowException.
         double error = Math.Abs((long)a1[i] - b[i]);
         if (error > maxerror)
         {
             maxerror = error;
             imax = i;
         }
     }
     return maxerror;
 }
예제 #18
0
파일: CArray.cs 프로젝트: abisani/OPLib
 /// <summary>
 /// Fills every element of <paramref name="buf"/> with the single-precision value of
 /// <paramref name="c"/>. A device-side buffer is delegated to the model's
 /// <c>device_setall</c>; any other buffer is written directly through its pointer.
 /// </summary>
 /// <param name="buf">Buffer to fill.</param>
 /// <param name="c">Fill value, narrowed to float.</param>
 public static void setall(ref CArray buf, double c)
 {
     float cf = (float)c;

     if (buf.side != EMemorySpace.device)
     {
         unsafe
         {
             float* data = (float*)buf.ptr;
             int k = 0;
             while (k < buf.length)
             {
                 data[k] = cf;
                 k++;
             }
         }
         return;
     }

     buf.model.device_setall(ref buf, cf);
 }
예제 #19
0
        /// <summary>
        /// Times <c>opcuda_mt_benchmark</c> on device <paramref name="dev"/> twice - once with a
        /// device-to-host copy after every batch and once without - and stores both rates in
        /// <c>benchmarks</c> and in <c>log</c>.
        /// </summary>
        /// <param name="dev">Device index; also used as the index into the per-device result arrays.</param>
        /// <exception cref="ExecutionEngineException">
        /// Thrown when <c>opcuda_cublas_init</c> or a checked <c>opcuda_shutdown</c> call returns non-zero.
        /// </exception>
        public void run_benchmark_gpu_mt(uint dev)
        {
            log.Add("running sglv1f, device " + dev);

            int nscen_per_batch = 4096 * 25;
            int nbatches = 20;

            int status = opcuda_cublas_init();
            if (status != 0) throw new ExecutionEngineException();
            opcuda_set_device(dev);

            opcuda_mc_load_mt_gpu();
            Random rand = new Random();
            int nrng = opcuda_mc_nrng();

            // One int seed per generator, written straight into the host buffer.
            CArray host_seed_rg = new CArray(nrng, EType.int_t, EMemorySpace.host, null, "host_seed_rg");
            unsafe
            {
                int* seed_rg = (int*)host_seed_rg.hptr;
                for (int rg = 0; rg < nrng; rg++)
                {
                    seed_rg[rg] = (int)(rand.NextDouble() * int.MaxValue);
                }
            }

            CArray device_rgstatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, null, "mcbuf._device_rgstatus");
            unsafe
            {
                opcuda_mc_setseed(host_seed_rg.hptr, device_rgstatus.ptr);
            }

            CArray device_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.device, null, "device_unif_s");
            CArray host_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.host, null, "host_unif_s");

            // Pass 1: generation plus a copy back to the host after every batch.
            CStopWatch sw = new CStopWatch();
            sw.Reset();
            unsafe
            {
                for (int b = 0; b < nbatches; b++)
                {
                    opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);
                    // Both buffers are float_t, so the byte count uses sizeof(float);
                    // sizeof(short) transferred only half of each batch.
                    opcuda_memcpy_d2h(device_unif_s.ptr, host_unif_s.hptr, (uint)(sizeof(float) * host_unif_s.length));
                }
            }
            opcuda_thread_synchronize();
            double time = sw.Peek();

            double nevals = (double)nbatches * (double)nscen_per_batch;
            double milion_evals_per_second = nevals / (1000000 * time);
            // NOTE(review): shutdown is called here although pass 2 below still uses the
            // device buffers - confirm what opcuda_shutdown releases.
            status = opcuda_shutdown();
            if (status != 0) throw new ExecutionEngineException();
            if (benchmarks == null) benchmarks = new SBenchmarks();
            if (benchmarks.gpu_mt_with_copy_performance_dev == null)
                benchmarks.gpu_mt_with_copy_performance_dev = new double[ndev];
            benchmarks.gpu_mt_with_copy_performance_dev[dev] = milion_evals_per_second;
            log.Add("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");

            // Pass 2: generation only.
            sw.Reset();
            unsafe
            {
                for (int b = 0; b < nbatches; b++)
                {
                    opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);
                }
            }
            opcuda_thread_synchronize();
            time = sw.Peek();

            nevals = (double)nbatches * (double)nscen_per_batch;
            milion_evals_per_second = nevals / (1000000 * time);
            status = opcuda_shutdown();
            if (status != 0) throw new ExecutionEngineException();
            if (benchmarks == null) benchmarks = new SBenchmarks();
            if (benchmarks.gpu_mt_no_copy_performance_dev == null)
                benchmarks.gpu_mt_no_copy_performance_dev = new double[ndev];
            benchmarks.gpu_mt_no_copy_performance_dev[dev] = milion_evals_per_second;
            log.Add("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");

            // The status of this final shutdown call is not checked.
            opcuda_shutdown();
        }
예제 #20
0
파일: CSVModel.cs 프로젝트: abisani/OPLib
        /// <summary>
        /// Prepares the device generator buffer: allocates <c>_device_gen_yy_i</c>, uploads the
        /// packed model parameters as <c>_device_pars</c>, zeroes the generator with
        /// <c>opcuda_ssetall</c> and then fills it with <c>opcuda_sgsvg</c>. The generator is
        /// read back into a local array after each of the last two steps.
        /// </summary>
        protected override void device_sgen()
        {
            int ni = grid.ni;
            int d = grid.d;
            int plane = ni * d;   // number of entries in one per-(i, y0) parameter plane

            CArray.alloc(ref this._device_gen_yy_i, plane * d, EType.float_t, EMemorySpace.device, this, "_device_gen_yy_i");

            // Layout of the parameter array: d x-values first, then six planes of ni * d entries.
            _device_pars = new CArray(d + 6 * plane, EType.float_t, EMemorySpace.device, this, "_device_pars");
            float[] packed = new float[d + 6 * plane];
            for (int state = 0; state < d; state++)
            {
                packed[state] = (float)grid.host_d_xval(state);
                for (int period = 0; period < ni; period++)
                {
                    int slot = d + period * d + state;   // position inside plane 0
                    packed[slot + 0 * plane] = (float)(grid.host_d_ir_yi[d * period] * grid.host_d_xval(state));
                    packed[slot + 1 * plane] = (float)SVol(period, state);
                    packed[slot + 2 * plane] = (float)VolDrift(period, state);
                    packed[slot + 3 * plane] = (float)VolVol(period, state);
                    packed[slot + 4 * plane] = (float)Jumpsz_minus(period, state);
                    packed[slot + 5 * plane] = (float)Jumpsz_plus(period, state);
                }
            }
            CArray.copy(ref _device_pars, packed);

            opcuda_ssetall(_device_gen_yy_i.ptr, d * d * ni, 0, 1);
            float[] snapshot = new float[_device_gen_yy_i.length];
            CArray.copy(ref snapshot, _device_gen_yy_i);
            opcuda_sgsvg(_device_gen_yy_i.ptr, ni, grid.nx, grid.nr, _device_invm.ptr, _device_pars.ptr);
            CArray.copy(ref snapshot, _device_gen_yy_i);
        }