/// <summary>
/// Calls <c>opcuda_cublas_init</c>, then creates <c>_device_invm</c> on the device and fills it
/// with a single-precision copy of <c>grid.host_d_invm</c>.
/// </summary>
/// <exception cref="ExecutionEngineException">
/// Thrown when <c>opcuda_cublas_init</c> returns a non-zero status.
/// </exception>
public void device_grid_init()
{
    if (opcuda_cublas_init() != 0)
    {
        throw new ExecutionEngineException();
    }

    int count = grid.host_d_invm.Length;

    // NOTE(review): the first constructor argument is count * sizeof(float); whether the
    // constructor expects an element count or a byte count is not visible here - confirm.
    this._device_invm = new CArray(count * sizeof(float), EType.float_t, EMemorySpace.device, this, "_device_invm");

    // Narrow every host value to float before uploading.
    float[] narrowed = new float[count];
    int pos = 0;
    while (pos < count)
    {
        narrowed[pos] = (float)grid.host_d_invm[pos];
        pos++;
    }

    CArray.copy(ref _device_invm, narrowed);
}
/// <summary>
/// Builds an array whose pointer is <paramref name="buf"/>'s pointer advanced by
/// <paramref name="offset_in_bytes"/>. The new instance copies the memory side of
/// <paramref name="buf"/>, records <paramref name="sz"/> as its size, and starts with a
/// length of zero and no internal sub-buffers.
/// </summary>
/// <param name="buf">Existing array that provides the base pointer and memory side.</param>
/// <param name="offset_in_bytes">Offset added to the base pointer.</param>
/// <param name="sz">Size stored in the new instance; must not exceed <c>buf._sz - offset_in_bytes</c>.</param>
/// <param name="model">Model stored in the new instance.</param>
/// <param name="name">Name stored in the new instance.</param>
/// <exception cref="System.Exception">
/// Thrown when <paramref name="buf"/> is null or the requested region does not fit.
/// </exception>
public CArray(CArray buf, uint offset_in_bytes, int sz, CModel model, string name)
{
    if (buf == null)
    {
        throw new System.Exception();
    }

    // The region [offset, offset + sz) has to lie inside the parent buffer.
    bool fitsInParent = sz <= buf._sz - offset_in_bytes;
    if (!fitsInParent)
    {
        throw new System.Exception();
    }

    _name = name;
    _model = model;

    this._side = buf._side;
    this._ptr = buf._ptr + offset_in_bytes;
    this._sz = sz;
    this._length = 0;

    _suballocated = false;
    _internal_buffer_pointers = null;
    _number_of_internal_buffers = 0;
}
/// <summary>
/// Validates the operand lengths and forwards the pointers of <paramref name="a"/>,
/// <paramref name="b"/> and <paramref name="c"/> to <c>opcuda_sdot2</c>, then adds
/// <c>m * n</c> to the <c>_gpu_nflops</c> counter.
/// </summary>
/// <param name="a">First input; its length must equal <c>m * n</c>.</param>
/// <param name="b">Second input; its length must equal <c>m * n</c>.</param>
/// <param name="c">Output; its length must equal <paramref name="m"/>.</param>
/// <param name="m">First dimension passed to the native routine.</param>
/// <param name="n">Second dimension passed to the native routine.</param>
/// <exception cref="System.Exception">Thrown when any length check fails.</exception>
public void device_sdot2(CArray a, CArray b, ref CArray c, int m, int n)
{
    int inputLength = m * n;

    // Checks run in the same order as before: a, then b, then c.
    if (a.length != inputLength || b.length != inputLength || c.length != m)
    {
        throw new System.Exception();
    }

    opcuda_sdot2(a.ptr, b.ptr, c.ptr, m, n);

    _gpu_nflops += (double)m * n;
}
/// <summary>
/// Runs <c>mcplan._nbatches</c> batches through <c>opcuda_mc1f</c>. After each batch it calls
/// <c>device_thread_synchronize</c> and hands the pinned <c>mcbuf.host_y_sk_th</c> buffer to
/// <paramref name="evaluator"/>.
/// </summary>
/// <param name="payoff_a">Not referenced anywhere in this method body.</param>
/// <param name="evaluator">Receives the pinned host buffer, a zero argument and the batch index.</param>
/// <exception cref="System.Exception">
/// Thrown when <c>mcplan._nscen_per_batch</c> is not a multiple of 4096 or when
/// <c>opcuda_mc1f</c> returns a positive status.
/// </exception>
public void device_mc_run1f(double[] payoff_a, CMCEvaluator evaluator)
{
    bool batchSizeOk = (mcplan._nscen_per_batch % 4096) == 0;
    if (!batchSizeOk)
    {
        throw new System.Exception();
    }

    CArray deviceScratch = new CArray(
        mcplan.nk * mcplan._nscen_per_batch,
        EType.short_t,
        EMemorySpace.device,
        this,
        "_device_volatile_buf");

    int batch = 0;
    while (batch < mcplan._nbatches)
    {
        unsafe
        {
            // The host buffer is pinned separately for every batch.
            fixed (short* pinnedHost = &mcbuf.host_y_sk_th[0])
            {
                // NOTE(review): the host pointer is passed as a 32-bit uint; this looks like it
                // assumes a 32-bit process - confirm against the native signature.
                int rc = opcuda_mc1f(
                    mcbuf._device_rgstatus.ptr,
                    grid.y0,
                    deviceScratch.ptr,
                    mcplan._nscen_per_batch,
                    mcplan.nk,
                    grid.d,
                    mcbuf._device_ctpk_yy_m.ptr,
                    mcbuf._device_m_k.ptr,
                    (uint)pinnedHost);

                // Only strictly positive status codes are treated as failures.
                if (rc > 0)
                {
                    throw new System.Exception();
                }

                device_thread_synchronize();
                evaluator.eval(pinnedHost, 0, batch);
            }
        }

        batch++;
    }
}
/// <summary>
/// Releases the most recently recorded sub-buffer of <paramref name="buf"/>: lowers
/// <c>buf._offset</c> by the size of <paramref name="target"/>, clears the last bookkeeping
/// slot and sets <paramref name="target"/> to null.
/// </summary>
/// <param name="target">Sub-buffer to release; it must end exactly at the current offset of
/// <paramref name="buf"/> and its pointer must match the last recorded pointer.</param>
/// <param name="buf">Parent buffer holding the bookkeeping arrays.</param>
/// <exception cref="System.Exception">
/// Thrown when either argument is null or <paramref name="target"/> is not the last recorded sub-buffer.
/// </exception>
public static void subfree(ref CArray target, CArray buf)
{
    if (target == null || buf == null)
    {
        throw new System.Exception();
    }

    // The sub-buffer has to end exactly where the parent's used region ends.
    if (target.ptr + target.sz != buf.ptr + buf._offset)
    {
        throw new System.Exception();
    }

    // ...and it has to be the entry recorded last.
    int lastSlot = buf._number_of_internal_buffers - 1;
    if (target.ptr != buf._internal_buffer_pointers[lastSlot])
    {
        throw new System.Exception();
    }

    buf._offset -= target._sz;

    buf._number_of_internal_buffers = lastSlot;
    buf._internal_buffer_pointers[lastSlot] = -1;
    buf._internal_buffer_names[lastSlot] = null;

    target = null;
}
/// <summary>
/// Carves a sub-buffer for <paramref name="n"/> elements of <paramref name="type"/> out of
/// <paramref name="buf"/>, starting at <c>buf._ptr + buf._offset</c>, records it in the
/// bookkeeping arrays of <paramref name="buf"/> and returns it through <paramref name="target"/>.
/// </summary>
/// <param name="target">Must be null on entry; receives the new sub-buffer on success and is
/// left untouched on failure.</param>
/// <param name="buf">Parent buffer; must not be null.</param>
/// <param name="n">Element count stored as the length of the sub-buffer.</param>
/// <param name="type">Element type; the size is <c>round(n, type) * sizeof_type(type)</c>.</param>
/// <param name="side">Memory side stored in the sub-buffer.</param>
/// <param name="name">Name stored in the sub-buffer and in the bookkeeping table.</param>
/// <exception cref="System.Exception">
/// Thrown when <paramref name="target"/> is not null, <paramref name="buf"/> is null, the
/// bookkeeping table is full, the parent has too little room left, or the resulting pointer
/// is not a multiple of 128.
/// </exception>
public static void suballoc(ref CArray target, CArray buf, int n, EType type, EMemorySpace side, string name)
{
    if (target != null)
    {
        throw new System.Exception();
    }

    // The null check has to run before buf is dereferenced; previously it came after the
    // bookkeeping arrays had already been read, so it could never fire.
    if (buf == null)
    {
        throw new System.Exception();
    }

    if (buf._internal_buffer_pointers == null)
    {
        buf._internal_buffer_pointers = new int[1000];
        for (int i = 0; i < 1000; i++)
        {
            buf._internal_buffer_pointers[i] = -1;
        }
    }

    if (buf._internal_buffer_names == null)
    {
        // A new string[] already holds null in every slot.
        buf._internal_buffer_names = new string[1000];
    }

    // Refuse to run past the end of the bookkeeping tables.
    if (buf._number_of_internal_buffers >= buf._internal_buffer_pointers.Length
        || buf._number_of_internal_buffers >= buf._internal_buffer_names.Length)
    {
        throw new System.Exception();
    }

    // Build the sub-buffer in a local so that a failed check leaves target null and buf unchanged.
    CArray sub = new CArray(buf.model);
    sub._sz = round(n, type) * sizeof_type(type);

    if (buf.sz < buf._offset + sub._sz)
    {
        throw new System.Exception();
    }

    sub._suballocated = true;
    sub._ptr = (uint)(buf._ptr + buf._offset);
    sub._name = name;
    sub._side = side;
    sub._type = type;
    sub._length = n;

    // The start address has to be a multiple of 128.
    if (sub.ptr - (sub.ptr / 128) * 128 > 0)
    {
        throw new System.Exception();
    }

    // All checks passed: commit the bookkeeping and publish the result.
    buf._internal_buffer_pointers[buf._number_of_internal_buffers] = (int)sub._ptr;
    buf._internal_buffer_names[buf._number_of_internal_buffers] = sub._name;
    buf._number_of_internal_buffers += 1;
    buf._offset += sub._sz;

    target = sub;
}
/// <summary>
/// Writes the single-precision value of <paramref name="c"/> into <paramref name="buf"/>.
/// For a device buffer the work is delegated to <c>opcuda_ssetone</c>; otherwise element
/// <paramref name="i"/> is written directly through the buffer pointer.
/// </summary>
/// <param name="buf">Buffer to modify.</param>
/// <param name="c">Value to store, narrowed to float.</param>
/// <param name="i">Element index used on the non-device path. The device path passes the
/// literal 1 as the last native argument and does not use this value.</param>
public static void setone(ref CArray buf, double c, int i)
{
    float value = (float)c;

    if (buf.side != EMemorySpace.device)
    {
        unsafe
        {
            float* elements = (float*)buf.ptr;
            elements[i] = value;
        }

        return;
    }

    // NOTE(review): i is not forwarded here; confirm the meaning of the last argument of
    // opcuda_ssetone against the native declaration.
    opcuda_ssetone(buf.ptr, buf.length, value, 1);
}
/// <summary>
/// Resets <c>_mcbuf</c> and <c>_mcplan</c>, stores the batch layout in the plan, uploads
/// <c>grid.y0</c> to <c>_device_y0</c> when <c>grid.fpu</c> is the device, and then calls the
/// <c>make_mc_plan(t_k)</c> overload.
/// </summary>
/// <param name="nscen_per_batch">Stored in <c>_mcplan._nscen_per_batch</c>.</param>
/// <param name="nbatches">Stored in <c>_mcplan._nbatches</c>.</param>
/// <param name="t_k">Forwarded unchanged to the single-argument overload.</param>
public void make_mc_plan(int nscen_per_batch, int nbatches, DateTime[] t_k)
{
    _mcbuf = new SMCBuffers();

    _mcplan = new SMCPlan();
    _mcplan._nbatches = nbatches;
    _mcplan._nscen_per_batch = nscen_per_batch;
    // Total scenario count is derived from the two values just stored.
    _mcplan._nscen = _mcplan._nscen_per_batch * _mcplan._nbatches;

    bool runsOnDevice = grid.fpu == EFloatingPointUnit.device;
    if (runsOnDevice)
    {
        // The device array is created for two shorts; a single short is copied into it.
        _device_y0 = new CArray(2, EType.short_t, EMemorySpace.device, this, "_device_y0");
        short[] startState = new short[] { (short)grid.y0 };
        CArray.copy(ref _device_y0, startState);
    }

    make_mc_plan(t_k);
}
/// <summary>
/// Loads <c>MersenneTwister.dat</c>, passes its bytes to <c>opcuda_mc_load_mt_gpu</c>, fills a
/// host array with one random seed per generator (count from <c>opcuda_mc_nrng</c>), creates
/// <c>mcbuf._device_rgstatus</c> and calls <c>opcuda_mc_setseed</c>.
/// </summary>
/// <returns>Always 0; failures are reported through exceptions.</returns>
/// <exception cref="System.IO.EndOfStreamException">
/// Thrown when the file ends before the number of bytes given by its length has been read.
/// </exception>
/// <exception cref="System.Exception">
/// Thrown when <c>opcuda_mc_load_mt_gpu</c> returns a non-zero status.
/// </exception>
public int device_mc_init()
{
    byte[] MT;

    // Dispose the stream deterministically; it used to stay open until finalisation.
    using (FileStream stream = new FileStream("MersenneTwister.dat", FileMode.Open, FileAccess.Read))
    {
        MT = new byte[stream.Length];

        // Stream.Read may return fewer bytes than requested, so keep reading until full.
        int filled = 0;
        while (filled < MT.Length)
        {
            int got = stream.Read(MT, filled, MT.Length - filled);
            if (got <= 0)
            {
                throw new System.IO.EndOfStreamException("MersenneTwister.dat ended before all bytes were read");
            }

            filled += got;
        }
    }

    unsafe
    {
        fixed (byte* MTp = &MT[0])
        {
            // MT was sized from stream.Length, so LongLength carries the same value.
            int status = opcuda_mc_load_mt_gpu(MTp, MT.LongLength);
            if (status != 0)
            {
                throw new System.Exception();
            }
        }
    }

    Random rand = new Random();
    int nrng = opcuda_mc_nrng();
    CArray host_seed_rg = new CArray(nrng, EType.int_t, EMemorySpace.host, this, "host_seed_rg");

    unsafe
    {
        int* seed_rg = (int*)host_seed_rg.hptr;
        for (int rg = 0; rg < nrng; rg++)
        {
            seed_rg[rg] = (int)(rand.NextDouble() * int.MaxValue);
        }
    }

    mcbuf._device_rgstatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, this, "mcbuf._device_rgstatus");

    unsafe
    {
        opcuda_mc_setseed(host_seed_rg.hptr, mcbuf._device_rgstatus.ptr);
    }

    return 0;
}
/// <summary>
/// Copies a managed <c>short[]</c> into <paramref name="destination"/> using
/// <c>opcuda_memcpy_h2d</c>. The destination is re-allocated through its instance
/// <c>alloc</c> when its current length is smaller than the source, and its length is set
/// to the source length.
/// </summary>
/// <param name="destination">Target array; its type must be <c>EType.short_t</c>.</param>
/// <param name="source">Values to copy; the first element is pinned for the transfer.</param>
/// <exception cref="System.Exception">
/// Thrown when the destination type is not <c>short_t</c> or <c>opcuda_get_status</c> reports
/// a non-zero status after the copy.
/// </exception>
public static void copy(ref CArray destination, short[] source)
{
    if (destination.type != EType.short_t)
    {
        throw new System.Exception();
    }

    int count = source.Length;

    if (destination.length < count)
    {
        destination.alloc(count, EType.short_t, EMemorySpace.device, destination._model);
    }

    destination._length = count;

    unsafe
    {
        fixed (short* pinned = &source[0])
        {
            opcuda_memcpy_h2d(destination.ptr, (IntPtr)pinned, (uint)(count * sizeof(short)));
        }
    }

    if (opcuda_get_status() != 0)
    {
        throw new System.Exception();
    }
}
/// <summary>
/// Copies the contents of <paramref name="source"/> into a managed <c>double[]</c>.
/// A <c>double_t</c> source is transferred directly with <c>opcuda_memcpy_d2h</c>; a
/// <c>float_t</c> source is first transferred into the host staging buffer
/// <paramref name="buf"/> and then widened element by element.
/// </summary>
/// <param name="destination">Receives the values. Set to null when <paramref name="source"/>
/// is null and to an empty array when the source length is zero; otherwise it is passed to
/// <c>alloc(ref destination, source.length)</c> before the copy.</param>
/// <param name="source">Array to read from.</param>
/// <param name="buf">Host staging buffer, required for a <c>float_t</c> source (type
/// <c>float_t</c>, length at least the source length). May be null for a <c>double_t</c> source.</param>
/// <exception cref="System.Exception">
/// Thrown when the source is on the device and a supplied <paramref name="buf"/> is not on
/// the host, when the staging buffer is missing or unsuitable for a float source, when the
/// source type is neither double nor float, or when <c>opcuda_get_status</c> is non-zero.
/// </exception>
public static void copy(ref double[] destination, CArray source, CArray buf)
{
    // Handle a null source before touching any of its members; this branch used to sit
    // behind a dereference of source.side and was therefore unreachable.
    if (source == null)
    {
        destination = null;
        return;
    }

    // A staging buffer, when supplied for a device source, has to live on the host.
    if (source.side == EMemorySpace.device && buf != null && buf.side != EMemorySpace.host)
    {
        throw new System.Exception();
    }

    if (source.length == 0)
    {
        destination = new double[0];
        return;
    }

    if (source.type == EType.double_t)
    {
        alloc(ref destination, source.length);

        unsafe
        {
            fixed (double* destinationp = &destination[0])
            {
                // Device-to-host: the previous host-to-device call wrote the contents of
                // destination over the source instead of reading the source.
                opcuda_memcpy_d2h(source.ptr, (IntPtr)destinationp, (uint)(source.length * CArray.sizeof_type(source.type)));
            }
        }

        int status = opcuda_get_status();
        if (status != 0)
        {
            throw new System.Exception();
        }

        return;
    }

    if (source.type == EType.float_t)
    {
        if (buf == null)
        {
            throw new System.Exception();
        }

        if (buf.type != EType.float_t)
        {
            throw new System.Exception();
        }

        if (buf.length < source.length)
        {
            throw new System.Exception();
        }

        alloc(ref destination, source.length);

        unsafe
        {
            opcuda_memcpy_d2h(source.ptr, buf.hptr, (uint)(source.length * CArray.sizeof_type(source.type)));

            // Read back through the same host pointer the transfer just wrote to.
            float* bufp = (float*)buf.hptr;
            for (int a = 0; a < source.length; a++)
            {
                destination[a] = (double)bufp[a];
            }
        }

        int status = opcuda_get_status();
        if (status != 0)
        {
            throw new System.Exception();
        }

        return;
    }

    throw new System.Exception();
}
/// <summary>
/// Copies an <c>int_t</c> array into a managed <c>int[]</c> with <c>opcuda_memcpy_d2h</c>.
/// The destination is replaced by a new array of the source length when it is shorter than
/// the source.
/// </summary>
/// <param name="destination">Receives the values; must not be null on entry because its
/// length is read.</param>
/// <param name="source">Array to read from; its type must be <c>EType.int_t</c>.</param>
/// <exception cref="System.Exception">
/// Thrown when the source type is not <c>int_t</c> or <c>opcuda_get_status</c> is non-zero
/// after the copy.
/// </exception>
public static void copy(ref int[] destination, CArray source)
{
    if (source.type != EType.int_t)
    {
        throw new System.Exception();
    }

    // Grow only; an already large enough array is reused as is.
    if (destination.Length < source.length)
    {
        destination = new int[source.length];
    }

    unsafe
    {
        fixed (int* pinned = &destination[0])
        {
            opcuda_memcpy_d2h(source.ptr, (IntPtr)pinned, (uint)(source.Size_of_one * source.length));
        }
    }

    if (opcuda_get_status() != 0)
    {
        throw new System.Exception();
    }
}
/// <summary>
/// Copies <paramref name="source"/> into <paramref name="destination"/>. The destination
/// takes over the source type, is passed to its instance <c>alloc</c> with the source length,
/// and the transfer routine is chosen from the memory sides of both arrays.
/// </summary>
/// <param name="destination">Target array; must share the model of <paramref name="source"/>.</param>
/// <param name="source">Array to copy from.</param>
/// <exception cref="System.Exception">
/// Thrown when the models differ, when both arrays are on the host, or when
/// <c>opcuda_get_status</c> is non-zero after the transfer.
/// </exception>
public static void copy(ref CArray destination, CArray source)
{
    if (source._model != destination._model)
    {
        throw new System.Exception();
    }

    destination._type = source.type;
    destination.alloc(source.length, source.type, destination.side, source._model);

    if (source.side == EMemorySpace.device)
    {
        if (destination.side == EMemorySpace.host)
        {
            opcuda_memcpy_d2h(source.ptr, destination.hptr, (uint)(source.Size_of_one * source.length));
        }
        else if (destination.side == EMemorySpace.device)
        {
            // Four-byte elements go through opcuda_scopy1 with an element count; every other
            // element size uses a byte-wise device-to-device copy.
            if (source.Size_of_one == 4)
            {
                opcuda_scopy1(destination.ptr, source.ptr, (uint)(source.length));
            }
            else
            {
                opcuda_memcpy_d2d(destination.ptr, source.ptr, (uint)(source.Size_of_one * source.length));
            }
        }
    }
    else if (source.side == EMemorySpace.host)
    {
        if (destination.side == EMemorySpace.device)
        {
            opcuda_memcpy_h2d(destination.ptr, source.hptr, (uint)(source.Size_of_one * source.length));
        }
        else if (destination.side == EMemorySpace.host)
        {
            // Host-to-host copies are rejected.
            throw new System.Exception();
        }
    }

    if (opcuda_get_status() != 0)
    {
        throw new System.Exception();
    }
}
/// <summary>
/// Ensures <paramref name="buf"/> can hold <paramref name="n"/> elements of
/// <paramref name="type"/>. A null buffer, or one whose size is below
/// <c>n * sizeof_type(type)</c>, is replaced by a newly constructed array; a buffer that is
/// already large enough is left as it is.
/// </summary>
/// <param name="buf">Buffer to check; replaced when missing or too small.</param>
/// <param name="n">Required element count.</param>
/// <param name="type">Element type.</param>
/// <param name="side">Memory side passed to the constructor when a new array is created.</param>
/// <param name="model">Model that an existing buffer must belong to.</param>
/// <param name="name">Name passed to the constructor when a new array is created.</param>
/// <exception cref="System.Exception">
/// Thrown when the existing buffer is sub-allocated or belongs to a different model.
/// </exception>
public static void alloc(ref CArray buf, int n, EType type, EMemorySpace side, CModel model, string name)
{
    if (buf != null)
    {
        if (buf._suballocated)
        {
            throw new System.Exception();
        }

        bool tooSmall = buf.sz < n * sizeof_type(type);

        // The model has to match whether or not the buffer is going to be replaced.
        if (model != buf._model)
        {
            throw new System.Exception();
        }

        if (!tooSmall)
        {
            return;
        }
    }

    buf = new CArray(n, type, side, model, name);
}
/// <summary>
/// Benchmarks <c>opcuda_mt_benchmark</c> on device <paramref name="dev"/> and prints two
/// throughput figures to the console: the first loop copies every batch back to the host
/// with <c>opcuda_memcpy_d2h</c>, the second loop only runs the generator.
/// </summary>
/// <param name="dev">Device index passed to <c>opcuda_set_device</c>.</param>
/// <exception cref="ExecutionEngineException">
/// Thrown when <c>opcuda_cublas_init</c> or <c>opcuda_shutdown</c> returns non-zero.
/// </exception>
/// <exception cref="System.IO.EndOfStreamException">
/// Thrown when <c>MersenneTwister.dat</c> ends before the number of bytes given by its length
/// has been read.
/// </exception>
/// <exception cref="System.Exception">
/// Thrown when <c>opcuda_mc_load_mt_gpu</c> returns non-zero.
/// </exception>
public static void run_benchmark_gpu_mt(uint dev)
{
    Console.WriteLine("running the Mersenne Twister benchmark on device " + dev);

    int nscen_per_batch = 4096 * 250;
    int nbatches = 200;

    int status = opcuda_cublas_init();
    if (status != 0)
    {
        throw new ExecutionEngineException();
    }

    opcuda_set_device(dev);

    // Failure to open the data file is reported and the benchmark is abandoned.
    FileStream stream;
    try
    {
        stream = new FileStream("MersenneTwister.dat", FileMode.Open, FileAccess.Read);
    }
    catch
    {
        Console.WriteLine("device_mc_init() failed on device " + dev + ". Aborting");
        return;
    }

    byte[] MT;

    // Dispose the stream deterministically; it used to stay open until finalisation.
    using (stream)
    {
        MT = new byte[stream.Length];

        // Stream.Read may return fewer bytes than requested, so keep reading until full.
        int filled = 0;
        while (filled < MT.Length)
        {
            int got = stream.Read(MT, filled, MT.Length - filled);
            if (got <= 0)
            {
                throw new System.IO.EndOfStreamException("MersenneTwister.dat ended before all bytes were read");
            }

            filled += got;
        }
    }

    unsafe
    {
        fixed (byte* MTp = &MT[0])
        {
            // MT was sized from stream.Length, so LongLength carries the same value.
            status = opcuda_mc_load_mt_gpu(MTp, MT.LongLength);
            if (status != 0)
            {
                throw new System.Exception();
            }
        }
    }

    Random rand = new Random();
    int nrng = opcuda_mc_nrng();
    CArray host_seed_rg = new CArray(nrng, EType.int_t, EMemorySpace.host, null, "host_seed_rg");

    unsafe
    {
        int* seed_rg = (int*)host_seed_rg.hptr;
        for (int rg = 0; rg < nrng; rg++)
        {
            seed_rg[rg] = (int)(rand.NextDouble() * int.MaxValue);
        }
    }

    CArray device_rgstatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, null, "mcbuf._device_rgstatus");

    unsafe
    {
        opcuda_mc_setseed(host_seed_rg.hptr, device_rgstatus.ptr);
    }

    CArray device_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.device, null, "device_unif_s");
    CArray host_unif_s = new CArray(nscen_per_batch, EType.float_t, EMemorySpace.host, null, "host_unif_s");

    // First measurement: generation plus a copy back to the host for every batch.
    CStopWatch sw = new CStopWatch();
    sw.Reset();

    unsafe
    {
        for (int b = 0; b < nbatches; b++)
        {
            opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);

            // NOTE(review): the byte count uses sizeof(short) although both buffers are
            // declared float_t - confirm whether the smaller copy is intended.
            opcuda_memcpy_d2h(device_unif_s.ptr, host_unif_s.hptr, (uint)(sizeof(short) * host_unif_s.length));
        }
    }

    opcuda_thread_synchronize();
    double time = sw.Peek();
    double nevals = (double)nbatches * (double)nscen_per_batch;
    double milion_evals_per_second = nevals / (1000000 * time);

    // NOTE(review): opcuda_shutdown is called here and the device buffers are used again
    // below - confirm this is safe for the native library.
    status = opcuda_shutdown();
    if (status != 0)
    {
        throw new ExecutionEngineException();
    }

    Console.WriteLine("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");

    // Second measurement: generation only.
    sw.Reset();

    unsafe
    {
        for (int b = 0; b < nbatches; b++)
        {
            opcuda_mt_benchmark(device_rgstatus.ptr, device_unif_s.ptr, nscen_per_batch);
        }
    }

    opcuda_thread_synchronize();
    time = sw.Peek();
    nevals = (double)nbatches * (double)nscen_per_batch;
    milion_evals_per_second = nevals / (1000000 * time);

    status = opcuda_shutdown();
    if (status != 0)
    {
        throw new ExecutionEngineException();
    }

    Console.WriteLine("mc performance: " + String.Format("{0:0.0}", milion_evals_per_second) + " milion eval/sec");
}
/// <summary>
/// Forwards the pointer and length of <paramref name="buf"/>, together with the
/// single-precision value of <paramref name="c"/> and the literal 1, to <c>opcuda_ssetall</c>.
/// </summary>
/// <param name="buf">Array whose pointer and length are passed to the native routine.</param>
/// <param name="c">Fill value, narrowed to float before the call.</param>
public void device_setall(ref CArray buf, double c)
{
    float fillValue = (float)c;

    opcuda_ssetall(buf.ptr, buf.length, fillValue, 1);
}
/// <summary>
/// Copies the <c>int_t</c> array <paramref name="a"/> into managed memory and returns the
/// largest absolute difference between it and <paramref name="b"/>, element by element.
/// </summary>
/// <param name="a">Array to compare; its type must be <c>EType.int_t</c>.</param>
/// <param name="b">Reference values. Every index of <paramref name="b"/> is visited, so it
/// must not be longer than <paramref name="a"/>.</param>
/// <param name="maxerror">Receives the largest absolute difference, or 0 when none is positive.</param>
/// <param name="imax">Receives the index of the largest difference, or -1 when none is positive.</param>
/// <returns>The same value that is stored in <paramref name="maxerror"/>.</returns>
/// <exception cref="System.Exception">Thrown when <paramref name="a"/> is not of type <c>int_t</c>.</exception>
public static double mismatch(CArray a, int[] b, ref double maxerror, ref int imax)
{
    if (a.type != EType.int_t)
    {
        throw new System.Exception();
    }

    imax = -1;

    int[] a1 = new int[a.length];
    copy(ref a1, a);

    maxerror = 0;
    for (int i = 0; i < b.Length; i++)
    {
        // Subtract in double precision: an int subtraction can wrap around, and
        // Math.Abs(int) throws OverflowException for int.MinValue.
        double error = Math.Abs((double)a1[i] - (double)b[i]);
        if (error > maxerror)
        {
            maxerror = error;
            imax = i;
        }
    }

    return maxerror;
}
/// <summary>
/// Sets every element of <paramref name="buf"/> to the single-precision value of
/// <paramref name="c"/>. A device buffer is filled through <c>buf.model.device_setall</c>;
/// otherwise the elements are written one by one through the buffer pointer.
/// </summary>
/// <param name="buf">Buffer to fill.</param>
/// <param name="c">Fill value, narrowed to float on both paths.</param>
public static void setall(ref CArray buf, double c)
{
    if (buf.side == EMemorySpace.device)
    {
        buf.model.device_setall(ref buf, (float)c);
        return;
    }

    unsafe
    {
        float fillValue = (float)c;
        float* elements = (float*)buf.ptr;

        int index = 0;
        while (index < buf.length)
        {
            elements[index] = fillValue;
            index++;
        }
    }
}
/// <summary>
/// Benchmarks <c>opcuda_mt_benchmark</c> on device <paramref name="dev"/>. Two throughput
/// figures are measured - one with a copy back to the host after every batch, one without -
/// and each is written to <c>log</c> and stored in <c>benchmarks</c> at index <paramref name="dev"/>.
/// </summary>
/// <param name="dev">Device index passed to <c>opcuda_set_device</c> and used as the slot in
/// the per-device result arrays.</param>
/// <exception cref="ExecutionEngineException">
/// Thrown when <c>opcuda_cublas_init</c> or one of the checked <c>opcuda_shutdown</c> calls
/// returns non-zero.
/// </exception>
public void run_benchmark_gpu_mt(uint dev)
{
    log.Add("running sglv1f, device " + dev);

    int scenariosPerBatch = 4096 * 25;
    int batchCount = 20;

    int rc = opcuda_cublas_init();
    if (rc != 0)
    {
        throw new ExecutionEngineException();
    }

    opcuda_set_device(dev);
    opcuda_mc_load_mt_gpu();

    // One random seed per generator, written straight into the host array.
    Random seedSource = new Random();
    int generatorCount = opcuda_mc_nrng();
    CArray hostSeeds = new CArray(generatorCount, EType.int_t, EMemorySpace.host, null, "host_seed_rg");
    unsafe
    {
        int* seeds = (int*)hostSeeds.hptr;
        int g = 0;
        while (g < generatorCount)
        {
            seeds[g] = (int)(seedSource.NextDouble() * int.MaxValue);
            g++;
        }
    }

    CArray deviceStatus = new CArray(opcuda_mc_status_sz(), EType.int_t, EMemorySpace.device, null, "mcbuf._device_rgstatus");
    unsafe
    {
        opcuda_mc_setseed(hostSeeds.hptr, deviceStatus.ptr);
    }

    CArray deviceUniform = new CArray(scenariosPerBatch, EType.float_t, EMemorySpace.device, null, "device_unif_s");
    CArray hostUniform = new CArray(scenariosPerBatch, EType.float_t, EMemorySpace.host, null, "host_unif_s");

    // Pass 1: generate and copy each batch back to the host.
    CStopWatch timer = new CStopWatch();
    timer.Reset();
    unsafe
    {
        int batch = 0;
        while (batch < batchCount)
        {
            opcuda_mt_benchmark(deviceStatus.ptr, deviceUniform.ptr, scenariosPerBatch);
            // NOTE(review): the byte count uses sizeof(short) although both buffers are
            // declared float_t - confirm whether the smaller copy is intended.
            opcuda_memcpy_d2h(deviceUniform.ptr, hostUniform.hptr, (uint)(sizeof(short) * hostUniform.length));
            batch++;
        }
    }
    opcuda_thread_synchronize();

    double elapsed = timer.Peek();
    double evalCount = (double)batchCount * (double)scenariosPerBatch;
    double millionsPerSecond = evalCount / (1000000 * elapsed);

    rc = opcuda_shutdown();
    if (rc != 0)
    {
        throw new ExecutionEngineException();
    }

    if (benchmarks == null)
    {
        benchmarks = new SBenchmarks();
    }
    if (benchmarks.gpu_mt_with_copy_performance_dev == null)
    {
        benchmarks.gpu_mt_with_copy_performance_dev = new double[ndev];
    }
    benchmarks.gpu_mt_with_copy_performance_dev[dev] = millionsPerSecond;

    string withCopyLine = "mc performance: " + String.Format("{0:0.0}", millionsPerSecond) + " milion eval/sec";
    log.Add(withCopyLine);

    // Pass 2: generate only.
    timer.Reset();
    unsafe
    {
        int batch = 0;
        while (batch < batchCount)
        {
            opcuda_mt_benchmark(deviceStatus.ptr, deviceUniform.ptr, scenariosPerBatch);
            batch++;
        }
    }
    opcuda_thread_synchronize();

    elapsed = timer.Peek();
    evalCount = (double)batchCount * (double)scenariosPerBatch;
    millionsPerSecond = evalCount / (1000000 * elapsed);

    rc = opcuda_shutdown();
    if (rc != 0)
    {
        throw new ExecutionEngineException();
    }

    if (benchmarks == null)
    {
        benchmarks = new SBenchmarks();
    }
    if (benchmarks.gpu_mt_no_copy_performance_dev == null)
    {
        benchmarks.gpu_mt_no_copy_performance_dev = new double[ndev];
    }
    benchmarks.gpu_mt_no_copy_performance_dev[dev] = millionsPerSecond;

    string noCopyLine = "mc performance: " + String.Format("{0:0.0}", millionsPerSecond) + " milion eval/sec";
    log.Add(noCopyLine);

    // The final shutdown call's return value is not inspected.
    opcuda_shutdown();
}
/// <summary>
/// Prepares <c>_device_gen_yy_i</c> (<c>ni * d * d</c> floats), builds and uploads the
/// parameter table <c>_device_pars</c>, zero-fills the generator array with
/// <c>opcuda_ssetall</c> and then calls <c>opcuda_sgsvg</c>. The generator array is copied
/// into a managed float array before and after the <c>opcuda_sgsvg</c> call.
/// </summary>
/// <remarks>
/// Parameter table layout, as written by this method: the first <c>d</c> entries hold
/// <c>grid.host_d_xval(y0)</c>; six blocks of <c>ni * d</c> entries follow, indexed by
/// <c>i * d + y0</c>, holding in order the product <c>host_d_ir_yi[d * i] * host_d_xval(y0)</c>
/// and the results of <c>SVol</c>, <c>VolDrift</c>, <c>VolVol</c>, <c>Jumpsz_minus</c> and
/// <c>Jumpsz_plus</c>.
/// </remarks>
protected override void device_sgen()
{
    int ni = grid.ni;
    int d = grid.d;

    CArray.alloc(ref this._device_gen_yy_i, ni * d * d, EType.float_t, EMemorySpace.device, this, "_device_gen_yy_i");

    unsafe
    {
        int blockSize = ni * d;
        int tableSize = d + 6 * blockSize;

        _device_pars = new CArray(tableSize, EType.float_t, EMemorySpace.device, this, "_device_pars");
        float[] table = new float[tableSize];

        for (int y0 = 0; y0 < d; y0++)
        {
            table[y0] = (float)grid.host_d_xval(y0);

            for (int i = 0; i < ni; i++)
            {
                // Position of (i, y0) inside the first block; later blocks are blockSize apart.
                int slot = d + i * d + y0;

                // NOTE(review): the index d * i does not depend on y0 - confirm this is intended.
                table[slot] = (float)(grid.host_d_ir_yi[d * i] * grid.host_d_xval(y0));
                table[slot + 1 * blockSize] = (float)SVol(i, y0);
                table[slot + 2 * blockSize] = (float)VolDrift(i, y0);
                table[slot + 3 * blockSize] = (float)VolVol(i, y0);
                table[slot + 4 * blockSize] = (float)Jumpsz_minus(i, y0);
                table[slot + 5 * blockSize] = (float)Jumpsz_plus(i, y0);
            }
        }

        CArray.copy(ref _device_pars, table);
    }

    opcuda_ssetall(_device_gen_yy_i.ptr, grid.d * grid.d * grid.ni, 0, 1);

    float[] snapshot = new float[_device_gen_yy_i.length];
    CArray.copy(ref snapshot, _device_gen_yy_i);

    opcuda_sgsvg(_device_gen_yy_i.ptr, ni, grid.nx, grid.nr, _device_invm.ptr, _device_pars.ptr);

    CArray.copy(ref snapshot, _device_gen_yy_i);
}