/// <summary>
/// Captures the JIT output from <paramref name="ctx"/> (a clone of its config, its PTX and its HIR
/// after applying <c>FreezeForever</c>) and builds the two delegates that marshal kernel fields
/// into a flat argument list and back.
/// </summary>
/// <param name="ctx">JIT context providing <c>Cfg</c>, <c>Ptx</c>, <c>Hir</c> and <c>Allocator.Fields</c>.</param>
public JittedKernel(JitContext ctx)
{
    _cfg = ctx.Cfg.Clone();
    _ptx = ctx.Ptx;
    _hir = ctx.Hir.Fluent(blk => blk.FreezeForever());

    // Field -> layout map; its key order defines the argument order for both delegates below.
    var flds = ctx.Allocator.Fields;

    // Host -> device: flatten every field of the kernel object into the argument list.
    _memcpyHostToDevice = kernel =>
    {
        var args = new KernelArguments();
        foreach (var fld in flds.Keys)
        {
            var value = fld.GetValue(kernel);
            var layout = flds[fld];
            if (layout is SlotLayout)
            {
                // A slot field contributes exactly one input argument.
                args.Add(value.In());
            }
            else if (layout is ArrayLayout)
            {
                // An array field contributes the array itself (in/out) followed by
                // one input argument per dimension length, i.e. 1 + rank arguments.
                var arr = value.AssertCast<Array>();
                args.Add(arr.InOut());
                var rank = arr.Rank;
                for (var dim = 0; dim < rank; dim++)
                {
                    args.Add(arr.GetLength(dim).In());
                }
            }
            else
            {
                throw AssertionHelper.Fail();
            }
        }

        return args;
    };

    // Device -> host: walk the result in the same field order and write values back.
    _memcpyDeviceToHost = (result, kernel) =>
    {
        var idx = 0;
        foreach (var fld in flds.Keys)
        {
            Object value;
            var layout = flds[fld];
            if (layout is SlotLayout)
            {
                value = result[idx];
                idx += 1;
            }
            else if (layout is ArrayLayout)
            {
                value = result[idx];
                // Skip the array plus its per-dimension length arguments. This must mirror the
                // 1 + rank arguments emitted above; a fixed stride of 3 is only right for rank 2.
                idx += 1 + value.AssertCast<Array>().Rank;
            }
            else
            {
                throw AssertionHelper.Fail();
            }

            fld.SetValue(kernel, value);
        }
    };
}
/// <summary>
/// Captures the JIT output from <paramref name="ctx"/> (a clone of its config, its PTX and its HIR
/// after applying <c>FreezeForever</c>) and builds the two delegates that marshal kernel fields
/// into a flat argument list and back.
/// </summary>
/// <param name="ctx">JIT context providing <c>Cfg</c>, <c>Ptx</c>, <c>Hir</c> and <c>Allocator.Fields</c>.</param>
public JittedKernel(JitContext ctx)
{
    _cfg = ctx.Cfg.Clone();
    _ptx = ctx.Ptx;
    _hir = ctx.Hir.Fluent(blk => blk.FreezeForever());

    // Field -> layout map; its key order defines the argument order for both delegates below.
    var flds = ctx.Allocator.Fields;

    // Host -> device: flatten every field of the kernel object into the argument list.
    _memcpyHostToDevice = kernel =>
    {
        var args = new KernelArguments();
        foreach (var fld in flds.Keys)
        {
            var value = fld.GetValue(kernel);
            var layout = flds[fld];
            if (layout is SlotLayout)
            {
                // A slot field contributes exactly one input argument.
                args.Add(value.In());
            }
            else if (layout is ArrayLayout)
            {
                // An array field contributes the array itself (in/out) followed by
                // one input argument per dimension length, i.e. 1 + rank arguments.
                var arr = value.AssertCast<Array>();
                args.Add(arr.InOut());
                var rank = arr.Rank;
                for (var dim = 0; dim < rank; dim++)
                {
                    args.Add(arr.GetLength(dim).In());
                }
            }
            else
            {
                throw AssertionHelper.Fail();
            }
        }

        return args;
    };

    // Device -> host: walk the result in the same field order and write values back.
    _memcpyDeviceToHost = (result, kernel) =>
    {
        var idx = 0;
        foreach (var fld in flds.Keys)
        {
            Object value;
            var layout = flds[fld];
            if (layout is SlotLayout)
            {
                value = result[idx];
                idx += 1;
            }
            else if (layout is ArrayLayout)
            {
                value = result[idx];
                // Skip the array plus its per-dimension length arguments. This must mirror the
                // 1 + rank arguments emitted above; a fixed stride of 3 is only right for rank 2.
                idx += 1 + value.AssertCast<Array>().Rank;
            }
            else
            {
                throw AssertionHelper.Fail();
            }

            fld.SetValue(kernel, value);
        }
    };
}
/// <summary>
/// Creates an invocation that pairs a jitted function with its own <c>KernelArguments</c>
/// instance built from the supplied arguments.
/// </summary>
/// <param name="function">Function to invoke; passed through <c>AssertNotNull</c> before being stored.</param>
/// <param name="args">Arguments handed to the <c>KernelArguments</c> constructor.</param>
public KernelInvocation(JittedFunction function, IEnumerable<KernelArgument> args)
{
    // NOTE(review): presumably makes sure the CUDA driver is ready before anything else - confirm.
    CudaDriver.Ensure();

    var target = function.AssertNotNull();
    Function = target;

    Args = new KernelArguments(args);
}