// todo. cache jitted kernels
// Compiles the kernel type's HIR down to PTX and wraps the result in a JittedKernel.
// The hierarchy walk skips compiler-generated types (e.g. closure/display classes),
// compiling the first user-authored type found.
public static JittedKernel DoCompile(CudaConfig cfg, Type t_kernel)
{
    t_kernel = t_kernel.Hierarchy().AssertFirst(t => !t.Assembly.HasAttr<CompilerGeneratedAttribute>());
    using (var ctx = new JitContext(cfg, t_kernel))
    {
        // Pass 1: inline (expand) callees into the kernel's HIR.
        var inliner_ctx = new ExpansionContext(Kernel);
        Hir = Hir.Expand(inliner_ctx);
        Log.EnsureBlankLine();
        Log.WriteLine("After inlining:");
        Log.WriteLine(Hir.DumpAsText());

        // Pass 2: assign memory tiers, then log anything off the defaults
        // (fields default to Global, symbols to Private).
        MemoryAllocator.InferAllocationScheme();
        Log.EnsureBlankLine();
        Log.WriteLine("Non-standard allocations:");
        var nonstandard_allocs = 0;
        Allocs.Fields.Where(kvp => kvp.Value != MemoryTier.Global).ForEach(kvp =>
        {
            Log.WriteLine(kvp.Key.GetCSharpRef(ToCSharpOptions.Informative));
            nonstandard_allocs++;
        });
        Allocs.Symbols.Where(kvp => kvp.Value != MemoryTier.Private).ForEach(kvp =>
        {
            Log.WriteLine(kvp.Key);
            nonstandard_allocs++;
        });
        Log.WriteLine((nonstandard_allocs == 0 ? "None" : "") + Environment.NewLine);

        // todo. also implement the following:
        // 1) downgrade to SSA
        // 2) perform SCC from "Constant Propagation with Conditional Branches"
        // when performing SCC don't forget to funcletize stuff e.g. Impl::Cfg and Impl::Device
        // 3) eliminate dead code

        // Pass 3: emit PTX from the transformed HIR.
        Generator.Traverse(Hir);
        Log.EnsureBlankLine();
        Log.WriteLine("Generated PTX:");
        Log.WriteLine();
        Log.WriteLine(Ptx);

        return new JittedKernel(ctx);
    }
}
// todo. cache jitted kernels
// Jit-compiles t_kernel under the given CUDA configuration and returns the launchable result.
public static JittedKernel DoCompile(CudaConfig cfg, Type t_kernel)
{
    // Skip compiler-generated wrappers in the hierarchy; compile the first hand-written type.
    t_kernel = t_kernel.Hierarchy().AssertFirst(t => !t.Assembly.HasAttr<CompilerGeneratedAttribute>());
    using (var ctx = new JitContext(cfg, t_kernel))
    {
        // Phase 1: inline callees into the kernel's HIR.
        var expansionCtx = new ExpansionContext(Kernel);
        Hir = Hir.Expand(expansionCtx);
        Log.EnsureBlankLine();
        Log.WriteLine("After inlining:");
        Log.WriteLine(Hir.DumpAsText());

        // Phase 2: infer memory tiers; report allocations that deviate from the
        // defaults (Global for fields, Private for symbols).
        MemoryAllocator.InferAllocationScheme();
        Log.EnsureBlankLine();
        Log.WriteLine("Non-standard allocations:");
        var oddAllocCount = 0;
        foreach (var fieldAlloc in Allocs.Fields.Where(kvp => kvp.Value != MemoryTier.Global))
        {
            Log.WriteLine(fieldAlloc.Key.GetCSharpRef(ToCSharpOptions.Informative));
            oddAllocCount++;
        }
        foreach (var symbolAlloc in Allocs.Symbols.Where(kvp => kvp.Value != MemoryTier.Private))
        {
            Log.WriteLine(symbolAlloc.Key);
            oddAllocCount++;
        }
        Log.WriteLine((oddAllocCount == 0 ? "None" : "") + Environment.NewLine);

        // todo. also implement the following:
        // 1) downgrade to SSA
        // 2) perform SCC from "Constant Propagation with Conditional Branches"
        // when performing SCC don't forget to funcletize stuff e.g. Impl::Cfg and Impl::Device
        // 3) eliminate dead code

        // Phase 3: emit PTX from the transformed HIR.
        Generator.Traverse(Hir);
        Log.EnsureBlankLine();
        Log.WriteLine("Generated PTX:");
        Log.WriteLine();
        Log.WriteLine(Ptx);

        return new JittedKernel(ctx);
    }
}
// Snapshots everything needed to launch the kernel from the (soon-to-be-disposed)
// jit context, and builds the marshaling closures that copy kernel state to and
// from the device around a launch.
public JittedKernel(JitContext ctx)
{
    _cfg = ctx.Cfg.Clone();
    _ptx = ctx.Ptx;
    _hir = ctx.Hir.Fluent(blk => blk.FreezeForever());

    var flds = ctx.Allocator.Fields;

    // Host -> device: flatten the kernel instance's fields into launch arguments.
    _memcpyHostToDevice = kernel =>
    {
        var args = new KernelArguments();
        flds.Keys.ForEach(fld =>
        {
            var value = fld.GetValue(kernel);
            if (flds[fld] is SlotLayout)
            {
                // Scalar slot: passed in by value.
                args.Add(value.In());
            }
            else if (flds[fld] is ArrayLayout)
            {
                // Array: the buffer itself (copied both ways), followed by one
                // length argument per dimension.
                var arr = value.AssertCast<Array>();
                args.Add(arr.InOut());
                var rank = arr.GetType().GetArrayRank();
                0.UpTo(rank - 1).ForEach(i => args.Add(arr.GetLength(i).In()));
            }
            else
            {
                throw AssertionHelper.Fail();
            }
        });
        return args;
    };

    // Device -> host: write results back into the kernel instance's fields.
    _memcpyDeviceToHost = (result, kernel) =>
    {
        var idx = 0;
        flds.Keys.ForEach(fld =>
        {
            Object value;
            if (flds[fld] is SlotLayout)
            {
                value = result[idx];
                idx += 1;
            }
            else if (flds[fld] is ArrayLayout)
            {
                value = result[idx];
                // Skip the buffer plus one length entry per dimension, mirroring
                // what _memcpyHostToDevice pushed. (Was a hard-coded "idx += 3",
                // which silently assumed rank-2 arrays and would misalign the
                // cursor for any other rank.)
                idx += 1 + fld.FieldType.GetArrayRank();
            }
            else
            {
                throw AssertionHelper.Fail();
            }
            fld.SetValue(kernel, value);
        });
    };
}
// Captures the jit context's outputs (config, PTX, frozen HIR) and prepares the
// host<->device marshaling delegates used to launch the compiled kernel.
public JittedKernel(JitContext ctx)
{
    _cfg = ctx.Cfg.Clone();
    _ptx = ctx.Ptx;
    _hir = ctx.Hir.Fluent(blk => blk.FreezeForever());

    var fieldLayouts = ctx.Allocator.Fields;

    // Host -> device: flatten the kernel's fields into launch arguments.
    _memcpyHostToDevice = kernel =>
    {
        var kernelArgs = new KernelArguments();
        fieldLayouts.Keys.ForEach(fld =>
        {
            var hostValue = fld.GetValue(kernel);
            if (fieldLayouts[fld] is SlotLayout)
            {
                kernelArgs.Add(hostValue.In());
            }
            else if (fieldLayouts[fld] is ArrayLayout)
            {
                // The buffer is copied both ways; each dimension's length follows it.
                var array = hostValue.AssertCast<Array>();
                kernelArgs.Add(array.InOut());
                var rank = array.GetType().GetArrayRank();
                0.UpTo(rank - 1).ForEach(dim => kernelArgs.Add(array.GetLength(dim).In()));
            }
            else
            {
                throw AssertionHelper.Fail();
            }
        });
        return kernelArgs;
    };

    // Device -> host: copy results back into the kernel's fields, walking the
    // argument list with a cursor.
    _memcpyDeviceToHost = (result, kernel) =>
    {
        var cursor = 0;
        fieldLayouts.Keys.ForEach(fld =>
        {
            Object roundtripped;
            if (fieldLayouts[fld] is SlotLayout)
            {
                roundtripped = result[cursor];
                cursor += 1;
            }
            else if (fieldLayouts[fld] is ArrayLayout)
            {
                roundtripped = result[cursor];
                // NOTE(review): fixed stride of 3 (buffer + 2 lengths) — this
                // presumes rank-2 arrays; confirm other ranks can't reach here.
                cursor += 3;
            }
            else
            {
                throw AssertionHelper.Fail();
            }
            fld.SetValue(kernel, roundtripped);
        });
    };
}
// Sets up the singleton jit context for compiling t_kernel's "RunKernel" method.
// Statement order matters: the Current slot is claimed first (asserted free),
// the kernel method is located and validated, its body is decompiled to HIR,
// and the PTX module/entry are created last.
public JitContext(CudaConfig cfg, Type t_kernel)
{
    // Only one jit context may be live at a time.
    Current.AssertNull();
    Current = this;
    Cfg = cfg;

    // The entry point must be a parameterless, void-returning instance method
    // named "RunKernel".
    Kernel = t_kernel.GetMethod("RunKernel", BF.All);
    Kernel.AssertNotNull();
    Kernel.IsInstance().AssertTrue();
    Kernel.Params().AssertEmpty();
    (Kernel.Ret() == typeof(void)).AssertTrue();

    // Decompile the method body into HIR for the later passes to transform.
    Hir = Kernel.Decompile().Body;
    AllocationScheme = null;
    Allocator = new Allocator();
    Generator = new Generator();

    // Create the PTX module and the entry that generated code will target.
    Module = new PtxModule(Cfg.Target, Cfg.Version);
    Entry = Module.AddEntry(Kernel.Name);
    Params = Entry.Params;
}
// Initializes the compilation context for t_kernel, claiming the global Current
// slot for the duration of the jit (released in DisposeManagedResources).
public JitContext(CudaConfig cfg, Type t_kernel)
{
    // Enforce single-instance usage: no other context may be active.
    Current.AssertNull();
    Current = this;
    Cfg = cfg;

    // Locate and validate the kernel entry point: an instance method named
    // "RunKernel" taking no parameters and returning void.
    Kernel = t_kernel.GetMethod("RunKernel", BF.All);
    Kernel.AssertNotNull();
    Kernel.IsInstance().AssertTrue();
    Kernel.Params().AssertEmpty();
    (Kernel.Ret() == typeof(void)).AssertTrue();

    // Decompile to HIR; allocation scheme is filled in by a later pass.
    Hir = Kernel.Decompile().Body;
    AllocationScheme = null;
    Allocator = new Allocator();
    Generator = new Generator();

    // PTX targets come from the config; the entry is named after the kernel method.
    Module = new PtxModule(Cfg.Target, Cfg.Version);
    Entry = Module.AddEntry(Kernel.Name);
    Params = Entry.Params;
}
// Releases the singleton Current slot claimed in the constructor; asserts that
// this instance is indeed the active jit context before clearing it.
protected override void DisposeManagedResources()
{
    (Current == this).AssertTrue();
    Current = null;
}
// Counterpart to the constructor's "Current = this": verifies ownership of the
// singleton slot and frees it so another jit context can be created.
protected override void DisposeManagedResources()
{
    (Current == this).AssertTrue();
    Current = null;
}