// todo. cache jitted kernels
/// <summary>
/// Runs the JIT pipeline for a kernel type: expands (inlines) the kernel's HIR,
/// infers the memory allocation scheme, generates PTX and wraps the outcome
/// into a <see cref="JittedKernel"/>. Every stage is traced to <c>Log</c>.
/// </summary>
/// <param name="cfg">Configuration handed to the <see cref="JitContext"/>.</param>
/// <param name="t_kernel">
/// Kernel type; it is first replaced by the first type in its hierarchy whose
/// assembly is not marked with <see cref="CompilerGeneratedAttribute"/>.
/// </param>
/// <returns>A <see cref="JittedKernel"/> built from the context before it is disposed.</returns>
public static JittedKernel DoCompile(CudaConfig cfg, Type t_kernel)
{
    // Skip over types that live in compiler-generated assemblies.
    var kernelType = t_kernel.Hierarchy().AssertFirst(t => !t.Assembly.HasAttr<CompilerGeneratedAttribute>());

    // NOTE(review): Kernel, Hir, Log, Allocs, Generator and Ptx are referenced unqualified;
    // presumably they resolve against the context created below - confirm.
    using (var ctx = new JitContext(cfg, kernelType))
    {
        var expansion = new ExpansionContext(Kernel);
        Hir = Hir.Expand(expansion);

        Log.EnsureBlankLine();
        Log.WriteLine("After inlining:");
        Log.WriteLine(Hir.DumpAsText());

        MemoryAllocator.InferAllocationScheme();

        Log.EnsureBlankLine();
        Log.WriteLine("Non-standard allocations:");

        var reported = 0;

        // Fields are reported when they are not placed in global memory.
        foreach (var field in Allocs.Fields.Where(kvp => kvp.Value != MemoryTier.Global))
        {
            Log.WriteLine(field.Key.GetCSharpRef(ToCSharpOptions.Informative));
            reported++;
        }

        // Symbols are reported when they are not placed in private memory.
        foreach (var symbol in Allocs.Symbols.Where(kvp => kvp.Value != MemoryTier.Private))
        {
            Log.WriteLine(symbol.Key);
            reported++;
        }

        var summary = reported == 0 ? "None" : "";
        Log.WriteLine(summary + Environment.NewLine);

        // todo. also implement the following:
        // 1) downgrade to SSA
        // 2) perform SCC from "Constant Propagation with Conditional Branches"
        //    when performing SCC don't forget to funcletize stuff e.g. Impl::Cfg and Impl::Device
        // 3) eliminate dead code

        Generator.Traverse(Hir);

        Log.EnsureBlankLine();
        Log.WriteLine("Generated PTX:");
        Log.WriteLine();
        Log.WriteLine(Ptx);

        return new JittedKernel(ctx);
    }
}
/// <summary>
/// Captures the results of a JIT session (a clone of the config, the PTX text and the
/// HIR) and prepares the two marshalling delegates used around a kernel launch.
/// </summary>
/// <param name="ctx">
/// The JIT context to read from. Its <c>Allocator.Fields</c> map is captured by the
/// marshalling delegates and is enumerated each time they run.
/// </param>
public JittedKernel(JitContext ctx)
{
    _cfg = ctx.Cfg.Clone();
    _ptx = ctx.Ptx;
    _hir = ctx.Hir.Fluent(blk => blk.FreezeForever());

    var flds = ctx.Allocator.Fields;

    // Host -> device: one argument per slot field; for an array field, the array itself
    // followed by one length argument per dimension (i.e. 1 + rank arguments).
    _memcpyHostToDevice = kernel =>
    {
        var args = new KernelArguments();
        flds.Keys.ForEach(fld =>
        {
            var value = fld.GetValue(kernel);
            if (flds[fld] is SlotLayout)
            {
                args.Add(value.In());
            }
            else if (flds[fld] is ArrayLayout)
            {
                var arr = value.AssertCast<Array>();
                args.Add(arr.InOut());

                var rank = arr.GetType().GetArrayRank();
                0.UpTo(rank - 1).ForEach(i => args.Add(arr.GetLength(i).In()));
            }
            else
            {
                throw AssertionHelper.Fail();
            }
        });

        return args;
    };

    // Device -> host: walks the results in the same field order and writes each value
    // back into the kernel object.
    _memcpyDeviceToHost = (result, kernel) =>
    {
        var idx = 0;
        flds.Keys.ForEach(fld =>
        {
            Object value;
            if (flds[fld] is SlotLayout)
            {
                value = result[idx];
                idx += 1;
            }
            else if (flds[fld] is ArrayLayout)
            {
                value = result[idx];

                // Skip the array plus one length entry per dimension, mirroring what the
                // upload delegate emitted. This used to be a hard-coded 3, which is only
                // right for rank-2 arrays and misaligns every following field otherwise.
                // NOTE(review): assumes result[idx] holds the array itself - confirm.
                var arr = value.AssertCast<Array>();
                idx += 1 + arr.Rank;
            }
            else
            {
                throw AssertionHelper.Fail();
            }

            fld.SetValue(kernel, value);
        });
    };
}
/// <summary>
/// Configures a <c>WaveKernel</c> through a <see cref="CudaConfig"/> that opts in the
/// "Conflux.Playground" assembly and executes it on a 202x202 <c>Matrix&lt;Cell&gt;</c>.
/// The outcome is not checked yet.
/// </summary>
public void WaveCuda()
{
    var config = new CudaConfig();
    config.Codebase.OptIn(type => type.Assembly.GetName().Name == "Conflux.Playground");

    var waveKernel = config.Configure<WaveKernel>();
    var initialField = new Matrix<Cell>(202, 202);

    var after2s = waveKernel.Execute(initialField);
    // todo. validate the result
}
/// <summary>
/// Opens a JIT session for the given kernel type and publishes it as <c>Current</c>.
/// Locates the kernel's <c>RunKernel</c> method, checks its shape, decompiles its body
/// and creates the allocator, generator and PTX module for the session.
/// </summary>
/// <param name="cfg">Configuration; its <c>Target</c> and <c>Version</c> parameterize the PTX module.</param>
/// <param name="t_kernel">
/// Type expected to declare an instance method <c>RunKernel</c> with no parameters
/// and a <c>void</c> return type.
/// </param>
public JitContext(CudaConfig cfg, Type t_kernel)
{
    // Only one context may be active at a time.
    Current.AssertNull();
    Current = this;

    try
    {
        Cfg = cfg;

        Kernel = t_kernel.GetMethod("RunKernel", BF.All);
        Kernel.AssertNotNull();
        Kernel.IsInstance().AssertTrue();
        Kernel.Params().AssertEmpty();
        (Kernel.Ret() == typeof(void)).AssertTrue();

        Hir = Kernel.Decompile().Body;
        AllocationScheme = null;
        Allocator = new Allocator();
        Generator = new Generator();

        Module = new PtxModule(Cfg.Target, Cfg.Version);
        Entry = Module.AddEntry(Kernel.Name);
        Params = Entry.Params;
    }
    catch
    {
        // A constructor that throws never hands an object to the caller's using-statement,
        // so Dispose will not run. Roll back the registration here, otherwise Current stays
        // set and every later construction fails at the AssertNull above.
        Current = null;
        throw;
    }
}
/// <summary>
/// Cross-compiles <paramref name="t_kernel"/> with a 16x16x1 block configuration and
/// verifies the produced PTX and HIR dumps against embedded reference resources named
/// <c>&lt;namespace&gt;.Reference.&lt;kernel name&gt;.ptx</c> / <c>.hir</c>.
/// </summary>
/// <param name="t_kernel">Kernel type to compile; its <c>Name</c> selects the reference resources.</param>
protected void TestKernelCrosscompilation(Type t_kernel)
{
    var cfg = new CudaConfig{BlockDim = new dim3(16, 16, 1)};
    cfg.Codebase.OptIn(t => t.Assembly.GetName().Name == "Conflux.Playground");

    using (Runtimes.Activate(new CudaRuntime(cfg, t_kernel)))
    {
        var result = JitCompiler.DoCompile(cfg, t_kernel);
        var s_ptx_actual = result.Ptx;
        var s_hir_actual = result.Hir.DumpAsText();

        // Reference files are looked up in the assembly and namespace of the type
        // that declares this method.
        var t_declaring = MethodInfo.GetCurrentMethod().DeclaringType;
        var asm = t_declaring.Assembly;
        var @namespace = t_declaring.Namespace;
        @namespace += ".Reference.";

        var resourceNames = asm.GetManifestResourceNames();
        var ptx_expectedName = @namespace + t_kernel.Name + ".ptx";
        var hir_expectedName = @namespace + t_kernel.Name + ".hir";

        // Resource names are identifiers, not linguistic text: compare them ordinally so
        // the match does not depend on the current culture's casing rules.
        var ptx_fileName = resourceNames.SingleOrDefault2(
            n => String.Equals(n, ptx_expectedName, StringComparison.OrdinalIgnoreCase));
        var hir_fileName = resourceNames.SingleOrDefault2(
            n => String.Equals(n, hir_expectedName, StringComparison.OrdinalIgnoreCase));

        Verify(s_ptx_actual, ptx_fileName, "crosscompiled PTX", t_kernel);
        Verify(s_hir_actual, hir_fileName, "crosscompiled HIR", t_kernel);
    }
}
/// <summary>
/// Protected constructor that passes <paramref name="proto"/> straight to the
/// base-class constructor and performs no work of its own.
/// </summary>
/// <param name="proto">Configuration instance forwarded to the base constructor.</param>
protected CudaConfig(CudaConfig proto)
    : base(proto)
{
}
/// <summary>
/// Protected constructor that hands <paramref name="proto"/> to the base-class
/// constructor; the body is intentionally empty.
/// </summary>
/// <param name="proto">Configuration instance forwarded to the base constructor.</param>
protected CudaConfig(CudaConfig proto)
    : base(proto)
{
}
/// <summary>
/// Runs <c>BigTest</c> once against a <see cref="CudaConfig"/> that opts in the
/// "Conflux.Playground" assembly.
/// </summary>
public void BigTest_Cuda()
{
    var config = new CudaConfig();
    config.Codebase.OptIn(type => type.Assembly.GetName().Name == "Conflux.Playground");

    // The repeat count is kept as a literal so it can be bumped for repeated runs.
    1.TimesDo(() => BigTest(config));
}