Ejemplo n.º 1
0
        /// <summary>
        /// Passes the arguments into this invocation, configures the function and launches it on the given grid.
        /// </summary>
        /// <param name="gridDim">Grid dimensions handed to <c>nvcuda.cuLaunchGrid</c>.</param>
        /// <param name="blockDim">Block dimensions handed to <c>nvcuda.cuFuncSetBlockShape</c>.</param>
        /// <returns>A <c>KernelResult</c> built from this invocation and the time reported by <c>CudaProfiler.Benchmark</c>.</returns>
        public KernelResult Launch(dim3 gridDim, dim3 blockDim)
        {
            // Preconditions: no argument is disposed and this invocation has not been executed yet.
            CudaDriver.Ensure();
            Args.AssertNone(candidate => candidate.IsDisposed);
            _hasCompletedExecution.AssertFalse();

            // Accumulate argument sizes (seeded with 0) to get the offset used for each argument.
            var argOffsets = Args.Scanbe(0, (running, current, _) => running + current.SizeInArgList);
            Args.Zip(argOffsets, (current, position) => current.PassInto(this, position));

            try
            {
                nvcuda.cuFuncSetBlockShape(Function, blockDim);

                var sharedBytes = (uint)Function.SharedSizeBytes;
                nvcuda.cuFuncSetSharedSize(Function, sharedBytes);

                nvcuda.cuFuncSetCacheConfig(Function, CUfunc_cache.PreferNone);

                // Total size of the parameter block is the sum of the individual argument sizes.
                var paramBytes = (uint)Args.Select(p => p.SizeInArgList).Sum();
                nvcuda.cuParamSetSize(Function, paramBytes);

                TraceBeforeLaunch(gridDim, blockDim);
                var elapsed = CudaProfiler.Benchmark(() => nvcuda.cuLaunchGrid(Function, gridDim));
                Log.WriteLine("Function execution succeeded in {0} ({1} = 0.5 {2}s).", elapsed, Syms.Epsilon, Syms.Mu);

                return new KernelResult(this, elapsed);
            }
            finally
            {
                // Set whether or not the launch succeeded, so a second Launch trips the assertion above.
                _hasCompletedExecution = true;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a kernel argument for the given value and immediately calls <c>CopyHtoD()</c>.
        /// </summary>
        /// <param name="direction">Direction stored in <c>Direction</c>.</param>
        /// <param name="value">Argument value; asserted to be non-null.</param>
        public KernelArgument(ParameterDirection direction, Object value)
        {
            Direction = direction;

            // The type is recorded before the null assertion runs, hence the explicit null guard.
            if (value == null)
            {
                _type = null;
            }
            else
            {
                _type = value.GetType();
            }

            _value = value.AssertNotNull();

            CudaDriver.Ensure();
            CopyHtoD();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks the reported driver/CUDA/ISA versions against fixed expected values and
        /// verifies the text dump of the current device.
        /// </summary>
        public void Win7x64_Cuda32RC_Gtx260()
        {
            CudaDriver.Ensure();

            // Version expectations for this specific configuration.
            var driverMatches = CudaVersions.Driver == new Version("8.17.12.6061");
            driverMatches.AssertTrue();

            var cudaMatches = CudaVersions.Cuda == CudaVersion.CUDA_32;
            cudaMatches.AssertTrue();

            var softwareIsaMatches = CudaVersions.SoftwareIsa == SoftwareIsa.PTX_21;
            softwareIsaMatches.AssertTrue();

            var hardwareIsaMatches = CudaVersions.HardwareIsa == HardwareIsa.SM_13;
            hardwareIsaMatches.AssertTrue();

            // The current device must exist; its dump is what gets verified.
            var device = CudaDevice.Current.AssertNotNull();
            var dump = device.DumpAsText();

            VerifyResult(dump);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Wraps a function handle and reads its attributes through <c>nvcuda.cuFuncGetAttribute</c>.
        /// </summary>
        /// <param name="handle">Function handle; asserted to satisfy <c>IsNotNull</c>.</param>
        /// <param name="name">Function name; <c>"N/A"</c> is stored when null.</param>
        public JittedFunction(CUfunction handle, String name)
        {
            CudaDriver.Ensure();

            // Handle is assigned first: the attribute queries below pass `this`.
            Handle = handle.AssertThat(candidate => candidate.IsNotNull);
            Name = name ?? "N/A";

            // Resource usage attributes.
            MaxThreadsPerBlock = nvcuda.cuFuncGetAttribute(CUfunction_attribute.MaxThreadsPerBlock, this);
            SharedSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.SharedSizeBytes, this);
            ConstSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.ConstSizeBytes, this);
            LocalSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.LocalSizeBytes, this);
            NumRegs = nvcuda.cuFuncGetAttribute(CUfunction_attribute.NumRegs, this);

            // Version attributes, both cast to HardwareIsa.
            PtxVersion = (HardwareIsa)nvcuda.cuFuncGetAttribute(CUfunction_attribute.PtxVersion, this);
            BinaryVersion = (HardwareIsa)nvcuda.cuFuncGetAttribute(CUfunction_attribute.BinaryVersion, this);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Wraps a module handle together with its PTX text and builds a <c>JittedFunction</c>
        /// for every <c>.entry</c> declaration found in that text.
        /// </summary>
        /// <param name="ptx">PTX source text; asserted to be non-null.</param>
        /// <param name="handle">Module handle; asserted to satisfy <c>IsNotNull</c>.</param>
        public JittedModule(String ptx, CUmodule handle)
        {
            CudaDriver.Ensure();
            Ptx    = ptx.AssertNotNull();
            Handle = handle.AssertThat(h => h.IsNotNull);

            // Finds ".entry <name> (" occurrences; the name is captured in the "entrypoint" group.
            var match = Regex.Match(Ptx, @"\.entry\s*(?<entrypoint>\w*?)\s*\(");

            // Walk the chain of matches via NextMatch() while they keep succeeding.
            Functions = match.Unfoldi(m => m.NextMatch(), m => m.Success).Select(m =>
            {
                // Read the name from the match being projected (m), not from the captured first
                // match; otherwise every function would be looked up under the first entry's name.
                var name  = m.Result("${entrypoint}");
                var hfunc = nvcuda.cuModuleGetFunction(this, name);
                return(new JittedFunction(hfunc, name));
            }).ToReadOnly();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates a device wrapper for the given index, reading its handle, name and PCI ids
        /// from the driver and building the spec objects from this instance.
        /// </summary>
        /// <param name="index">Device index passed to <c>nvcuda.cuDeviceGet</c>.</param>
        public CudaDevice(int index)
        {
            CudaDriver.Ensure();

            // Identity: Handle is assigned before the queries below, all of which pass `this`.
            Index = index;
            Handle = nvcuda.cuDeviceGet(index);

            // Basic descriptors.
            Name = nvcuda.cuDeviceGetName(this);
            PciBusId = nvcuda.cuDeviceGetAttribute(CUdevice_attribute.PciBusId, this);
            PciDeviceId = nvcuda.cuDeviceGetAttribute(CUdevice_attribute.PciDeviceId, this);

            // Spec objects, each constructed from this device.
            Simd = new SimdSpec(this);
            Clock = new ClockSpec(this);
            Memory = new MemorySpec(this);
            Caps = new DeviceCaps(this);
        }
Ejemplo n.º 7
0
 // Static constructor: calls CudaDriver.Ensure() once, before CudaConfig is first used.
 // NOTE(review): an exception thrown by Ensure() here would surface to callers wrapped in a
 // TypeInitializationException - confirm that is acceptable.
 static CudaConfig()
 {
     CudaDriver.Ensure();
 }
Ejemplo n.º 8
0
        /// <summary>
        /// JIT-compiles the given PTX text via <c>nvcuda.cuModuleLoadDataEx</c>.
        /// When <c>Tuning</c> is non-trivial, every <c>.entry</c> in the PTX is first rewritten so that
        /// its performance tuning directives reflect the compiler's settings merged with the ones
        /// already present on that entry.
        /// </summary>
        /// <param name="ptx">PTX source text; asserted to be non-null.</param>
        /// <returns>A <c>JitResult</c> built from this compiler, the (possibly rewritten) PTX and the native result.</returns>
        /// <remarks>
        /// A conflict between an existing directive and the compiler's setting is logged and then
        /// reported by throwing the exception produced by <c>AssertionHelper.Fail()</c>.
        /// <c>TargetFromContext</c> is asserted to be true (see the todo near the end of the method).
        /// </remarks>
        public JitResult Compile(String ptx)
        {
            ptx.AssertNotNull();
            CudaDriver.Ensure();

            var log = Traces.Jit.Info;

            log.EnsureBlankLine();
            log.WriteLine("Peforming JIT compilation...");
            log.WriteLine("    PTX source text                              : {0}", "(see below)");
            log.WriteLine("    Target hardware ISA                          : {0}", TargetFromContext ? "(determined from context)" : Target.ToString());
            log.WriteLine("    Actual hardware ISA                          : {0}", CudaVersions.HardwareIsa);
            log.WriteLine("    Optimization level (0 - 4, higher is better) : {0}", OptimizationLevel);

            // here we attempt to rewrite PTX by injecting performance tuning directives directly into source codes
            if (Tuning.IsNotTrivial)
            {
                Tuning.Validate();

                log.EnsureBlankLine();
                log.WriteLine("Detected non-trivial performance tuning parameters...");
                Tuning.DumpAsText(log.Writer.Medium);

                log.EnsureBlankLine();
                log.WriteLine("To apply them it is necessary to perform PTX rewriting and inject corresponding directives directly into source codes.");
                log.WriteLine("Analyzing entries in PTX module...");
                // Captures: "header" (.entry + name + optional parameter list), "name", and the
                // optional "directives" text located between the header and the opening brace.
                var rx_entry = @"(?<header>\.entry\s+(?<name>([a-zA-Z][a-zA-Z0-9_$]*)|([_$%][a-zA-Z0-9_$]*))\s*(?<params>\(.*?\))?)\s*(?<directives>\..*?)?\s*\{";
                ptx = ptx.Replace(rx_entry, RegexOptions.Singleline, m =>
                {
                    var name         = m["name"];
                    // Split the directives text on '.' and parse each piece as "<name> <value>".
                    var s_directives = m["directives"].Split(".".MkArray(), StringSplitOptions.None).Trim().Where(s => s.IsNotEmpty()).ToReadOnly();
                    var directives   = s_directives.Select(s => s.Parse(@"^(?<name>\w+)\s+(?<value>.*?)$")).ToDictionary(m1 => m1["name"].Trim(), m1 => m1["value"].Trim()).ToReadOnly();
                    if (directives.IsNotEmpty())
                    {
                        // Parses "x[, y[, z]]"; missing y and z default to 1.
                        Func <String, dim3> parse_dim3 = s =>
                        {
                            var m1 = s.AssertParse(@"^(?<x>\d+)?(\s*,\s*(?<y>\d+))?(\s*,\s*(?<z>\d+))?$").ToDictionary();
                            m1     = m1.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.IsNullOrEmpty() ? null : kvp.Value);
                            return(new dim3(int.Parse(m1["x"]), int.Parse(m1["y"] ?? "1"), int.Parse(m1["z"] ?? "1")));
                        };

                        log.WriteLine("Found entry \"{0}\" tuned as follows: {1}.", name, directives.Select(kvp => String.Format("{0} = {1}", kvp.Key, kvp.Value)).StringJoin(", "));

                        // In every merge below, lower-case locals hold the value found on the entry
                        // (0 / empty dim3 when the directive is absent) and the capitalized members
                        // hold the compiler's setting (0 / empty dim3 when not set).

                        var maxnreg = int.Parse(directives.GetOrDefault("maxnreg", "0"));
                        if (Maxnreg != 0)
                        {
                            if (maxnreg != 0 && !(Maxnreg <= maxnreg))
                            {
                                log.WriteLine("Conflict! New max registers per thread ({0}) is incompatible with original value ({1}).", Maxnreg, maxnreg);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                maxnreg = Maxnreg;
                            }
                        }

                        var maxntid = parse_dim3(directives.GetOrDefault("maxntid", "0, 0, 0"));
                        if (Maxntid != new dim3())
                        {
                            if (maxntid != new dim3() && !(Maxntid <= maxntid))
                            {
                                log.WriteLine("Conflict! New max threads in thread block ({0}, {1}, {2}) is incompatible with original value ({3}, {4}, {5}).", Maxntid.X, Maxntid.Y, Maxntid.Z, maxntid.X, maxntid.Y, maxntid.Z);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                maxntid = Maxntid;
                            }
                        }

                        var reqntid = parse_dim3(directives.GetOrDefault("reqntid", "0, 0, 0"));
                        if (Reqntid != new dim3())
                        {
                            if (reqntid != new dim3() && Reqntid != reqntid)
                            {
                                log.WriteLine("Conflict! New required threads in thread block ({0}, {1}, {2}) is incompatible with original value ({3}, {4}, {5}).", Reqntid.X, Reqntid.Y, Reqntid.Z, reqntid.X, reqntid.Y, reqntid.Z);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                reqntid = Reqntid;
                            }
                        }

                        if (maxntid != new dim3() && reqntid != new dim3())
                        {
                            if (!(reqntid <= maxntid))
                            {
                                log.WriteLine("Conflict! Required threads in thread block ({0}, {1}, {2}) is incompatible with max threads in thread block ({3}, {4}, {5}).", reqntid.X, reqntid.Y, reqntid.Z, maxntid.X, maxntid.Y, maxntid.Z);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                // Both are set and compatible: only reqntid is kept.
                                // NOTE(review): presumably because the two directives must not be emitted together - confirm.
                                maxntid = new dim3(0, 0, 0);
                            }
                        }

                        var minnctapersm = int.Parse(directives.GetOrDefault("minnctapersm", "0"));
                        if (Minnctapersm != 0)
                        {
                            if (Minnctapersm < minnctapersm)
                            {
                                log.WriteLine("Conflict! New min thread blocks per SM ({0}) is incompatible with original value ({1}).", Minnctapersm, minnctapersm);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                minnctapersm = Minnctapersm;
                            }
                        }

                        var maxnctapersm = int.Parse(directives.GetOrDefault("maxnctapersm", "0"));
                        if (Maxnctapersm != 0)
                        {
                            // Only compare against the original value when the entry actually had one;
                            // 0 means the directive was absent, which must not be reported as a conflict
                            // (same guard as for maxnreg above).
                            if (maxnctapersm != 0 && Maxnctapersm > maxnctapersm)
                            {
                                log.WriteLine("Conflict! New max thread blocks per SM ({0}) is incompatible with original value ({1}).", Maxnctapersm, maxnctapersm);
                                throw AssertionHelper.Fail();
                            }
                            else
                            {
                                maxnctapersm = Maxnctapersm;
                            }
                        }

                        if (minnctapersm != 0 && maxnctapersm != 0)
                        {
                            if (minnctapersm > maxnctapersm)
                            {
                                log.WriteLine("Conflict! Min thread blocks per SM ({0}) and max thread blocks per SM ({1}) are incompatible.", minnctapersm, maxnctapersm);
                                throw AssertionHelper.Fail();
                            }
                        }

                        log.Write("Applying compilation parameters... ");
                        var tuning = new JitTuning {
                            Maxnreg = maxnreg, Maxntid = maxntid, Reqntid = reqntid, Minnctapersm = minnctapersm, Maxnctapersm = maxnctapersm
                        };
                        tuning.Validate();
                        // The entry header is re-emitted followed by the merged directives and the opening brace.
                        var replacement = m["header"] + Environment.NewLine + tuning.RenderPtx() + Environment.NewLine + "{";

                        log.WriteLine("Success.");
                        return(replacement);
                    }
                    else
                    {
                        log.WriteLine("Found entry \"{0}\" without performance tuning directives.", name);

                        // Nothing to merge with: the compiler's own tuning is rendered as is.
                        log.Write("Applying compilation parameters... ");
                        var replacement = m["header"] + Environment.NewLine + Tuning.RenderPtx() + Environment.NewLine + "{";

                        log.WriteLine("Success.");
                        return(replacement);
                    }
                });
            }

            // Dump the PTX that is actually handed to the driver.
            log.EnsureBlankLine();
            log.WriteLine("*".Repeat(120));
            log.WriteLine(ptx.TrimEnd());
            log.WriteLine(120.Times("*"));

            var options = new CUjit_options();

            options.OptimizationLevel      = OptimizationLevel;
            options.PlannedThreadsPerBlock = Reqntid.Product();
            // todo. an attempt to pass the Target value directly leads to CUDA_ERROR_INVALID_VALUE
            // as of now, this feature is not really important, so I'm marking it as TBI
            options.TargetFromContext = TargetFromContext.AssertTrue();
            options.Target            = Target.ToCUjit_target();
            options.FallbackStrategy  = CUjit_fallbackstrategy.PreferPtx;

            var native_result = nvcuda.cuModuleLoadDataEx(ptx, options);

            return(new JitResult(this, ptx, native_result));
        }
Ejemplo n.º 9
0
 // Creates a JIT compiler; the only work done here is the CudaDriver.Ensure() call.
 // todo. cache jitted kernels
 // this is of little priority though, since driver caches kernels as well
 public JitCompiler()
 {
     CudaDriver.Ensure();
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates an invocation of the given function with the given arguments.
 /// </summary>
 /// <param name="function">Function to invoke; asserted to be non-null.</param>
 /// <param name="args">Arguments, wrapped into a new <c>KernelArguments</c>.</param>
 public KernelInvocation(JittedFunction function, IEnumerable <KernelArgument> args)
 {
     CudaDriver.Ensure();

     var target = function.AssertNotNull();
     Function = target;

     var wrappedArgs = new KernelArguments(args);
     Args = wrappedArgs;
 }