/// <summary>
/// Looks up the kernel specialised for the tensors found in <paramref name="args"/>
/// and launches it asynchronously on the null stream.
/// </summary>
/// <remarks>
/// The grid is obtained from <c>ApplyUtils.GetApplyGrid</c> using the element count of
/// the first <c>Tensor</c> in <paramref name="args"/>; the kernel name is
/// <paramref name="baseName"/> mangled with the <c>ApplySpecialization</c> built from all
/// tensor arguments.
/// </remarks>
/// <param name="context">Provides device properties and the kernel cache.</param>
/// <param name="cudaContext">CUDA context that is made current and used for the launch.</param>
/// <param name="ptx">PTX image passed to the kernel cache.</param>
/// <param name="baseName">Unmangled kernel name.</param>
/// <param name="args">Kernel arguments; must contain at least one <c>Tensor</c>.</param>
/// <exception cref="System.InvalidOperationException">
/// <paramref name="args"/> contains no <c>Tensor</c>.
/// </exception>
public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
{
    ThrowIfAnyTensorInvalid(args);

    cudaContext.SetCurrent();
    CudaDeviceProperties deviceInfo = context.DeviceInfoForContext(cudaContext);

    // Materialise the tensor list exactly once, before args is handed to
    // ConvertTensorArgs.Convert, instead of re-running the deferred OfType query.
    Tensor[] tensors = args.OfType<Tensor>().ToArray();
    if (tensors.Length == 0)
    {
        // Same exception type Enumerable.First() raised here, with an actionable message.
        throw new System.InvalidOperationException("Invoke requires at least one Tensor argument to size the kernel launch.");
    }

    long elementCount = tensors[0].ElementCount();
    ApplySpecialization spec = new ApplySpecialization(tensors);

    // NOTE(review): Convert presumably rewrites tensor entries of args into
    // kernel-compatible values — confirm against ConvertTensorArgs.
    ConvertTensorArgs.Convert(cudaContext, spec.Use32BitIndices, args);

    ManagedCuda.VectorTypes.dim3 block = ApplyUtils.GetApplyBlock();
    ManagedCuda.VectorTypes.dim3 grid = ApplyUtils.GetApplyGrid(deviceInfo, elementCount);

    string fullKernelName = PermutationGenerator.GetMangledName(baseName, spec);
    CudaKernel kernel = context.KernelCache.Get(cudaContext, ptx, fullKernelName);
    kernel.GridDimensions = grid;
    kernel.BlockDimensions = block;
    kernel.RunAsync(CUstream.NullStream, args);
}
/// <summary>
/// Runs the named kernel with an explicitly supplied launch configuration.
/// </summary>
/// <param name="context">Provides the kernel cache.</param>
/// <param name="cudaContext">CUDA context used for argument conversion and kernel lookup.</param>
/// <param name="ptx">PTX image passed to the kernel cache.</param>
/// <param name="kernelName">Name of the kernel to fetch; used as given, without mangling.</param>
/// <param name="grid">Grid dimensions for the launch.</param>
/// <param name="block">Block dimensions for the launch.</param>
/// <param name="smemSize">Dynamic shared memory size assigned to the kernel.</param>
/// <param name="spec">Specialization whose <c>Use32BitIndices</c> flag is forwarded to the argument conversion.</param>
/// <param name="args">Kernel arguments.</param>
public static void InvokeReduceAll(
    TSCudaContext context,
    CudaContext cudaContext,
    byte[] ptx,
    string kernelName,
    dim3 grid,
    dim3 block,
    uint smemSize,
    ApplySpecialization spec,
    params object[] args)
{
    // Arguments are converted before the kernel is looked up.
    ConvertTensorArgs.Convert(cudaContext, spec.Use32BitIndices, args);

    CudaKernel reduceKernel = context.KernelCache.Get(cudaContext, ptx, kernelName);

    // Launch configuration.
    reduceKernel.DynamicSharedMemory = smemSize;
    reduceKernel.BlockDimensions = block;
    reduceKernel.GridDimensions = grid;

    // Uses Run rather than RunAsync, unlike the other launch helpers.
    reduceKernel.Run(args);
}
/// <summary>
/// Converts the arguments, fetches this instance's kernel by name and launches it
/// asynchronously on the given stream.
/// </summary>
/// <param name="context">Provides the compiler (for the PTX) and the kernel cache.</param>
/// <param name="cudaContext">CUDA context used for argument conversion and kernel lookup.</param>
/// <param name="kernelName">Name of the kernel to fetch from the cache.</param>
/// <param name="grid">Grid dimensions for the launch.</param>
/// <param name="block">Block dimensions for the launch.</param>
/// <param name="smemSize">Dynamic shared memory size assigned to the kernel.</param>
/// <param name="stream">Stream the launch is issued on.</param>
/// <param name="index32">Forwarded to <c>ConvertTensorArgs.Convert</c> as the 32-bit index flag.</param>
/// <param name="args">Kernel arguments.</param>
private void Invoke(
    TSCudaContext context,
    CudaContext cudaContext,
    string kernelName,
    dim3 grid,
    dim3 block,
    uint smemSize,
    CUstream stream,
    bool index32,
    params object[] args)
{
    ConvertTensorArgs.Convert(cudaContext, index32, args);

    // PTX is obtained from this instance via the context's compiler.
    byte[] compiledPtx = GetPtx(context.Compiler);
    CudaKernel target = context.KernelCache.Get(cudaContext, compiledPtx, kernelName);

    target.GridDimensions = grid;
    target.BlockDimensions = block;
    target.DynamicSharedMemory = smemSize;

    target.RunAsync(stream, args);
}
/// <summary>
/// Looks up the kernel specialised for the arrays found in <paramref name="args"/>
/// and launches it asynchronously on the null stream.
/// </summary>
/// <remarks>
/// The grid is obtained from <c>ApplyUtils.GetApplyGrid</c> using the element count of
/// the first <c>NDArray</c> in <paramref name="args"/>; the kernel name is
/// <paramref name="baseName"/> mangled with the <c>ApplySpecialization</c> built from all
/// array arguments.
/// </remarks>
/// <param name="context">Provides device properties and the kernel cache.</param>
/// <param name="cudaContext">CUDA context used for argument conversion and kernel lookup.</param>
/// <param name="ptx">PTX image passed to the kernel cache.</param>
/// <param name="baseName">Unmangled kernel name.</param>
/// <param name="args">Kernel arguments; must contain at least one <c>NDArray</c>.</param>
/// <exception cref="System.InvalidOperationException">
/// <paramref name="args"/> contains no <c>NDArray</c>.
/// </exception>
public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
{
    ThrowIfAnyTensorInvalid(args);

    var deviceInfo = context.DeviceInfoForContext(cudaContext);

    // Materialise the array list exactly once, before args is handed to
    // ConvertTensorArgs.Convert, instead of re-running the deferred OfType query.
    NDArray[] arrays = args.OfType<NDArray>().ToArray();
    if (arrays.Length == 0)
    {
        // Same exception type Enumerable.First() raised here, with an actionable message.
        throw new System.InvalidOperationException("Invoke requires at least one NDArray argument to size the kernel launch.");
    }

    var elementCount = arrays[0].ElementCount();
    var spec = new ApplySpecialization(arrays);

    // NOTE(review): Convert presumably rewrites array entries of args into
    // kernel-compatible values — confirm against ConvertTensorArgs.
    ConvertTensorArgs.Convert(cudaContext, spec.Use32BitIndices, args);

    var block = ApplyUtils.GetApplyBlock();
    var grid = ApplyUtils.GetApplyGrid(deviceInfo, elementCount);

    var fullKernelName = PermutationGenerator.GetMangledName(baseName, spec);
    var kernel = context.KernelCache.Get(cudaContext, ptx, fullKernelName);
    kernel.GridDimensions = grid;
    kernel.BlockDimensions = block;
    kernel.RunAsync(CUstream.NullStream, args);
}