/// <summary>
/// Selects the preferred accelerator: CUDA first, then OpenCL, falling back to
/// the CPU when no GPU backend is present.
/// </summary>
/// <param name="context">The ILGPU context used to create the accelerator.</param>
/// <param name="prefCPU">When true, always returns a CPU accelerator.</param>
/// <returns>The created accelerator; the caller owns it and must dispose it.</returns>
private Accelerator GetGpu(Context context, bool prefCPU = false)
{
    // Bucket all detected accelerators by backend type for quick lookup.
    var groupedAccelerators = Accelerator.Accelerators
        .GroupBy(x => x.AcceleratorType)
        .ToDictionary(x => x.Key, x => x.ToList());

    if (prefCPU)
    {
        return new CPUAccelerator(context);
    }

    // Prefer CUDA over OpenCL when both backends are available.
    if (groupedAccelerators.TryGetValue(AcceleratorType.Cuda, out var nv))
    {
        return Accelerator.Create(context, nv[0]);
    }

    if (groupedAccelerators.TryGetValue(AcceleratorType.OpenCL, out var cl))
    {
        return Accelerator.Create(context, cl[0]);
    }

    // Fallback: no GPU backend was found.
    return new CPUAccelerator(context);
}
/// <summary>
/// Performs different memory allocations and operations on all available accelerators.
/// MemoryBuffer&lt;T&gt; requires a blittable T (bool, char, and class types are not
/// allowed), and every buffer must be disposed before its accelerator is disposed.
/// </summary>
static void Main(string[] args)
{
    // The context owns all accelerators created below.
    using (var context = new Context())
    {
        // Run the memory samples on every accelerator the runtime can see.
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                // Notes:
                // - Only contiguous chunks of memory can be transferred; transferring
                //   non-contiguous chunks leaves the buffer contents undefined.
                // - A 2D buffer of dimensions (width, height) is addressed as
                //   y * width + x, unlike .NET's default multi-dimensional layout.
                // - Keep a live reference to each buffer while it is in use, or the
                //   GC may dispose it; dispose all buffers before the accelerator.
                SampleInitialization(acc);
                Alloc1D(acc);
                Alloc2D(acc);
                Alloc3D(acc);
                DirectAccessFromCPU(acc);
            }
        }
    }
}
/// <summary>
/// Lazily initializes the shared <c>gpuAccelerator</c> field, preferring the
/// first CUDA device and falling back to the first OpenCL GPU device.
/// No-op when an accelerator has already been created.
/// </summary>
private static void getGPUAccelerator()
{
    // Already initialized: nothing to do.
    if (gpuAccelerator != null)
    {
        return;
    }

    // Prefer CUDA when any CUDA device is present.
    if (CudaAccelerator.CudaAccelerators.Length > 0)
    {
        if (context == null)
        {
            context = new Context();
        }

        gpuAccelerator = Accelerator.Create(context, CudaAccelerator.CudaAccelerators[0]);
        return;
    }

    // Otherwise fall back to the first OpenCL device that is an actual GPU.
    foreach (CLAcceleratorId aid in CLAccelerator.CLAccelerators)
    {
        if (aid.DeviceType == ILGPU.Runtime.OpenCL.API.CLDeviceType.CL_DEVICE_TYPE_GPU)
        {
            if (context == null)
            {
                context = new Context();
            }

            gpuAccelerator = Accelerator.Create(context, aid);

            // Bug fix: stop after the first GPU device. The original loop kept
            // iterating and overwrote (and leaked) each previously created
            // accelerator whenever several OpenCL GPUs were present.
            return;
        }
    }
}
/// <summary>
/// Demonstrates the use of a custom index type to work with indexed memory.
/// </summary>
static void Main(string[] args)
{
    using (var context = new Context())
    {
        // Exercise the ND allocation helpers on every detected accelerator.
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                // Reminder: transfers must be contiguous, 2D addressing is
                // y * width + x, buffers must outlive their use and be disposed
                // before the accelerator.
                AllocND(acc, (idx, dimension) => idx.ComputeLinearIndex(dimension));
                AllocND(acc, (idx, dimension) => (long)idx.ComputeLinearIndex(dimension));

                // Launch a kernel over a buffer indexed by the custom MyIndex4 type.
                var kernel = acc.LoadAutoGroupedStreamKernel<
                    Index, ArrayView<int, MyIndex4>>(MyKernelND);

                using (var buffer = acc.Allocate<int, MyIndex4>(Dimension))
                {
                    kernel(Dimension.Size, buffer.View);
                    acc.Synchronize();
                }
            }
        }
    }
}
/// <summary>
/// Runs the reduction samples on every non-CPU accelerator, then once more on a
/// manually configured CPU accelerator with a warp size greater than one.
/// </summary>
static void Main(string[] args)
{
    using (var context = new Context())
    {
        // Skip CPU devices here; a custom CPU accelerator is created below.
        var gpuIds = Accelerator.Accelerators
            .Where(id => id.AcceleratorType != AcceleratorType.CPU);

        foreach (var id in gpuIds)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");
                Reduce(acc);
                AtomicReduce(acc);
            }
        }

        // Custom CPU accelerator: 4 threads, warp size 4 (> 1).
        using (var acc = new CPUAccelerator(context, 4, 4))
        {
            Console.WriteLine($"Performing operations on {acc}");
            Reduce(acc);
            AtomicReduce(acc);
        }
    }
}
/// <summary>
/// Creates the accelerator for the given id, compiles the evaluation and
/// result-processing kernels, and uploads the independents/dependants data
/// sets to device memory.
/// </summary>
/// <param name="context">The ILGPU context.</param>
/// <param name="acceleratorId">The accelerator to use (CPU, OpenCL, or CUDA).</param>
/// <param name="independents">2D table of independent values to upload.</param>
/// <param name="dependants">Vector of dependant values to upload.</param>
/// <exception cref="NotSupportedException">The accelerator type is not CPU, OpenCL, or CUDA.</exception>
public void Initialize(Context context, AcceleratorId acceleratorId, double[,] independents, double[] dependants)
{
    AcceleratorId = acceleratorId;
    AcceleratorType acceleratorType = AcceleratorId.AcceleratorType;

    if (acceleratorType == AcceleratorType.CPU)
    {
        Accelerator = Accelerator.Create(context, AcceleratorId);
    }
    else if (acceleratorType == AcceleratorType.OpenCL)
    {
        Accelerator = CLAccelerator.Create(context, AcceleratorId);
    }
    else if (acceleratorType == AcceleratorType.Cuda)
    {
        Accelerator = CudaAccelerator.Create(context, AcceleratorId);
    }
    else
    {
        // Bug fix: an unrecognized accelerator type previously left Accelerator
        // null and crashed below with an opaque NullReferenceException.
        throw new NotSupportedException($"Unsupported accelerator type: {acceleratorType}");
    }

    // Compile the kernels once; launchers are reused across evaluations.
    EvaluationKernel = Accelerator.LoadAutoGroupedStreamKernel<
        Index2, ArrayView2D<double>, ArrayView<double>, ArrayView<NodeGPU>,
        ArrayView<int>, ArrayView2D<double>>(EvaluationKernelFunction);
    ProcessResultsKernel = Accelerator.LoadAutoGroupedStreamKernel<
        Index1, ArrayView2D<double>, ArrayView<double>>(ProcessResultsKernelFunction);

    // Upload the independents table (GetUpperBound + 1 == length per dimension).
    IndependentsTableSize = new Index2(independents.GetUpperBound(0) + 1, independents.GetUpperBound(1) + 1);
    Independents = Accelerator.Allocate<double>(IndependentsTableSize);
    Independents.CopyFrom(independents, new Index2(), new Index2(), IndependentsTableSize);

    // Upload the dependants vector.
    Dependants = Accelerator.Allocate<double>(dependants.Length);
    Dependants.CopyFrom(dependants, 0, 0, dependants.Length);
}
/// <summary>
/// Detects all available accelerators and prints device information about each
/// of them on the command line.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // Every accelerator must be disposed before the global context is.
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"AcceleratorId: {id.AcceleratorType}, {acc.Name}");
                PrintAcceleratorInfo(acc);
                Console.WriteLine();
            }
        }

        // Accelerators can also be constructed manually with custom settings:
        // here a CPU accelerator with 4 threads at the highest thread priority.
        using (var acc = new CPUAccelerator(context, 4, ThreadPriority.Highest))
        {
            PrintAcceleratorInfo(acc);
        }
    }
}
/// <summary>
/// Launches a simple 1D kernel on every accelerator and verifies the result.
/// </summary>
private static void Main(string[] args)
{
    using var context = new Context();

    foreach (var id in Accelerator.Accelerators)
    {
        using var acc = Accelerator.Create(context, id);
        Console.WriteLine($"Performing operations on {acc}");

        var kernel = acc.LoadAutoGroupedStreamKernel<Index1, ArrayView<int>, int>(MyKernel);

        using var buffer = acc.Allocate<int>(1024);
        kernel(buffer.Length, buffer.View, 42);
        acc.Synchronize();

        // Verify: element i must hold 42 + i.
        var data = buffer.GetAsArray();
        for (var i = 0; i < data.Length; ++i)
        {
            if (data[i] != 42 + i)
            {
                Console.WriteLine($"Error at element location {i}: {data[i]} found");
            }
        }
    }

    Console.WriteLine("Done!");
}
/// <summary>
/// Prepares GPU buffers for computing a height field for the given terrain
/// patch on the first CUDA accelerator. Prints a message and does nothing
/// when no CUDA device is present.
/// </summary>
/// <param name="patch">The terrain patch to process (currently unused in this body).</param>
public void GenerateHeightField(TerrainPatch patch)
{
    using (var context = new Context())
    {
        // FirstOrDefault yields a default id when no CUDA device is present;
        // the type check below detects that case.
        AcceleratorId aid = Accelerator.Accelerators.Where(id => id.AcceleratorType == AcceleratorType.Cuda).FirstOrDefault();
        if (aid.AcceleratorType != AcceleratorType.Cuda)
        {
            Console.WriteLine(@"There is no CUDA accelerator present. Doing nothing.");
            return;
        }

        // Allocate device buffers for the DEM, range/slope tables, and the
        // per-patch rise output.
        using (var accelerator = Accelerator.Create(context, aid))
        using (var gpu_dem = accelerator.Allocate <short>(ViperEnvironment.Terrain.Data.Length))
        using (var gpu_range = accelerator.Allocate <float>(cpu_range.Length))
        using (var gpu_slope = accelerator.Allocate <float>(cpu_slope.Length))
        using (var gpu_rise = accelerator.Allocate <short>(TerrainPatch.DefaultSize * TerrainPatch.DefaultSize))
        {
            // Upload the DEM and range tables to device memory.
            gpu_dem.CopyFrom(ViperEnvironment.Terrain.Data, 0, 0, ViperEnvironment.Terrain.Data.Length);
            gpu_range.CopyFrom(cpu_range, 0, 0, cpu_range.Length);

            var launchDimension = new Index2(TerrainPatch.DefaultSize, TerrainPatch.DefaultSize);

            // NOTE(review): the kernel is loaded but never launched, and
            // gpu_slope, gpu_rise, and launchDimension are unused afterwards —
            // this method looks unfinished; confirm whether the kernel launch
            // and result readback were lost.
            var kernel1 = accelerator.LoadStreamKernel <Index2, ArrayView <short>, ArrayView <float>, ArrayView <short>, int, int>(RiseKernel1);
        }
    }
}
/// <summary>
/// Launches a simple 1D kernel using implicit and auto-grouping functionality.
/// This sample demonstrates the creation of launcher delegates in order to avoid boxing.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                // Auto-grouped: the group size is determined by ILGPU or the GPU
                // driver. This is the most convenient way to launch kernels.
                CompileAndLaunchAutoGroupedKernel(acc);

                // Implicitly grouped with a custom group size. Group sizes below
                // the warp size leave warp lanes unused and can cost dramatic
                // amounts of performance.
                CompileAndLaunchImplicitlyGroupedKernel(acc, acc.WarpSize);

                // Explicitly grouped kernel with a custom group size.
                CompileAndLaunchKernel(acc, acc.WarpSize);
            }
        }
    }
}
/// <summary>
/// Picks an accelerator: CUDA first, then OpenCL, otherwise the CPU.
/// Setting <paramref name="prefCPU"/> short-circuits straight to the CPU.
/// </summary>
/// <param name="context">Context used to create the accelerator.</param>
/// <param name="prefCPU">Force the CPU accelerator when true.</param>
/// <returns>The created accelerator; caller disposes it.</returns>
private Accelerator GetGpu(Context context, bool prefCPU = false)
{
    // Index all detected accelerators by backend type.
    var byType = Accelerator.Accelerators
        .GroupBy(x => x.AcceleratorType)
        .ToDictionary(g => g.Key, g => g.ToList());

    if (prefCPU)
    {
        return new CPUAccelerator(context);
    }

    // CUDA takes precedence over OpenCL.
    if (byType.TryGetValue(AcceleratorType.Cuda, out var cudaIds))
    {
        return Accelerator.Create(context, cudaIds[0]);
    }

    if (byType.TryGetValue(AcceleratorType.OpenCL, out var clIds))
    {
        return Accelerator.Create(context, clIds[0]);
    }

    // No GPU backend detected; warn and fall back.
    Console.WriteLine("Warning : Could not find gpu, falling back to Default device");
    return new CPUAccelerator(context);
}
/// <summary>
/// Demonstrates generic kernel functions to simulate lambda closures via generic types.
/// </summary>
static void Main()
{
    const int DataSize = 1024;

    using (var context = new Context())
    {
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                var kernel = acc.LoadAutoGroupedStreamKernel<
                    Index1, ArrayView<long>, int, LambdaClosure>(Kernel);

                using (var buffer = acc.Allocate<long>(DataSize))
                {
                    // The closure state (constructed with 20) travels to the
                    // kernel as an ordinary value argument — no boxing.
                    kernel(buffer.Length, buffer.View, 1, new LambdaClosure(20));
                    var data = buffer.GetAsArray();
                }
            }
        }
    }
}
/// <summary>
/// Runs a kernel exercising the algorithms library's warp extensions and
/// prints the resulting 2D buffer.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // The warp extensions live in the algorithms library.
        context.EnableAlgorithms();

        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                var kernel = acc.LoadStreamKernel<ArrayView2D<int>>(KernelWithWarpExtensions);

                using (var buffer = acc.Allocate<int>(acc.WarpSize, 4))
                {
                    // One group of buffer.Width threads.
                    kernel((1, buffer.Width), buffer.View);
                    acc.Synchronize();

                    // Download and dump the whole 2D result.
                    var data = buffer.GetAs2DArray();
                    for (var i = 0; i < data.GetLength(0); ++i)
                    {
                        for (var j = 0; j < data.GetLength(1); ++j)
                        {
                            Console.WriteLine($"Data[{i}, {j}] = {data[i, j]}");
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Demonstrates kernels using static properties to access grid and group indices.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                // Two groups, each as large as the device allows.
                var groupSize = acc.MaxNumThreadsPerGroup;
                KernelConfig kernelConfig = (2, groupSize);

                using (var buffer = acc.Allocate<int>(kernelConfig.Size))
                {
                    var groupedKernel = acc.LoadStreamKernel<ArrayView<int>, int>(GroupedKernel);
                    groupedKernel(kernelConfig, buffer.View, 64);
                    acc.Synchronize();

                    Console.WriteLine("Default grouped kernel");
                    var data = buffer.GetAsArray();
                    for (var i = 0; i < data.Length; ++i)
                    {
                        Console.WriteLine($"Data[{i}] = {data[i]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Demonstrates buffer initialization: the convenient Initialize overloads
/// (with and without an explicit stream, over sub-views) and an explicitly
/// constructed, allocation-free initializer for a custom struct.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // A lightning context encapsulates an ILGPU accelerator
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                using (var buffer = accelerator.Allocate <int>(64))
                {
                    // Initializes the first half by setting the value to 42.
                    // Note that in this case, the initializer uses the default accelerator stream.
                    accelerator.Initialize(buffer.View.GetSubView(0, buffer.Length / 2), 42);

                    // Initializes the second half by setting the value to 23.
                    // Note that this overload requires an explicit accelerator stream.
                    accelerator.Initialize(accelerator.DefaultStream, buffer.View.GetSubView(buffer.Length / 2), 23);

                    accelerator.Synchronize();

                    // Download and print: first half 42, second half 23.
                    var data = buffer.GetAsArray();
                    for (int i = 0, e = data.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data[{i}] = {data[i]}");
                    }
                }

                // Calling the convenient Initialize function on the lightning context
                // involves internal heap allocations. This can be avoided by constructing
                // an initializer explicitly:
                var initializer = accelerator.CreateInitializer <CustomStruct>();

                using (var buffer2 = accelerator.Allocate <CustomStruct>(64))
                {
                    // We can now use the initializer without any further heap allocations
                    // during the invocation. Note that the initializer requires an explicit
                    // accelerator stream.
                    initializer(accelerator.DefaultStream, buffer2.View, new CustomStruct()
                    {
                        First = 23,
                        Second = 42
                    });

                    accelerator.Synchronize();
                    var data = buffer2.GetAsArray();
                    for (int i = 0, e = data.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data2[{i}] = {data[i]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Computes the pairwise cosine-similarity matrix of the given data set on the
/// first available CUDA accelerator.
/// </summary>
/// <param name="dataSet">Samples; dataSet[i] is the i-th sample vector.
/// All rows are assumed to have the same length as row 0.</param>
/// <returns>A numSample x numSample jagged matrix of pairwise values.</returns>
/// <exception cref="NotSupportedException">No CUDA accelerator is available.</exception>
public static double[][] ComputeDistances(double[][] dataSet)
{
    int numSample = dataSet.Length;
    int dim = dataSet[0].Length;

    // Flatten to column-major order: element (i, j) lands at i + j * numSample.
    double[] dataset = new double[numSample * dim];
    for (int i = 0; i < numSample; i++)
    {
        for (int j = 0; j < dim; j++)
        {
            dataset[i + j * numSample] = dataSet[i][j];
        }
    }

    foreach (var acceleratorId in Accelerator.Accelerators)
    {
        // We will use the first CUDA device.
        if (acceleratorId.AcceleratorType != AcceleratorType.Cuda)
        {
            continue;
        }

        using (var context = new Context())
        using (var accelerator = Accelerator.Create(context, acceleratorId))
        {
            var kernel = accelerator.LoadAutoGroupedStreamKernel<
                Index, ArrayView2D<double>, ArrayView2D<double>>(CosineSimilarityKernel);

            using (var gpuDistances = accelerator.Allocate<double>(numSample * numSample))
            using (var gpuDataset = accelerator.Allocate<double>(numSample * dim))
            {
                gpuDataset.CopyFrom(dataset, 0, 0, dataset.Length);

                // Launch one thread per output cell and pass 2D views over the
                // flat buffers. The kernel launch does not involve any boxing.
                var a = gpuDataset.As2DView(numSample, dim);
                var b = gpuDistances.As2DView(numSample, numSample);
                kernel(numSample * numSample, a, b);

                // Wait for the kernel to finish before reading back.
                accelerator.Synchronize();

                // Unflatten the column-major result into a jagged matrix.
                var data = gpuDistances.GetAsArray();
                double[][] distancesVector = new double[numSample][];
                for (int i = 0; i < numSample; i++)
                {
                    distancesVector[i] = new double[numSample];
                    for (int j = 0; j < numSample; j++)
                    {
                        distancesVector[i][j] = data[i + j * numSample];
                    }
                }
                return distancesVector;
            }
        }
    }

    // Fix: throw a specific exception type instead of the base Exception
    // (callers catching Exception still catch this subclass).
    throw new NotSupportedException("No GPU found.");
}
/// <summary>
/// Static initializer: creates the shared context, binds the first CUDA
/// accelerator, and compiles the triangle-intersection kernel.
/// </summary>
/// <exception cref="NotSupportedException">No CUDA accelerator is available.</exception>
static TriangleGroupGpu()
{
    context = new Context();

    var cudaId = Accelerator.Accelerators.FirstOrDefault(
        acceleratorId => acceleratorId.AcceleratorType == AcceleratorType.Cuda);

    // Bug fix: FirstOrDefault yields a default id when no CUDA device exists,
    // and passing that to Accelerator.Create fails with an obscure error.
    // Fail fast with a clear message instead.
    if (cudaId.AcceleratorType != AcceleratorType.Cuda)
    {
        throw new NotSupportedException("There is no CUDA accelerator present.");
    }

    cuda = Accelerator.Create(context, cudaId);
    kernel = cuda.LoadAutoGroupedStreamKernel<
        Index1, ArrayView<TriangleData>, ArrayView<TriangleResult>, RayData>(ComputeKernel);
}
/// <summary>
/// Demonstrates buffer initialization via the algorithms library: the
/// convenient Initialize overload and an explicitly constructed,
/// allocation-free initializer for a custom struct.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // Enable algorithms library
        context.EnableAlgorithms();

        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create the associated accelerator
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                using (var buffer = accelerator.Allocate <int>(64))
                {
                    // Initializes all values by setting the value to 23.
                    accelerator.Initialize(accelerator.DefaultStream, buffer.View, 23);
                    accelerator.Synchronize();

                    // Download and print the initialized values.
                    var data = buffer.GetAsArray();
                    for (int i = 0, e = data.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data[{i}] = {data[i]}");
                    }
                }

                // Calling the convenient Initialize function on the accelerator
                // involves internal heap allocations. This can be avoided by constructing
                // an initializer explicitly:
                var initializer = accelerator.CreateInitializer <CustomStruct>();

                using (var buffer2 = accelerator.Allocate <CustomStruct>(64))
                {
                    // We can now use the initializer without any further heap allocations
                    // during the invocation. Note that the initializer requires an explicit
                    // accelerator stream.
                    initializer(accelerator.DefaultStream, buffer2.View, new CustomStruct()
                    {
                        First = 23,
                        Second = 42
                    });

                    accelerator.Synchronize();
                    var data = buffer2.GetAsArray();
                    for (int i = 0, e = data.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data2[{i}] = {data[i]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Demonstrates SpecializedValue kernel parameters: each distinct specialized
/// value triggers its own compile-time specialization of the kernel, shown for
/// a plain int, a custom structure, and a generic kernel.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create default accelerator for the given accelerator id
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");
                int groupSize = accelerator.MaxNumThreadsPerGroup;

                // Scenario 1: simple version — three launches, three specializations.
                using (var buffer = accelerator.Allocate <int>(groupSize))
                {
                    var kernel = accelerator.LoadStreamKernel <
                        ArrayView <int>,
                        SpecializedValue <int> >(SpecializedKernel);
                    kernel((1, groupSize), buffer.View, SpecializedValue.New(2));
                    kernel((1, groupSize), buffer.View, SpecializedValue.New(23));
                    kernel((1, groupSize), buffer.View, SpecializedValue.New(42));
                }

                // Scenario 2: custom structure as the specialized value.
                using (var buffer = accelerator.Allocate <int>(groupSize))
                {
                    var kernel = accelerator.LoadStreamKernel <
                        ArrayView <int>,
                        SpecializedValue <CustomStruct> >(SpecializedCustomStructKernel);
                    kernel(
                        (1, groupSize),
                        buffer.View,
                        SpecializedValue.New(
                            new CustomStruct(1, 7)));
                    kernel(
                        (1, groupSize),
                        buffer.View,
                        SpecializedValue.New(
                            new CustomStruct(23, 42)));
                }

                // Scenario 3: generic kernel specialized for long values.
                using (var buffer = accelerator.Allocate <long>(groupSize))
                {
                    var kernel = accelerator.LoadStreamKernel <
                        ArrayView <long>,
                        SpecializedValue <long> >(SpecializedGenericKernel);
                    kernel((1, groupSize), buffer.View, SpecializedValue.New(23L));
                    kernel((1, groupSize), buffer.View, SpecializedValue.New(42L));
                }
            }
        }
    }
}
/// <summary>
/// Demonstrates different use cases of constants and static fields.
/// </summary>
static void Main()
{
    // All kernels reject read accesses to write-enabled static fields by default.
    // However, you can disable this restriction via:
    // ContextFlags.InlineMutableStaticFieldValues.

    // All kernels reject write accesses to static fields by default.
    // However, you can skip such assignments via:
    // ContextFlags.IgnoreStaticFieldStores.

    // Create main context with both relaxations enabled.
    using (var context = new Context(
               ContextFlags.InlineMutableStaticFieldValues |
               ContextFlags.IgnoreStaticFieldStores))
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create default accelerator for the given accelerator id
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Launch ConstantKernel:
                LaunchKernel(
                    accelerator,
                    ConstantKernel,
                    ConstantValue);

                // Launch StaticFieldAccessKernel:
                LaunchKernel(
                    accelerator,
                    StaticFieldAccessKernel,
                    ReadOnlyValue);

                // Launch StaticNonReadOnlyFieldAccessKernel while inlining static field values:
                WriteEnabledValue = DefaultWriteEnabledValue;
                LaunchKernel(
                    accelerator,
                    StaticNonReadOnlyFieldAccessKernel,
                    DefaultWriteEnabledValue);
                // Note that a change of the field WriteEnabledValue will not change the result
                // of a previously compiled kernel that accessed the field WriteEnabledValue.

                // Launch StaticFieldWriteAccessKernel while ignoring static stores:
                // Note that the CPU accelerator will write to the static field during execution!
                LaunchKernel(
                    accelerator,
                    StaticFieldWriteAccessKernel,
                    null);
            }
        }
    }
}
/// <summary>
/// Initializes the ILGPU context, binds the first CUDA accelerator, and
/// compiles the matrix-product kernel.
/// </summary>
public GPU()
{
    Buffers = new Dictionary<int, List<MemoryBuffer<double>>>();
    Context = new Context();

    // First() intentionally throws when no CUDA device is present.
    var cudaId = Accelerator.Accelerators.First(a => a.AcceleratorType == AcceleratorType.Cuda);
    Accelerator = Accelerator.Create(Context, cudaId);

    KernelProduct = Accelerator.LoadAutoGroupedStreamKernel<
        Index, ArrayView2D<double>, ArrayView2D<double>>(Product);
    //KernelProduct2 = Accelerator.LoadAutoGroupedStreamKernel<Index, double[], double[], VariableView<double>>(KernelProduct2);
}
/// <summary>
/// Launches a simple 1D kernel using warp intrinsics: a direct shuffle-down
/// kernel and a generically specialized 64-bit variant.
/// </summary>
static void Main()
{
    // Create main context
    using (var context = new Context())
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create default accelerator for the given accelerator id
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // One group containing exactly one warp.
                var dimension = new GroupedIndex(1, accelerator.WarpSize);

                using (var dataTarget = accelerator.Allocate <int>(accelerator.WarpSize))
                {
                    // Load the explicitly grouped kernel
                    var shuffleDownKernel = accelerator.LoadStreamKernel <GroupedIndex, ArrayView <int> >(ShuffleDownKernel);
                    dataTarget.MemSetToZero();

                    shuffleDownKernel(dimension, dataTarget.View);
                    accelerator.Synchronize();

                    Console.WriteLine("Shuffle-down kernel");
                    var target = dataTarget.GetAsArray();
                    for (int i = 0, e = target.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data[{i}] = {target[i]}");
                    }
                }

                using (var dataTarget = accelerator.Allocate <long>(accelerator.WarpSize))
                {
                    // Load the explicitly grouped kernel, specialized for 64-bit
                    // values via the ShuffleDownInt64 helper type.
                    var reduceKernel = accelerator.LoadStreamKernel <GroupedIndex, ArrayView <long> >(
                        ShuffleDownKernel <ShuffleDownInt64>);
                    dataTarget.MemSetToZero();

                    reduceKernel(dimension, dataTarget.View);
                    accelerator.Synchronize();

                    Console.WriteLine("Generic shuffle-down kernel");
                    var target = dataTarget.GetAsArray();
                    for (int i = 0, e = target.Length; i < e; ++i)
                    {
                        Console.WriteLine($"Data[{i}] = {target[i]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Runs the given filter on a CPU accelerator and returns the filtered image.
/// </summary>
/// <param name="filter">The filter to apply.</param>
/// <returns>The filtered image produced by the filter.</returns>
public Image<Rgba32> AddFilter(IImageFilter filter)
{
    using (var context = new Context())
    {
        // Pick the first CPU accelerator.
        var cpuId = Accelerator.Accelerators
            .Where(a => a.AcceleratorType == AcceleratorType.CPU)
            .FirstOrDefault();

        using (var acc = Accelerator.Create(context, cpuId))
        {
            return filter.PerformFilter(acc);
        }
    }
}
/// <summary>
/// Launches a simple 1D kernel on every accelerator and verifies the result.
/// </summary>
static void Main()
{
    Console.WriteLine("Hello before ILGPU starts");

    // Create main context
    using (var context = new Context())
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create default accelerator for the given accelerator id
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Compiles and loads the implicitly grouped kernel with an automatically determined
                // group size and an associated default stream.
                // This function automatically compiles the kernel (or loads the kernel from cache)
                // and returns a specialized high-performance kernel launcher.
                // Use LoadAutoGroupedKernel to create a launcher that requires an additional
                // accelerator-stream parameter. In this case the corresponding call looks like:
                // var kernel = accelerator.LoadAutoGroupedKernel<Index, ArrayView<int>, int>(MyKernel);
                // For more detail refer to the ImplicitlyGroupedKernels or ExplicitlyGroupedKernels sample.
                var kernel = accelerator.LoadAutoGroupedStreamKernel <
                    ILGPU.Index, ArrayView <int>, int>(MyKernel);

                using (var buffer = accelerator.Allocate <int>(1024))
                {
                    // Launch buffer.Length many threads and pass a view to buffer
                    // Note that the kernel launch does not involve any boxing
                    kernel(buffer.Length, buffer.View, 42);

                    // Wait for the kernel to finish...
                    accelerator.Synchronize();

                    // Resolve and verify data: element i must equal 42 + i.
                    var data = buffer.GetAsArray();
                    for (int i = 0, e = data.Length; i < e; ++i)
                    {
                        if (data[i] != 42 + i)
                        {
                            Console.WriteLine($"Error at element location {i}: {data[i]} found");
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Prints device information for the first CUDA accelerator, or a message
/// when no CUDA device is present.
/// </summary>
public void Test1()
{
    using (var context = new Context())
    {
        var cudaId = Accelerator.Accelerators
            .Where(id => id.AcceleratorType == AcceleratorType.Cuda)
            .FirstOrDefault();

        // FirstOrDefault yields a non-CUDA default id when no CUDA device exists.
        if (cudaId.AcceleratorType != AcceleratorType.Cuda)
        {
            Console.WriteLine(@"There is no CUDA accelerator present. Doing nothing.");
            return;
        }

        using (var cuda = Accelerator.Create(context, cudaId))
        {
            Console.WriteLine($"Performing operations on {cuda}");
            PrintAcceleratorInfo(cuda);
        }
    }
}
/// <summary>
/// Called once per pipeline input (or once when there is none). Enumerates all
/// detected accelerators and writes an info record (id, name, type, max thread
/// count) for each one to the pipeline.
/// </summary>
protected override void ProcessRecord()
{
    // Bug fix: the context and each accelerator were created without using
    // blocks and never disposed, leaking native resources on every invocation.
    using (var context = new Context())
    {
        int id = 0;
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                // Output the accelerator information; all properties are read
                // before the accelerator is disposed.
                WriteObject(new
                {
                    Id = id,
                    Name = accelerator.Name,
                    Type = acceleratorId.AcceleratorType,
                    MaxNumThreads = accelerator.MaxNumThreads
                });
            }
            id++;
        }
    }
}
/// <summary>
/// Runs the same computation with XMath and System.Math kernels and prints
/// both result buffers for comparison.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        // XMath lives in the algorithms library.
        context.EnableAlgorithms();

        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");

                using (var buffer = acc.Allocate<float>(64))
                {
                    // Local helper: sync, download, and dump the buffer.
                    void WriteData()
                    {
                        acc.Synchronize();
                        var data = buffer.GetAsArray();
                        for (var i = 0; i < data.Length; ++i)
                        {
                            Console.WriteLine($"Data[{i}] = {data[i]}");
                        }
                    }

                    Console.WriteLine(nameof(KernelWithXMath));
                    var xmathKernel = acc.LoadAutoGroupedStreamKernel<Index1, ArrayView<float>, float>(
                        KernelWithXMath);
                    xmathKernel(buffer.Length, buffer, 0.1f);
                    WriteData();

                    Console.WriteLine(nameof(KernelWithMath));
                    var mathKernel = acc.LoadAutoGroupedStreamKernel<Index1, ArrayView<float>, float>(
                        KernelWithMath);
                    mathKernel(buffer.Length, buffer, 0.1f);
                    WriteData();
                }
            }
        }
    }
}
/// <summary>
/// Launches a simple 1D kernel using implicit and auto-grouping functionality.
/// </summary>
static void Main()
{
    // Create main context
    using (var context = new Context())
    {
        // For each available accelerator...
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            // Create default accelerator for the given accelerator id
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Compiles and launches an implicitly-grouped kernel with an automatically
                // determined group size. The latter is determined either by ILGPU or
                // the GPU driver. This is the most convenient way to launch kernels using ILGPU.

                // Accelerator.LoadAutoGroupedStreamKernel creates a typed launcher
                // that implicitly uses the default accelerator stream.
                // In order to create a launcher that receives a custom accelerator stream
                // use: accelerator.LoadAutoGroupedKernel<Index, ArrayView<int>, int>(...)
                var myAutoGroupedKernel = accelerator.LoadAutoGroupedStreamKernel <
                    Index1, ArrayView <int>, int>(MyKernel);
                LaunchKernel(accelerator, myAutoGroupedKernel);

                // Compiles and launches an implicitly-grouped kernel with a custom group
                // size. Note that a group size less than the warp size can cause
                // dramatic performance decreases since many lanes of a warp might remain
                // unused.

                // Accelerator.LoadImplicitlyGroupedStreamKernel creates a typed launcher
                // that implicitly uses the default accelerator stream.
                // In order to create a launcher that receives a custom accelerator stream
                // use: accelerator.LoadImplicitlyGroupedKernel<Index, ArrayView<int>, int>(...)
                var myImplicitlyGroupedKernel = accelerator.LoadImplicitlyGroupedStreamKernel <
                    Index1, ArrayView <int>, int>(MyKernel, accelerator.WarpSize);
                LaunchKernel(accelerator, myImplicitlyGroupedKernel);
            }
        }
    }
}
/// <summary>
/// Times a simple kernel launch on each CUDA accelerator and prints the
/// elapsed tick count.
/// </summary>
public void BenchmarkGpuAccelerator()
{
    using (var context = new Context())
    {
        foreach (var id in Accelerator.Accelerators)
        {
            // Benchmark CUDA devices only.
            if (id.AcceleratorType != AcceleratorType.Cuda)
            {
                continue;
            }

            using var acc = Accelerator.Create(context, id);

            var kernel = acc.LoadAutoGroupedStreamKernel<Index1, ArrayView<float>, int>(MyKernel);

            using (var buffer = acc.Allocate<float>(1000000))
            {
                var sw = new Stopwatch();
                sw.Start();

                // Launch buffer.Length threads; the launch involves no boxing.
                kernel(buffer.Length, buffer.View, 42);

                // Block until the kernel has finished before stopping the clock.
                acc.Synchronize();
                sw.Stop();
                Console.WriteLine($"GPU: {sw.ElapsedTicks}");

                // Download the result (verification is intentionally disabled).
                var data = buffer.GetAsArray();
            }
        }
    }
}
/// <summary>
/// Runs the sequencer samples (plain, repeated, batched, repeated-batched)
/// on every available accelerator.
/// </summary>
static void Main(string[] args)
{
    using (var context = new Context())
    {
        foreach (var id in Accelerator.Accelerators)
        {
            using (var acc = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {acc}");
                Sequence(acc);
                RepeatedSequence(acc);
                BatchedSequence(acc);
                RepeatedBatchedSequence(acc);
            }
        }
    }
}