/// <summary>
/// Runs the grouped kernel once on every device exposed by the default context
/// and prints the contents of the output buffer.
/// </summary>
static void Main()
{
    // The context is the root object; devices are enumerated from it.
    using (var context = Context.CreateDefault())
    {
        foreach (var device in context)
        {
            using (var accelerator = device.CreateAccelerator(context))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Grid dimension 2, group dimension = the accelerator's maximum.
                var threadsPerGroup = accelerator.MaxNumThreadsPerGroup;
                KernelConfig launchConfig = (2, threadsPerGroup);

                using (var output = accelerator.Allocate1D<int>(launchConfig.Size))
                {
                    var launcher =
                        accelerator.LoadStreamKernel<ArrayView<int>, int>(GroupedKernel);
                    launcher(launchConfig, output.View, 64);

                    // Wait for the launch to finish before reading the buffer back.
                    accelerator.Synchronize();

                    Console.WriteLine("Default grouped kernel");
                    var results = output.GetAsArray1D();
                    for (var index = 0; index < results.Length; ++index)
                    {
                        Console.WriteLine($"Data[{index}] = {results[index]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Launches the 3D grouped-index kernel with cubic, power-of-two group extents
/// and compares the buffer against an expected array in which every addressed
/// slot contains its own linear index.
/// </summary>
/// <param name="length">Grid extent used along each of the three dimensions.</param>
public void GroupedIndex3EntryPoint(int length)
{
    // Cube root of the maximum group size bounds the edge length of a cubic group.
    var maxEdge = (int)Math.Pow(Accelerator.MaxNumThreadsPerGroup, 1.0 / 3.0);
    for (int edge = 1; edge <= maxEdge; edge <<= 1)
    {
        var groupExtent = new Index3D(edge, edge, edge);
        var config = new KernelConfig(
            new Index3D(length, length, length),
            groupExtent);

        using var buffer = Accelerator.Allocate1D<int>(config.Size);
        buffer.MemSetToZero(Accelerator.DefaultStream);
        Execute(config, buffer.View, groupExtent, config.GridDim);

        var expected = new int[config.Size];
        var groupCount = length * length * length;
        var threadsPerGroup = edge * edge * edge;
        for (int linearGroup = 0; linearGroup < groupCount; ++linearGroup)
        {
            var gridIdx = Index3D.ReconstructIndex(linearGroup, config.GridDim);
            for (int linearThread = 0; linearThread < threadsPerGroup; ++linearThread)
            {
                var groupIdx = Index3D.ReconstructIndex(linearThread, config.GroupDim);

                // Combine grid and group position, then linearize against the grid extent.
                var combined = gridIdx * groupExtent + groupIdx;
                var slot = combined.ComputeLinearIndex(config.GridDim);
                expected[slot] = slot;
            }
        }

        Verify(buffer.View, expected);
    }
}
/// <summary>
/// Runs the kernel for power-of-two group sizes from 2 up to the warp size and
/// compares the buffer against <paramref name="length"/> repetitions of the
/// sequence 0..groupSize-1.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void GroupDivergentControlFlow(int length)
{
    // IMPORTANT: the group size is deliberately capped at the accelerator's
    // warp size.
    //
    // Some OpenCL drivers have been known to deadlock when the group dimensions
    // are larger than the warp size. Using the latest drivers matters as well.
    //
    // e.g. Intel HD Graphics 630 drivers for OpenCL v1.2, with WarpSize = 16
    //   v21.20.16.4550 deadlocks when group dimensions are larger than 8
    //   v26.20.100.7263 deadlocks when group dimensions are larger than 16
    for (int groupSize = 2; groupSize <= Accelerator.WarpSize; groupSize <<= 1)
    {
        using var buffer = Accelerator.Allocate<int>(length * groupSize);
        buffer.MemSetToZero();
        Accelerator.Synchronize();

        var config = new KernelConfig(length, groupSize);
        Execute(config, buffer.View);

        // One 0..groupSize-1 run for each of the `length` groups.
        var expected = Enumerable.Range(0, length)
            .SelectMany(_ => Enumerable.Range(0, groupSize))
            .ToArray();
        Verify(buffer, expected);
    }
}
/// <summary>
/// Runs the grouped kernel once on every device exposed by the default context
/// and prints the contents of the output buffer.
/// </summary>
static void Main()
{
    using var context = Context.CreateDefault();

    foreach (var device in context)
    {
        // Disposed at the end of each loop iteration.
        using var accelerator = device.CreateAccelerator(context);
        Console.WriteLine($"Performing operations on {accelerator}");

        // Grid dimension 2, group dimension = the accelerator's maximum.
        KernelConfig launchConfig = (2, accelerator.MaxNumThreadsPerGroup);

        using var output = accelerator.Allocate1D<int>(launchConfig.Size);
        var launcher =
            accelerator.LoadStreamKernel<ArrayView<int>, int>(GroupedKernel);
        launcher(launchConfig, output.View, 64);

        // GetAsArray1D copies the device buffer into a new host array. It
        // implicitly calls accelerator.DefaultStream.Synchronize(), so the
        // kernel and the copy have completed before the data is read.
        Console.WriteLine("Default grouped kernel");
        var results = output.GetAsArray1D();
        for (var index = 0; index < results.Length; ++index)
        {
            Console.WriteLine($"Data[{index}] = {results[index]}");
        }
    }
}
/// <summary>
/// Creates an accelerator task from an execution handler, the user-requested
/// launch configuration and the runtime launch configuration.
/// </summary>
/// <param name="kernelExecutionDelegate">The handler that executes the kernel.</param>
/// <param name="userConfig">The user-defined grid configuration.</param>
/// <param name="config">The global task configuration.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="userConfig"/> or <paramref name="config"/> is not valid.
/// </exception>
public CPUAcceleratorTask(
    CPUKernelExecutionHandler kernelExecutionDelegate,
    KernelConfig userConfig,
    RuntimeKernelConfig config)
{
    Debug.Assert(kernelExecutionDelegate != null, "Invalid execution delegate");

    // The user configuration is validated first, then the runtime configuration.
    if (!userConfig.IsValid)
    {
        throw new ArgumentOutOfRangeException(
            nameof(userConfig),
            RuntimeErrorMessages.InvalidGridDimension);
    }

    if (!config.IsValid)
    {
        throw new ArgumentOutOfRangeException(
            nameof(config),
            RuntimeErrorMessages.InvalidGridDimension);
    }

    KernelExecutionDelegate = kernelExecutionDelegate;

    // Product of the user-requested grid and group dimensions.
    var userGrid = userConfig.GridDim;
    var userGroup = userConfig.GroupDim;
    TotalUserDim = userGrid * userGroup;

    // Runtime dimensions and dynamic shared-memory settings come from `config`.
    GridDim = config.GridDim;
    GroupDim = config.GroupDim;
    DynamicSharedMemoryConfig = config.SharedMemoryConfig.DynamicConfig;
}
/// <summary>
/// Runs a single worker update cycle against seeded data and asserts that at
/// least one daily leaderboard has a non-null <c>LastUpdate</c>.
/// </summary>
public async Task ExecutesUpdateCycle()
{
    // Arrange
    const bool runOnce = true;
    settings.UpdateInterval = TimeSpan.Zero;
    var telemetryClient = new TelemetryClient();

    var kernel = KernelConfig.CreateKernel();
    kernel.Rebind<string>()
        .ToConstant(databaseConnectionString)
        .WhenInjectedInto(
            typeof(NecroDancerContextOptionsBuilder),
            typeof(LeaderboardsStoreClient));
    kernel.Rebind<ISteamClientApiClient>()
        .To<FakeSteamClientApiClient>()
        .InParentScope();

    // Seed the database through a short-lived context resolved from the kernel.
    using (var seedContext = kernel.Get<NecroDancerContext>())
    {
        seedContext.EnsureSeedData();
    }

    var log = mockLog.Object;

    // Act
    using (var worker = new WorkerRole(settings, telemetryClient, runOnce, kernel, log))
    {
        worker.Start();
        await worker.Completion;
    }

    // Assert
    // NOTE(review): `context` is not the seeding context above (that one is out
    // of scope here); presumably a member of the test class - confirm.
    Assert.True(context.DailyLeaderboards.Any(l => l.LastUpdate != null));
}
/// <summary>
/// Builds a runtime kernel configuration from a kernel configuration and a
/// shared-memory specification.
/// </summary>
/// <param name="kernelConfig">Source of the grid/group dimensions and the shared-memory configuration.</param>
/// <param name="specification">The shared-memory specification to combine with the configuration.</param>
public RuntimeKernelConfig(
    KernelConfig kernelConfig,
    SharedMemorySpecification specification)
{
    // The runtime shared-memory config pairs the specification with the
    // shared-memory settings carried by the kernel configuration.
    var sharedMemoryConfig = kernelConfig.SharedMemoryConfig;
    SharedMemoryConfig = new RuntimeSharedMemoryConfig(
        specification,
        sharedMemoryConfig);

    // Dimensions are copied over unchanged.
    GroupDim = kernelConfig.GroupDim;
    GridDim = kernelConfig.GridDim;
}
/// <summary>
/// Runs the kernel for power-of-two group sizes and compares every buffer
/// element against <c>groupSize - 1</c>.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void GroupBroadcast(int length)
{
    // The upper bound is exclusive: MaxNumThreadsPerGroup itself is not used.
    for (int groupSize = 2; groupSize < Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        using var buffer = Accelerator.Allocate<int>(length * groupSize);

        var config = new KernelConfig(length, groupSize);
        Execute(config, buffer.View);

        var expectedValue = groupSize - 1;
        var expected = Enumerable.Repeat(expectedValue, buffer.Length).ToArray();
        Verify(buffer, expected);
    }
}
/// <summary>
/// Runs two warp-sized kernels (shuffle-down and generic shuffle) on every
/// device of the default context and prints the resulting buffers.
/// </summary>
static void Main()
{
    using var context = Context.CreateDefault();

    foreach (var device in context)
    {
        // Disposed at the end of each loop iteration.
        using var accelerator = device.CreateAccelerator(context);
        Console.WriteLine($"Performing operations on {accelerator}");

        // Grid dimension 1, group dimension = warp size.
        KernelConfig launchConfig = (1, accelerator.WarpSize);

        using (var intBuffer = accelerator.Allocate1D<int>(accelerator.WarpSize))
        {
            var shuffleDown =
                accelerator.LoadStreamKernel<ArrayView<int>>(ShuffleDownKernel);
            intBuffer.MemSetToZero();

            shuffleDown(launchConfig, intBuffer.View);

            // GetAsArray1D copies the device buffer into a new host array. It
            // implicitly calls accelerator.DefaultStream.Synchronize(), so the
            // kernel and the copy have completed before the data is read.
            Console.WriteLine("Shuffle-down kernel");
            var values = intBuffer.GetAsArray1D();
            for (var index = 0; index < values.Length; ++index)
            {
                Console.WriteLine($"Data[{index}] = {values[index]}");
            }
        }

        using (var structBuffer = accelerator.Allocate1D<ComplexStruct>(accelerator.WarpSize))
        {
            var genericShuffle =
                accelerator.LoadStreamKernel<ArrayView<ComplexStruct>, ComplexStruct>(
                    ShuffleGeneric);
            structBuffer.MemSetToZero();

            var input = new ComplexStruct(2, 40.0f, 16.0);
            genericShuffle(launchConfig, structBuffer.View, input);

            // Same implicit synchronization as above applies to this read-back.
            Console.WriteLine("Generic shuffle kernel");
            var values = structBuffer.GetAsArray1D();
            for (var index = 0; index < values.Length; ++index)
            {
                Console.WriteLine($"Data[{index}] = {values[index]}");
            }
        }
    }
}
/// <summary>
/// Runs the kernel for power-of-two group sizes and compares the buffer
/// against the sequence 0..Size-1 of the launch configuration.
/// </summary>
public void MemoryFenceGroupLevel()
{
    // The upper bound is exclusive: MaxNumThreadsPerGroup itself is not used.
    for (int groupSize = 1; groupSize < Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        var config = new KernelConfig(Length, groupSize);
        using var buffer = Accelerator.Allocate<int>(config.Size);

        Execute(config, buffer.View);

        var elementCount = (int)config.Size;
        var expected = Enumerable.Range(0, elementCount).ToArray();
        Verify(buffer, expected);
    }
}
/// <summary>
/// Runs two warp-sized kernels (shuffle-down and generic shuffle) on every
/// available accelerator and prints the resulting buffers.
/// </summary>
static void Main()
{
    using (var context = new Context())
    {
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                Console.WriteLine($"Performing operations on {accelerator}");

                // Grid dimension 1, group dimension = warp size.
                KernelConfig launchConfig = (1, accelerator.WarpSize);

                using (var intBuffer = accelerator.Allocate<int>(accelerator.WarpSize))
                {
                    var shuffleDown =
                        accelerator.LoadStreamKernel<ArrayView<int>>(ShuffleDownKernel);
                    intBuffer.MemSetToZero();

                    shuffleDown(launchConfig, intBuffer.View);

                    // Wait for the launch to finish before reading the buffer back.
                    accelerator.Synchronize();

                    Console.WriteLine("Shuffle-down kernel");
                    var values = intBuffer.GetAsArray();
                    for (var index = 0; index < values.Length; ++index)
                    {
                        Console.WriteLine($"Data[{index}] = {values[index]}");
                    }
                }

                using (var structBuffer = accelerator.Allocate<ComplexStruct>(accelerator.WarpSize))
                {
                    var genericShuffle =
                        accelerator.LoadStreamKernel<ArrayView<ComplexStruct>, ComplexStruct>(
                            ShuffleGeneric);
                    structBuffer.MemSetToZero();

                    var input = new ComplexStruct(2, 40.0f, 16.0);
                    genericShuffle(launchConfig, structBuffer.View, input);

                    // Wait for the launch to finish before reading the buffer back.
                    accelerator.Synchronize();

                    Console.WriteLine("Generic shuffle kernel");
                    var values = structBuffer.GetAsArray();
                    for (var index = 0; index < values.Length; ++index)
                    {
                        Console.WriteLine($"Data[{index}] = {values[index]}");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Builds a kernel configuration containing the index type and one
/// <c>DIMS&lt;letter&gt;</c> entry per element of <c>TensorDims</c>.
/// </summary>
/// <returns>The populated configuration.</returns>
public KernelConfig GetConfig()
{
    var config = new KernelConfig();
    config.Set("INDEX_TYPE", Use32BitIndices ? IndexType32 : IndexType64);

    // Entries are keyed DIMSA, DIMSB, ... in the order of TensorDims.
    for (int dim = 0; dim < TensorDims.Length; ++dim)
    {
        char suffix = (char)('A' + dim);
        string key = "DIMS" + suffix;
        config.Set(key, TensorDims[dim].ToString());
    }

    return config;
}
/// <summary>
/// Runs the kernel for every power-of-two group size up to and including the
/// accelerator maximum and compares the buffer against 0..length*groupSize-1.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void GroupBarrier(int length)
{
    for (int groupSize = 1; groupSize <= Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        using var buffer = Accelerator.Allocate<int>(length * groupSize);

        var config = new KernelConfig(length, groupSize);
        Execute(config, buffer.View);

        var elementCount = length * groupSize;
        var expected = Enumerable.Range(0, elementCount).ToArray();
        Verify(buffer, expected);
    }
}
/// <summary>
/// Initializes the source buffer with 23, runs the kernel with the extra
/// argument 13 and expects every target element to be <c>Int2(23, 36)</c>.
/// </summary>
public void MoveLoadsBarrier()
{
    // The source holds one element more than the target.
    using var source = Accelerator.Allocate1D<int>(Length + 1);
    using var target = Accelerator.Allocate1D<Int2>(Length);
    Initialize(source.View, 23);

    // Grid dimension 1, group dimension Length.
    KernelConfig launchConfig = (1, Length);
    Execute(launchConfig, source.View, target.View, 13);

    var expectedElement = new Int2(23, 36);
    var expected = Enumerable.Repeat(expectedElement, Length).ToArray();
    Verify(target.View, expected);
}
/// <summary>
/// Creates and configures the Ninject kernel for the application, and installs
/// it as the Web API dependency resolver.
/// </summary>
/// <returns>The fully configured kernel.</returns>
/// <remarks>
/// If any registration step throws, the partially configured kernel is disposed
/// before the exception is rethrown, so a failed bootstrap does not leak it.
/// </remarks>
private static IKernel CreateKernel()
{
    var kernel = new StandardKernel();
    try
    {
        kernel.Bind<Func<IKernel>>().ToMethod(ctx => () => new Bootstrapper().Kernel);
        kernel.Bind<IHttpModule>().To<HttpApplicationInitializationHttpModule>();

        // Register the project dependencies for the SSRS host type.
        KernelConfig.RegisterDependenciesFor(kernel, Infrastructure.Constants.HostType.SSRS);

        // Install the Ninject-based dependency resolver into the Web API configuration.
        GlobalConfiguration.Configuration.DependencyResolver =
            new NinjectWebApiDependencyResolver(kernel);

        return kernel;
    }
    catch
    {
        // Release the kernel on a failed setup; the original exception propagates.
        kernel.Dispose();
        throw;
    }
}
/// <summary>
/// Launches with a group size one larger than the accelerator maximum and
/// expects an exception whose base exception is a <c>CudaException</c>,
/// <c>CLException</c> or <see cref="NotSupportedException"/>.
/// </summary>
public void ExceedGroupSize()
{
    var oversizedGroup = Accelerator.MaxNumThreadsPerGroup + 1;
    var config = new KernelConfig(2, oversizedGroup);

    Action launch = () => Execute(config, 0);

    // Only the innermost (base) exception is checked.
    var rootCause = launch.Should().Throw<Exception>()
        .Which.GetBaseException();
    rootCause.Should().Match(ex =>
        ex is CudaException ||
        ex is CLException ||
        ex is NotSupportedException);
}
/// <summary>
/// Runs the kernel with warp-sized groups and compares every buffer element
/// against <c>WarpSize - 1</c>.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void WarpBroadcast(int length)
{
    var warpSize = Accelerator.WarpSize;
    var elementCount = length * warpSize;

    using var buffer = Accelerator.Allocate<int>(elementCount);
    var config = new KernelConfig(length, warpSize);
    Execute(config, buffer.View);

    var expectedValue = warpSize - 1;
    var expected = Enumerable.Repeat(expectedValue, elementCount).ToArray();
    Verify(buffer, expected);
}
/// <summary>
/// Runs the kernel with warp-sized groups and compares the buffer against the
/// sequence 0..length*WarpSize-1.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void WarpBarrier(int length)
{
    var warpSize = Accelerator.WarpSize;
    var elementCount = length * warpSize;

    using var buffer = Accelerator.Allocate1D<int>(elementCount);
    var config = new KernelConfig(length, warpSize);
    Execute(config, buffer.View);

    var expected = Enumerable.Range(0, elementCount).ToArray();
    Verify(buffer.View, expected);
}
/// <summary>
/// Creates the application services, registers them with
/// <c>KernelConfig</c>, and then continues with the base startup logic.
/// </summary>
/// <param name="e">Startup arguments, forwarded to the base implementation.</param>
protected override void OnStartup(StartupEventArgs e)
{
    var volumeListener = new WindowsVolumeListenerService();
    KernelConfig.RegisterInstance<IWindowsVolumeListenerService>(volumeListener);

    // Both services are also handed to the configuration file service below.
    var encryption = new EncryptionService();
    var videoFiles = new VideoFileService();

    var windows = new WindowService();
    KernelConfig.RegisterInstance<IWindowService>(windows);
    KernelConfig.RegisterInstance<IEncryptionService>(encryption);
    KernelConfig.RegisterInstance<IVideoFileService>(videoFiles);

    var configurationFiles = new ConfigurationFileService(encryption, videoFiles);
    KernelConfig.RegisterInstance<IConfigurationFileService>(configurationFiles);

    // Parameterless overload, called after all typed registrations.
    KernelConfig.RegisterInstance();

    base.OnStartup(e);
}
/// <summary>
/// Initializes the source with 23 and the target with <c>Int2(1, 2)</c>, runs
/// the kernel and expects every target element to be <c>Int2(65, 23)</c>.
/// </summary>
public void MoveLoadsStoresAddressSpaces()
{
    // The source holds one element more than the target.
    using var source = Accelerator.Allocate1D<int>(Length + 1);
    using var target = Accelerator.Allocate1D<Int2>(Length);

    Initialize(source.View, 23);
    var initialTarget = new Int2(1, 2);
    Initialize(target.View, initialTarget);

    // Grid dimension 1, group dimension Length.
    KernelConfig launchConfig = (1, Length);
    Execute(launchConfig, source.View, target.View);

    var expectedElement = new Int2(65, 23);
    var expected = Enumerable.Repeat(expectedElement, Length).ToArray();
    Verify(target.View, expected);
}
/// <summary>
/// Runs the kernel for power-of-two group sizes; the first buffer is expected
/// to contain the group size everywhere, the second buffer zero everywhere.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void GroupBarrierPopCount(int length)
{
    for (int groupSize = 2; groupSize <= Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        using var first = Accelerator.Allocate<int>(length * groupSize);
        using var second = Accelerator.Allocate<int>(length * groupSize);

        var config = new KernelConfig(length, groupSize);
        Execute(config, first.View, second.View, new Index1(groupSize));

        var expectedFirst = Enumerable.Repeat(groupSize, (int)first.Length).ToArray();
        Verify(first, expectedFirst);

        // Sized from the first buffer; both buffers were allocated with the same length.
        var expectedSecond = Enumerable.Repeat(0, (int)first.Length).ToArray();
        Verify(second, expectedSecond);
    }
}
/// <summary>
/// Creates the service's collaborators, registers them with
/// <c>KernelConfig</c> and initializes the component.
/// </summary>
public VideosTransfererService()
{
    // The encryption service is only needed for construction and registration,
    // so it stays a local.
    var encryption = new EncryptionService();
    _videoFileService = new VideoFileService();
    _configurationFileService = new ConfigurationFileService(encryption, _videoFileService);
    m_WindowsVolumeListenerService = new WindowsVolumeListenerService();

    // Log files go into a "Logs" folder under the application base directory.
    var logDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Logs");
    _logger = new FileLogger(logDirectory);

    KernelConfig.RegisterInstance<ILogger>(_logger);
    KernelConfig.RegisterInstance<IEncryptionService>(encryption);
    KernelConfig.RegisterInstance<IVideoFileService>(_videoFileService);
    KernelConfig.RegisterInstance<IConfigurationFileService>(_configurationFileService);
    KernelConfig.RegisterInstance<IWindowsVolumeListenerService>(m_WindowsVolumeListenerService);

    InitializeComponent();
}
/// <summary>
/// Runs the kernel twice per group size - once with argument 1 and once with
/// argument 0 - and expects the buffer to be filled with that same value.
/// </summary>
/// <param name="length">
/// Grid dimension passed to the kernel configuration; the test is skipped when
/// it exceeds the maximum number of threads per group.
/// </param>
public void GroupBarrierOr(int length)
{
    Skip.If(length > Accelerator.MaxNumThreadsPerGroup);

    for (int groupSize = 2; groupSize <= Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        using var buffer = Accelerator.Allocate1D<int>(length * groupSize);
        var config = new KernelConfig(length, groupSize);

        // First pass: argument 1, expect all ones.
        Execute(config, buffer.View, new Index1D(1));
        var allOnes = Enumerable.Repeat(1, (int)buffer.Length).ToArray();
        Verify(buffer.View, allOnes);

        // Second pass on the same buffer: argument 0, expect all zeros.
        Execute(config, buffer.View, new Index1D(0));
        var allZeros = Enumerable.Repeat(0, (int)buffer.Length).ToArray();
        Verify(buffer.View, allZeros);
    }
}
/// <summary>
/// Runs the <c>MatrixMulShared</c> kernel on a CUDA accelerator for two N x N
/// matrices and returns the flattened result.
/// </summary>
/// <param name="a">Left operand; flattened with <c>FlatternArr</c> before upload.</param>
/// <param name="b">Right operand; flattened with <c>FlatternArr</c> before upload.</param>
/// <param name="N">Matrix dimension; each device buffer holds N * N elements.</param>
/// <param name="sw">
/// Stopwatch that is restarted right before the kernel launch and stopped after
/// the accelerator has synchronized, so it measures the kernel run only.
/// </param>
/// <returns>The result buffer copied back to a host array of N * N elements.</returns>
/// <remarks>
/// The context, the accelerator and all device buffers are disposed before the
/// method returns (also when an exception is thrown), so repeated calls do not
/// accumulate device resources.
/// </remarks>
public static float[] RunMatrixMulShared(float[][] a, float[][] b, int N, ref Stopwatch sw)
{
    // Create context and accelerator; both are released when the method exits.
    using (var context = new Context())
    using (var gpu = new CudaAccelerator(context))
    {
        // Create typed launcher
        var matrixMulKernelShared = gpu.LoadStreamKernel<
            ArrayView<float>,
            ArrayView<float>,
            ArrayView<float>,
            int>(MatrixMulShared);

        // Allocate device memory for both inputs and the output.
        var buffSize = N * N;
        using (MemoryBuffer<float> d_a = gpu.Allocate<float>(buffSize))
        using (MemoryBuffer<float> d_b = gpu.Allocate<float>(buffSize))
        using (MemoryBuffer<float> d_c = gpu.Allocate<float>(buffSize))
        {
            d_a.CopyFrom(FlatternArr(a), 0, Index1.Zero, buffSize);
            d_b.CopyFrom(FlatternArr(b), 0, Index1.Zero, buffSize);

            // Groups per grid dimension, rounded up so the whole matrix is covered.
            int GrPerDim = (int)Math.Ceiling((float)N / groupSize);

            KernelConfig dimension = (
                new Index2(GrPerDim, GrPerDim),    // Number of groups
                new Index2(groupSize, groupSize)); // Group size (thread count in group)

            sw.Restart();

            matrixMulKernelShared(dimension, d_a.View, d_b.View, d_c.View, N);

            // Wait for the kernel to finish...
            gpu.Synchronize();

            sw.Stop();

            // Copy the result to the host before the device buffers are released.
            return d_c.GetAsArray();
        }
    }
}
/// <summary>
/// Launches a single group with cubic, power-of-two extents and expects the
/// three-element buffer to contain the group dimension's X, Y and Z.
/// </summary>
public void GroupDimension3D()
{
    // Cube root of the maximum group size bounds the edge length of a cubic group.
    var maxEdge = (int)Math.Pow(Accelerator.MaxNumThreadsPerGroup, 1.0 / 3.0);
    for (int edge = 1; edge <= maxEdge; edge <<= 1)
    {
        using var buffer = Accelerator.Allocate<int>(3);

        var gridDim = new Index3(1, 1, 1);
        var groupDim = new Index3(edge, edge, edge);
        var config = new KernelConfig(gridDim, groupDim);
        Execute(config, buffer.View);

        var reported = config.GroupDim;
        var expected = new int[] { reported.X, reported.Y, reported.Z };
        Verify(buffer, expected);
    }
}
/// <summary>
/// Runs the 1D grouped-index kernel for power-of-two group sizes and expects
/// each buffer slot <c>group * groupSize + thread</c> to hold its own index.
/// </summary>
/// <param name="length">Grid dimension passed to the kernel configuration.</param>
public void GroupedIndex1EntryPoint(int length)
{
    // The upper bound is exclusive: MaxNumThreadsPerGroup itself is not used.
    for (int groupSize = 1; groupSize < Accelerator.MaxNumThreadsPerGroup; groupSize <<= 1)
    {
        var config = new KernelConfig(length, groupSize);
        using var buffer = Accelerator.Allocate1D<int>(config.Size);
        Execute(config, buffer.View, groupSize);

        var expected = new int[config.Size];
        for (int group = 0; group < length; ++group)
        {
            var groupOffset = group * groupSize;
            for (int thread = 0; thread < groupSize; ++thread)
            {
                var slot = groupOffset + thread;
                expected[slot] = slot;
            }
        }

        Verify(buffer.View, expected);
    }
}
/// <summary>
/// Launches a single group whose extent along each axis is the loop size
/// multiplied by that axis's mask (at least 1), and expects the three-element
/// buffer to contain the group dimension's X, Y and Z.
/// </summary>
/// <param name="xMask">Multiplier applied to the size along X.</param>
/// <param name="yMask">Multiplier applied to the size along Y.</param>
/// <param name="zMask">Multiplier applied to the size along Z.</param>
public void GroupDimension1D(int xMask, int yMask, int zMask)
{
    // Sizes are limited to 8 or the accelerator maximum, whichever is smaller.
    for (int size = 2; size <= Math.Min(8, Accelerator.MaxNumThreadsPerGroup); size <<= 1)
    {
        using var buffer = Accelerator.Allocate<int>(3);

        // Each extent is clamped to a minimum of 1.
        var groupDim = new Index3(
            Math.Max(size * xMask, 1),
            Math.Max(size * yMask, 1),
            Math.Max(size * zMask, 1));
        var config = new KernelConfig(new Index3(1, 1, 1), groupDim);
        Execute(config, buffer.View);

        var reported = config.GroupDim;
        var expected = new int[] { reported.X, reported.Y, reported.Z };
        Verify(buffer, expected);
    }
}
/// <summary>
/// Launches single-thread groups on a grid whose extent along each axis is the
/// loop size multiplied by that axis's mask (at least 1), and expects the
/// three-element buffer to contain the grid dimension's X, Y and Z.
/// </summary>
/// <param name="xMask">Multiplier applied to the size along X.</param>
/// <param name="yMask">Multiplier applied to the size along Y.</param>
/// <param name="zMask">Multiplier applied to the size along Z.</param>
public void GridDimension(int xMask, int yMask, int zMask)
{
    for (int size = 2; size <= Accelerator.MaxNumThreadsPerGroup; size <<= 1)
    {
        using var buffer = Accelerator.Allocate1D<int>(3);

        // Each extent is clamped to a minimum of 1.
        var gridDim = new Index3D(
            Math.Max(size * xMask, 1),
            Math.Max(size * yMask, 1),
            Math.Max(size * zMask, 1));
        var config = new KernelConfig(gridDim, Index3D.One);
        Execute(config, buffer.View);

        var reported = config.GridDim;
        var expected = new int[] { reported.X, reported.Y, reported.Z };
        Verify(buffer.View, expected);
    }
}
/// <summary>
/// Runs <c>AddSharedKernel</c> over the device buffers of both operands and
/// returns the result as a new matrix.
/// </summary>
/// <param name="one">First operand; must not be null.</param>
/// <param name="two">Second operand; must not be null and must match the dimensions of <paramref name="one"/>.</param>
/// <returns>A new matrix built from the result buffer's contents.</returns>
/// <exception cref="ArgumentNullException">Thrown when either operand is null.</exception>
/// <exception cref="BadDimensionException">Thrown when the row or column counts differ.</exception>
/// <remarks>
/// The temporary result buffer is disposed before returning (also on failure),
/// so repeated calls do not accumulate device memory.
/// </remarks>
public unsafe BufferedFastMatrix<T> AddShared(BufferedFastMatrix<T> one, BufferedFastMatrix<T> two)
{
    if (one == null)
    {
        throw new ArgumentNullException(nameof(one));
    }

    if (two == null)
    {
        throw new ArgumentNullException(nameof(two));
    }

    if ((one.Rows != two.Rows) || (one.Columns != two.Columns))
    {
        throw new BadDimensionException(one.Rows, one.Columns, two.Rows, two.Columns);
    }

    // Start both uploads before allocating the result buffer.
    one.CopyToGPU();
    two.CopyToGPU();

    using (MemoryBuffer2D<T> resultBuffer = accelerator.Allocate<T>(one.Rows, one.Columns))
    {
        one.WaitForCopy();
        two.WaitForCopy();

        KernelConfig config = new KernelConfig(
            accelerator.MaxGridSize.X,
            accelerator.MaxNumThreadsPerGroup);
        AddSharedKernel(config, one.buffer.View, two.buffer.View, resultBuffer);

        // NOTE(review): diagnostic output, kept to preserve existing behavior.
        Console.WriteLine(accelerator.MaxGridSize.X);

        accelerator.Synchronize();

        // The contents are read into a host array here, so the device buffer is
        // no longer needed once this block exits.
        var tempArray = resultBuffer.GetAs2DArray();
        accelerator.Synchronize();

        return new BufferedFastMatrix<T>(tempArray);
    }
}
/// <summary>
/// Startup hook: calls the base implementation and then, inside
/// <c>HandleExceptions</c>, sets up the kernel bindings, handles data backups,
/// runs migrations, initializes features and displays the main view.
/// </summary>
/// <param name="sender">Forwarded unchanged to the base implementation.</param>
/// <param name="e">Forwarded unchanged to the base implementation.</param>
protected override void OnStartup(object sender, StartupEventArgs e)
{
    base.OnStartup(sender, e);
    // The whole bootstrap sequence is passed to HandleExceptions as one delegate.
    HandleExceptions(() =>
    {
        // Core bindings: window manager (singleton scope), kernel config and message bus.
        kernel.Bind <IWindowManager>().To <WindowManager>().InSingletonScope();
        var kernelConfig = KernelConfig.EnableFor(kernel);
        kernel.Bind <IKernelConfig>().ToConstant(kernelConfig);
        IMessageBus messageBus = new MessageBus();
        kernel.Bind <IMessageBus>().ToConstant(messageBus);
        // Instances implementing IHandle are subscribed to the message bus by the
        // first callback and unsubscribed by the second.
        // NOTE(review): presumably the callbacks run on activation and
        // deactivation respectively - confirm against AddPostInitializeActivations.
        kernelConfig.AddPostInitializeActivations(
            (context, reference) =>
            {
                if (reference.Instance is IHandle)
                {
                    messageBus.Subscribe(reference.Instance);
                }
            },
            (context, reference) =>
            {
                if (reference.Instance is IHandle)
                {
                    messageBus.Unsubscribe(reference.Instance);
                }
            });
        // Convention-based bindings cover this assembly plus all plugin assemblies.
        var conventionBindingManager = new ConventionBindingManager(
            kernel,
            new[] { this.GetType().Assembly }.Concat(pluginManager.PluginAssemblies).ToArray());
        conventionBindingManager.BindAssembliesByConvention();
        var persistentDataManager = kernel.Get <PersistenceEnabler>();
        persistentDataManager.SetupPersistenceActivation();
        var logger = GetLogger();
        var backupManager = kernel.Get <BackupManager>();
        var userSettings = kernel.Get <DataBackupUserSettings>();
        // A requested backup is created once; the flag is then cleared and saved.
        if (userSettings.CreateNewBackupRequested)
        {
            backupManager.CreateDataBackup();
            userSettings.CreateNewBackupRequested = false;
            userSettings.Save();
        }
        if (TryRestoreDataBackupIfRequested(backupManager, userSettings, logger))
        {
            // For the persistence systems to properly initialize, restart is required. 
            RestartCurrentApp();
            // Skips every remaining startup step below.
            return;
        }
        // Regular backup maintenance for this session.
        backupManager.CreateTodaysDataBackupIfNotExists();
        backupManager.TrimOldDataBackups(userSettings.BackupRetentionTreshhold);
        backupManager.LockRestoreDataBackupForThisSession();
        var appMigrationsManager = kernel.Get <IAppMigrationsManager>();
        appMigrationsManager.RunMigrations();
        var featureManager = kernel.Get <IFeaturesManager>();
        // NOTE(review): the result of InitFeaturesAsync is discarded; if it returns
        // a task, it is not awaited here - confirm fire-and-forget is intended.
        featureManager.InitFeaturesAsync();
        DisplayRootViewFor <MainViewModel>();
        var appManager = kernel.Get <AppRuntimeManager>();
        appManager.ExecuteAfterStartupSteps();
        // Plugin DLL load failures are logged only at this point, after the main view is shown.
        foreach (var dllLoadError in pluginManager.DllLoadErrors)
        {
            logger.Error(dllLoadError.Exception, "Failed to load plugin DLL: " + dllLoadError.DllFileName);
        }
        // Hook Windows Forms thread exceptions as the final startup step.
        System.Windows.Forms.Application.ThreadException += ApplicationOnThreadException;
    });
}