/// <summary>
/// Copies the contents of a buffer that lives on the given accelerator into a
/// host buffer allocated through a <see cref="CPUAccelerator"/>.
/// </summary>
/// <param name="accelerator">The current accelerator.</param>
/// <param name="dataSize">The number of elements to copy.</param>
static void PerformPinnedCopyToCPUAccelerator(Accelerator accelerator, int dataSize)
{
    // Buffers allocated through the CPUAccelerator class are automatically pinned
    // in memory, which enables async memory transfers via AcceleratorStreams.
    using (var hostAccelerator = new CPUAccelerator(accelerator.Context))
    using (var pinnedHostBuffer = hostAccelerator.Allocate<int>(dataSize))
    {
        var copyStream = accelerator.DefaultStream;

        // Allocate a buffer of the same length on the source device.
        using (var deviceBuffer = accelerator.Allocate<int>(pinnedHostBuffer.Length))
        {
            // Start the copy on the accelerator stream. CopyTo is invoked on the
            // device buffer so that the copy runs on the stream that belongs to
            // the same accelerator.
            deviceBuffer.CopyTo(copyStream, pinnedHostBuffer, 0);

            //
            // Perform other operations...
            //

            // Block until the copy operation has finished.
            copyStream.Synchronize();
        }
    }
}
/// <summary>
/// Constructs a new CPU texture 2D array and creates the staging texture that
/// mirrors the dimensions, format and array size of the given DX texture.
/// </summary>
/// <param name="accelerator">The target accelerator.</param>
/// <param name="d3dDevice">The target DX device.</param>
/// <param name="texture">The target DX texture.</param>
/// <param name="bufferFlags">The used buffer flags.</param>
/// <param name="viewFlags">The used view flags.</param>
internal CPUDirectXTexture2DArray(
    CPUAccelerator accelerator,
    Device d3dDevice,
    Texture2D texture,
    DirectXBufferFlags bufferFlags,
    DirectXViewFlags viewFlags)
    : base(accelerator, d3dDevice, texture, bufferFlags, viewFlags)
{
    var sourceDesc = texture.Description;

    // Describe a CPU readable and writable staging copy of the source texture.
    var stagingDesc = new Texture2DDescription();
    stagingDesc.Width = sourceDesc.Width;
    stagingDesc.Height = sourceDesc.Height;
    stagingDesc.ArraySize = sourceDesc.ArraySize;
    stagingDesc.Format = sourceDesc.Format;
    stagingDesc.MipLevels = 1;
    stagingDesc.SampleDescription = new global::SharpDX.DXGI.SampleDescription(1, 0);
    stagingDesc.Usage = ResourceUsage.Staging;
    stagingDesc.CpuAccessFlags = CpuAccessFlags.Read | CpuAccessFlags.Write;
    stagingDesc.BindFlags = BindFlags.None;
    stagingDesc.OptionFlags = ResourceOptionFlags.None;

    stagingTexture = new Texture2D(d3dDevice, stagingDesc);
}
/// <summary>
/// Runs <c>PaintKernel</c> on a CPU accelerator for a single template and
/// returns the contents of the integer output buffer.
/// </summary>
/// <param name="tempate">The template that is passed to the kernel.</param>
/// <param name="series">The input series; it is uploaded completely.</param>
/// <param name="clusters">
/// The clusters as a jagged array. The width of the first row is used for all
/// rows, so every row must have at least that many elements.
/// </param>
/// <param name="error">The error value that is passed to the kernel.</param>
/// <returns>An array with <c>series.Length</c> elements read back from the accelerator.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="series"/> or <paramref name="clusters"/> is null.
/// </exception>
/// <exception cref="System.ArgumentException"><paramref name="clusters"/> is empty.</exception>
private static int[] Process(Template tempate, float[] series, float[][] clusters, float error)
{
    if (series == null)
    {
        throw new System.ArgumentNullException(nameof(series));
    }
    if (clusters == null)
    {
        throw new System.ArgumentNullException(nameof(clusters));
    }
    if (clusters.Length == 0)
    {
        // The width is taken from the first row, so at least one row is required.
        throw new System.ArgumentException("At least one cluster is required.", nameof(clusters));
    }

    var seriesLength = series.Length;
    var clustersHeight = clusters.Length;
    var clustersWidth = clusters[0].Length;

    // Flatten the jagged array into a rectangular one that can be uploaded in one copy.
    var clusters2d = new float[clustersHeight, clustersWidth];
    for (int i = 0; i < clustersHeight; i++)
    {
        for (int j = 0; j < clustersWidth; j++)
        {
            clusters2d[i, j] = clusters[i][j];
        }
    }

    using (var context = new Context())
    using (var accelerator = new CPUAccelerator(context))
    {
        var paintKernel = accelerator.LoadAutoGroupedStreamKernel<
            Index, Template, ArrayView<float>, ArrayView2D<float>, ArrayView<int>, float>(PaintKernel);

        using (var seriesBuffer = accelerator.Allocate<float>(seriesLength))
        using (var clustersBuffer = accelerator.Allocate<float>(clustersHeight, clustersWidth))
        using (var buffer = accelerator.Allocate<int>(seriesLength))
        {
            seriesBuffer.CopyFrom(series, 0, 0, seriesLength);
            clustersBuffer.CopyFrom(
                clusters2d,
                new Index2(0, 0),
                new Index2(0, 0),
                new Index2(clustersHeight, clustersWidth));

            // The kernel is launched with one index per cluster row.
            paintKernel(clustersHeight, tempate, seriesBuffer, clustersBuffer, buffer, error);

            // Wait for the kernel before reading the results back.
            accelerator.Synchronize();

            return buffer.GetAsArray();
        }
    }
}
/// <summary>
/// Enumerates every available accelerator, prints its device information to the
/// console and finally does the same for a manually configured CPU accelerator.
/// </summary>
static void Main()
{
    // Create main context
    using (var context = new Context())
    {
        // Visit each accelerator id that is reported as available.
        foreach (var id in Accelerator.Accelerators)
        {
            // Create the default accelerator for this id.
            // Note that all accelerators have to be disposed before the global context is disposed
            using (var current = Accelerator.Create(context, id))
            {
                var header = $"AcceleratorId: {id.AcceleratorType}, {current.Name}";
                Console.WriteLine(header);
                PrintAcceleratorInfo(current);
                Console.WriteLine();
            }
        }

        // Accelerators can also be created manually with custom settings.
        // This one is a CPU accelerator with 4 threads and highest thread priority.
        using (var customCpu = new CPUAccelerator(context, 4, ThreadPriority.Highest))
        {
            PrintAcceleratorInfo(customCpu);
        }
    }
}
/// <summary>
/// Runs <c>Reduce</c> and <c>AtomicReduce</c> on every available non-CPU
/// accelerator and afterwards on a custom CPU accelerator.
/// </summary>
/// <param name="args">The command line arguments (unused).</param>
static void Main(string[] args)
{
    using (var context = new Context())
    {
        // Visit each available accelerator and skip the CPU ones.
        foreach (var id in Accelerator.Accelerators)
        {
            if (id.AcceleratorType == AcceleratorType.CPU)
            {
                continue;
            }

            using (var current = Accelerator.Create(context, id))
            {
                Console.WriteLine($"Performing operations on {current}");
                Reduce(current);
                AtomicReduce(current);
            }
        }

        // Create custom CPU context with a warp size > 1
        using (var customCpu = new CPUAccelerator(context, 4, 4))
        {
            Console.WriteLine($"Performing operations on {customCpu}");
            Reduce(customCpu);
            AtomicReduce(customCpu);
        }
    }
}
/// <summary>
/// Constructs a new memory-buffer cache and, for a positive initial length,
/// allocates the backing byte buffer right away.
/// </summary>
/// <param name="accelerator">
/// The associated accelerator to allocate memory on.
/// </param>
/// <param name="initialLength">
/// The initial length of the buffer. No allocation happens for values that are
/// zero or negative.
/// </param>
public CPUMemoryBufferCache(CPUAccelerator accelerator, long initialLength)
    : base(accelerator)
{
    if (initialLength <= 0)
    {
        return;
    }

    cache = accelerator.Allocate1D<byte>(initialLength);
}
/// <summary>
/// Creates a new parallel CPU accelerator whose kind is read from the
/// environment variable <see cref="CPUKindEnvVariable"/>.
/// </summary>
/// <param name="context">The parent context to use.</param>
/// <returns>The created (parallel) CPU accelerator instance.</returns>
/// <remarks>
/// If the environment variable does not exist or cannot be parsed as a
/// <see cref="CPUAcceleratorKind"/> value, the accelerator is created with
/// <see cref="CPUAcceleratorKind.Default"/>.
/// </remarks>
private static CPUAccelerator CreateCPUAccelerator(Context context)
{
    var rawKind = Environment.GetEnvironmentVariable(CPUKindEnvVariable);

    // Fall back to the default kind whenever parsing fails.
    var kind = Enum.TryParse(rawKind, out CPUAcceleratorKind parsedKind)
        ? parsedKind
        : CPUAcceleratorKind.Default;

    return CPUAccelerator.Create(context, kind, CPUAcceleratorMode.Parallel);
}
/// <summary>
/// Runs <c>PaintKernel2d</c> on a CPU accelerator for several templates at once
/// and returns the contents of the integer output buffer.
/// </summary>
/// <param name="tempate">
/// The templates to upload. At most <c>clusterCollection.Count</c> templates are
/// copied; a null list leaves the template buffer untouched.
/// </param>
/// <param name="clusterCollection">
/// One jagged cluster array per template. The number of points per cluster is
/// taken from the first cluster of the first entry.
/// </param>
/// <param name="series">The input series; it is uploaded completely.</param>
/// <param name="error">The error value that is passed to the kernel.</param>
/// <returns>An array with <c>series.Length</c> elements read back from the accelerator.</returns>
public static int[] Process(IList<Template> tempate, IList<float[][]> clusterCollection, float[] series, float error)
{
    var seriesLength = series.Length;
    var templatesCount = clusterCollection.Count;
    var clustersCount = clusterCollection.Max(x => x.Length);
    var pointsInClusters = clusterCollection[0][0].Length;

    // Pack all clusters into one rectangular 3D array. Entries with fewer
    // clusters than the maximum keep zeros in the remaining slots.
    float[,,] clsts = new float[templatesCount, clustersCount, pointsInClusters];
    for (int k = 0; k < templatesCount; k++)
    {
        var clusters = clusterCollection[k];
        for (int i = 0; i < clusters.Length; i++)
        {
            for (int j = 0; j < pointsInClusters; j++)
            {
                clsts[k, i, j] = clusters[i][j];
            }
        }
    }

    using (var context = new Context())
    using (var accelerator = new CPUAccelerator(context))
    {
        var paintKernel = accelerator.LoadAutoGroupedStreamKernel<
            Index2, ArrayView<Template>, ArrayView<float>, ArrayView3D<float>, ArrayView<int>, float>(PaintKernel2d);

        using (var seriesBuffer = accelerator.Allocate<float>(seriesLength))
        using (var templatesBuffer = accelerator.Allocate<Template>(templatesCount))
        using (var clustersBuffer = accelerator.Allocate<float>(templatesCount, clustersCount, pointsInClusters))
        using (var buffer = accelerator.Allocate<int>(seriesLength))
        {
            seriesBuffer.CopyFrom(series, 0, 0, seriesLength);

            // Upload the templates. Previously the buffer was handed to the
            // kernel without ever being filled, so the parameter had no effect.
            if (tempate != null)
            {
                var templates = tempate.ToArray();
                var uploadCount = templates.Length < templatesCount ? templates.Length : templatesCount;
                if (uploadCount > 0)
                {
                    templatesBuffer.CopyFrom(templates, 0, 0, uploadCount);
                }
            }

            clustersBuffer.CopyFrom(
                clsts,
                new Index3(0, 0, 0),
                new Index3(0, 0, 0),
                new Index3(templatesCount, clustersCount, pointsInClusters));

            // The kernel is launched with one index per (template, cluster) pair.
            paintKernel(
                new Index2(templatesCount, clustersCount),
                templatesBuffer,
                seriesBuffer,
                clustersBuffer,
                buffer,
                error);

            // Wait for the kernel before reading the results back.
            accelerator.Synchronize();

            return buffer.GetAsArray();
        }
    }
}
/// <summary cref="DisposeBase.Dispose(bool)"/>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        // Nothing owned by this instance is released on this path.
        base.Dispose(false);
        return;
    }

    // Release the owned components in a fixed order before the base class runs.
    CPUAccelerator.Dispose();
    codeGenerationSemaphore.Dispose();

    IRContext.Dispose();
    ILFrontend.Dispose();
    DefautltILBackend.Dispose();

    DebugInformationManager.Dispose();
    TypeContext.Dispose();

    base.Dispose(true);
}
/// <summary>
/// Constructs a new CPU buffer for DX interop. Allocates the CPU-side memory and
/// a staging buffer on the DX device.
/// </summary>
/// <param name="accelerator">The target accelerator.</param>
/// <param name="d3dDevice">The target DX device.</param>
/// <param name="buffer">The target DX buffer.</param>
/// <param name="bufferFlags">The buffer flags.</param>
/// <param name="viewFlags">The registration flags.</param>
internal CPUDirectXBuffer(
    CPUAccelerator accelerator,
    Device d3dDevice,
    Buffer buffer,
    DirectXBufferFlags bufferFlags,
    DirectXViewFlags viewFlags)
    : base(accelerator, d3dDevice, buffer, bufferFlags, viewFlags)
{
    cpuMemory = Accelerator.Allocate<T, Index1>(Length);

    // Describe a CPU readable and writable staging buffer with one stride per element.
    var stagingDesc = new BufferDescription();
    stagingDesc.Usage = ResourceUsage.Staging;
    stagingDesc.CpuAccessFlags = CpuAccessFlags.Read | CpuAccessFlags.Write;
    stagingDesc.BindFlags = BindFlags.None;
    stagingDesc.OptionFlags = ResourceOptionFlags.None;
    stagingDesc.SizeInBytes = ElementSize * Length;
    stagingDesc.StructureByteStride = ElementSize;

    stagingBuffer = new Buffer(D3DDevice, stagingDesc);
}
/// <summary>
/// Demonstrates the use of array views. Operations on array views are
/// supported on all accelerators.
/// </summary>
static void Main()
{
    // All operations in this sample are performed in CPU memory.
    using (var context = new Context())
    using (var cpu = new CPUAccelerator(context))
    using (var storage = cpu.Allocate<int>(1024))
    {
        // A view that spans the whole buffer.
        ArrayView<int> wholeView = storage.View;

        // Note that accessing an array view which points to memory
        // that is not accessible in the current context triggers
        // an invalid access exception.
        // For instance, array views that point to CUDA memory are
        // inaccessible from the CPU by default (and vice-versa).
        // This restriction does not matter here because everything
        // lives in CPU memory.

        // Unsafe operations on array views.
        UnsafeAccess(wholeView);

        // SubView access
        SubViewAccess(wholeView);

        // VariableView access
        VariableViewAccess(wholeView);

        // Unsafe operations on variable views.
        UnsafeVariableViewAccess(wholeView);
    }
}
/// <summary>
/// Constructs a new CPU DX-interop accelerator. Both arguments are forwarded
/// unchanged to the base class.
/// </summary>
/// <param name="accelerator">The target CPU accelerator.</param>
/// <param name="d3dDevice">The target DX device.</param>
internal CPUDirectXAccelerator(
    CPUAccelerator accelerator,
    Device d3dDevice)
    : base(accelerator, d3dDevice)
{
}
/// <summary>
/// Constructs a new ILGPU main context.
/// </summary>
/// <param name="builder">The parent builder instance.</param>
/// <param name="devices">
/// The array of accelerator descriptions. If it is default or empty, a single
/// default CPU device is registered instead.
/// </param>
internal Context(Builder builder, ImmutableArray<Device> devices)
{
    InstanceId = InstanceId.CreateNew();
    TargetPlatform = Backend.RuntimePlatform;
    RuntimeSystem = new RuntimeSystem();
    Properties = builder.InstantiateProperties();

    // Pick the verifier requested by the builder.
    if (builder.EnableVerifier)
    {
        Verifier = Verifier.Instance;
    }
    else
    {
        Verifier = Verifier.Empty;
    }

    // Set up the main contexts.
    TypeContext = new IRTypeContext(this);
    IRContext = new IRContext(this);

    // Take over the intrinsic manager from the builder.
    IntrinsicManager = builder.IntrinsicManager;

    // The frontend only receives debug information when debug symbols are enabled.
    DebugInformationManager frontendDebugInfo = null;
    if (Properties.DebugSymbolsMode > DebugSymbolsMode.Disabled)
    {
        frontendDebugInfo = DebugInformationManager;
    }

    // Create the frontend; without parallel code generation it is
    // constructed with an explicit argument of 1.
    if (builder.EnableParallelCodeGenerationInFrontend)
    {
        ILFrontend = new ILFrontend(this, frontendDebugInfo);
    }
    else
    {
        ILFrontend = new ILFrontend(this, frontendDebugInfo, 1);
    }

    // Create the default IL backend.
    DefautltILBackend = new DefaultILBackend(this);

    // Set up the default transformer.
    ContextTransformer = Optimizer.CreateTransformer(
        Properties.OptimizationLevel,
        TransformerConfiguration.Transformed,
        Properties.InliningMode);

    // Set up the implicit default CPU device.
    CPUAccelerator = new CPUAccelerator(
        this,
        CPUDevice.Implicit,
        CPUAcceleratorMode.Parallel,
        ThreadPriority.Lowest);

    // Register all devices and fall back to a default CPU device if none were given.
    Devices = devices.IsDefaultOrEmpty
        ? ImmutableArray.Create<Device>(CPUDevice.Default)
        : devices;

    // Group the devices by their accelerator type.
    deviceMapping = new Dictionary<AcceleratorType, List<Device>>(Devices.Length);
    foreach (var device in Devices)
    {
        var type = device.AcceleratorType;
        if (!deviceMapping.TryGetValue(type, out List<Device> sameTypeDevices))
        {
            sameTypeDevices = new List<Device>(8);
            deviceMapping[type] = sameTypeDevices;
        }
        sameTypeDevices.Add(device);
    }
}
/// <summary>
/// Constructs a new memory-buffer cache with an initial length of zero by
/// delegating to the two-argument constructor.
/// </summary>
/// <param name="accelerator">
/// The associated accelerator to allocate memory on.
/// </param>
public CPUMemoryBufferCache(CPUAccelerator accelerator)
    : this(accelerator, 0)
{
}
/// <summary>
/// Creates the DX-interop extension for a CPU accelerator.
/// </summary>
/// <param name="accelerator">The CPU accelerator to wrap.</param>
/// <returns>
/// A new <see cref="CPUDirectXAccelerator"/> bound to the given accelerator and
/// to <c>D3DDevice</c>.
/// </returns>
public DirectXInteropAccelerator CreateCPUExtension(CPUAccelerator accelerator) =>
    new CPUDirectXAccelerator(accelerator, D3DDevice);
/// <summary>
/// Creates the radix-sort-pairs provider implementation for a CPU accelerator.
/// </summary>
/// <param name="accelerator">The CPU accelerator to create the implementation for.</param>
/// <returns>A new CPU-specific provider implementation.</returns>
public RadixSortPairsProviderImplementation CreateCPUExtension(CPUAccelerator accelerator) =>
    new CPU.CPURadixSortPairsProviderImplementation(accelerator);
//Note: This program runs a *lot* faster in Release mode than Debug mode (because bounds checking is disabled in ILGPU).
/// <summary>
/// Searches the range [min, max) in chunks of <c>allocatedMemory</c> numbers for
/// values reported by the <c>SearchForChain</c> kernel, prints every hit and the
/// progress, and finally prints the smallest hit that was found.
/// </summary>
/// <param name="args">The command line arguments (unused).</param>
static void Main(string[] args)
{
    //As a note, it takes around 2 hours on CPU (Core i7 4790K) to search ~120B numbers
    long min = 0;
    long max = 113373373373;

    //Length of an array of longs.
    //Since longs are int64, multiply by ~8 for actual memory use in bytes
    const long allocatedMemory = 200000000;

    //Cache to store results that work in. As this grows, so does the time to search it for good results.
    const int resultCacheSize = 100;

    //Stores the minimum value for the specific depth
    long minForMax = long.MaxValue;

    //The chain length to search for:
    //E.G. 100 (OneHundred) -> 10 (Ten) -> 3 (Three) -> 5 (Five) -> 4 [end] is of length 4.
    const int chainLength = 8;

    //Include punctuation in the count or not.
    //E.G., With punctuation 137 -> "One Hundred and Thirty-Seven" (28 characters)
    //& Without punctuation, 137 -> "OneHundredandThirtySeven" (24 characters)
    const bool includePunctuation = false;

    //Stop when one number is found with specified chain length (obviously invalidates the percentage count)
    const bool stopAtOneFound = false;

    //min is increased as the program runs, so we need a copy of its original value for calculating the % done.
    long originalMin = min;

    using (var context = new Context())
    {
        Accelerator acc;
        try
        {
            acc = new CudaAccelerator(context);
        }
        catch (Exception)
        {
            //no cuda: deliberately fall back to the CPU accelerator
            acc = new CPUAccelerator(context);
        }

        // Accelerators have to be disposed before the context is disposed, so
        // the accelerator gets its own using scope inside the context scope.
        using (acc)
        {
            Console.WriteLine($"Performing ops on {acc.Name}. \n{acc.NumMultiprocessors} processors.");

            //Set up two kernels to get the data
            var searchKernel = acc.LoadAutoGroupedStreamKernel<Index, ArrayView<UInt64>, long, long, bool>(SearchForChain);
            var resultKernel = acc.LoadAutoGroupedStreamKernel<Index, ArrayView<UInt64>, ArrayView<UInt64>>(FindNonZero);

            using (var buffa = acc.Allocate<UInt64>((int)allocatedMemory))
            using (var buffb = acc.Allocate<UInt64>(resultCacheSize))
            {
                //Loop while we haven't gone over the maximum value in search range
                while (min < max)
                {
                    //Search for numbers first (Kernel)
                    searchKernel((int)allocatedMemory, buffa.View, min, chainLength, !includePunctuation);
                    acc.Synchronize();

                    //Read back array to find nonzero entries (Kernel)
                    resultKernel(resultCacheSize, buffb, buffa);
                    acc.Synchronize();

                    var arr = buffb.GetAsArray();
                    bool found = false;

                    //Read back the results array for nonzero entries (Normal .net)
                    foreach (var candidate in arr)
                    {
                        if (candidate == 0)
                        {
                            continue;
                        }

                        found = true;
                        Console.WriteLine(candidate);
                        if (candidate < (ulong)minForMax)
                        {
                            minForMax = (long)candidate;
                        }
                    }

                    //break if we have found a number and had to stop at one found
                    if (found && stopAtOneFound)
                    {
                        break;
                    }

                    min += allocatedMemory;

                    //For displaying the percentage complete
                    long total = max - originalMin;
                    long diff = min - originalMin;
                    Console.WriteLine((((decimal)diff / (decimal)total) * 100).ToString() + "% complete");
                }
            }
        }
    }

    Console.WriteLine(NumberToString(minForMax) + " <=> (" + minForMax.ToString() + ") gave chain length: " + chainLength.ToString());
}
/// <summary>
/// Creates the scan provider implementation for a CPU accelerator.
/// </summary>
/// <param name="accelerator">The CPU accelerator to create the implementation for.</param>
/// <returns>A new CPU-specific scan provider implementation.</returns>
public ScanProviderImplementation CreateCPUExtension(CPUAccelerator accelerator) =>
    new CPU.CPUScanProviderImplementation(accelerator);