/// <summary>
/// Prepares everything needed to render on the given OpenCL platform: devices, context,
/// a command queue on the first device, the program built from "Mandelbrot.cl", its
/// "Mandelbrot" kernel, the initial view rectangle and the output device buffer.
/// </summary>
/// <param name="platform">OpenCL platform the context and devices are taken from.</param>
/// <param name="width">Stored in <c>BitmapWidth</c>; used to size the output buffer.</param>
/// <param name="height">Stored in <c>BitmapHeight</c>; used to size the output buffer.</param>
public Mandelbrot( Platform platform, int width, int height )
{
    // Platform objects; the queue is created on device 0 with profiling enabled.
    openCLPlatform = platform;
    openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
    openCLContext = openCLPlatform.CreateDefaultContext();
    openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);

    // Load and build the kernel source; a build failure shows the build log to the user.
    string kernelSource = File.ReadAllText("Mandelbrot.cl");
    mandelBrotProgram = openCLContext.CreateProgramWithSource(kernelSource);
    try
    {
        mandelBrotProgram.Build();
    }
    catch (OpenCLException)
    {
        MessageBox.Show(
            mandelBrotProgram.GetBuildLog(openCLDevices[0]),
            "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
        Application.Exit();
    }
    mandelbrotKernel = mandelBrotProgram.CreateKernel("Mandelbrot");

    // Initial view rectangle.
    Left = -2.0f;
    Top = 2.0f;
    Right = 2.0f;
    Bottom = -2.0f;

    BitmapWidth = width;
    BitmapHeight = height;

    // Write-only output buffer of width*height*4 bytes (presumably 4 bytes per pixel - confirm against the kernel).
    int outputBytes = width*height*4;
    mandelbrotMemBuffer = openCLContext.CreateBuffer(MemFlags.WRITE_ONLY, outputBytes, IntPtr.Zero);
}
/// <summary>
/// Creates a miner for one device of the manager's context: records the device identity,
/// appends the preprocessor defines for the kernel to <c>OCLMan.Defines</c>, compiles
/// "General.cl", creates its "search" kernel and derives the launch sizes from the
/// kernel's work-group size on that device.
/// </summary>
/// <param name="OCLMan">Manager whose context, defines and compiler are used. Its <c>Defines</c> string is appended to.</param>
/// <param name="vendor">Vendor value stored on the miner.</param>
/// <param name="deviceIndex">Index into <c>OCLMan.Context.Devices</c>.</param>
/// <param name="shouldUseVectors">When true, adds <c>#define VECTORS 1</c> to the defines.</param>
public OpenCLMiner(OpenCLManager OCLMan, Vendor vendor, int deviceIndex, bool shouldUseVectors)
{
    this.vendor = vendor;
    this.deviceIndex = deviceIndex;
    this.OCLMan = OCLMan;

    Device device = OCLMan.Context.Devices[deviceIndex];
    Name = device.Vendor + ":" + device.Name + " (" + deviceIndex + ")";

    OCLMan.Defines += "\r\n#define OUTPUT_SIZE 256";
    OCLMan.Defines += "\r\n#define OUTPUT_MASK 255";

    if (device.Extensions.Contains("cl_amd_media_ops"))
    {
        OCLMan.Defines += "\r\n#define BITALIGN 1";
        // note: defining BFI_INT resulted in invalid calculations on my Barts (8760)
        //if (Array.IndexOf(AMD_Devices, device.Name) != -1)
        //    OCLMan.Defines += "\r\n#define BFI_INT 1";
    }

    if (shouldUseVectors)
        OCLMan.Defines += "\r\n#define VECTORS 1";
    _shouldUseVectors = shouldUseVectors;

    searchKernel = OCLMan.CompileFile("General.cl").CreateKernel("search");

    unsafe
    {
        // Receive the value in a zero-initialised 8-byte local; a stack local needs no
        // pinning, and the zero initialisation keeps the upper bytes clean if the runtime
        // writes fewer than 8 bytes.
        long rawWorkGroupSize = 0;
        IntPtr sizeOuts;
        OpenCL.GetKernelWorkGroupInfo(searchKernel.KernelID, device.DeviceID, KernelWorkGroupInfo.WORK_GROUP_SIZE, (IntPtr)sizeof(long), (void*)&rawWorkGroupSize, out sizeOuts);
        workGroupSize = (uint)rawWorkGroupSize;
    }

    unit = workGroupSize * 256u;
    globalThreads = (uint)(unit * 10);
    Initialized = true;
}
/// <summary>
/// Creates the OpenCL manager and default context on platform 0, compiles the embedded
/// DVR source and creates the "DVR" kernel. If no OpenCL platform is present, the user is
/// informed, application shutdown is requested and nothing else is initialised.
/// </summary>
private void InitializeOpenCL()
{
    if (OpenCL.NumberOfPlatforms == 0)
    {
        MessageBox.Show("OpenCL не поддерживается вашей системой!");
        Application.Exit();
        // Application.Exit() only requests shutdown; without returning here the code below
        // would still try to create a context on a platform that does not exist.
        return;
    }

    manager = new OpenCLManager();
    manager.AttemptUseBinaries = true;
    manager.AttemptUseSource = true;
    manager.RequireImageSupport = false;
    manager.BuildOptions = "";
    manager.CreateDefaultContext(0, DeviceType.ALL);

    // Compile the OpenCL code
    program = manager.CompileSource(Properties.Resources.DVR);
    kernel = program.CreateKernel("DVR");
}
/// <summary>
/// Enqueues a two-dimensional kernel launch expressed as work-group size times number of
/// work-groups per dimension.
/// </summary>
/// <param name="queue">Command queue to enqueue on.</param>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="localSizeX">Work-group size in X. Pass 0 for both local sizes to let the runtime choose.</param>
/// <param name="localSizeY">Work-group size in Y. Pass 0 for both local sizes to let the runtime choose.</param>
/// <param name="globalSizeX">Number of work-groups in X; when both local sizes are 0, the total number of work-items in X.</param>
/// <param name="globalSizeY">Number of work-groups in Y; when both local sizes are 0, the total number of work-items in Y.</param>
public static void EnqueueNDRangeKernel(this CommandQueue queue, Kernel kernel, long localSizeX, long localSizeY, long globalSizeX, long globalSizeY)
{
    // Mirror the one-dimensional overload: a local size of 0 means "no explicit work-group
    // size". Previously this case multiplied the global size by 0 and could never launch.
    if (localSizeX == 0 && localSizeY == 0)
    {
        queue.EnqueueNDRangeKernel(kernel, 2, null, new long[] { globalSizeX, globalSizeY }, null);
        return;
    }

    queue.EnqueueNDRangeKernel(
        kernel,
        2,
        null,
        new long[] { localSizeX * globalSizeX, localSizeY * globalSizeY },
        new long[] { localSizeX, localSizeY });
}
/// <summary>
/// Enqueues a one-dimensional kernel launch. With a non-zero <paramref name="localSize"/>
/// the global range is <c>localSize * globalSize</c>; with <paramref name="localSize"/> 0
/// no local size is passed and <paramref name="globalSize"/> is used as the global range.
/// </summary>
/// <param name="queue">Command queue to enqueue on.</param>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="localSize">Work-group size, or 0 to pass no local size.</param>
/// <param name="globalSize">Number of work-groups, or the full global range when <paramref name="localSize"/> is 0.</param>
public static void EnqueueNDRangeKernel(this CommandQueue queue, Kernel kernel, long localSize, long globalSize)
{
    bool noLocalSize = localSize == 0;

    long[] globalRange = new long[] { noLocalSize ? globalSize : localSize * globalSize };
    long[] localRange = noLocalSize ? null : new long[] { localSize };

    queue.EnqueueNDRangeKernel(kernel, 1, null, globalRange, localRange);
}
/// <summary>
/// Creates one encoding task: stores the task options, creates a command queue on the first
/// device of the program's context, computes the byte sizes of all work buffers, allocates
/// (and, depending on <c>UseMappedMemory</c>, maps) the OpenCL buffers, creates the kernels
/// and allocates the managed sample/output buffers and frame writer.
/// </summary>
/// <param name="_openCLProgram">Built program; supplies the context, device and kernels.</param>
/// <param name="channelsCount">Stored in <c>channelsCount</c>; multiplier for per-task and sample buffer sizes.</param>
/// <param name="channels">Stored in <c>channels</c>; multiplier for residual, rice and partition buffer sizes.</param>
/// <param name="bits_per_sample">Only used here to configure the verification reader.</param>
/// <param name="max_frame_size">Per-frame size used to dimension <c>outputBuffer</c>.</param>
/// <param name="writer">Owning writer; its settings, encode parameters and PCM config are read here.</param>
/// <param name="groupSize">Stored in <c>groupSize</c>.</param>
/// <param name="gpuOnly">Stored in <c>UseGPUOnly</c>; enables the extra partition/rice buffers and kernels.</param>
/// <param name="gpuRice">Only takes effect together with <paramref name="gpuOnly"/>; stored in <c>UseGPURice</c>.</param>
unsafe public FLACCLTask(Program _openCLProgram, int channelsCount, int channels, uint bits_per_sample, int max_frame_size, FLACCLWriter writer, int groupSize, bool gpuOnly, bool gpuRice)
{
    this.UseGPUOnly = gpuOnly;
    // Rice encoding on the device is only enabled when the whole pipeline runs on the device.
    this.UseGPURice = gpuOnly && gpuRice;
    // Mapped memory is used when requested in the settings or when the device type is CPU.
    this.UseMappedMemory = writer._settings.MappedMemory || writer._settings.DeviceType == OpenCLDeviceType.CPU;
    this.groupSize = groupSize;
    this.channels = channels;
    this.channelsCount = channelsCount;
    this.writer = writer;
    openCLProgram = _openCLProgram;
    // Profiling is only enabled on the queue in DEBUG builds.
#if DEBUG
    var prop = CommandQueueProperties.PROFILING_ENABLE;
#else
    var prop = CommandQueueProperties.NONE;
#endif
    openCLCQ = openCLProgram.Context.CreateCommandQueue(openCLProgram.Context.Devices[0], prop);

    int MAX_ORDER = this.writer.eparams.max_prediction_order;
    int MAX_FRAMES = this.writer.framesPerTask;
    // Block size rounded up to a multiple of 4, times the number of frames per task.
    int MAX_CHANNELSIZE = MAX_FRAMES * ((writer.eparams.block_size + 3) & ~3);
    // Buffer sizes in bytes.
    residualTasksLen = sizeof(FLACCLSubframeTask) * 32 * channelsCount * MAX_FRAMES;
    bestResidualTasksLen = sizeof(FLACCLSubframeTask) * channels * MAX_FRAMES;
    int samplesBufferLen = writer.PCM.BlockAlign * MAX_CHANNELSIZE * channelsCount;
    int residualBufferLen = sizeof(int) * MAX_CHANNELSIZE * channels; // need to adjust residualOffset?
    int partitionsLen = sizeof(int) * ((writer.PCM.BitsPerSample > 16 ? 31 : 15) * 2 << 8) * channels * MAX_FRAMES;
    int riceParamsLen = sizeof(int) * (4 << 8) * channels * MAX_FRAMES;
    int autocorLen = sizeof(float) * (MAX_ORDER + 1) * lpc.MAX_LPC_WINDOWS * channelsCount * MAX_FRAMES;
    int lpcDataLen = autocorLen * 32;
    // NOTE(review): resOutLen is computed but not referenced again in this constructor.
    int resOutLen = sizeof(int) * channelsCount * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8) * MAX_FRAMES;
    int wndLen = sizeof(float) * MAX_CHANNELSIZE /** 2*/ * lpc.MAX_LPC_WINDOWS;
    int selectedLen = sizeof(int) * 32 * channelsCount * MAX_FRAMES;
    int riceLen = sizeof(int) * channels * MAX_CHANNELSIZE;

    if (!this.UseMappedMemory)
    {
        // Non-mapped mode: plain READ_WRITE buffers ...
        clSamplesBytes = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, samplesBufferLen / 2);
        clResidual = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, residualBufferLen);
        clBestRiceParams = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, riceParamsLen / 4);
        clResidualTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, residualTasksLen);
        clBestResidualTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, bestResidualTasksLen);
        clWindowFunctions = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, wndLen);
        clSelectedTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, selectedLen);
        clRiceOutput = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, riceLen);

        // ... plus a same-sized ALLOC_HOST_PTR ("Pinned") twin for each of them ...
        clSamplesBytesPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, samplesBufferLen / 2);
        clResidualPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, residualBufferLen);
        clBestRiceParamsPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, riceParamsLen / 4);
        clResidualTasksPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, residualTasksLen);
        clBestResidualTasksPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, bestResidualTasksLen);
        clWindowFunctionsPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, wndLen);
        clSelectedTasksPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, selectedLen);
        clRiceOutputPinned = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, riceLen);

        // ... and the host pointers are obtained by a blocking map of the pinned twins.
        clSamplesBytesPtr = openCLCQ.EnqueueMapBuffer(clSamplesBytesPinned, true, MapFlags.READ_WRITE, 0, samplesBufferLen / 2);
        clResidualPtr = openCLCQ.EnqueueMapBuffer(clResidualPinned, true, MapFlags.READ_WRITE, 0, residualBufferLen);
        clBestRiceParamsPtr = openCLCQ.EnqueueMapBuffer(clBestRiceParamsPinned, true, MapFlags.READ_WRITE, 0, riceParamsLen / 4);
        clResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clResidualTasksPinned, true, MapFlags.READ_WRITE, 0, residualTasksLen);
        clBestResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clBestResidualTasksPinned, true, MapFlags.READ_WRITE, 0, bestResidualTasksLen);
        clWindowFunctionsPtr = openCLCQ.EnqueueMapBuffer(clWindowFunctionsPinned, true, MapFlags.READ_WRITE, 0, wndLen);
        clSelectedTasksPtr = openCLCQ.EnqueueMapBuffer(clSelectedTasksPinned, true, MapFlags.READ_WRITE, 0, selectedLen);
        clRiceOutputPtr = openCLCQ.EnqueueMapBuffer(clRiceOutputPinned, true, MapFlags.READ_WRITE, 0, riceLen);
    }
    else
    {
        // Mapped mode: the working buffers themselves are ALLOC_HOST_PTR and are mapped directly.
        clSamplesBytes = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, (uint)samplesBufferLen / 2);
        clResidual = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, residualBufferLen);
        clBestRiceParams = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, riceParamsLen / 4);
        clResidualTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, residualTasksLen);
        clBestResidualTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, bestResidualTasksLen);
        clWindowFunctions = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, wndLen);
        clSelectedTasks = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, selectedLen);
        clRiceOutput = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR, riceLen);

        clSamplesBytesPtr = openCLCQ.EnqueueMapBuffer(clSamplesBytes, true, MapFlags.READ_WRITE, 0, samplesBufferLen / 2);
        clResidualPtr = openCLCQ.EnqueueMapBuffer(clResidual, true, MapFlags.READ_WRITE, 0, residualBufferLen);
        clBestRiceParamsPtr = openCLCQ.EnqueueMapBuffer(clBestRiceParams, true, MapFlags.READ_WRITE, 0, riceParamsLen / 4);
        clResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clResidualTasks, true, MapFlags.READ_WRITE, 0, residualTasksLen);
        clBestResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clBestResidualTasks, true, MapFlags.READ_WRITE, 0, bestResidualTasksLen);
        clWindowFunctionsPtr = openCLCQ.EnqueueMapBuffer(clWindowFunctions, true, MapFlags.READ_WRITE, 0, wndLen);
        clSelectedTasksPtr = openCLCQ.EnqueueMapBuffer(clSelectedTasks, true, MapFlags.READ_WRITE, 0, selectedLen);
        clRiceOutputPtr = openCLCQ.EnqueueMapBuffer(clRiceOutput, true, MapFlags.READ_WRITE, 0, riceLen);

        //clSamplesBytesPtr = clSamplesBytes.HostPtr;
        //clResidualPtr = clResidual.HostPtr;
        //clBestRiceParamsPtr = clBestRiceParams.HostPtr;
        //clResidualTasksPtr = clResidualTasks.HostPtr;
        //clBestResidualTasksPtr = clBestResidualTasks.HostPtr;
        //clWindowFunctionsPtr = clWindowFunctions.HostPtr;
    }

    // Buffers created identically in both modes (no host mapping requested here).
    clSamples = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, samplesBufferLen);
    clLPCData = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, lpcDataLen);
    clAutocorOutput = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, autocorLen);
    clSelectedTasksSecondEstimate = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, selectedLen);
    clSelectedTasksBestMethod = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, selectedLen);
    if (UseGPUOnly)
    {
        // Partition and rice-parameter buffers are only allocated in GPU-only mode.
        clPartitions = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, partitionsLen);
        clRiceParams = openCLProgram.Context.CreateBuffer(MemFlags.READ_WRITE, riceParamsLen);
    }

    //openCLCQ.EnqueueMapBuffer(clSamplesBytes, true, MapFlags.WRITE, 0, samplesBufferLen / 2);

    // Kernels that are always created.
    clComputeAutocor = openCLProgram.CreateKernel("clComputeAutocor");
    clStereoDecorr = openCLProgram.CreateKernel("clStereoDecorr");
    //cudaChannelDecorr = openCLProgram.CreateKernel("clChannelDecorr");
    clChannelDecorr2 = openCLProgram.CreateKernel("clChannelDecorr2");
    clChannelDecorrX = openCLProgram.CreateKernel("clChannelDecorrX");
    clFindWastedBits = openCLProgram.CreateKernel("clFindWastedBits");
    clComputeLPC = openCLProgram.CreateKernel("clComputeLPC");
    clQuantizeLPC = openCLProgram.CreateKernel("clQuantizeLPC");
    //cudaComputeLPCLattice = openCLProgram.CreateKernel("clComputeLPCLattice");
    clSelectStereoTasks = openCLProgram.CreateKernel("clSelectStereoTasks");
    clEstimateResidual = openCLProgram.CreateKernel("clEstimateResidual");
    clChooseBestMethod = openCLProgram.CreateKernel("clChooseBestMethod");
    if (UseGPUOnly)
    {
        // Kernels only needed in GPU-only mode.
        clEncodeResidual = openCLProgram.CreateKernel("clEncodeResidual");
        if (openCLCQ.Device.DeviceType != DeviceType.CPU)
        {
            // These two kernels are not created when the queue's device is a CPU.
            clCalcPartition = openCLProgram.CreateKernel("clCalcPartition");
            clCalcPartition16 = openCLProgram.CreateKernel("clCalcPartition16");
        }
        clSumPartition = openCLProgram.CreateKernel("clSumPartition");
        clFindRiceParameter = openCLProgram.CreateKernel("clFindRiceParameter");
        clFindPartitionOrder = openCLProgram.CreateKernel("clFindPartitionOrder");
        if (UseGPURice)
        {
            clCalcOutputOffsets = openCLProgram.CreateKernel("clCalcOutputOffsets");
            clRiceEncoding = openCLProgram.CreateKernel("clRiceEncoding");
        }
    }

    // Managed-side buffers and the frame object that writes into outputBuffer.
    samplesBuffer = new int[MAX_CHANNELSIZE * channelsCount];
    outputBuffer = new byte[max_frame_size * MAX_FRAMES + 1];
    frame = new FlacFrame(channelsCount);
    frame.writer = new BitWriter(outputBuffer, 0, outputBuffer.Length);

    if (writer._settings.DoVerify)
    {
        // NOTE(review): the verification reader is configured with a hard-coded 44100 sample rate.
        verify = new FlakeReader(new AudioPCMConfig((int)bits_per_sample, channels, 44100));
        verify.DoCRC = false;
    }
}
/// <summary>
/// Enqueues a kernel over an N-dimensional range given as <c>long</c> arrays, after the
/// events in <paramref name="event_wait_list"/>. Throws <c>OpenCLException</c> when the
/// native call does not return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="workDim">Number of dimensions; also the number of elements converted from each array.</param>
/// <param name="globalWorkOffset">Global offsets, or null.</param>
/// <param name="globalWorkSize">Global sizes, or null.</param>
/// <param name="localWorkSize">Work-group sizes, or null.</param>
/// <param name="numEventsInWaitList">Number of events in <paramref name="event_wait_list"/>.</param>
/// <param name="event_wait_list">Events to wait for, or null.</param>
public void EnqueueNDRangeKernel(Kernel kernel, int workDim, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize, int numEventsInWaitList, Event[] event_wait_list)
{
    // Scratch space on the stack; a pointer is replaced by null when its managed array is absent.
    IntPtr* offsetPtr = stackalloc IntPtr[workDim];
    IntPtr* globalPtr = stackalloc IntPtr[workDim];
    IntPtr* localPtr = stackalloc IntPtr[workDim];
    IntPtr* waitPtr = stackalloc IntPtr[numEventsInWaitList];

    if (globalWorkOffset == null)
    {
        offsetPtr = null;
    }
    if (globalWorkSize == null)
    {
        globalPtr = null;
    }
    if (localWorkSize == null)
    {
        localPtr = null;
    }
    if (event_wait_list == null)
    {
        waitPtr = null;
    }

    InteropTools.AToIntPtr(workDim, globalWorkOffset, offsetPtr);
    InteropTools.AToIntPtr(workDim, globalWorkSize, globalPtr);
    InteropTools.AToIntPtr(workDim, localWorkSize, localPtr);
    InteropTools.ConvertEventsToEventIDs(numEventsInWaitList, event_wait_list, waitPtr);

    ErrorCode status = OpenCL.EnqueueNDRangeKernel(CommandQueueID, kernel.KernelID, workDim, offsetPtr, globalPtr, localPtr, numEventsInWaitList, waitPtr, null);
    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueNDRangeKernel failed with error code " + status, status);
}
/// <summary>
/// Marks OpenCL as not initialised, then disposes and clears every OpenCL object this
/// instance holds, in the order: sampler, input image, output image, kernel, program,
/// command queue, context. Members that are already null are skipped.
/// </summary>
public void ReleaseDeviceResources()
{
    oclFullyInitialized = false;

    var sampler = OCLSampler;
    if (sampler != null)
    {
        sampler.Dispose();
        OCLSampler = null;
    }

    var inputImage = OCLInputImage;
    if (inputImage != null)
    {
        inputImage.Dispose();
        OCLInputImage = null;
    }

    var outputImage = OCLOutputImage;
    if (outputImage != null)
    {
        outputImage.Dispose();
        OCLOutputImage = null;
    }

    var filterKernel = FilterKernel;
    if (filterKernel != null)
    {
        filterKernel.Dispose();
        FilterKernel = null;
    }

    var builtProgram = oclProgram;
    if (builtProgram != null)
    {
        builtProgram.Dispose();
        oclProgram = null;
    }

    var commandQueue = oclCQ;
    if (commandQueue != null)
    {
        commandQueue.Dispose();
        oclCQ = null;
    }

    var context = oclContext;
    if (context != null)
    {
        context.Dispose();
        oclContext = null;
    }
}
/// <summary>
/// Test all versions of:
///
/// EnqueueNDRangeKernel
///
/// The tests just issue a dummy kernel a bunch of times with the various overloads
/// (IntPtr[], int[] and long[] work sizes; with and without wait list / output event).
/// </summary>
/// <param name="c">Context (not used by this test).</param>
/// <param name="cq">Command queue the kernel is enqueued on.</param>
/// <param name="k">Dummy kernel to enqueue.</param>
private void TestEnqueueNDRangeKernel(Context c, CommandQueue cq, Kernel k )
{
    Output("Testing EnqueueNDRangeKernel");

    Event event0 = null;
    Event event1 = null;

    try
    {
        {
            IntPtr[] globalWorkSize = new IntPtr[] { (IntPtr)10 };
            IntPtr[] localWorkSize = new IntPtr[] { (IntPtr)1 };
            cq.EnqueueNDRangeKernel(k, (uint)1, null, globalWorkSize, localWorkSize);
            cq.EnqueueNDRangeKernel(k, (uint)1, null, globalWorkSize, localWorkSize, 0, null, out event0);
            Event[] waitList = new Event[] { event0 };
            cq.EnqueueNDRangeKernel(k, (uint)1, null, globalWorkSize, localWorkSize, 1, waitList, out event1);
            cq.Finish();
            DisposeAndClearEvent(ref event0);
            DisposeAndClearEvent(ref event1);
        }
        {
            int[] globalWorkSize = new int[] { (int)10 };
            int[] localWorkSize = new int[] { (int)1 };
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize);
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize, 0, null, out event0);
            Event[] waitList = new Event[] { event0 };
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize, 1, waitList, out event1);
            cq.Finish();
            DisposeAndClearEvent(ref event0);
            DisposeAndClearEvent(ref event1);
        }
        {
            long[] globalWorkSize = new long[] { (long)10 };
            long[] localWorkSize = new long[] { (long)1 };
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize);
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize, 0, null, out event0);
            Event[] waitList = new Event[] { event0 };
            cq.EnqueueNDRangeKernel(k, 1, null, globalWorkSize, localWorkSize, 1, waitList, out event1);
            cq.Finish();
            DisposeAndClearEvent(ref event0);
            DisposeAndClearEvent(ref event1);
        }
    }
    catch (Exception e)
    {
        Error("Exception during testing: " + e.ToString());
    }
    finally
    {
        // Only events that were created but not yet released are still non-null here, so
        // nothing is disposed twice (the original disposed the last pair a second time).
        DisposeAndClearEvent(ref event0);
        DisposeAndClearEvent(ref event1);
    }
}

/// <summary>Disposes the event if it is set and clears the reference so it cannot be disposed again.</summary>
private static void DisposeAndClearEvent(ref Event ev)
{
    if (ev != null)
    {
        ev.Dispose();
        ev = null;
    }
}
/// <summary>
/// Creates the OpenCL manager and default context on platform 0, compiles the embedded
/// DVR source and creates the "DVR" kernel. If no OpenCL platform is present, the user is
/// informed, application shutdown is requested and nothing else is initialised.
/// </summary>
private void InitializeOpenCL()
{
    if (OpenCL.NumberOfPlatforms == 0)
    {
        MessageBox.Show("OpenCL is not supported on your system!");
        Application.Exit();
        // Application.Exit() only requests shutdown; without returning here the code below
        // would still try to create a context on a platform that does not exist.
        return;
    }

    manager = new OpenCLManager();
    manager.AttemptUseBinaries = true;
    manager.AttemptUseSource = true;
    manager.RequireImageSupport = false;
    manager.BuildOptions = "";
    manager.CreateDefaultContext(0, DeviceType.ALL);

    // Compiling OpenCL code
    program = manager.CompileSource(Properties.Resources.DVR);
    kernel = program.CreateKernel("DVR");
}
/// <summary>
/// Create an OpenCLManager, configure it, and then create a context using all devices in platform 0.
/// Once the context is up and running we compile our source file "OpenCLFunctions.cl".
/// We can then extract named kernels from the compiled program using CreateKernel.
///
/// For more advanced scenarios, one might use the functions in the Platform class
/// to query devices, create contexts etc. Platforms can be enumerated using
/// for( int i=0; i&lt;OpenCL.NumberOfPlatforms; i++ )
///     Platform p = OpenCL.GetPlatform(i);
///
/// If no OpenCL platform is available, a message is shown, application shutdown is
/// requested and nothing is initialised.
/// </summary>
private void InitializeOpenCL()
{
    if (OpenCL.NumberOfPlatforms == 0)
    {
        MessageBox.Show("OpenCL not available");
        Application.Exit();
        // Application.Exit() only requests shutdown; stop here instead of trying to
        // create a context on a platform that does not exist.
        return;
    }

    OCLMan = new OpenCLManager();
    // Attempt to save binaries after compilation, as well as load precompiled binaries
    // to avoid compilation. Usually you'll want this to be true.
    OCLMan.AttemptUseBinaries = true;
    // Attempt to compile sources. This should probably be true for almost all projects.
    // Setting it to false means that when you attempt to compile "mysource.cl", it will
    // only scan the precompiled binary directory for a binary corresponding to a source
    // with that name. There's a further restriction that the compiled binary also has to
    // use the same Defines and BuildOptions
    OCLMan.AttemptUseSource = true;
    // Binary and source paths
    // This is where we store our sources and where compiled binaries are placed
    OCLMan.BinaryPath = @"OpenCL\bin";
    OCLMan.SourcePath = @"OpenCL\src";
    // If true, RequireImageSupport will filter out any devices without image support
    // In this project we don't need image support though, so we set it to false
    OCLMan.RequireImageSupport = false;
    // The Defines string gets prepended to any and all sources that are compiled
    // and serve as a convenient way to pass configuration information to the compilation process
    OCLMan.Defines = "#define MyCompany_MyProject_Define 1";
    // The BuildOptions string is passed directly to clBuild and can be used to do debug builds etc
    OCLMan.BuildOptions = "";

    OCLMan.CreateDefaultContext(0, DeviceType.ALL);

    OCLProgram = OCLMan.CompileFile("OpenCLFunctions.cl");

    // Offer every device of the context in the selector as "Vendor:Name".
    for (int i = 0; i < OCLMan.Context.Devices.Length; i++)
        comboBoxDeviceSelector.Items.Add(OCLMan.Context.Devices[i].Vendor+":"+OCLMan.Context.Devices[i].Name);
    // Selecting index 0 on an empty list throws, so only preselect when there is an entry.
    if (comboBoxDeviceSelector.Items.Count > 0)
        comboBoxDeviceSelector.SelectedIndex = 0;

    CrossFadeKernel = OCLProgram.CreateKernel("CrossFade");
}
/// <summary>
/// Enqueues a single execution of <paramref name="kernel"/> on this command queue, with
/// no wait list and no output event. Throws <c>OpenCLException</c> when the native call
/// does not return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
public void EnqueueTask(Kernel kernel)
{
    ErrorCode status = (ErrorCode)OpenCL.EnqueueTask(CommandQueueID, kernel.KernelID, (uint)0, null, null);
    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueTask failed with error code " + status, status);
}
/// <summary>
/// Enqueues a single execution of <paramref name="kernel"/> on this command queue after
/// the given events, without returning an output event. Throws <c>OpenCLException</c>
/// when the native call does not return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="numEventsInWaitList">Number of events in <paramref name="event_wait_list"/>.</param>
/// <param name="event_wait_list">Events passed to the native call as the wait list.</param>
public void EnqueueTask(Kernel kernel, int numEventsInWaitList, Event[] event_wait_list)
{
    ErrorCode status = (ErrorCode)OpenCL.EnqueueTask(
        CommandQueueID,
        kernel.KernelID,
        (uint)numEventsInWaitList,
        InteropTools.ConvertEventsToEventIDs(event_wait_list),
        null);

    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueTask failed with error code " + status, status);
}
/// <summary>
/// Enqueues a kernel over an N-dimensional range given as <c>long</c> arrays, with no wait
/// list and no output event. Throws <c>OpenCLException</c> when the native call does not
/// return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="workDim">Number of dimensions; also the number of elements converted from each array.</param>
/// <param name="globalWorkOffset">Global offsets, or null.</param>
/// <param name="globalWorkSize">Global sizes, or null.</param>
/// <param name="localWorkSize">Work-group sizes, or null.</param>
public void EnqueueNDRangeKernel(Kernel kernel, int workDim, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize)
{
    // Scratch space on the stack; a pointer is replaced by null when its managed array is absent.
    IntPtr* offsetPtr = stackalloc IntPtr[workDim];
    IntPtr* globalPtr = stackalloc IntPtr[workDim];
    IntPtr* localPtr = stackalloc IntPtr[workDim];

    if (globalWorkOffset == null)
    {
        offsetPtr = null;
    }
    if (globalWorkSize == null)
    {
        globalPtr = null;
    }
    if (localWorkSize == null)
    {
        localPtr = null;
    }

    InteropTools.AToIntPtr(workDim, globalWorkOffset, offsetPtr);
    InteropTools.AToIntPtr(workDim, globalWorkSize, globalPtr);
    InteropTools.AToIntPtr(workDim, localWorkSize, localPtr);

    ErrorCode status = OpenCL.EnqueueNDRangeKernel(CommandQueueID, kernel.KernelID, workDim, offsetPtr, globalPtr, localPtr, 0, null, null);
    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueNDRangeKernel failed with error code " + status, status);
}
/// <summary>
/// Enqueues a kernel over an N-dimensional range given as <c>IntPtr</c> arrays, with no
/// wait list and no output event. Throws <c>OpenCLException</c> when the native call does
/// not return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="workDim">Number of dimensions.</param>
/// <param name="globalWorkOffset">Global offsets, passed through unchanged.</param>
/// <param name="globalWorkSize">Global sizes, passed through unchanged.</param>
/// <param name="localWorkSize">Work-group sizes, passed through unchanged.</param>
public void EnqueueNDRangeKernel(Kernel kernel, uint workDim, IntPtr[] globalWorkOffset, IntPtr[] globalWorkSize, IntPtr[] localWorkSize)
{
    ErrorCode status = OpenCL.EnqueueNDRangeKernel(
        CommandQueueID,
        kernel.KernelID,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        0,
        null,
        null);

    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueNDRangeKernel failed with error code " + status, status);
}
/// <summary>
/// Runs kernel <paramref name="k"/> once with a <c>Float2</c> argument and a two-float
/// buffer that uses a pinned managed array as host memory, then maps and unmaps the buffer.
/// </summary>
/// <param name="c">Context used to create the buffer.</param>
/// <param name="cq">Command queue the work is enqueued on.</param>
/// <param name="k">Kernel taking the <c>Float2</c> as argument 0 and the buffer as argument 1.</param>
private unsafe void TestVecKernel(Context c, CommandQueue cq, Kernel k)
{
    Float2 f2 = new Float2(0.0f,1.0f);
    float[] memory = new float[2];

    fixed (float* pMemory = memory)
    {
        Mem mem = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR), 4 * 2, pMemory);
        try
        {
            k.SetArg(0, f2);
            k.SetArg(1, mem);
            cq.EnqueueTask(k);
            cq.EnqueueBarrier();
            IntPtr pMap = cq.EnqueueMapBuffer(mem, true, MapFlags.READ, 0, 2 * 4);
            cq.EnqueueUnmapMemObject(mem, pMap);

            // The buffer uses the pinned array as its storage (USE_HOST_PTR), so make sure
            // every queued command touching it has completed before the array is unpinned.
            cq.Finish();
        }
        finally
        {
            // Release the buffer while the host memory it refers to is still pinned;
            // previously the buffer was never released.
            mem.Dispose();
        }
    }
}
/// <summary>
/// Enqueues ten tasks with kernel argument 5000000 followed by ten with argument 500,
/// waits for all of them and reports every task whose execution status is an error code.
/// </summary>
/// <param name="c">Context (not used by this test).</param>
/// <param name="cq">Command queue the tasks are enqueued on.</param>
/// <param name="kernel">Kernel whose argument 0 is set to the two values above.</param>
private void TestCommandQueueAsync(Context c, CommandQueue cq, Kernel kernel )
{
    List<Event> events = new List<Event>();
    Event clEvent;

    Output("Testing asynchronous task issuing (clEnqueueTask) and waiting for events");

    // Issue a bunch of slow operations
    kernel.SetArg(0, 5000000);
    for (int i = 0; i < 10; i++)
    {
        cq.EnqueueTask(kernel, 0, null, out clEvent);
        events.Add(clEvent);
    }

    // Issue a bunch of fast operations
    kernel.SetArg(0, 500);
    for (int i = 0; i < 10; i++)
    {
        cq.EnqueueTask(kernel, 0, null, out clEvent);
        events.Add(clEvent);
    }

    Event[] eventList = events.ToArray();
    cq.EnqueueWaitForEvents(eventList.Length, eventList);

    // The enqueued wait only orders commands inside the queue; it does not block the host.
    // Drain the queue so the statuses read below are final and no event is released while
    // its command is still pending.
    cq.Finish();

    foreach (Event finished in eventList)
    {
        if ((int)finished.ExecutionStatus < 0)
        {
            Output(cq.Device.Name + ": TestCommandQueueAsync failed with error code " + (ErrorCode)finished.ExecutionStatus);
        }
        finished.Dispose();
    }
    events.Clear();
}
/// <summary>
/// Sets up a simulation on an Nxp x Nyp x Nzp x Ntm lattice: stores the parameters, builds
/// the OpenCL compiler options, computes work-group sizes and buffer lengths, allocates
/// host buffers, creates the OpenCL context/queue/program/kernels/device buffers, binds
/// the fixed kernel arguments, uploads the initial data, creates the solver vectors and
/// finally runs a CalculateS / BackupLink / RestoreLink sequence.
/// </summary>
/// <param name="Nxp">Lattice extent stored in <c>Nx</c>.</param>
/// <param name="Nyp">Lattice extent stored in <c>Ny</c>.</param>
/// <param name="Nzp">Lattice extent stored in <c>Nz</c>.</param>
/// <param name="Ntm">Lattice extent stored in <c>Nt</c>.</param>
/// <param name="Bbeta">Converted to <c>floattype</c> and stored in <c>betagauge</c>.</param>
/// <param name="Flux">Converted to <c>floattype</c> and stored in <c>flux</c>; also passed to the kernels as the <c>phi</c> define.</param>
public Core(int Nxp,int Nyp, int Nzp, int Ntm, double Bbeta, double Flux)
{
    Nx = Nxp; Ny = Nyp; Nz = Nzp; Nt = Ntm;
    betagauge = (floattype)Bbeta;
    flux = (floattype)Flux;
    // Total number of sites and number of sites in one time slice.
    N = Nx * Ny * Nz * Nt;
    Nspace = Nx * Ny * Nz;

    // Compiler options: lattice dimensions and the floating-point type are passed as -D defines.
    string strforcompiler = "-D Nt=" + Nt.ToString() + " -D Nxyz=" + (Nx * Ny * Nz).ToString() + " -D Nxy=" + (Nx*Ny).ToString() + " -D Nx="+(Nx).ToString()+" -D Ny="+(Ny).ToString()+" -D Nz="+(Nz).ToString();
    strforcompiler += typeof(floattype) == typeof(double) ? " -D floattype=double -D floattype2=double2 -D floattype4=double4" : " -D floattype=float -D floattype2=float2 -D floattype4=float4";
    // Replace(',', '.') turns a comma decimal separator produced by ToString() into a dot.
    strforcompiler += " -D phi=" + flux.ToString().Replace(',', '.') + " -D KAPPA=" + kappa.ToString().Replace(',', '.');
    // Prepended to the kernel source only when floattype is double (see CreateProgramWithSource below).
    string fp64support = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

    // Work-group sizes and the resulting number of groups for the three reduction-style kernels.
    Plocalsize = AdjustLocalSize(Nspace);
    Slocalsize = AdjustLocalSize(N / 2);
    XhermYlocalsize = AdjustLocalSize(4 * N);
    // Plocalsize = 16; Slocalsize = 16;
    PNumGroups = Nx * Ny * Nz / Plocalsize;
    SNumGroups = N/2 / Slocalsize;
    XhermYNumGroups = 4*4*N / XhermYlocalsize;

    // Byte lengths of the link and seed buffers.
    // NOTE(review): 4 * 9 * 2 presumably means 4 links per site, 3x3 matrices, complex entries - confirm.
    BufferLength = N * 4 * 9 * 2 * sizeof(floattype);
    SeedBufLen = N * sizeof(Int32)/2 * 4;
    AllocBuffers();

    // OpenCL objects: platform 0, all its devices, default context, profiling queue on device 0.
    openCLPlatform = OpenCL.GetPlatform(0);
    openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
    openCLContext = openCLPlatform.CreateDefaultContext();
    openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);
    // The program source is the concatenation of "MyKernel.cl" and "dirak_mul.cl".
    MyKernelProgram = openCLContext.CreateProgramWithSource( (typeof(floattype)==typeof(double)?fp64support:"") + File.ReadAllText("MyKernel.cl")+File.ReadAllText("dirak_mul.cl"));
    try
    {
        MyKernelProgram.Build(openCLDevices, strforcompiler, null, IntPtr.Zero);
    }
    catch (OpenCLException)
    {
        // A build failure only shows the build log; execution continues with the kernel creation below.
        string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]);
        MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
        // Application.Exit();
    }

    // Kernels.
    MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel");
    PReductionKernel = MyKernelProgram.CreateKernel("PLoop");
    SReductionKernel = MyKernelProgram.CreateKernel("CalcS");
    DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul");
    FillWithKernel = MyKernelProgram.CreateKernel("FillWith");
    FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith");
    FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom");
    AXPYKernel = MyKernelProgram.CreateKernel("AXPY");
    XhermYKernel = MyKernelProgram.CreateKernel("XhermY");
    BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink");
    RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink");

    // Device buffers (all READ_WRITE) and two unmanaged host blocks for reading results back.
    SeedMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedBufLen, IntPtr.Zero);
    LinkMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), BufferLength, IntPtr.Zero);
    PGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * PNumGroups, IntPtr.Zero);
    PResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    SGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * SNumGroups, IntPtr.Zero);
    SResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    XhermYGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2*XhermYNumGroups, IntPtr.Zero);
    XhermYresMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2, IntPtr.Zero);
    // NOTE(review): memory from AllocHGlobal is not freed in this constructor - confirm it is released elsewhere.
    XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2);
    SeedVectorMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedVectorBuf.Length * sizeof(int), IntPtr.Zero);
    StorageMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), linksize, IntPtr.Zero);
    dSmem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize);

    // Fixed kernel arguments.
    MyKernelKernel.SetArg(0, (byte)EvenOdd);
    MyKernelKernel.SetArg(1, (floattype)betagauge);
    MyKernelKernel.SetArg(2, (floattype)flux);
    MyKernelKernel.SetArg(3, SeedMem);
    MyKernelKernel.SetArg(4, LinkMem);

    PReductionKernel.SetArg(0, LinkMem);
    PReductionKernel.SetArg(1, PGroupMem);
    PReductionKernel.SetArg(2, PResMem);
    // Argument 3 is given as a size with a zero pointer (presumably a local-memory scratch area - confirm against the kernel).
    IntPtr ptr = new IntPtr(Plocalsize * floatsize);
    PReductionKernel.SetArg(3, ptr, IntPtr.Zero);

    SReductionKernel.SetArg(0, LinkMem);
    SReductionKernel.SetArg(1, SGroupMem);
    SReductionKernel.SetArg(2, SResMem);
    IntPtr ptr1 = new IntPtr(Slocalsize * floatsize);
    SReductionKernel.SetArg(3, ptr1, IntPtr.Zero);

    // Only arguments 2-4 of XhermY are bound here.
    XhermYKernel.SetArg(2, XhermYresMem);
    XhermYKernel.SetArg(3, XhermYGroupMem);
    XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize*floatsize*2),IntPtr.Zero);

    // Blocking uploads of the seed, link and seed-vector data to the device.
    openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed);
    openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip);
    openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length*sizeof(int), ipseedvector);

    rhat0 = new Vector(); //init BICGStab vectors
    phi = new Vector();
    r0 = new Vector();
    //rprev = new Vector();
    pi = new Vector();
    vi = new Vector();
    t = new Vector();
    s = new Vector();
    // xprev = new Vector();
    // vprev = new Vector();
    // pprev = new Vector();
    temp = new Vector();
    ri = new Vector();
    x = new Vector(); //for fermion update
    chi = new Vector();

    // Evaluate S before a link backup, after it and after the restore. The three values
    // are only consumed by the commented-out MessageBox below.
    CalculateS();
    double s1 = S[0];
    BackupLink(0, 0,1, 0, 1);
    CalculateS();
    double s2 = S[0];
    RestoreLink(0, 0, 1, 0, 1);
    CalculateS();
    double s3 = S[0];
    //MessageBox.Show(s1.ToString() + s2.ToString() + s3.ToString());
}
/// <summary>
/// Runs <paramref name="argIOKernel"/> once with an int, a long and a float argument and
/// checks that the kernel wrote the same values back into an output buffer.
/// </summary>
/// <param name="c">Context used to create the output buffer.</param>
/// <param name="cq">Command queue the kernel is enqueued on.</param>
/// <param name="argIOKernel">Kernel taking the three values as arguments 0-2 and the output buffer as argument 3.</param>
private unsafe void TestKernel(Context c, CommandQueue cq, Kernel argIOKernel)
{
    // The buffer is released when the test finishes; previously it was never disposed.
    using (Mem outArgBuffer = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR|(ulong)MemFlags.READ_WRITE), sizeof(IOKernelArgs), IntPtr.Zero))
    {
        byte[] data = new byte[sizeof(IOKernelArgs)];
        Output("Testing kernel - Argument return");

        argIOKernel.SetArg(0, 1);
        argIOKernel.SetArg(1, 65L);
        argIOKernel.SetArg(2, 38.4f);
        argIOKernel.SetArg(3, outArgBuffer);

        Event ev = null;
        try
        {
            cq.EnqueueTask(argIOKernel,0,null,out ev);
            cq.Finish();

            if ((int)ev.ExecutionStatus < 0)
            {
                Error(cq.Device.Name + ": argIOKernel failed with error code " + (ErrorCode)ev.ExecutionStatus);
                return;
            }

            // Exercise both read paths: a plain read into a byte array and a blocking map.
            outArgBuffer.Read(cq, 0L, data, 0, sizeof(IOKernelArgs));
            IntPtr outArgPtr = cq.EnqueueMapBuffer(outArgBuffer, true, MapFlags.READ, IntPtr.Zero, (IntPtr)sizeof(IOKernelArgs));
            IOKernelArgs args = (IOKernelArgs)Marshal.PtrToStructure(outArgPtr, typeof(IOKernelArgs));
            cq.EnqueueUnmapMemObject(outArgBuffer, outArgPtr);

            if (args.outInt != 1)
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            if (args.outLong != 65)
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            if (args.outSingle != 38.4f)
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
        }
        finally
        {
            // Release the event on every path; previously it was only released on failure.
            if (ev != null)
                ev.Dispose();
        }
    }
}
/// <summary>
/// Creates a <c>Kernel</c> object for every kernel in this program.
/// </summary>
/// <returns>One <c>Kernel</c> per kernel ID returned by the native call.</returns>
public Kernel[] CreateKernels()
{
    uint kernelCount;

    // First call: no output array, only the kernel count is requested.
    ErrorCode status = (ErrorCode)OpenCL.CreateKernelsInProgram( ProgramID, 0, null, out kernelCount );
    if( status!=ErrorCode.SUCCESS )
    {
        throw new OpenCLException( "CreateKernels failed with error code "+status, status);
    }

    // Second call: fetch the kernel IDs themselves.
    IntPtr[] ids = new IntPtr[kernelCount];
    status = (ErrorCode)OpenCL.CreateKernelsInProgram( ProgramID, kernelCount, ids, out kernelCount );
    if( status!=ErrorCode.SUCCESS )
    {
        throw new OpenCLException( "CreateKernels failed with error code "+status, status);
    }

    // Wrap every ID in a managed Kernel object.
    Kernel[] wrapped = new Kernel[kernelCount];
    int index = 0;
    while( index<wrapped.Length )
    {
        wrapped[index] = new Kernel( Context, this, ids[index] );
        index++;
    }
    return wrapped;
}
/// <summary>
/// Builds <paramref name="source"/> into a new program and creates its "FilterImage"
/// kernel. After a successful build the program and kernel that were in use before are
/// disposed. If creating or building the new program fails, the exception propagates and
/// the previous kernel is left untouched.
/// </summary>
/// <param name="source">OpenCL C source containing a kernel named "FilterImage".</param>
public void BuildOCLSource(string source)
{
    // Remember what is being replaced so it can be released once the new objects exist.
    var previousProgram = oclProgram;
    var previousKernel = FilterKernel;

    oclProgram = oclContext.CreateProgramWithSource(source);
    oclProgram.Build();
    FilterKernel = oclProgram.CreateKernel("FilterImage");

    // Calling this method again used to overwrite the old program and kernel without
    // disposing them.
    if (previousKernel != null)
    {
        previousKernel.Dispose();
    }
    if (previousProgram != null)
    {
        previousProgram.Dispose();
    }
}
/// <summary>
/// Enqueues a kernel over an N-dimensional range given as <c>IntPtr</c> arrays after the
/// given events, without returning an output event. Throws <c>OpenCLException</c> when the
/// native call does not return <c>ErrorCode.SUCCESS</c>.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="workDim">Number of dimensions.</param>
/// <param name="globalWorkOffset">Global offsets, passed through unchanged.</param>
/// <param name="globalWorkSize">Global sizes, passed through unchanged.</param>
/// <param name="localWorkSize">Work-group sizes, passed through unchanged.</param>
/// <param name="numEventsInWaitList">Number of events in <paramref name="event_wait_list"/>.</param>
/// <param name="event_wait_list">Events passed to the native call as the wait list.</param>
public void EnqueueNDRangeKernel(Kernel kernel, uint workDim, IntPtr[] globalWorkOffset, IntPtr[] globalWorkSize, IntPtr[] localWorkSize, uint numEventsInWaitList, Event[] event_wait_list)
{
    ErrorCode status = OpenCL.EnqueueNDRangeKernel(
        CommandQueueID,
        kernel.KernelID,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        InteropTools.ConvertEventsToEventIDs(event_wait_list),
        null);

    if (status == ErrorCode.SUCCESS)
    {
        return;
    }
    throw new OpenCLException("EnqueueNDRangeKernel failed with error code " + status, status);
}