/// <summary>
/// Creates the OpenCL objects needed to run the "Mandelbrot" kernel on the first device
/// of <paramref name="platform"/> and allocates the write-only device buffer for the image.
/// </summary>
/// <param name="platform">Platform whose devices and default context are used.</param>
/// <param name="width">Value stored in <c>BitmapWidth</c>; also used to size the device buffer.</param>
/// <param name="height">Value stored in <c>BitmapHeight</c>; also used to size the device buffer.</param>
/// <remarks>
/// If the program fails to build, the build log is shown in a message box and
/// <c>Application.Exit()</c> is called; the constructor then continues with kernel creation.
/// </remarks>
public Mandelbrot(Platform platform, int width, int height)
{
    // Platform, devices, context and a profiling-enabled queue on the first device.
    openCLPlatform = platform;
    openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
    openCLContext = openCLPlatform.CreateDefaultContext();

    var firstDevice = openCLDevices[0];
    openCLCQ = openCLContext.CreateCommandQueue(firstDevice, CommandQueueProperties.PROFILING_ENABLE);

    // Compile the kernel source that is read from the working directory.
    string kernelSource = File.ReadAllText("Mandelbrot.cl");
    mandelBrotProgram = openCLContext.CreateProgramWithSource(kernelSource);
    try
    {
        mandelBrotProgram.Build();
    }
    catch (OpenCLException)
    {
        string buildLog = mandelBrotProgram.GetBuildLog(firstDevice);
        MessageBox.Show(
            buildLog,
            "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
        Application.Exit();
    }
    mandelbrotKernel = mandelBrotProgram.CreateKernel("Mandelbrot");

    // Initial view rectangle.
    Left = -2.0f;
    Top = 2.0f;
    Right = 2.0f;
    Bottom = -2.0f;

    BitmapWidth = width;
    BitmapHeight = height;

    // Device-side output buffer; no host pointer is supplied.
    int bufferSize = width * height * 4;
    mandelbrotMemBuffer = openCLContext.CreateBuffer(MemFlags.WRITE_ONLY, bufferSize, IntPtr.Zero);
}
/// <summary>
/// Prepares the device at position <paramref name="index"/> in <c>devices</c>: creates a
/// default context and a profiling-enabled command queue, builds the time-series and
/// data-set programs, creates their "KernelFunction" kernels and records the device's
/// maximum work-group size.
/// </summary>
/// <param name="index">Index into <c>devices</c> of the device to use.</param>
/// <remarks>
/// A failed build only prints the build log to the console; kernel creation is still attempted.
/// </remarks>
public void Initialize(int index)
{
    var device = devices[index];

    // Context and command queue for the chosen device.
    context = device.Platform.CreateDefaultContext();
    queue = context.CreateCommandQueue(device, CommandQueueProperties.PROFILING_ENABLE);
    Console.WriteLine(device.Vendor + " " + device.Name);

    // Time-series program and kernel.
    var timeSeriesProgram = context.CreateProgramWithSource(processTimeSeriesSource);
    try
    {
        timeSeriesProgram.Build();
    }
    catch
    {
        Console.WriteLine(timeSeriesProgram.GetBuildLog(device));
    }
    kernelForTimeSeries = timeSeriesProgram.CreateKernel("KernelFunction");

    // Data-set program and kernel.
    var dataSetProgram = context.CreateProgramWithSource(processDataSetSource);
    try
    {
        dataSetProgram.Build();
    }
    catch
    {
        Console.WriteLine(dataSetProgram.GetBuildLog(device));
    }
    kernelForDataSets = dataSetProgram.CreateKernel("KernelFunction");

    selectedDevice = index;
    workGroupSize = (int)device.MaxWorkGroupSize;
}
/// <summary>
/// Queries the devices of the requested type, creates a context spanning all of them with
/// one profiling-enabled command queue per device, then builds <paramref name="source"/>
/// and collects its kernels into <c>Kernels</c>.
/// </summary>
/// <param name="deviceType">Kind of device to query from <c>Platform</c>.</param>
/// <param name="source">OpenCL program source to compile.</param>
/// <exception cref="OpenCLException">No device of the requested type is present.</exception>
protected virtual void Initialize(DeviceType deviceType, string source)
{
    Devices = Platform.QueryDevices(deviceType);
    if (Devices.Length == 0)
    {
        throw new OpenCLException("No devices of type " + deviceType + " present");
    }

    Context = Platform.CreateContext(null, Devices, null, IntPtr.Zero);

    // One queue per device, stored in the same order as Devices.
    CQs = new CommandQueue[Devices.Length];
    int slot = 0;
    foreach (var device in Devices)
    {
        CQs[slot] = Context.CreateCommandQueue(device, CommandQueueProperties.PROFILING_ENABLE);
        slot++;
    }

    // The first queue is the default one.
    CQ = CQs[0];

    Program = Context.CreateProgramWithSource(source);
    Program.Build();
    Kernels = Program.CreateKernelDictionary();
}
/// <summary>
/// Compiles <paramref name="source"/> in the current OpenCL context and creates the
/// "FilterImage" kernel from the resulting program.
/// </summary>
/// <param name="source">OpenCL program source text.</param>
public void BuildOCLSource(string source)
{
    var program = oclContext.CreateProgramWithSource(source);

    // Publish the program before building, so the field is set even if Build() throws.
    oclProgram = program;
    program.Build();

    FilterKernel = program.CreateKernel("FilterImage");
}
/// <summary>
/// One-time setup, guarded by <c>inited</c>: configures the <c>OpenCLManager</c>, selects the
/// platform named in the settings, creates the context, assembles the compile-time defines,
/// compiles "flac.cl", opens the output stream (if none was supplied), writes the header and
/// creates the encoding tasks.
/// </summary>
/// <exception cref="Exception">
/// No OpenCL platform is available, or the platform named in the settings was not found.
/// </exception>
/// <remarks>
/// A build failure from <c>CompileFile</c> propagates to the caller unchanged, with its
/// original stack trace; the exception's <c>BuildLogs</c> carry the compiler output.
/// </remarks>
public unsafe void InitTasks()
{
    bool doMidside = channels == 2 && eparams.do_midside;
    int channelCount = doMidside ? 2 * channels : channels;

    if (inited)
    {
        return;
    }

    if (OpenCL.NumberOfPlatforms < 1)
    {
        throw new Exception("no opencl platforms found");
    }

    // CPU devices always use a group size of 1.
    int groupSize = _settings.DeviceType == OpenCLDeviceType.CPU ? 1 : _settings.GroupSize;

    OCLMan = new OpenCLManager();

    // Attempt to save binaries after compilation, as well as load precompiled binaries
    // to avoid compilation. Usually you'll want this to be true.
    OCLMan.AttemptUseBinaries = true;

    // Attempt to compile sources. This should probably be true for almost all projects.
    // Setting it to false means that when you attempt to compile "mysource.cl", it will
    // only scan the precompiled binary directory for a binary corresponding to a source
    // with that name. There's a further restriction that the compiled binary also has to
    // use the same Defines and BuildOptions.
    OCLMan.AttemptUseSource = true;

    // If true, RequireImageSupport will filter out any devices without image support.
    // In this project we don't need image support though, so we set it to false.
    OCLMan.RequireImageSupport = false;

    // The BuildOptions string is passed directly to clBuild and can be used to do debug builds etc.
    OCLMan.BuildOptions = "";

    // Sources are looked up next to this assembly; compiled binaries go to the
    // per-user local application data folder.
    OCLMan.SourcePath = System.IO.Path.GetDirectoryName(GetType().Assembly.Location);
    OCLMan.BinaryPath = System.IO.Path.Combine(
        System.IO.Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
            "CUE Tools"),
        "OpenCL");

    // Resolve the platform by name when one is configured; otherwise use platform 0.
    int platformId = 0;
    if (_settings.Platform != null)
    {
        platformId = -1;
        string platforms = "";
        for (int i = 0; i < OpenCL.NumberOfPlatforms; i++)
        {
            var platform = OpenCL.GetPlatform(i);
            platforms += " \"" + platform.Name + "\"";
            if (platform.Name.Equals(_settings.Platform, StringComparison.InvariantCultureIgnoreCase))
            {
                platformId = i;
                break;
            }
        }

        if (platformId < 0)
        {
            throw new Exception("unknown platform \"" + _settings.Platform + "\". Platforms available:" + platforms);
        }
    }

    OCLMan.CreateDefaultContext(platformId, (DeviceType)_settings.DeviceType);

    this.framesPerTask = (int)OCLMan.Context.Devices[0].MaxComputeUnits * Math.Max(1, _settings.TaskSize / channels);

    // GPU-only mode additionally requires the extended local atomics extension on the first device.
    bool UseGPUOnly = _settings.GPUOnly && OCLMan.Context.Devices[0].Extensions.Contains("cl_khr_local_int32_extended_atomics");
    bool UseGPURice = UseGPUOnly && _settings.DoRice;

    // Settle the block size: an explicit _blocksize wins, otherwise derive it from the parameters.
    if (_blocksize == 0)
    {
        if (eparams.block_size == 0)
        {
            eparams.block_size = select_blocksize(sample_rate, eparams.block_time_ms);
        }
        _blocksize = eparams.block_size;
    }
    else
    {
        eparams.block_size = _blocksize;
    }

    int maxBS = 1 << (BitReader.log2i(eparams.block_size - 1) + 1);

    // The Defines string gets prepended to any and all sources that are compiled
    // and serve as a convenient way to pass configuration information to the compilation process.
    OCLMan.Defines =
        "#define MAX_ORDER " + eparams.max_prediction_order.ToString() + "\n" +
        "#define GROUP_SIZE " + groupSize.ToString() + "\n" +
        "#define FLACCL_VERSION \"" + vendor_string + "\"\n" +
        (UseGPUOnly ? "#define DO_PARTITIONS\n" : "") +
        (UseGPURice ? "#define DO_RICE\n" : "") +
        "#define BITS_PER_SAMPLE " + PCM.BitsPerSample + "\n" +
        "#define MAX_BLOCKSIZE " + maxBS + "\n" +
        "#define MAX_CHANNELS " + PCM.ChannelCount + "\n" +
#if DEBUG
        "#define DEBUG\n" +
#endif
        (_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") +
        _settings.Defines + "\n";

    // Enable, and advertise via HAVE_<name>, every optional extension the first device reports.
    var exts = new string[]
    {
        "cl_khr_local_int32_base_atomics",
        "cl_khr_local_int32_extended_atomics",
        "cl_khr_fp64",
        "cl_amd_fp64"
    };
    foreach (string extension in exts)
    {
        if (OCLMan.Context.Devices[0].Extensions.Contains(extension))
        {
            OCLMan.Defines += "#pragma OPENCL EXTENSION " + extension + ": enable\n";
            OCLMan.Defines += "#define HAVE_" + extension + "\n";
        }
    }

    // No catch here on purpose: catching and re-throwing with "throw ex;" would reset the
    // stack trace, and indexing the build logs inside a handler could itself throw and hide
    // the real build failure.
    openCLProgram = OCLMan.CompileFile("flac.cl");

    // Open the output file only when the caller did not provide a stream.
    if (_IO == null)
    {
        _IO = new FileStream(_path, FileMode.Create, FileAccess.Write, FileShare.Read);
    }

    int header_size = flake_encode_init();
    _IO.Write(header, 0, header_size);
    _totalSize += header_size;
    if (_IO.CanSeek)
    {
        first_frame_offset = _IO.Position;
    }

    task1 = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);
    task2 = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);

    if (_settings.CPUThreads > 0)
    {
        cpu_tasks = new FLACCLTask[_settings.CPUThreads];
        for (int i = 0; i < cpu_tasks.Length; i++)
        {
            cpu_tasks[i] = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);
        }
    }

    inited = true;
}
/// <summary>
/// Sets up the OpenCL state for the given device type: device list, a shared context,
/// a profiling-enabled command queue per device (the first becomes <c>CQ</c>), the built
/// program and its kernel dictionary.
/// </summary>
/// <param name="deviceType">Kind of device to query from <c>Platform</c>.</param>
/// <param name="source">OpenCL program source to compile.</param>
/// <exception cref="OpenCLException">No device of the requested type is present.</exception>
protected virtual void Initialize(DeviceType deviceType, string source)
{
    Devices = Platform.QueryDevices(deviceType);

    bool noDevices = Devices.Length == 0;
    if (noDevices)
    {
        throw new OpenCLException("No devices of type " + deviceType + " present");
    }

    Context = Platform.CreateContext(null, Devices, null, IntPtr.Zero);

    // Allocate the queue table first, then fill it device by device.
    CQs = new CommandQueue[Devices.Length];
    int next = 0;
    while (next < CQs.Length)
    {
        CQs[next] = Context.CreateCommandQueue(Devices[next], CommandQueueProperties.PROFILING_ENABLE);
        next++;
    }
    CQ = CQs[0];

    Program = Context.CreateProgramWithSource(source);
    Program.Build();
    Kernels = Program.CreateKernelDictionary();
}
/// <summary>
/// Stores the lattice dimensions and parameters, derives the work-group layout and buffer
/// sizes, builds the OpenCL program from "MyKernel.cl" and "dirak_mul.cl" on the first
/// device of platform 0, creates all kernels and device buffers, uploads the initial host
/// data and allocates the work vectors.
/// </summary>
/// <param name="Nxp">Stored in <c>Nx</c>.</param>
/// <param name="Nyp">Stored in <c>Ny</c>.</param>
/// <param name="Nzp">Stored in <c>Nz</c>.</param>
/// <param name="Ntm">Stored in <c>Nt</c>.</param>
/// <param name="Bbeta">Converted to <c>floattype</c> and stored in <c>betagauge</c>.</param>
/// <param name="Flux">Converted to <c>floattype</c> and stored in <c>flux</c>.</param>
/// <remarks>
/// Numeric values placed on the compiler command line are formatted with the invariant
/// culture so the decimal separator is always '.', independent of the user's regional settings.
/// </remarks>
public Core(int Nxp, int Nyp, int Nzp, int Ntm, double Bbeta, double Flux)
{
    Nx = Nxp;
    Ny = Nyp;
    Nz = Nzp;
    Nt = Ntm;
    betagauge = (floattype)Bbeta;
    flux = (floattype)Flux;
    N = Nx * Ny * Nz * Nt;
    Nspace = Nx * Ny * Nz;

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    bool useDouble = typeof(floattype) == typeof(double);

    // Preprocessor definitions handed to the OpenCL compiler.
    string strforcompiler =
        "-D Nt=" + Nt.ToString(invariant) +
        " -D Nxyz=" + (Nx * Ny * Nz).ToString(invariant) +
        " -D Nxy=" + (Nx * Ny).ToString(invariant) +
        " -D Nx=" + Nx.ToString(invariant) +
        " -D Ny=" + Ny.ToString(invariant) +
        " -D Nz=" + Nz.ToString(invariant);
    strforcompiler += useDouble
        ? " -D floattype=double -D floattype2=double2 -D floattype4=double4"
        : " -D floattype=float -D floattype2=float2 -D floattype4=float4";
    // Invariant formatting replaces the former ToString().Replace(',', '.') workaround,
    // which only covered cultures whose decimal separator is a comma.
    strforcompiler += " -D phi=" + Convert.ToString(flux, invariant) +
                      " -D KAPPA=" + Convert.ToString(kappa, invariant);

    string fp64support = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

    // Work-group sizes and the resulting group counts.
    Plocalsize = AdjustLocalSize(Nspace);
    Slocalsize = AdjustLocalSize(N / 2);
    XhermYlocalsize = AdjustLocalSize(4 * N);
    // Disabled override kept for reference: Plocalsize = 16; Slocalsize = 16;

    PNumGroups = Nx * Ny * Nz / Plocalsize;
    SNumGroups = N / 2 / Slocalsize;
    XhermYNumGroups = 4 * 4 * N / XhermYlocalsize;

    BufferLength = N * 4 * 9 * 2 * sizeof(floattype);
    SeedBufLen = N * sizeof(Int32) / 2 * 4;

    AllocBuffers();

    // OpenCL platform 0, its default context and a profiling queue on the first device.
    openCLPlatform = OpenCL.GetPlatform(0);
    openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
    openCLContext = openCLPlatform.CreateDefaultContext();
    openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);

    // The fp64 pragma is prepended only when floattype is double.
    MyKernelProgram = openCLContext.CreateProgramWithSource(
        (useDouble ? fp64support : "") +
        File.ReadAllText("MyKernel.cl") +
        File.ReadAllText("dirak_mul.cl"));
    try
    {
        MyKernelProgram.Build(openCLDevices, strforcompiler, null, IntPtr.Zero);
    }
    catch (OpenCLException)
    {
        // The build log is shown and construction carries on (exiting the application
        // here was disabled in the original code).
        string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]);
        MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
    }

    // Kernels.
    MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel");
    PReductionKernel = MyKernelProgram.CreateKernel("PLoop");
    SReductionKernel = MyKernelProgram.CreateKernel("CalcS");
    DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul");
    FillWithKernel = MyKernelProgram.CreateKernel("FillWith");
    FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith");
    FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom");
    AXPYKernel = MyKernelProgram.CreateKernel("AXPY");
    XhermYKernel = MyKernelProgram.CreateKernel("XhermY");
    BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink");
    RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink");

    // Device buffers (all read/write, no host pointer) and the host-side result blocks.
    MemFlags readWrite = MemFlags.READ_WRITE;
    SeedMem = openCLContext.CreateBuffer(readWrite, SeedBufLen, IntPtr.Zero);
    LinkMem = openCLContext.CreateBuffer(readWrite, BufferLength, IntPtr.Zero);
    PGroupMem = openCLContext.CreateBuffer(readWrite, floatsize * PNumGroups, IntPtr.Zero);
    PResMem = openCLContext.CreateBuffer(readWrite, floatsize, IntPtr.Zero);
    SGroupMem = openCLContext.CreateBuffer(readWrite, floatsize * SNumGroups, IntPtr.Zero);
    SResMem = openCLContext.CreateBuffer(readWrite, floatsize, IntPtr.Zero);
    XhermYGroupMem = openCLContext.CreateBuffer(readWrite, floatsize * 2 * XhermYNumGroups, IntPtr.Zero);
    XhermYresMem = openCLContext.CreateBuffer(readWrite, floatsize * 2, IntPtr.Zero);
    XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2);
    SeedVectorMem = openCLContext.CreateBuffer(readWrite, SeedVectorBuf.Length * sizeof(int), IntPtr.Zero);
    StorageMem = openCLContext.CreateBuffer(readWrite, linksize, IntPtr.Zero);
    dSmem = openCLContext.CreateBuffer(readWrite, floatsize, IntPtr.Zero);
    dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize);

    // Kernel arguments that stay fixed after construction.
    MyKernelKernel.SetArg(0, (byte)EvenOdd);
    MyKernelKernel.SetArg(1, (floattype)betagauge);
    MyKernelKernel.SetArg(2, (floattype)flux);
    MyKernelKernel.SetArg(3, SeedMem);
    MyKernelKernel.SetArg(4, LinkMem);

    PReductionKernel.SetArg(0, LinkMem);
    PReductionKernel.SetArg(1, PGroupMem);
    PReductionKernel.SetArg(2, PResMem);
    // Size-only argument (null data pointer), one floattype per work-item in the group.
    PReductionKernel.SetArg(3, new IntPtr(Plocalsize * floatsize), IntPtr.Zero);

    SReductionKernel.SetArg(0, LinkMem);
    SReductionKernel.SetArg(1, SGroupMem);
    SReductionKernel.SetArg(2, SResMem);
    SReductionKernel.SetArg(3, new IntPtr(Slocalsize * floatsize), IntPtr.Zero);

    XhermYKernel.SetArg(2, XhermYresMem);
    XhermYKernel.SetArg(3, XhermYGroupMem);
    XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize * floatsize * 2), IntPtr.Zero);

    // Blocking uploads of the initial host data.
    openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed);
    openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip);
    openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length * sizeof(int), ipseedvector);

    // BiCGStab work vectors.
    rhat0 = new Vector();
    phi = new Vector();
    r0 = new Vector();
    pi = new Vector();
    vi = new Vector();
    t = new Vector();
    s = new Vector();
    temp = new Vector();
    ri = new Vector();
    x = new Vector();

    // For the fermion update.
    chi = new Vector();

    // Compute S, back up a link, recompute, restore the link and recompute again.
    // The intermediate values were only captured for a disabled debug message box,
    // so the unused locals are gone; the calls are kept for their effect on state.
    CalculateS();
    BackupLink(0, 0, 1, 0, 1);
    CalculateS();
    RestoreLink(0, 0, 1, 0, 1);
    CalculateS();
}
/// <summary>
/// Lists every OpenCL platform and device, picks the first GPU on the first platform whose
/// name contains "AMD", "NVIDIA" or "INTEL", runs the "Multiply" and "Add" kernels from
/// "openCLkernels.cl" and prints the contents of the "Add" output array.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <remarks>
/// Each host array is pinned through its own handle, so every device buffer is initialised
/// from, and read back into, the array it belongs to. The handles are released on exit.
/// </remarks>
static void Main(string[] args)
{
    // Report what is installed.
    for (int i = 0; i < OpenCL.NumberOfPlatforms; ++i)
    {
        Platform listedPlatform = OpenCL.GetPlatform(i);
        Console.WriteLine("Platform {0} -> {1}, {2}", i, listedPlatform.Vendor, listedPlatform.Name);

        foreach (Device listedDevice in listedPlatform.QueryDevices(DeviceType.ALL))
        {
            Console.WriteLine("-----> {0}, {1}, {2} Bytes, {3}",
                listedDevice.Name, listedDevice.DeviceType, listedDevice.GlobalMemSize, listedDevice.OpenCL_C_Version);
        }
    }

    // Choose the first matching platform that actually exposes a GPU.
    Platform clPlatform = null;
    Device clDevice = null;
    for (int i = 0; i < OpenCL.NumberOfPlatforms; ++i)
    {
        Platform candidate = OpenCL.GetPlatform(i);
        bool knownVendor = candidate.Name.Contains("AMD")
            || candidate.Name.Contains("NVIDIA")
            || candidate.Name.Contains("INTEL");
        if (!knownVendor)
        {
            continue;
        }

        Device[] gpus = candidate.QueryDevices(DeviceType.GPU);
        if (gpus.Length == 0)
        {
            continue;
        }

        clPlatform = candidate;
        clDevice = gpus[0];
        break;
    }

    Console.WriteLine();

    // Without a device there is nothing to run; report it instead of failing on a null reference.
    if (clDevice == null)
    {
        Console.WriteLine("No AMD, NVIDIA or INTEL OpenCL platform with a GPU device was found.");
        return;
    }

    Context clContext = clPlatform.CreateDefaultContext();
    CommandQueue clCommandQueue = clContext.CreateCommandQueue(clDevice, CommandQueueProperties.NONE);

    // Host data.
    int[] arrayA = { 1, 2 };
    int[] arrayB = { 2, 3 };
    int[] arrayC = new int[2];

    int N = 100;
    float[] array = new float[N];
    for (int i = 0; i < N; ++i)
    {
        array[i] = 1.0f * i / N;
    }

    GCHandle arrayHandle = default(GCHandle);
    GCHandle arrayAHandle = default(GCHandle);
    GCHandle arrayBHandle = default(GCHandle);
    GCHandle arrayCHandle = default(GCHandle);
    try
    {
        // Pin each array separately so the native side sees a stable address for it.
        arrayHandle = GCHandle.Alloc(array, GCHandleType.Pinned);
        arrayAHandle = GCHandle.Alloc(arrayA, GCHandleType.Pinned);
        arrayBHandle = GCHandle.Alloc(arrayB, GCHandleType.Pinned);
        arrayCHandle = GCHandle.Alloc(arrayC, GCHandleType.Pinned);

        // Device buffers initialised from the host arrays.
        Mem arrayBuffer = clContext.CreateBuffer(MemFlags.COPY_HOST_PTR, array.Length * sizeof(float), arrayHandle.AddrOfPinnedObject());
        Mem arrayABuffer = clContext.CreateBuffer(MemFlags.COPY_HOST_PTR, arrayA.Length * sizeof(int), arrayAHandle.AddrOfPinnedObject());
        Mem arrayBBuffer = clContext.CreateBuffer(MemFlags.COPY_HOST_PTR, arrayB.Length * sizeof(int), arrayBHandle.AddrOfPinnedObject());
        Mem arrayCBuffer = clContext.CreateBuffer(MemFlags.COPY_HOST_PTR, arrayC.Length * sizeof(int), arrayCHandle.AddrOfPinnedObject());

        OpenCLNet.Program clProgram = clContext.CreateProgramWithSource(File.ReadAllText("openCLkernels.cl"));
        clProgram.Build();

        Kernel multiplyKernel = clProgram.CreateKernel("Multiply");
        multiplyKernel.SetArg(0, arrayBuffer);
        multiplyKernel.SetArg(1, 25);

        Kernel addKernel = clProgram.CreateKernel("Add");
        addKernel.SetArg(0, arrayABuffer);
        addKernel.SetArg(1, arrayBBuffer);
        addKernel.SetArg(2, arrayCBuffer);

        // Run "Multiply" over N work-items and fetch the result with a blocking read.
        clCommandQueue.EnqueueNDRangeKernel(multiplyKernel, 1, null, new int[] { N }, null);
        clCommandQueue.EnqueueReadBuffer(arrayBuffer, true, 0, array.Length * sizeof(float), arrayHandle.AddrOfPinnedObject());

        // Run "Add" over two work-items and read all three buffers back.
        clCommandQueue.EnqueueNDRangeKernel(addKernel, 1, null, new int[] { 2 }, null);
        clCommandQueue.EnqueueReadBuffer(arrayABuffer, true, 0, arrayA.Length * sizeof(int), arrayAHandle.AddrOfPinnedObject());
        clCommandQueue.EnqueueReadBuffer(arrayBBuffer, true, 0, arrayB.Length * sizeof(int), arrayBHandle.AddrOfPinnedObject());
        clCommandQueue.EnqueueReadBuffer(arrayCBuffer, true, 0, arrayC.Length * sizeof(int), arrayCHandle.AddrOfPinnedObject());
    }
    finally
    {
        // Unpin whatever was pinned, even when a call above threw.
        if (arrayHandle.IsAllocated) { arrayHandle.Free(); }
        if (arrayAHandle.IsAllocated) { arrayAHandle.Free(); }
        if (arrayBHandle.IsAllocated) { arrayBHandle.Free(); }
        if (arrayCHandle.IsAllocated) { arrayCHandle.Free(); }
    }

    Console.WriteLine();
    Console.WriteLine();
    foreach (int value in arrayC)
    {
        Console.WriteLine("{0}", value);
    }
}