/// <summary>
/// Runs <paramref name="argIOKernel"/> as a single task with an int, a long and a float argument
/// plus an output buffer, then checks that the values found in the output buffer match the
/// arguments that were passed in.
/// </summary>
/// <param name="c">Context used to allocate the output buffer.</param>
/// <param name="cq">Command queue on which the task and the buffer transfers are issued.</param>
/// <param name="argIOKernel">Kernel under test; receives arguments 0..3 as set below.</param>
private unsafe void TestKernel(Context c, CommandQueue cq, Kernel argIOKernel)
{
    int argsSize = sizeof(IOKernelArgs);
    Mem outArgBuffer = c.CreateBuffer(
        (MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.READ_WRITE),
        argsSize,
        IntPtr.Zero);
    try
    {
        byte[] data = new byte[argsSize];
        Output("Testing kernel - Argument return");

        argIOKernel.SetArg(0, 1);
        argIOKernel.SetArg(1, 65L);
        argIOKernel.SetArg(2, 38.4f);
        argIOKernel.SetArg(3, outArgBuffer);

        Event ev;
        cq.EnqueueTask(argIOKernel, 0, null, out ev);
        try
        {
            cq.Finish();

            if ((int)ev.ExecutionStatus < 0)
            {
                Error(cq.Device.Name + ": argIOKernel failed with error code " + (ErrorCode)ev.ExecutionStatus);
                return;
            }

            // The bytes read here are not inspected; the call only exercises the Read path.
            outArgBuffer.Read(cq, 0L, data, 0, argsSize);

            // Copy the struct out of the mapping before the mapping is handed back.
            IntPtr outArgPtr = cq.EnqueueMapBuffer(outArgBuffer, true, MapFlags.READ, IntPtr.Zero, (IntPtr)argsSize);
            IOKernelArgs args = (IOKernelArgs)Marshal.PtrToStructure(outArgPtr, typeof(IOKernelArgs));
            cq.EnqueueUnmapMemObject(outArgBuffer, outArgPtr);

            if (args.outInt != 1)
            {
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            }
            if (args.outLong != 65)
            {
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            }
            if (args.outSingle != 38.4f)
            {
                Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            }
        }
        finally
        {
            // Previously the event was only disposed on the failure branch.
            ev.Dispose();
        }
    }
    finally
    {
        // Release the output buffer on every exit path, including exceptions.
        outArgBuffer.Dispose();
    }
}
/// <summary>
/// Runs <paramref name="k"/> as a single task with a <c>Float2</c> argument and a two-float buffer
/// that wraps a pinned managed array, then maps and unmaps the buffer for reading.
/// </summary>
/// <param name="c">Context used to create the host-pointer buffer.</param>
/// <param name="cq">Command queue on which the task, barrier and map/unmap are issued.</param>
/// <param name="k">Kernel under test; receives the vector as argument 0 and the buffer as argument 1.</param>
private unsafe void TestVecKernel(Context c, CommandQueue cq, Kernel k)
{
    // Kept as a compile-time constant so overload resolution matches the original literals.
    const int byteCount = 2 * sizeof(float);

    Float2 f2 = new Float2(0.0f, 1.0f);
    float[] memory = new float[2];

    fixed (float* pMemory = memory)
    {
        Mem mem = c.CreateBuffer(
            (MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR),
            byteCount,
            pMemory);
        try
        {
            k.SetArg(0, f2);
            k.SetArg(1, mem);
            cq.EnqueueTask(k);
            cq.EnqueueBarrier();

            IntPtr pMap = cq.EnqueueMapBuffer(mem, true, MapFlags.READ, 0, byteCount);
            cq.EnqueueUnmapMemObject(mem, pMap);

            // The buffer uses the pinned array as its storage, so drain the queue
            // before the array is unpinned at the end of the fixed block.
            cq.Finish();
        }
        finally
        {
            // Release the buffer while its backing array is still pinned.
            mem.Dispose();
        }
    }
}
/// <summary>
/// Enqueues ten tasks with kernel argument 5000000 followed by ten with argument 500, waits for
/// all of them to finish, and reports every event whose execution status is negative.
/// </summary>
/// <param name="c">Context the queue belongs to (not used by this test).</param>
/// <param name="cq">Command queue the tasks are issued on.</param>
/// <param name="kernel">Kernel to run; argument 0 is set before each batch.</param>
private void TestCommandQueueAsync(Context c, CommandQueue cq, Kernel kernel)
{
    List<Event> events = new List<Event>();
    Event clEvent;

    Output("Testing asynchronous task issuing (clEnqueueTask) and waiting for events");

    try
    {
        // Issue a bunch of slow operations
        kernel.SetArg(0, 5000000);
        for (int i = 0; i < 10; i++)
        {
            cq.EnqueueTask(kernel, 0, null, out clEvent);
            events.Add(clEvent);
        }

        // Issue a bunch of fast operations
        kernel.SetArg(0, 500);
        for (int i = 0; i < 10; i++)
        {
            cq.EnqueueTask(kernel, 0, null, out clEvent);
            events.Add(clEvent);
        }

        Event[] eventList = events.ToArray();
        cq.EnqueueWaitForEvents(eventList.Length, eventList);

        // The enqueued wait only orders commands inside the queue; block the host here
        // so that the statuses inspected below are final rather than still in flight.
        cq.Finish();

        foreach (Event issued in events)
        {
            int status = (int)issued.ExecutionStatus;
            if (status < 0)
            {
                // Report through Error, as TestKernel does for a failed task.
                Error(cq.Device.Name + ": TestCommandQueueAsync failed with error code " + (ErrorCode)status);
            }
        }
    }
    finally
    {
        // Dispose every event that was created, even if enqueueing or waiting threw.
        foreach (Event issued in events)
        {
            issued.Dispose();
        }
        events.Clear();
    }
}
/// <summary>
/// Initialises the simulation core: stores the four dimension sizes and two physical parameters,
/// sets up the OpenCL platform, context, command queue, program, kernels and device buffers,
/// uploads the initial host data and creates the work vectors.
/// </summary>
/// <remarks>
/// Steps, in the order they appear in the body:
/// 1. Copies the arguments into Nx, Ny, Nz, Nt, betagauge and flux; computes N = Nx*Ny*Nz*Nt and
///    Nspace = Nx*Ny*Nz.
/// 2. Builds the OpenCL compiler option string with -D defines for Nt, Nxyz, Nxy, Nx, Ny, Nz,
///    floattype/floattype2/floattype4 (double or float variants, chosen by typeof(floattype)),
///    phi (from flux) and KAPPA (from kappa).
/// 3. Picks local sizes through AdjustLocalSize and derives the group counts, BufferLength and
///    SeedBufLen from them.
/// 4. Calls AllocBuffers, takes OpenCL platform 0, queries all devices, creates the default context
///    and a command queue on device 0 with PROFILING_ENABLE.
/// 5. Creates the program from MyKernel.cl followed by dirak_mul.cl (prefixed with the
///    cl_khr_fp64 pragma when floattype is double) and builds it; an OpenCLException during the
///    build is caught and the build log is shown in a MessageBox.
/// 6. Creates the kernels, the READ_WRITE device buffers and two unmanaged host allocations
///    (XhermYrespointer, dSpointer) via Marshal.AllocHGlobal.
/// 7. Sets the fixed kernel arguments and writes ipseed, ip and ipseedvector into SeedMem,
///    LinkMem and SeedVectorMem.
/// 8. Creates the Vector instances and runs CalculateS / BackupLink / CalculateS / RestoreLink /
///    CalculateS, reading S[0] after each CalculateS.
///
/// NOTE(review): this chunk has lost its original line breaks, so where each `//` comment ends is
/// ambiguous (for example whether `Slocalsize = 16;` after `// Plocalsize = 16;` is live code or
/// part of the comment) - confirm against the formatted original before changing anything here.
/// NOTE(review): after a build failure execution continues to CreateKernel; presumably that fails
/// on an unbuilt program - confirm, and consider rethrowing once the log has been shown.
/// NOTE(review): flux and kappa are formatted with the current culture and then ',' is replaced by
/// '.'; formatting with the invariant culture would not depend on that replacement - TODO confirm
/// the type of kappa.
/// NOTE(review): the AllocHGlobal allocations are not freed in this constructor; verify they are
/// released elsewhere.
/// NOTE(review): s1, s2 and s3 are only referenced by a commented-out MessageBox call.
/// </remarks>
/// <param name="Nxp">Size of the first dimension; stored in Nx.</param>
/// <param name="Nyp">Size of the second dimension; stored in Ny.</param>
/// <param name="Nzp">Size of the third dimension; stored in Nz.</param>
/// <param name="Ntm">Size of the fourth dimension; stored in Nt.</param>
/// <param name="Bbeta">Value cast to floattype and stored in betagauge.</param>
/// <param name="Flux">Value cast to floattype and stored in flux; also passed to the kernel compiler as the phi define.</param>
public Core(int Nxp,int Nyp, int Nzp, int Ntm, double Bbeta, double Flux) { Nx = Nxp; Ny = Nyp; Nz = Nzp; Nt = Ntm; betagauge = (floattype)Bbeta; flux = (floattype)Flux; N = Nx * Ny * Nz * Nt; Nspace = Nx * Ny * Nz; string strforcompiler = "-D Nt=" + Nt.ToString() + " -D Nxyz=" + (Nx * Ny * Nz).ToString() + " -D Nxy=" + (Nx*Ny).ToString() + " -D Nx="+(Nx).ToString()+" -D Ny="+(Ny).ToString()+" -D Nz="+(Nz).ToString(); strforcompiler += typeof(floattype) == typeof(double) ? " -D floattype=double -D floattype2=double2 -D floattype4=double4" : " -D floattype=float -D floattype2=float2 -D floattype4=float4"; strforcompiler += " -D phi=" + flux.ToString().Replace(',', '.') + " -D KAPPA=" + kappa.ToString().Replace(',', '.'); string fp64support = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; Plocalsize = AdjustLocalSize(Nspace); Slocalsize = AdjustLocalSize(N / 2); XhermYlocalsize = AdjustLocalSize(4 * N); // Plocalsize = 16; Slocalsize = 16; PNumGroups = Nx * Ny * Nz / Plocalsize; SNumGroups = N/2 / Slocalsize; XhermYNumGroups = 4*4*N / XhermYlocalsize; BufferLength = N * 4 * 9 * 2 * sizeof(floattype); SeedBufLen = N * sizeof(Int32)/2 * 4; AllocBuffers(); openCLPlatform = OpenCL.GetPlatform(0); openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL); openCLContext = openCLPlatform.CreateDefaultContext(); openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE); MyKernelProgram = openCLContext.CreateProgramWithSource( (typeof(floattype)==typeof(double)?fp64support:"") + File.ReadAllText("MyKernel.cl")+File.ReadAllText("dirak_mul.cl")); try { MyKernelProgram.Build(openCLDevices, strforcompiler, null, IntPtr.Zero); } catch (OpenCLException) { string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]); MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)"); // Application.Exit(); } MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel"); 
PReductionKernel = MyKernelProgram.CreateKernel("PLoop"); SReductionKernel = MyKernelProgram.CreateKernel("CalcS"); DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul"); FillWithKernel = MyKernelProgram.CreateKernel("FillWith"); FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith"); FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom"); AXPYKernel = MyKernelProgram.CreateKernel("AXPY"); XhermYKernel = MyKernelProgram.CreateKernel("XhermY"); BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink"); RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink"); SeedMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedBufLen, IntPtr.Zero); LinkMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), BufferLength, IntPtr.Zero); PGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * PNumGroups, IntPtr.Zero); PResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero); SGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * SNumGroups, IntPtr.Zero); SResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero); XhermYGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2*XhermYNumGroups, IntPtr.Zero); XhermYresMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2, IntPtr.Zero); XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2); SeedVectorMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedVectorBuf.Length * sizeof(int), IntPtr.Zero); StorageMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), linksize, IntPtr.Zero); dSmem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero); dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize); 
MyKernelKernel.SetArg(0, (byte)EvenOdd); MyKernelKernel.SetArg(1, (floattype)betagauge); MyKernelKernel.SetArg(2, (floattype)flux); MyKernelKernel.SetArg(3, SeedMem); MyKernelKernel.SetArg(4, LinkMem); PReductionKernel.SetArg(0, LinkMem); PReductionKernel.SetArg(1, PGroupMem); PReductionKernel.SetArg(2, PResMem); IntPtr ptr = new IntPtr(Plocalsize * floatsize); PReductionKernel.SetArg(3, ptr, IntPtr.Zero); SReductionKernel.SetArg(0, LinkMem); SReductionKernel.SetArg(1, SGroupMem); SReductionKernel.SetArg(2, SResMem); IntPtr ptr1 = new IntPtr(Slocalsize * floatsize); SReductionKernel.SetArg(3, ptr1, IntPtr.Zero); XhermYKernel.SetArg(2, XhermYresMem); XhermYKernel.SetArg(3, XhermYGroupMem); XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize*floatsize*2),IntPtr.Zero); openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed); openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip); openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length*sizeof(int), ipseedvector); rhat0 = new Vector(); //init BICGStab vectors phi = new Vector(); r0 = new Vector(); //rprev = new Vector(); pi = new Vector(); vi = new Vector(); t = new Vector(); s = new Vector(); // xprev = new Vector(); // vprev = new Vector(); // pprev = new Vector(); temp = new Vector(); ri = new Vector(); x = new Vector(); //for fermion update chi = new Vector(); CalculateS(); double s1 = S[0]; BackupLink(0, 0,1, 0, 1); CalculateS(); double s2 = S[0]; RestoreLink(0, 0, 1, 0, 1); CalculateS(); double s3 = S[0]; //MessageBox.Show(s1.ToString() + s2.ToString() + s3.ToString()); }