/// <summary>
/// Test all versions of:
///
/// EnqueueReadBuffer
/// EnqueueWriteBuffer
/// EnqueueCopyBuffer
///
/// For each overload family (IntPtr, int and long offsets/sizes) four passes are run:
/// without wait list or returned event, with a returned event only, with a wait list only,
/// and with both. Every pass writes the host pattern into buf0, copies buf0 to buf1, reads
/// buf1 back and compares the result with the pattern.
/// An error indicates that one of the above functions failed and further manual analysis is required
/// to pinpoint the error.
/// </summary>
/// <param name="c">Context used to create the scratch buffers.</param>
/// <param name="cq">Command queue on which all read/write/copy commands are enqueued.</param>
private void TestReadWriteCopyOps(Context c, CommandQueue cq)
{
    Output("Testing read/write/copy functions");

    Mem buf0 = null;
    Mem buf1 = null;
    Mem buf2 = null;
    // Deliberately a non-constant int: a constant would take part in implicit constant
    // conversions and could change which overload the calls below bind to.
    int bufLen = 1024 * 1024;
    byte[] srcData = new byte[bufLen];
    byte[] cmpData = new byte[bufLen];

    // Repeating 0..255 pattern; the narrowing is intentional, so keep it valid in checked builds.
    for (int i = 0; i < srcData.Length; i++)
    {
        srcData[i] = unchecked((byte)i);
    }
    Array.Clear(cmpData, 0, cmpData.Length);

    try
    {
        buf0 = c.CreateBuffer(MemFlags.READ_WRITE, bufLen, IntPtr.Zero);
        buf1 = c.CreateBuffer(MemFlags.READ_WRITE, bufLen, IntPtr.Zero);
        // buf2 is created and released but not used by any of the checks below.
        buf2 = c.CreateBuffer(MemFlags.READ_WRITE, bufLen, IntPtr.Zero);

        #region Test EnqueueReadBuffer EnqueueWriteBuffer EnqueueCopyBuffer
        fixed (byte* pSrc = srcData)
        {
            fixed (byte* pCmp = cmpData)
            {
                // ---- IntPtr offset/size overloads ----
                {
                    Event event0 = null;
                    Event event1 = null;
                    Event event2 = null;
                    Event event3 = null;
                    Event event4 = null;
                    Event event5 = null;
                    try
                    {
                        // Pass 1: no wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pSrc);
                        cq.EnqueueCopyBuffer(buf0, buf1, (IntPtr)0, (IntPtr)0, (IntPtr)bufLen);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pCmp);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(IntPtr version): Copy not identical to source");
                        }

                        // Pass 2: empty wait list, returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pSrc, 0, null, out event0);
                        cq.EnqueueCopyBuffer(buf0, buf1, (IntPtr)0, (IntPtr)0, (IntPtr)bufLen, 0, null, out event1);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pCmp, 0, null, out event2);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(IntPtr version): Copy not identical to source");
                        }

                        // The events from pass 2 serve as the wait list for passes 3 and 4.
                        Event[] events = new Event[] { event0, event1, event2 };

                        // Pass 3: wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pSrc, 3, events);
                        cq.EnqueueCopyBuffer(buf0, buf1, (IntPtr)0, (IntPtr)0, (IntPtr)bufLen, 3, events);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pCmp, 3, events);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(IntPtr version): Copy not identical to source");
                        }

                        // Pass 4: wait list and returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pSrc, 3, events, out event3);
                        cq.EnqueueCopyBuffer(buf0, buf1, (IntPtr)0, (IntPtr)0, (IntPtr)bufLen, 3, events, out event4);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, (IntPtr)0, (IntPtr)bufLen, (IntPtr)pCmp, 3, events, out event5);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(IntPtr version): Copy not identical to source");
                        }
                    }
                    finally
                    {
                        // Release every event that was actually obtained, even if a call above threw.
                        foreach (Event pending in new Event[] { event0, event1, event2, event3, event4, event5 })
                        {
                            if (pending != null)
                            {
                                pending.Dispose();
                            }
                        }
                    }
                }

                // ---- int offset/size overloads ----
                {
                    Event event0 = null;
                    Event event1 = null;
                    Event event2 = null;
                    Event event3 = null;
                    Event event4 = null;
                    Event event5 = null;
                    try
                    {
                        // Pass 1: no wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0, bufLen, (IntPtr)pSrc);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0, 0, bufLen);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0, bufLen, (IntPtr)pCmp);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(int version): Copy not identical to source");
                        }

                        // Pass 2: empty wait list, returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0, bufLen, (IntPtr)pSrc, 0, null, out event0);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0, 0, bufLen, 0, null, out event1);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0, bufLen, (IntPtr)pCmp, 0, null, out event2);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(int version): Copy not identical to source");
                        }

                        // The events from pass 2 serve as the wait list for passes 3 and 4.
                        Event[] events = new Event[] { event0, event1, event2 };

                        // Pass 3: wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0, bufLen, (IntPtr)pSrc, 3, events);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0, 0, bufLen, 3, events);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0, bufLen, (IntPtr)pCmp, 3, events);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(int version): Copy not identical to source");
                        }

                        // Pass 4: wait list and returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0, bufLen, (IntPtr)pSrc, 3, events, out event3);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0, 0, bufLen, 3, events, out event4);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0, bufLen, (IntPtr)pCmp, 3, events, out event5);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(int version): Copy not identical to source");
                        }
                    }
                    finally
                    {
                        // Release every event that was actually obtained, even if a call above threw.
                        foreach (Event pending in new Event[] { event0, event1, event2, event3, event4, event5 })
                        {
                            if (pending != null)
                            {
                                pending.Dispose();
                            }
                        }
                    }
                }

                // ---- long offset/size overloads ----
                {
                    Event event0 = null;
                    Event event1 = null;
                    Event event2 = null;
                    Event event3 = null;
                    Event event4 = null;
                    Event event5 = null;
                    try
                    {
                        // Pass 1: no wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0L, (long)bufLen, (IntPtr)pSrc);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0L, 0L, (long)bufLen);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0L, (long)bufLen, (IntPtr)pCmp);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(long version): Copy not identical to source");
                        }

                        // Pass 2: empty wait list, returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0L, (long)bufLen, (IntPtr)pSrc, 0, null, out event0);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0L, 0L, (long)bufLen, 0, null, out event1);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0L, (long)bufLen, (IntPtr)pCmp, 0, null, out event2);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(long version): Copy not identical to source");
                        }

                        // The events from pass 2 serve as the wait list for passes 3 and 4.
                        Event[] events = new Event[] { event0, event1, event2 };

                        // Pass 3: wait list, no returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0L, (long)bufLen, (IntPtr)pSrc, 3, events);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0L, 0L, (long)bufLen, 3, events);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0L, (long)bufLen, (IntPtr)pCmp, 3, events);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(long version): Copy not identical to source");
                        }

                        // Pass 4: wait list and returned event.
                        Array.Clear(cmpData, 0, cmpData.Length);
                        cq.EnqueueWriteBuffer(buf0, true, 0L, (long)bufLen, (IntPtr)pSrc, 3, events, out event3);
                        cq.EnqueueCopyBuffer(buf0, buf1, 0L, 0L, (long)bufLen, 3, events, out event4);
                        cq.EnqueueBarrier();
                        cq.EnqueueReadBuffer(buf1, true, 0L, (long)bufLen, (IntPtr)pCmp, 3, events, out event5);
                        if (!CompareArray(cmpData, srcData))
                        {
                            Error("TestReadWriteCopyOps(long version): Copy not identical to source");
                        }
                    }
                    finally
                    {
                        // Release every event that was actually obtained, even if a call above threw.
                        foreach (Event pending in new Event[] { event0, event1, event2, event3, event4, event5 })
                        {
                            if (pending != null)
                            {
                                pending.Dispose();
                            }
                        }
                    }
                }
            }
        }
        #endregion
    }
    catch (Exception e)
    {
        Error("Exception during testing: " + e.ToString());
    }
    finally
    {
        if (buf0 != null)
        {
            buf0.Dispose();
        }
        if (buf1 != null)
        {
            buf1.Dispose();
        }
        if (buf2 != null)
        {
            buf2.Dispose();
        }
    }
}
/// <summary>
/// Tests EnqueueCopyBuffer between pairs of buffers created with different host-pointer flags
/// (USE_HOST_PTR, COPY_HOST_PTR and ALLOC_HOST_PTR). In every section the source holds 0.0f and
/// the destination holds 1.0f before the copy, so finding 0.0f in the destination afterwards
/// shows that the copy actually transferred the data.
/// </summary>
/// <param name="c">Context used to create the buffers.</param>
/// <param name="cq">Command queue on which the copy and transfer commands are enqueued.</param>
private void TestCommandQueueMemCopy(Context c, CommandQueue cq)
{
    Output("Testing synchronous host memory->memory copy");

    AlignedArrayFloat aafSrc = new AlignedArrayFloat(1024 * 1024, 64);
    AlignedArrayFloat aafDst = new AlignedArrayFloat(1024 * 1024, 64);

    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);

    // Test HOST_PTR -> HOST_PTR copy
    // The call to EnqueueMapBuffer synchronizes caches before testing the result
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR), aafSrc.ByteLength, aafSrc))
    using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR), aafDst.ByteLength, aafDst))
    {
        cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
        cq.EnqueueBarrier();
        IntPtr mappedPtr = cq.EnqueueMapBuffer(memDst, true, MapFlags.READ_WRITE, (IntPtr)0, (IntPtr)aafDst.ByteLength);
        if (!TestAAF(aafDst, 0.0f))
        {
            Error("EnqueueCopyBuffer failed, destination is invalid");
        }
        cq.EnqueueUnmapMemObject(memDst, mappedPtr);
        cq.EnqueueBarrier();
    }

    // Test COPY_HOST_PTR -> COPY_HOST_PTR copy
    // Verify that original source buffers are intact and that the copy was successful
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer(MemFlags.COPY_HOST_PTR, aafSrc.ByteLength, aafSrc))
    using (Mem memDst = c.CreateBuffer(MemFlags.COPY_HOST_PTR, aafSrc.ByteLength, aafDst))
    {
        // Overwrite the host arrays after buffer creation so that any write-through from the
        // buffer copy into host memory would be detected below.
        SetAAF(aafSrc, 2.0f);
        SetAAF(aafDst, 3.0f);

        cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
        cq.Finish();
        if (!TestAAF(aafSrc, 2.0f))
        {
            Error("Memory copy destroyed src buffer");
        }
        if (!TestAAF(aafDst, 3.0f))
        {
            Error("Memory copy destroyed dst buffer");
        }

        Event ev;
        cq.EnqueueReadBuffer(memDst, false, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst, 0, null, out ev);
        try
        {
            cq.EnqueueWaitForEvents(1, new Event[] { ev });
        }
        finally
        {
            // Release the event even if enqueueing the wait fails.
            ev.Dispose();
        }
        cq.Finish();
        if (!TestAAF(aafDst, 0.0f))
        {
            Error("Memory copy failed");
        }
    }

    // Test ALLOC_HOST_PTR -> ALLOC_HOST_PTR copy
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.READ_WRITE), aafSrc.ByteLength, IntPtr.Zero))
    using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.WRITE_ONLY), aafSrc.ByteLength, IntPtr.Zero))
    {
        cq.EnqueueWriteBuffer(memSrc, false, (IntPtr)0, (IntPtr)aafSrc.ByteLength, aafSrc);
        // Seed the destination with aafDst (1.0f), not with the source data: otherwise the
        // check below would succeed even if EnqueueCopyBuffer copied nothing.
        cq.EnqueueWriteBuffer(memDst, false, (IntPtr)0, (IntPtr)aafDst.ByteLength, aafDst);
        cq.EnqueueBarrier();
        cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
        cq.EnqueueBarrier();
        cq.EnqueueReadBuffer(memDst, true, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst);
        if (!TestAAF(aafDst, 0.0f))
        {
            Error("Memory copy failed");
        }
    }

    // Test ALLOC_HOST_PTR|READ_ONLY -> ALLOC_HOST_PTR|WRITE_ONLY copy
    // NOTE(review): this section used to be labelled "DEFAULT -> DEFAULT", but both buffers are
    // created with ALLOC_HOST_PTR. Confirm whether plain (non host-pointer) buffers were intended.
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.READ_ONLY), aafSrc.ByteLength, IntPtr.Zero))
    using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.WRITE_ONLY), aafSrc.ByteLength, IntPtr.Zero))
    {
        cq.EnqueueWriteBuffer(memSrc, false, (IntPtr)0, (IntPtr)aafSrc.ByteLength, aafSrc);
        // Seed the destination with aafDst (1.0f) so the post-copy check is meaningful.
        cq.EnqueueWriteBuffer(memDst, false, (IntPtr)0, (IntPtr)aafDst.ByteLength, aafDst);
        cq.EnqueueBarrier();
        cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
        cq.EnqueueBarrier();
        cq.EnqueueReadBuffer(memDst, true, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst);
        if (!TestAAF(aafDst, 0.0f))
        {
            Error("Memory copy failed");
        }
    }
}
/// <summary>
/// Stores the problem dimensions and couplings, builds the OpenCL program from
/// "MyKernel.cl" and "dirak_mul.cl" with matching -D options, creates all kernels and device
/// buffers, uploads the initial seed/link data and allocates the solver vectors.
/// </summary>
/// <param name="Nxp">Extent stored in Nx and passed to the kernels as the Nx define.</param>
/// <param name="Nyp">Extent stored in Ny and passed to the kernels as the Ny define.</param>
/// <param name="Nzp">Extent stored in Nz and passed to the kernels as the Nz define.</param>
/// <param name="Ntm">Extent stored in Nt and passed to the kernels as the Nt define.</param>
/// <param name="Bbeta">Value stored (converted to floattype) in betagauge; argument 1 of MyKernel.</param>
/// <param name="Flux">Value stored (converted to floattype) in flux; passed as the phi define and as argument 2 of MyKernel.</param>
public Core(int Nxp,int Nyp, int Nzp, int Ntm, double Bbeta, double Flux)
{
    Nx = Nxp;
    Ny = Nyp;
    Nz = Nzp;
    Nt = Ntm;
    betagauge = (floattype)Bbeta;
    flux = (floattype)Flux;
    N = Nx * Ny * Nz * Nt;
    Nspace = Nx * Ny * Nz;

    // Preprocessor defines handed to the OpenCL compiler.
    string buildOptions = "-D Nt=" + Nt.ToString() + " -D Nxyz=" + (Nx * Ny * Nz).ToString() + " -D Nxy=" + (Nx*Ny).ToString() + " -D Nx="+(Nx).ToString()+" -D Ny="+(Ny).ToString()+" -D Nz="+(Nz).ToString();
    buildOptions += typeof(floattype) == typeof(double) ? " -D floattype=double -D floattype2=double2 -D floattype4=double4" : " -D floattype=float -D floattype2=float2 -D floattype4=float4";

    // Format the floating-point defines with the invariant culture so the OpenCL compiler
    // always receives '.' as decimal separator and an ASCII sign, whatever the current
    // culture is. Replacing ',' by '.' only covered cultures that use a decimal comma.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    buildOptions += " -D phi=" + flux.ToString(invariant) + " -D KAPPA=" + kappa.ToString(invariant);

    // Prepended to the kernel source only when floattype is double.
    string fp64Pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

    // Work-group sizes and the resulting number of groups for the reduction kernels.
    Plocalsize = AdjustLocalSize(Nspace);
    Slocalsize = AdjustLocalSize(N / 2);
    XhermYlocalsize = AdjustLocalSize(4 * N);
    // Plocalsize = 16; Slocalsize = 16;
    PNumGroups = Nx * Ny * Nz / Plocalsize;
    SNumGroups = N/2 / Slocalsize;
    XhermYNumGroups = 4*4*N / XhermYlocalsize;

    // Host-side buffer sizes in bytes; AllocBuffers() must run after these are set.
    BufferLength = N * 4 * 9 * 2 * sizeof(floattype);
    SeedBufLen = N * sizeof(Int32)/2 * 4;
    AllocBuffers();

    // Platform 0, all of its devices; the command queue is created on the first device.
    openCLPlatform = OpenCL.GetPlatform(0);
    openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
    openCLContext = openCLPlatform.CreateDefaultContext();
    openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);

    MyKernelProgram = openCLContext.CreateProgramWithSource(
        (typeof(floattype)==typeof(double)?fp64Pragma:"") + File.ReadAllText("MyKernel.cl")+File.ReadAllText("dirak_mul.cl"));
    try
    {
        MyKernelProgram.Build(openCLDevices, buildOptions, null, IntPtr.Zero);
    }
    catch (OpenCLException)
    {
        // Show the build log for the first device and carry on; execution is not aborted here.
        string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]);
        MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
        // Application.Exit();
    }

    // Kernel objects.
    MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel");
    PReductionKernel = MyKernelProgram.CreateKernel("PLoop");
    SReductionKernel = MyKernelProgram.CreateKernel("CalcS");
    DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul");
    FillWithKernel = MyKernelProgram.CreateKernel("FillWith");
    FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith");
    FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom");
    AXPYKernel = MyKernelProgram.CreateKernel("AXPY");
    XhermYKernel = MyKernelProgram.CreateKernel("XhermY");
    BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink");
    RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink");

    // Buffers created in the OpenCL context, plus two unmanaged host blocks for results.
    SeedMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedBufLen, IntPtr.Zero);
    LinkMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), BufferLength, IntPtr.Zero);
    PGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * PNumGroups, IntPtr.Zero);
    PResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    SGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * SNumGroups, IntPtr.Zero);
    SResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    XhermYGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2*XhermYNumGroups, IntPtr.Zero);
    XhermYresMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2, IntPtr.Zero);
    XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2);
    SeedVectorMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedVectorBuf.Length * sizeof(int), IntPtr.Zero);
    StorageMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), linksize, IntPtr.Zero);
    dSmem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
    dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize);

    // Arguments that are set once here.
    MyKernelKernel.SetArg(0, (byte)EvenOdd);
    MyKernelKernel.SetArg(1, (floattype)betagauge);
    MyKernelKernel.SetArg(2, (floattype)flux);
    MyKernelKernel.SetArg(3, SeedMem);
    MyKernelKernel.SetArg(4, LinkMem);

    PReductionKernel.SetArg(0, LinkMem);
    PReductionKernel.SetArg(1, PGroupMem);
    PReductionKernel.SetArg(2, PResMem);
    // Size-only argument (null data pointer): Plocalsize elements of floatsize bytes.
    IntPtr pLocalBytes = new IntPtr(Plocalsize * floatsize);
    PReductionKernel.SetArg(3, pLocalBytes, IntPtr.Zero);

    SReductionKernel.SetArg(0, LinkMem);
    SReductionKernel.SetArg(1, SGroupMem);
    SReductionKernel.SetArg(2, SResMem);
    // Size-only argument (null data pointer): Slocalsize elements of floatsize bytes.
    IntPtr sLocalBytes = new IntPtr(Slocalsize * floatsize);
    SReductionKernel.SetArg(3, sLocalBytes, IntPtr.Zero);

    // Arguments 0 and 1 of XhermY are not set in this constructor.
    XhermYKernel.SetArg(2, XhermYresMem);
    XhermYKernel.SetArg(3, XhermYGroupMem);
    XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize*floatsize*2),IntPtr.Zero);

    // Blocking uploads of the initial host data.
    openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed);
    openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip);
    openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length*sizeof(int), ipseedvector);

    rhat0 = new Vector(); //init BICGStab vectors
    phi = new Vector();
    r0 = new Vector();
    //rprev = new Vector();
    pi = new Vector();
    vi = new Vector();
    t = new Vector();
    s = new Vector();
    // xprev = new Vector();
    // vprev = new Vector();
    // pprev = new Vector();
    temp = new Vector();
    ri = new Vector();
    x = new Vector(); //for fermion update
    chi = new Vector();

    // Evaluate S before a link backup, after the backup and after the restore. The three
    // values are only consumed by the commented-out diagnostic below.
    CalculateS();
    double s1 = S[0];
    BackupLink(0, 0,1, 0, 1);
    CalculateS();
    double s2 = S[0];
    RestoreLink(0, 0, 1, 0, 1);
    CalculateS();
    double s3 = S[0];
    //MessageBox.Show(s1.ToString() + s2.ToString() + s3.ToString());
}