コード例 #1
ファイル: Form1.cs プロジェクト: ctapang/GPUCyclops
        /// <summary>
        /// Runs <paramref name="argIOKernel"/> as a single task with three scalar arguments
        /// (1, 65L, 38.4f) and an output buffer, then checks that the kernel wrote the same
        /// values back into an <c>IOKernelArgs</c> structure.
        /// </summary>
        /// <param name="c">Context used to allocate the output buffer.</param>
        /// <param name="cq">Command queue the task and the buffer transfers are issued on.</param>
        /// <param name="argIOKernel">Kernel under test; arguments 0-3 are set here.</param>
        private unsafe void TestKernel(Context c, CommandQueue cq, Kernel argIOKernel)
        {
            Mem outArgBuffer = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR|(ulong)MemFlags.READ_WRITE), sizeof(IOKernelArgs), IntPtr.Zero);
            byte[] data = new byte[sizeof(IOKernelArgs)];
            Output("Testing kernel - Argument return");

            argIOKernel.SetArg(0, 1);
            argIOKernel.SetArg(1, 65L);
            argIOKernel.SetArg(2, 38.4f);
            argIOKernel.SetArg(3, outArgBuffer);

            Event ev;
            cq.EnqueueTask(argIOKernel,0,null,out ev);

            // NOTE(review): no wait on the queue or the event is visible before the status is
            // read, so a still-pending task would pass this check — confirm against the caller.
            if ((int)ev.ExecutionStatus < 0)
            {
                Error(cq.Device.Name + ": argIOKernel failed with error code " + (ErrorCode)ev.ExecutionStatus);
            }
            else
            {
                // Only inspect the results when the task did not fail; comparing the
                // contents of the buffer after a failed run would just add noise.

                // The copy into 'data' is kept from the original; its contents are not
                // inspected afterwards.
                outArgBuffer.Read(cq, 0L, data, 0, sizeof(IOKernelArgs));

                // Map the buffer (second argument 'true' requests a blocking map), marshal
                // the structure out of it, and release the mapping again.
                IntPtr outArgPtr = cq.EnqueueMapBuffer(outArgBuffer, true, MapFlags.READ, IntPtr.Zero, (IntPtr)sizeof(IOKernelArgs));
                IOKernelArgs args = (IOKernelArgs)Marshal.PtrToStructure(outArgPtr, typeof(IOKernelArgs));
                cq.EnqueueUnmapMemObject(outArgBuffer, outArgPtr);

                // Each field is reported separately, so several mismatches produce several errors.
                if (args.outInt != 1)
                {
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
                }
                if (args.outLong != 65)
                {
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
                }
                // Exact comparison is intended: the value is expected to round-trip unchanged.
                if (args.outSingle != 38.4f)
                {
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
                }
            }
        }
コード例 #2
ファイル: Form1.cs プロジェクト: ctapang/GPUCyclops
        /// <summary>
        /// Sets a <c>Float2</c> value and a two-float buffer as arguments 0 and 1 of
        /// <paramref name="k"/>, then maps and unmaps the buffer for reading.
        /// </summary>
        /// <param name="c">Context used to create the buffer.</param>
        /// <param name="cq">Command queue the map/unmap commands are issued on.</param>
        /// <param name="k">Kernel whose arguments are set.</param>
        private unsafe void TestVecKernel(Context c, CommandQueue cq, Kernel k)
        {
            Float2 f2 = new Float2(0.0f,1.0f);
            float[] memory = new float[2];

            // The buffer is created with USE_HOST_PTR over 'memory', so the array has to stay
            // pinned for as long as the buffer is used: everything that touches 'mem' lives
            // inside the fixed block.
            fixed (float* pMemory = memory)
            {
                // 4 * 2 bytes: two 4-byte floats, matching the length of 'memory'.
                Mem mem = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR), 4 * 2, pMemory);

                k.SetArg(0, f2);
                k.SetArg(1, mem);

                // NOTE(review): the kernel is not enqueued anywhere in this method as shown;
                // only the argument binding and a map/unmap round trip are exercised.
                IntPtr pMap = cq.EnqueueMapBuffer(mem, true, MapFlags.READ, 0, 2 * 4);
                cq.EnqueueUnmapMemObject(mem, pMap);
            }
        }
コード例 #3
ファイル: Form1.cs プロジェクト: ctapang/GPUCyclops
        /// <summary>
        /// Enqueues ten tasks with kernel argument 0 set to 5000000 followed by ten with it set
        /// to 500, asks the queue to wait for all of their events, and reports every event
        /// whose execution status is negative.
        /// </summary>
        /// <param name="c">Context the queue belongs to (not used by this method).</param>
        /// <param name="cq">Command queue the tasks are issued on.</param>
        /// <param name="kernel">Kernel to run; argument 0 is set here.</param>
        private void TestCommandQueueAsync(Context c, CommandQueue cq, Kernel kernel )
        {
            List<Event> events = new List<Event>();
            Event clEvent;

            Output("Testing asynchronous task issuing (clEnqueueTask) and waiting for events");

            // Issue a bunch of slow operations
            kernel.SetArg(0, 5000000);
            for (int i = 0; i < 10; i++)
            {
                cq.EnqueueTask(kernel, 0, null, out clEvent);
                // Keep the event; otherwise the wait list below is empty and nothing is checked.
                events.Add(clEvent);
            }

            // Issue a bunch of fast operations
            kernel.SetArg(0, 500);
            for (int i = 0; i < 10; i++)
            {
                cq.EnqueueTask(kernel, 0, null, out clEvent);
                events.Add(clEvent);
            }

            Event[] eventList = events.ToArray();
            cq.EnqueueWaitForEvents(eventList.Length, eventList);

            // Visit every event exactly once. The previous 'while (events.Count > 0)' form
            // never removed anything from the list and could not terminate on a non-empty list.
            foreach (Event pending in eventList)
            {
                if ((int)pending.ExecutionStatus < 0)
                {
                    Output(cq.Device.Name + ": TestCommandQueueAsync failed with error code " + (ErrorCode)pending.ExecutionStatus);
                }
            }
        }
コード例 #4
ファイル: KernelClass.cs プロジェクト: dptph/lat-qcd
        /// <summary>
        /// Stores the problem dimensions and parameters, builds the OpenCL program from
        /// "MyKernel.cl" and "dirak_mul.cl", creates its kernels and device buffers, binds the
        /// fixed kernel arguments, uploads the initial host data and allocates the work vectors.
        /// </summary>
        /// <param name="Nxp">Extent stored in <c>Nx</c>.</param>
        /// <param name="Nyp">Extent stored in <c>Ny</c>.</param>
        /// <param name="Nzp">Extent stored in <c>Nz</c>.</param>
        /// <param name="Ntm">Extent stored in <c>Nt</c>.</param>
        /// <param name="Bbeta">Value stored in <c>betagauge</c> after conversion to <c>floattype</c>.</param>
        /// <param name="Flux">Value stored in <c>flux</c> and passed to the OpenCL compiler as <c>phi</c>.</param>
        public Core(int Nxp,int Nyp, int Nzp, int Ntm, double Bbeta, double Flux)
        {
            Nx = Nxp; Ny = Nyp; Nz = Nzp; Nt = Ntm; betagauge = (floattype)Bbeta; flux = (floattype)Flux;
            N = Nx * Ny * Nz * Nt; Nspace = Nx * Ny * Nz;

            // Preprocessor definitions handed to the OpenCL compiler.
            string strforcompiler =  "-D Nt=" + Nt.ToString() + " -D Nxyz=" + (Nx * Ny * Nz).ToString() + " -D Nxy=" + (Nx*Ny).ToString() +
                                            " -D Nx="+(Nx).ToString()+" -D Ny="+(Ny).ToString()+" -D Nz="+(Nz).ToString();
            strforcompiler += typeof(floattype) == typeof(double) ? " -D floattype=double -D floattype2=double2 -D floattype4=double4" :
                                                                " -D floattype=float -D floattype2=float2 -D floattype4=float4";

            // The numeric defines must use '.' as decimal separator and an ASCII minus sign
            // regardless of the user's regional settings, so format them with the invariant
            // culture instead of patching the culture-specific text afterwards.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            strforcompiler += " -D phi=" + flux.ToString(invariant) + " -D KAPPA=" + kappa.ToString(invariant);
            string fp64support = "#pragma OPENCL EXTENSION  cl_khr_fp64 : enable\n";

            Plocalsize = AdjustLocalSize(Nspace);
            Slocalsize = AdjustLocalSize(N / 2);
            XhermYlocalsize = AdjustLocalSize(4 * N);

            // Group counts use integer division.
            // NOTE(review): presumably AdjustLocalSize returns a divisor of its argument — confirm.
            PNumGroups = Nx * Ny * Nz / Plocalsize;
            SNumGroups = N/2 / Slocalsize;
            XhermYNumGroups = 4*4*N / XhermYlocalsize;
            BufferLength = N * 4 * 9 * 2 * sizeof(floattype);
            SeedBufLen = N * sizeof(Int32)/2 * 4;

            openCLPlatform = OpenCL.GetPlatform(0);
            openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
            openCLContext = openCLPlatform.CreateDefaultContext();
            openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);

            // The fp64 pragma is prepended only when floattype is double.
            MyKernelProgram = openCLContext.CreateProgramWithSource(
                (typeof(floattype)==typeof(double)?fp64support:"") + File.ReadAllText("MyKernel.cl")+File.ReadAllText("dirak_mul.cl"));
            try
            {
                MyKernelProgram.Build(openCLDevices, strforcompiler, null, IntPtr.Zero);
            }
            catch (OpenCLException)
            {
                // Show the compiler output; construction deliberately continues afterwards
                // (the Application.Exit() call that used to follow is disabled).
                string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]);
                MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
            }

            MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel");
            PReductionKernel = MyKernelProgram.CreateKernel("PLoop");
            SReductionKernel = MyKernelProgram.CreateKernel("CalcS");
            DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul");
            FillWithKernel = MyKernelProgram.CreateKernel("FillWith");
            FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith");
            FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom");
            AXPYKernel = MyKernelProgram.CreateKernel("AXPY");
            XhermYKernel = MyKernelProgram.CreateKernel("XhermY");
            BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink");
            RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink");

            // Device buffers, all read/write and without an initial host pointer.
            SeedMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedBufLen, IntPtr.Zero);
            LinkMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), BufferLength, IntPtr.Zero);
            PGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * PNumGroups, IntPtr.Zero);
            PResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
            SGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * SNumGroups, IntPtr.Zero);
            SResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);

            XhermYGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2*XhermYNumGroups, IntPtr.Zero);
            XhermYresMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2, IntPtr.Zero);
            // Unmanaged host block sized for two floattype values.
            XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2);

            SeedVectorMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedVectorBuf.Length * sizeof(int), IntPtr.Zero);
            StorageMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), linksize, IntPtr.Zero);
            dSmem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
            dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize);

            MyKernelKernel.SetArg(0, (byte)EvenOdd);
            MyKernelKernel.SetArg(1, (floattype)betagauge);
            MyKernelKernel.SetArg(2, (floattype)flux);
            MyKernelKernel.SetArg(3, SeedMem);
            MyKernelKernel.SetArg(4, LinkMem);

            PReductionKernel.SetArg(0, LinkMem);
            PReductionKernel.SetArg(1, PGroupMem);
            PReductionKernel.SetArg(2, PResMem);
            // Size-only argument with a null pointer.
            // NOTE(review): presumably reserves per-work-group local scratch — confirm against the kernel signature.
            IntPtr ptr = new IntPtr(Plocalsize * floatsize);
            PReductionKernel.SetArg(3, ptr, IntPtr.Zero);

            SReductionKernel.SetArg(0, LinkMem);
            SReductionKernel.SetArg(1, SGroupMem);
            SReductionKernel.SetArg(2, SResMem);
            IntPtr ptr1 = new IntPtr(Slocalsize * floatsize);
            SReductionKernel.SetArg(3, ptr1, IntPtr.Zero);

            // Arguments 0 and 1 of XhermY are not bound here.
            XhermYKernel.SetArg(2, XhermYresMem);
            XhermYKernel.SetArg(3, XhermYGroupMem);
            XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize*floatsize*2),IntPtr.Zero);

            // Upload the initial host data (third argument 'true' requests blocking writes).
            openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed);
            openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip);
            openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length*sizeof(int), ipseedvector);

            // Work vectors; the allocation order is kept exactly as in the original.
            rhat0 = new Vector();
            //init BICGStab vectors
            phi = new Vector();

            r0 = new Vector();

            pi = new Vector();
            vi = new Vector();
            t = new Vector();
            s = new Vector();

            temp = new Vector();

            ri = new Vector();

            x = new Vector();

            //for fermion update

            chi = new Vector();

            // Read S[0] before a backup, after the backup and after the restore. The three
            // values are not used further (the message box that displayed them is disabled);
            // the calls are kept because they may have side effects.
            double s1 = S[0];
            BackupLink(0, 0,1, 0, 1);
            double s2 = S[0];
            RestoreLink(0, 0, 1, 0, 1);
            double s3 = S[0];

            //MessageBox.Show(s1.ToString() + s2.ToString() + s3.ToString());
        }