コード例 #1
0
ファイル: KernelClass.cs プロジェクト: dptph/lat-qcd
        public Core(int Nxp,int Nyp, int Nzp, int Ntm, double Bbeta, double Flux)
        {
            Nx = Nxp; Ny = Nyp; Nz = Nzp; Nt = Ntm; betagauge = (floattype)Bbeta; flux = (floattype)Flux;
            N = Nx * Ny * Nz * Nt; Nspace = Nx * Ny * Nz;

            string strforcompiler =  "-D Nt=" + Nt.ToString() + " -D Nxyz=" + (Nx * Ny * Nz).ToString() + " -D Nxy=" + (Nx*Ny).ToString() +
                                            " -D Nx="+(Nx).ToString()+" -D Ny="+(Ny).ToString()+" -D Nz="+(Nz).ToString();
            strforcompiler += typeof(floattype) == typeof(double) ? " -D floattype=double -D floattype2=double2 -D floattype4=double4" :
                                                                " -D floattype=float -D floattype2=float2 -D floattype4=float4";
            strforcompiler += " -D phi=" + flux.ToString().Replace(',', '.') + " -D KAPPA=" + kappa.ToString().Replace(',', '.');
            string fp64support = "#pragma OPENCL EXTENSION  cl_khr_fp64 : enable\n";

            Plocalsize = AdjustLocalSize(Nspace);
            Slocalsize = AdjustLocalSize(N / 2);
            XhermYlocalsize = AdjustLocalSize(4 * N);

               // Plocalsize = 16; Slocalsize = 16;

            PNumGroups = Nx * Ny * Nz / Plocalsize;
            SNumGroups = N/2 / Slocalsize;
            XhermYNumGroups = 4*4*N / XhermYlocalsize;
            BufferLength = N * 4 * 9 * 2 * sizeof(floattype);
            SeedBufLen = N * sizeof(Int32)/2 * 4;

            AllocBuffers();

            openCLPlatform = OpenCL.GetPlatform(0);
            openCLDevices = openCLPlatform.QueryDevices(DeviceType.ALL);
            openCLContext = openCLPlatform.CreateDefaultContext();
            openCLCQ = openCLContext.CreateCommandQueue(openCLDevices[0], CommandQueueProperties.PROFILING_ENABLE);
            MyKernelProgram = openCLContext.CreateProgramWithSource(
                (typeof(floattype)==typeof(double)?fp64support:"") + File.ReadAllText("MyKernel.cl")+File.ReadAllText("dirak_mul.cl"));
            try
            {
                MyKernelProgram.Build(openCLDevices, strforcompiler, null, IntPtr.Zero);
            }
            catch (OpenCLException)
            {
                string buildLog = MyKernelProgram.GetBuildLog(openCLDevices[0]);
                MessageBox.Show(buildLog, "Build error(64 bit debug sessions in vs2008 always fail like this - debug in 32 bit or use vs2010)");
                //  Application.Exit();
            }
            MyKernelKernel = MyKernelProgram.CreateKernel("MyKernel");
            PReductionKernel = MyKernelProgram.CreateKernel("PLoop");
            SReductionKernel = MyKernelProgram.CreateKernel("CalcS");
            DiralMulKernel = MyKernelProgram.CreateKernel("dirakMatrMul");
            FillWithKernel = MyKernelProgram.CreateKernel("FillWith");
            FillLinkWithKernel = MyKernelProgram.CreateKernel("FillLinkWith");
            FillWithRandomKernel = MyKernelProgram.CreateKernel("FillWithRandom");
            AXPYKernel = MyKernelProgram.CreateKernel("AXPY");
            XhermYKernel = MyKernelProgram.CreateKernel("XhermY");
            BackupLinkKernel = MyKernelProgram.CreateKernel("BackupLink");
            RestoreLinkKernel = MyKernelProgram.CreateKernel("RestoreLink");

            SeedMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedBufLen, IntPtr.Zero);
            LinkMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), BufferLength, IntPtr.Zero);
            PGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * PNumGroups, IntPtr.Zero);
            PResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
            SGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * SNumGroups, IntPtr.Zero);
            SResMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);

            XhermYGroupMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2*XhermYNumGroups, IntPtr.Zero);
            XhermYresMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize * 2, IntPtr.Zero);
            XhermYrespointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize * 2);

            SeedVectorMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), SeedVectorBuf.Length * sizeof(int), IntPtr.Zero);
            StorageMem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), linksize, IntPtr.Zero);
            dSmem = openCLContext.CreateBuffer((MemFlags)((long)MemFlags.READ_WRITE), floatsize, IntPtr.Zero);
            dSpointer = System.Runtime.InteropServices.Marshal.AllocHGlobal(floatsize);

            MyKernelKernel.SetArg(0, (byte)EvenOdd);
            MyKernelKernel.SetArg(1, (floattype)betagauge);
            MyKernelKernel.SetArg(2, (floattype)flux);
            MyKernelKernel.SetArg(3, SeedMem);
            MyKernelKernel.SetArg(4, LinkMem);

            PReductionKernel.SetArg(0, LinkMem);
            PReductionKernel.SetArg(1, PGroupMem);
            PReductionKernel.SetArg(2, PResMem);
            IntPtr ptr = new IntPtr(Plocalsize * floatsize);
            PReductionKernel.SetArg(3, ptr, IntPtr.Zero);

            SReductionKernel.SetArg(0, LinkMem);
            SReductionKernel.SetArg(1, SGroupMem);
            SReductionKernel.SetArg(2, SResMem);
            IntPtr ptr1 = new IntPtr(Slocalsize * floatsize);
            SReductionKernel.SetArg(3, ptr1, IntPtr.Zero);

            XhermYKernel.SetArg(2, XhermYresMem);
            XhermYKernel.SetArg(3, XhermYGroupMem);
            XhermYKernel.SetArg(4, new IntPtr(XhermYlocalsize*floatsize*2),IntPtr.Zero);

            openCLCQ.EnqueueWriteBuffer(SeedMem, true, 0, SeedBufLen, ipseed);
            openCLCQ.EnqueueWriteBuffer(LinkMem, true, 0, BufferLength, ip);
            openCLCQ.EnqueueWriteBuffer(SeedVectorMem, true, 0, SeedVectorBuf.Length*sizeof(int), ipseedvector);
            rhat0 = new Vector();
            //init BICGStab vectors
            phi = new Vector();

            r0 = new Vector();

            //rprev = new Vector();
            pi = new Vector();
            vi = new Vector();
            t = new Vector();
            s = new Vector();
               // xprev = new Vector();

               // vprev = new Vector();
               // pprev = new Vector();

            temp = new Vector();

            ri = new Vector();

            x = new Vector();

            //for fermion update

            chi = new Vector();

            CalculateS();
            double s1 = S[0];
            BackupLink(0, 0,1, 0, 1);
            CalculateS();
            double s2 = S[0];
            RestoreLink(0, 0, 1, 0, 1);
            CalculateS();
            double s3 = S[0];

            //MessageBox.Show(s1.ToString() + s2.ToString() + s3.ToString());
        }
コード例 #2
0
ファイル: KernelClass.cs プロジェクト: dptph/lat-qcd
 //y=MulD(x)
 void MulD(Vector x, Vector result)
 {
     DiralMulKernel.SetArg(0, LinkMem);
     DiralMulKernel.SetArg(1, x.buf);
     DiralMulKernel.SetArg(2, result.buf);
     openCLCQ.EnqueueNDRangeKernel(DiralMulKernel, 1, null, new int[1] { 4 * N }, null);
     openCLCQ.Finish();
     openCLCQ.Flush();
        // result.Updatearray();
 }
コード例 #3
0
ファイル: KernelClass.cs プロジェクト: dptph/lat-qcd
        Complex V1hermV2(Vector v1, Vector v2)
        {
            floattype[] c = new floattype[2];
            for (int i = 0; i < 2; i++)
            {
                XhermYKernel.SetArg(0, v1.buf);
                XhermYKernel.SetArg(1, v2.buf);
                openCLCQ.EnqueueNDRangeKernel(XhermYKernel, 1, null, new int[1] { 4 * N }, new int[1] { XhermYlocalsize });
                openCLCQ.Finish();
                openCLCQ.Flush();

                openCLCQ.EnqueueReadBuffer(XhermYresMem, true, 0, floatsize * 2, XhermYrespointer);

                System.Runtime.InteropServices.Marshal.Copy(XhermYrespointer, c, 0, 2);
            }
            return new Complex(c[0], c[1]);
        }
コード例 #4
0
ファイル: KernelClass.cs プロジェクト: dptph/lat-qcd
 void AXPY(Complex a, Vector x, Vector y,Vector result)
 {
     AXPYKernel.SetArg(0, new floattype2(a.x,a.y));
     AXPYKernel.SetArg(1, x.buf);
     AXPYKernel.SetArg(2, y.buf);
     AXPYKernel.SetArg(3, result.buf);
     openCLCQ.EnqueueNDRangeKernel(AXPYKernel, 1, null, new int[1] { 4 * N }, null);
     openCLCQ.Finish();
     //result.Updatearray();
 }