Exemple #1
0
        /// <summary>
        /// Runs the vector addition test: fills two host arrays with random values,
        /// executes the "VectorAdd" kernel on the first device of <paramref name="context"/>
        /// and writes every "a + b = c" line to <paramref name="log"/>.
        /// Any exception raised along the way is written to the log instead of propagating.
        /// </summary>
        /// <param name="log">Writer that receives the test output and any exception text.</param>
        /// <param name="context">Compute context used to create the buffers, program and queue.</param>
        public static void Run(TextWriter log, ComputeContext context)
        {
            StartTest(log, "Vector addition test");

            try
            {
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();

                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                ComputeProgram program = new ComputeProgram(context, kernelSource);
                program.Build(null, null, null, IntPtr.Zero);
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                ICollection<ComputeEventBase> events = new Collection<ComputeEventBase>();

                // BUG: ATI Stream v2.2 crash if event list not null.
                // Workaround on that platform: pass null instead of `events` as the last argument.
                commands.Execute(kernel, null, new long[] { count }, null, events);

                // Pin the destination array so its address stays valid while the read copies into it.
                GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
                try
                {
                    // The `true` argument is passed in the same position as in the original call
                    // (blocking flag); the results are consumed right after this call.
                    commands.Read(c, true, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                }
                finally
                {
                    // Always release the pin, even when the read throws; otherwise the handle leaks.
                    arrCHandle.Free();
                }

                for (int i = 0; i < count; i++)
                {
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
                }
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }

            EndTest(log, "Vector addition test");
        }
		/// <summary>
		/// Sets up the OpenCL objects used by the matcher: picks the first platform,
		/// creates a context over all of its device types, selects the first device,
		/// compiles the source read from MD5_OPENCL_FILE and creates the "crackMD5" kernel.
		/// Terminates the process with exit code 1 when no OpenCL platform is present.
		/// </summary>
		public OpenCLPasswordMatcher ()
		{
			if (ComputePlatform.Platforms.Count == 0) {
				Console.WriteLine ("Cound not find any OpenCL platforms");
				Environment.Exit (1);
			}

			var platform = ComputePlatform.Platforms [0];

			logger.Info ("Found {0} computing devices:", platform.Devices.Count);

			foreach (var d in platform.Devices) {
				logger.Info ("* {0}", d.Name);
			}

			Context = new ComputeContext (ComputeDeviceTypes.All,
				new ComputeContextPropertyList (platform), null, IntPtr.Zero);

			Device = Context.Devices [0];

			logger.Info ("Using first device.");

			// load opencl source; `using` closes the reader even if reading fails
			string clSource;
			using (StreamReader streamReader = new StreamReader (MD5_OPENCL_FILE)) {
				clSource = streamReader.ReadToEnd ();
			}

			// create program with opencl source
			ComputeProgram program = new ComputeProgram (Context, clSource);

			// compile opencl source
			try {
				program.Build (null, null, null, IntPtr.Zero);
			} catch (Exception) {
				logger.Error ("Build log: " + program.GetBuildLog(Device));
				// rethrow with `throw;` so the original stack trace is preserved
				throw;
			}

			// load chosen kernel from program
			Kernel = program.CreateKernel ("crackMD5");
		}
        /// <summary>
        /// Prepares the OpenCL state used by this class: a GPU context on the first platform,
        /// one command queue and one "AddOneElement" kernel per device, three host-backed
        /// buffers, and one sub-buffer of each per device.
        /// </summary>
        /// <param name="result">Host array backing the write-only result buffer.</param>
        /// <param name="left">Host array backing the read-only left operand buffer.</param>
        /// <param name="right">Host array backing the read-only right operand buffer.</param>
        static void InitializeOpenCL(Real[] result, Real[] left, Real[] right)
        {
            // Take the first platform and report its name and version.
            var firstPlatform = ComputePlatform.Platforms[0];
            Console.WriteLine("プラットフォーム:{0} ({1})", firstPlatform.Name, firstPlatform.Version);

            // Create a context restricted to GPU devices of that platform.
            var platformProperties = new ComputeContextPropertyList(firstPlatform);
            var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, platformProperties, null, IntPtr.Zero);

            // Devices available in the context.
            var devices = context.Devices;
            Console.WriteLine("デバイス数:{0}", devices.Count);

            // Number of elements handled by a single device (rounded up).
            countPerDevice = (int)Math.Ceiling((double)COUNT / devices.Count);

            // One command queue per device; each device is listed as its queue is created.
            queues = new ComputeCommandQueue[devices.Count];
            for(int deviceIndex = 0; deviceIndex < devices.Count; deviceIndex++)
            {
                var current = devices[deviceIndex];
                queues[deviceIndex] = new ComputeCommandQueue(context, current, ComputeCommandQueueFlags.None);
                Console.WriteLine("* {0} ({1})", current.Name, current.Vendor);
            }

            // Create the program from the embedded source.
            var program = new ComputeProgram(context, Properties.Resources.MultiGpu);

            // Try to build it for every device.
            try
            {
                // The kernel is compiled for whichever floating point type Real stands for.
                string realString;
                if(typeof(Real) == typeof(Double))
                {
                    realString = "double";
                }
                else
                {
                    realString = "float";
                }

                string buildOptions = string.Format(
                    " -D REAL={0} -D REALV={0}{1} -D VLOADN=vload{1} -D VSTOREN=vstore{1} -D COUNT_PER_WORKITEM={2} -Werror",
                    realString, VECTOR_COUNT, COUNT_PER_WORKITEM);

                program.Build(devices, buildOptions, null, IntPtr.Zero);
            }
            catch(BuildProgramFailureComputeException ex)
            {
                // On build failure, wrap the message together with the first device's build log.
                string details = string.Format("{0}\n{1}", ex.Message, program.GetBuildLog(devices[0]));
                throw new ApplicationException(details, ex);
            }

            // One kernel instance per device.
            addOneElement = new ComputeKernel[devices.Count];
            for(int kernelIndex = 0; kernelIndex < addOneElement.Length; kernelIndex++)
            {
                addOneElement[kernelIndex] = program.CreateKernel("AddOneElement");
            }

            // Whole-array buffers that use the caller's host memory.
            bufferLeft = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, left);
            bufferRight = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, right);
            bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, result);

            // Per-device sub-buffers, each starting at that device's offset and spanning countPerDevice elements.
            buffersLeft = new ComputeSubBuffer<Real>[devices.Count];
            buffersRight = new ComputeSubBuffer<Real>[devices.Count];
            buffersResult = new ComputeSubBuffer<Real>[devices.Count];
            for(int slice = 0; slice < devices.Count; slice++)
            {
                var offset = countPerDevice * slice;

                buffersLeft[slice] = new ComputeSubBuffer<Real>(bufferLeft, ComputeMemoryFlags.ReadOnly, offset, countPerDevice);
                buffersRight[slice] = new ComputeSubBuffer<Real>(bufferRight, ComputeMemoryFlags.ReadOnly, offset, countPerDevice);
                buffersResult[slice] = new ComputeSubBuffer<Real>(bufferResult, ComputeMemoryFlags.WriteOnly, offset, countPerDevice);
            }
        }
Exemple #4
0
 /// <summary>
 /// Creates the "place" kernel from <paramref name="program"/>.
 /// If creation fails, the program's build log is written to the console
 /// and the original exception is rethrown.
 /// </summary>
 /// <param name="program">Program from which the kernel is created.</param>
 /// <returns>The kernel named "place".</returns>
 private static ComputeKernel GetKernel(ComputeProgram program)
 {
     try
     {
         return program.CreateKernel("place");
     }
     catch
     {
         // Ask for the log of a device that belongs to the program's own context.
         // The platform's first device is not necessarily part of that context, and
         // a failing log query here would hide the exception we are about to rethrow.
         string log = program.GetBuildLog(program.Context.Devices[0]);
         Console.WriteLine(log);
         throw;
     }
 }
Exemple #5
0
        /// <summary>
        /// Vector addition example: fills two host arrays with random values, runs the
        /// "VectorAdd" kernel on the first device of the context, reads the result back
        /// and prints every "a + b = c" line to the console.
        /// </summary>
        protected override void RunInternal()
        {
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();

            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
            ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource });
            program.Build(null, null, null, IntPtr.Zero);

            ComputeKernel kernel = program.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            ComputeEventList events = new ComputeEventList();

            commands.Execute(kernel, null, new long[] { count }, null, events);

            // Pin the destination array so its address stays valid while the queued read is pending.
            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
            try
            {
                // The read is enqueued with `false` (same argument as the original call),
                // so Finish() is what guarantees it has completed before the pin is released.
                commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                commands.Finish();
            }
            finally
            {
                // Release the pin on every path; without this an exception would leak the handle.
                arrCHandle.Free();
            }

            for (int i = 0; i < count; i++)
            {
                Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
        }
        public ICalculator GenFractalCalc(List<ProcessLayer> LayerData, FractalType fractaltype, string code, ProcessLayer deflayer)
        {
            string macros = @"
            #pragma OPENCL EXTENSION cl_amd_printf : enable

            inline float ABS(float a) {
              return a>0?a:-a;
            }

            inline float ARGC(float2 a) {
              return atan2(a.y,a.x);
            }

            inline float NORM(float2 a) {
              return a.x*a.x+a.y*a.y;
            }

            inline float ABSC(float2 a) {
              return sqrt(NORM(a));
            }

            inline float2 MULC(float2 a, float2 b) {
              return (float2)( a.x*b.x-a.y*b.y, a.y*b.x+a.x*b.y  );
            }

            inline float2 DIVC(float2 a, float2 b) {
              return (float2)( (a.x*b.x+a.y*b.y)/(b.x*b.x+b.y*b.y), (a.y*b.x-a.x*b.y)/(b.x*b.x+b.y*b.y)  );
            }

            inline float2 lnc(float2 c) {
              float r = ABSC(c);
              float a = ARGC(c);
              return (float2)(log(r),a);
            }

            inline float2 arctanc(float2 c) {
              float2 io = (float2)(0.0f,1.0f);
              float2 two = (float2)(2.0f,0.0f);
              float2 one = (float2)(1.0f,0.0f);

              return (float2)(MULC(DIVC(io,two),lnc(one - MULC(io,c))-lnc(one + MULC(io,c))));
            }

            inline float2 powc(float2 c, float p) {
              if (NORM(c)==0) {
            return (float2)(0.0f,0.0f);
              } else {
            float r = pow(ABSC(c),p);
            float a = ARGC(c)*p;
            return (float2)(r*cos(a),r*sin(a));
              }
            }

            struct ProcessLayer {
             float2 c_old2x;
             float2 c_oldx;
             float2 c_x;
             float2 c_resx;
             float c_calc;
             float c_cmean;
             float c_cvarsx;
             float c_cvariance;
             int c_active;
             int c_isin;
             int c_n;
             int c_resn;
            };

            kernel void FractalCalc (
            global  read_only float2* in_x,
            global  read_only float2* in_c,
            ";

              StringBuilder kernel = new StringBuilder(macros);

              for (int i=0; i< LayerData.Count; i++) {
            kernel.Append("    global write_only struct ProcessLayer* out_p" + i);
            kernel.Append(i+1==LayerData.Count ? "\n){" : ",\n");
              }

              bool hastriangle = false;
              bool fractdiv = true;
              SeqType modesused = 0;

              foreach (var it in LayerData)
              {
            if (it.c_checktype.HasFlag(SeqCheck.MPL_CHECK_TRIANGLE)) hastriangle = true;
            if (it.c_checktype.HasFlag(SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH)) hastriangle = true;
            modesused |= it.c_seqtype;
              }

              if (modesused.HasFlag(SeqType.MPL_SEQ_STDDEV)) modesused |= SeqType.MPL_SEQ_VARIANCE;
              if (modesused.HasFlag(SeqType.MPL_SEQ_VARIANCE)) modesused |= SeqType.MPL_SEQ_VARSX;
              if (modesused.HasFlag(SeqType.MPL_SEQ_VARSX)) modesused |= SeqType.MPL_SEQ_MEAN;

              kernel.Append("float2 sumx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 meanx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 varsx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 variacex = (float2)(0.0f,0.0f);");
              kernel.Append("float2 sdx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 minx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 maxx = (float2)(0.0f,0.0f);");
              kernel.Append("float2 deltax = (float2)(0.0f,0.0f);");
              kernel.Append("float2 deltac = (float2)(0.0f,0.0f);");

              kernel.Append("float delta = 0.0f;");
              kernel.Append("float newxnorm = 0.0f;");
              kernel.Append("float lowbound = 0.0f;");
              kernel.Append("float newd = 0.0f;");

              kernel.Append("int end = 0;");
              kernel.Append("int n = 0;");
              kernel.Append("float2 newx = (float2)(0.0f,0.0f);");

              kernel.Append("int index = get_global_id(0);");
              kernel.Append("float2 x = in_x[index];");
              kernel.Append("float2 c = in_c[index];");

              for (int i = 0; i < LayerData.Count; i++)
              {
            kernel.Append("struct ProcessLayer p"+i+";");
            kernel.Append("p"+i+".c_active = 1;");
            kernel.Append("p"+i+".c_isin = 0;");
            kernel.Append("p"+i+".c_x = x;");
            kernel.Append("p"+i+".c_oldx = x;");
            kernel.Append("p"+i+".c_old2x = x;");
            kernel.Append("p"+i+".c_calc = 0;");
            kernel.Append("p"+i+".c_cmean = 0;");
            kernel.Append("p"+i+".c_cvarsx = 0;");
            kernel.Append("p"+i+".c_cvariance = 0;");
              }

              kernel.Append("struct ProcessLayer* p = 0;");

              if (hastriangle)
              {
            if (fractaltype == FractalType.FRACTAL_TYPE_MANDEL)
            {
              kernel.Append("float trinorm = ABSC(c);");
              // trinorm = c.Magnitude;
            }
            else
            {
              kernel.Append("float trinorm = NORM(c);");
              // trinorm = c.Norm;
            }
              }

              kernel.Append("while (!end) {");
              // while (!end)
              kernel.Append("n++;");
              // n++;

              switch (fractaltype)
              {
            case FractalType.FRACTAL_TYPE_MANDEL:
              kernel.Append("newx = (float2)(x.x*x.x - x.y*x.y,2*x.x*x.y) + c;");
              //kernel.Append(@"printf(""%f %f - "",newx.x,newx.y);");
              //double sx = x.Real;
              //double sy = x.Imaginary;
              //return new Complex(sx * sx - sy * sy + c.Real, 2 * sx * sy + c.Imaginary);
              break;
            case FractalType.FRACTAL_TYPE_MANDEL_N:
              kernel.Append("newx = powc(x,pr) + c;");
              // return Complex.Pow(x, param) + c;
              break;
            case FractalType.FRACTAL_TYPE_BURNINGSHIP:
              kernel.Append("newx = (float2)(x.x*x.x-x.y*x.y,2*ABS(x.x*x.y)) + c;");
              //  double sx = x.Real;
              //  double sy = x.Imaginary;
              //  return new Complex(sx * sx - sy * sy + c.Real, 2 * absval(sx * sy) + c.Imaginary);
              break;
            case FractalType.FRACTAL_TYPE_BURNINGSHIP_N:
              kernel.Append("newx = powc((ABS(x.x),ABS(x.y)),pr) + c;");
              // return Complex.Pow(new Complex(absval(x.Real), absval(x.Imaginary)), n) + c;
              break;
            case FractalType.FRACTAL_TYPE_DIVERGENT:
              kernel.Append("newx = " + code + ";");
              //  newx = code.eval(x, c, n, param);
              break;
            case FractalType.FRACTAL_TYPE_CONVERGENT:
              kernel.Append("newx = " + code + ";");
              fractdiv = false;
              break;
            default:
              throw new NotSupportedException("Unknown FractalType");
              }
              if (modesused.HasFlag(SeqType.MPL_SEQ_SUM))
              {
            kernel.Append("sumx += newx;");
            //sumx+=newx;
              }
              if (modesused.HasFlag(SeqType.MPL_SEQ_MEAN))
              {
            kernel.Append("deltax = newx-meanx;");
            kernel.Append("meanx += deltax/(float)n;");
            /*Complex delta = newx-meanx;
            meanx = meanx+delta/(double)n;*/
            if (modesused.HasFlag(SeqType.MPL_SEQ_VARSX))
            {
              kernel.Append("varsx += MULC(deltax,(newx-meanx));");
              //varsx = varsx + delta*(newx-meanx);
              if (modesused.HasFlag(SeqType.MPL_SEQ_VARIANCE))
              {
            kernel.Append("if (n!=1) {");
            // if (n!=1) {
            kernel.Append("variacex = varsx / (float)((float)n-(float)1.0f);");
            //variacex = varsx/((double)n-(double)1);
            if (modesused.HasFlag(SeqType.MPL_SEQ_STDDEV))
            {
              kernel.Append("sdx = powc(variacex,0.5f);");
              //sdx = Complex.Sqrt(variacex);
            }
            kernel.Append("}");
              }
            }
              }
              if (modesused.HasFlag(SeqType.MPL_SEQ_MIN))
              {
            kernel.Append("if (n==1) minx = newx; else {");
            kernel.Append("if (NORM(newx)<NORM(minx)) { minx = newx; } }");
            //if (n==1) minx=newx; else if (Complex.Abs(newx)<Complex.Abs(minx)) minx=newx;
              }
              if (modesused.HasFlag(SeqType.MPL_SEQ_MAX))
              {
            kernel.Append("if (n==1) maxx = newx; else {");
            kernel.Append("if (NORM(newx)>NORM(maxx)) { maxx = newx; } }");
            //if (n==1) maxx=newx; else if (Complex.Abs(newx)>Complex.Abs(maxx)) maxx=newx;
              }
              if (modesused.HasFlag(SeqType.MPL_SEQ_DELTA))
              {
            kernel.Append("deltax = newx - x");
            //deltax = newx-x;
              }

              for (int i=0; i< LayerData.Count; i++)
              {
            var p = LayerData[i];
            kernel.Append("p = &p"+i+";");
            kernel.Append("if (p->c_active) {");
            //if (p.c_active) {
            kernel.Append("p->c_n = n;");
            //p.c_n = n;
            kernel.Append("p->c_old2x = p->c_oldx;");
            kernel.Append("p->c_oldx = p->c_x;");
            //p.c_old2x = p.c_oldx;
            //p.c_oldx = p.c_x;
            switch (p.c_seqtype)
            {
              case SeqType.MPL_SEQ_NORMAL: kernel.Append("p->c_x = newx;"); break; // p.c_x = newx; break;
              case SeqType.MPL_SEQ_SUM: kernel.Append("p->c_x = sumx;"); break; //  p.c_x = sumx; break;
              case SeqType.MPL_SEQ_MEAN: kernel.Append("p->c_x = meanx;"); break;// p.c_x = meanx; break;
              case SeqType.MPL_SEQ_VARSX: kernel.Append("p->c_x = varsx;"); break;
              case SeqType.MPL_SEQ_VARIANCE: kernel.Append("p->c_x = variacex;"); break; // p.c_x = variacex; break;
              case SeqType.MPL_SEQ_STDDEV: kernel.Append("p->c_x = sdx;"); break; // p.c_x = sdx; break;
              case SeqType.MPL_SEQ_MIN: kernel.Append("p->c_x = minx;"); break; //  p.c_x = minx; break;
              case SeqType.MPL_SEQ_MAX: kernel.Append("p->c_x = maxx;"); break; //  p.c_x = maxx; break;
              case SeqType.MPL_SEQ_DELTA: kernel.Append("p->c_x = deltax;"); break; //  p.c_x = deltax; break;
              default: kernel.Append("p->c_x = newx;"); break; // p.c_x = newx; break;
            }
            kernel.Append("newd = 0;");
            //double newd = 0;

            switch (p.c_checktype)
            {
              case SeqCheck.MPL_CHECK_SMOOTH:
            if (fractdiv)
            {
              kernel.Append("newd = exp(-ABSC(p->c_x));");
              //newd = Math.Exp(-Complex.Abs(p.c_x));
            }
            else
            {
              kernel.Append("newd = exp(-ABSC(p->c_x-p->c_oldx));");
              //newd = Math.Exp(-Complex.Abs(p.c_x-p.c_oldx));
            }
            break;
              case SeqCheck.MPL_CHECK_REAL:
            kernel.Append("newd = p->c_x.x;");
            //newd = p.c_x.Real;
            break;
              case SeqCheck.MPL_CHECK_IMAG:
            kernel.Append("newd = p->c_x.y;");
            //newd = p.c_x.Imaginary;
            break;
              case SeqCheck.MPL_CHECK_ARG:
            kernel.Append("newd = atan2(p->c_x.y,p->c_x.x);");
            //newd = p.c_x.Phase;
            break;
              case SeqCheck.MPL_CHECK_ABS:
            kernel.Append("newd = ABSC(p->c_x);");
            //newd = p.c_x.Magnitude;
            break;
              case SeqCheck.MPL_CHECK_CURVATURE:
            kernel.Append("if (isnotequal(p.c_oldx,p.c_old2x)) { newd = ABSC(atanc(DIVC(p->c_x-p->c_oldx,p->c_oldx-p->c_old2x))); } else newd = 0;");
            //if ((p.c_oldx!=p.c_old2x)) newd=Complex.Abs(Complex.Atan((p.c_x-p.c_oldx) / (p.c_oldx-p.c_old2x))); else newd=0; }
            break;
              case SeqCheck.MPL_CHECK_TRIANGLE:
            if (fractaltype == FractalType.FRACTAL_TYPE_MANDEL)
            {
              kernel.Append("newxnorm = NORM(p->c_oldx);");
              //double newxnorm = p.c_oldx.Norm();
              kernel.Append("lowbound = ABS(newxnorm-trinorm);");
              //double lowbound = absval(newxnorm-trinorm);
              kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x)-lowbound)/(newxnorm+trinorm-lowbound);");
              //if ((newxnorm+trinorm-lowbound)==0) newd=0; else
              //  newd = (p.c_x.Magnitude-lowbound)/(newxnorm+trinorm-lowbound);
            }
            else
            {
              kernel.Append("newxnorm = ABSC(p->c_x);");
              //double newxnorm = p.c_x.Magnitude;
              kernel.Append("lowbound = ABS(newxnorm-trinorm);");
              //double lowbound = absval(newxnorm-trinorm);
              kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x-c)-lowbound)/(newxnorm+trinorm-lowbound);");
              //if ((newxnorm+trinorm-lowbound)==0) newd=0; else
              //  newd = ((Complex.Abs(p.c_x-c)-lowbound)/(newxnorm+trinorm-lowbound));
            }
            break;
              case SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH:
            if (fractaltype == FractalType.FRACTAL_TYPE_MANDEL)
            {
              kernel.Append("newxnorm = NORM(p->c_oldx);");
              //double newxnorm = p.c_oldx.Norm();
              kernel.Append("lowbound = ABS(newxnorm-trinorm);");
              //double lowbound = absval(newxnorm-trinorm);
              kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x)-lowbound)/(newxnorm+trinorm-lowbound);");
              //if ((newxnorm+trinorm-lowbound)==0) newd=0; else
              //  newd = (p.c_x.Magnitude-lowbound)/(newxnorm+trinorm-lowbound);
            }
            else
            {
              kernel.Append("newxnorm = ABSC(p->c_x);");
              //double newxnorm = p.c_x.Magnitude;
              kernel.Append("lowbound = ABS(newxnorm-trinorm);");
              //double lowbound = absval(newxnorm-trinorm);
              kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x-c)-lowbound)/(newxnorm+trinorm-lowbound);");
              //if ((newxnorm+trinorm-lowbound)==0) newd=0; else
              //  newd = ((Complex.Abs(p.c_x-c)-lowbound)/(newxnorm+trinorm-lowbound));
            }
            break;
              case SeqCheck.MPL_CHECK_ORBIT_TRAP:
            switch (p.c_orbittraptype)
            {
              case OrbitTrap.MPL_ORBIT_TRAP_POINT:
                kernel.Append("newd = ABSC(p->c_x - p->c_pointA);");
                //newd = Complex.Abs(p.c_x - p.c_pointA);
                break;
              case OrbitTrap.MPL_ORBIT_TRAP_LINE:
                if ((p.c_pointA.Real) == 1)
                {
                  kernel.Append("newd = ABS(p->c_x.x);");
                  //newd = Math.Abs(p.c_x.Real);
                }
                else
                {
                  kernel.Append("newd = ABS(p->c_x.y);");
                  //newd = Math.Abs(p.c_x.Imaginary);
                }
                break;
              case OrbitTrap.MPL_ORBIT_TRAP_GAUSS:
                {
                  kernel.Append("newd = ABSC((round(p->c_x.x),round(p->c_x.y)) - p->c_x);");
                  //Complex gauss = new Complex(Math.Round(p.c_x.Real),Math.Round(p.c_x.Imaginary));
                  //newd = Complex.Abs(gauss - p.c_x);
                }
                break;
            }
            break;
            }
            switch (p.c_checkseqtype)
            {
              case SeqType.MPL_SEQ_NORMAL: kernel.Append("p->c_calc = newd;"); break;
              case SeqType.MPL_SEQ_SUM: kernel.Append("p->c_calc += newd;"); break; // p.c_calc += newd; break;
              case SeqType.MPL_SEQ_MEAN: kernel.Append("p->c_calc += newd;"); break; // p.c_calc += newd; break;
              case SeqType.MPL_SEQ_VARSX:
            {
              kernel.Append("delta = newd - p->c_cmean;");
              //double delta = newd - p.c_cmean;
              kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
              //p.c_cmean = p.c_cmean+delta/p.c_n;
              kernel.Append("p->c_calc += delta * (newd - p->c_cmean);");
              //p.c_calc += delta*(newd-p.c_cmean);
            }
            break;
              case SeqType.MPL_SEQ_VARIANCE:
            {
              kernel.Append("delta = newd - p->c_cmean;");
              //double delta = newd - p.c_cmean;
              kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
              //p.c_cmean = p.c_cmean+delta/p.c_n;
              kernel.Append("p->c_cvarsx += delta * (newd - p->c_cmean);");
              //p.c_cvarsx = p.c_cvarsx + delta*(newd-p.c_cmean);
              kernel.Append("if (p->c_n!=1) { p->c_calc = p->c_cvarsx/(p->c_n-1.0f); }");
              /*if (p.c_n!=1) {
                p.c_calc = p.c_cvarsx/(p.c_n-1.0);
              }*/
            }
            break;
              case SeqType.MPL_SEQ_STDDEV:
            {
              kernel.Append("delta = newd - p->c_cmean;");
              //double delta = newd - p.c_cmean;
              kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
              //p.c_cmean = p.c_cmean+delta/p.c_n;
              kernel.Append("p->c_cvarsx += delta * (newd - p->c_cmean);");
              //p.c_cvarsx = p.c_cvarsx + delta*(newd-p.c_cmean);
              kernel.Append("if (p->c_n!=1) { p->c_cvariance = p->c_cvarsx/((float)p->c_n-1.0f);");
              /*if (p.c_n!=1) {
                p.c_cvariance = p.c_cvarsx/(p.c_n-1.0);
              }*/
              kernel.Append("p->c_calc = sqrt(p->c_cvariance);");
              //p.c_calc = Math.Sqrt(p.c_cvariance);
              kernel.Append("}");
            }
            break;
              case SeqType.MPL_SEQ_MIN:
            kernel.Append("if (p->c_n==1) p->c_calc = newd; else if (p->c_calc>newd) { p->c_calc = newd; p->c_resx = p->c_x; p->c_resn = p->c_n; };");
            //if (p.c_n==1) p.c_calc=newd; else if (p.c_calc>newd) { p.c_calc = newd; p.c_resx = p.c_x; p.c_resn = p.c_n; }
            break;
              case SeqType.MPL_SEQ_MAX:
            kernel.Append("if (p->c_n==1) p->c_calc = newd; else if (p->c_calc<newd) { p->c_calc = newd; p->c_resx = p->c_x; p->c_resn = p->c_n; };");
            // if (p.c_n==1) p.c_calc=newd; else if (p.c_calc<newd) { p.c_calc = newd; p.c_resx = p.c_x; p.c_resn = p.c_n; }
            break;
              case SeqType.MPL_SEQ_DELTA:
            kernel.Append("p->c_calc = newd-p->c_calc;");
            //p.c_calc = newd-p.c_calc;
            break;
              default:
            kernel.Append("p->c_calc = newd;");
            //p.c_calc = newd;
            break;
            }

            if (p.c_convchktype == ConvCheck.MPL_CONVCHK_REAL)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture,"if (p->c_x.x*p->c_x.x " + (fractdiv ? ">" : "<") + " {0:E}f) p->c_active = 0;", p.c_bailout);
              /*double ddd = p.c_x.Real*p.c_x.Real;
              if ((fractdiv) && ( ddd>p.c_bailout)) p.c_active = false;
              if (!(fractdiv) && ( ddd<p.c_bailout)) p.c_active = false;*/
            }
            else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_IMAG)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if (p->c_x.y*p->c_x.y " + (fractdiv ? ">" : "<") + " {0:E}f) p->c_active = 0;", p.c_bailout);
              /*double ddd = p.c_x.Imaginary*p.c_x.Imaginary;
              if ((fractdiv) && ( ddd>p.c_bailout)) p.c_active = false;
              if (!(fractdiv) && ( ddd<p.c_bailout)) p.c_active = false;*/
            }
            else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_OR)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if ((p->c_x.y*p->c_x.y " + (fractdiv ? ">" : "<") + " {0:E}f) || (p->c_x.x*p->c_x.x " + (fractdiv ? ">" : "<") + " {0:E}f)) p->c_active = 0;", p.c_bailout);
              /*if ((fractdiv) && ((p.c_x.Real*p.c_x.Real>p.c_bailout) || (p.c_x.Imaginary*p.c_x.Imaginary>p.c_bailout))) p.c_active = false;
              if (!(fractdiv) && ((p.c_x.Real*p.c_x.Real<p.c_bailout) || (p.c_x.Imaginary*p.c_x.Imaginary<p.c_bailout))) p.c_active = false;*/
            }
            else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_AND)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if ((p->c_x.y*p->c_x.y " + (fractdiv ? ">" : "<") + " {0:E}f) && (p->c_x.x*p->c_x.x " + (fractdiv ? ">" : "<") + " {0:E}f)) p->c_active = 0;", p.c_bailout);
              /*if ((fractdiv) && ((p.c_x.Real*p.c_x.Real>p.c_bailout) && (p.c_x.Imaginary*p.c_x.Imaginary>p.c_bailout))) p.c_active = false;
              if (!(fractdiv) && ((p.c_x.Real*p.c_x.Real<p.c_bailout) && (p.c_x.Imaginary*p.c_x.Imaginary<p.c_bailout))) p.c_active = false;*/
            }
            else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_MANH)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if ( ((ABS(p->c_x.y)+ABS(p->c_x.x))*((ABS(p->c_x.y)+ABS(p->c_x.x))) " + (fractdiv ? ">" : "<") + " {0:G}f)) p->c_active = 0;", p.c_bailout);
              /*double ddd = (absval(p.c_x.Imaginary)+absval(p.c_x.Real))*(absval(p.c_x.Imaginary)+absval(p.c_x.Real));
               if ((fractdiv) && ( ddd>p.c_bailout)) p.c_active = false;
              if (!(fractdiv) && ( ddd<p.c_bailout)) p.c_active = false;*/
            }
            else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_MANR)
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if ( ((p->c_x.y+p->c_x.x)*(p->c_x.y+p->c_x.x)) " + (fractdiv ? ">" : "<") + " {0:E}f)) p->c_active = 0;", p.c_bailout);
              /*double ddd = (p.c_x.Real+p.c_x.Imaginary)*(p.c_x.Real+p.c_x.Imaginary);
               if ((fractdiv) && ( ddd>p.c_bailout)) p.c_active = false;
              if (!(fractdiv) && ( ddd<p.c_bailout)) p.c_active = false; */
            }
            else
            {
              kernel.AppendFormat(CultureInfo.InvariantCulture, "if (NORM(p->c_x) " + (fractdiv ? ">" : "<") + " {0:E}f) p->c_active = 0;", p.c_bailout);
              /*double ddd = p.c_x.Norm();
               if ((fractdiv) && ( ddd>p.c_bailout)) p.c_active = false;
              if (!(fractdiv) && ( ddd<p.c_bailout)) p.c_active = false;*/
            }
            kernel.AppendFormat(CultureInfo.InvariantCulture, "if (p->c_n>{0}) {{ p->c_active = 0; p->c_isin = 1; }}", p.c_nlimit);
            //if (p.c_n>p.c_nlimit) { p.c_active = false; p.c_isin = true; }
            if (p.c_checktype == SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH)
            {
              throw new NotImplementedException("Smooth triangle algorithm is unavailable in this CalculatorFactory");
              /*if (p.c_active == false)
            if (!p.c_isin) {
              p.c_oldx = p.c_x;
              p.c_x = Fractal_Mandel(p.c_x,c);
              p.c_n++;
              double newxnorm = p.c_oldx.Norm();
              double lowbound = absval(newxnorm-trinorm);
              if ((newxnorm+trinorm-lowbound)==0) newd=0; else
                newd = (p.c_x.Magnitude-lowbound)/(newxnorm+trinorm-lowbound);
              p.c_calc += newd;
              double oldsum = p.c_calc/(p.c_n+1);
              double il2=1/Math.Log(2);
              double lp=Math.Log(Math.Log(p.c_bailout));
              double f=il2*lp-il2*Math.Log(Math.Log(Complex.Abs(p.c_x)))+2;
              double az2 = p.c_x.Norm();
              p.c_oldx = p.c_x;
              p.c_x = Fractal_Mandel(p.c_oldx,c);
              lowbound = absval(az2-trinorm);
              if ((az2+trinorm-lowbound)!=0) p.c_calc+=(Complex.Abs(p.c_x)-lowbound)/(az2+trinorm-lowbound);
              p.c_n++;
              p.c_calc = p.c_calc/(p.c_n+1);
              p.c_calc = oldsum+(p.c_calc-oldsum)*(f-1);
            } else {
              p.c_calc /= p.c_n+1;
            }*/
            }
            else if (p.c_checkseqtype == SeqType.MPL_SEQ_MEAN)
            {
              kernel.Append("if (p->c_active == 0) p->c_calc /= (float)p->c_n+1.0f;");
              //if (p.c_active == false) p.c_calc /= p.c_n+1;
            }
            if (p == deflayer)
            {
              kernel.Append("if (p->c_active == 0) end = 1;");
              /*if (!deflayer.c_active) end = true; */
            }
            kernel.Append("}");

              }
              kernel.Append("x = newx; }");
              for (int i = 0; i < LayerData.Count; i++)
              {
            kernel.Append("out_p"+i+"[index] = p"+i+";");
            //kernel.Append("out_p" + i + "[index].c_calc = 52.0f;");
              }
              kernel.Append("}");

              //System.Console.WriteLine(kernel.Replace(";", ";\n").Replace("}","}\n"));
              //kernel.Clear();
              //kernel.Append(@"kernel void VectorAdd(global  read_only float* a,global  read_only float* b,global write_only float* c ){int index = get_global_id(0);c[index] = a[index] + b[index];}");

              ComputeProgram prg = new ComputeProgram(_context, kernel.Replace(";", ";\n").Replace("}","}\n").ToString());
              try
              {
            prg.Build(null, null, null, IntPtr.Zero);
              }
              catch (ComputeException e)
              {
            throw new Exception("Error while building: " + prg.GetBuildLog(_context.Devices[0]), e);
              }
              ComputeKernel krnl = prg.CreateKernel("FractalCalc");

              return new OpenCLCalculator(_context,prg,krnl);
        }
        /// <summary>
        /// Sets up the OpenCL objects used by this class: a GPU context on the first platform,
        /// the per-device element split, one command queue per device, the compiled program,
        /// its kernels, and the single-GPU and multi-GPU buffers.
        /// </summary>
        /// <param name="left">Host array backing the single-GPU left-operand buffer.</param>
        /// <param name="right">Host array backing the single-GPU right-operand buffer.</param>
        /// <exception cref="ApplicationException">
        /// The program failed to build; the message carries the build log of the first device.
        /// </exception>
        static void InitializeOpenCL(Real[] left, Real[] right)
        {
            // Pick the first platform and report it
            var firstPlatform = ComputePlatform.Platforms[0];
            Console.WriteLine("プラットフォーム:{0} ({1})", firstPlatform.Name, firstPlatform.Version);

            // Create a GPU-only context on that platform
            var context = new ComputeContext(
                Cloo.ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(firstPlatform),
                null,
                IntPtr.Zero);

            // Enumerate the devices that ended up in the context
            var devices = context.Devices;
            int deviceCount = devices.Count;
            Console.WriteLine("デバイス数:{0}", deviceCount);

            // Number of elements each device is responsible for
            countPerDevice = new int[deviceCount];

            // Upper bound on the elements handled by a single device
            int maxCountPerDevice = (int)Math.Ceiling((double)COUNT / deviceCount);

            // The leading devices take one element fewer so the total matches COUNT
            for (int d = 0; d < deviceCount; d++)
            {
                if (d < maxCountPerDevice * deviceCount - COUNT)
                {
                    countPerDevice[d] = maxCountPerDevice - 1;
                }
                else
                {
                    countPerDevice[d] = maxCountPerDevice;
                }
            }

            // One partial result per device
            resultsPerDevice = new Real[deviceCount];

            // Work-items per work-group, taken from the first device's first dimension limit
            localSize = (int)devices[0].MaxWorkItemSizes[0];

            // One command queue per device; device info is printed as each queue is created
            queues = new ComputeCommandQueue[deviceCount];
            for (int d = 0; d < deviceCount; d++)
            {
                var current = devices[d];
                queues[d] = new ComputeCommandQueue(context, current, ComputeCommandQueueFlags.None);
                Console.WriteLine("* {0} ({1})", current.Name, current.Vendor);
            }

            // Create the program from the embedded source
            var program = new ComputeProgram(context, Properties.Resources.VectorDot);

            // Build it, selecting the kernel's REAL type to match the host Real type
            try
            {
                string realTypeName;
                if (typeof(Real) == typeof(Double))
                {
                    realTypeName = "double";
                }
                else
                {
                    realTypeName = "float";
                }

                string buildOptions = string.Format(" -D REAL={0} -Werror", realTypeName);
                program.Build(devices, buildOptions, null, IntPtr.Zero);
            }
            catch (BuildProgramFailureComputeException ex)
            {
                // Surface the build log together with the original message
                string report = string.Format("{0}\n{1}", ex.Message, program.GetBuildLog(devices[0]));
                throw new ApplicationException(report, ex);
            }

            // Create one kernel instance per device for every entry point
            string[] reductionKernelNames =
            {
                "ReductionSum0",
                "ReductionSum1",
                "ReductionSum2",
                "ReductionSum3",
                "ReductionSum4",
            };
            multyplyEachElement = new ComputeKernel[deviceCount];
            reductionSum = new ComputeKernel[REDUCTION_VERSION + 1, deviceCount];
            for (int d = 0; d < deviceCount; d++)
            {
                multyplyEachElement[d] = program.CreateKernel("MultyplyEachElement");

                for (int version = 0; version < reductionKernelNames.Length; version++)
                {
                    reductionSum[version, d] = program.CreateKernel(reductionKernelNames[version]);
                }
            }

            // Single-GPU buffers
            // NOTE(review): UseHostPointer hands the managed arrays to OpenCL — confirm they stay
            // alive and unmoved for as long as these buffers are used.
            const ComputeMemoryFlags hostBackedInput = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
            bufferLeft = new ComputeBuffer<Real>(context, hostBackedInput, left);
            bufferRight = new ComputeBuffer<Real>(context, hostBackedInput, right);
            bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadWrite, left.Length);

            // Multi-GPU buffers, each sized to its device's share
            buffersLeft = new ComputeBuffer<Real>[deviceCount];
            buffersRight = new ComputeBuffer<Real>[deviceCount];
            buffersResult = new ComputeBuffer<Real>[deviceCount];
            for (int d = 0; d < deviceCount; d++)
            {
                int share = countPerDevice[d];
                buffersLeft[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, share);
                buffersRight[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, share);
                buffersResult[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly, share);
            }
        }
Exemple #8
0
        /// <summary>
        /// Runs the two OpenCL stages of the pipeline and saves the result: first the
        /// "transform_fluorophores" kernel (in place on the coordinate buffer), then the
        /// "convolve_fluorophores" kernel, whose output is written to the TIFF file and to
        /// a bitmap on disk.
        /// </summary>
        /// <param name="computeContext">OpenCL context used to build the program and create buffers.</param>
        /// <exception cref="SyntaxErrorException">
        /// A shift value could not be parsed as an invariant-culture float ('.' decimal separator).
        /// </exception>
        private void CalculateConvolution(ComputeContext computeContext)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            // Shift values must be written with '.' as the decimal separator.
            // NOTE(review): dy and dz are parsed from textBoxShiftX as well; this looks like a
            // copy-paste slip — confirm which controls hold the Y and Z shifts.
            float dx;
            bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
            if (!shiftXParse)
                throw new SyntaxErrorException(", needs to be .");

            float dy;
            bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
            if (!shiftYParse)
                throw new SyntaxErrorException(", needs to be  .");

            float dz;
            bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
            if (!shiftZParse)
                throw new SyntaxErrorException(", needs to be  .");

            int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;

            Console.WriteLine("Computing...");
            Console.WriteLine("Reading kernel...");

            // The kernel source is expected three directory levels above the working directory.
            String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;

            String kernelString;
            using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
                kernelString = sr.ReadToEnd();

            Console.WriteLine("Reading kernel... done");

            // Parse the pixel size once, with the same invariant-culture rule as the shifts,
            // so the result does not depend on the machine's regional settings.
            float inversePixelSize = 1.0f / float.Parse(textBoxPixelSize.Text, NumberStyles.Float, CultureInfo.InvariantCulture);

            float[] selectedTransformation = Transformations.GetTransformation((TransformationType)comboBoxTransform.SelectedItem, inversePixelSize, inversePixelSize, inversePixelSize, dx, dy, dz);

            //create openCL program
            ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);

            computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

            ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
            Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);

            String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
            Console.WriteLine("buildLog");
            if (buildLog.Equals("\n"))
                Console.WriteLine("\tbuildLog is empty...");
            else
                Console.WriteLine("\t" + buildLog);

            float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

            /////////////////////////////////////////////
            // Create a Command Queue
            /////////////////////////////////////////////
            ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);

            ////////////////////////////////////////////////////////////////
            // Create Buffers Transform
            ////////////////////////////////////////////////////////////////
            ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);

            ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            /////////////////////////////////////////////
            ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");

            /////////////////////////////////////////////
            // Set the transformFluorophoresKernel arguments
            /////////////////////////////////////////////
            transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
            transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            // One work-item per group of four floats in the fluorophore array.
            long[] globalWorkOffsetTransformFluorophoresKernel = null;
            long[] globalWorkSizeTransformFluorophoresKernel = new long[]   { fluorophores.Length / 4 };
            long[] localWorkSizeTransformFluorophoresKernel = null;

            ////////////////////////////////////////////////////////
            // Enqueue the transformFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
            computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);

            computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);

            float[] transformedFluorophores = new float[fluorophores.Length];

            computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);

            computeCommandQueue.Finish();

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();
            // fluorophoresCoords are now transformed (done in place)

            ////////////////////////////////////////////////////////////////
            // Create Buffers Convolve Fluorophores
            ////////////////////////////////////////////////////////////////

            // The result buffer is padded up to a whole number of work-groups.
            const int convolve_kernel_lwgs = 16;
            int totalBuffer = (int) Math.Ceiling(pixelCount / (float)convolve_kernel_lwgs) * convolve_kernel_lwgs;

            ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer);

            /////////////////////////////////////////////
            // Create the convolveFluorophoresKernel
            /////////////////////////////////////////////
            ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores");

            /////////////////////////////////////////////
            // Set the convolveFluorophoresKernel arguments
            /////////////////////////////////////////////

            convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
            convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
            convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
            convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
            convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
            convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            // NOTE(review): the global size is pixelCount while the buffer is padded to
            // totalBuffer; confirm pixelCount is always a multiple of the local size.
            long[] globalWorkOffsetTransformConvolveFluorophoresKernel = null;
            long[] globalWorkSizeTransformConvolveFluorophoresKernel = new long[] { pixelCount };
            long[] localWorkSizeTransformConvolveFluorophoresKernel = new long[] {convolve_kernel_lwgs};

            ////////////////////////////////////////////////////////
            // Enqueue the convolveFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.Execute(convolveFluorophoresKernel, globalWorkOffsetTransformConvolveFluorophoresKernel, globalWorkSizeTransformConvolveFluorophoresKernel, localWorkSizeTransformConvolveFluorophoresKernel, null);

            float[] resultImageData = new float[totalBuffer];
            computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);

            computeCommandQueue.Finish();

            for (int i = 0; i < pixelCount; i++)
            {
                Console.WriteLine(resultImageData[i]);
            }

            Console.WriteLine("Writing data to file...");
            TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

            // Truncate each float sample to 16 bits before the grey-to-RGB conversion.
            ushort[] ushortdata = new ushort[resultImageData.Length];

            for (int i = 0; i < resultImageData.Length; i++)
            {
                ushortdata[i] = (ushort)resultImageData[i];
            }

            uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

            // Lay every 32-bit pixel out as four consecutive bytes. The previous hand-written
            // loop stored byte 0 of pixel i at offset i instead of 4 * i, corrupting the data.
            byte[] bytes = new byte[convertGray16ToRgb.Length * sizeof(uint)];
            Buffer.BlockCopy(convertGray16ToRgb, 0, bytes, 0, bytes.Length);

            // NOTE(review): Image.FromStream decodes an image file format — confirm that
            // `bytes` is meant to be handed over as-is rather than as raw pixel data.
            using (MemoryStream ms = new MemoryStream(bytes))
            using (Image image = Bitmap.FromStream(ms))
            {
                image.Save("c:\\temp.bmp");
            }

            Console.WriteLine("Writing data to file... done");

            stopwatch.Stop();
            Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
            Console.WriteLine("Computing... done");
        }
        /// <summary>
        /// Initializes the renderer: stores the command line parameters passed by the template
        /// code, sets up the accumulator, scene, camera and tile counts, then prepares OpenCL
        /// (context, program, the "Test" kernel, event list and command queue).
        /// </summary>
        /// <param name="rt">Value stored in <c>runningTime</c>.</param>
        /// <param name="gpu">Value stored in <c>useGPU</c>.</param>
        /// <param name="platformIdx">Index into <c>ComputePlatform.Platforms</c> of the platform to use.</param>
        public void Init(int rt, bool gpu, int platformIdx)
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera(screen.width, screen.height);
            rngQueue = new ConcurrentQueue<Random>();

            // number of tiles needed to cover the screen, rounding partial tiles up
            xtiles = (int)Math.Ceiling((float)screen.width / TILESIZE);
            ytiles = (int)Math.Ceiling((float)screen.height / TILESIZE);

#if DEBUG
            // precompute the factorial table (debug builds only)
            RTTools.factorials[0] = Vector<float>.One;
            for (int i = 1; i < RTTools.TERMS * 2; i++)
                RTTools.factorials[i] = RTTools.factorials[i - 1] * i;
            //for (int i = 0; i < RTTools.TERMS; i++)
            //    RTTools.atanStuff[i] = (new Vector<float>((float)Math.Pow(2, 2 * i)) * (RTTools.factorials[i] * RTTools.factorials[i])) / RTTools.factorials[2 * i + 1];
#endif

#region OpenCL related things

            randNums = new float[screen.width * screen.height + 25];

            // read the kernel source; the using block closes the file even if reading throws
            string clSource;
            using (var streamReader = new StreamReader("../../assets/GPUCode.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            platform = ComputePlatform.Platforms[gpuPlatform];
            context = new ComputeContext(ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            program = new ComputeProgram(context, clSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                kernel = program.CreateKernel("Test");
            }
            catch
            {
                // report the build log of the first device and stop in the debugger;
                // execution continues afterwards, so `kernel` may not have been assigned
                Console.Write("error in kernel code:\n");
                Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
                Debugger.Break();
            }

            eventList = new ComputeEventList();
            commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
#endregion
        }
Exemple #10
0
        /// <summary>
        /// Initializes the renderer: stores the command line parameters passed by the template
        /// code, derives the tile layout, sets up the accumulator, scene and camera, then
        /// prepares OpenCL (context, program, the "device_function" kernel with all of its
        /// arguments, command queue and global work size).
        /// </summary>
        /// <param name="rt">Value stored in <c>runningTime</c>.</param>
        /// <param name="gpu">Value stored in <c>useGPU</c>.</param>
        /// <param name="platformIdx">Index into <c>ComputePlatform.Platforms</c> of the platform to use.</param>
        public void Init(int rt, bool gpu, int platformIdx)
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            //Determine tile width and height
            tileCount = GreatestDiv(screen.width, screen.height);
            tileWidth = screen.width/tileCount;
            tileHeight = screen.height/tileCount;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera(screen.width, screen.height);

            //Init OpenCL
            ComputePlatform platform = ComputePlatform.Platforms[gpuPlatform];
            context = new ComputeContext(
                ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(platform),
                null,
                IntPtr.Zero
                );

            // read the kernel source; the using block closes the file even if reading throws
            string clSource;
            using (var streamReader = new StreamReader("../../program.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            ComputeProgram program = new ComputeProgram(context, clSource);

            //try to compile
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
            }
            catch
            {
                // print the build log of the first device; execution then continues
                Console.Write("error in kernel code:\n");
                Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
            }
            kernel = program.CreateKernel("device_function");

            //setup RNG: one seed per pixel
            rngSeed = new int[screen.width * screen.height];
            Random r = RTTools.GetRNG();
            for (int i = 0; i < rngSeed.Length; i++)
                rngSeed[i] = r.Next();

            //import buffers etc to GPU
            Vector3[] data = new Vector3[screen.width * screen.height];
            Vector3[] sphereOrigins = Scene.GetOrigins;
            float[] sphereRadii = Scene.GetRadii;

            var FlagRW = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
            var FlagR = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;

            rngBuffer = new ComputeBuffer<int>(context, FlagRW, rngSeed);
            screenPixels = new ComputeBuffer<int>(context, FlagRW, screen.pixels);
            skyBox = new ComputeBuffer<float>(context, FlagR, scene.skybox);
            originBuffer = new ComputeBuffer<Vector3>(context, FlagR, sphereOrigins);
            radiusBuffer = new ComputeBuffer<float>(context, FlagR, sphereRadii);
            accBuffer = new ComputeBuffer<Vector3>(context, FlagRW, accumulator);

            // kernel arguments: camera description, screen size, then the buffers
            kernel.SetValueArgument(0, camera.p1);
            kernel.SetValueArgument(1, camera.p2);
            kernel.SetValueArgument(2, camera.p3);
            kernel.SetValueArgument(3, camera.up);
            kernel.SetValueArgument(4, camera.right);
            kernel.SetValueArgument(5, camera.pos);
            kernel.SetValueArgument(6, camera.lensSize);
            kernel.SetValueArgument(7, (float)screen.width);
            kernel.SetValueArgument(8, (float)screen.height);
            kernel.SetMemoryArgument(9, rngBuffer);
            kernel.SetMemoryArgument(10, screenPixels);
            kernel.SetMemoryArgument(11, skyBox);
            kernel.SetMemoryArgument(12, originBuffer);
            kernel.SetMemoryArgument(13, radiusBuffer);
            kernel.SetMemoryArgument(14, accBuffer);

            queue = new ComputeCommandQueue(context, context.Devices[0], 0);

            // A bare `{ ... }` initializer is only legal in a declaration, so assigning to the
            // existing field needs an explicit array-creation expression.
            workSize = new long[] { screen.width * screen.height };
        }
        /// <summary>
        /// Creates the OpenCL conjugate gradient solver: builds the program for every GPU in
        /// the context of the first platform, splits the rows across the devices, and creates
        /// the per-device queue, kernels and buffers.
        /// </summary>
        /// <param name="count">Number of elements; forwarded to the base constructor.</param>
        /// <param name="maxNonZeroCount">Forwarded to the base constructor.</param>
        /// <param name="_minIteration">Forwarded to the base constructor.</param>
        /// <param name="_maxIteration">Forwarded to the base constructor.</param>
        /// <param name="allowableResidual">Forwarded to the base constructor.</param>
        /// <exception cref="ApplicationException">
        /// The program failed to build; the message is the build log of the first device.
        /// </exception>
        public ConjugateGradientParallelGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
            : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
        {
            // GPU context on the first platform
            var firstPlatform = ComputePlatform.Platforms[0];
            var context = new ComputeContext(
                Cloo.ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(firstPlatform),
                null,
                IntPtr.Zero);

            // Devices available in that context
            var devices = context.Devices;
            int deviceCount = devices.Count;

            // Local work size: the largest work-group the first device supports
            this.localWorkSize = (int)devices[0].MaxWorkGroupSize;

            // Create the program from the embedded source and try to build it
            var program = new ComputeProgram(context, Properties.Resources.Mgcg);
            try
            {
                string realTypeName = "double";
                string buildOptions = string.Format(
                    " -D REAL={0} -D MAX_NONZERO_COUNT={1}",
                    realTypeName,
                    this.A.MaxNonzeroCountPerRow);

                program.Build(devices, buildOptions, null, IntPtr.Zero);
            }
            catch (BuildProgramFailureComputeException ex)
            {
                // Report the build log as the failure message
                throw new ApplicationException(program.GetBuildLog(devices[0]), ex);
            }

            // Per-device element counts and starting indices
            countPerDevice = new int[deviceCount];
            int maxCountPerDevice = (int)Math.Ceiling((double)this.Count / deviceCount);
            offset = new int[deviceCount];

            // Per-device queues
            queues = new ComputeCommandQueue[deviceCount];

            // Per-device kernels
            addVectorVector = new ComputeKernel[deviceCount];
            multiplyVectorVector = new ComputeKernel[deviceCount];
            reductionSum = new ComputeKernel[deviceCount];
            reductionMax = new ComputeKernel[deviceCount];
            matrix_x_Vector = new ComputeKernel[deviceCount];

            // Per-device buffers and host-side scratch arrays
            buffersA = new ComputeBuffer<double>[deviceCount];
            buffersColumnIndeces = new ComputeBuffer<int>[deviceCount];
            buffersNonzeroCounts = new ComputeBuffer<int>[deviceCount];
            buffersB = new ComputeBuffer<double>[deviceCount];
            buffersX = new ComputeBuffer<double>[deviceCount];
            buffersAp = new ComputeBuffer<double>[deviceCount];
            buffersP = new ComputeBuffer<double>[deviceCount];
            buffersR = new ComputeBuffer<double>[deviceCount];
            buffersForDot = new ComputeBuffer<double>[deviceCount];
            buffersForMax = new ComputeBuffer<double>[deviceCount];
            bufferAllVector = new ComputeBuffer<double>[deviceCount];
            answerForReduction = new double[deviceCount];
            allVector = new double[this.Count];

            // Running start index: each device begins where the previous one ended
            int nextOffset = 0;
            for (int d = 0; d < deviceCount; d++)
            {
                // The leading devices take one element fewer so the total matches Count
                int share = maxCountPerDevice;
                if (d < maxCountPerDevice * deviceCount - this.Count)
                {
                    share = maxCountPerDevice - 1;
                }
                countPerDevice[d] = share;

                offset[d] = nextOffset;
                nextOffset += share;

                // Queue for this device
                queues[d] = new ComputeCommandQueue(context, devices[d], ComputeCommandQueueFlags.None);

                // Kernel instances for this device
                addVectorVector[d] = program.CreateKernel("AddVectorVector");
                multiplyVectorVector[d] = program.CreateKernel("MultiplyVectorVector");
                reductionSum[d] = program.CreateKernel("ReductionSum");
                reductionMax[d] = program.CreateKernel("ReductionMaxAbsolute");
                matrix_x_Vector[d] = program.CreateKernel("Matrix_x_Vector");

                // Matrix storage: values, column indices and per-row non-zero counts
                buffersA[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly, share * this.A.MaxNonzeroCountPerRow);
                buffersColumnIndeces[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, share * this.A.MaxNonzeroCountPerRow);
                buffersNonzeroCounts[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, share);

                // Vectors of the iteration: b, x, A*p, p and r
                buffersB[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly, share);
                buffersX[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);
                buffersAp[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);
                buffersP[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);
                buffersR[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);

                // Scratch buffers for the reductions, plus one full-length vector
                buffersForDot[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);
                buffersForMax[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, share);
                bufferAllVector[d] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, this.Count);
            }
        }
        /// <summary>
        /// Prepares the OpenCL objects stored in the static fields of this class: the per-device
        /// work partition, command queues, kernels and device buffers.
        /// </summary>
        /// <param name="matrix">Only its length is read here; it sizes <c>bufferMatrix</c>.</param>
        /// <param name="vector">Only its length is read here; it sizes <c>bufferResult</c> and <c>bufferVector</c>.</param>
        /// <param name="nonzeroCount">Only its length is read here; it sizes <c>bufferNonzeroCount</c>.</param>
        /// <param name="columnIndeces">Only its length is read here; it sizes <c>bufferColumnIndeces</c>.</param>
        /// <exception cref="ApplicationException">
        /// The program failed to build; the message is the original message followed by the build log
        /// of the first device, and the original exception is attached as the inner exception.
        /// </exception>
        static void InitializeOpenCL(Real[] matrix, Real[] vector, int[] nonzeroCount, int[] columnIndeces)
        {
            // Work with the first entry of the platform list.
            var platform = ComputePlatform.Platforms[0];
            Console.WriteLine("プラットフォーム:{0} ({1})", platform.Name, platform.Version);

            // Context requested for GPU-type devices of that platform.
            var contextProperties = new ComputeContextPropertyList(platform);
            var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, contextProperties, null, IntPtr.Zero);

            // Devices that ended up in the context.
            var devices = context.Devices;
            int deviceCount = devices.Count;
            Console.WriteLine("デバイス数:{0}", deviceCount);

            // Per-device element counts; the largest share is the rounded-up average.
            countPerDevice = new int[deviceCount];
            int maxCountPerDevice = (int)Math.Ceiling((double)ROW_COUNT / deviceCount);

            // Per-device start index and local work size.
            offset = new int[deviceCount];
            localSize = new int[deviceCount];

            // Devices whose index is below this value receive one element fewer than the largest share.
            var reducedDeviceCount = maxCountPerDevice * deviceCount - ROW_COUNT;

            // Start index of the device currently being processed (sum of all earlier shares).
            int nextOffset = 0;
            for(int d = 0; d < deviceCount; d++)
            {
                int share = maxCountPerDevice;
                if(d < reducedDeviceCount)
                {
                    share--;
                }

                countPerDevice[d] = share;
                offset[d] = nextOffset;
                nextOffset += share;

                // Fixed local size; the device's MaxWorkGroupSize is not queried.
                localSize[d] = 8;
            }

            // One command queue per device, reporting each device as it is set up.
            queues = new ComputeCommandQueue[deviceCount];
            for(int d = 0; d < deviceCount; d++)
            {
                var device = devices[d];
                queues[d] = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
                Console.WriteLine("* {0} ({1})", device.Name, device.Vendor);
            }

            // Program created from the embedded kernel source.
            var program = new ComputeProgram(context, Properties.Resources.Matrix_x_Vector);

            try
            {
                // The kernel's REAL macro follows the host-side Real type.
                string realString;
                if(typeof(Real) == typeof(Double))
                {
                    realString = "double";
                }
                else
                {
                    realString = "float";
                }

                string buildOptions = string.Format(" -D REAL={0} -D MAX_NONZERO_COUNT={1} -Werror", realString, MAX_NONZERO_COUNT);
                program.Build(devices, buildOptions, null, IntPtr.Zero);
            }
            catch(BuildProgramFailureComputeException ex)
            {
                // Re-throw with the build log appended so the compiler output is visible to the caller.
                string failureMessage = ex.Message;
                string buildLog = program.GetBuildLog(devices[0]);
                throw new ApplicationException(string.Format("{0}\n{1}", failureMessage, buildLog), ex);
            }

            // One kernel object per (kernel variant, device) pair; variants are named "Matrix_x_Vector<index>".
            matrix_x_Vector = new ComputeKernel[KERNEL_COUNT, deviceCount];
            for(int k = 0; k < KERNEL_COUNT; k++)
            {
                string kernelName = "Matrix_x_Vector" + k;
                for(int d = 0; d < deviceCount; d++)
                {
                    matrix_x_Vector[k, d] = program.CreateKernel(kernelName);
                }
            }

            // Buffers for the single-GPU path, sized from the argument arrays.
            bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadWrite, vector.Length);
            bufferMatrix = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, matrix.Length);
            bufferVector = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, vector.Length);
            bufferColumnIndeces = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, columnIndeces.Length);
            bufferNonzeroCount = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, nonzeroCount.Length);

            // Buffers for the multi-GPU path, sized from each device's share.
            buffersResult = new ComputeBuffer<Real>[deviceCount];
            buffersMatrix = new ComputeBuffer<Real>[deviceCount];
            buffersVector = new ComputeBuffer<Real>[deviceCount];
            buffersColumnIndeces = new ComputeBuffer<int>[deviceCount];
            buffersNonzeroCount = new ComputeBuffer<int>[deviceCount];
            for(int d = 0; d < deviceCount; d++)
            {
                buffersResult[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly, countPerDevice[d]);
                buffersMatrix[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[d] * MAX_NONZERO_COUNT);
                // Every device gets a full-length (ROW_COUNT) vector buffer.
                buffersVector[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, ROW_COUNT);
                buffersColumnIndeces[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[d] * MAX_NONZERO_COUNT);
                buffersNonzeroCount[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[d]);
            }
        }
        /// <summary>
        /// Creates the OpenCL-based computation program: picks the first platform and its devices,
        /// creates a context and a queue on the first device, builds the SinAcceleration program and
        /// installs a handler that (re)uploads particle data the next time <c>prepare</c> is invoked.
        /// </summary>
        /// <param name="maxDt">Initial time step</param>
        /// <param name="a">Amplitude</param>
        /// <param name="omega">Angular velocity</param>
        /// <exception cref="BuildCLException">Thrown with the program source and the first device's build log when the build fails.</exception>
        public ComputerCL(double maxDt, double a, double omega)
            : base(maxDt, a, omega)
        {
            // Get the platform and its devices
            this.Platform = ComputePlatform.Platforms[0];
            this.Devices = this.Platform.Devices;

            // Create the context
            var context = new ComputeContext(this.Devices, new ComputeContextPropertyList(this.Platform), null, IntPtr.Zero);

            // Create the queue (on the first device only)
            this.queue = new ComputeCommandQueue(context, this.Devices[0], ComputeCommandQueueFlags.None);

            // Create the program
            var program = new ComputeProgram(context, Properties.Resources.SinAcceleration);

            // Try to build it
            try
            {
                program.Build(this.Devices, null, null, IntPtr.Zero);
            }
            // If the build fails
            catch(BuildProgramFailureComputeException ex)
            {
                // Throw an exception carrying the source and the build log
                // NOTE(review): `ex` itself is not passed on, so the original exception is not chained.
                throw new BuildCLException(program.Source[0], program.GetBuildLog(this.Devices[0]));
            }

            // Create the kernel
            this.sinAccelerationKernel = program.CreateKernel("SinAcceleration");

            // By default the preparation step does nothing
            this.prepare = () => { };

            // When a particle is added
            base.ParticleAdded += (sender, e) =>
            {
                // Install the real preparation step (replaces the no-op until it has run once)
                this.prepare = () =>
                {
                    // Set the particle count
                    this.particleCount = this.inputParticles.Count;

                    // Create the buffers
                    // NOTE(review): any buffers created by an earlier run of this step are replaced without being disposed.
                    this.bufferX = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferU = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferA = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferD = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly, this.particleCount);

                    // Allocate the host-side input data
                    var particlesX = new Vector4[this.particleCount];
                    var particlesU = new Vector4[this.particleCount];
                    var particlesA = new Vector4[this.particleCount];
                    this.particlesD = new float[this.particleCount];
                    this.particlesMaterial = new Material[this.particleCount];
                    this.particlesType = new ParticleType[this.particleCount];

                    // For every particle
                    int i = 0;
                    foreach(var particle in this.inputParticles)
                    {
                        // Copy the data (X, U and A are widened to Vector4 with a zero fourth component)
                        particlesX[i] = new Vector4((Vector3)particle.X, 0);
                        particlesU[i] = new Vector4((Vector3)particle.U, 0);
                        particlesA[i] = new Vector4((Vector3)particle.A, 0);
                        this.particlesD[i] = (float)particle.D;
                        this.particlesMaterial[i] = particle.Material;
                        this.particlesType[i] = particle.Type;

                        i++;
                    }

                    // Transfer to the device buffers (third argument false: presumably non-blocking; completion is awaited by Finish() below)
                    this.queue.WriteToBuffer(particlesX, this.bufferX, false, null);
                    this.queue.WriteToBuffer(particlesU, this.bufferU, false, null);
                    this.queue.WriteToBuffer(particlesA, this.bufferA, false, null);
                    this.queue.WriteToBuffer(this.particlesD, this.bufferD, false, null);

                    // Empty the input particle collection
                    this.inputParticles.Clear();

                    // Reset the preparation step to a no-op
                    this.prepare = () => { };

                    // Wait until everything queued so far has completed
                    queue.Finish();
                };
            };
        }
Exemple #14
0
        /// <summary>
        /// Reads "MonteCarloSimulate.cl" and fills the static OpenCL fields: the first device of the
        /// first platform, a context over all device types, the built program and its
        /// "MonteCarloSimulate" kernel.
        /// </summary>
        public static void InitializeOpenCL()
        {
            // Kernel source, given as a relative path.
            var source = File.ReadAllText("MonteCarloSimulate.cl");

            // First platform in the list, and the first device it reports.
            var platform = ComputePlatform.Platforms[0];
            openCLDevice = platform.QueryDevices()[0];

            // The context is requested for all device types, not only for openCLDevice.
            var properties = new ComputeContextPropertyList(platform);
            openCLContext = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero);

            // Build without a device list, options or notify callback.
            // (Passing a notify callback as the third argument would make the build non-blocking.)
            openCLProgram = new ComputeProgram(openCLContext, source);
            openCLProgram.Build(null, null, null, IntPtr.Zero);

            // The built program yields the kernel.
            openCLKernel = openCLProgram.CreateKernel("MonteCarloSimulate");
        }
        /// <summary>
        /// Adds two random 10-element float vectors with the "VectorAdd" kernel and writes one
        /// "a + b = c" line per element to <paramref name="log"/>.
        /// </summary>
        /// <param name="context">Context used for the buffers, the program and the command queue; the kernel runs on its first device.</param>
        /// <param name="log">Receives the result lines, or the exception text if anything fails.</param>
        /// <remarks>
        /// Exceptions are logged, not propagated. Every OpenCL object created here is released in a
        /// finally block, so a failure part-way through no longer leaks the objects created before it.
        /// </remarks>
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Declared up front so the finally block can release whatever was actually created.
                ComputeBuffer<float> a = null;
                ComputeBuffer<float> b = null;
                ComputeBuffer<float> c = null;
                ComputeProgram builtProgram = null;
                ComputeKernel kernel = null;
                ComputeEventList eventList = null;
                ComputeCommandQueue commands = null;

                try
                {
                    // Create the arrays and fill the inputs with random data.
                    int count = 10;
                    float[] arrA = new float[count];
                    float[] arrB = new float[count];
                    float[] arrC = new float[count];

                    Random rand = new Random();
                    for (int i = 0; i < count; i++)
                    {
                        arrA[i] = (float)(rand.NextDouble() * 100);
                        arrB[i] = (float)(rand.NextDouble() * 100);
                    }

                    // Create the input buffers and fill them with data from the arrays.
                    // Access modifiers should match those in a kernel.
                    // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                    a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                    b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

                    // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                    c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                    // Create and build the OpenCL program. The field keeps pointing at it, as before;
                    // the local is what the cleanup below releases.
                    program = builtProgram = new ComputeProgram(context, clProgramSource);
                    builtProgram.Build(null, null, null, IntPtr.Zero);

                    // Create the kernel function and set its arguments.
                    kernel = builtProgram.CreateKernel("VectorAdd");
                    kernel.SetMemoryArgument(0, a);
                    kernel.SetMemoryArgument(1, b);
                    kernel.SetMemoryArgument(2, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                    // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                    // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                    commands.Execute(kernel, null, new long[] { count }, null, eventList);

                    // Read back the results without blocking. With the default in-order queue the read runs after the kernel.
                    // eventList will contain two events after this method returns.
                    commands.ReadFromBuffer(c, ref arrC, false, eventList);

                    // The read above is non-blocking, so wait for the queue to drain before touching "arrC".
                    // (Waiting on the events with eventList.Wait() would be the alternative.)
                    commands.Finish();

                    // Print the results to a log/console.
                    for (int i = 0; i < count; i++)
                    {
                        log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
                    }
                }
                finally
                {
                    // Release in the same order the original success path used:
                    // queue, events, kernel, program, buffers.
                    if (commands != null)
                    {
                        commands.Dispose();
                    }

                    if (eventList != null)
                    {
                        foreach (ComputeEventBase eventBase in eventList)
                        {
                            eventBase.Dispose();
                        }
                        eventList.Clear();
                    }

                    if (kernel != null)
                    {
                        kernel.Dispose();
                    }

                    if (builtProgram != null)
                    {
                        builtProgram.Dispose();
                    }

                    if (a != null)
                    {
                        a.Dispose();
                    }
                    if (b != null)
                    {
                        b.Dispose();
                    }
                    if (c != null)
                    {
                        c.Dispose();
                    }
                }
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Exemple #16
0
        /// <summary>
        /// Builds "MonteCarloSimulate.cl", runs its "MonteCarloSimulate" kernel on three
        /// sideSize x sideSize int matrices, spot-checks one output element against a host-side
        /// sum of products and prints the elapsed time in milliseconds.
        /// </summary>
        /// <remarks>
        /// All OpenCL objects are released deterministically through using blocks; the previous
        /// version disposed only the program and called GC.Collect() for the rest.
        /// </remarks>
        public static void Test()
        {
            string source = File.ReadAllText("MonteCarloSimulate.cl");

            //Choose Device
            ComputePlatform platform = ComputePlatform.Platforms[0];

            // NOTE(review): this device is only used to size the work group; the queue below runs on
            // context.Devices[0], which is not guaranteed by this code to be the same device.
            ComputeDevice device = platform.QueryDevices()[0];

            ComputeContextPropertyList properties =
                new ComputeContextPropertyList(platform);

            //Create arguments
            int sideSize = 4096;
            int[] inMatrixA = new int[sideSize * sideSize];
            int[] inMatrixB = new int[sideSize * sideSize];
            int[] outMatrixC = new int[sideSize * sideSize];
            Random random = new Random((int)DateTime.Now.Ticks);

            // NOTE(review): inputs are only randomized for small matrices; with sideSize = 4096 both
            // inputs stay all-zero, which makes the spot check at the end trivially comparable to 0.
            // Left unchanged because it may be deliberate - confirm.
            if (sideSize <= 32)
            {
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                    {
                        inMatrixA[y * sideSize + x] = random.Next(3);
                        inMatrixB[y * sideSize + x] = random.Next(3);
                        outMatrixC[y * sideSize + x] = 0;
                    }
                }
            }

            long executionTime;

            //Setup of stuff on our side
            using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero))
            //Build the program, which gets us the kernel
            using (ComputeProgram program = new ComputeProgram(context, source))
            {
                //can use notify as the 3rd command... if you want this to be non-blocking
                program.Build(null, null, null, IntPtr.Zero);

                // NOTE(review): UseHostPointer buffers are backed by managed arrays that are not pinned
                // by this method - confirm the binding keeps them fixed for the buffer's lifetime.
                using (ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate"))
                using (ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixA))
                using (ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixB))
                using (ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, outMatrixC))
                {
                    long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);

                    //Sets arguments
                    kernel.SetMemoryArgument(0, bufferMatrixA);
                    kernel.SetMemoryArgument(1, bufferMatrixB);
                    kernel.SetMemoryArgument(2, bufferMatrixC);
                    kernel.SetLocalArgument(3, sideSize * 2);
                    kernel.SetValueArgument<int>(4, sideSize);

                    if (sideSize <= 32)
                    {
                        // Left padding: one space plus two per column. Built only when it is printed,
                        // and in one allocation instead of sideSize string concatenations.
                        string offset = new string(' ', 1 + 2 * sideSize);

                        for (int y = 0; y < sideSize; y++)
                        {
                            Console.Write(offset);
                            for (int x = 0; x < sideSize; x++)
                            {
                                Console.Write(inMatrixA[y * sideSize + x] + " ");
                            }
                            Console.WriteLine();
                        }
                    }

                    //Runs commands
                    using (ComputeCommandQueue commands = new ComputeCommandQueue(context,
                        context.Devices[0], ComputeCommandQueueFlags.None))
                    {
                        executionTime = DateTime.Now.Ticks;

                        //Execute kernel
                        // NOTE(review): the global size is capped at 16x16 while the local size comes from
                        // device.MaxComputeUnits; OpenCL 1.x requires the global size to be a multiple of
                        // the local size, so this can fail on devices where 16 % MaxComputeUnits != 0.
                        // kernel.GetPreferredWorkGroupSizeMultiple(device) is probably the better source.
                        commands.Execute(kernel, null,
                            new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                            new long[] { localWorkSize, 1 }, null);

                        commands.Finish();

                        commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

                        commands.Finish();
                        executionTime = DateTime.Now.Ticks - executionTime;
                    }
                }
            }

            Console.WriteLine();
            if (sideSize <= 32)
            {
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                    {
                        Console.Write(inMatrixB[y * sideSize + x] + " ");
                    }
                    Console.Write(" ");
                    for (int x = 0; x < sideSize; x++)
                    {
                        Console.Write(outMatrixC[y * sideSize + x] + " ");
                    }

                    Console.WriteLine();
                }
            }

            // Spot check: recompute one randomly chosen element on the host and compare.
            int testY = random.Next(sideSize);
            int testX = random.Next(sideSize);

            int sum = 0;
            for (int q = 0; q < sideSize; q++)
            {
                sum += inMatrixA[q * sideSize + testX] *
                    inMatrixB[testY * sideSize + q];
            }

            Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

            // Ticks are 100 ns, so dividing by 10000 yields milliseconds.
            Console.WriteLine(executionTime / 10000.0);
        }
Exemple #17
0
        /// <summary>
        /// Runs the "PrimeNumber" kernel over 64^3 integers once per device of the first OpenCL
        /// platform (last device first), prints every output value other than 0 and 1, appends
        /// those values to a text file and reports the elapsed time per device.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <remarks>
        /// Changes relative to the previous version:
        /// the stopwatch is reset per device (times were cumulative); milliseconds are formatted with
        /// three digits; the output file is chosen before it is opened, using a byte-size threshold
        /// instead of a floating-point equality test, and only one writer is opened and always closed;
        /// the unused subtraction of two profiling enum constants is gone; OpenCL objects are disposed.
        /// </remarks>
        static void Main(string[] args)
        {
            const string programName = "Prime Number";
            const string outputDirectory = "C:\\primenumber\\";
            const long maxOutputFileBytes = 1024 * 1024; // roll over to a numbered file at 1 MiB

            Stopwatch stopWatch = new Stopwatch();

            string clProgramSource = KernelProgram();

            Console.WriteLine("Environment OS:");
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine(Environment.OSVersion);

            if (ComputePlatform.Platforms.Count == 0)
            {
                Console.WriteLine("No OpenCL Platforms are availble!");
                return;
            }

            // step 1 choose the first available platform
            ComputePlatform platform = ComputePlatform.Platforms[0];

            // output the basic info
            BasicInfo(platform);

            Console.WriteLine("Program: " + programName);
            Console.WriteLine("-----------------------------------------");

            // Author's note: Cpu 10 seconds Gpu 28 seconds
            int count = 64;
            int total = count * count * count;

            int[] output_Z = new int[total];
            int[] input_X = new int[total];
            for (int x = 0; x < total; x++)
            {
                input_X[x] = x;
            }

            // step 2 create context for that platform and all devices
            ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
            using (ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero))
            // step 3 create and build program
            using (ComputeProgram program = new ComputeProgram(context, clProgramSource))
            {
                program.Build(platform.Devices, null, null, IntPtr.Zero);

                // step 4 create memory objects, step 5 create the kernel object
                using (ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X))
                using (ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length))
                using (ComputeKernel kernel = program.CreateKernel("PrimeNumber"))
                {
                    // step 6 set kernel arguments
                    kernel.SetMemoryArgument(0, a);
                    kernel.SetMemoryArgument(1, z);

                    // Shared across devices, as before; the events are released once all devices are done.
                    ComputeEventList eventList = new ComputeEventList();
                    try
                    {
                        // query available devices n,...,1,0.  cpu first then gpu
                        for (int j = context.Devices.Count - 1; j > -1; j--)
                        {
                            // Reset first: Start() alone resumes the previous measurement,
                            // which made every device after the first report a cumulative time.
                            stopWatch.Reset();
                            stopWatch.Start();

                            // step 7 create command queue on that context on that device
                            using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None))
                            {
                                // step 8 run the kernel program
                                commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);

                                // step 9 read results (non-blocking, so wait for the queue to drain)
                                commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                                commands.Finish();
                            }

                            // Pick the output file before opening anything: the base file, or the first
                            // numbered file that has not yet reached the size limit.
                            Directory.CreateDirectory(outputDirectory);
                            string fileName = outputDirectory + "PrimeNumberGPU.txt";
                            int index = 1;
                            while (File.Exists(fileName) && new FileInfo(fileName).Length >= maxOutputFileBytes)
                            {
                                fileName = outputDirectory + "PrimeNumberGPU" + index.ToString() + ".txt";
                                index++;
                            }

                            // Append mode; the using block closes the writer even if writing fails.
                            using (StreamWriter file = new StreamWriter(fileName, true))
                            {
                                for (int xx = 0; xx < total; xx++)
                                {
                                    if (output_Z[xx] != 0 && output_Z[xx] != 1)
                                    {
                                        Console.WriteLine(output_Z[xx]);
                                        file.Write(output_Z[xx]);
                                        file.Write("x");
                                    }
                                }
                            }

                            stopWatch.Stop();

                            // Milliseconds range over 0-999, hence three digits after the point.
                            TimeSpan ts = stopWatch.Elapsed;
                            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:000}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                            Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);

                            Console.WriteLine("-----------------------------------------");
                        }
                    }
                    finally
                    {
                        foreach (ComputeEventBase eventBase in eventList)
                        {
                            eventBase.Dispose();
                        }
                        eventList.Clear();
                    }
                }
            }

            Console.ReadLine();
        }
Exemple #18
0
        /// <summary>
        /// Attempts to initialize OpenCL for the selected GPU. Does nothing if the kernel
        /// has already been created.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No OpenCL device matches the configured handle, or the embedded kernel source is missing.
        /// </exception>
        /// <exception cref="Exception">
        /// The kernel failed to build; the message is the device build log and the original
        /// compute exception is attached as the inner exception.
        /// </exception>
        private void InitializeOpenCL()
        {
            // only initialize once
            if (clKernel != null)
                return;

            // select the device we've been instructed to use
            clDevice = ComputePlatform.Platforms
                .SelectMany(i => i.Devices)
                .SingleOrDefault(i => i.Handle.Value == Gpu.CLDeviceHandle.Value);

            // SingleOrDefault yields null when nothing matches; fail with a clear message
            // instead of a NullReferenceException on clDevice.Platform below
            if (clDevice == null)
                throw new InvalidOperationException("No OpenCL device matches the configured GPU handle.");

            // kernel code; loaded before any native resource is allocated so a missing
            // resource cannot leave a half-initialized context behind
            const string kernelResourceName = "BitMaker.Miner.Gpu.DiabloMiner.cl";
            var kernelStream = GetType().Assembly.GetManifestResourceStream(kernelResourceName);
            if (kernelStream == null)
                throw new InvalidOperationException("Embedded OpenCL kernel source '" + kernelResourceName + "' was not found.");

            string kernelCode;
            using (var rdr = new StreamReader(kernelStream))
                kernelCode = rdr.ReadToEnd();

            // context we'll be working underneath
            clContext = new ComputeContext(new ComputeDevice[] { clDevice }, new ComputeContextPropertyList(clDevice.Platform), null, IntPtr.Zero);

            // queue to control device
            clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

            // buffers to store kernel output
            // NOTE(review): created with ReadOnly although they are described as kernel output - confirm the intended flag
            clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
            clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

            clProgram = new ComputeProgram(clContext, kernelCode);

            try
            {
                // build kernel for device, passing the device's maximum work-group size as WORKSIZE
                clProgram.Build(new ComputeDevice[] { clDevice }, "-D WORKSIZE=" + clDevice.MaxWorkGroupSize, null, IntPtr.Zero);
            }
            catch (ComputeException ex)
            {
                // surface the compiler output, keeping the original failure as the inner exception
                throw new Exception(clProgram.GetBuildLog(clDevice), ex);
            }

            clKernel = clProgram.CreateKernel("search");
        }
Exemple #19
0
        /// <summary>
        /// Reads every file listed in <c>CLSourcePaths</c>, builds them into a single OpenCL
        /// program on the command queue's context and creates the "render" kernel.
        /// </summary>
        /// <remarks>
        /// Returns without doing anything when no source paths are defined. A missing source file
        /// or a build failure is reported and then terminates the process with exit code -1.
        /// </remarks>
        protected virtual void buildOpenCLProgram()
        {
            if (CLSourcePaths == null)
            {
                System.Diagnostics.Trace.Write("No CL source defined.\n");
                return;
            }

            String[] sourceArray = new String[CLSourcePaths.Length];
            try
            {
                for (int i = 0; i < CLSourcePaths.Length; i++)
                {
                    // The reader is disposed as soon as the file has been read; it used to be
                    // left open, holding one file handle per source file.
                    using (StreamReader sourceReader = new StreamReader(CLSourcePaths[i]))
                    {
                        sourceArray[i] = sourceReader.ReadToEnd();
                    }
                }
            }
            catch (FileNotFoundException e)
            {
                System.Diagnostics.Trace.Write("Can't find: " + e.FileName + "\n");
                Environment.Exit(-1);
            }

            // Build and compile the OpenCL program
            _renderKernel = null;
            _renderProgram = new ComputeProgram(_commandQueue.Context, sourceArray);
            try
            {
                // build the program
                // NOTE(review): "-cl-nv-verbose" looks like a vendor-specific option - confirm it is accepted on all target platforms
                _renderProgram.Build(null, "-cl-nv-verbose", null, IntPtr.Zero);

                // create a reference a kernel function
                _renderKernel = _renderProgram.CreateKernel("render");
            }
            // Each of the three build-related failures is handled the same way:
            // print the build log, then exit.
            catch (BuildProgramFailureComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
            catch (InvalidBuildOptionsComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
            catch (InvalidBinaryComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
        }
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 640*480; // 
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                
                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                

                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                //ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                
                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("CompareGPUCPU");
                DateTime ExecutionStartTime; //Var will hold Execution Starting Time
                DateTime ExecutionStopTime;//Var will hold Execution Stopped Time
                TimeSpan ExecutionTime;//Var will count Total Execution Time-Our Main Hero                
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                int repeatTimes = 100;
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    kernel.SetMemoryArgument(0, a);
                    //kernel.SetMemoryArgument(1, b);
                    //kernel.SetMemoryArgument(2, c);
                    kernel.SetMemoryArgument(1, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    //ComputeEventList eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                  

                    // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                    // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                    //commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    commands.Execute(kernel, null, new long[] { count }, null, null);

                    // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                    // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                    // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                    // eventList will contain two events after this method returns.
                    //commands.ReadFromBuffer(c, ref arrC, false, eventList);
                    commands.ReadFromBuffer(c, ref arrC, false, null);

                    // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                    // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                    // to finish has to be issued before "arrC" can be used. 
                    // This explicit synchronization can be achieved in two ways:

                    // 1) Wait for the events in the list to finish,
                    //eventList.Wait();

                    // 2) Or simply use
                    commands.Finish();
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                double perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using GPU", perTaskTime);
 
                // Do that using CPU
                /*
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    for (int i = 0; i < count; i++)
                    {
                        //arrC[i] = arrA[i] + arrB[i];
                        int j;
                        for (j = 0; j < 330 * 10; j++)
                            arrC[i] = arrA[i] + j;
                    }
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using CPU", ExecutionTime.TotalMilliseconds.ToString());
                 */
                log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Exemple #21
0
 /// <summary>
 /// Builds an OpenCL program from <paramref name="sourcecodes"/> for the single device of
 /// <paramref name="context"/> and returns its "Main" kernel.
 /// </summary>
 /// <param name="context">Context to build in; <c>Single()</c> requires it to contain exactly one device.</param>
 /// <param name="sourcecodes">OpenCL source strings handed to the program.</param>
 /// <param name="defines">
 /// Preprocessor defines, emitted as <c>-D key=value</c>. Entries whose value is null or empty are
 /// left out of the build options. Keys and values must not contain whitespace.
 /// </param>
 /// <returns>
 /// The "Main" kernel, or <c>null</c> when a define contains whitespace or the build fails
 /// (a message box describing the problem is shown in both cases).
 /// </returns>
 private static ComputeKernel Compile(ComputeContext context, string[] sourcecodes, Dictionary<string, string> defines)
 {
     var device = context.Devices.Single();
     var program = new ComputeProgram(context, sourcecodes);
     ComputeKernel kernel = null;
     try
     {
         // Null values are legal (they are skipped when the options are assembled), so they must
         // not be scanned for whitespace.
         foreach (var define in defines.Where(d => d.Key.Any(char.IsWhiteSpace) || (d.Value != null && d.Value.Any(char.IsWhiteSpace))))
         {
             // Only the first offending define is reported.
             MessageBox.Show("Invalid define \"" + define.Key + "=" + define.Value + "\": define contained whitespace", "Error");
             return null;
         }
         var options = string.Join(" ", defines.Where(kvp => !string.IsNullOrEmpty(kvp.Value)).Select(kvp => "-D " + kvp.Key + "=" + kvp.Value));
         program.Build(new[] { device }, options + " " + StaticSettings.Fetch.OpenClOptions, null, IntPtr.Zero);
         // A non-empty log on a successful build is still shown to the user.
         var str = program.GetBuildLog(device).Trim();
         if (string.IsNullOrEmpty(str) == false)
             MessageBox.Show(str, "Build log");
         kernel = program.CreateKernel("Main");
         return kernel;
     }
     catch (InvalidBinaryComputeException)
     {
         MessageBox.Show(program.GetBuildLog(device), "Build error (invalid binary)");
         return null;
     }
     catch (BuildProgramFailureComputeException)
     {
         MessageBox.Show(program.GetBuildLog(device), "Build error (build program failure)");
         return null;
     }
     finally
     {
         // No kernel was produced, so nothing else refers to the program: release it instead of leaking it.
         if (kernel == null)
             program.Dispose();
     }
 }
Exemple #22
0
        /// <summary>
        /// Creates a GPU context on the selected OpenCL platform, builds "../../program.cl", creates the
        /// "init" and "update" kernels and binds the ray, pixel and camera buffers to the "update" kernel.
        /// </summary>
        /// <param name="platformIdx">Index into <c>ComputePlatform.Platforms</c>.</param>
        public void InitGPU(int platformIdx)
        {
            platform = ComputePlatform.Platforms[platformIdx];
            context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            // The using block releases the file handle even when ReadToEnd throws.
            string clSource;
            using (StreamReader streamReader = new StreamReader("../../program.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            ComputeProgram program = new ComputeProgram(context, clSource);
            program.Build(null, null, null, IntPtr.Zero);

            // NOTE(review): the kernels, buffers and queue below are locals and are unreachable once
            // this method returns - presumably they should be stored in fields; confirm with the author.
            ComputeKernel kernelInit = program.CreateKernel("init");
            ComputeKernel kernelUpdate = program.CreateKernel("update");

            var flags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;

            // One ray and one pixel per screen pixel.
            int aantalPixels = screen.height * screen.width;

            rays = new GPURay[aantalPixels];
            pixels = new Vector3[aantalPixels];
            setGPUCameraToCamera();
            GPUCamera[] gpuCamArray = { gpuCamera };

            ComputeBuffer<GPURay> bufferRays = new ComputeBuffer<GPURay>(context, flags, rays);
            ComputeBuffer<Vector3> bufferPixels = new ComputeBuffer<Vector3>(context, flags, pixels);
            ComputeBuffer<GPUCamera> bufferCamera = new ComputeBuffer<GPUCamera>(context, flags, gpuCamArray);

            kernelUpdate.SetMemoryArgument(0, bufferRays);
            kernelUpdate.SetMemoryArgument(1, bufferPixels);
            kernelUpdate.SetMemoryArgument(2, bufferCamera);

            ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        }
Exemple #23
0
    // Initialization: creates the OpenCL context, queue, program and kernels, allocates the grid
    // buffers and binds the arguments that do not change afterwards.
    void Awake()
    {
        var firstPlatform = ComputePlatform.Platforms[0];
        var contextProperties = new ComputeContextPropertyList(firstPlatform);
        _context = new ComputeContext(ComputeDeviceTypes.Cpu, contextProperties, null, System.IntPtr.Zero);
        _queue = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);

        _program = new ComputeProgram(_context, System.IO.File.ReadAllText(clProgramPath));
        try
        {
            _program.Build(null, null, null, System.IntPtr.Zero);
        }
        catch (BuildProgramFailureComputeException)
        {
            // Log the compiler output, then let the failure propagate unchanged.
            var buildLog = _program.GetBuildLog(_context.Devices[0]);
            Debug.Log(buildLog);
            throw;
        }

        _events = new ComputeEventList();

        // Kernels are looked up by the names configured on this component.
        _updateGridKernel = _program.CreateKernel(clUpdateGridKernelName);
        _updateBoidsKernel = _program.CreateKernel(clUpdateBoidsKernelName);
        _boundaryKernel = _program.CreateKernel(clBoundaryKernelName);

        // One counter per grid cell (nGridPartitions cubed) and maxIndices index slots per cell.
        _pointCounters = new int[nGridPartitions * nGridPartitions * nGridPartitions];
        _pointIndices = new int[_pointCounters.Length * maxIndices];

        var bufferFlags = ComputeMemoryFlags.WriteOnly;
        _pointCountersBuffer = new Cloo.ComputeBuffer<int>(_context, bufferFlags, _pointCounters.Length);
        _pointIndicesBuffer = new Cloo.ComputeBuffer<int>(_context, bufferFlags, _pointIndices.Length);

        var info = new GridInfo();
        info.worldOrigin = gridbounds.min;
        info.worldSize = gridbounds.size;
        info.cellSize = gridbounds.size * (1f / nGridPartitions);
        info.nGridPartitions = nGridPartitions;
        info.maxIndices = maxIndices;
        _gridInfo = info;

        // Boundary kernel: grid description only.
        _boundaryKernel.SetValueArgument(1, _gridInfo);

        // Grid update kernel: counters, indices, grid description.
        _updateGridKernel.SetMemoryArgument(1, _pointCountersBuffer);
        _updateGridKernel.SetMemoryArgument(2, _pointIndicesBuffer);
        _updateGridKernel.SetValueArgument(3, _gridInfo);

        // Boid update kernel: same three values, one argument slot further along.
        _updateBoidsKernel.SetMemoryArgument(2, _pointCountersBuffer);
        _updateBoidsKernel.SetMemoryArgument(3, _pointIndicesBuffer);
        _updateBoidsKernel.SetValueArgument(4, _gridInfo);
    }
Exemple #24
0
        /// <summary>
        /// Loads the kernel whose name equals the runtime type name of <paramref name="kernelInstance"/>,
        /// builds it and returns its "Run" kernel with all int buffers, followed by all float buffers,
        /// bound as consecutive memory arguments.
        /// </summary>
        /// <param name="kernelInstance">Object whose type name selects the kernel to load.</param>
        /// <returns>The configured kernel, or <c>null</c> when hardware acceleration is disabled.</returns>
        public ComputeKernel CreateKernel(object kernelInstance)
        {
            string kernelName = kernelInstance.GetType().Name;

            if (!HardwareAccelerationEnabled)
            {
                return null;
            }

            // Compile the OpenCL source that belongs to this kernel name.
            IKernel definition = KernelManager.LoadKernel(kernelName);
            var clProgram = new ComputeProgram(_context, definition.Code);
            clProgram.Build(null, null, null, IntPtr.Zero);

            ComputeKernel runKernel = clProgram.CreateKernel("Run");

            // Argument slots are handed out in enumeration order: int buffers first, then float buffers.
            int argumentSlot = 0;

            foreach (string bufferName in _intComputeBuffers.Keys)
            {
                runKernel.SetMemoryArgument(argumentSlot++, _intComputeBuffers[bufferName]);
            }

            foreach (string bufferName in _floatComputeBuffers.Keys)
            {
                runKernel.SetMemoryArgument(argumentSlot++, _floatComputeBuffers[bufferName]);
            }

            return runKernel;
        }
Exemple #25
0
        /// <summary>
        /// Attempts to initialize OpenCL for the selected GPU: creates the context, command queue,
        /// two 16-element buffers and the program, then builds it and creates the "search" kernel.
        /// Does nothing when the kernel has already been created.
        /// </summary>
        /// <exception cref="Exception">
        /// The program failed to build; the message is the device build log and the inner exception
        /// is the original <c>ComputeException</c>.
        /// </exception>
        internal void InitializeOpenCL()
        {
            // only initialize once
            if (clKernel != null)
                return;

            // unused memory so Cloo doesn't break with a null ptr
            // NOTE(review): this block is freed in the finally below while clContext, which was given
            // the pointer, stays alive - harmless only as long as the no-op callbacks never read it; confirm.
            var userDataPtr = Marshal.AllocCoTaskMem(512);

            try
            {
                clDevice = Gpu.CLDevice;

                // context we'll be working underneath
                clContext = new ComputeContext(
                    new[] { clDevice },
                    new ComputeContextPropertyList(clDevice.Platform),
                    (p1, p2, p3, p4) => { },
                    userDataPtr);

                // queue to control device
                clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

                // buffers to store kernel output
                // NOTE(review): ReadOnly looks inconsistent with "kernel output" - presumably WriteOnly
                // was intended; verify against the kernel source before changing.
                clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
                clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

                // obtain the program
                clProgram = new ComputeProgram(clContext, Gpu.GetSource());

                // compiler defines derived from the GPU's configuration
                var b = new StringBuilder();
                if (Gpu.WorkSize > 0)
                    b.Append(" -D WORKSIZE=").Append(Gpu.WorkSize);
                if (Gpu.HasBitAlign)
                    b.Append(" -D BITALIGN");
                if (Gpu.HasBfiInt)
                    b.Append(" -D BFIINT");

                try
                {
                    // build kernel for device
                    clProgram.Build(new[] { clDevice }, b.ToString(), (p1, p2) => { }, userDataPtr);
                }
                catch (ComputeException ex)
                {
                    // Surface the build log as the message, but keep the original failure attached
                    // so its error code and stack trace are not lost.
                    throw new Exception(clProgram.GetBuildLog(clDevice), ex);
                }

                // assigned last: a non-null kernel is what marks initialization as complete
                clKernel = clProgram.CreateKernel("search");
            }
            finally
            {
                Marshal.FreeCoTaskMem(userDataPtr);
            }
        }
Exemple #26
0
        bool useGPU = true; // true while the GPU (OpenCL) path is enabled; Init() sets it from the command line and clears it when OpenCL setup fails

        #endregion Fields

        #region Methods

        // initialize renderer: takes in command line parameters passed by template code
        //   rt          - stored in runningTime
        //   gpu         - whether the OpenCL (GPU) path should be set up
        //   platformIdx - index into ComputePlatform.Platforms used when gpu is true
        // If OpenCL setup throws a ComputeException, the build log is printed, the method waits
        // for Enter on the console and useGPU is cleared.
        public void Init( int rt, bool gpu, int platformIdx )
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera( screen.width, screen.height );

            // Generate randoms
            Console.Write("Generating randoms....\t");

            randoms = new float[1000];
            Random r = RTTools.GetRNG();
            for (int i = 0; i < randoms.Length; i++)
            {
                randoms[i] = (float)r.NextDouble();
            }

            // The result was never used; the draw itself is kept so the generator returned by
            // RTTools.GetRNG() is left in the same state as before (it may be shared - not visible here).
            r.Next();

            Console.WriteLine("Done!");

            // initialize required opencl things if gpu is used
            if (!useGPU)
            {
                return;
            }

            // The using block releases the file handle even when ReadToEnd throws.
            string clSource;
            using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            // Honour the platform index from the command line instead of always taking platform 0.
            platform = ComputePlatform.Platforms[gpuPlatform];
            context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
            queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            program = new ComputeProgram(context, clSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                kernel = program.CreateKernel("Main");

                // Converted once so the buffer contents and the length argument always agree.
                var sceneData = scene.toCL();

                sceneBuffer = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, sceneData);
                rndBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
                cameraBuffer = new ComputeBuffer<Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
                outputBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
                skydome = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

                kernel.SetMemoryArgument(0, outputBuffer);
                kernel.SetValueArgument(1, screen.width);
                kernel.SetValueArgument(2, screen.height);
                kernel.SetMemoryArgument(3, sceneBuffer);
                kernel.SetValueArgument(4, sceneData.Length);
                kernel.SetMemoryArgument(5, skydome);
                kernel.SetMemoryArgument(6, cameraBuffer);
                kernel.SetMemoryArgument(7, rndBuffer);
            }
            catch (ComputeException)
            {
                // Any OpenCL failure in the block above ends up here, not only compile errors;
                // the build log is printed regardless and the GPU path is switched off.
                Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
                Console.ReadLine();
                useGPU = false;
            }
        }
        /// <summary>
        /// Creates a conjugate gradient solver backed by OpenCL, using the first device of a GPU
        /// context created on the first platform.
        /// </summary>
        /// <param name="count">Number of elements</param>
        /// <param name="maxNonZeroCount">Passed through to the base constructor</param>
        /// <param name="_minIteration">Passed through to the base constructor</param>
        /// <param name="_maxIteration">Passed through to the base constructor</param>
        /// <param name="allowableResidual">Passed through to the base constructor</param>
        public ConjugateGradientSingleGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
            : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
        {
            // Create a GPU context on the first platform
            var clContext = new ComputeContext(
                ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(ComputePlatform.Platforms[0]),
                null,
                IntPtr.Zero);

            // Devices available in that context; the first one does the work
            var gpuDevices = clContext.Devices;
            var primaryDevice = gpuDevices[0];

            var vectorLength = Count;

            globalWorkSize = new long[] { vectorLength };
            localWorkSize = new long[] { primaryDevice.MaxWorkItemSizes[0] };

            // Command queue
            queue = new ComputeCommandQueue(clContext, primaryDevice, ComputeCommandQueueFlags.None);

            // Buffers for the matrix A: elements, column indices and per-row non-zero counts
            bufferA = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadOnly, A.Elements.Length);
            bufferColumnIndeces = new ComputeBuffer<int>(clContext, ComputeMemoryFlags.ReadOnly, A.ColumnIndeces.Length);
            bufferNonzeroCounts = new ComputeBuffer<int>(clContext, ComputeMemoryFlags.ReadOnly, A.NonzeroCounts.Length);

            // Buffers for the vectors b, x, Ap, p and r
            bufferB = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadOnly, b.Length);
            bufferX = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite, x.Length);
            bufferAp = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite, vectorLength);
            bufferP = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite, vectorLength);
            bufferR = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite, vectorLength);

            // Scratch buffer used by the computations, plus the host-side slot for reduction results
            bufferVector = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite, vectorLength);
            answerForReduction = new double[1];

            // Create the program from the embedded source
            var clProgram = new ComputeProgram(clContext, Properties.Resources.Mgcg);

            // The kernel source is parameterised by the real type and the maximum non-zero count per row
            const string realTypeName = "double";
            string buildOptions = string.Format(" -D REAL={0} -D MAX_NONZERO_COUNT={1}", realTypeName, A.MaxNonzeroCountPerRow);

            // Try to build
            try
            {
                clProgram.Build(gpuDevices, buildOptions, null, IntPtr.Zero);
            }
            catch (BuildProgramFailureComputeException ex)
            {
                // On failure, rethrow with the build log as the message
                throw new ApplicationException(clProgram.GetBuildLog(gpuDevices[0]), ex);
            }

            // Create the kernels
            addVectorVector = clProgram.CreateKernel("AddVectorVector");
            multiplyVectorVector = clProgram.CreateKernel("MultiplyVectorVector");
            reductionSum = clProgram.CreateKernel("ReductionSum");
            reductionMax = clProgram.CreateKernel("ReductionMaxAbsolute");
            matrix_x_Vector = clProgram.CreateKernel("Matrix_x_Vector");

            // The buffer size is half of the maximum non-zero element count
            bufferSizeOfVectorOnMatrixMultiplying = A.MaxNonzeroCountPerRow / 2;
        }