/// <summary>
/// Stores the input manager, derives a square 2D local work size from the device's
/// maximum work-group size, builds the OpenCL program from <paramref name="source"/>
/// and creates all kernels it contains.
/// </summary>
/// <remarks>
/// The build log is written to the console whether or not the build succeeds.
/// When the build fails with an invalid-binary or build-failure error the constructor
/// returns early and the kernel array is left unassigned.
/// </remarks>
public KernelManager(GraphicsInterop interop, InputManager input, string source)
{
    _input = input;

    // Each side of the work group is the square root of the maximum group size, truncated.
    var side = (long)Math.Sqrt(interop.Device.MaxWorkGroupSize);
    _localSize = new[] { side, side };

    _program = new ComputeProgram(interop.Context, source);

    bool built;
    try
    {
        _program.Build(new[] { interop.Device }, "", null, IntPtr.Zero);
        built = true;
    }
    catch (InvalidBinaryComputeException)
    {
        built = false;
    }
    catch (BuildProgramFailureComputeException)
    {
        built = false;
    }

    // The log is printed on both the success and the failure path.
    Console.WriteLine(_program.GetBuildLog(interop.Device));
    if (!built)
    {
        return;
    }

    _kernels = _program.CreateAllKernels().ToArray();
}
/// <summary>
/// Picks the first OpenCL platform and its first device, compiles the OpenCL source
/// read from <c>MD5_OPENCL_FILE</c> and loads the <c>crackMD5</c> kernel.
/// </summary>
/// <remarks>
/// Terminates the process with exit code 1 when no OpenCL platform is present.
/// A build failure is logged together with the build log and then rethrown unchanged.
/// </remarks>
public OpenCLPasswordMatcher ()
{
    if (ComputePlatform.Platforms.Count == 0) {
        Console.WriteLine ("Cound not find any OpenCL platforms");
        Environment.Exit (1);
    }

    var platform = ComputePlatform.Platforms [0];
    logger.Info ("Found {0} computing devices:", platform.Devices.Count);
    foreach (var d in platform.Devices) {
        logger.Info ("* {0}", d.Name);
    }

    Context = new ComputeContext (ComputeDeviceTypes.All, new ComputeContextPropertyList (platform), null, IntPtr.Zero);
    Device = Context.Devices [0];
    logger.Info ("Using first device.");

    // load opencl source; the using block closes the file even if reading throws
    string clSource;
    using (StreamReader streamReader = new StreamReader (MD5_OPENCL_FILE)) {
        clSource = streamReader.ReadToEnd ();
    }

    // create program with opencl source
    ComputeProgram program = new ComputeProgram (Context, clSource);

    // compile opencl source
    try {
        program.Build (null, null, null, IntPtr.Zero);
    } catch (Exception) {
        logger.Error ("Build log: " + program.GetBuildLog (Device));
        // bare rethrow keeps the original stack trace ("throw e;" would reset it)
        throw;
    }

    // load chosen kernel from program
    Kernel = program.CreateKernel ("crackMD5");
}
/// <summary>
/// Prepares the OpenCL objects: a GPU context on the first platform, one command queue
/// and one kernel per device, host-pointer buffers over the three arrays, and one
/// sub-buffer of each per device.
/// </summary>
static void InitializeOpenCL(Real[] result, Real[] left, Real[] right)
{
    // Take the first platform and report it
    var platform = ComputePlatform.Platforms[0];
    Console.WriteLine("プラットフォーム:{0} ({1})", platform.Name, platform.Version);

    // Create a context over the platform's GPU devices
    var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    // Devices available in that context
    var devices = context.Devices;
    int deviceCount = devices.Count;
    Console.WriteLine("デバイス数:{0}", deviceCount);

    // Number of elements handled by a single device (rounded up)
    countPerDevice = (int)Math.Ceiling((double)COUNT / deviceCount);

    // One command queue per device, listing each device as we go
    queues = new ComputeCommandQueue[deviceCount];
    int slot = 0;
    foreach (var device in devices)
    {
        queues[slot] = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
        Console.WriteLine("* {0} ({1})", device.Name, device.Vendor);
        slot++;
    }

    // Create the program from the embedded source
    var program = new ComputeProgram(context, Properties.Resources.MultiGpu);

    // Compiler options: element type, vector width and per-work-item count are passed as macros
    string realString = typeof(Real) == typeof(Double) ? "double" : "float";
    string options = string.Format(" -D REAL={0} -D REALV={0}{1} -D VLOADN=vload{1} -D VSTOREN=vstore{1} -D COUNT_PER_WORKITEM={2} -Werror", realString, VECTOR_COUNT, COUNT_PER_WORKITEM);

    try
    {
        program.Build(devices, options, null, IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException ex)
    {
        // On failure, surface the first device's build log together with the original message
        string buildLog = program.GetBuildLog(devices[0]);
        throw new ApplicationException(ex.Message + "\n" + buildLog, ex);
    }

    // One kernel instance per device
    addOneElement = new ComputeKernel[deviceCount];
    for (int k = 0; k < deviceCount; k++)
    {
        addOneElement[k] = program.CreateKernel("AddOneElement");
    }

    // Whole-array buffers backed by the host arrays
    const ComputeMemoryFlags hostReadOnly = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
    const ComputeMemoryFlags hostWriteOnly = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer;
    bufferLeft = new ComputeBuffer<Real>(context, hostReadOnly, left);
    bufferRight = new ComputeBuffer<Real>(context, hostReadOnly, right);
    bufferResult = new ComputeBuffer<Real>(context, hostWriteOnly, result);

    // Per-device slices of each buffer
    buffersLeft = new ComputeSubBuffer<Real>[deviceCount];
    buffersRight = new ComputeSubBuffer<Real>[deviceCount];
    buffersResult = new ComputeSubBuffer<Real>[deviceCount];
    for (int k = 0; k < deviceCount; k++)
    {
        int origin = countPerDevice * k;
        buffersLeft[k] = new ComputeSubBuffer<Real>(bufferLeft, ComputeMemoryFlags.ReadOnly, origin, countPerDevice);
        buffersRight[k] = new ComputeSubBuffer<Real>(bufferRight, ComputeMemoryFlags.ReadOnly, origin, countPerDevice);
        buffersResult[k] = new ComputeSubBuffer<Real>(bufferResult, ComputeMemoryFlags.WriteOnly, origin, countPerDevice);
    }
}
/// <summary>
/// Attempts to initialize OpenCL for the selected GPU: creates the context, command
/// queue, two 16-element buffers, the program and the <c>search</c> kernel.
/// Does nothing when the kernel has already been created.
/// </summary>
/// <exception cref="Exception">
/// The program failed to build. The message is the device build log and the original
/// <c>ComputeException</c> is attached as the inner exception.
/// </exception>
internal void InitializeOpenCL()
{
    // only initialize once
    if (clKernel != null)
    {
        return;
    }

    // unused memory so Cloo doesn't break with a null ptr
    var userDataPtr = Marshal.AllocCoTaskMem(512);

    try
    {
        clDevice = Gpu.CLDevice;

        // context we'll be working underneath
        clContext = new ComputeContext(
            new[] { clDevice },
            new ComputeContextPropertyList(clDevice.Platform),
            (p1, p2, p3, p4) => { },
            userDataPtr);

        // queue to control device
        clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

        // buffers to store kernel output
        // NOTE(review): both are created with the ReadOnly flag although they are described
        // as output buffers - confirm against the kernel source.
        clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
        clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

        // obtain the program
        clProgram = new ComputeProgram(clContext, Gpu.GetSource());

        // compiler options derived from the GPU description
        var b = new StringBuilder();
        if (Gpu.WorkSize > 0)
        {
            b.Append(" -D WORKSIZE=").Append(Gpu.WorkSize);
        }
        if (Gpu.HasBitAlign)
        {
            b.Append(" -D BITALIGN");
        }
        if (Gpu.HasBfiInt)
        {
            b.Append(" -D BFIINT");
        }

        try
        {
            // build kernel for device
            clProgram.Build(new[] { clDevice }, b.ToString(), (p1, p2) => { }, userDataPtr);
        }
        catch (ComputeException ex)
        {
            // keep the original error as InnerException so its type and stack are not lost
            throw new Exception(clProgram.GetBuildLog(clDevice), ex);
        }

        clKernel = clProgram.CreateKernel("search");
    }
    finally
    {
        Marshal.FreeCoTaskMem(userDataPtr);
    }
}
/// <summary>Compiles the program. Returns the build logs for each device.</summary>
/// <param name="SourceCode">Source code array to compile</param>
/// <param name="BuildLogs">Build logs for each device</param>
/// <exception cref="Exception">
/// The program did not build on any device. The last build error, if any, is attached
/// as the inner exception. <paramref name="BuildLogs"/> is populated before this is thrown.
/// </exception>
public static void Compile(string[] SourceCode, out List<string> BuildLogs)
{
    Prog = new ComputeProgram(Context, SourceCode);

    // Try every device; a failure on one device must not prevent trying the others.
    bool builtOnAnyDevice = false;
    Exception lastBuildError = null;
    for (int i = 0; i < CLCalc.CLDevices.Count; i++)
    {
        try
        {
            Prog.Build(new List<ComputeDevice>() { CLCalc.CLDevices[i] }, "", null, IntPtr.Zero);
            builtOnAnyDevice = true;
        }
        catch (Exception ex)
        {
            // Remember the failure instead of discarding it, so it can be reported below.
            lastBuildError = ex;
        }
    }

    // Build information, one entry per device
    BuildLogs = new List<string>();
    for (int i = 0; i < CLCalc.CLDevices.Count; i++)
    {
        string LogInfo;
        try
        {
            LogInfo = Prog.GetBuildLog(CLCalc.CLDevices[i]);
        }
        catch
        {
            LogInfo = "Error retrieving build info";
        }

        BuildLogs.Add(LogInfo);
    }

    // Did not compile on any device: throw, carrying the underlying cause
    if (!builtOnAnyDevice)
    {
        throw new Exception("Could not compile program", lastBuildError);
    }
}
// Initialization: creates the CPU OpenCL context and queue, builds the program read from
// clProgramPath, creates the three kernels, allocates the grid arrays and buffers and
// binds the arguments that are set here.
void Awake()
{
    var firstPlatform = ComputePlatform.Platforms[0];
    _context = new ComputeContext(ComputeDeviceTypes.Cpu, new ComputeContextPropertyList(firstPlatform), null, System.IntPtr.Zero);
    _queue = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);

    _program = new ComputeProgram(_context, System.IO.File.ReadAllText(clProgramPath));
    try
    {
        _program.Build(null, null, null, System.IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException)
    {
        // Log the build output of the first device, then let the failure propagate.
        string buildLog = _program.GetBuildLog(_context.Devices[0]);
        Debug.Log(buildLog);
        throw;
    }

    _events = new ComputeEventList();
    _updateGridKernel = _program.CreateKernel(clUpdateGridKernelName);
    _updateBoidsKernel = _program.CreateKernel(clUpdateBoidsKernelName);
    _boundaryKernel = _program.CreateKernel(clBoundaryKernelName);

    // One counter per grid cell, and maxIndices index slots per cell.
    int cellCount = nGridPartitions * nGridPartitions * nGridPartitions;
    _pointCounters = new int[cellCount];
    _pointIndices = new int[cellCount * maxIndices];
    _pointCountersBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointCounters.Length);
    _pointIndicesBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointIndices.Length);

    // Grid description handed to all three kernels by value.
    var info = new GridInfo();
    info.worldOrigin = gridbounds.min;
    info.worldSize = gridbounds.size;
    info.cellSize = gridbounds.size * (1f / nGridPartitions);
    info.nGridPartitions = nGridPartitions;
    info.maxIndices = maxIndices;
    _gridInfo = info;

    _boundaryKernel.SetValueArgument(1, _gridInfo);

    _updateGridKernel.SetMemoryArgument(1, _pointCountersBuffer);
    _updateGridKernel.SetMemoryArgument(2, _pointIndicesBuffer);
    _updateGridKernel.SetValueArgument(3, _gridInfo);

    _updateBoidsKernel.SetMemoryArgument(2, _pointCountersBuffer);
    _updateBoidsKernel.SetMemoryArgument(3, _pointIndicesBuffer);
    _updateBoidsKernel.SetValueArgument(4, _gridInfo);
}
/// <summary>
/// Creates the <c>place</c> kernel from <paramref name="program"/>. If creation fails,
/// the build log is written to the console and the original exception is rethrown.
/// </summary>
/// <param name="program">Program expected to contain a kernel named <c>place</c>.</param>
/// <returns>The created kernel.</returns>
private static ComputeKernel GetKernel(ComputeProgram program)
{
    try
    {
        return program.CreateKernel("place");
    }
    catch
    {
        try
        {
            // Ask for the log of a device that belongs to the program's context; the
            // platform's first device is not necessarily part of that context.
            string log = program.GetBuildLog(program.Context.Devices[0]);
            Console.WriteLine(log);
        }
        catch (Exception logError)
        {
            // Failing to fetch the log must not hide the kernel-creation error.
            Console.WriteLine("Could not retrieve build log: " + logError.Message);
        }

        throw;
    }
}
/// <summary>
/// Builds the editor's text as an OpenCL program for the configured platform, devices
/// and options, and shows either "Build succeeded." or the per-device build logs
/// followed by the exception message in the log text box.
/// </summary>
private void buildProgramMenuItem_Click(object sender, EventArgs e)
{
    if (editorTextBox.Text.Length == 0)
    {
        logTextBox.Text = "No source.";
        return;
    }

    string[] logContent;

    ComputeContextPropertyList properties = new ComputeContextPropertyList(configForm.Platform);

    // The context and program are only needed for this one build; dispose them so native
    // OpenCL handles are not leaked on every click.
    using (ComputeContext context = new ComputeContext(configForm.Devices, properties, null, IntPtr.Zero))
    using (ComputeProgram program = new ComputeProgram(context, editorTextBox.Text))
    {
        try
        {
            program.Build(configForm.Devices, configForm.Options, null, IntPtr.Zero);
            logContent = new string[] { "Build succeeded." };
        }
        catch (Exception exception)
        {
            List<string> lineList = new List<string>();
            foreach (ComputeDevice device in context.Devices)
            {
                string header = "PLATFORM: " + configForm.Platform.Name + ", DEVICE: " + device.Name;
                lineList.Add(header);

                // Split the build log into lines for the multi-line text box.
                using (StringReader reader = new StringReader(program.GetBuildLog(device)))
                {
                    string line = reader.ReadLine();
                    while (line != null)
                    {
                        lineList.Add(line);
                        line = reader.ReadLine();
                    }
                }

                lineList.Add("");
                lineList.Add(exception.Message);
            }
            logContent = lineList.ToArray();
        }
    }

    logTextBox.Lines = logContent;
}
/// <summary>
/// Generates OpenCL C source for a <c>FractalCalc</c> kernel specialised for the given
/// layers and fractal type, builds it on this factory's context and wraps the program and
/// kernel in an <see cref="OpenCLCalculator"/>.
/// </summary>
/// <param name="LayerData">Layers to evaluate. One output buffer argument and one private
/// <c>ProcessLayer</c> state struct are generated per layer.</param>
/// <param name="fractaltype">Selects the iteration formula emitted into the kernel loop.</param>
/// <param name="code">OpenCL expression assigned to <c>newx</c> for the DIVERGENT and
/// CONVERGENT fractal types; inserted into the source verbatim.</param>
/// <param name="deflayer">Layer whose deactivation sets the kernel's <c>end</c> flag.</param>
/// <returns>A calculator holding the context, the built program and the kernel.</returns>
/// <exception cref="NotSupportedException"><paramref name="fractaltype"/> is not handled.</exception>
/// <exception cref="NotImplementedException">A layer uses MPL_CHECK_TRIANGLE_SMOOTH.</exception>
/// <exception cref="Exception">The generated program failed to build; the message contains
/// the build log and the inner exception is the original <c>ComputeException</c>.</exception>
public ICalculator GenFractalCalc(List<ProcessLayer> LayerData, FractalType fractaltype, string code, ProcessLayer deflayer)
{
    // Complex-number helpers, the per-layer state struct and the start of the kernel signature.
    string macros = @"
#pragma OPENCL EXTENSION cl_amd_printf : enable
inline float ABS(float a) { return a>0?a:-a; }
inline float ARGC(float2 a) { return atan2(a.y,a.x); }
inline float NORM(float2 a) { return a.x*a.x+a.y*a.y; }
inline float ABSC(float2 a) { return sqrt(NORM(a)); }
inline float2 MULC(float2 a, float2 b) { return (float2)( a.x*b.x-a.y*b.y, a.y*b.x+a.x*b.y ); }
inline float2 DIVC(float2 a, float2 b) { return (float2)( (a.x*b.x+a.y*b.y)/(b.x*b.x+b.y*b.y), (a.y*b.x-a.x*b.y)/(b.x*b.x+b.y*b.y) ); }
inline float2 lnc(float2 c) { float r = ABSC(c); float a = ARGC(c); return (float2)(log(r),a); }
inline float2 arctanc(float2 c) { float2 io = (float2)(0.0f,1.0f); float2 two = (float2)(2.0f,0.0f); float2 one = (float2)(1.0f,0.0f); return (float2)(MULC(DIVC(io,two),lnc(one - MULC(io,c))-lnc(one + MULC(io,c)))); }
inline float2 powc(float2 c, float p) { if (NORM(c)==0) { return (float2)(0.0f,0.0f); } else { float r = pow(ABSC(c),p); float a = ARGC(c)*p; return (float2)(r*cos(a),r*sin(a)); } }
struct ProcessLayer { float2 c_old2x; float2 c_oldx; float2 c_x; float2 c_resx; float c_calc; float c_cmean; float c_cvarsx; float c_cvariance; int c_active; int c_isin; int c_n; int c_resn; };
kernel void FractalCalc ( global read_only float2* in_x, global read_only float2* in_c,
";

    StringBuilder kernel = new StringBuilder(macros);

    // Finish the kernel signature: one output buffer per layer.
    for (int i = 0; i < LayerData.Count; i++)
    {
        kernel.Append(" global write_only struct ProcessLayer* out_p" + i);
        kernel.Append(i + 1 == LayerData.Count ? "\n){" : ",\n");
    }

    // Work out which running statistics the layers need.
    bool hastriangle = false;
    bool fractdiv = true;
    SeqType modesused = 0;
    foreach (var it in LayerData)
    {
        if (it.c_checktype.HasFlag(SeqCheck.MPL_CHECK_TRIANGLE) || it.c_checktype.HasFlag(SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH))
        {
            hastriangle = true;
        }
        modesused |= it.c_seqtype;
    }

    // Each statistic is computed from the previous one: stddev <- variance <- varsx <- mean.
    if (modesused.HasFlag(SeqType.MPL_SEQ_STDDEV))
    {
        modesused |= SeqType.MPL_SEQ_VARIANCE;
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_VARIANCE))
    {
        modesused |= SeqType.MPL_SEQ_VARSX;
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_VARSX))
    {
        modesused |= SeqType.MPL_SEQ_MEAN;
    }

    // Kernel-local variables.
    foreach (string name in new[] { "sumx", "meanx", "varsx", "variacex", "sdx", "minx", "maxx", "deltax", "deltac" })
    {
        kernel.Append("float2 " + name + " = (float2)(0.0f,0.0f);");
    }
    foreach (string name in new[] { "delta", "newxnorm", "lowbound", "newd" })
    {
        kernel.Append("float " + name + " = 0.0f;");
    }
    kernel.Append("int end = 0;");
    kernel.Append("int n = 0;");
    kernel.Append("float2 newx = (float2)(0.0f,0.0f);");
    kernel.Append("int index = get_global_id(0);");
    kernel.Append("float2 x = in_x[index];");
    kernel.Append("float2 c = in_c[index];");

    // One private state struct per layer.
    for (int i = 0; i < LayerData.Count; i++)
    {
        kernel.Append("struct ProcessLayer p" + i + ";");
        foreach (string init in new[] { ".c_active = 1;", ".c_isin = 0;", ".c_x = x;", ".c_oldx = x;", ".c_old2x = x;", ".c_calc = 0;", ".c_cmean = 0;", ".c_cvarsx = 0;", ".c_cvariance = 0;" })
        {
            kernel.Append("p" + i + init);
        }
    }
    kernel.Append("struct ProcessLayer* p = 0;");

    if (hastriangle)
    {
        if (fractaltype == FractalType.FRACTAL_TYPE_MANDEL)
        {
            kernel.Append("float trinorm = ABSC(c);");
        }
        else
        {
            kernel.Append("float trinorm = NORM(c);");
        }
    }

    // Main iteration loop.
    kernel.Append("while (!end) {");
    kernel.Append("n++;");

    switch (fractaltype)
    {
        case FractalType.FRACTAL_TYPE_MANDEL:
            kernel.Append("newx = (float2)(x.x*x.x - x.y*x.y,2*x.x*x.y) + c;");
            break;
        case FractalType.FRACTAL_TYPE_MANDEL_N:
            // NOTE(review): "pr" is not declared anywhere in the generated source visible here - confirm.
            kernel.Append("newx = powc(x,pr) + c;");
            break;
        case FractalType.FRACTAL_TYPE_BURNINGSHIP:
            kernel.Append("newx = (float2)(x.x*x.x-x.y*x.y,2*ABS(x.x*x.y)) + c;");
            break;
        case FractalType.FRACTAL_TYPE_BURNINGSHIP_N:
            // (float2)(...) is required: a bare "(a,b)" is a comma expression that yields only b.
            kernel.Append("newx = powc((float2)(ABS(x.x),ABS(x.y)),pr) + c;");
            break;
        case FractalType.FRACTAL_TYPE_DIVERGENT:
            kernel.Append("newx = " + code + ";");
            break;
        case FractalType.FRACTAL_TYPE_CONVERGENT:
            kernel.Append("newx = " + code + ";");
            // Convergent fractals bail out when the value drops below the limit.
            fractdiv = false;
            break;
        default:
            throw new NotSupportedException("Unknown FractalType");
    }

    // Running statistics over the sequence of newx values.
    if (modesused.HasFlag(SeqType.MPL_SEQ_SUM))
    {
        kernel.Append("sumx += newx;");
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_MEAN))
    {
        kernel.Append("deltax = newx-meanx;");
        kernel.Append("meanx += deltax/(float)n;");
        if (modesused.HasFlag(SeqType.MPL_SEQ_VARSX))
        {
            kernel.Append("varsx += MULC(deltax,(newx-meanx));");
            if (modesused.HasFlag(SeqType.MPL_SEQ_VARIANCE))
            {
                kernel.Append("if (n!=1) {");
                kernel.Append("variacex = varsx / (float)((float)n-(float)1.0f);");
                if (modesused.HasFlag(SeqType.MPL_SEQ_STDDEV))
                {
                    kernel.Append("sdx = powc(variacex,0.5f);");
                }
                kernel.Append("}");
            }
        }
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_MIN))
    {
        kernel.Append("if (n==1) minx = newx; else {");
        kernel.Append("if (NORM(newx)<NORM(minx)) { minx = newx; } }");
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_MAX))
    {
        kernel.Append("if (n==1) maxx = newx; else {");
        kernel.Append("if (NORM(newx)>NORM(maxx)) { maxx = newx; } }");
    }
    if (modesused.HasFlag(SeqType.MPL_SEQ_DELTA))
    {
        // The terminating ';' was missing, which made the generated source invalid.
        kernel.Append("deltax = newx - x;");
    }

    // Per-layer update code.
    for (int i = 0; i < LayerData.Count; i++)
    {
        var p = LayerData[i];
        kernel.Append("p = &p" + i + ";");
        kernel.Append("if (p->c_active) {");
        kernel.Append("p->c_n = n;");
        kernel.Append("p->c_old2x = p->c_oldx;");
        kernel.Append("p->c_oldx = p->c_x;");

        // Which sequence value this layer tracks.
        string tracked;
        switch (p.c_seqtype)
        {
            case SeqType.MPL_SEQ_NORMAL: tracked = "newx"; break;
            case SeqType.MPL_SEQ_SUM: tracked = "sumx"; break;
            case SeqType.MPL_SEQ_MEAN: tracked = "meanx"; break;
            case SeqType.MPL_SEQ_VARSX: tracked = "varsx"; break;
            case SeqType.MPL_SEQ_VARIANCE: tracked = "variacex"; break;
            case SeqType.MPL_SEQ_STDDEV: tracked = "sdx"; break;
            case SeqType.MPL_SEQ_MIN: tracked = "minx"; break;
            case SeqType.MPL_SEQ_MAX: tracked = "maxx"; break;
            case SeqType.MPL_SEQ_DELTA: tracked = "deltax"; break;
            default: tracked = "newx"; break;
        }
        kernel.Append("p->c_x = " + tracked + ";");

        // Scalar measurement "newd" taken from the tracked value.
        kernel.Append("newd = 0;");
        switch (p.c_checktype)
        {
            case SeqCheck.MPL_CHECK_SMOOTH:
                if (fractdiv)
                {
                    kernel.Append("newd = exp(-ABSC(p->c_x));");
                }
                else
                {
                    kernel.Append("newd = exp(-ABSC(p->c_x-p->c_oldx));");
                }
                break;
            case SeqCheck.MPL_CHECK_REAL:
                kernel.Append("newd = p->c_x.x;");
                break;
            case SeqCheck.MPL_CHECK_IMAG:
                kernel.Append("newd = p->c_x.y;");
                break;
            case SeqCheck.MPL_CHECK_ARG:
                kernel.Append("newd = atan2(p->c_x.y,p->c_x.x);");
                break;
            case SeqCheck.MPL_CHECK_ABS:
                kernel.Append("newd = ABSC(p->c_x);");
                break;
            case SeqCheck.MPL_CHECK_CURVATURE:
                // p is a pointer, so members need "->"; the helper is named arctanc; and the
                // vector result of isnotequal must be reduced with any() to be a condition.
                kernel.Append("if (any(isnotequal(p->c_oldx,p->c_old2x))) { newd = ABSC(arctanc(DIVC(p->c_x-p->c_oldx,p->c_oldx-p->c_old2x))); } else newd = 0;");
                break;
            case SeqCheck.MPL_CHECK_TRIANGLE:
            case SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH:
                // Both check types emit the same per-iteration code.
                if (fractaltype == FractalType.FRACTAL_TYPE_MANDEL)
                {
                    kernel.Append("newxnorm = NORM(p->c_oldx);");
                    kernel.Append("lowbound = ABS(newxnorm-trinorm);");
                    kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x)-lowbound)/(newxnorm+trinorm-lowbound);");
                }
                else
                {
                    kernel.Append("newxnorm = ABSC(p->c_x);");
                    kernel.Append("lowbound = ABS(newxnorm-trinorm);");
                    kernel.Append("if ((newxnorm+trinorm-lowbound)==0) newd = 0; else newd = (ABSC(p->c_x-c)-lowbound)/(newxnorm+trinorm-lowbound);");
                }
                break;
            case SeqCheck.MPL_CHECK_ORBIT_TRAP:
                switch (p.c_orbittraptype)
                {
                    case OrbitTrap.MPL_ORBIT_TRAP_POINT:
                        // NOTE(review): the generated struct ProcessLayer has no c_pointA member - confirm.
                        kernel.Append("newd = ABSC(p->c_x - p->c_pointA);");
                        break;
                    case OrbitTrap.MPL_ORBIT_TRAP_LINE:
                        if ((p.c_pointA.Real) == 1)
                        {
                            kernel.Append("newd = ABS(p->c_x.x);");
                        }
                        else
                        {
                            kernel.Append("newd = ABS(p->c_x.y);");
                        }
                        break;
                    case OrbitTrap.MPL_ORBIT_TRAP_GAUSS:
                        // Distance to the nearest point with integer coordinates; needs a float2 literal.
                        kernel.Append("newd = ABSC((float2)(round(p->c_x.x),round(p->c_x.y)) - p->c_x);");
                        break;
                }
                break;
        }

        // How newd is accumulated into the layer's result c_calc.
        switch (p.c_checkseqtype)
        {
            case SeqType.MPL_SEQ_NORMAL:
                kernel.Append("p->c_calc = newd;");
                break;
            case SeqType.MPL_SEQ_SUM:
                kernel.Append("p->c_calc += newd;");
                break;
            case SeqType.MPL_SEQ_MEAN:
                // Divided by the iteration count once the layer becomes inactive (see below).
                kernel.Append("p->c_calc += newd;");
                break;
            case SeqType.MPL_SEQ_VARSX:
                kernel.Append("delta = newd - p->c_cmean;");
                kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
                kernel.Append("p->c_calc += delta * (newd - p->c_cmean);");
                break;
            case SeqType.MPL_SEQ_VARIANCE:
                kernel.Append("delta = newd - p->c_cmean;");
                kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
                kernel.Append("p->c_cvarsx += delta * (newd - p->c_cmean);");
                kernel.Append("if (p->c_n!=1) { p->c_calc = p->c_cvarsx/(p->c_n-1.0f); }");
                break;
            case SeqType.MPL_SEQ_STDDEV:
                kernel.Append("delta = newd - p->c_cmean;");
                kernel.Append("p->c_cmean = p->c_cmean + delta / p->c_n;");
                kernel.Append("p->c_cvarsx += delta * (newd - p->c_cmean);");
                kernel.Append("if (p->c_n!=1) { p->c_cvariance = p->c_cvarsx/((float)p->c_n-1.0f);");
                kernel.Append("p->c_calc = sqrt(p->c_cvariance);");
                kernel.Append("}");
                break;
            case SeqType.MPL_SEQ_MIN:
                kernel.Append("if (p->c_n==1) p->c_calc = newd; else if (p->c_calc>newd) { p->c_calc = newd; p->c_resx = p->c_x; p->c_resn = p->c_n; };");
                break;
            case SeqType.MPL_SEQ_MAX:
                kernel.Append("if (p->c_n==1) p->c_calc = newd; else if (p->c_calc<newd) { p->c_calc = newd; p->c_resx = p->c_x; p->c_resn = p->c_n; };");
                break;
            case SeqType.MPL_SEQ_DELTA:
                kernel.Append("p->c_calc = newd-p->c_calc;");
                break;
            default:
                kernel.Append("p->c_calc = newd;");
                break;
        }

        // Bailout test: divergent fractals stop above the limit, convergent ones below it.
        // The limit is always formatted with {0:E} so the "f" suffix follows a literal that
        // contains an exponent ("{0:G}" can print "4", and "4f" is not a valid float literal).
        string cmp = fractdiv ? ">" : "<";
        string limit = " " + cmp + " {0:E}f";
        string bailoutCondition;
        if (p.c_convchktype == ConvCheck.MPL_CONVCHK_REAL)
        {
            bailoutCondition = "p->c_x.x*p->c_x.x" + limit;
        }
        else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_IMAG)
        {
            bailoutCondition = "p->c_x.y*p->c_x.y" + limit;
        }
        else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_OR)
        {
            bailoutCondition = "(p->c_x.y*p->c_x.y" + limit + ") || (p->c_x.x*p->c_x.x" + limit + ")";
        }
        else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_AND)
        {
            bailoutCondition = "(p->c_x.y*p->c_x.y" + limit + ") && (p->c_x.x*p->c_x.x" + limit + ")";
        }
        else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_MANH)
        {
            bailoutCondition = "(ABS(p->c_x.y)+ABS(p->c_x.x))*(ABS(p->c_x.y)+ABS(p->c_x.x))" + limit;
        }
        else if (p.c_convchktype == ConvCheck.MPL_CONVCHK_MANR)
        {
            // The original string closed one parenthesis too many here.
            bailoutCondition = "(p->c_x.y+p->c_x.x)*(p->c_x.y+p->c_x.x)" + limit;
        }
        else
        {
            bailoutCondition = "NORM(p->c_x)" + limit;
        }
        kernel.AppendFormat(CultureInfo.InvariantCulture, "if (" + bailoutCondition + ") p->c_active = 0;", p.c_bailout);

        // Iteration limit: the point is treated as inside the set.
        kernel.AppendFormat(CultureInfo.InvariantCulture, "if (p->c_n>{0}) {{ p->c_active = 0; p->c_isin = 1; }}", p.c_nlimit);

        if (p.c_checktype == SeqCheck.MPL_CHECK_TRIANGLE_SMOOTH)
        {
            throw new NotImplementedException("Smooth triangle algorithm is unavailable in this CalculatorFactory");
        }
        else if (p.c_checkseqtype == SeqType.MPL_SEQ_MEAN)
        {
            kernel.Append("if (p->c_active == 0) p->c_calc /= (float)p->c_n+1.0f;");
        }

        if (p == deflayer)
        {
            // The default layer going inactive ends the loop for this work item.
            kernel.Append("if (p->c_active == 0) end = 1;");
        }
        kernel.Append("}");
    }

    kernel.Append("x = newx; }");

    // Copy each layer's private state to its output buffer.
    for (int i = 0; i < LayerData.Count; i++)
    {
        kernel.Append("out_p" + i + "[index] = p" + i + ";");
    }
    kernel.Append("}");

    // Insert line breaks after ';' and '}' so build-log line numbers are usable.
    ComputeProgram prg = new ComputeProgram(_context, kernel.Replace(";", ";\n").Replace("}", "}\n").ToString());
    try
    {
        prg.Build(null, null, null, IntPtr.Zero);
    }
    catch (ComputeException e)
    {
        throw new Exception("Error while building: " + prg.GetBuildLog(_context.Devices[0]), e);
    }

    ComputeKernel krnl = prg.CreateKernel("FractalCalc");
    return new OpenCLCalculator(_context, prg, krnl);
}
// initialize renderer: takes in command line parameters passed by template code.
// Sets up the accumulator, scene, camera and tile counts, then loads and builds the
// OpenCL program from ../../assets/GPUCode.cl and creates its "Test" kernel.
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    rngQueue = new ConcurrentQueue<Random>();

    // number of tiles needed to cover the screen, rounded up
    xtiles = (int)Math.Ceiling((float)screen.width / TILESIZE);
    ytiles = (int)Math.Ceiling((float)screen.height / TILESIZE);

#if DEBUG
    RTTools.factorials[0] = Vector<float>.One;
    for (int i = 1; i < RTTools.TERMS * 2; i++)
        RTTools.factorials[i] = RTTools.factorials[i - 1] * i;

    //for (int i = 0; i < RTTools.TERMS; i++)
    //    RTTools.atanStuff[i] = (new Vector<float>((float)Math.Pow(2, 2 * i)) * (RTTools.factorials[i] * RTTools.factorials[i])) / RTTools.factorials[2 * i + 1];
#endif

    #region OpenCL related things

    randNums = new float[screen.width * screen.height + 25];

    // read the kernel source; the using block closes the file even if reading throws
    string clSource;
    using (var streamReader = new StreamReader("../../assets/GPUCode.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    platform = ComputePlatform.Platforms[gpuPlatform];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    program = new ComputeProgram(context, clSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Test");
    }
    catch
    {
        // Print the build log and break into the debugger; initialization then continues
        // without a kernel.
        Console.Write("error in kernel code:\n");
        Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
        Debugger.Break();
    }

    eventList = new ComputeEventList();
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    #endregion
}
/// <summary>
/// Creates a conjugate gradient solver that runs on OpenCL, spreading the
/// rows over every GPU device of the first platform.
/// </summary>
/// <param name="count">Number of elements (forwarded to the base constructor)</param>
/// <param name="maxNonZeroCount">Forwarded to the base constructor</param>
/// <param name="_minIteration">Forwarded to the base constructor</param>
/// <param name="_maxIteration">Forwarded to the base constructor</param>
/// <param name="allowableResidual">Forwarded to the base constructor</param>
/// <exception cref="ApplicationException">
/// The OpenCL program failed to build; the message is the build log of the first device.
/// </exception>
public ConjugateGradientParallelGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
    : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
{
    // Get the platform (always the first one)
    var platform = ComputePlatform.Platforms[0];

    // Create a context over the platform's GPU devices
    var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    // Get the devices available in the context
    var devices = context.Devices;

    // Local work size: the largest work-group size the first device reports
    // (the original note says this is meant for the dot-product computation)
    this.localWorkSize = (int)devices[0].MaxWorkGroupSize;

    // Create the program
    var program = new ComputeProgram(context, Properties.Resources.Mgcg);

    // Try to build it
    try
    {
        string realString = "double";

        program.Build(devices,
            string.Format(" -D REAL={0} -D MAX_NONZERO_COUNT={1}", realString, this.A.MaxNonzeroCountPerRow),
            null, IntPtr.Zero);
    }
    // If the build failed
    catch(BuildProgramFailureComputeException ex)
    {
        // Throw an exception carrying the build log of the first device
        throw new ApplicationException(program.GetBuildLog(devices[0]), ex);
    }

    // Number of elements each device computes
    countPerDevice = new int[devices.Count];

    // Largest number of elements a single device computes (ceiling of Count / device count)
    int maxCountPerDevice = (int)Math.Ceiling((double)this.Count / devices.Count);

    // First element index handled by each device
    offset = new int[devices.Count];

    // Command queue array
    queues = new ComputeCommandQueue[devices.Count];

    // Kernel arrays (one kernel instance per device)
    addVectorVector = new ComputeKernel[devices.Count];
    multiplyVectorVector = new ComputeKernel[devices.Count];
    reductionSum = new ComputeKernel[devices.Count];
    reductionMax = new ComputeKernel[devices.Count];
    matrix_x_Vector = new ComputeKernel[devices.Count];

    // Buffer arrays (one buffer per device)
    buffersA = new ComputeBuffer<double>[devices.Count];
    buffersColumnIndeces = new ComputeBuffer<int>[devices.Count];
    buffersNonzeroCounts = new ComputeBuffer<int>[devices.Count];
    buffersB = new ComputeBuffer<double>[devices.Count];
    buffersX = new ComputeBuffer<double>[devices.Count];
    buffersAp = new ComputeBuffer<double>[devices.Count];
    buffersP = new ComputeBuffer<double>[devices.Count];
    buffersR = new ComputeBuffer<double>[devices.Count];
    buffersForDot = new ComputeBuffer<double>[devices.Count];
    buffersForMax = new ComputeBuffer<double>[devices.Count];
    bufferAllVector = new ComputeBuffer<double>[devices.Count];
    answerForReduction = new double[devices.Count];
    allVector = new double[this.Count];

    // For every device
    for(int i = 0; i < devices.Count; i++)
    {
        // Number of elements for this device: the first
        // (maxCountPerDevice * devices.Count - Count) devices get one element
        // fewer, so the per-device counts add up to exactly Count
        countPerDevice[i] = maxCountPerDevice - ((i < maxCountPerDevice * devices.Count - this.Count) ? 1 : 0);

        // Start index: running sum of the counts of the preceding devices
        offset[i] = (i == 0) ? 0 : (offset[i - 1] + countPerDevice[i - 1]);

        // Create the queue
        queues[i] = new ComputeCommandQueue(context, devices[i], ComputeCommandQueueFlags.None);

        // Create the kernels
        addVectorVector[i] = program.CreateKernel("AddVectorVector");
        multiplyVectorVector[i] = program.CreateKernel("MultiplyVectorVector");
        reductionSum[i] = program.CreateKernel("ReductionSum");
        reductionMax[i] = program.CreateKernel("ReductionMaxAbsolute");
        matrix_x_Vector[i] = program.CreateKernel("Matrix_x_Vector");

        // Matrix buffers: countPerDevice[i] rows of MaxNonzeroCountPerRow entries each
        buffersA[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[i] * this.A.MaxNonzeroCountPerRow);
        buffersColumnIndeces[i] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[i] * this.A.MaxNonzeroCountPerRow);
        buffersNonzeroCounts[i] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[i]);

        // Buffers for the right-hand side vector, the unknowns, the search direction,
        // the residual, and the product of the matrix and the search direction
        buffersB[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly, countPerDevice[i]);
        buffersX[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);
        buffersAp[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);
        buffersP[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);
        buffersR[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);

        // Scratch buffers used by the computation; bufferAllVector is full length (Count)
        buffersForDot[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);
        buffersForMax[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, countPerDevice[i]);
        bufferAllVector[i] = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite, this.Count);
    }
}
/// <summary>
/// Sets up everything OpenCL-related for the sparse matrix-vector product:
/// context, per-device work split, queues, program, kernels and buffers.
/// </summary>
/// <param name="matrix">Matrix values; only its length is used here (size of the single-GPU matrix buffer).</param>
/// <param name="vector">Vector values; only its length is used here (size of the single-GPU vector and result buffers).</param>
/// <param name="nonzeroCount">Only its length is used here (size of the single-GPU nonzero-count buffer).</param>
/// <param name="columnIndeces">Only its length is used here (size of the single-GPU column-index buffer).</param>
/// <exception cref="ApplicationException">The OpenCL program failed to build; the message carries the build log.</exception>
static void InitializeOpenCL(Real[] matrix, Real[] vector, int[] nonzeroCount, int[] columnIndeces)
{
    // First platform, reported on the console
    var firstPlatform = ComputePlatform.Platforms[0];
    Console.WriteLine("プラットフォーム:{0} ({1})", firstPlatform.Name, firstPlatform.Version);

    // Context over the platform's GPU devices
    var context = new ComputeContext(
        Cloo.ComputeDeviceTypes.Gpu,
        new ComputeContextPropertyList(firstPlatform),
        null,
        IntPtr.Zero);

    var devices = context.Devices;
    Console.WriteLine("デバイス数:{0}", devices.Count);

    // Per-device bookkeeping
    countPerDevice = new int[devices.Count];
    int maxCountPerDevice = (int)Math.Ceiling((double)ROW_COUNT / devices.Count);
    offset = new int[devices.Count];
    localSize = new int[devices.Count];

    // The first 'shortfall' devices take one row fewer so the counts sum to ROW_COUNT
    var shortfall = maxCountPerDevice * devices.Count - ROW_COUNT;
    var nextOffset = 0;
    for (int d = 0; d < devices.Count; d++)
    {
        if (d < shortfall)
        {
            countPerDevice[d] = maxCountPerDevice - 1;
        }
        else
        {
            countPerDevice[d] = maxCountPerDevice;
        }

        // Start row = sum of the counts of all preceding devices
        offset[d] = nextOffset;
        nextOffset += countPerDevice[d];

        // Fixed local item count (the device's MaxWorkGroupSize is not used)
        localSize[d] = 8;
    }

    // One command queue per device; print each device as it is set up
    queues = new ComputeCommandQueue[devices.Count];
    for (int d = 0; d < devices.Count; d++)
    {
        var current = devices[d];
        queues[d] = new ComputeCommandQueue(context, current, ComputeCommandQueueFlags.None);
        Console.WriteLine("* {0} ({1})", current.Name, current.Vendor);
    }

    // Build the program, passing the element type and row width as defines
    var program = new ComputeProgram(context, Properties.Resources.Matrix_x_Vector);
    try
    {
        string realString;
        if (typeof(Real) == typeof(Double))
        {
            realString = "double";
        }
        else
        {
            realString = "float";
        }

        var buildOptions = string.Format(" -D REAL={0} -D MAX_NONZERO_COUNT={1} -Werror", realString, MAX_NONZERO_COUNT);
        program.Build(devices, buildOptions, null, IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException ex)
    {
        // Surface the build log of the first device together with the original message
        var buildLog = program.GetBuildLog(devices[0]);
        throw new ApplicationException(string.Format("{0}\n{1}", ex.Message, buildLog), ex);
    }

    // One kernel object per (kernel variant, device) pair
    matrix_x_Vector = new ComputeKernel[KERNEL_COUNT, devices.Count];
    for (int variant = 0; variant < KERNEL_COUNT; variant++)
    {
        for (int d = 0; d < devices.Count; d++)
        {
            matrix_x_Vector[variant, d] = program.CreateKernel("Matrix_x_Vector" + variant);
        }
    }

    // Buffers for the single-GPU path
    bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadWrite, vector.Length);
    bufferMatrix = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, matrix.Length);
    bufferVector = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, vector.Length);
    bufferColumnIndeces = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, columnIndeces.Length);
    bufferNonzeroCount = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, nonzeroCount.Length);

    // Buffers for the multi-GPU path
    buffersResult = new ComputeBuffer<Real>[devices.Count];
    buffersMatrix = new ComputeBuffer<Real>[devices.Count];
    buffersVector = new ComputeBuffer<Real>[devices.Count];
    buffersColumnIndeces = new ComputeBuffer<int>[devices.Count];
    buffersNonzeroCount = new ComputeBuffer<int>[devices.Count];
    for (int d = 0; d < devices.Count; d++)
    {
        var rows = countPerDevice[d];
        var packedLength = countPerDevice[d] * MAX_NONZERO_COUNT;

        buffersResult[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly, rows);
        buffersMatrix[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, packedLength);
        // every device gets a full-length (ROW_COUNT) vector buffer
        buffersVector[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, ROW_COUNT);
        buffersColumnIndeces[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, packedLength);
        buffersNonzeroCount[d] = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly, rows);
    }
}
/// <summary>
/// Creates the OpenCL-based computation program: builds the SinAcceleration
/// program for all devices of the first platform and registers a handler that
/// uploads newly added particles to device buffers on the next preparation step.
/// </summary>
/// <param name="maxDt">Initial time step</param>
/// <param name="a">Amplitude</param>
/// <param name="omega">Angular velocity</param>
/// <exception cref="BuildCLException">
/// The program failed to build; constructed from the first program source and the
/// build log of the first device.
/// </exception>
public ComputerCL(double maxDt, double a, double omega)
    : base(maxDt, a, omega)
{
    // Get the platform and its devices
    this.Platform = ComputePlatform.Platforms[0];
    this.Devices = this.Platform.Devices;

    // Create the context
    var context = new ComputeContext(this.Devices, new ComputeContextPropertyList(this.Platform), null, IntPtr.Zero);

    // Create the queue (on the first device only)
    this.queue = new ComputeCommandQueue(context, this.Devices[0], ComputeCommandQueueFlags.None);

    // Create the program
    var program = new ComputeProgram(context, Properties.Resources.SinAcceleration);

    // Try to build it
    try
    {
        program.Build(this.Devices, null, null, IntPtr.Zero);
    }
    // If the build failed
    catch(BuildProgramFailureComputeException ex)
    {
        // Throw an exception (note: 'ex' itself is not passed along)
        throw new BuildCLException(program.Source[0], program.GetBuildLog(this.Devices[0]));
    }

    // Create the kernel
    this.sinAccelerationKernel = program.CreateKernel("SinAcceleration");

    // Preparation initially does nothing
    this.prepare = () => { };

    // When a particle is added
    base.ParticleAdded += (sender, e) =>
    {
        // Install the work to perform at preparation time
        this.prepare = () =>
        {
            // Set the particle count
            this.particleCount = this.inputParticles.Count;

            // Create the buffers (sized to the current particle count)
            this.bufferX = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferU = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferA = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferD = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly, this.particleCount);

            // Allocate the host-side input data
            var particlesX = new Vector4[this.particleCount];
            var particlesU = new Vector4[this.particleCount];
            var particlesA = new Vector4[this.particleCount];
            this.particlesD = new float[this.particleCount];
            this.particlesMaterial = new Material[this.particleCount];
            this.particlesType = new ParticleType[this.particleCount];

            // For every particle
            int i = 0;
            foreach(var particle in this.inputParticles)
            {
                // Copy the data; X, U and A are widened to Vector4 with a 0 fourth component
                particlesX[i] = new Vector4((Vector3)particle.X, 0);
                particlesU[i] = new Vector4((Vector3)particle.U, 0);
                particlesA[i] = new Vector4((Vector3)particle.A, 0);
                this.particlesD[i] = (float)particle.D;
                this.particlesMaterial[i] = particle.Material;
                this.particlesType[i] = particle.Type;

                i++;
            }

            // Transfer to the buffers (non-blocking; completed by the Finish() below)
            this.queue.WriteToBuffer(particlesX, this.bufferX, false, null);
            this.queue.WriteToBuffer(particlesU, this.bufferU, false, null);
            this.queue.WriteToBuffer(particlesA, this.bufferA, false, null);
            this.queue.WriteToBuffer(this.particlesD, this.bufferD, false, null);

            // Empty the input particle collection
            this.inputParticles.Clear();

            // Preparation becomes a no-op again until another particle is added
            this.prepare = () => { };

            // Wait until everything queued so far has completed
            queue.Finish();
        };
    };
}
/// <summary>
/// Creates the program from the queen kernel source and builds it for the
/// devices of <paramref name="context"/>.
/// </summary>
/// <param name="context">Context the program is created in.</param>
/// <returns>The successfully built program.</returns>
/// <remarks>
/// On a build failure the build log is written to the console, the program is
/// disposed and the original exception is rethrown.
/// </remarks>
private static ComputeProgram GetBuiltProgram(ComputeContext context)
{
    string queenKernelSource = GetQueenKernelSource();
    var program = new ComputeProgram(context, queenKernelSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
    }
    catch
    {
        try
        {
            // Ask for the log of a device that belongs to the context. The
            // platform's first device is not necessarily part of the context,
            // and a build log can only be queried for a device the program is
            // associated with.
            string log = program.GetBuildLog(context.Devices[0]);
            Console.WriteLine(log);
        }
        finally
        {
            // The caller never receives this program, so release it here.
            program.Dispose();
        }

        throw;
    }

    return program;
}
/// <summary>
/// Runs the two-stage OpenCL pipeline from convolution.cl: transforms the
/// fluorophore coordinates in place ("transform_fluorophores"), convolves them
/// into an image ("convolve_fluorophores"), then writes the result to disk.
/// </summary>
/// <param name="computeContext">Context in which the program, queue and buffers are created.</param>
/// <exception cref="SyntaxErrorException">A shift text box does not parse as an invariant-culture float.</exception>
private void CalculateConvolution(ComputeContext computeContext)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    // Shifts are parsed with the invariant culture ('.' as decimal separator).
    // NOTE(review): dy and dz are also read from textBoxShiftX - this looks like a
    // copy/paste slip; confirm the names of the Y/Z text boxes and switch to them.
    float dx;
    bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
    if (!shiftXParse)
        throw new SyntaxErrorException(", needs to be .");

    float dy;
    bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
    if (!shiftYParse)
        throw new SyntaxErrorException(", needs to be .");

    float dz;
    bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
    if (!shiftZParse)
        throw new SyntaxErrorException(", needs to be .");

    int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;

    Console.WriteLine("Computing...");
    Console.WriteLine("Reading kernel...");

    // convolution.cl is expected three directory levels above the working directory
    String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;

    String kernelString;
    using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
        kernelString = sr.ReadToEnd();

    Console.WriteLine("Reading kernel... done");

    // The same inverse pixel size is used for all three axes; parse it once.
    float inversePixelSize = 1.0f / float.Parse(textBoxPixelSize.Text);
    float[] selectedTransformation = Transformations.GetTransformation(
        (TransformationType)comboBoxTransform.SelectedItem,
        inversePixelSize, inversePixelSize, inversePixelSize,
        dx, dy, dz);

    // create openCL program
    ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);
    computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

    ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
    Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);

    String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
    Console.WriteLine("buildLog");
    if (buildLog.Equals("\n"))
        Console.WriteLine("\tbuildLog is empty...");
    else
        Console.WriteLine("\t" + buildLog);

    float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

    // Create a command queue
    ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);

    // Create buffers for the transform stage
    ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);
    ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);

    // Create the transformFluorophoresKernel and set its arguments
    ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");
    transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
    transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

    // Configure the work-item structure: one work item per four floats
    long[] globalWorkOffsetTransformFluorophoresKernel = null;
    long[] globalWorkSizeTransformFluorophoresKernel = new long[] { fluorophores.Length / 4 };
    long[] localWorkSizeTransformFluorophoresKernel = null;

    // Upload the inputs (blocking) and enqueue the transformFluorophoresKernel
    computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
    computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);

    computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);
    // computeCommandQueue.ExecuteTask(transformFluorophoresKernel, transformFluorophoresEvents);

    float[] transformedFluorophores = new float[fluorophores.Length];
    computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);

    computeCommandQueue.Finish();

    stopwatch.Stop();
    Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
    stopwatch.Reset();
    stopwatch.Start();

    // fluorophoresCoords are now transformed (done in place)

    // Create buffers for the convolve stage; the result buffer length is
    // pixelCount rounded up to a multiple of the local work-group size.
    const int convolve_kernel_lwgs = 16;
    int totalBuffer = (int) Math.Ceiling(pixelCount / (float)convolve_kernel_lwgs) * convolve_kernel_lwgs;

    ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer);

    // Create the convolveFluorophoresKernel and set its arguments
    ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores");
    convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
    convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
    convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
    convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
    convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
    convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

    // Configure the work-item structure.
    // NOTE(review): the global size is pixelCount while the local size is 16;
    // confirm pixelCount is always a multiple of 16 (the buffer is padded to
    // totalBuffer, the global size is not).
    long[] globalWorkOffsetTransformConvolveFluorophoresKernel = null;
    long[] globalWorkSizeTransformConvolveFluorophoresKernel = new long[] { pixelCount };
    long[] localWorkSizeTransformConvolveFluorophoresKernel = new long[] {convolve_kernel_lwgs};

    // Enqueue the convolveFluorophoresKernel for execution
    computeCommandQueue.Execute(convolveFluorophoresKernel, globalWorkOffsetTransformConvolveFluorophoresKernel, globalWorkSizeTransformConvolveFluorophoresKernel, localWorkSizeTransformConvolveFluorophoresKernel, null);

    float[] resultImageData = new float[totalBuffer];
    computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);

    computeCommandQueue.Finish();

    for (int i = 0; i < pixelCount; i++)
    {
        Console.WriteLine(resultImageData[i]);
    }

    Console.WriteLine("Writing data to file...");
    // CsvData.WriteToDisk("..\\..\\..\\output.csv", resultImageData);
    TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

    // Convert the float image to 16-bit gray, then to packed 32-bit values
    ushort[] ushortdata = new ushort[resultImageData.Length];
    for (int i = 0; i < resultImageData.Length; i++)
    {
        ushortdata[i] = (ushort)resultImageData[i];
    }

    uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

    // Unpack every 32-bit value into four consecutive bytes. All four
    // destination indices must be scaled by 4; writing the first byte to
    // bytes[index] overwrote bytes of earlier pixels and left every
    // bytes[4 * index] slot (index > 0) unset.
    byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
    for (int index = 0; index < convertGray16ToRgb.Length; index++)
    {
        byte[] bytes1 = BitConverter.GetBytes(convertGray16ToRgb[index]);
        bytes[4 * index] = bytes1[0];
        bytes[4 * index + 1] = bytes1[1];
        bytes[4 * index + 2] = bytes1[2];
        bytes[4 * index + 3] = bytes1[3];
    }

    using (MemoryStream ms = new MemoryStream(bytes))
    using (Image image = Bitmap.FromStream(ms))
    {
        // the image is disposed together with the stream it was read from
        image.Save("c:\\temp.bmp");
    }

    Console.WriteLine("Writing data to file... done");

    stopwatch.Stop();
    Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
    Console.WriteLine("Computing... done");
}
/// <summary>
/// Attempts to initialize OpenCL for the selected GPU. Does nothing when the
/// kernel has already been created.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No OpenCL device matches the handle of the configured GPU.
/// </exception>
/// <exception cref="Exception">
/// The kernel program failed to build; the message is the build log and the
/// original <c>ComputeException</c> is the inner exception.
/// </exception>
private void InitializeOpenCL()
{
    // only initialize once
    if (clKernel != null)
        return;

    // select the device we've been instructed to use
    clDevice = ComputePlatform.Platforms
        .SelectMany(i => i.Devices)
        .SingleOrDefault(i => i.Handle.Value == Gpu.CLDeviceHandle.Value);

    // SingleOrDefault yields null when nothing matches; fail with a clear
    // message instead of a NullReferenceException on clDevice.Platform below
    if (clDevice == null)
        throw new InvalidOperationException("No OpenCL device matches the selected GPU handle.");

    // context we'll be working underneath
    clContext = new ComputeContext(new ComputeDevice[] { clDevice }, new ComputeContextPropertyList(clDevice.Platform), null, IntPtr.Zero);

    // queue to control device
    clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

    // buffers to store kernel output
    // NOTE(review): both are created with ComputeMemoryFlags.ReadOnly although
    // they are described as output buffers - confirm against the kernel.
    clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
    clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

    // kernel code, read from the embedded resource
    string kernelCode;
    using (var rdr = new StreamReader(GetType().Assembly.GetManifestResourceStream("BitMaker.Miner.Gpu.DiabloMiner.cl")))
        kernelCode = rdr.ReadToEnd();

    clProgram = new ComputeProgram(clContext, kernelCode);

    try
    {
        // build kernel for device; WORKSIZE is the device's maximum work-group size
        clProgram.Build(new ComputeDevice[] { clDevice }, "-D WORKSIZE=" + clDevice.MaxWorkGroupSize, null, IntPtr.Zero);
    }
    catch (ComputeException ex)
    {
        // keep the original failure as the inner exception so its error code
        // and stack trace are not lost
        throw new Exception(clProgram.GetBuildLog(clDevice), ex);
    }

    clKernel = clProgram.CreateKernel("search");
}
// Initialize the renderer: stores the command line parameters passed by the
// template code, sets up the CPU-side state, then builds the OpenCL program
// and binds every kernel argument.
//   rt          - stored in runningTime
//   gpu         - stored in useGPU
//   platformIdx - index into ComputePlatform.Platforms used for the OpenCL context
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // Determine tile width and height
    tileCount = GreatestDiv(screen.width, screen.height);
    tileWidth = screen.width/tileCount;
    tileHeight = screen.height/tileCount;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    // Init OpenCL
    ComputePlatform platform = ComputePlatform.Platforms[gpuPlatform];
    context = new ComputeContext(
        ComputeDeviceTypes.Gpu,
        new ComputeContextPropertyList(platform),
        null,
        IntPtr.Zero
        );

    // 'using' guarantees the file handle is released even when reading fails
    string clSource;
    using (var streamReader = new StreamReader("../../program.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    ComputeProgram program = new ComputeProgram(context, clSource);

    // try to compile
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
    }
    catch
    {
        Console.Write("error in kernel code:\n");
        Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");

        // Propagate the build failure itself. Continuing would only fail a
        // moment later in CreateKernel with a less specific error.
        throw;
    }
    kernel = program.CreateKernel("device_function");

    // setup RNG: one seed per pixel
    rngSeed = new int[screen.width * screen.height];
    Random r = RTTools.GetRNG();
    for (int i = 0; i < rngSeed.Length; i++)
        rngSeed[i] = r.Next();

    // import buffers etc to GPU
    Vector3[] sphereOrigins = Scene.GetOrigins;
    float[] sphereRadii = Scene.GetRadii;

    var FlagRW = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
    var FlagR = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;

    rngBuffer = new ComputeBuffer<int>(context, FlagRW, rngSeed);
    screenPixels = new ComputeBuffer<int>(context, FlagRW, screen.pixels);
    skyBox = new ComputeBuffer<float>(context, FlagR, scene.skybox);
    originBuffer = new ComputeBuffer<Vector3>(context, FlagR, sphereOrigins);
    radiusBuffer = new ComputeBuffer<float>(context, FlagR, sphereRadii);
    accBuffer = new ComputeBuffer<Vector3>(context, FlagRW, accumulator);

    // kernel arguments 0-8: camera description and screen size (as floats)
    kernel.SetValueArgument(0, camera.p1);
    kernel.SetValueArgument(1, camera.p2);
    kernel.SetValueArgument(2, camera.p3);
    kernel.SetValueArgument(3, camera.up);
    kernel.SetValueArgument(4, camera.right);
    kernel.SetValueArgument(5, camera.pos);
    kernel.SetValueArgument(6, camera.lensSize);
    kernel.SetValueArgument(7, (float)screen.width);
    kernel.SetValueArgument(8, (float)screen.height);

    // kernel arguments 9-14: device buffers
    kernel.SetMemoryArgument(9, rngBuffer);
    kernel.SetMemoryArgument(10, screenPixels);
    kernel.SetMemoryArgument(11, skyBox);
    kernel.SetMemoryArgument(12, originBuffer);
    kernel.SetMemoryArgument(13, radiusBuffer);
    kernel.SetMemoryArgument(14, accBuffer);

    queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    // One work item per pixel. A bare "{ ... }" initializer is only legal in a
    // declaration; an assignment needs the explicit array-creation form.
    workSize = new long[] { screen.width * screen.height };
}
/// <summary>
/// Sets up everything OpenCL-related for the vector dot product: context,
/// per-device element split, queues, program, kernels and buffers.
/// </summary>
/// <param name="left">Left operand; used as the host memory of the single-GPU left buffer.</param>
/// <param name="right">Right operand; used as the host memory of the single-GPU right buffer.</param>
/// <exception cref="ApplicationException">The OpenCL program failed to build; the message carries the build log.</exception>
static void InitializeOpenCL(Real[] left, Real[] right)
{
    // First platform, reported on the console
    var firstPlatform = ComputePlatform.Platforms[0];
    Console.WriteLine("プラットフォーム:{0} ({1})", firstPlatform.Name, firstPlatform.Version);

    // Context over the platform's GPU devices
    var context = new ComputeContext(
        Cloo.ComputeDeviceTypes.Gpu,
        new ComputeContextPropertyList(firstPlatform),
        null,
        IntPtr.Zero);

    var devices = context.Devices;
    Console.WriteLine("デバイス数:{0}", devices.Count);

    // Split COUNT elements over the devices; the first 'shortfall' devices
    // take one element fewer so the counts sum to COUNT
    countPerDevice = new int[devices.Count];
    int maxCountPerDevice = (int)Math.Ceiling((double)COUNT / devices.Count);
    var shortfall = maxCountPerDevice * devices.Count - COUNT;
    for (int d = 0; d < devices.Count; d++)
    {
        if (d < shortfall)
        {
            countPerDevice[d] = maxCountPerDevice - 1;
        }
        else
        {
            countPerDevice[d] = maxCountPerDevice;
        }
    }

    // One partial result per device
    resultsPerDevice = new Real[devices.Count];

    // Work items per work group: first dimension limit of the first device
    localSize = (int)devices[0].MaxWorkItemSizes[0];

    // One command queue per device; print each device as it is set up
    queues = new ComputeCommandQueue[devices.Count];
    for (int d = 0; d < devices.Count; d++)
    {
        var current = devices[d];
        queues[d] = new ComputeCommandQueue(context, current, ComputeCommandQueueFlags.None);
        Console.WriteLine("* {0} ({1})", current.Name, current.Vendor);
    }

    // Build the program, passing the element type as a define
    var program = new ComputeProgram(context, Properties.Resources.VectorDot);
    try
    {
        string realString;
        if (typeof(Real) == typeof(Double))
        {
            realString = "double";
        }
        else
        {
            realString = "float";
        }

        var buildOptions = string.Format(" -D REAL={0} -Werror", realString);
        program.Build(devices, buildOptions, null, IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException ex)
    {
        // Surface the build log of the first device together with the original message
        var buildLog = program.GetBuildLog(devices[0]);
        throw new ApplicationException(string.Format("{0}\n{1}", ex.Message, buildLog), ex);
    }

    // Kernels: one multiply kernel and five reduction variants per device
    string[] reductionKernelNames =
    {
        "ReductionSum0",
        "ReductionSum1",
        "ReductionSum2",
        "ReductionSum3",
        "ReductionSum4"
    };
    multyplyEachElement = new ComputeKernel[devices.Count];
    reductionSum = new ComputeKernel[REDUCTION_VERSION + 1, devices.Count];
    for (int d = 0; d < devices.Count; d++)
    {
        multyplyEachElement[d] = program.CreateKernel("MultyplyEachElement");
        for (int version = 0; version < reductionKernelNames.Length; version++)
        {
            reductionSum[version, d] = program.CreateKernel(reductionKernelNames[version]);
        }
    }

    // Buffers for the single-GPU path; the inputs are backed by the host arrays
    var hostBackedReadOnly = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
    bufferLeft = new ComputeBuffer<Real>(context, hostBackedReadOnly, left);
    bufferRight = new ComputeBuffer<Real>(context, hostBackedReadOnly, right);
    bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadWrite, left.Length);

    // Buffers for the multi-GPU path, sized to each device's share
    buffersLeft = new ComputeBuffer<Real>[devices.Count];
    buffersRight = new ComputeBuffer<Real>[devices.Count];
    buffersResult = new ComputeBuffer<Real>[devices.Count];
    for (int d = 0; d < devices.Count; d++)
    {
        var share = countPerDevice[d];
        buffersLeft[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, share);
        buffersRight[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly, share);
        buffersResult[d] = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly, share);
    }
}
/// <summary>
/// Builds <paramref name="sourcecodes"/> for the single device of
/// <paramref name="context"/> and returns its "Main" kernel.
/// </summary>
/// <param name="context">Context that must contain exactly one device.</param>
/// <param name="sourcecodes">OpenCL source strings that make up the program.</param>
/// <param name="defines">
/// Preprocessor defines passed as <c>-D key=value</c>. Entries with a null or
/// empty value are skipped; a key or value containing whitespace is rejected.
/// </param>
/// <returns>
/// The "Main" kernel, or <c>null</c> when a define is invalid or the build fails
/// (a message box reports the reason).
/// </returns>
private static ComputeKernel Compile(ComputeContext context, string[] sourcecodes, Dictionary<string, string> defines)
{
    var program = new ComputeProgram(context, sourcecodes);
    ComputeKernel kernel = null;
    try
    {
        var device = context.Devices.Single();
        try
        {
            foreach (var define in defines)
            {
                // A null value is legal (such a define is skipped below), so it
                // must not be enumerated here.
                bool keyHasWhitespace = define.Key.Any(char.IsWhiteSpace);
                bool valueHasWhitespace = define.Value != null && define.Value.Any(char.IsWhiteSpace);
                if (keyHasWhitespace || valueHasWhitespace)
                {
                    MessageBox.Show("Invalid define \"" + define.Key + "=" + define.Value + "\": define contained whitespace", "Error");
                    return null;
                }
            }

            var options = string.Join(" ", defines.Where(kvp => !string.IsNullOrEmpty(kvp.Value)).Select(kvp => "-D " + kvp.Key + "=" + kvp.Value));
            program.Build(new[] { device }, options + " " + StaticSettings.Fetch.OpenClOptions, null, IntPtr.Zero);

            // A successful build can still produce warnings; show them if present
            var str = program.GetBuildLog(device).Trim();
            if (string.IsNullOrEmpty(str) == false)
                MessageBox.Show(str, "Build log");

            kernel = program.CreateKernel("Main");
            return kernel;
        }
        catch (InvalidBinaryComputeException)
        {
            MessageBox.Show(program.GetBuildLog(device), "Build error (invalid binary)");
            return null;
        }
        catch (BuildProgramFailureComputeException)
        {
            MessageBox.Show(program.GetBuildLog(device), "Build error (build program failure)");
            return null;
        }
    }
    finally
    {
        // Without a kernel nobody references the program any more; release it
        // instead of leaking the native handle on every failed compile.
        if (kernel == null)
            program.Dispose();
    }
}
bool useGPU = true; // GPU code enabled (from commandline)
#endregion Fields

#region Methods
// Initialize the renderer: stores the command line parameters passed by the
// template code, sets up the CPU-side state and, when the GPU is enabled,
// builds the OpenCL program and binds its kernel arguments.
//   rt          - stored in runningTime
//   gpu         - stored in useGPU; set back to false when OpenCL setup fails
//   platformIdx - stored in gpuPlatform
public void Init( int rt, bool gpu, int platformIdx )
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera( screen.width, screen.height );

    // Generate randoms
    Console.Write("Generating randoms....\t");
    randoms = new float[1000];
    Random r = RTTools.GetRNG();
    for (int i = 0; i < 1000; i++)
        randoms[i] = (float)r.NextDouble();
    // the value is unused, but the call still advances the generator
    int variable = r.Next();
    Console.WriteLine("Done!");

    // initialize required opencl things if gpu is used
    if (!useGPU)
        return;

    // 'using' guarantees the file handle is released even when reading fails
    string clSource;
    using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    // NOTE(review): the first platform is always used; gpuPlatform (platformIdx)
    // is stored above but not consulted here - confirm whether that is intended.
    platform = ComputePlatform.Platforms[0];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
    queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
    program = new ComputeProgram(context, clSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Main");

        sceneBuffer = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.toCL());
        rndBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
        cameraBuffer = new ComputeBuffer<Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
        outputBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
        skydome = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

        // kernel arguments: output, screen size, scene + its length, skydome, camera, randoms
        kernel.SetMemoryArgument(0, outputBuffer);
        kernel.SetValueArgument(1, screen.width);
        kernel.SetValueArgument(2, screen.height);
        kernel.SetMemoryArgument(3, sceneBuffer);
        kernel.SetValueArgument(4, scene.toCL().Length);
        kernel.SetMemoryArgument(5, skydome);
        kernel.SetMemoryArgument(6, cameraBuffer);
        kernel.SetMemoryArgument(7, rndBuffer);
    }
    catch (ComputeException)
    {
        // Report the build log, wait for the user, then fall back to the CPU path
        Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
        Console.ReadLine();
        useGPU = false;
    }
}
/// <summary>
/// Creates the conjugate gradient method running on OpenCL.
/// Sets up a GPU context on the first platform, a command queue on its first device,
/// the device buffers, and the kernels built from the <c>Mgcg</c> program resource.
/// </summary>
/// <param name="count">Number of elements</param>
/// <param name="maxNonZeroCount">Forwarded to the base constructor.</param>
/// <param name="_minIteration">Forwarded to the base constructor.</param>
/// <param name="_maxIteration">Forwarded to the base constructor.</param>
/// <param name="allowableResidual">Forwarded to the base constructor.</param>
public ConjugateGradientSingleGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
    : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
{
    // Take the first platform and create a GPU context on it
    ComputePlatform firstPlatform = ComputePlatform.Platforms[0];
    ComputeContextPropertyList contextProperties = new ComputeContextPropertyList(firstPlatform);
    ComputeContext gpuContext = new ComputeContext(ComputeDeviceTypes.Gpu, contextProperties, null, IntPtr.Zero);

    // Get the available devices; only the first one is used
    var availableDevices = gpuContext.Devices;
    var gpu = availableDevices[0];

    // Work sizes: one global work item per element, local size from the device limit
    globalWorkSize = new long[] { Count };
    localWorkSize = new long[] { gpu.MaxWorkItemSizes[0] };

    // Create the command queue
    queue = new ComputeCommandQueue(gpuContext, gpu, ComputeCommandQueueFlags.None);

    var readOnly = ComputeMemoryFlags.ReadOnly;
    var readWrite = ComputeMemoryFlags.ReadWrite;

    // Create the buffers for the matrix data
    bufferA = new ComputeBuffer<double>(gpuContext, readOnly, A.Elements.Length);
    bufferColumnIndeces = new ComputeBuffer<int>(gpuContext, readOnly, A.ColumnIndeces.Length);
    bufferNonzeroCounts = new ComputeBuffer<int>(gpuContext, readOnly, A.NonzeroCounts.Length);

    // Create the buffers for the vectors
    bufferB = new ComputeBuffer<double>(gpuContext, readOnly, b.Length);
    bufferX = new ComputeBuffer<double>(gpuContext, readWrite, x.Length);
    bufferAp = new ComputeBuffer<double>(gpuContext, readWrite, Count);
    bufferP = new ComputeBuffer<double>(gpuContext, readWrite, Count);
    bufferR = new ComputeBuffer<double>(gpuContext, readWrite, Count);

    // Create the buffer used for calculation
    bufferVector = new ComputeBuffer<double>(gpuContext, readWrite, Count);
    answerForReduction = new double[1];

    // Create the program
    var solverProgram = new ComputeProgram(gpuContext, Properties.Resources.Mgcg);

    // Compiler options: the REAL type name and the maximum non-zero count per row
    string buildOptions = string.Format(" -D REAL={0} -D MAX_NONZERO_COUNT={1}", "double", A.MaxNonzeroCountPerRow);

    // Try to build it
    try
    {
        solverProgram.Build(availableDevices, buildOptions, null, IntPtr.Zero);
    }
    // If the build fails
    catch (BuildProgramFailureComputeException buildFailure)
    {
        // Throw an exception carrying the build log of the first device
        throw new ApplicationException(solverProgram.GetBuildLog(gpu), buildFailure);
    }

    // Create the kernels
    addVectorVector = solverProgram.CreateKernel("AddVectorVector");
    multiplyVectorVector = solverProgram.CreateKernel("MultiplyVectorVector");
    reductionSum = solverProgram.CreateKernel("ReductionSum");
    reductionMax = solverProgram.CreateKernel("ReductionMaxAbsolute");
    matrix_x_Vector = solverProgram.CreateKernel("Matrix_x_Vector");

    // The buffer is half the maximum number of non-zero elements
    bufferSizeOfVectorOnMatrixMultiplying = A.MaxNonzeroCountPerRow / 2;
}