//functions
/// <summary>
/// Compiles the "OclKernel/Scan" source once per entry of WORKGROUP_SIZE_AVAILABLE
/// (passing the size as the WORKGROUP_SIZE define), stores the three scan kernels
/// for each size, and allocates the pinned host array and the device buffer used
/// for the offset/sum data.
/// </summary>
/// <param name="OCLManager">Manager used to compile the source and to create the device buffer.</param>
/// <param name="cqCommandQueue">Not used by the active code of this method.</param>
/// <param name="numDataShift">Shift count from which OFS_DATA_SIZE is derived as
/// 2^(numDataShift - 9); must be in the range 9..39.</param>
/// <param name="chunkSize">Not used by the active code of this method.</param>
/// <exception cref="System.ArgumentOutOfRangeException">numDataShift is outside 9..39.</exception>
/// <exception cref="System.InvalidOperationException">The kernel source asset could not be loaded.</exception>
/// <exception cref="OpenCLBuildException">Rethrown after the build logs have been written to the error log.</exception>
public static void initScan(OpenCLManager OCLManager, CommandQueue cqCommandQueue, int numDataShift, int chunkSize)
{
    // C# only uses the low five bits of an int shift count, so a negative or
    // too-large shift would silently produce a bogus (possibly negative) size.
    int ofsDataShift = numDataShift - 9;
    if (ofsDataShift < 0 || ofsDataShift > 30)
    {
        throw new System.ArgumentOutOfRangeException("numDataShift", numDataShift, "numDataShift must be between 9 and 39.");
    }

    Debug.Log(" ...loading Scan.cl and creating scan program then build it\n");

    // The source text is the same for every work-group size: load it once and
    // fail with a clear message instead of a NullReferenceException.
    TextAsset srcKernel = Resources.Load("OclKernel/Scan") as TextAsset;
    if (srcKernel == null)
    {
        throw new System.InvalidOperationException("Kernel source asset 'OclKernel/Scan' could not be loaded.");
    }
    string kernelSource = srcKernel.text;

    for (int i = 0; i < WORKGROUP_SIZE_AVAILABLE.Length; i++)
    {
        OCLManager.BuildOptions = "-D WORKGROUP_SIZE=" + WORKGROUP_SIZE_AVAILABLE[i];
        OCLManager.Defines = "";

        Program cpProgram;
        try
        {
            cpProgram = OCLManager.CompileSource(kernelSource); // TODO : seperate 4 file to reuse binary
        }
        catch (OpenCLBuildException e)
        {
            // Surface the compiler output before propagating the failure.
            string log = "CL Kernel Error: ";
            for (int j = 0; j < e.BuildLogs.Count; j++)
            {
                log += e.BuildLogs[j];
            }
            Debug.LogError(log);
            throw;
        }

        ckScanExclusiveLocal1Array[i] = cpProgram.CreateKernel("scanExclusiveLocal1");
        ckScanExclusiveLocal2Array[i] = cpProgram.CreateKernel("scanExclusiveLocal2");
        ckUniformUpdateArray[i] = cpProgram.CreateKernel("uniformUpdate");
    }

    /* TODO : check
     * Debug.Log( " ...checking minimum supported workgroup size\n");
     * //Check for work group size
     * cl_device_id device;
     * uint szScanExclusiveLocal1, szScanExclusiveLocal2, szUniformUpdate;
     * ciErrNum = clGetCommandQueueInfo(cqParamCommandQue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL);
     * ciErrNum |= clGetKernelWorkGroupInfo(ckScanExclusiveLocal1, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(uint), &szScanExclusiveLocal1, NULL);
     * ciErrNum |= clGetKernelWorkGroupInfo(ckScanExclusiveLocal2, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(uint), &szScanExclusiveLocal2, NULL);
     * ciErrNum |= clGetKernelWorkGroupInfo(ckUniformUpdate, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(uint), &szUniformUpdate, NULL);
     *
     * if( (szScanExclusiveLocal1 < WORKGROUP_SIZE) || (szScanExclusiveLocal2 < WORKGROUP_SIZE) || (szUniformUpdate < WORKGROUP_SIZE) ){
     *     Debug.Log("ERROR: Minimum work-group size %u required by this application is not supported on this device.\n", WORKGROUP_SIZE);
     *     return false;
     * }
     */

    Debug.Log(" ...allocating internal buffers\n");

    // allocate offset data
    OFS_DATA_SIZE = (1 << ofsDataShift); // (max_voxel_num/512)
    ofsDataArray = new int[OFS_DATA_SIZE + SUM_DATA_SIZE];
    // Pinned so the array address stays fixed while native code accesses it.
    hofsDataArray = GCHandle.Alloc(ofsDataArray, GCHandleType.Pinned);
    d_Buffer = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, (BUFFER_LENGTH + OFS_DATA_SIZE + SUM_DATA_SIZE) * sizeof(uint));
}
/// <summary>
/// Creates the OpenCL manager and default context, compiles the DVR source from the
/// project resources and stores the resulting program and "DVR" kernel in the
/// <c>manager</c>, <c>program</c> and <c>kernel</c> fields.
/// If no OpenCL platform is present, the user is informed, application shutdown is
/// requested and nothing is initialized.
/// </summary>
private void InitializeOpenCL()
{
    if (OpenCL.NumberOfPlatforms == 0)
    {
        MessageBox.Show("OpenCL не поддерживается вашей системой!");
        Application.Exit();
        // Application.Exit() only requests shutdown and then returns to the caller,
        // so stop here instead of trying to create a context without any platform.
        return;
    }

    manager = new OpenCLManager();
    manager.AttemptUseBinaries = true;
    manager.AttemptUseSource = true;
    manager.RequireImageSupport = false;
    manager.BuildOptions = "";
    manager.CreateDefaultContext(0, DeviceType.ALL);

    // Compile the OpenCL code
    program = manager.CompileSource(Properties.Resources.DVR);
    kernel = program.CreateKernel("DVR");
}
/// <summary>
/// Generates the OpenCL code and creates the kernel out of it.
/// </summary>
/// <param name="oclManager">The OpenCL manager to use.</param>
/// <param name="keyTranslator">The KeyTranslator to use. This is important for mapping the key movements to code.</param>
/// <returns>The Kernel</returns>
/// <exception cref="Exception">
/// Code generation, compilation or kernel creation failed; the original error is
/// available through <see cref="Exception.InnerException"/>.
/// </exception>
public Kernel GetBruteforceKernel(OpenCLManager oclManager, IKeyTranslator keyTranslator)
{
    // caching: reuse the stored kernel when the translator matches the recorded one.
    // NOTE(review): keyTranslatorOfCode is not assigned in this method - presumably
    // CreateOpenCLBruteForceCode updates it; confirm.
    if (keyTranslatorOfCode == keyTranslator)
    {
        return openCLKernel;
    }

    try
    {
        var program = oclManager.CompileSource(CreateOpenCLBruteForceCode(keyTranslator));
        //keySearcher.GuiLogMessage(string.Format("Using OpenCL with (virtually) {0} threads.", keyTranslator.GetOpenCLBatchSize()), NotificationLevel.Info);
        openCLKernel = program.CreateKernel("bruteforceKernel");
        return openCLKernel;
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace
        // are not lost; exception type and message stay the same for callers.
        throw new Exception(Resources.An_error_occured_when_trying_to_compile_OpenCL_code__ + ex.Message, ex);
    }
}
////////////////////////////////////////////////////////////////////////////////
// initialize marching cubes
////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Sets up everything the marching-cubes pass needs: initializes the scan launcher,
/// builds the marching-cubes program (image-based variant first when the 3D image
/// format is supported, falling back to the buffer variant if that build fails),
/// creates the three kernels, the volume storage, the pinned vertex/normal output
/// buffers and the per-voxel device buffers.
/// </summary>
/// <returns>
/// <c>true</c> when initialization completed (oclMCStatus is set to 1);
/// <c>false</c> when any step threw (oclMCStatus is set to -1 and the failure is logged).
/// </returns>
public static bool InitMC()
{
    // Set when a failure has already been reported in detail by an inner handler,
    // so the outer handler does not report the same exception twice.
    bool failureLogged = false;

    try
    {
        OCLManager = oclManager.ActiveOclMan;
        OCLComQueue = oclManager.ActiveOclCQ;

        try
        {
            oclScanLaucher.initScan(
                OCLManager,
                OCLComQueue,
                VOLUME_VOXEL_X_LEN_SHIFT + VOLUME_VOXEL_Y_LEN_SHIFT + VOLUME_VOXEL_Z_LEN_SHIFT,
                CHUNK_VOXEL_NUM);
        }
        catch (Exception e)
        {
            Debug.LogError(FormatOclError("[OCLLOG]Kernel Error: ", e));
            failureLogged = true;
            throw;
        }

        ImageFormat imageFormat = new ImageFormat(CH_ORDER, ChannelType.UNORM_INT8);
        // Devices whose name contains "RV7" are always treated as lacking image support.
        bImageFormatSupported = (!OCLComQueue.Device.Name.Contains("RV7") /*!bRV7xxGpu*/
            && OCLComQueue.Context.SupportsImageFormat(MemFlags.READ_ONLY, MemObjectType.IMAGE3D, imageFormat.ChannelOrder, imageFormat.ChannelType));
        //bImageFormatSupported = false;

        OCLManager.BuildOptions = "-cl-mad-enable";
        OCLManager.Defines = "";

        Program cpProgram = null;
        // At most two passes: the image variant first (if supported), then the
        // plain-buffer variant when the image build fails.
        while (true)
        {
            try
            {
                string mcKernelPathName = bImageFormatSupported
                    ? "OclKernel/marchingCubes_kernel_img"
                    : ("OclKernel/marchingCubes_kernel_u" + VOXEL_SIZE + "b");
                TextAsset srcKernel = Resources.Load(mcKernelPathName) as TextAsset;
                Debug.Log("[OCLLOG]Build kernel:" + mcKernelPathName);
                if (srcKernel == null)
                {
                    // Report the missing asset by name instead of a bare NullReferenceException.
                    throw new InvalidOperationException("Kernel source asset '" + mcKernelPathName + "' could not be loaded.");
                }
                cpProgram = OCLManager.CompileSource(srcKernel.text);
            }
            catch (Exception e)
            {
                Debug.LogError(FormatOclError("[OCLLOG]Kernel Error: ", e));
                if (bImageFormatSupported)
                {
                    bImageFormatSupported = false;
                    Debug.Log("[OCLLOG]Try to build kernel without img support:");
                    continue;
                }
                failureLogged = true;
                throw;
            }
            break;
        }

        classifyVoxelKernel = cpProgram.CreateKernel("classifyVoxel");
        compactVoxelsKernel = cpProgram.CreateKernel("compactVoxels");
        generateTriangles2Kernel = cpProgram.CreateKernel("generateTriangles2_vec3");
        Debug.Log("[OCLLOG]All kernels are ready.");

        if (bImageFormatSupported)
        {
            d_volume = OCLManager.Context.CreateImage3D(
                MemFlags.READ_ONLY | MemFlags.ALLOC_HOST_PTR,
                imageFormat,
                VOLUME_VOXEL_X_LEN_REAL, VOLUME_VOXEL_Y_LEN_REAL, VOLUME_VOXEL_Z_LEN_REAL,
                0, 0, IntPtr.Zero);
        }
        else
        {
            isoValue = 128f;
            d_volume = OCLManager.Context.CreateBuffer(
                MemFlags.READ_ONLY | MemFlags.ALLOC_HOST_PTR,
                VOLUME_VOXEL_X_LEN_REAL * VOLUME_VOXEL_Y_LEN_REAL * VOLUME_VOXEL_Z_LEN_REAL * VOXEL_SIZE);
        }

        // create VBOs --- now use 3 float, nvidia use 4 float originally.
        Vector3[] posArray = new Vector3[MAX_VERTS];
        Vector2[] norm01Array = new Vector2[MAX_VERTS];
        Vector2[] norm2tArray = new Vector2[MAX_VERTS];
        // The arrays are pinned because the device buffers below are created on top
        // of their memory (USE_HOST_PTR).
        // NOTE(review): if a later allocation throws, these handles stay pinned -
        // confirm whether a cleanup path exists elsewhere.
        hPosArray = GCHandle.Alloc(posArray, GCHandleType.Pinned);
        hNorm01Array = GCHandle.Alloc(norm01Array, GCHandleType.Pinned);
        hNorm2tArray = GCHandle.Alloc(norm2tArray, GCHandleType.Pinned);
        d_pos = OCLManager.Context.CreateBuffer(MemFlags.WRITE_ONLY | MemFlags.USE_HOST_PTR, MAX_VERTS * sizeof(float) * 3, hPosArray.AddrOfPinnedObject());
        d_norm01 = OCLManager.Context.CreateBuffer(MemFlags.WRITE_ONLY | MemFlags.USE_HOST_PTR, MAX_VERTS * sizeof(float) * 2, hNorm01Array.AddrOfPinnedObject());
        d_norm2t = OCLManager.Context.CreateBuffer(MemFlags.WRITE_ONLY | MemFlags.USE_HOST_PTR, MAX_VERTS * sizeof(float) * 2, hNorm2tArray.AddrOfPinnedObject());

        // allocate device memory
        uint memSize = sizeof(uint) * MAX_VOXELS;
        d_voxelVerts = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, memSize);
        d_voxelVertsScan = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, memSize);
        d_voxelOccupied = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, memSize);
        d_voxelOccupiedScan = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, memSize);
        d_compVoxelArray = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, memSize);

        oclMCStatus = 1;
    }
    catch (Exception e)
    {
        // Failures outside the two inner handlers (kernel creation, buffer
        // allocation, ...) would otherwise disappear without any hint of the cause.
        if (!failureLogged)
        {
            Debug.LogError(FormatOclError("[OCLLOG]Init Error: ", e));
        }
        oclMCStatus = -1;
        Debug.LogError("[OCLLOG]OclMarchingCubes is not available.");
        return false;
    }

    // Other const helper -- TODO : size should be computed according to IMG_FORMAT
    volumeZeroConst = new byte[CHUNK_VOXEL_NUM_REAL * 4]; // all zero
    numChunks = 0;
    return true;
}

// Builds one log line for a failure: the prefix followed by all build logs of an
// OpenCLBuildException, or by the message of any other exception.
private static string FormatOclError(string prefix, Exception e)
{
    string log = prefix;
    OpenCLBuildException buildError = e as OpenCLBuildException;
    if (buildError != null)
    {
        for (int i = 0; i < buildError.BuildLogs.Count; i++)
        {
            log += buildError.BuildLogs[i];
        }
    }
    else
    {
        log += e.Message;
    }
    return log;
}
//functions
/// <summary>
/// Compiles "OclKernel/ScanLargeArrays_Kernels", creates the scan kernels, derives
/// the power-of-two block size from the device limits and <paramref name="chunkSize"/>,
/// and allocates one block-sum buffer and one output buffer per scan level.
/// </summary>
/// <param name="OCLManager">Manager used to compile the source and to create device buffers.</param>
/// <param name="cqCommandQueue">Queue whose device supplies the work-group/work-item limits and name.</param>
/// <param name="numDataShift">Shift count for the data length (2^numDataShift); raised to
/// blockSizeShift when smaller.</param>
/// <param name="chunkSize">Upper bound applied to the work-item size used for the block size.</param>
/// <exception cref="System.InvalidOperationException">
/// The kernel source asset could not be loaded, or the resulting block size is below 2.
/// </exception>
/// <exception cref="OpenCLBuildException">Rethrown after the build logs have been written to the error log.</exception>
public static void initScan(OpenCLManager OCLManager, CommandQueue cqCommandQueue, int numDataShift, int chunkSize)
{
    OCLManager.BuildOptions = "";
    OCLManager.Defines = "";

    Program cpProgram;
    try
    {
        TextAsset srcKernel = Resources.Load("OclKernel/ScanLargeArrays_Kernels") as TextAsset;
        Debug.Log("[OCLLOG]Build kernel:Scan");
        if (srcKernel == null)
        {
            // Fail with a clear message instead of a NullReferenceException.
            throw new System.InvalidOperationException("Kernel source asset 'OclKernel/ScanLargeArrays_Kernels' could not be loaded.");
        }
        cpProgram = OCLManager.CompileSource(srcKernel.text);
    }
    catch (OpenCLBuildException e)
    {
        // Surface the compiler output before propagating the failure.
        string log = "[OCLLOG]Kernel Error: ";
        for (int i = 0; i < e.BuildLogs.Count; i++)
        {
            log += e.BuildLogs[i];
        }
        Debug.LogError(log);
        throw;
        //return;
    }

    ckScanLargeKernel = cpProgram.CreateKernel("ScanLargeArrays");
    ckBlockAddiKernel = cpProgram.CreateKernel("blockAddition");
    ckPrefixSumKernel = cpProgram.CreateKernel("prefixSum");
    zeromemKernel = cpProgram.CreateKernel("zeromem");

    MaxGroupSize = (int)cqCommandQueue.Device.MaxWorkGroupSize;
    int maxWorkItemSize = cqCommandQueue.Device.MaxWorkItemSizes[0].ToInt32();
    int maxWorkItemSize1 = cqCommandQueue.Device.MaxWorkItemSizes[1].ToInt32();
    int maxWorkItemSize2 = cqCommandQueue.Device.MaxWorkItemSizes[2].ToInt32();
    Debug.Log("[OCLLOG]SCAN MaxGroup:" + MaxGroupSize + " MaxWorkItem:" + maxWorkItemSize + "," + maxWorkItemSize1 + "," + maxWorkItemSize2);

    if (cqCommandQueue.Device.Name.Contains("RV7"))
    {
        MaxGroupSize = maxWorkItemSize = 32;
        Debug.Log("[OCLLOG]SCAN RV7xx lower MaxGroup:" + MaxGroupSize + "MaxWorkItem:" + maxWorkItemSize);
    }
#if UNITY_STANDALONE_OSX
    else
    {
        MaxGroupSize = maxWorkItemSize = maxWorkItemSize / 4;
        Debug.Log("[OCLLOG]SCAN Apple lower(/4) MaxGroup:" + MaxGroupSize + "MaxWorkItem:" + maxWorkItemSize);
    }
#endif

    if (maxWorkItemSize > chunkSize)
    {
        maxWorkItemSize = chunkSize;
    }

    // Smallest power of two that is >= maxWorkItemSize, together with its log2.
    blockSize = 1;
    blockSizeShift = 0;
    while (blockSize < maxWorkItemSize)
    {
        blockSize <<= 1;
        blockSizeShift++;
    }
    //blockSize >>= 1;blockSizeShift--;
    blockSizeUnMask = ~(blockSize - 1);

    // With a shift of 0 the level loop below would never reduce numDataShift and
    // would allocate buffers forever, so reject a block size below 2 up front.
    if (blockSizeShift <= 0)
    {
        throw new System.InvalidOperationException(
            "Scan block size must be at least 2 (work-item size after limits: " + maxWorkItemSize + ", chunkSize: " + chunkSize + ").");
    }

    // compute buffer length and offset
    blockSumBufferList.Add(null); // Add a placeholder for inputBuffer
    outBufferList.Add(null);      // Add a placeholder

    if (numDataShift < blockSizeShift)
    {
        numDataShift = blockSizeShift; // at least 1 even if blockSizeShift >= numDataShift
    }

    bool bSumDataAllocated = false;
    do
    {
        // Each level shrinks the element count by a factor of blockSize.
        numDataShift -= blockSizeShift;

        Mem buffBlockSum = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, (1 << numDataShift) * sizeof(uint));
        blockSumBufferList.Add(buffBlockSum);

        Mem buffTmpOutput;
        if (!bSumDataAllocated)
        {
            // The first level also sizes the pinned host array and gets
            // SUM_DATA_SIZE extra entries in its output buffer.
            OFS_DATA_SIZE = (1 << numDataShift);
            ofsDataArray = new int[OFS_DATA_SIZE + SUM_DATA_SIZE];
            hofsDataArray = GCHandle.Alloc(ofsDataArray, GCHandleType.Pinned);
            buffTmpOutput = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, (OFS_DATA_SIZE + SUM_DATA_SIZE) * sizeof(uint));
            bSumDataAllocated = true;
        }
        else
        {
            buffTmpOutput = OCLManager.Context.CreateBuffer(MemFlags.READ_WRITE, (1 << numDataShift) * sizeof(uint));
        }
        outBufferList.Add(buffTmpOutput);
    }
    while (numDataShift > blockSizeShift);

#if CL_DEBUG
    houtArray = GCHandle.Alloc(outArray, GCHandleType.Pinned);
    hsumArray = GCHandle.Alloc(sumArray, GCHandleType.Pinned);
#endif
}