/// <summary>
/// Creates a new 1D texture from array memory: looks up the texture reference
/// <paramref name="texName"/> in the module of <paramref name="kernel"/>, configures it and
/// binds <paramref name="array"/> to it.
/// </summary>
/// <param name="kernel">Kernel whose module is searched for the texture reference.</param>
/// <param name="texName">Name of the texture reference inside the module.</param>
/// <param name="addressMode">Address mode set for dimension 0.</param>
/// <param name="filterMode">Filter mode set on the texture reference.</param>
/// <param name="flags">Flags set on the texture reference.</param>
/// <param name="array">Array bound to the texture reference; the instance itself is stored.</param>
/// <exception cref="CudaException">Thrown as soon as one driver call does not return <c>CUResult.Success</c>.</exception>
public CudaTextureArray1D(CudaKernel kernel, string texName, CUAddressMode addressMode, CUFilterMode filterMode, CUTexRefSetFlags flags, CudaArray1D array)
{
    // Shared layout of the per-call trace lines: timestamp, API name, result.
    const string traceFormat = "{0:G}, {1}: {2}";

    _texref = new CUtexref();

    res = DriverAPINativeMethods.ModuleManagement.cuModuleGetTexRef(ref _texref, kernel.CUModule, texName);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}, Texture name: {3}", DateTime.Now, "cuModuleGetTexRef", res, texName));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 0, addressMode);
    Debug.WriteLine(String.Format(traceFormat, DateTime.Now, "cuTexRefSetAddressMode", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFilterMode(_texref, filterMode);
    Debug.WriteLine(String.Format(traceFormat, DateTime.Now, "cuTexRefSetFilterMode", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFlags(_texref, flags);
    Debug.WriteLine(String.Format(traceFormat, DateTime.Now, "cuTexRefSetFlags", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    // The array is first touched here, after the reference itself has been configured.
    var descriptor = array.ArrayDescriptor;
    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFormat(_texref, descriptor.Format, (int)descriptor.NumChannels);
    Debug.WriteLine(String.Format(traceFormat, DateTime.Now, "cuTexRefSetFormat", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    // Remember the configuration that was applied.
    _filtermode = filterMode;
    _flags = flags;
    _addressMode = addressMode;
    _format = descriptor.Format;
    var width = array.Width;
    _size = width;
    _numChannels = (int)descriptor.NumChannels;
    _name = texName;
    _module = kernel.CUModule;
    _cufunction = kernel.CUFunction;

    // Total size = width * channels * size of one channel.
    _channelSize = CudaHelperMethods.GetChannelSize(descriptor.Format);
    _dataSize = width * descriptor.NumChannels * _channelSize;
    _array = array;

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetArray(_texref, _array.CUArray, CUTexRefSetArrayFlags.OverrideFormat);
    Debug.WriteLine(String.Format(traceFormat, DateTime.Now, "cuTexRefSetArray", res));
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    // Disabled: explicit binding of the texture reference to the kernel function.
    //res = DriverAPINativeMethods.ParameterManagement.cuParamSetTexRef(kernel.CUFunction, CUParameterTexRef.Default, _texref);
    //Debug.WriteLine("{0:G}, {1}: {2}", DateTime.Now, "cuParamSetTexRef", res);
    //if (res != CUResult.Success) throw new CudaException(res);
}
/// <summary>
/// Loads a module from the file <paramref name="fname"/> via <c>cuModuleLoad</c>; see CUDA doc.
/// </summary>
/// <param name="module">Receives the module handle written by the native call.</param>
/// <param name="fname">File name; marshalled to an unmanaged ANSI string for the call.</param>
public static void ModuleLoad(out CUmodule module, string fname)
{
    IntPtr nativeName = Marshal.StringToHGlobalAnsi(fname);
    CUresult res;
    try
    {
        res = my.cuModuleLoad(out module, nativeName);
    }
    finally
    {
        // Release the unmanaged copy even if the native call throws
        // (e.g. the driver library cannot be loaded); previously it leaked in that case.
        Marshal.FreeHGlobal(nativeName);
    }

    TestResult(res);
}
// Disabled helper, kept for reference:
//private double ComputeObj(float[] w, float[] alpha, Problem<SparseVec> sub_prob, float[] diag)
//{
//    double v = 0, v1=0;
//    int nSV = 0;
//    for (int i = 0; i < w.Length; i++)
//    {
//        v += w[i] * w[i];
//        v1 += 0.5*w[i] * w[i];
//    }
//    for (int i = 0; i < alpha.Length; i++)
//    {
//        sbyte y_i = (sbyte)sub_prob.Y[i];
//        //original line
//        //v += alpha[i] * (alpha[i] * diag[GETI(y_i, i)] - 2);
//        v += alpha[i] * (alpha[i] * diag[y_i + 1] - 2);
//        v1 += 0.5* alpha[i] * (alpha[i] * diag[y_i + 1] - 2);
//        if (alpha[i] > 0) ++nSV;
//    }
//    v = v / 2;
//    // Debug.WriteLine("Objective value = {0}", v);
//    // Debug.WriteLine("nSV = {0}", nSV);
//    return v;
//}

/// <summary>
/// Creates the CUDA wrapper for device 0, loads the module file located in the current
/// directory and resolves the dot-product, solver and update-W kernel functions.
/// </summary>
protected void InitCudaModule()
{
    cuda = new CUDA(0, true);

    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
    cuModule = cuda.LoadModule(modulePath);

    cuFuncDotProd = cuda.GetModuleFunction(cudaProductKernelName);
    cuFuncSolver = cuda.GetModuleFunction(cudaSolveL2SVM);
    cuFuncUpdateW = cuda.GetModuleFunction(cudaUpdateW);
}
/// <summary>
/// Queries the global <paramref name="globalName"/> of <paramref name="mod"/> and returns the
/// size reported by <c>cuModuleGetGlobal</c>. The call result is stored in <c>LastError</c>
/// and is not checked here.
/// </summary>
/// <param name="mod">Module to query.</param>
/// <param name="globalName">Name of the global to look up.</param>
/// <returns>The size value written by the driver call (0 if the call left it untouched).</returns>
public uint GetModuleGlobalBytes(CUmodule mod, string globalName)
{
    SizeT size = 0;
    CUdeviceptr address = new CUdeviceptr(); // required by the API; the address itself is discarded

    this.LastError = CUDADriver.cuModuleGetGlobal(ref address, ref size, mod, globalName);

    return size;
}
/// <summary>
/// Resolves the function <paramref name="name"/> in <paramref name="module"/> via
/// <c>cuModuleGetFunction</c>; see CUDA doc.
/// </summary>
/// <param name="func">Receives the function handle written by the native call.</param>
/// <param name="module">Module that is searched.</param>
/// <param name="name">Function name; marshalled to an unmanaged ANSI string for the call.</param>
public static void ModuleGetFunction(out CUfunction func, CUmodule module, string name)
{
    IntPtr nativeName = Marshal.StringToHGlobalAnsi(name);
    CUresult res;
    try
    {
        res = my.cuModuleGetFunction(out func, module, nativeName);
    }
    finally
    {
        // Release the unmanaged copy even if the native call throws; previously it leaked in that case.
        Marshal.FreeHGlobal(nativeName);
    }

    TestResult(res);
}
/// <summary>
/// Loads the particle-simulation PTX, creates the update/generate kernels, uploads a table of
/// random floats and registers every particle-mesh buffer handle as an OpenGL interop resource.
/// Logs an error and returns early when the PTX file is missing.
/// </summary>
void InitKernels()
{
    const int maxGlDevices = 50;

    var ptxPath = @"..\..\..\CudaParticleSimulation\kernel.ptx";
    if (!System.IO.File.Exists(ptxPath))
    {
        Debug.Error(ptxPath + " doesnt exists");
        return;
    }

    var cudaContext = new CudaContext();

    // Enumerate the devices usable with OpenGL and create a GL-enabled context on the first one.
    // NOTE(review): the results of both cuGL* calls are ignored.
    uint glDeviceCount = 1;
    var glDevices = new CUdevice[maxGlDevices];
    OpenGLNativeMethods.CUDA3.cuGLGetDevices(ref glDeviceCount, glDevices, maxGlDevices, CUGLDeviceList.All);
    var rawContext = cudaContext.Context;
    OpenGLNativeMethods.CUDA3.cuGLCtxCreate(ref rawContext, CUCtxFlags.BlockingSync, glDevices[0]);
    Debug.Info("Found " + glDeviceCount + " OpenGL devices associated with current context");

    CUmodule ptxModule = cudaContext.LoadModule(ptxPath);

    updateParticles = new CudaKernel("updateParticles", ptxModule, cudaContext);
    updateParticles.BlockDimensions = new dim3(16 * 16, 1, 1);
    updateParticles.GridDimensions = new dim3(16 * 16, 1, 1);

    // The generator uses the same launch configuration as the updater.
    generateParticles = new CudaKernel("generateParticles", ptxModule, cudaContext);
    generateParticles.BlockDimensions = updateParticles.BlockDimensions;
    generateParticles.GridDimensions = updateParticles.GridDimensions;

    // Table of host-generated random numbers for the device code.
    var rng = new Random();
    var randomTable = new float[1000];
    for (int slot = 0; slot < randomTable.Length; slot++)
    {
        randomTable[slot] = (float)rng.NextDouble();
    }
    generateParticles.SetConstantVariable("randomFloats", randomTable);

    // CudaGraphicsInteropResourceCollection
    resources.Clear();
    foreach (var bufferHandle in renderer.particleMesh.allBufferHandles)
    {
        resources.Add(new CudaOpenGLBufferInteropResource(bufferHandle, CUGraphicsRegisterFlags.None, CUGraphicsMapResourceFlags.None));
    }

    randomIndex_D = 0;
    randomIndex_D.CopyToDevice(0);
}
/// <summary>
/// Creates the CUDA context, loads the GrabCut GMM PTX image embedded in this assembly
/// (the x64 variant when running as a 64-bit process) and resolves all kernels from it.
/// </summary>
/// <exception cref="ArgumentException">The PTX resource is not embedded in the executing assembly.</exception>
/// <exception cref="EndOfStreamException">The resource stream ended before its reported length was read.</exception>
public GrabCutGMM()
{
    ctx = new CudaContext(CudaContext.GetMaxGflopsDeviceId(), false);

    //Load Kernel image from resources
    string resName;
    if (IntPtr.Size == 8)
    {
        resName = "GrabCutGMM_x64.ptx";
    }
    else
    {
        resName = "GrabCutGMM.ptx";
    }

    string resNamespace = "GrabCutNPP";
    string resource = resNamespace + "." + resName;

    byte[] kernel = ReadKernelImage(resource);
    CUmodule module = ctx.LoadModulePTX(kernel);

    // Kernels are looked up by their mangled C++ names.
    GMMReductionKernelCreateGmmFlags = new CudaKernel("_Z18GMMReductionKernelILi4ELb1EEviPfiPK6uchar4iPhiiiPj", module, ctx);
    GMMReductionKernelNoCreateGmmFlags = new CudaKernel("_Z18GMMReductionKernelILi4ELb0EEviPfiPK6uchar4iPhiiiPj", module, ctx);
    GMMFinalizeKernelInvertSigma = new CudaKernel("_Z17GMMFinalizeKernelILi4ELb1EEvPfS0_ii", module, ctx);
    GMMFinalizeKernelNoInvertSigma = new CudaKernel("_Z17GMMFinalizeKernelILi4ELb0EEvPfS0_ii", module, ctx);
    GMMcommonTerm = new CudaKernel("_Z13GMMcommonTermiPfi", module, ctx);
    DataTermKernel = new CudaKernel("_Z14DataTermKernelPiiiPKfiPK6uchar4iPKhiii", module, ctx);
    GMMAssignKernel = new CudaKernel("_Z15GMMAssignKerneliPKfiPK6uchar4iPhiii", module, ctx);
    GMMFindSplit = new CudaKernel("_Z12GMMFindSplitP10GMMSplit_tiPfi", module, ctx);
    GMMDoSplit = new CudaKernel("_Z10GMMDoSplitPK10GMMSplit_tiPfiPK6uchar4iPhiii", module, ctx);
    MeanEdgeStrengthReductionKernel = new CudaKernel("_Z31MeanEdgeStrengthReductionKerneliiPf", module, ctx);
    MeanEdgeStrengthFinalKernel = new CudaKernel("_Z27MeanEdgeStrengthFinalKernelPfi", module, ctx);
    EdgeCuesKernel = new CudaKernel("_Z14EdgeCuesKernelfPKfPiS1_S1_S1_S1_S1_S1_S1_iiii", module, ctx);
    SegmentationChangedKernel = new CudaKernel("_Z25SegmentationChangedKernelPiPhS0_iii", module, ctx);
    downscaleKernel1 = new CudaKernel("_Z18downscaleKernelBoxI6uchar4EvPT_iiiPKS1_iii", module, ctx);
    downscaleKernel2 = new CudaKernel("_Z18downscaleKernelMaxIhEvPT_iiiPKS0_iii", module, ctx);
    upsampleAlphaKernel = new CudaKernel("_Z19upsampleAlphaKernelPhS_iiii", module, ctx);

    // The index tables are set through both finalize kernel variants, as before.
    GMMFinalizeKernelInvertSigma.SetConstantVariable("det_indices", det_indices);
    GMMFinalizeKernelInvertSigma.SetConstantVariable("inv_indices", inv_indices);
    GMMFinalizeKernelNoInvertSigma.SetConstantVariable("det_indices", det_indices);
    GMMFinalizeKernelNoInvertSigma.SetConstantVariable("inv_indices", inv_indices);
}

/// <summary>
/// Reads the complete embedded resource <paramref name="resource"/> of the executing assembly into a byte array.
/// </summary>
private static byte[] ReadKernelImage(string resource)
{
    // Dispose the resource stream once the image has been copied (it was never closed before).
    using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resource))
    {
        if (stream == null)
        {
            throw new ArgumentException("Kernel not found in resources.");
        }

        byte[] image = new byte[stream.Length];
        int offset = 0;
        while (offset < image.Length)
        {
            int read = stream.Read(image, offset, image.Length - offset);
            if (read <= 0)
            {
                // Read returns 0 at end of stream; without this check a short stream would loop forever.
                throw new EndOfStreamException("Kernel resource ended before all bytes were read.");
            }

            offset += read;
        }

        return image;
    }
}
/// <summary>
/// Prepares the CUDA evaluator: creates a context on device 0, loads the module and its two
/// kernel functions, allocates two host buffers sized for one support vector, binds the
/// device copy of the first buffer to the "svTexRef" texture and uploads labels and alphas.
/// Sets <c>IsInitialized</c> when done.
/// </summary>
public override void Init()
{
    cuda = new CUDA(0, true);
    var context = cuda.CreateContext(0, CUCtxFlags.MapHost);
    cuda.SetCurrentContext(context);

    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
    cuModule = cuda.LoadModule(modulePath);
    cuFunc = cuda.GetModuleFunction(cudaEvaluatorKernelName);
    cuFuncSign = cuda.GetModuleFunction(cudaSignKernelName);

    //reserved memory based on dimension of support vector
    //svVector = new float[TrainedModel.SupportElements[0].Count];
    stream = cuda.CreateStream();

    //memSvSize = (uint)(TrainedModel.SupportElements[0].Count * sizeof(float));
    memSvSize = (uint)(TrainedModel.SupportElements[0].Dim * sizeof(float));

    //allocates memory for buffers
    svVecIntPtrs[0] = cuda.AllocateHost(memSvSize);
    svVecIntPtrs[1] = cuda.AllocateHost(memSvSize);
    mainVecPtr = cuda.CopyHostToDeviceAsync(svVecIntPtrs[0], memSvSize, stream);

    // Bind the device copy of the vector buffer to the module's texture reference.
    cuSVTexRef = cuda.GetModuleTexture(cuModule, "svTexRef");
    cuda.SetTextureFlags(cuSVTexRef, 0);
    cuda.SetTextureAddress(cuSVTexRef, mainVecPtr, memSvSize);

    //todo: copy labels and alphas
    // NOTE(review): the arrays are sized by SupportElements.Length while the loop runs over
    // SupportElementsIndexes.Length, and labels are read with i but alphas with idx - confirm.
    int supportCount = TrainedModel.SupportElements.Length;
    float[] svLabels = new float[supportCount];
    float[] svAlphas = new float[supportCount];

    Parallel.For(0, TrainedModel.SupportElementsIndexes.Length, i =>
    {
        int idx = TrainedModel.SupportElementsIndexes[i];

        svLabels[i] = TrainedModel.Y[i];
        //svLabels[i] = TrainningProblem.Labels[idx];
        svAlphas[i] = TrainedModel.Alpha[idx];
    });

    //for (int i = 0; i < TrainedModel.SupportElementsIndexes.Length; i++)
    //{
    //    int idx = TrainedModel.SupportElementsIndexes[i];
    //    svLabels[i]= TrainningProblem.Labels[idx];
    //    svAlphas[i] = TrainedModel.Alpha[idx];
    //}

    labelsPtr = cuda.CopyHostToDevice(svLabels);
    alphasPtr = cuda.CopyHostToDevice(svAlphas);

    IsInitialized = true;
}
/// <summary>
/// Creates a CUDA context, loads the PTX module from a fixed absolute path and resolves the
/// kernel with the mangled name "_Z6kerneliiPi" into <c>addWithCuda</c>.
/// </summary>
public static void InitKernels()
{
    // Alternative location used previously:
    //CUmodule cumodule = cntxt.LoadModule(@"C:\Users\Michał\Documents\Visual Studio 2013\Projects\cuda\Projekt cuda\Projekt cuda\Debug\kernel.ptx");
    // NOTE(review): machine-specific absolute path.
    string ptxPath = @"D:\Grafika\cuda\Projekt cuda\Projekt cuda\Debug\kernel.ptx";

    CudaContext context = new CudaContext();
    CUmodule ptxModule = context.LoadModule(ptxPath);

    addWithCuda = new CudaKernel("_Z6kerneliiPi", ptxModule, context);
}
/// <summary>
/// Loads the FlowMapUncertain PTX module through the shared context and creates the
/// "FlowMapStep" and "FlowMapUpdate" kernels from it.
/// </summary>
public static void Initialize()
{
    const string ptxFile = "Framework/Algorithms/Kernels/FlowMapUncertain.ptx";
    CUmodule flowMapModule = _context.LoadModulePTX(ptxFile);

    // Device-side signature, for reference:
    //__global__ void FlowMapStep(cudaTextureObject_t mapT0, float* mapT1, const int width, const int height, const int numMembers, const float particleDensity, /*float timeScale, */ float stepSize, float minDensity)
    _advectParticlesKernel = new CudaKernel("FlowMapStep", flowMapModule, _context);
    _copyMapDataKernel = new CudaKernel("FlowMapUpdate", flowMapModule, _context);

    //_advectParticlesKernel = _context.LoadKernelPTX("Framework/Algorithms/Kernels/FlowMapUncertain.ptx", "FlowMapStep", new CUJITOption[] { }, null);
}
/// <summary>
/// Sets up the tracker: initialises the FFT plans per level via <c>InitFFT</c>, records the
/// largest FFT buffer and the maximum image/tile/FFT extents over all levels, then loads
/// "kernel.ptx" and creates the kernels used by the tracker.
/// </summary>
/// <param name="aMaxWidth">Full-resolution width; divided by each entry of <paramref name="aLevels"/>.</param>
/// <param name="aMaxHeight">Full-resolution height; divided by each entry of <paramref name="aLevels"/>.</param>
/// <param name="aTileSizes">Tile size per level; its count drives both loops.</param>
/// <param name="aMaxShifts">Maximum shift per level.</param>
/// <param name="aLevels">Divisor per level; its count sizes the plan arrays.</param>
/// <param name="ctx">Context used to load the module and create the kernels.</param>
public PatchTracker(int aMaxWidth, int aMaxHeight, List<int> aTileSizes, List<int> aMaxShifts, List<int> aLevels, CudaContext ctx)
{
    int planCount = aLevels.Count;
    forward = new CudaFFTPlanMany[planCount];
    backward = new CudaFFTPlanMany[planCount];

    //Allocate FFT plans
    SizeT largestFFTBuffer = 0;
    for (int level = 0; level < aTileSizes.Count; level++)
    {
        SizeT required = InitFFT(level, aMaxWidth / aLevels[level], aMaxHeight / aLevels[level], aTileSizes[level], aMaxShifts[level]);
        if (required > largestFFTBuffer)
        {
            largestFFTBuffer = required;
        }
    }
    FTTBufferSize = largestFFTBuffer;

    //find maximum for allocations:
    for (int level = 0; level < aTileSizes.Count; level++)
    {
        int levelWidth = aMaxWidth / aLevels[level];
        int levelHeight = aMaxHeight / aLevels[level];

        // The Current* members read below are evaluated only after these fields are assigned.
        currentWidth = levelWidth;
        currentHeight = levelHeight;
        currentTileSize = aTileSizes[level];
        currentMaxShift = aMaxShifts[level];

        var blocksX = CurrentBlockCountX;
        var blocksY = CurrentBlockCountY;
        var blockSize = CurrentBlockSize;

        int shiftPixels = (2 * currentMaxShift + 1) * (2 * currentMaxShift + 1) * blocksX * blocksY;
        maxPixelsShiftImage = Math.Max(shiftPixels, maxPixelsShiftImage);

        int tilePixels = blockSize * blockSize * blocksX * blocksY;
        maxPixelsImage = Math.Max(tilePixels, maxPixelsImage);

        int fftWidth = blockSize / 2 + 1;
        int fftPixels = fftWidth * blockSize * blocksX * blocksY;
        maxPixelsFFT = Math.Max(fftPixels, maxPixelsFFT);

        maxWidth = Math.Max(levelWidth, maxWidth);
        maxHeight = Math.Max(levelHeight, maxHeight);
        maxBlockCountX = Math.Max(maxBlockCountX, blocksX);
        maxBlockCountY = Math.Max(maxBlockCountY, blocksY);
    }

    CUmodule kernelModule = ctx.LoadModule("kernel.ptx");

    conjKernel = new conjugateComplexMulKernel(ctx, kernelModule);
    convertToTiles = new convertToTilesOverlapKernel(ctx, kernelModule);
    convertToTilesBorder = new convertToTilesOverlapBorderKernel(ctx, kernelModule);
    squaredSumKernel = new squaredSumKernel(ctx, kernelModule);
    boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, kernelModule);
    boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, kernelModule);
    normalizedCCKernel = new normalizedCCKernel(ctx, kernelModule);
    findMinimumKernel = new findMinimumKernel(ctx, kernelModule);
}
/// <summary>
/// Loads the CUDA module file from the current directory and resolves the kernel function
/// named by <c>funcName</c>.
/// </summary>
/// <exception cref="ArgumentException">The module file does not exist.</exception>
private void InitCudaModule()
{
    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);

    bool moduleExists = File.Exists(modulePath);
    if (!moduleExists)
    {
        throw new ArgumentException("Failed access to cuda module" + modulePath);
    }

    cuModule = cuda.LoadModule(modulePath);
    cuFuncDense = cuda.GetModuleFunction(funcName);
}
/// <summary>
/// Wraps the "accumulateImagesSuperRes" kernel of <paramref name="module"/>; all set-up is
/// done by the base constructor, which also receives <c>BlockSizeX</c> and <c>BlockSizeY</c>.
/// </summary>
/// <param name="ctx">Context passed on to the base constructor.</param>
/// <param name="module">Module passed on to the base constructor.</param>
public AccumulateImagesSuperResKernel(CudaContext ctx, CUmodule module)
    : base("accumulateImagesSuperRes", module, ctx, BlockSizeX, BlockSizeY)
{
    // Device-side signature, kept for reference:
    //
    // accumulateImages(
    //     unsigned short* __restrict__ dataIn,
    //     float3 * __restrict__ imgOut,
    //     float3 * __restrict__ totalWeights,
    //     const float3 * __restrict__ certaintyMask,
    //     const float3* __restrict__ kernelParam,
    //     const float2* __restrict__ shifts,
    //     float maxVal, int dimX, int dimY, int strideOut)
}
/// <summary>
/// Creates a Direct3D 11 interop context for the graphics device, loads the kernel PTX
/// module and creates all marching-cubes related kernels plus the prefix-scan helper.
/// </summary>
private void InitializeCUDA()
{
    context = new CudaContext(
        CudaContext.GetMaxGflopsDevice(),
        graphicsDevice.ComPointer,
        CUCtxFlags.SchedAuto,
        CudaContext.DirectXVersion.D3D11);

    module = context.LoadModulePTX(@"Kernels\kernel.ptx");

    // Kernel fields are assigned in the same order as before.
    kernelPositionWeightNoiseCube = new CudaKernel("position_weight_noise_cube", module, context);
    kernelNormalAmbient = new CudaKernel("normal_ambient", module, context);
    kernelMarchingCubesCases = new CudaKernel("marching_cubes_cases", module, context);
    kernelMarchingCubesVertices = new CudaKernel("marching_cubes_vertices", module, context);
    kernelPositionWeightNoiseCubeWarp = new CudaKernel("position_weight_noise_cube_warp", module, context);
    kernelPositionWeightFormula = new CudaKernel("position_weight_formula", module, context);

    prefixScan = new CUDAPrefixScan(module, context);
}
/// <summary>
/// Wraps a loaded module together with its PTX text and resolves a <c>JittedFunction</c> for
/// every <c>.entry</c> declaration found in the PTX.
/// </summary>
/// <param name="ptx">PTX source of the module; must not be null.</param>
/// <param name="handle">Handle of the loaded module; must satisfy <c>IsNotNull</c>.</param>
public JittedModule(String ptx, CUmodule handle)
{
    CudaDriver.Ensure();
    Ptx = ptx.AssertNotNull();
    Handle = handle.AssertThat(h => h.IsNotNull);

    // Walk the chain of regex matches; each successful match names one entry point.
    var match = Regex.Match(Ptx, @"\.entry\s*(?<entrypoint>\w*?)\s*\(");
    Functions = match.Unfoldi(m => m.NextMatch(), m => m.Success).Select(m =>
    {
        // Use the current match "m". The previous code read the captured first match here,
        // so every function was resolved under the first entry point's name.
        var name = m.Result("${entrypoint}");
        var hfunc = nvcuda.cuModuleGetFunction(this, name);
        return(new JittedFunction(hfunc, name));
    }).ToReadOnly();
}
/// <summary>
/// Stores the parts of a compiled CUDA module and derives the target data layout from the
/// target machine.
/// </summary>
/// <param name="sourceAssembly">Stored as <c>SourceAssembly</c>.</param>
/// <param name="intermediateModule">Stored as <c>IntermediateModule</c>.</param>
/// <param name="targetMachine">Stored as <c>TargetMachine</c> and used to create <c>TargetData</c>.</param>
/// <param name="compiledModule">Stored as <c>CompiledModule</c>.</param>
/// <param name="entryPointName">Stored as <c>EntryPointName</c>.</param>
/// <param name="context">Stored as <c>Context</c>.</param>
private CudaModule(
    ClrAssembly sourceAssembly,
    ModuleBuilder intermediateModule,
    LLVMTargetMachineRef targetMachine,
    CUmodule compiledModule,
    string entryPointName,
    CudaContext context)
{
    SourceAssembly = sourceAssembly;
    IntermediateModule = intermediateModule;

    // The data layout is derived from the machine right after the machine is stored.
    TargetMachine = targetMachine;
    TargetData = LLVM.CreateTargetDataLayout(TargetMachine);

    CompiledModule = compiledModule;
    EntryPointName = entryPointName;
    Context = context;
}
/// <summary>
/// Creates the CUDA wrapper and a current context on device 0, loads the module file from the
/// current directory and resolves the evaluator and reduce kernel functions.
/// </summary>
private void InitCuda()
{
    cuda = new CUDA(0, true);

    var context = cuda.CreateContext(0, CUCtxFlags.MapHost);
    cuda.SetCurrentContext(context);

    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
    cuModule = cuda.LoadModule(modulePath);

    cuFuncEval = cuda.GetModuleFunction(cudaEvaluatorKernelName);
    cuFuncReduce = cuda.GetModuleFunction(cudaReduceKernelName);
}
/// <summary>
/// Prepares the pre-alignment step for images of the ROI size of <paramref name="img"/>:
/// allocates the rotated working image, loads "kernel.ptx" with its kernels, and creates the
/// forward (R2C) and backward (C2R) 2D FFT plans, recording the larger of the two work sizes.
/// </summary>
/// <param name="img">Image whose ROI width and height define all sizes.</param>
/// <param name="ctx">Context used to load the module and create the kernels.</param>
public PreAlignment(NPPImage_32fC1 img, CudaContext ctx)
{
    width = img.WidthRoi;
    height = img.HeightRoi;
    imgToTrackRotated = new NPPImage_32fC1(width, height);

    CUmodule kernelModule = ctx.LoadModule("kernel.ptx");

    conjKernel = new conjugateComplexMulKernel(ctx, kernelModule);
    fourierFilterKernel = new fourierFilterKernel(ctx, kernelModule);
    fftshiftKernel = new fftshiftKernel(ctx, kernelModule);
    squaredSumKernel = new squaredSumKernel(ctx, kernelModule);
    boxFilterXKernel = new boxFilterWithBorderXKernel(ctx, kernelModule);
    boxFilterYKernel = new boxFilterWithBorderYKernel(ctx, kernelModule);
    normalizedCCKernel = new normalizedCCKernel(ctx, kernelModule);
    findMinimumKernel = new findMinimumKernel(ctx, kernelModule);

    // Plan geometry. The real side is addressed with the image pitch divided by 4,
    // the complex side with width / 2 + 1 elements per row.
    const int rank = 2;
    const int batchCount = 1;
    const int realStride = 1;
    const int complexStride = 1;

    int fftWidth = width / 2 + 1;
    int pitchElements = imgToTrackRotated.Pitch / 4;

    int[] dims = new int[] { height, width };
    int[] realEmbed = new int[] { 1, pitchElements };
    int[] complexEmbed = new int[] { 1, fftWidth };
    int realDist = height * pitchElements;
    int complexDist = height * fftWidth;

    cufftHandle handleForward = cufftHandle.Create();
    cufftHandle handleBackward = cufftHandle.Create();

    SizeT sizeForward = new SizeT();
    SizeT sizeBackward = new SizeT();

    forward = new CudaFFTPlanMany(handleForward, rank, dims, batchCount, cufftType.R2C, realEmbed, realStride, realDist, complexEmbed, complexStride, complexDist, ref sizeForward, false);
    backward = new CudaFFTPlanMany(handleBackward, rank, dims, batchCount, cufftType.C2R, complexEmbed, complexStride, complexDist, realEmbed, realStride, realDist, ref sizeBackward, false);

    // Keep the larger of the two reported work sizes.
    if (sizeForward > sizeBackward)
    {
        FFTBufferSize = sizeForward;
    }
    else
    {
        FFTBufferSize = sizeBackward;
    }
}
/// <summary>
/// Creates the CUDA wrapper and a context on device 0, then loads the module file from the
/// current directory and resolves the product kernel function.
/// </summary>
/// <exception cref="ArgumentException">The module file does not exist.</exception>
protected void InitCudaModule()
{
    const int deviceIndex = 0;

    cuda = new CUDA(deviceIndex, true);
    cuCtx = cuda.CreateContext(deviceIndex, CUCtxFlags.MapHost);

    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
    bool moduleExists = File.Exists(modulePath);
    if (!moduleExists)
    {
        throw new ArgumentException("Failed access to cuda module" + modulePath);
    }

    cuModule = cuda.LoadModule(modulePath);
    cuFunc = cuda.GetModuleFunction(cudaProductKernelName);
}
/// <summary>
/// Initialises the CUDA driver on device 0, creates a context, loads "BarnesHut.cubin" and logs
/// a summary of the device properties. In non-DEBUG builds any exception is logged as a
/// warning and <c>CUDAEnabled</c> is set to false; in DEBUG builds exceptions propagate.
/// </summary>
/// <param name="steps">Not used by this constructor body.</param>
public HuForceDirectedLayout(int steps)
{
#if !DEBUG
    try
    {
#endif
    // NOTE(review): the values returned by the CUDADriver calls are not inspected, so a
    // failing call is not reported here - confirm whether that is intended.
    CUDADriver.cuInit(0);

    dev = new CUdevice();
    CUDADriver.cuDeviceGet(ref dev, 0);

    ctx = new CUcontext();
    CUDADriver.cuCtxCreate(ref ctx, 0, dev);

    mod = new CUmodule();
    CUDADriver.cuModuleLoad(ref mod, "BarnesHut.cubin");

    prop = new CUDeviceProperties();
    CUDADriver.cuDeviceGetProperties(ref prop, dev);

    // The runtime version overwrites the driver version; only the former ends up in the log.
    int version = 0;
    CUDADriver.cuDriverGetVersion(ref version);
    GASS.CUDA.CUDARuntime.cudaRuntimeGetVersion(ref version);

    string caps =
        "\tClock rate = " + prop.clockRate / 1000000 + " MHz\n" +
        "\tMemory size = " + prop.totalConstantMemory / 1024 + " KB\n" +
        "\tThreads per block = " + prop.maxThreadsPerBlock + "\n" +
        "\tWarp size = " + prop.SIMDWidth + "\n" +
        "\tCUDA version = " + version + "\n";

    Logger.AddMessage(LogEntryType.Info, "Successfully initialized CUDA GPU computation\n" + caps);
#if !DEBUG
    }
    catch (Exception ex)
    {
        Logger.AddMessage(LogEntryType.Warning, "CUDA not available, falling back to CPU. Exception was: " + ex.Message);
        CUDAEnabled = false;
    }
#endif
}
/// <summary>
/// Creates the CUDA wrapper for device 0, loads the module file from the current directory
/// and resolves all kernel functions used by this solver.
/// </summary>
private void InitCudaModule()
{
    cuda = new CUDA(0, true);

    string modulePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
    cuModule = cuda.LoadModule(modulePath);

    // Functions are resolved in the same order as before.
    cuFuncDotProd = cuda.GetModuleFunction(cudaProductKernelName);
    cuFuncGradFinalize = cuda.GetModuleFunction(cudaGradFinalizeName);
    cuFuncComputeBBstep = cuda.GetModuleFunction(cudaComputeBBStepName);
    cuFuncObjSquareW = cuda.GetModuleFunction(cudaObjWName);
    cuFuncObjSquareAlpha = cuda.GetModuleFunction(cudaObjAlphaName);
    cuFuncUpdateW = cuda.GetModuleFunction(cudaUpdateW);
    cuFuncUpdateAlpha = cuda.GetModuleFunction(cudaUpdateAlphaName);
    cuFuncLinPart = cuda.GetModuleFunction(cudaLinPartName);
}
/// <summary>
/// Loads "kernel.cubin", creates the "proccess" kernel with a launch configuration of
/// 256 threads per block and enough blocks to cover <c>DataGenerator.InputCount</c>, then
/// allocates the device buffers and uploads the four input arrays.
/// </summary>
public override void Init()
{
    int inputCount = DataGenerator.InputCount;

    CudaContext context = new CudaContext();
    CUmodule cubinModule = context.LoadModule(@"kernel.cubin");
    myKernel = new CudaKernel("proccess", cubinModule, context);

    //myKernel.GridDimensions = (N + 255) / 256;
    //myKernel.BlockDimensions = Math.Min(N, 256);
    // Round the block count up so that every input element gets a thread.
    myKernel.GridDimensions = (inputCount + 255) / 256;
    myKernel.BlockDimensions = 256;

    // https://softwarehut.com/blog/general-purpose-computing-gpu-net-world-part-1/
    //https://stackoverflow.com/questions/2392250/understanding-cuda-grid-dimensions-block-dimensions-and-threads-organization-s
    //myKernel.GridDimensions = new dim3(1, 1, 1);
    //myKernel.BlockDimensions = new dim3(16, 16);

    // init input parameters
    input1_dev = new CudaDeviceVariable<int>(DataGenerator.In1.Length);
    input2_dev = new CudaDeviceVariable<int>(DataGenerator.In2.Length);
    input3_dev = new CudaDeviceVariable<double>(DataGenerator.In3.Length);
    input4_dev = new CudaDeviceVariable<byte>(DataGenerator.In4_3_bytes.Length);

    // output buffers
    result_dev = new CudaDeviceVariable<byte>(resultsBytes.Length);
    resultCalc_dev = new CudaDeviceVariable<double>(calculatables.Length);

    // copy input parameters
    input1_dev.CopyToDevice(DataGenerator.In1);
    input2_dev.CopyToDevice(DataGenerator.In2);
    input3_dev.CopyToDevice(DataGenerator.In3);
    input4_dev.CopyToDevice(DataGenerator.In4_3_bytes);

    // init output parameters
    //result_dev = new CudaDeviceVariable<bool>(results.Length);
    //myKernel.SetConstantVariable("width", DataGenerator.Width);
    //myKernel.SetConstantVariable("inputCount", N);
    //myKernel.SetConstantVariable("height", DataGenerator.Height);
}
/// <summary>
/// Resolves the device address and size of the module global that <paramref name="value"/>
/// refers to. Constant bit-cast expressions are unwrapped by recursing into their first operand.
/// </summary>
/// <param name="value">LLVM value naming the global, possibly wrapped in a bit-cast constant expression.</param>
/// <param name="module">Module that is queried with <c>cuModuleGetGlobal_v2</c>.</param>
/// <param name="size">Receives the size reported by the driver.</param>
/// <returns>The device pointer reported by the driver.</returns>
/// <exception cref="CudaException">The driver call did not return <c>CUResult.Success</c>.</exception>
public static CUdeviceptr GetGlobalAddress(LLVMValueRef value, CUmodule module, out SizeT size)
{
    bool isBitCastExpression =
        value.IsAConstantExpr().Pointer != IntPtr.Zero
        && value.GetConstOpcode() == LLVMOpcode.LLVMBitCast;
    if (isBitCastExpression)
    {
        // Look through the cast at the value it wraps.
        return GetGlobalAddress(value.GetOperand(0), module, out size);
    }

    var address = new CUdeviceptr();
    size = new SizeT();

    var status = ManagedCuda.DriverAPINativeMethods.ModuleManagement.cuModuleGetGlobal_v2(
        ref address, ref size, module, GetGlobalName(value));
    if (status != ManagedCuda.BasicTypes.CUResult.Success)
    {
        throw new CudaException(status);
    }

    return address;
}
/// <summary>
/// Loads "opticalFlow.ptx", creates the optical-flow kernels and allocates the working images,
/// the statistics buffers and the three 2x2 filter coefficient arrays.
/// </summary>
/// <param name="width">Width of all working images.</param>
/// <param name="height">Height of all working images.</param>
/// <param name="ctx">Context used to load the module and create the kernels.</param>
public OpticalFlow(int width, int height, CudaContext ctx)
{
    CUmodule flowModule = ctx.LoadModulePTX("opticalFlow.ptx");

    warpingKernel = new WarpingKernel(ctx, flowModule);
    createFlowFieldFromTiles = new CreateFlowFieldFromTiles(ctx, flowModule);
    computeDerivativesKernel = new ComputeDerivativesKernel(ctx, flowModule);
    lukasKanade = new LukasKanadeKernel(ctx, flowModule);

    // Working images, all of the requested size.
    d_tmp = new NPPImage_32fC1(width, height);
    d_Ix = new NPPImage_32fC1(width, height);
    d_Iy = new NPPImage_32fC1(width, height);
    d_Iz = new NPPImage_32fC1(width, height);
    d_flow = new NPPImage_32fC2(width, height);

    // Scratch buffer: three times the host size reported for the mean/std-dev computation.
    buffer = new CudaDeviceVariable<byte>(d_tmp.MeanStdDevGetBufferHostSize() * 3);
    mean = new CudaDeviceVariable<double>(1);
    std = new CudaDeviceVariable<double>(1);

    d_filterX = new float[] { -0.25f, 0.25f, -0.25f, 0.25f };
    d_filterY = new float[] { -0.25f, -0.25f, 0.25f, 0.25f };
    d_filterT = new float[] { 0.25f, 0.25f, 0.25f, 0.25f };
}
/// <summary>
/// Stores <paramref name="context"/> and loads the module at <paramref name="path"/> through it.
/// </summary>
/// <param name="context">Context that is stored and used to load the module.</param>
/// <param name="path">Path passed to <c>LoadModule</c>.</param>
public KernelModule(CudaContext context, string path)
{
    _context = context;

    var loadedModule = _context.LoadModule(path);
    _module = loadedModule;
}
public static void For(int number_of_threads, SimpleKernel simpleKernel) { if (Campy.Utils.Options.IsOn("import-only")) { JustImport(simpleKernel); return; } GCHandle handle1 = default(GCHandle); GCHandle handle2 = default(GCHandle); try { unsafe { System.Reflection.MethodInfo method_info = simpleKernel.Method; String kernel_assembly_file_name = method_info.DeclaringType.Assembly.Location; Mono.Cecil.ModuleDefinition md = Campy.Meta.StickyReadMod.StickyReadModule( kernel_assembly_file_name, new ReaderParameters { ReadSymbols = true }); MethodReference method_reference = md.ImportReference(method_info); CUfunction ptr_to_kernel = default(CUfunction); CUmodule module = default(CUmodule); Campy.Utils.TimePhase.Time("compile ", () => { IntPtr image = Singleton._compiler.Compile(method_reference, simpleKernel.Target); module = Singleton._compiler.SetModule(method_reference, image); Singleton._compiler.StoreJits(module); ptr_to_kernel = Singleton._compiler.GetCudaFunction(method_reference, module); }); RUNTIME.BclCheckHeap(); BUFFERS buffer = Singleton.Buffer; IntPtr kernel_target_object = IntPtr.Zero; Campy.Utils.TimePhase.Time("deep copy ", () => { int count = simpleKernel.Method.GetParameters().Length; var bb = Singleton._compiler.GetBasicBlock(method_reference); if (bb.HasThis) { count++; } if (!(count == 1 || count == 2)) { throw new Exception("Expecting at least one parameter for kernel."); } if (bb.HasThis) { kernel_target_object = buffer.AddDataStructure(simpleKernel.Target); } }); Campy.Utils.TimePhase.Time("kernel cctor set up", () => { // For each cctor, run on GPU. // Construct dependency graph of methods. List <MethodReference> order_list = COMPILER.Singleton.ConstructCctorOrder(); // Finally, call cctors. 
foreach (var bb in order_list) { if (Campy.Utils.Options.IsOn("trace-cctors")) { System.Console.WriteLine("Executing cctor " + bb.FullName); } var cctor = Singleton._compiler.GetCudaFunction(bb, module); var res = CUresult.CUDA_SUCCESS; Campy.Utils.CudaHelpers.MakeLinearTiling(1, out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles); res = Cuda.cuLaunchKernel( cctor, tiles.x, tiles.y, tiles.z, // grid has one block. tile_size.x, tile_size.y, tile_size.z, // n threads. 0, // no shared memory default(CUstream), (IntPtr)IntPtr.Zero, (IntPtr)IntPtr.Zero ); CudaHelpers.CheckCudaError(res); res = Cuda.cuCtxSynchronize(); // Make sure it's copied back to host. CudaHelpers.CheckCudaError(res); } }); if (Campy.Utils.Options.IsOn("trace-cctors")) { System.Console.WriteLine("Done with cctors"); } Campy.Utils.TimePhase.Time("kernel call ", () => { IntPtr[] parm1 = new IntPtr[1]; IntPtr[] parm2 = new IntPtr[1]; parm1[0] = kernel_target_object; parm2[0] = buffer.New(BUFFERS.SizeOf(typeof(int))); IntPtr[] x1 = parm1; handle1 = GCHandle.Alloc(x1, GCHandleType.Pinned); IntPtr pointer1 = handle1.AddrOfPinnedObject(); IntPtr[] x2 = parm2; handle2 = GCHandle.Alloc(x2, GCHandleType.Pinned); IntPtr pointer2 = handle2.AddrOfPinnedObject(); IntPtr[] kp = new IntPtr[] { pointer1, pointer2 }; var res = CUresult.CUDA_SUCCESS; fixed(IntPtr * kernelParams = kp) { Campy.Utils.CudaHelpers.MakeLinearTiling(number_of_threads, out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles); //MakeLinearTiling(1, out dim3 tile_size, out dim3 tiles); res = Cuda.cuLaunchKernel( ptr_to_kernel, tiles.x, tiles.y, tiles.z, // grid has one block. tile_size.x, tile_size.y, tile_size.z, // n threads. 0, // no shared memory default(CUstream), (IntPtr)kernelParams, (IntPtr)IntPtr.Zero ); }
//public void SetConstantVariable(string name, CUdeviceptr value) { m_kernel.SetConstantVariable(name, value); }

/// <summary>
/// Creates the wrapped <c>CudaKernel</c> for <paramref name="kernelName"/> and records the GPU
/// index and the kernel's maximum number of threads per block.
/// </summary>
/// <param name="kernelName">Name of the kernel inside <paramref name="module"/>.</param>
/// <param name="module">Module containing the kernel.</param>
/// <param name="cuda">Context the kernel is created in.</param>
/// <param name="GPU">Index stored in <c>m_GPU</c>.</param>
public MyCudaKernel(string kernelName, CUmodule module, CudaContext cuda, int GPU)
{
    m_GPU = GPU;

    var wrappedKernel = new CudaKernel(kernelName, module, cuda);
    m_kernel = wrappedKernel;
    MAX_THREADS = m_kernel.MaxThreadsPerBlock;
}
/// <summary>
/// Loads a module from the image at <paramref name="img"/> via <c>cuModuleLoadData</c> and
/// passes the result to <c>TestResult</c>; see CUDA doc.
/// </summary>
/// <param name="module">Receives the module handle written by the native call.</param>
/// <param name="img">Pointer handed to the native call unchanged.</param>
public static void ModuleLoadData(out CUmodule module, IntPtr img)
{
    CUresult status = my.cuModuleLoadData(out module, img);

    TestResult(status);
}
/// <summary>
/// Creates a new mipmapped texture from array memory. Allocates a new mipmapped array,
/// configures the texture reference <paramref name="texName"/> of the kernel's module and
/// binds the new array to it.
/// </summary>
/// <param name="kernel">Kernel whose module is searched for the texture reference.</param>
/// <param name="texName">Name of the texture reference inside the module.</param>
/// <param name="addressMode0">Address mode set for dimension 0.</param>
/// <param name="addressMode1">Address mode set for dimension 1.</param>
/// <param name="addressMode2">Address mode set for dimension 2.</param>
/// <param name="filterMode">Filter mode set on the texture reference.</param>
/// <param name="flags">Flags set on the texture reference.</param>
/// <param name="descriptor">Descriptor of the array to allocate; also supplies format and channel count.</param>
/// <param name="numMipmapLevels">Number of mipmap levels of the allocated array.</param>
/// <param name="maxAniso">Value passed to <c>cuTexRefSetMaxAnisotropy</c>.</param>
/// <param name="mipmapFilterMode">Value passed to <c>cuTexRefSetMipmapFilterMode</c>.</param>
/// <param name="mipmapLevelBias">Value passed to <c>cuTexRefSetMipmapLevelBias</c>.</param>
/// <param name="minMipmapLevelClamp">Lower value passed to <c>cuTexRefSetMipmapLevelClamp</c>.</param>
/// <param name="maxMipmapLevelClamp">Upper value passed to <c>cuTexRefSetMipmapLevelClamp</c>.</param>
/// <exception cref="CudaException">Thrown as soon as one driver call does not return <c>CUResult.Success</c>.</exception>
public CudaTextureMipmappedArray(CudaKernel kernel, string texName, CUAddressMode addressMode0, CUAddressMode addressMode1, CUAddressMode addressMode2, CUFilterMode filterMode, CUTexRefSetFlags flags, CUDAArray3DDescriptor descriptor, uint numMipmapLevels, uint maxAniso, CUFilterMode mipmapFilterMode, float mipmapLevelBias, float minMipmapLevelClamp, float maxMipmapLevelClamp)
{
    // Mipmap settings are remembered before any driver call is made.
    _maxAniso = maxAniso;
    _mipmapFilterMode = mipmapFilterMode;
    _mipmapLevelBias = mipmapLevelBias;
    _minMipmapLevelClamp = minMipmapLevelClamp;
    _maxMipmapLevelClamp = maxMipmapLevelClamp;

    _texref = new CUtexref();

    // Every driver call below is followed by an (empty) debug write and a result check;
    // the formatted trace output that used to accompany each call is disabled.
    res = DriverAPINativeMethods.ModuleManagement.cuModuleGetTexRef(ref _texref, kernel.CUModule, texName);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 0, addressMode0);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 1, addressMode1);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 2, addressMode2);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFilterMode(_texref, filterMode);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFlags(_texref, flags);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFormat(_texref, descriptor.Format, (int)descriptor.NumChannels);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    // Remember the configuration that was applied.
    _filtermode = filterMode;
    _flags = flags;
    _addressMode0 = addressMode0;
    _addressMode1 = addressMode1;
    _addressMode2 = addressMode2;
    _arrayDescriptor = descriptor;
    _name = texName;
    _module = kernel.CUModule;
    _cufunction = kernel.CUFunction;

    // Allocate the backing array and bind it to the texture reference.
    _array = new CudaMipmappedArray(descriptor, numMipmapLevels);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetMipmappedArray(_texref, _array.CUMipmappedArray, CUTexRefSetArrayFlags.OverrideFormat);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetMaxAnisotropy(_texref, maxAniso);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetMipmapFilterMode(_texref, mipmapFilterMode);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetMipmapLevelBias(_texref, mipmapLevelBias);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetMipmapLevelClamp(_texref, minMipmapLevelClamp, maxMipmapLevelClamp);
    Debug.Write("");
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }
}
/// <summary>
/// Program entry point. Parses the command line, loads the PTX modules and kernels, builds the
/// shuffled list of training files with their white balance factors, sets up a three-layer
/// convolutional network and then either evaluates stored epoch snapshots on the held-out files
/// ("-t") or trains the network and writes one snapshot per epoch.
/// </summary>
/// <param name="args">
/// Command line options: "-d &lt;id&gt;" device ID, "-lr &lt;rate&gt;" learning rate,
/// "-iso &lt;name&gt;" ISO sub folder of the data set, "-t" evaluate stored snapshots instead of
/// training, "-w &lt;epoch&gt;" warm start at the given epoch, "-s" save preview images.
/// </param>
/// <exception cref="ArgumentException">An option that needs a value is the last argument.</exception>
/// <exception cref="InvalidDataException">A white balance file is too short or has a malformed line.</exception>
static void Main(string[] args)
{
    //Read CL arguments. A switch is used so that a value consumed by one option is never
    //interpreted as another option afterwards.
    for (int i = 0; i < args.Length; i++)
    {
        switch (args[i])
        {
            case "-d":
                deviceID = int.Parse(RequireOptionValue(args, ref i), CultureInfo.InvariantCulture);
                break;
            case "-lr":
                learning_rate = double.Parse(RequireOptionValue(args, ref i), System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture);
                break;
            case "-iso":
                ISO = RequireOptionValue(args, ref i);
                break;
            case "-t":
                crosscheck = true;
                break;
            case "-w":
                warmStart = int.Parse(RequireOptionValue(args, ref i), CultureInfo.InvariantCulture);
                Console.WriteLine("Start with epoch " + warmStart);
                break;
            case "-s":
                saveImages = true;
                break;
        }
    }

    Console.WriteLine("Using device ID: " + deviceID);
    Console.WriteLine("Learning rate: " + learning_rate);

    //Init Cuda stuff
    ctx = new PrimaryContext(deviceID);
    ctx.SetCurrent();
    Console.WriteLine("Context created");
    CUmodule modPatch = ctx.LoadModulePTX("PatchProcessing.ptx");
    Console.WriteLine("modPatch loaded");
    CUmodule modBorder = ctx.LoadModulePTX("BorderTreatment.ptx");
    Console.WriteLine("modBorder loaded");
    CUmodule modError = ctx.LoadModulePTX("ErrorComputation.ptx");
    Console.WriteLine("modError loaded");
    CUmodule modPRelu = ctx.LoadModulePTX("PRelu.ptx");
    Console.WriteLine("modPRelu loaded");
    CUmodule modDeBayer = ctx.LoadModulePTX("DeBayer.ptx");
    Console.WriteLine("all modules loaded");

    deBayerGreenKernel = new DeBayerGreenKernel(modDeBayer, ctx);
    deBayerRedBlueKernel = new DeBayerRedBlueKernel(modDeBayer, ctx);
    //Both deBayer kernels are loaded from the same module: setting the constant variable
    //for the bayer pattern on one of them is enough.
    deBayerGreenKernel.BayerPattern = new BayerColor[] { BayerColor.Red, BayerColor.Green, BayerColor.Green, BayerColor.Blue };

    prepareDataKernel = new PrepareDataKernel(modPatch, ctx);
    restoreImageKernel = new RestoreImageKernel(modPatch, ctx);
    Console.WriteLine("kernels loaded");

    int countOwn = 468083;
    int count5k = 33408;
    int totalCount = countOwn + count5k;

    string fileBase = @"/ssd/data/TrainingsDataNN/";
    string whiteBalanceOwnPath = fileBase + "FromOwnDataset/WhiteBalancesOwn.txt";
    string whiteBalance5kPath = fileBase + "From5kDataset/WhiteBalances5k.txt";

    List<float3> whiteBalanceFactors = new List<float3>();

    //Both files are opened up front (as before) so that a missing file is reported before any
    //line is read; the using statements release the handles even when parsing fails.
    using (FileStream fs1 = new FileStream(whiteBalanceOwnPath, FileMode.Open, FileAccess.Read))
    using (FileStream fs2 = new FileStream(whiteBalance5kPath, FileMode.Open, FileAccess.Read))
    using (StreamReader sr1 = new StreamReader(fs1))
    using (StreamReader sr2 = new StreamReader(fs2))
    {
        for (int i = 0; i < countOwn; i++)
        {
            fileRawList.Add(fileBase + "FromOwnDataset/ISO" + ISO + "/img_" + i.ToString("0000000") + ".bin");
            fileTrouthList.Add(fileBase + "FromOwnDataset/GroundTruth/img_" + i.ToString("0000000") + ".bin");
            whiteBalanceFactors.Add(ReadWhiteBalanceFactors(sr1, whiteBalanceOwnPath));
        }
        for (int i = 0; i < count5k; i++)
        {
            fileRawList.Add(fileBase + "From5kDataset/ISO" + ISO + "/img_" + i.ToString("0000000") + ".bin");
            fileTrouthList.Add(fileBase + "From5kDataset/GroundTruth/img_" + i.ToString("0000000") + ".bin");
            whiteBalanceFactors.Add(ReadWhiteBalanceFactors(sr2, whiteBalance5kPath));
        }
    }

    //Host side caches, filled the first time a file is read.
    baOriginal = new float3[totalCount][];
    baRAW = new float[totalCount][];

    //random order for the image patches; the fixed seed keeps the order identical between runs.
    //The three lists are permuted together so that index i keeps addressing the same sample.
    Random rand = new Random(0);
    for (int i = 0; i < totalCount - 1; i++)
    {
        int r = i + (rand.Next() % (totalCount - i));

        string temp = fileRawList[i];
        fileRawList[i] = fileRawList[r];
        fileRawList[r] = temp;

        temp = fileTrouthList[i];
        fileTrouthList[i] = fileTrouthList[r];
        fileTrouthList[r] = temp;

        float3 tempf = whiteBalanceFactors[i];
        whiteBalanceFactors[i] = whiteBalanceFactors[r];
        whiteBalanceFactors[r] = tempf;
    }

    Console.WriteLine("Initialization done!");

    //The first 90% of the shuffled files are used for training, the rest for validation.
    int trainingSize = (int)(totalCount * 0.9f); //4 patches per file
    int testSize = fileRawList.Count - trainingSize;

    CudaBlas blas = new CudaBlas(PointerMode.Host);
    CudaDNNContext cudnn = new CudaDNNContext();

    int patchSize = 31;
    int patchSize4 = 66; //Size of an 2x2 patch read from file
    int batch = 64;
    float normalization = 0.5f;

    //define neural network:
    StartLayer start = new StartLayer(patchSize, patchSize, 3, batch);
    FinalLayer final = new FinalLayer(patchSize, patchSize, 3, batch, FinalLayer.Norm.Mix, ctx, modError);
    ConvolutionalLayer conv1 = new ConvolutionalLayer(patchSize, patchSize, 3, patchSize, patchSize, 64, batch, 9, 9, ConvolutionalLayer.Activation.PRelu, blas, cudnn, ctx, modBorder, modPRelu);
    ConvolutionalLayer conv2 = new ConvolutionalLayer(patchSize, patchSize, 64, patchSize, patchSize, 64, batch, 5, 5, ConvolutionalLayer.Activation.PRelu, blas, cudnn, ctx, modBorder, modPRelu);
    ConvolutionalLayer conv3 = new ConvolutionalLayer(patchSize, patchSize, 64, patchSize, patchSize, 3, batch, 5, 5, ConvolutionalLayer.Activation.None, blas, cudnn, ctx, modBorder, modPRelu);
    start.ConnectFollowingLayer(conv1);
    conv1.ConnectFollowingLayer(conv2);
    conv2.ConnectFollowingLayer(conv3);
    conv3.ConnectFollowingLayer(final);

    CudaDeviceVariable<float3> imgA = new CudaDeviceVariable<float3>(patchSize4 * patchSize4);
    CudaDeviceVariable<float3> imgB = new CudaDeviceVariable<float3>(patchSize4 * patchSize4);
    CudaDeviceVariable<float> rawd = new CudaDeviceVariable<float>(patchSize4 * patchSize4);
    CudaDeviceVariable<float> inputImgs = new CudaDeviceVariable<float>(patchSize * patchSize * 3 * batch);
    CudaDeviceVariable<float> groundTrouth = new CudaDeviceVariable<float>(patchSize * patchSize * 3 * batch);
    NPPImage_8uC3 imgU3a = new NPPImage_8uC3(patchSize, patchSize);
    NPPImage_8uC3 imgU3b = new NPPImage_8uC3(patchSize, patchSize);
    NPPImage_8uC3 imgU3c = new NPPImage_8uC3(patchSize, patchSize);

    Bitmap a = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);
    Bitmap b = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);
    Bitmap c = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);

    Random randImageOutput = new Random(0);
    Random randForInit = new Random(0);

    start.InitRandomWeight(randForInit);
    conv1.SetActivation(0.1f);
    conv2.SetActivation(0.1f);

    int startEpoch = warmStart;

    //restore network in case of warm start: the snapshot of the previous epoch is loaded
    if (warmStart > 0)
    {
        using (FileStream snapshot = new FileStream("epoch_" + BuildRunTag() + "_" + (warmStart - 1) + ".cnn", FileMode.Open, FileAccess.Read))
        {
            start.RestoreValues(snapshot);
        }
    }

    //Every file contributes 4 patches, hence one batch consumes batch / 4 files.
    int filesPerBatch = batch / 4;

    //validate results on validation data set
    if (crosscheck)
    {
        using (FileStream csvResult = new FileStream("results_" + BuildRunTag() + ".csv", FileMode.Append, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(csvResult))
        {
            sw.WriteLine("L1;L2;Mix;Filename");

            int testIterations = testSize / batch * 4;
            for (int i = 0; i < 2000; i += 1)
            {
                string filename = "epoch_" + BuildRunTag() + "_" + i + ".cnn";
                try
                {
                    //using guarantees the snapshot file is closed even if restoring fails
                    using (FileStream cnn = new FileStream(filename, FileMode.Open, FileAccess.Read))
                    {
                        start.RestoreValues(cnn);
                    }
                }
                catch (Exception)
                {
                    //Deliberately best effort: epochs without a readable snapshot are skipped.
                    Console.WriteLine("Skipping: " + i);
                    continue;
                }

                double errorL1 = 0;
                double errorL2 = 0;
                double errorMix = 0;
                for (int iter = 0; iter < testIterations; iter++)
                {
                    //Prepare batch for validation: validation files follow the training files
                    for (int ba = 0; ba < filesPerBatch; ba++)
                    {
                        int idx = iter * filesPerBatch + ba + trainingSize;
                        int cacheIdx = idx - trainingSize;
                        float3[] original;
                        float[] raw;
                        if (baRAW[cacheIdx] == null)
                        {
                            original = ReadRAWFloat3(fileTrouthList[idx]);
                            raw = ReadRAWFloat(fileRawList[idx]);
                            baOriginal[cacheIdx] = original;
                            baRAW[cacheIdx] = raw;
                        }
                        else
                        {
                            original = baOriginal[cacheIdx];
                            raw = baRAW[cacheIdx];
                        }

                        rawd.CopyToDevice(raw);
                        imgA.CopyToDevice(original);
                        deBayerGreenKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), whiteBalanceFactors[idx]);
                        deBayerRedBlueKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), whiteBalanceFactors[idx]);
                        prepareDataKernel.RunSafe(imgA, imgB, groundTrouth, inputImgs, ba, normalization, whiteBalanceFactors[idx]);
                    }

                    start.SetData(inputImgs);
                    final.SetGroundTrouth(groundTrouth);

                    float err = start.InferenceTraining(inputImgs);
                    errorMix += err;
                    errorL1 += final.GetError(FinalLayer.Norm.L1);
                    errorL2 += final.GetError(FinalLayer.Norm.L2);
                }

                double meanL1 = errorL1 / testSize * batch / 4;
                double meanL2 = errorL2 / testSize * batch / 4;
                double meanMix = errorMix / testSize * batch / 4;

                Console.WriteLine("Results for: " + filename);
                Console.WriteLine("Mean Error L1: " + meanL1);
                Console.WriteLine("Mean Error L2: " + meanL2);
                Console.WriteLine("Mean Error Mix: " + meanMix);

                //The CSV uses ';' as separator and ',' as decimal mark. Formatting with the
                //invariant culture first makes the output independent of the machine's locale.
                sw.Write(meanL1.ToString(CultureInfo.InvariantCulture).Replace(".", ","));
                sw.Write(";");
                sw.Write(meanL2.ToString(CultureInfo.InvariantCulture).Replace(".", ","));
                sw.Write(";");
                sw.Write(meanMix.ToString(CultureInfo.InvariantCulture).Replace(".", ","));
                sw.Write(";");
                sw.WriteLine(filename);
                sw.Flush();
            }
        }
    }
    //or train existing network:
    else
    {
        int iterationsPerEpoch = trainingSize / batch * 4;
        for (int epoch = startEpoch; epoch < 2000; epoch++)
        {
            double errorEpoch = 0;
            //NOTE(review): this accumulator restarts with every epoch, so the first status line
            //of an epoch sums fewer than 1000 iterations but is still divided by 1000.
            double error = 0;

            for (int iter = 0; iter < iterationsPerEpoch; iter++)
            {
                //Prepare batch for training:
                for (int ba = 0; ba < filesPerBatch; ba++)
                {
                    int idx = iter * filesPerBatch + ba;
                    float3[] original;
                    float[] raw;
                    if (baRAW[idx] == null)
                    {
                        original = ReadRAWFloat3(fileTrouthList[idx]);
                        raw = ReadRAWFloat(fileRawList[idx]);
                        baOriginal[idx] = original;
                        baRAW[idx] = raw;
                    }
                    else
                    {
                        original = baOriginal[idx];
                        raw = baRAW[idx];
                    }

                    rawd.CopyToDevice(raw);
                    imgA.CopyToDevice(original);
                    deBayerGreenKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), whiteBalanceFactors[idx]);
                    deBayerRedBlueKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), whiteBalanceFactors[idx]);
                    prepareDataKernel.RunSafe(imgA, imgB, groundTrouth, inputImgs, ba, normalization, whiteBalanceFactors[idx]);
                }

                //Iteration counter across all epochs; drives the learning rate and the logging.
                int globalIteration = epoch * trainingSize / batch * 4 + iter;

                start.SetData(inputImgs);
                final.SetGroundTrouth(groundTrouth);

                float err = start.InferenceTraining(inputImgs);
                final.BackPropagation(groundTrouth);
                start.UpdateWeights(GetLearningRate(globalIteration));

                error += err;
                errorEpoch += err;

                if (globalIteration % 1000 == 0 && iter != 0)
                {
                    using (FileStream status = new FileStream("status_" + BuildRunTag() + ".csv", FileMode.Append, FileAccess.Write))
                    using (StreamWriter sw = new StreamWriter(status))
                    {
                        sw.WriteLine((error / 1000.0).ToString(CultureInfo.InvariantCulture).Replace(".", ",") + ";" + GetLearningRate(globalIteration).ToString(CultureInfo.InvariantCulture).Replace(".", ","));
                    }
                    error = 0;
                }

                //Per-iteration snapshots ("iter_*.cnn" every 10000 iterations) are disabled;
                //only the per-epoch snapshot below is written.

                Console.WriteLine("Epoch: " + epoch + " Iteration: " + globalIteration + ", Error: " + err);

                //Once per epoch, store ground truth, network input and network output of one
                //randomly chosen patch of the first batch as PNG files.
                if (saveImages && iter == 0)
                {
                    int imgidx = randImageOutput.Next(batch);
                    //4 consecutive patches share the white balance of the file they came from
                    float3 wb = whiteBalanceFactors[iter * filesPerBatch + imgidx / 4];
                    restoreImageKernel.RunSafe(groundTrouth, imgU3a, imgidx, wb.x, wb.y, wb.z, normalization);
                    restoreImageKernel.RunSafe(inputImgs, imgU3b, imgidx, wb.x, wb.y, wb.z, normalization);
                    CudaDeviceVariable<float> res = final.GetResult();
                    restoreImageKernel.RunSafe(res, imgU3c, imgidx, wb.x, wb.y, wb.z, normalization);

                    imgU3a.CopyToHost(a);
                    imgU3b.CopyToHost(b);
                    imgU3c.CopyToHost(c);

                    a.Save("GroundTrouth_" + BuildRunTag() + "_" + epoch + "_" + imgidx + ".png");
                    b.Save("Input_" + BuildRunTag() + "_" + epoch + "_" + imgidx + ".png");
                    c.Save("Result_" + BuildRunTag() + "_" + epoch + "_" + imgidx + ".png");
                }
            }

            errorEpoch /= iterationsPerEpoch;

            using (FileStream epochLog = new FileStream("errorEpoch_" + BuildRunTag() + ".csv", FileMode.Append, FileAccess.Write))
            using (StreamWriter sw2 = new StreamWriter(epochLog))
            {
                sw2.WriteLine(errorEpoch.ToString(CultureInfo.InvariantCulture).Replace(".", ","));
            }

            using (FileStream snapshot = new FileStream("epoch_" + BuildRunTag() + "_" + epoch + ".cnn", FileMode.Create, FileAccess.Write))
            {
                start.SaveValues(snapshot);
            }
        }
    }
}

/// <summary>
/// Returns the value that follows the option at <paramref name="index"/> and advances
/// <paramref name="index"/> onto that value.
/// </summary>
/// <exception cref="ArgumentException">The option is the last command line argument.</exception>
private static string RequireOptionValue(string[] args, ref int index)
{
    if (index + 1 >= args.Length)
    {
        throw new ArgumentException("Missing value for command line option " + args[index]);
    }
    index++;
    return args[index];
}

/// <summary>
/// Reads the next tab separated line from <paramref name="reader"/> and parses columns 1 to 3
/// as the white balance factors of one image.
/// </summary>
/// <exception cref="InvalidDataException">The file ended early or the line has fewer than four columns.</exception>
private static float3 ReadWhiteBalanceFactors(StreamReader reader, string fileName)
{
    string line = reader.ReadLine();
    if (line == null)
    {
        throw new InvalidDataException("Unexpected end of white balance file " + fileName);
    }

    string[] values = line.Split('\t');
    if (values.Length < 4)
    {
        throw new InvalidDataException("Malformed line in white balance file " + fileName + ": " + line);
    }

    return new float3(
        float.Parse(values[1], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
        float.Parse(values[2], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
        float.Parse(values[3], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture));
}

/// <summary>
/// Builds the "&lt;learning rate&gt;_&lt;ISO&gt;" tag shared by all file names of a run. It is
/// evaluated on every call so the current field values are always used.
/// </summary>
private static string BuildRunTag()
{
    return learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO;
}