/// <summary>Returns the matrix product M1*M2, computed on the OpenCL device by the floatMatrixMultLocals kernel.</summary>
/// <param name="M1">First matrix (p x q)</param>
/// <param name="M2">Second matrix (q x r)</param>
/// <returns>The p x r product, rebuilt from the device result vector</returns>
/// <exception cref="Exception">Thrown when the column count of M1 differs from the row count of M2</exception>
public float[,] MultiplyLocals(float[,] M1, float[,] M2)
{
    //M1 is p x q, M2 is q x r
    int p = M1.GetLength(0);
    int q = M1.GetLength(1);
    int r = M2.GetLength(1);

    if (q != M2.GetLength(0)) throw new Exception("Matrix dimensions do not match for multiplication");

    //Flattens both operands; the result vector holds p*r elements
    float[] vecM1 = MatrixToVector(M1, ref p, ref q);
    float[] vecM2 = MatrixToVector(M2, ref q, ref r);
    float[] vecResp = new float[p * r];

    CLCalc.Program.Variable varResp = null;
    CLCalc.Program.Variable varM1 = null;
    CLCalc.Program.Variable varM2 = null;
    CLCalc.Program.Variable varQ = null;

    try
    {
        varResp = new CLCalc.Program.Variable(vecResp);
        varM1 = new CLCalc.Program.Variable(vecM1);
        varM2 = new CLCalc.Program.Variable(vecM2);

        //Shared dimension q is passed to the kernel as a one-element buffer
        int[] vecQ = new int[1] { q };
        varQ = new CLCalc.Program.Variable(vecQ);

        CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[4] { varResp, varM1, varM2, varQ };
        int[] max = new int[2] { p, r };

        //NOTE(review): the fixed 8x8 local work size presumably requires p and r to be
        //multiples of 8 - confirm against the kernel and the Execute wrapper
        floatMatrixMultLocals.Execute(args, max, new int[] { 8, 8 });

        varResp.ReadFromDeviceTo(vecResp);
    }
    finally
    {
        //Releases every device buffer created here, also when an exception is thrown
        if (varQ != null) varQ.Dispose();
        if (varM2 != null) varM2.Dispose();
        if (varM1 != null) varM1.Dispose();
        if (varResp != null) varResp.Dispose();
    }

    return VectorToMatrix(vecResp, ref p, ref r);
}
/// <summary>Program entry point. Runs the floatVectorSum kernel and the CPU routine <c>count</c> times each
/// and prints the elapsed stopwatch ticks of both runs.</summary>
static void Main()
{
    //Sets up the OpenCL platforms and devices
    CLCalc.InitCL();

    //Host-side operands and result buffer, n elements each
    float[] v1 = new float[n];
    float[] v2 = new float[n];
    float[] vResult = new float[n];

    //Fills the operands: v1[k] = k/10, v2[k] = -k/9
    for (int k = 0; k < n; k++)
    {
        v1[k] = (float)k / 10;
        v2[k] = -(float)k / 9;
    }

    //The source is handed over as a string array because it may be split into several strings
    string[] sources = new string[] { vecSum };
    CLCalc.Program.Compile(sources);

    //Host handle to the floatVectorSum kernel
    CLCalc.Program.Kernel sumKernel = new CLCalc.Program.Kernel("floatVectorSum");

    //Device copies of v1 and v2, which are also the kernel arguments
    CLCalc.Program.Variable varV1 = new CLCalc.Program.Variable(v1);
    CLCalc.Program.Variable varV2 = new CLCalc.Program.Variable(v2);
    CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[] { varV1, varV2 };

    //One work-item per element
    int[] workers = new int[] { n };

    //Timed OpenCL run
    sw.Start();
    for (int pass = 0; pass < count; pass++)
    {
        DoOCL(sumKernel, kernelArgs, workers);
    }
    sw.Stop();

    //Copies device memory of varV1 into vResult
    varV1.ReadFromDeviceTo(vResult);
    Console.WriteLine("OpenCL: {0}", sw.ElapsedTicks);

    //Timed CPU run
    sw.Restart();
    for (int pass = 0; pass < count; pass++)
    {
        DoCPU(v1, v2, vResult);
    }
    sw.Stop();
    Console.WriteLine("CPU: {0}", sw.ElapsedTicks);

    PressAny();
}
/// <summary>Creates an ImageData from a bitmap: stores its size, reads its contents into the local
/// Data array (3 bytes per pixel) and creates the matching OpenCL variable.</summary>
/// <param name="bmp">Bitmap to read from</param>
public ImageData(Bitmap bmp)
{
    //Initializes OpenCL on first use
    bool clStateUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (clStateUnknown)
    {
        CLCalc.InitCL();
    }

    width = bmp.Width;
    height = bmp.Height;

    //Host-side storage
    int byteCount = 3 * width * height;
    Data = new byte[byteCount];

    //Fills Data from the bitmap
    ReadToLocalData(bmp);

    //Device-side copy of Data
    varData = new CLCalc.Program.Variable(Data);
}
/// <summary>Computes the Discrete Fourier Transform of an interleaved complex vector of doubles.
/// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... }, with Length = 2*pow(16,n).</summary>
/// <param name="x">Interleaved real/imaginary input values</param>
/// <returns>A new array of the same length holding the transform read back from the device</returns>
public static double[] FFT16(double[] x)
{
    //(Re)creates the cached device buffers when missing or of a different length
    bool buffersStale = CLx == null || CLx.OriginalVarLength != x.Length;
    if (buffersStale)
    {
        CLx = new CLCalc.Program.Variable(x);
        CLy = new CLCalc.Program.Variable(x);
    }

    //Uploads the input
    CLx.WriteToDevice(x);

    //Device-side transform
    CLy = FFT16(ref CLx);

    //Downloads the result
    double[] transformed = new double[x.Length];
    CLy.ReadFromDeviceTo(transformed);
    return transformed;
}
/// <summary>Computes the Discrete Fourier Transform of a double2 vector already stored on the device.
/// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... }, with vector length = 2*pow(16,n).</summary>
/// <param name="CLx">Device variable holding the input. The kernel ping-pongs between this variable and the
/// static CLy, so on return this reference may have been swapped with CLy.</param>
/// <returns>The static CLy, which holds the result of the last pass; null when OpenCL acceleration is not in use</returns>
/// <exception cref="Exception">Thrown when the vector length is not 2*pow(16,n)</exception>
public static CLCalc.Program.Variable FFT16(ref CLCalc.Program.Variable CLx)
{
    //Nothing can be computed without OpenCL; checked first so no device buffer is allocated in that case
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL) return null;

    int length = CLx.OriginalVarLength;

    //Validates length == 2*16^k with integer arithmetic (a truncated Math.Log can be off by one due to rounding).
    //A positive power of 16 has a single bit set, located at a bit position that is a multiple of 4.
    int complexCount = length >> 1;
    bool isTwiceAPowerOf16 = length == (complexCount << 1)
        && complexCount > 0
        && (complexCount & (complexCount - 1)) == 0
        && (complexCount & 0x11111111) != 0;
    if (!isTwiceAPowerOf16) throw new Exception("Number of elements should be a power of 16 ( vector length should be 2*pow(16,n) )");

    //Scratch buffer with the same element type and length as the double input
    if (CLy == null || CLy.OriginalVarLength != length)
        CLy = new CLCalc.Program.Variable(new double[length]);

    if (kernelfft_radix16 == null)
    {
        InitKernels();
    }

    //Argument sets for the two ping-pong directions: CLx -> CLy and CLy -> CLx
    CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLx, CLy, CLp };
    CLCalc.Program.Variable[] args2 = new CLCalc.Program.Variable[] { CLy, CLx, CLp };

    //True when the most recent pass wrote into CLx (or no pass has run yet)
    bool resultInCLx = true;

    int[] p = new int[] { 1 };
    CLp.WriteToDevice(p);
    int n = length >> 5;

    //One radix-16 pass per iteration; p is multiplied by 16 after each pass
    while (p[0] <= n)
    {
        resultInCLx = !resultInCLx;
        if (resultInCLx)
            kernelfft_radix16.Execute(args2, n);
        else
            kernelfft_radix16.Execute(args, n);

        p[0] = p[0] << 4;
        CLp.WriteToDevice(p);
    }

    //Makes CLy the variable that holds the result
    if (resultInCLx)
    {
        CLCalc.Program.Variable temp = CLx;
        CLx = CLy;
        CLy = temp;
    }

    return CLy;
}
/// <summary>Applies the given filter to the image and writes the filtered values back into the image data.</summary>
/// <param name="imgDt">Image to be filtered; its Data array is overwritten inside the filter margin</param>
/// <param name="Filter">Filter coefficients, length 3*size*size</param>
/// <param name="useOpenCL">True to run the OpenCL kernels, false to run the CPU overload</param>
/// <param name="useWorkDim2">With OpenCL, true selects the two-dimensional kernel, false the one-dimensional one</param>
public static void ApplyFilter(ImageData imgDt, float[] Filter, bool useOpenCL, bool useWorkDim2)
{
    //Side of the square filter; the length has to be exactly 3*side*side
    int side = (int)Math.Sqrt(Filter.Length / 3);
    if (3 * side * side != Filter.Length)
    {
        throw new Exception("Invalid filter");
    }

    if (useOpenCL)
    {
        //Lazy initialization, then upload of the filter coefficients
        if (!Initialized)
        {
            Init(side);
        }
        varFilter.WriteToDevice(Filter);
    }

    //(Re)allocates the filtered-values storage when the image size changed
    int valueCount = imgDt.Height * imgDt.Width * 3;
    if (FilteredVals == null || FilteredVals.Length != valueCount)
    {
        FilteredVals = new float[valueCount];
        varFiltered = new CLCalc.Program.Variable(FilteredVals);
    }

    //Filtering
    if (useOpenCL)
    {
        varWidth.WriteToDevice(new int[] { imgDt.Width });

        CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[] { imgDt.varData, varFilter, varFiltered, varWidth };
        if (useWorkDim2)
        {
            kernelApplyFilterWorkDim2.Execute(kernelArgs, new int[] { imgDt.Width - side, imgDt.Height - side });
        }
        else
        {
            kernelApplyFilter.Execute(kernelArgs, new int[] { imgDt.Height - side });
        }

        //Downloads the filtered values
        varFiltered.ReadFromDeviceTo(FilteredVals);
    }
    else
    {
        ApplyFilter(imgDt.Data, Filter, FilteredVals, new int[] { imgDt.Width }, imgDt.Height - side);
    }

    //Copies the filtered values into the image, skipping a margin around the border
    int margin = (side - 1) / 2;
    int rowLimit = imgDt.Height - margin - 1;
    int colLimit = imgDt.Width - margin - 1;
    for (int row = margin; row < rowLimit; row++)
    {
        int rowStart = imgDt.Width * row;
        for (int col = margin; col < colLimit; col++)
        {
            int first = 3 * (col + rowStart);
            for (int channel = 0; channel < 3; channel++)
            {
                imgDt.Data[first + channel] = (byte)FilteredVals[first + channel];
            }
        }
    }

    //Uploads the updated image data again.
    //In the future this rewriting can be avoided because byte_addressable will be widely available
    if (useOpenCL)
    {
        imgDt.varData.WriteToDevice(imgDt.Data);
    }
}
/// <summary>One-time setup: initializes OpenCL if needed, compiles the filter source, creates both
/// kernels and the filter/width device variables, then sets Initialized.</summary>
/// <param name="FilterSize">Side of the square filter; the filter buffer holds 3*FilterSize*FilterSize floats</param>
private static void Init(int FilterSize)
{
    bool clStateUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (clStateUnknown)
    {
        CLCalc.InitCL();
    }

    //Source compilation
    CLFilterSrc filterSource = new CLFilterSrc();
    CLCalc.Program.Compile(filterSource.src);

    //Kernels
    kernelApplyFilter = new CLCalc.Program.Kernel("ApplyFilter");
    kernelApplyFilterWorkDim2 = new CLCalc.Program.Kernel("ImgFilter");

    //Filter coefficients buffer
    int filterLength = 3 * FilterSize * FilterSize;
    varFilter = new CLCalc.Program.Variable(new float[filterLength]);

    //Single-element buffer for the image width
    varWidth = new CLCalc.Program.Variable(new int[1]);

    Initialized = true;
}
/// <summary>Creates a matrix with the dimensions of the given array and fills it through SetValues.</summary>
/// <param name="Vals">Matrix values</param>
public floatMatrix(float[,] Vals)
{
    int rowCount = Vals.GetLength(0);
    int colCount = Vals.GetLength(1);

    nRows = rowCount;
    nCols = colCount;
    Values = new float[nRows * nCols];

    //Device-side storage is created only when OpenCL is both requested and in use
    bool clActive = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (clActive)
    {
        CLValues = new CLCalc.Program.Variable(Values);
        CLDim = new CLCalc.Program.Variable(new int[] { nRows, nCols });
        CLCoef = new CLCalc.Program.Variable(new float[1]);
    }

    SetValues(Vals);
}
/// <summary>Constructor. Attempts to initialize OpenCL when its state is still unknown and, when OpenCL
/// is in use, allocates the control variables.</summary>
public SparseLinalg()
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
            //Best effort: a failed initialization is ignored; the check below then decides whether OpenCL is used
        }
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    //Control variables
    dprod = new float[SparseLinalg.GLOBALWORKSIZE];
    dotProd = new CLCalc.Program.Variable(dprod);
    dotProdSum = new CLCalc.Program.Variable(new float[1]);
    vLenBy4 = new CLCalc.Program.Variable(new int[1]);
    CLNonZeroElemsPerRow = new CLCalc.Program.Variable(new int[1]);
}
/// <summary>Compiles the FFT source and creates the fft_radix16, fft_radix4 and Conjugate kernels plus the
/// single-element CLp variable.</summary>
private static void InitKernels()
{
    string s = new CLFFTSrc().s;

    //Initializes OpenCL only when it has not been initialized yet, as the other classes do
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown) CLCalc.InitCL();

    try
    {
        CLCalc.Program.Compile(s);
    }
    catch (System.Exception ex)
    {
        //Compilation stays best-effort, but the failure is reported instead of being silently discarded
        System.Diagnostics.Debug.WriteLine("FFT kernel compilation failed: " + ex.Message);
    }

    kernelfft_radix16 = new CLCalc.Program.Kernel("fft_radix16");
    kernelfft_radix4 = new CLCalc.Program.Kernel("fft_radix4");
    kernelConjugate = new CLCalc.Program.Kernel("Conjugate");

    CLp = new CLCalc.Program.Variable(new int[1]);
}
/// <summary>Computes the inverse Discrete Fourier Transform of an interleaved complex vector of floats.
/// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... }, with Length = 2*pow(4,n).</summary>
/// <param name="x">Interleaved real/imaginary input values</param>
/// <returns>A new array of the same length holding the inverse transform read back from the device</returns>
public static float[] iFFT4(float[] x)
{
    //(Re)creates the cached device buffers when missing or of a different length
    bool buffersStale = CLx == null || CLx.OriginalVarLength != x.Length;
    if (buffersStale)
    {
        CLx = new CLCalc.Program.Variable(x);
        CLy = new CLCalc.Program.Variable(x);
    }

    //Uploads the input
    CLx.WriteToDevice(x);

    //Device-side inverse transform
    CLy = iFFT4(CLx);

    //Downloads the result
    float[] inverse = new float[x.Length];
    CLy.ReadFromDeviceTo(inverse);
    return inverse;
}
/// <summary>Constructor. Compiles the double-precision integrator source and creates the device variables,
/// kernels and argument lists used by the integration steps.</summary>
/// <param name="InitialIndepVarValue">Initial independent variable value</param>
/// <param name="StepSize">Desired step per integration pass</param>
/// <param name="InitialState">Initial state of system</param>
/// <param name="DerivativeCalculator">Function to calculate derivatives vector</param>
/// <exception cref="Exception">Thrown when OpenCL is not available</exception>
public doubleODE46(double InitialIndepVarValue, double StepSize, double[] InitialState, DerivCalcDeleg DerivativeCalculator)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    switch (CLCalc.CLAcceleration)
    {
        case CLCalc.CLAccelerationType.NotUsingCL:
            throw new Exception("OpenCL not available");
        case CLCalc.CLAccelerationType.UsingCL:
            break;
        default:
            //Any other state: nothing is set up
            return;
    }

    //Source: fp64 pragma followed by the step kernels
    ODE46Source Source = new ODE46Source();
    string[] sourceParts =
    {
        @" #pragma OPENCL EXTENSION cl_khr_fp64 : enable ",
        Source.doubleStep2,
        Source.doubleStep3,
        Source.doubleStep4,
        Source.doubleStep5,
        Source.doubleStep6,
        Source.doubleFinalizeCalc
    };
    CLCalc.Program.Compile(sourceParts);

    //Derivative calculator
    Derivs = DerivativeCalculator;

    //Scalars, all seeded with the initial independent variable value
    double[] initialX = { InitialIndepVarValue };
    x = new CLCalc.Program.Variable(initialX);
    xsav = new CLCalc.Program.Variable(initialX);
    hdid = new CLCalc.Program.Variable(initialX);

    //Initial values for device and local variables
    currentX = InitialIndepVarValue;
    SetStep(StepSize);

    //Vectors: yy is a private copy of the initial state
    yy = (double[])InitialState.Clone();
    ysav = new CLCalc.Program.Variable(yy);
    k1 = new CLCalc.Program.Variable(InitialState);
    k2 = new CLCalc.Program.Variable(InitialState);
    k3 = new CLCalc.Program.Variable(InitialState);
    k4 = new CLCalc.Program.Variable(InitialState);
    k5 = new CLCalc.Program.Variable(InitialState);
    k6 = new CLCalc.Program.Variable(InitialState);
    absError = new CLCalc.Program.Variable(new double[InitialState.Length]);
    y = new CLCalc.Program.Variable(yy);

    //Kernels
    KernelFinalizeCalc = new CLCalc.Program.Kernel("doubleFinalizeCalc");
    KernelUpdateX = new CLCalc.Program.Kernel("doubleUpdateX");
    KernelRK46YStep2 = new CLCalc.Program.Kernel("doubleYStep2");
    KernelRK46XStep2 = new CLCalc.Program.Kernel("doubleXStep2");
    KernelRK46YStep3 = new CLCalc.Program.Kernel("doubleYStep3");
    KernelRK46XStep3 = new CLCalc.Program.Kernel("doubleXStep3");
    KernelRK46YStep4 = new CLCalc.Program.Kernel("doubleYStep4");
    KernelRK46XStep4 = new CLCalc.Program.Kernel("doubleXStep4");
    KernelRK46YStep5 = new CLCalc.Program.Kernel("doubleYStep5");
    KernelRK46XStep5 = new CLCalc.Program.Kernel("doubleXStep5");
    KernelRK46YStep6 = new CLCalc.Program.Kernel("doubleYStep6");
    KernelRK46XStep6 = new CLCalc.Program.Kernel("doubleXStep6");

    //Kernel arguments
    ArgsFinalize = new CLCalc.Program.Variable[] { x, hdid, y, ysav, absError, k1, k2, k3, k4, k5, k6 };
    ArgsRK46Y = new CLCalc.Program.Variable[] { x, hdid, y, ysav, k1, k2, k3, k4, k5, k6 };
    ArgsRK46X = new CLCalc.Program.Variable[] { x, hdid, xsav };

    //Work sizes: one entry per state, and a single entry for scalar kernels
    NStates = new int[] { InitialState.Length };
    NScalar = new int[] { 1 };

    //Host arrays used to retrieve data
    yerr = new double[NStates[0]];
    xRet = new double[NScalar[0]];
}
/// <summary>Creates a vector that shares the value storage (host and device) of a symmetric matrix.</summary>
/// <param name="symM">Symmetric matrix to use</param>
public floatVector(floatSymPosDefMatrix symM)
{
    //The vector references the matrix storage; nothing is copied
    this.CLValues = symM.CLValues;
    this.Values = symM.Values;

    //The matrix will probably be modified through this vector, so a later LinearSolve
    //has to compute a fresh Cholesky factorization
    symM.IsCholeskyFactorized = false;

    bool clActive = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (clActive)
    {
        CLCoef = new CLCalc.Program.Variable(new float[1]);
    }
}
/// <summary>Creates a vector holding a private copy of the given elements and, when OpenCL is in use,
/// the matching device variables.</summary>
/// <param name="Vals">Vector elements</param>
public floatVector(float[] Vals)
{
    //Private copy, so later changes to Vals do not affect the vector
    float[] copy = new float[Vals.Length];
    Vals.CopyTo(copy, 0);
    this.Values = copy;

    bool clActive = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (clActive)
    {
        CLValues = new CLCalc.Program.Variable(Values);
        CLCoef = new CLCalc.Program.Variable(new float[1]);
    }
}
/// <summary>Initializes OpenCL if needed and, when OpenCL is requested and in use, creates the device
/// variables of this matrix, including the (possibly padded) Cholesky storage.</summary>
private void LocalInitCL()
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    if (!UseOpenCLIfAvailable || CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    CLoffSet = new CLCalc.Program.Variable(new int[1]);
    CLValues = new CLCalc.Program.Variable(this.Values);

    //Packed triangular storage for the inverse of a diagonal block
    invL11 = new float[(SUBMATRIXSIZE * (SUBMATRIXSIZE + 1)) >> 1];
    CLinvl11 = new CLCalc.Program.Variable(invL11);

    if (N % SUBMATRIXSIZE == 0)
    {
        //Dimension already is a multiple of the block size: plain copy
        cholDec = (float[])this.Values.Clone();
    }
    else
    {
        //Rounds the dimension up to the next multiple of the block size; the extra entries stay zero
        int paddedN = SUBMATRIXSIZE * (N / SUBMATRIXSIZE + 1);
        cholDec = new float[(paddedN * (paddedN + 1)) >> 1];
        for (int k = 0; k < Values.Length; k++)
        {
            cholDec[k] = Values[k];
        }
    }
    CLcholDec = new CLCalc.Program.Variable(cholDec);

    //Work vectors of length N and the dimension itself
    CLprevVals = new CLCalc.Program.Variable(new float[N]);
    CLb = new CLCalc.Program.Variable(new float[N]);
    CLy = new CLCalc.Program.Variable(new float[N]);
    CLn = new CLCalc.Program.Variable(new int[] { N });
}
/// <summary>Solves a linear system with a matrix right hand side by block forward and backward
/// substitution on the device, using the Cholesky data in CLcholDec. Falls back to linsolveMatrix
/// when OpenCL is not requested or not in use.</summary>
/// <param name="M">Right hand side; M.Rows is used as the number of systems to solve</param>
/// <param name="resp">Receives the answer in its CLValues device variable</param>
private void LinsolveCLMatrix(floatMatrix M, ref floatMatrix resp)
{
    //Right hand sides split into full groups of SUBMATRIXSIZE plus a leftover group
    int rhsCount = M.Rows;
    int fullGroups = rhsCount / SUBMATRIXSIZE;
    int leftOver = rhsCount % SUBMATRIXSIZE;

    bool clActive = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!clActive)
    {
        linsolveMatrix(M, ref resp);
        return;
    }

    //Work buffers are grown when too small for the right hand side
    if (CLb == null || CLb.OriginalVarLength < M.Values.Length)
    {
        CLb = new CLCalc.Program.Variable(M.Values);
        CLy = new CLCalc.Program.Variable(M.Values);
    }

    //Copies the right hand side into CLb
    kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { M.CLValues, CLb }, M.Values.Length);

    int groupedColumns = fullGroups * SUBMATRIXSIZE;
    int[] offset = new int[1];

    //Forward substitution, one diagonal block at a time
    CLCalc.Program.Variable[] forwardArgs = new CLCalc.Program.Variable[] { CLcholDec, CLy, CLb, CLoffSet, CLn };
    for (int blockStart = 0; blockStart < N; blockStart += SUBMATRIXSIZE)
    {
        offset[0] = blockStart;
        CLoffSet.WriteToDevice(offset);

        int blockRows = Math.Min(SUBMATRIXSIZE, N - blockStart);
        kernelFwdUpperBackSubs.Execute(forwardArgs, new int[] { blockRows, rhsCount }, new int[] { blockRows, 1 });

        //Propagation to the rows below the current block
        int rowsBelow = N - blockStart - SUBMATRIXSIZE;
        if (rowsBelow > 0)
        {
            if (fullGroups > 0)
            {
                kernelFwdPropag.Execute(forwardArgs, new int[] { rowsBelow, groupedColumns }, new int[] { 1, SUBMATRIXSIZE });
            }
            if (leftOver > 0)
            {
                kernelFwdPropag2.Execute(forwardArgs, new int[] { rowsBelow, leftOver }, new int[] { 1, leftOver }, new int[] { 0, groupedColumns });
            }
        }
    }

    //Backward substitution. Stores answer in CLb
    CLCalc.Program.Variable[] backwardArgs = new CLCalc.Program.Variable[] { CLcholDec, CLb, CLy, CLoffSet, CLn };
    int blockTop = N - SUBMATRIXSIZE;
    while (blockTop >= 0)
    {
        offset[0] = blockTop;
        CLoffSet.WriteToDevice(offset);

        kernelBkLowerBackSubs.Execute(backwardArgs, new int[] { SUBMATRIXSIZE, rhsCount }, new int[] { SUBMATRIXSIZE, 1 });

        if (blockTop > 0)
        {
            //Propagation using __local storage
            if (fullGroups > 0)
            {
                kernelBackPropag.Execute(backwardArgs, new int[] { blockTop, groupedColumns }, new int[] { 1, SUBMATRIXSIZE });
            }
            //Leftovers (not multiples of SUBMATRIXSIZE)
            if (leftOver > 0)
            {
                kernelBackPropag2.Execute(backwardArgs, new int[] { blockTop, leftOver }, new int[] { 1, leftOver }, new int[] { 0, groupedColumns });
            }
        }

        blockTop -= SUBMATRIXSIZE;
    }

    //Rows at the top that did not fill a whole block
    int remainingRows = SUBMATRIXSIZE + blockTop;
    if (remainingRows > 0)
    {
        offset[0] = 0;
        CLoffSet.WriteToDevice(offset);
        kernelBkLowerBackSubs.Execute(backwardArgs, new int[] { remainingRows, rhsCount }, new int[] { remainingRows, 1 });
    }

    //Copies the answer into the response matrix
    kernelCopyBuffer.Execute(new CLCalc.Program.Variable[] { CLb, resp.CLValues }, resp.Values.Length);
}
/// <summary>Solves a linear system with a vector right hand side by block forward and backward
/// substitution on the device, using the Cholesky data in CLcholDec. Falls back to linsolve when
/// OpenCL is not requested or not in use.</summary>
/// <param name="CLbb">Right hand side vector</param>
/// <param name="resp">Receives the answer in its CLValues device variable; created with N elements when null</param>
private void linsolveCL(floatVector CLbb, ref floatVector resp)
{
    bool clActive = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!clActive)
    {
        linsolve(CLbb.Values, ref resp);
        return;
    }

    if (resp == null)
    {
        resp = new floatVector(new float[N]);
    }

    //Work buffer is grown when too small, then receives a copy of the right hand side
    if (CLb == null || CLb.OriginalVarLength < CLbb.Length)
    {
        CLb = new CLCalc.Program.Variable(CLbb.Values);
    }
    kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { CLbb.CLValues, CLb }, CLbb.Length);

    int[] offset = new int[1];

    //Forward substitution, one diagonal block at a time
    CLCalc.Program.Variable[] forwardArgs = new CLCalc.Program.Variable[] { CLcholDec, CLy, CLb, CLoffSet, CLn };
    for (int blockStart = 0; blockStart < N; blockStart += SUBMATRIXSIZE)
    {
        offset[0] = blockStart;
        CLoffSet.WriteToDevice(offset);

        int blockRows = Math.Min(SUBMATRIXSIZE, N - blockStart);
        kernelFwdUpperBackSubs.Execute(forwardArgs, new int[] { blockRows }, new int[] { blockRows });

        //Propagation to the rows below the current block
        int rowsBelow = N - blockStart - SUBMATRIXSIZE;
        if (rowsBelow > 0)
        {
            kernelFwdPropag.Execute(forwardArgs, rowsBelow);
        }
    }

    //Backward substitution. Stores answer in CLb
    CLCalc.Program.Variable[] backwardArgs = new CLCalc.Program.Variable[] { CLcholDec, CLb, CLy, CLoffSet, CLn };
    int blockTop = N - SUBMATRIXSIZE;
    while (blockTop >= 0)
    {
        offset[0] = blockTop;
        CLoffSet.WriteToDevice(offset);

        kernelBkLowerBackSubs.Execute(backwardArgs, new int[] { SUBMATRIXSIZE }, new int[] { SUBMATRIXSIZE });

        if (blockTop > 0)
        {
            kernelBackPropag.Execute(backwardArgs, blockTop);
        }

        blockTop -= SUBMATRIXSIZE;
    }

    //Rows at the top that did not fill a whole block
    int remainingRows = SUBMATRIXSIZE + blockTop;
    if (remainingRows > 0)
    {
        offset[0] = 0;
        CLoffSet.WriteToDevice(offset);
        kernelBkLowerBackSubs.Execute(backwardArgs, new int[] { remainingRows }, new int[] { remainingRows });
    }

    //Copies the answer into the response vector
    kernelCopyBuffer.Execute(new CLCalc.Program.Variable[] { CLb, resp.CLValues }, N);
}
/// <summary>Cholesky decomposition using OpenCL with blocks of SUBMATRIXSIZE. Refreshes CLcholDec from the
/// matrix values, runs the block kernels and marks the matrix as factorized.</summary>
public void CLBlockCholesky()
{
    //Dimension rounded up to a multiple of the block size (the matrix is treated as zero padded)
    int paddedN = (N % SUBMATRIXSIZE == 0) ? N : SUBMATRIXSIZE * (N / SUBMATRIXSIZE + 1);

    //Refreshes the decomposition buffer, either from host values or with a device-side copy
    if (IsMatrixInClMemoryUpdated)
    {
        kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { CLValues, CLcholDec }, CLValues.OriginalVarLength);
    }
    else
    {
        for (int k = 0; k < Values.Length; k++)
        {
            cholDec[k] = Values[k];
        }
        CLcholDec.WriteToDevice(cholDec);
    }

    int blockSize = SUBMATRIXSIZE;

    //Important. Set offset to zero
    CLoffSet.WriteToDevice(new int[] { 0 });

    CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[] { CLcholDec, CLoffSet, CLinvl11 };

    //Number of elements in the packed triangle of one diagonal block
    int triangleSize = (blockSize * (blockSize + 1)) >> 1;

    int blockCount = paddedN / blockSize;
    for (int block = 0; block < blockCount; block++)
    {
        //Computes Cholesky factor L11 and its inverse
        kernelCholeskyDiagBlock.Execute(kernelArgs, new int[] { triangleSize }, new int[] { triangleSize });

        //Computes column panel L21
        //Note: offSet has been updated, kernel should use its value-1
        //Blocks remaining below the current diagonal block
        int blocksBelow = blockCount - block - 1;
        if (blocksBelow <= 0)
        {
            continue;
        }

        //Computes panels and updates main diagonals
        kernelCholeskyComputePanel.Execute(kernelArgs, new int[] { blocksBelow * blockSize, blockSize }, new int[] { blockSize, blockSize });

        //Still need to update blocksBelow*(blocksBelow-1)/2 full matrices in the Cholesky decomposition:
        //the off-diagonal block matrices
        int offDiagonalBlocks = ((blocksBelow - 1) * blocksBelow) >> 1;
        if (offDiagonalBlocks > 0)
        {
            kernelCholeskyForwardProp.Execute(kernelArgs, new int[] { offDiagonalBlocks * blockSize, blockSize }, new int[] { blockSize, blockSize });
        }
    }

    this.IsCholeskyFactorized = true;
}
/// <summary>Solves linear system Mx = b using conjugate gradient method. Doesn't try to improve the solution obtained.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance, compared against the dot product of the residual with itself</param>
/// <param name="x">Initial guess; replaced by a new vector when null or of a different length. Receives the solution.</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
{
    int dim = b.Length;
    //Number of groups of four elements needed to cover the vector (rounded up)
    int quads = 1 + ((dim - 1) >> 2);

    //Lazily created scalar coefficient buffer
    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }

    //Work vectors are (re)created when the system size changed
    if (r == null || r.Length != dim)
    {
        r = new CLImgVector(dim);
        p = new CLImgVector(dim);
        Ap = new CLImgVector(dim);
        temp = new CLImgVector(dim);
    }
    if (temp == null)
    {
        temp = new CLImgVector(dim);
    }
    if (x == null || x.Length != dim)
    {
        x = new CLImgVector(dim);
    }

    //Initialization: Ap = M*x, then r and p are set up by the init kernel
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] initArgs = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
    kernelInitRP.Execute(initArgs, quads);

    int iterations = 0;
    float residual = DotProduct(r, r);

    //At least one iteration always runs; afterwards the loop stops on tolerance or iteration limit
    do
    {
        float previousResidual = residual;

        Multiply(M, p, Ap);
        float stepLength = previousResidual / DotProduct(Ap, p);

        //Update x
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, quads);
        lambda[0] = stepLength;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, quads);

        //Update r
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, quads);
        lambda[0] = -stepLength;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, quads);

        residual = DotProduct(r, r);
        float directionWeight = residual / previousResidual;

        //Update p
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, quads);
        lambda[0] = directionWeight;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, quads);

        iterations++;
    }
    while ((residual > tol) && (iterations < dim * MAXITER));
}
/// <summary>Calculates LU decomposition of M matrix on the device.</summary>
/// <param name="M">Matrix to decompose</param>
/// <param name="n">Matrix dimension</param>
/// <param name="varindx">Swap index, left in device memory for the caller</param>
/// <returns>Device variable holding the decomposed matrix; the caller owns it and varindx</returns>
private CLCalc.Program.Variable LUDecomp(double[,] M, int n, out CLCalc.Program.Variable varindx)
{
    //Arguments and work_dim
    CLCalc.Program.Variable[] args;
    int[] max;

    //Matrix to vector
    double[] vecM = MatrixToVector(M, ref n, ref n);
    CLCalc.Program.Variable varM = new Program.Variable(vecM);

    //Temporaries that only live for the duration of this call
    CLCalc.Program.Variable varvv = null;
    CLCalc.Program.Variable varJ = null;
    CLCalc.Program.Variable varN = null;

    try
    {
        //Scaling transformation
        double[] vv = new double[n];
        varvv = new Program.Variable(vv);

        max = new int[1] { n };
        args = new CLCalc.Program.Variable[] { varM, varvv };
        doubleLUScale.Execute(args, max);

        //In order LU factorization (Crout)
        int[] J = new int[1] { 0 };
        varJ = new Program.Variable(J);
        int[] N = new int[1] { n };
        varN = new Program.Variable(N);
        int[] indx = new int[n];
        varindx = new Program.Variable(indx);

        args = new Program.Variable[] { varM, varJ, varN, varindx, varvv };
        for (J[0] = 0; J[0] < n; J[0]++)
        {
            //Current column index for the kernels
            varJ.WriteToDevice(J);

            max[0] = J[0];
            doubleLUCalcBetas.Execute(args, max);

            max[0] = n - J[0];
            doubleLUCalcAlphas.Execute(args, max);

            max[0] = 1;
            doubleLUCalcPivo.Execute(args, max);

            max[0] = n;
            doubleLUTrocaCols.Execute(args, max);

            //The division by the pivot is skipped for the last column
            if (J[0] != n - 1)
            {
                max[0] = n - J[0] - 1;
                doubleLUDivByPivot.Execute(args, max);
            }
        }
    }
    finally
    {
        //Releases the device buffers that are not handed back to the caller
        if (varN != null) varN.Dispose();
        if (varJ != null) varJ.Dispose();
        if (varvv != null) varvv.Dispose();
    }

    return varM;
}
/// <summary>Constructor. Validates the array lengths, uploads the system description to the device, compiles
/// the kernels and runs the initial-length and node-connection calculations.</summary>
/// <param name="nMasses">Number of masses in the system</param>
/// <param name="nConnections">Number of connections</param>
/// <param name="Masses">Mass of each vertex</param>
/// <param name="InitialStateSpace">Position and velocity of vertexes
/// [2*3*i] - posx, [2*(3*i+1)] - posy, [2*(3*i+2)] - posz,
/// [1+2*3*i] - velx, [1+2*(3*i+1)] - vely, [1+2*(3*i+2)] - velz</param>
/// <param name="Origins">Origin vertex of connections. Spring connects Origin[i] to Dests[i]</param>
/// <param name="Dests">Destination vertex of connections. Spring connects Origin[i] to Dests[i]</param>
/// <param name="SpringKs">Spring constant for each connection</param>
/// <param name="Damp">Structural damping (relative-speed dependant) (nConnections)</param>
/// <param name="GroundKs">Spring constant for each mass, connecting to ground (nMass)</param>
/// <param name="GroundDamp">Absolute damping proportional to speed relative to Earth (nMass)</param>
/// <exception cref="Exception">Thrown when any array length does not match nMasses / nConnections</exception>
public floatDEM(int nMasses, int nConnections, float[] Masses, float[] InitialStateSpace, int[] Origins, int[] Dests, float[] SpringKs, float[] Damp, float[] GroundKs, float[] GroundDamp)
{
    #region Consistency check
    if (Masses.Length != nMasses)
    {
        throw new Exception("Invalid Masses length (!=nMasses)");
    }
    if (InitialStateSpace.Length != 6 * nMasses)
    {
        throw new Exception("Invalid positions length (!=6*nMasses - x, y, z)");
    }
    if (Origins.Length != nConnections)
    {
        throw new Exception("Invalid Origins length (!=nConnections)");
    }
    if (Dests.Length != nConnections)
    {
        throw new Exception("Invalid Dests length (!=nConnections)");
    }
    if (SpringKs.Length != nConnections)
    {
        throw new Exception("Invalid SpringKs length (!=nConnections)");
    }
    if (GroundKs.Length != nMasses)
    {
        throw new Exception("Invalid GroundKs length (!=nMasses)");
    }
    if (Damp.Length != nConnections)
    {
        throw new Exception("Invalid Damp length (!=nConnections)");
    }
    if (GroundDamp.Length != nMasses)
    {
        throw new Exception("Invalid GroundDamp length (!=nMasses)");
    }
    #endregion

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    #region Variables reading
    //Sizes, kept as one-element arrays
    nConn = new int[] { nConnections };
    nM = new int[] { nMasses };

    //Inputs
    m = new Program.Variable(Masses);

    //Positions are the even entries of the state space (odd entries are velocities)
    int coordCount = 3 * nMasses;
    float[] startPositions = new float[coordCount];
    for (int c = 0; c < coordCount; c++)
    {
        startPositions[c] = InitialStateSpace[2 * c];
    }
    posOrig = new Program.Variable(startPositions);

    origs = new Program.Variable(Origins);
    dests = new Program.Variable(Dests);
    k = new Program.Variable(SpringKs);
    kGround = new Program.Variable(GroundKs);
    c = new Program.Variable(Damp);
    cGround = new Program.Variable(GroundDamp);

    //Outputs
    L0 = new Program.Variable(new float[nConnections]);
    forces = new Program.Variable(new float[3 * nMasses]);
    connForces = new Program.Variable(new float[3 * nConnections]);
    nConnec = new Program.Variable(new int[] { nConnections });

    //30 slots per mass, all initialized to -1
    int[] connectionSlots = new int[30 * nMasses];
    for (int slot = 0; slot < connectionSlots.Length; slot++)
    {
        connectionSlots[slot] = -1;
    }
    nodesConnections = new Program.Variable(connectionSlots);
    #endregion

    #region Kernels initialization
    DEMSource Source = new DEMSource();
    string[] sourceParts =
    {
        Source.floatcalcL0,
        Source.floatresetForces,
        Source.floatcalcForces,
        Source.floatderivs,
        Source.floatcalcGroundForces,
        Source.floatcalcNodesConnections
    };
    CLCalc.Program.Compile(sourceParts);

    KernelcalcL0 = new Program.Kernel("floatcalcL0");
    argscalcL0 = new Program.Variable[] { posOrig, origs, dests, L0 };

    KernelcalcNodesConnections = new Program.Kernel("floatcalcNodesConnections");
    argscalcNodesConnections = new Program.Variable[] { nodesConnections, nConnec, origs, dests };

    KernelresetForces = new Program.Kernel("floatresetForces");
    argsresetForces = new Program.Variable[] { forces };

    KernelcalcForces = new Program.Kernel("floatcalcForces");
    KernelcalcGroundForces = new Program.Kernel("floatcalcGroundForces");
    Kernelderivs = new Program.Kernel("floatderivs");
    #endregion

    //Initial lengths calculation, one work-item per connection
    KernelcalcL0.Execute(argscalcL0, nConn);

    //Connections calculation, one work-item per mass
    KernelcalcNodesConnections.Execute(argscalcNodesConnections, nM);
}
/// <summary>Sums the components of a vector using __local memory and coalesced access</summary>
/// <param name="CLv">Vector whose components should be summed</param>
/// <returns>Sum of the components; computed on the host in double precision when OpenCL is not in use</returns>
public static float SumVectorElements(floatVector CLv)
{
    float resp = 0;

    if (UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
    {
        /*
         The idea here is to create a reduction in which
         the access pattern to the vectors is coalesced.

         The first step is to reduce the number of non-summed items to a multiple of NWORKITEMS
         and then coalesce the access
         */
        int LOCALWORKSIZE = Math.Min(256, (int)CLCalc.Program.CommQueues[CLCalc.Program.DefaultCQ].Device.MaxWorkGroupSize);
        int NWORKITEMS = 16 * LOCALWORKSIZE;

        int n = CLv.Length;
        float[] resps = new float[NWORKITEMS];

        //Partial-result and length buffers are created once per vector and reused afterwards
        if (CLv.CLresps == null)
        {
            CLv.CLresps = new CLCalc.Program.Variable(resps);
            CLv.CLn = new CLCalc.Program.Variable(new int[1]);
        }

        CLv.CLn.WriteToDevice(new int[] { n });
        CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLv.CLValues, CLv.CLresps, CLv.CLn };

        //Write n = k*NWORKITEMS + p. Preprocess to eliminate p's and leave summation only to a multiple of NWORKITEMS
        int k = n / NWORKITEMS;
        int p = n - k * NWORKITEMS;

        //Clears partial responses
        kernelClear.Execute(args, NWORKITEMS);

        //Sums the p last elements into the p first elements.
        //Skipped when p is 0: there is nothing to pre-sum and a zero-sized launch is not a valid NDRange
        if (p > 0)
        {
            kernelPreSum.Execute(args, p);
        }

        //Use CLn to inform each work-item its workload. Each one will access and sum k numbers
        CLv.CLn.WriteToDevice(new int[] { k });

        kernelCoalLocalSum.Execute(args, new int[] { NWORKITEMS }, new int[] { LOCALWORKSIZE });

        CLv.CLresps.ReadFromDeviceTo(resps);

        //Serial part: adds up one partial result per work-group
        int imax = NWORKITEMS / LOCALWORKSIZE;
        for (int i = 0; i < imax; i++) resp += resps[i];
    }
    else
    {
        //Host fallback, accumulated in double precision
        double sum = 0;
        for (int i = 0; i < CLv.Length; i++) sum += CLv.Values[i];
        resp = (float)sum;
    }

    return resp;
}
/// <summary>Calculates isosurface corresponding to a given isolevel</summary>
/// <param name="isoLvl">Iso level; uploaded to the device only when it differs from the stored one</param>
public void CalcIsoSurface(float isoLvl)
{
    //Copies iso level to video memory when it changed
    if (isoLevel[0] != isoLvl)
    {
        isoLevel[0] = isoLvl;
        varIsoLevel.WriteToDevice(isoLevel);
    }

    //Interpolation
    CLCalc.Program.Variable[] interpArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varInitVals, varStep };
    kernelInterpPts.Execute(interpArgs, max);

    //Polygonization runs over one element less than max in each of the three dimensions
    int[] cellCounts = new int[] { max[0] - 1, max[1] - 1, max[2] - 1 };

    if (!ComputeNormals)
    {
        //Polygonization without normals
        CLCalc.Program.Variable[] plainArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varElemIndex };
        kernelPolygonizeNoNormals.Execute(plainArgs, cellCounts);
        return;
    }

    //Polygonization with preliminary normals
    CLCalc.Program.Variable[] polygonizeArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varEdgePrelimNormals, varElemIndex };
    kernelPolygonize.Execute(polygonizeArgs, cellCounts);

    //Normal smoothing
    CLCalc.Program.Variable[] smoothArgs = new CLCalc.Program.Variable[] { varEdgePrelimNormals, varEdgeNormals };
    kernelSmoothNormals.Execute(smoothArgs, max);
}
/// <summary>Initializes physics program. Components indexes: [i] - x, [i+1] - y, [i+2] - z</summary>
/// <param name="nParticles">Number of particles</param>
public floatBodyPhysics(int nParticles)
{
    Program.Compile(new string[] { CollisionAppliers, ForceAppliers, ConstAccelMotionEDOSolver });

    // Kernels
    MotionStep = new Program.Kernel("constAccelStep");
    Kernel_ApplyGravity = new Program.Kernel("ApplyGravity");
    Kernel_FloorCollision = new Program.Kernel("FloorCollision");
    Kernel_SelfCollision = new Program.Kernel("SelfCollision");
    Kernel_WallCollision = new Program.Kernel("WallCollision");
    Kernel_ResetForces = new Program.Kernel("ResetForces");
    Kernel_ResetCloseNeighbors = new Program.Kernel("ResetCloseNeighbors");

    // Buffers with three components per particle: positions, velocities and forces, all zero-initialized
    int componentCount = nParticles * 3;
    float[] zeroComponents = new float[componentCount];
    CL_pos = new CLCalc.Program.Variable(zeroComponents);
    CL_vel = new CLCalc.Program.Variable(zeroComponents);
    CL_forces = new CLCalc.Program.Variable(zeroComponents);

    // Buffers with one entry per particle
    closeNeighbors = new int[nParticles];
    CL_closeNeighbors = new CLCalc.Program.Variable(closeNeighbors);

    // Masses and collision sizes both start at 1
    float[] unitValues = new float[nParticles];
    for (int particle = 0; particle < nParticles; particle++)
    {
        unitValues[particle] = 1f;
    }
    CL_masses = new CLCalc.Program.Variable(unitValues);
    CL_collisionSizes = new CLCalc.Program.Variable(unitValues);

    // Scalars: time and step size, both starting at 0
    float[] startTime = new float[1] { 0 };
    step = new float[1] { 0 };
    CL_t = new CLCalc.Program.Variable(startTime);
    CL_step = new CLCalc.Program.Variable(step);

    // Gravity, initially the zero vector
    float[] zeroGravity = new float[3] { 0, 0, 0 };
    CL_g = new CLCalc.Program.Variable(zeroGravity);

    // Kernel argument lists
    stepArgs = new CLCalc.Program.Variable[] { CL_t, CL_step, CL_forces, CL_masses, CL_pos, CL_vel };
    applyGravArgs = new CLCalc.Program.Variable[] { CL_forces, CL_masses, CL_g };
    floorCollisionArgs = new CLCalc.Program.Variable[] { CL_vel, CL_pos, CL_collisionSizes };
    // Wall collisions share the very same argument array instance as floor collisions
    wallCollisionArgs = floorCollisionArgs;
    selfCollisionArgs = new CLCalc.Program.Variable[]
    {
        CL_vel, CL_pos, CL_masses, CL_forces, CL_closeNeighbors, CL_collisionSizes
    };
    resetForcesArgs = new CLCalc.Program.Variable[] { CL_forces };
    resetCloseNeighborsArgs = new CLCalc.Program.Variable[] { CL_closeNeighbors };

    // Work sizes: per component, per particle, and per particle pair
    nArgs = new int[1] { componentCount };
    nPartics = new int[1] { nParticles };
    nPartics2 = new int[2] { nParticles, nParticles };
}
/// <summary>Computes the inverse Discrete Fourier Transform of a float2 vector x whose length is a power of 4.
/// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... Re[xn] Im[xn] }, n = power of 4 (Length = 2*pow(4,n))</summary>
/// <param name="CLx">Device variable holding the interleaved complex input. The conjugation kernel is executed
/// directly on this variable, so its device contents are modified by the call.</param>
/// <returns>The device variable produced by <c>FFT4</c> (also stored in <c>CLy</c>) after the final
/// conjugate-and-scale pass</returns>
public static CLCalc.Program.Variable iFFT4(CLCalc.Program.Variable CLx)
{
    if (CLScale == null) CLScale = new CLCalc.Program.Variable(new float[1]);

    // Trick: DFT^-1(x) = conj(DFT(conj(x))) / N
    // No host readback of CLx is needed at any point: the whole transform stays on the device.

    // First pass over the input with a scale factor of 1
    // (kernelConjugate presumably conjugates and multiplies by CLScale - confirm against the kernel source)
    float[] scale = new float[] { 1 };
    CLScale.WriteToDevice(scale);
    kernelConjugate.Execute(new CLCalc.Program.Variable[] { CLx, CLScale }, CLx.OriginalVarLength >> 1);

    // Forward transform of the conjugated input
    CLy = FFT4(ref CLx);

    // Second pass over the result with a scale factor of 1/N, N being the number of complex samples.
    // The length is read after FFT4 because CLx is passed by reference and may have been reassigned.
    scale[0] = 1 / (float)(CLx.OriginalVarLength >> 1);
    CLScale.WriteToDevice(scale);
    kernelConjugate.Execute(new CLCalc.Program.Variable[] { CLy, CLScale }, CLy.OriginalVarLength >> 1);

    return CLy;
}
/// <summary>Builds the diagonal matrix D(v) whose diagonal is the vector v. No data is copied:
/// the matrix references the same host and device objects as v.</summary>
/// <param name="v">Reference vector</param>
public floatDiag(floatVector v)
{
    // Square matrix, one row and one column per vector component
    nRows = v.Length;
    nCols = nRows;

    // Share storage with the source vector
    Values = v.Values;
    CLValues = v.CLValues;
}
/// <summary>Runs <c>kernelComputeAtWA</c> on the OpenCL device to fill <paramref name="AtA"/> from A, the weights W
/// and the regularization term lambda (presumably transpose(A)*W*A plus regularization - confirm against the kernel).</summary>
/// <param name="A">Matrix whose device values and dimensions are passed to the kernel</param>
/// <param name="W">Diagonal weight matrix</param>
/// <param name="lambda">Regularization term</param>
/// <param name="AtA">Destination matrix. It is (re)allocated when null or when its device buffer does not hold
/// exactly Cols*(Cols+1)/2 values.</param>
/// <returns>The same instance left in <paramref name="AtA"/></returns>
private static floatSymPosDefMatrix AuxLSAtACL(floatMatrix A, floatDiag W, floatVector lambda, ref floatSymPosDefMatrix AtA)
{
    // Number of values stored for a symmetric Cols x Cols matrix
    int packedLength = (A.Cols * (A.Cols + 1)) >> 1;

    bool needsAllocation = AtA == null || AtA.CLValues.OriginalVarLength != packedLength;
    if (needsAllocation)
    {
        AtA = new floatSymPosDefMatrix(new float[packedLength]);
    }

    // One work-item per stored value of AtA
    kernelComputeAtWA.Execute(
        new CLCalc.Program.Variable[] { A.CLValues, A.CLDim, W.CLValues, AtA.CLValues, lambda.CLValues },
        AtA.CLValues.OriginalVarLength);

    // Values were just modified in CL memory, so the matrix is no longer Cholesky factorized
    AtA.IsCholeskyFactorized = false;

    return AtA;
}
/// <summary>Timing experiment: compiles the <c>run</c> source, uploads random inputs and weights, launches the
/// "runNN" kernel once for the hidden layer and once for the output layer, and prints both result vectors
/// together with the total and memory-transfer times.</summary>
public OpenCLTest()
{
    CLCalc.InitCL();

    List<string> buildlogs;
    CLCalc.Program.Compile(new[] { run }, out buildlogs);

    // Layer sizes; each weight array has one extra entry per unit of the following layer
    const int inputCount = 768;
    const int hiddenCount = 10000;
    const int outputCount = 10;

    // Uniform random values in [-1, 1), drawn in the order: sample, hidden weights, output weights
    Random r = new Random();
    Func<int, float[]> randomArray =
        length => Enumerable.Range(0, length).Select(x => (float)r.NextDouble() * 2 - 1).ToArray();

    float[] sample = randomArray(inputCount);
    float[] hlW = randomArray(hiddenCount * (inputCount + 1));
    float[] olW = randomArray((hiddenCount + 1) * outputCount);
    float[] hlDest = new float[hiddenCount];
    float[] olDest = new float[outputCount];

    // totalTimer spans everything up to the final readback; memoryTimer covers only uploads and readbacks
    var totalTimer = Stopwatch.StartNew();
    var memoryTimer = Stopwatch.StartNew();

    var gSample = sample.ToGraphics();
    var ghlW = hlW.ToGraphics();
    var golW = olW.ToGraphics();
    var ghlDest = hlDest.ToGraphics();
    var golDest = olDest.ToGraphics();

    memoryTimer.Stop();

    var kernel = new CLCalc.Program.Kernel("runNN");

    // First launch: one work-item per hidden-layer output
    var args = new[]
    {
        gSample,
        new CLCalc.Program.Variable(new[] { sample.Length }),
        ghlDest,
        new CLCalc.Program.Variable(new[] { hlDest.Length }),
        ghlW
    };
    kernel.Execute(args, hlDest.Length);

    // Second launch: the hidden-layer result becomes the input, one work-item per output-layer value
    args = new CLCalc.Program.Variable[5];
    args[0] = ghlDest;
    args[1] = new CLCalc.Program.Variable(new[] { hlDest.Length });
    args[2] = golDest;
    args[3] = new CLCalc.Program.Variable(new[] { olDest.Length });
    args[4] = golW;
    kernel.Execute(args, olDest.Length);

    memoryTimer.Start();

    ghlDest.ReadFromDeviceTo(hlDest);
    golDest.ReadFromDeviceTo(olDest);

    memoryTimer.Stop();
    totalTimer.Stop();

    Console.WriteLine("hlDest\n" + string.Join("\n", hlDest));
    Console.WriteLine("olDest\n" + string.Join("\n", olDest));
    Console.WriteLine("Total {0}ms", totalTimer.ElapsedMilliseconds);
    Console.WriteLine("Memory {0}ms", memoryTimer.ElapsedMilliseconds);
}
/// <summary>Sets up the marching cubes isosurface calculator: allocates host arrays and device variables, uploads the
/// function values, compiles the OpenCL source and creates the kernels. Each CL variable argument may be a variable
/// created from an OpenGL context (CL/GL interop) or null, in which case a new device variable is created.</summary>
/// <param name="FuncValues">Values of the evaluated 3D function f(x,y,z). FuncValues=float[maxX,maxY,maxZ]</param>
/// <param name="CLEdgeCoords">OpenCL variable (float) to hold edge coordinates. Dimension has to be 9 * maxX * maxY * maxZ</param>
/// <param name="CLEdgeNormals">OpenCL variable (float) to hold edge normals. Dimension has to be 9 * maxX * maxY * maxZ</param>
/// <param name="CLElementArrayIndex">OpenCL variable (int) to hold element array index. Dimension has to be 5 * 3 * (maxX - 1) * (maxY - 1) * (maxZ - 1)</param>
/// <exception cref="Exception">Thrown when OpenCL acceleration is not in use after initialization</exception>
private void InitMarchingCubes(float[, ,] FuncValues, CLCalc.Program.Variable CLEdgeCoords, CLCalc.Program.Variable CLEdgeNormals, CLCalc.Program.Variable CLElementArrayIndex)
{
    // Lazily initialize OpenCL, then refuse to continue without it
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        throw new Exception("OpenCL not available");
    }

    // Grid dimensions
    int maxX = FuncValues.GetLength(0);
    int maxY = FuncValues.GetLength(1);
    int maxZ = FuncValues.GetLength(2);
    max = new int[] { maxX, maxY, maxZ };

    int pointCount = maxX * maxY * maxZ;
    int cubeCount = (maxX - 1) * (maxY - 1) * (maxZ - 1);

    // Isolevel, seeded with an initial value
    isoLevel = new float[1] { 1.32746E-5f };
    varIsoLevel = new CLCalc.Program.Variable(isoLevel);

    // Step size and x0, y0, z0
    varStep = new CLCalc.Program.Variable(step);
    varInitVals = new CLCalc.Program.Variable(initVals);

    // Function values: allocate the device buffer, then copy the values in
    funcVals = new float[pointCount];
    CLFuncVals = new CLCalc.Program.Variable(funcVals);
    SetFuncVals(FuncValues);

    // Edge coordinates - 3 coords * 3 possible directions * number of points.
    // A caller-supplied variable is adopted and overwritten with the zeroed host array.
    edgeCoords = new float[9 * pointCount];
    if (CLEdgeCoords == null)
    {
        varEdgeCoords = new CLCalc.Program.Variable(edgeCoords);
    }
    else
    {
        varEdgeCoords = CLEdgeCoords;
        varEdgeCoords.WriteToDevice(edgeCoords);
    }

    // 4 preliminary normals per edge - they have to be averaged afterwards
    edgePrelimNormals = new float[36 * pointCount];
    varEdgePrelimNormals = new CLCalc.Program.Variable(edgePrelimNormals);

    // Edge normals, same sharing rule as the edge coordinates
    edgeNormals = new float[9 * pointCount];
    if (CLEdgeNormals == null)
    {
        varEdgeNormals = new CLCalc.Program.Variable(edgeNormals);
    }
    else
    {
        varEdgeNormals = CLEdgeNormals;
        varEdgeNormals.WriteToDevice(edgeNormals);
    }

    // Number of cubes: (maxX-1)*(maxY-1)*(maxZ-1)
    // Marching cube algorithm: each cube can have 5 triangles drawn, 3 vertexes per triangle
    // q-th vertex of p-th triangle of the ijk-th cube: [(5*(i+(maxX-1)*j+k*(maxX-1)*(maxY-1))+p)*3+q]
    elementIndex = new int[5 * 3 * cubeCount];
    if (CLElementArrayIndex == null)
    {
        varElemIndex = new CLCalc.Program.Variable(elementIndex);
    }
    else
    {
        varElemIndex = CLElementArrayIndex;
        varElemIndex.WriteToDevice(elementIndex);
    }

    // Edge remapping used to build the output; every entry starts at -1
    edges = new int[edgeCoords.Length / 3];
    for (int edge = 0; edge < edges.Length; edge++)
    {
        edges[edge] = -1;
    }

    // Compile code and create kernels
    CLMarchingCubesSrc cmsrc = new CLMarchingCubesSrc();
    CLCalc.Program.Compile(new string[] { cmsrc.definitions, cmsrc.src });

    kernelInterpPts = new CLCalc.Program.Kernel("interpPts");
    kernelPolygonize = new CLCalc.Program.Kernel("Polygonize");
    kernelSmoothNormals = new CLCalc.Program.Kernel("SmoothNormals");
    kernelPolygonizeNoNormals = new CLCalc.Program.Kernel("PolygonizeNoNormals");
}
/// <summary>Builds a diagonal matrix from its main diagonal. The input array is copied, and the copy is also
/// uploaded to the device when OpenCL use is enabled and available.</summary>
/// <param name="Vals">Main diagonal elements</param>
public floatDiag(float[] Vals)
{
    // Keep a private copy so later changes to the caller's array do not affect the matrix
    float[] diagonal = (float[])Vals.Clone();
    Values = diagonal;

    bool useDevice = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (useDevice)
    {
        CLValues = new CLCalc.Program.Variable(Values);
    }

    // Square matrix sized by the diagonal length
    int size = Vals.Length;
    nRows = size;
    nCols = size;
}