/// <summary>
/// Compiles and runs the "StepDpq" OpenCL kernel on the coefficients of two polynomials
/// of equal degree and returns the buffer the kernel filled.
/// </summary>
/// <param name="x">First polynomial.</param>
/// <param name="y">Second polynomial; must have the same degree as <paramref name="x"/>.</param>
/// <returns>A flat array of (degree + 1) * (degree + 1) integers read back from the device.</returns>
/// <exception cref="InvalidOperationException">The polynomials have different degrees.</exception>
private static int[] ExecuteStepDpq(Polynomial x, Polynomial y)
{
    if (x.Degree != y.Degree)
    {
        throw new InvalidOperationException("Only works for polynomials of same degree!");
    }

    // Both degrees are equal here, so the minimum is simply the common degree.
    var degree = Math.Min(x.Degree, y.Degree);
    var termCount = degree + 1;
    var hostResult = new int[termCount * termCount];

    CLCalc.InitCL();
    CLCalc.Program.Compile(new[] { KernelCodeStepDpq });
    var stepKernel = new CLCalc.Program.Kernel("StepDpq");

    // Device-side copies of the kernel arguments: n, x, y and the output buffer.
    var deviceCount = new CLCalc.Program.Variable(new[] { termCount });
    var deviceX = new CLCalc.Program.Variable(x.Coefficients);
    var deviceY = new CLCalc.Program.Variable(y.Coefficients);
    var deviceResult = new CLCalc.Program.Variable(hostResult);

    // NOTE(review): for degree 0 the work size below is 0 - confirm callers never pass constants.
    var globalWorkSize = new[] { 2 * termCount - 2 };
    stepKernel.Execute(
        new CLCalc.Program.MemoryObject[] { deviceCount, deviceX, deviceY, deviceResult },
        globalWorkSize);

    deviceResult.ReadFromDeviceTo(hostResult);
    return hostResult;
}
/// <summary>
/// Adds the numbers typed into textBox1 and textBox2 on the OpenCL device and shows
/// the sum in textBox3.
/// </summary>
/// <remarks>
/// Both operands stay at 0 unless both text boxes contain non-blank text. Either '.' or ','
/// is accepted as the decimal separator regardless of the current culture.
/// </remarks>
private void button6_Click(object sender, EventArgs e)
{
    float[] x = new float[] { 0 };
    float[] y = new float[] { 0 };
    CLCalc.InitCL();

    if (!string.IsNullOrWhiteSpace(textBox1.Text) && !string.IsNullOrWhiteSpace(textBox2.Text))
    {
        // Normalize to '.' and parse with the invariant culture. Forcing ',' and parsing with the
        // current culture turns "1.5" into 15 on cultures where ',' is the group separator.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        x[0] = (float)Convert.ToDouble(textBox1.Text.Replace(',', '.'), invariant);
        y[0] = (float)Convert.ToDouble(textBox2.Text.Replace(',', '.'), invariant);
    }

    // The buffers hold exactly one float each, so the kernel must work on float, not float4:
    // a float4 access would touch 16 bytes of a 4-byte buffer.
    string s = @"
        __kernel void
        sum(global float *x, global float *y)
        {
            x[0] = x[0] + y[0];
        }";

    CLCalc.Program.Compile(new string[] { s });
    CLCalc.Program.Kernel sum = new CLCalc.Program.Kernel("sum");

    CLCalc.Program.Variable varx = new CLCalc.Program.Variable(x);
    CLCalc.Program.Variable vary = new CLCalc.Program.Variable(y);
    CLCalc.Program.Variable[] args = { varx, vary };

    // A single work-item is enough: only element 0 is touched.
    int[] max = new int[] { 1 };
    sum.Execute(args, max);

    varx.ReadFromDeviceTo(x);
    textBox3.Text = Convert.ToString(x[0]);
}
/// <summary>Creates an N x N sparse matrix whose values and column indexes are kept in image-shaped buffers.</summary>
/// <param name="N">NxN dimension of the matrix</param>
/// <param name="nonZeroElemsPerRow">Maximum number of non-zero elements per row</param>
public CLImgSparseMatrix(int N, int nonZeroElemsPerRow)
{
    // Round the per-row capacity up to the next multiple of 4.
    elemsPerRow = (((nonZeroElemsPerRow - 1) >> 2) + 1) << 2;
    numRows = N;
    nElems = numRows * elemsPerRow;

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Best effort: a failed OpenCL start-up is ignored, the host arrays are allocated anyway.
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
        }
    }

    // One image row per 2^14 stored values, never fewer than one row.
    // NOTE(review): this presumes IMGWIDTH * 4 == 2^14 - confirm against the IMGWIDTH constant.
    int imageRows = ((nElems - 1) >> 14) + 1;
    int hostLength = IMGWIDTH * imageRows * 4;

    MatrixData = new float[hostLength];
    Columns = new int[hostLength];

    // Every column slot starts out as -1.
    int slot = 0;
    while (slot < Columns.Length)
    {
        Columns[slot] = -1;
        slot++;
    }

    bool usingCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (usingCL)
    {
        CLMatrixData = new CLCalc.Program.Image2D(MatrixData, IMGWIDTH, imageRows);
        CLColumns = new CLCalc.Program.Image2D(Columns, IMGWIDTH, imageRows);
    }
}
/// <summary>Static constructor. Compiles the linear algebra sources and creates the shared kernels when OpenCL is in use.</summary>
static SparseLinalg()
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Best effort: a failed start-up is ignored; the acceleration check below decides what happens.
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
        }
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    // All three sources are compiled together as one program.
    CLLinalgSrc sources = new CLLinalgSrc();
    string[] program = { sources.srcDotProd, sources.srcMatVecMult, sources.srcLinConjGrad };
    CLCalc.Program.Compile(program);

    // Dot product and sparse matrix-vector product
    kernelDotProduct = new CLCalc.Program.Kernel("dotProd");
    kernelSum = new CLCalc.Program.Kernel("sumDotProd");
    kernelGetDotSum = new CLCalc.Program.Kernel("GetResp");
    kernelSparseMatrixVecMult = new CLCalc.Program.Kernel("SparseMatrixVecMult");

    // Linear solving
    kernelInitRP = new CLCalc.Program.Kernel("InitRP");
    kernelMultiplyAdd = new CLCalc.Program.Kernel("MultiplyAdd");
    kernelCopyToTemp = new CLCalc.Program.Kernel("CopyToTemp");
}
/// <summary>Creates a new vector backed by an OpenCL Image2D object.</summary>
/// <param name="Length">Vector length. The backing storage is rounded up to whole image rows, so some extra memory is allocated.</param>
public CLImgVector(int Length)
{
    n = Length;

    // One image row per 2^14 elements, and always at least one row.
    nRows = ((n - 1) >> 14) + 1;

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Best effort: when OpenCL cannot be started only the host array below is created.
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
        }
    }

    // Host storage: Width = IMGWIDTH, Height = nRows, 4 values per image element.
    int hostLength = IMGWIDTH * nRows * 4;
    VectorData = new float[hostLength];

    bool usingCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (usingCL)
    {
        CLVector = new CLCalc.Program.Image2D(VectorData, IMGWIDTH, nRows);
    }
}
/// <summary>Sample program: adds two 2000-element float vectors with an OpenCL kernel.</summary>
static void Main(string[] args)
{
    const string kernelName = "floatVectorSum";

    string vecSum = @"
        __kernel void
        floatVectorSum(__global float * v1,
                       __global float * v2)
        {
            // Vector element index
            int i = get_global_id(0);
            v1[i] = v1[i] + v2[i];
        }";

    // Set up OpenCL platforms/devices, then build the program. Compile takes an array
    // so that a program can be split over several source strings.
    CLCalc.InitCL();
    CLCalc.Program.Compile(new string[] { vecSum });
    CLCalc.Program.Kernel VectorSum = new OpenCLTemplate.CLCalc.Program.Kernel(kernelName);

    // Host data: 2000 numbers per vector.
    int n = 2000;
    float[] v1 = new float[n];
    float[] v2 = new float[n];
    int index = 0;
    while (index < n)
    {
        v1[index] = (float)index / 10;
        v2[index] = -(float)index / 9;
        index++;
    }

    // Device copies of both vectors, passed to the kernel in declaration order.
    var varV1 = new OpenCLTemplate.CLCalc.Program.Variable(v1);
    var varV2 = new OpenCLTemplate.CLCalc.Program.Variable(v2);
    OpenCLTemplate.CLCalc.Program.Variable[] argsCL = { varV1, varV2 };

    // One work-item per vector element.
    int[] workers = { n };
    VectorSum.Execute(argsCL, workers);

    // The kernel accumulates into v1, so that is the buffer read back.
    varV1.ReadFromDeviceTo(v1);
}
/// <summary>Compiles the SVM OpenCL code (once) and allocates the device variables for this SVM instance.</summary>
private void CLSVMInit()
{
    // NOTE(review): the lock object is created lazily and outside any lock - confirm that
    // concurrent first calls cannot happen.
    if (CLResource == null)
    {
        CLResource = new int[0];
    }

    lock (CLResource)
    {
        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
        {
            CLCalc.InitCL();
        }

        if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
        {
            return;
        }

        // Kernels are created only while kernelComputeKernelRBF is still unset.
        if (kernelComputeKernelRBF == null)
        {
            CLSVMSrc sources = new CLSVMSrc();
            string[] program = { sources.srcKernels, sources.srcFindMaxMinErr, sources.srcMultClass };
            CLCalc.Program.Compile(program);

            // Kernel computation
            kernelComputeKernelRBF = new CLCalc.Program.Kernel("ComputeKernelRBF");
            kernelMaxErr = new CLCalc.Program.Kernel("maxErr");
            kernelComputeMax = new CLCalc.Program.Kernel("computeMax");
            kernelMinErr = new CLCalc.Program.Kernel("minErr");
            kernelComputeMin = new CLCalc.Program.Kernel("computeMin");
            kernelGetResp = new CLCalc.Program.Kernel("getResp");

            // Update error
            kernelUpdateErr = new CLCalc.Program.Kernel("UpdateErr");

            // Multiple classification
            kernelComputeMultiKernelRBF = new CLCalc.Program.Kernel("ComputeMultiKernelRBF");
            kernelSumKernels = new CLCalc.Program.Kernel("SumKernels");
        }

        // Memory objects are (re)created on every call.
        // Find max/min
        CLErrLen = new CLCalc.Program.Variable(new int[1]);
        HostResp = new int[1];
        CLResp = new CLCalc.Program.Variable(HostResp);
        CLMaxMinErrs = new CLCalc.Program.Variable(new float[MAXMINWORKSIZE]);
        CLMaxMinInds = new CLCalc.Program.Variable(new int[MAXMINWORKSIZE]);

        // Update error
        CLUpdtErrParams = new CLCalc.Program.Variable(new float[3]);
    }
}
/// <summary>Creates a classifier holding a single SVM whose parameters are loaded from a file.</summary>
/// <param name="svmFile">File to read</param>
public CLObjClassifier(string svmFile)
{
    bool accelerationUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (accelerationUnknown)
    {
        CLCalc.InitCL();
    }

    // Start from an empty training set and register one SVM with classification value 1.
    TrainingSet emptySet = new TrainingSet();
    SVM = new MultiClassSVM(emptySet);
    SVM.SVMs.Add(new SVM());
    SVM.Classifications.Add(1.0f);

    // The parameters are loaded into the first SVM of the list.
    SVM.SVMs[0].Load(svmFile);

    InitKernel();
}
/// <summary>Creates a classifier and, when requested, loads the face dataset and builds the SVM from it.</summary>
/// <param name="TrainFaceDataset">Load and classify face dataset?</param>
public CLObjClassifier(bool TrainFaceDataset)
{
    bool accelerationUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (accelerationUnknown)
    {
        CLCalc.InitCL();
    }

    // Without training nothing else is set up.
    if (!TrainFaceDataset)
    {
        return;
    }

    LoadMITFaceClassifier();
    SVM = new MultiClassSVM(tSet);
    InitKernel();
}
/// <summary>Creates a new filter that wraps an already compiled kernel.</summary>
/// <param name="filterKernel">Compiled kernel to run for this filter</param>
/// <param name="filterName">Filter name</param>
/// <exception cref="Exception">OpenCL is not in use after initialization was attempted.</exception>
public CLFilter(CLCalc.Program.Kernel filterKernel, string filterName)
{
    bool accelerationUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (accelerationUnknown)
    {
        CLCalc.InitCL();
    }

    bool usingCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!usingCL)
    {
        throw new Exception("OpenCL not available");
    }

    FilterName = filterName;
    FilterKernel = filterKernel;
}
/// <summary>Compiles the FFT OpenCL source and creates the FFT kernels plus the one-int device variable CLp.</summary>
private static void InitKernels()
{
    string fftSource = new CLFFTSrc().s;

    CLCalc.InitCL();

    // NOTE(review): compile failures are swallowed here, so a broken source only
    // surfaces (if at all) when the kernels are created below.
    try
    {
        CLCalc.Program.Compile(fftSource);
    }
    catch
    {
    }

    kernelfft_radix16 = new CLCalc.Program.Kernel("fft_radix16");
    kernelfft_radix4 = new CLCalc.Program.Kernel("fft_radix4");
    kernelConjugate = new CLCalc.Program.Kernel("Conjugate");

    int[] singleInt = new int[1];
    CLp = new CLCalc.Program.Variable(singleInt);
}
/// <summary>Creates a new filter from a given code. The kernel is looked up under the filter name with
/// spaces removed and converted to lower case (Ex: filter name "Gaussian Blur" looks up kernel "gaussianblur").</summary>
/// <param name="filterCode">Complete OpenCL kernel code to compile</param>
/// <param name="filterName">Filter name</param>
/// <exception cref="Exception">OpenCL is not in use after initialization was attempted.</exception>
public CLFilter(string filterCode, string filterName)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Only GPU devices are requested when this constructor performs the initialization.
        CLCalc.InitCL(Cloo.ComputeDeviceTypes.Gpu);
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        throw new Exception("OpenCL not available");
    }

    CLCalc.Program.Compile(filterCode);

    this.FilterName = filterName;

    // Kernel names are identifiers, not display text: lower-case them culture-invariantly so the
    // result does not depend on the user's locale (e.g. Turkish 'I' -> dotless 'ı' with ToLower()).
    string kernelname = FilterName.Replace(" ", "").ToLowerInvariant();
    FilterKernel = new CLCalc.Program.Kernel(kernelname);
}
/// <summary>Builds an ImageData from a bitmap: copies its pixels to a host byte array and uploads that array to the OpenCL device.</summary>
/// <param name="bmp">Bitmap to read from</param>
public ImageData(Bitmap bmp)
{
    bool accelerationUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
    if (accelerationUnknown)
    {
        CLCalc.InitCL();
    }

    width = bmp.Width;
    height = bmp.Height;

    // Three bytes are reserved per pixel.
    int byteCount = 3 * width * height;
    Data = new byte[byteCount];

    // Fill the host array from the bitmap, then mirror it on the device.
    ReadToLocalData(bmp);
    varData = new CLCalc.Program.Variable(Data);
}
/// <summary>Runs the "doubleVectorSum" kernel on two four-element vectors and reads the third buffer back.</summary>
private void button1_Click(object sender, EventArgs e)
{
    CLCalc.InitCL();

    // Host data: two inputs and one output of equal length.
    double[] a = { 2, 147483647, 2, 7 };
    double[] b = { 1, 2, 7, 4 };
    double[] c = new double[4];

    var v1 = new CLCalc.Program.Variable(a);
    var v2 = new CLCalc.Program.Variable(b);
    var v3 = new CLCalc.Program.Variable(c);

    // Source providers. MinDifs is instantiated as before although its source is not compiled here.
    var VecSum = new CLCalc.CLPrograms.VectorSum();
    var Mdifs = new CLCalc.CLPrograms.MinDifs();

    string[] s = { VecSum.doubleVectorSum };
    CLCalc.Program.Compile(s);
    var k = new CLCalc.Program.Kernel("doubleVectorSum");

    // One work-item per element of the input vector.
    CLCalc.Program.Variable[] vv = { v1, v2, v3 };
    int[] max = { a.Length };
    k.Execute(vv, max);

    // Wait for the device before copying the result and shutting OpenCL down.
    CLCalc.Program.Sync();
    v3.ReadFromDeviceTo(c);
    CLCalc.FinishCL();
}
/// <summary>Runs the hue kernel on a bitmap and returns the three bitmaps it produces.</summary>
/// <param name="bmp">Source bitmap.</param>
/// <returns>
/// A list with the bitmaps read from CLNewBmp, CLBmpSaturation and CLBmpIntens (in that order),
/// or null when OpenCL is not in use.
/// </returns>
private List<Bitmap> CLHSL(Bitmap bmp)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return null;
    }

    InitKernels();

    if (CLBmpSaturation == null)
    {
        CLBmpSaturation = new CLCalc.Program.Image2D(bmp);
        CLBmpIntens = new CLCalc.Program.Image2D(bmp);
    }

    // Device images are reused only while the bitmap size stays the same.
    bool needsNewImages = CLbmp == null || CLbmp.Height != bmp.Height || CLbmp.Width != bmp.Width;
    if (!needsNewImages)
    {
        CLbmp.WriteBitmap(bmp);
        CLN.WriteToDevice(new int[] { NLumIntens });
        CLWidth.WriteToDevice(new int[] { bmp.Width });
        CLHeight.WriteToDevice(new int[] { bmp.Height });
    }
    else
    {
        // NOTE(review): CLN/CLWidth/CLHeight are not rewritten on this path - confirm InitKernels covers them.
        CLbmp = new CLCalc.Program.Image2D(bmp);
        CLNewBmp = new CLCalc.Program.Image2D(bmp);
        CLBmpSaturation = new CLCalc.Program.Image2D(bmp);
        CLBmpIntens = new CLCalc.Program.Image2D(bmp);
    }

    // One work-item per pixel.
    var kernelArgs = new CLCalc.Program.MemoryObject[] { CLbmp, CLNewBmp, CLBmpSaturation, CLBmpIntens };
    var workSize = new int[] { bmp.Width, bmp.Height };
    kernelComputeHue.Execute(kernelArgs, workSize);

    var channels = new List<Bitmap>();
    channels.Add(CLNewBmp.ReadBitmap());
    channels.Add(CLBmpSaturation.ReadBitmap());
    channels.Add(CLBmpIntens.ReadBitmap());
    return channels;
}
/// <summary>On form load, tries to initialize OpenCL and to compile the CLSrc program; failures of either step are ignored.</summary>
private void Form1_Load(object sender, EventArgs e)
{
    // Best effort: the form must load even when OpenCL cannot be started.
    try
    {
        CLCalc.InitCL();
    }
    catch
    {
    }

    string programSource = new CLSrc().src;

    // Best effort as well: a compile error does not prevent the form from loading.
    try
    {
        CLCalc.Program.Compile(programSource);
    }
    catch
    {
    }
}
/// <summary>
/// Button to test code: compiles the text of rTBCLCode and colors the button green on success
/// or red on failure. Build logs are stored in BuildLogs.
/// </summary>
private void btnCompileTest_Click(object sender, EventArgs e)
{
    this.TopMost = true;
    try
    {
        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
        {
            CLCalc.InitCL();
        }

        try
        {
            CLCalc.Program.Compile(rTBCLCode.Text, out BuildLogs);
            btnCompileTest.BackColor = Color.Green;
        }
        catch
        {
            // Any compile failure is reported through the button color only.
            btnCompileTest.BackColor = Color.Red;
        }
    }
    finally
    {
        // Always restore the window state, even when OpenCL initialization throws;
        // otherwise the form would stay pinned on top of every other window.
        this.TopMost = false;
    }
}
/// <summary>
/// Initializes OpenCL, compiles the program text read from Programs.txt and fills
/// <c>kernels</c> with the five kernels, in fixed index order.
/// </summary>
public void InitGPU()
{
    kernels = new List<CLCalc.Program.Kernel>();

    CLCalc.InitCL();
    CLCalc.Program.DefaultCQ = 0;

    // NOTE(review): the program source is read from a hard-coded absolute path on the author's machine.
    const string programsPath = @"C:\Users\Marat\Documents\Visual Studio 2013\Projects\MathLib\MathLib\Programs.txt";
    string programText;
    using (var reader = new StreamReader(programsPath))
    {
        programText = reader.ReadToEnd();
    }

    CLCalc.Program.Compile(new string[] { programText });

    // Position in this array == index of the kernel inside the kernels list.
    string[] kernelNames =
    {
        "vecMul",     // 0
        "vecElemMul", // 1
        "vecSum",     // 2
        "vecDif",     // 3
        "matrMul"     // 4
    };

    foreach (string kernelName in kernelNames)
    {
        kernels.Add(new CLCalc.Program.Kernel(kernelName));
    }
}
/// <summary>
/// Creates a collision detector. When OpenCL is in use, the displacement and collision sources
/// are compiled and the four kernels are created; any error during that step is shown in a message box.
/// </summary>
public CollisionDetector()
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Best effort: a failed start-up simply leaves the kernels unset.
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
        }
    }

    try
    {
        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
        {
            var displacement = new CollisionDetector.DisplacementSource();
            var exactCollision = new CollisionDetector.ExactCollisionSource();
            var vertexCollision = new CollisionDetector.VertexCollisionSource();

            string[] program = new string[4]
            {
                displacement.srcCalcRotacoes,
                displacement.srcCalcTransl,
                exactCollision.srcExactCollision,
                vertexCollision.srcVertexCollision
            };
            CLCalc.Program.Compile(program);

            this.kernelCalcRotacoes = new CLCalc.Program.Kernel("CalcRotacoes");
            this.kernelCalcTransl = new CLCalc.Program.Kernel("CalcTransl");
            this.kernelCalcExactCollision = new CLCalc.Program.Kernel("CalcExactCollision");
            this.kernelCalcVertexCollision = new CLCalc.Program.Kernel("CalcVertexCollision");
        }
    }
    catch (Exception ex)
    {
        // Report the failure to the user instead of letting the constructor throw.
        MessageBox.Show(ex.ToString());
    }
}
/// <summary>Constructor. Allocates the per-instance control variables when OpenCL is in use.</summary>
public SparseLinalg()
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        // Best effort: a failed start-up is ignored and no device variables are created.
        try
        {
            CLCalc.InitCL();
        }
        catch
        {
        }
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    // Control variables: partial dot products, their sum, and two one-int parameters.
    dprod = new float[SparseLinalg.GLOBALWORKSIZE];
    dotProd = new CLCalc.Program.Variable(dprod);
    dotProdSum = new CLCalc.Program.Variable(new float[1]);

    int[] singleInt = new int[1];
    vLenBy4 = new CLCalc.Program.Variable(singleInt);
    CLNonZeroElemsPerRow = new CLCalc.Program.Variable(new int[1]);
}
/// <summary>
/// Runs the "doubleVectorSum" kernel from Examples/SumTest.cl on a[i] = i/10 and b[i] = -i/9
/// and checks that the buffer read back from the first argument holds -i/90.
/// </summary>
public void DoubleSumTest()
{
    const int count = 2000;

    string text = File.ReadAllText("Examples/SumTest.cl");
    CLCalc.InitCL();
    CLCalc.Program.Compile(new string[] { text });

    // Arrange: two inputs and a host buffer for the result.
    double[] a = new double[count];
    double[] b = new double[count];
    double[] ab = new double[count];
    for (int index = 0; index < count; index++)
    {
        a[index] = index / 10.0;
        b[index] = -index / 9.0;
    }

    // Act: one work-item per element; the result is read back from the first argument.
    using (var Kernel = new CLCalc.Program.Kernel("doubleVectorSum"))
    using (var varA = new CLCalc.Program.Variable(a))
    using (var varB = new CLCalc.Program.Variable(b))
    {
        CLCalc.Program.Variable[] kernelArgs = { varA, varB };
        int[] workers = { count };
        Kernel.Execute(kernelArgs, workers);
        varA.ReadFromDeviceTo(ab);
    }

    // Assert: i/10 - i/9 == -i/90.
    for (int index = 0; index < count; index++)
    {
        Assert.AreEqual(-index / 90.0, ab[index], 1E-13);
    }
}
/// <summary>
/// Multiplies two polynomials of equal degree by running the StepDi, StepDpq and
/// StepKaratsuba OpenCL steps.
/// </summary>
/// <param name="x">First factor.</param>
/// <param name="y">Second factor; must have the same degree as <paramref name="x"/>.</param>
/// <returns>A polynomial built from the 2 * (degree + 1) - 1 coefficients read back from the device.</returns>
/// <exception cref="InvalidOperationException">The polynomials have different degrees.</exception>
public static Polynomial MultiplyKaratsubaOpenCl(Polynomial x, Polynomial y)
{
    if (x.Degree != y.Degree)
    {
        throw new InvalidOperationException("Only works for polynomials of same degree!");
    }

    // Both degrees are equal here, so the minimum is simply the common degree.
    var degree = Math.Min(x.Degree, y.Degree);
    var termCount = degree + 1;

    // Intermediate results of the two preparatory steps.
    var di = ExecuteStepDi(x, y);
    var dpq = ExecuteStepDpq(x, y);

    // The lowest and highest coefficients are taken from di on the host;
    // the kernel is launched for the remaining ones.
    var product = new int[2 * termCount - 1];
    product[0] = di[0];
    product[product.Length - 1] = di[termCount - 1];

    CLCalc.InitCL();
    CLCalc.Program.Compile(new[] { KernelCodeStepKaratsuba });
    var karatsubaKernel = new CLCalc.Program.Kernel("StepKaratsuba");

    // Device-side copies of the kernel arguments.
    var deviceCount = new CLCalc.Program.Variable(new[] { termCount });
    var deviceX = new CLCalc.Program.Variable(x.Coefficients);
    var deviceY = new CLCalc.Program.Variable(y.Coefficients);
    var deviceDi = new CLCalc.Program.Variable(di);
    var deviceDpq = new CLCalc.Program.Variable(dpq);
    var deviceProduct = new CLCalc.Program.Variable(product);

    // NOTE(review): for degree 0 the work size below is negative - confirm callers never pass constants.
    var globalWorkSize = new[] { 2 * termCount - 3 };
    karatsubaKernel.Execute(
        new CLCalc.Program.MemoryObject[] { deviceCount, deviceX, deviceY, deviceDi, deviceDpq, deviceProduct },
        globalWorkSize);

    deviceProduct.ReadFromDeviceTo(product);
    return new Polynomial(product);
}
/// <summary>Equalizes image histogram using OpenCL</summary>
/// <param name="bmp">Bitmap to equalize. It is replaced by the bitmap read back from the device;
/// it is left untouched when OpenCL is not in use.</param>
private void CLEqualizeHistogram(ref Bitmap bmp)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }
    //Without OpenCL this method is a no-op
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    //Host buffers: NLumIntens entries per bitmap column, plus the consolidated histogram
    float[] PartialHistograms = new float[NLumIntens * bmp.Width];
    float[] histLuminance = new float[NLumIntens];

    //Device histogram buffers are created while any of these fields is still unset
    if (kernelComputeHistograms == null || CLN == null || CLHistogram == null)
    {
        CLHistogram = new CLCalc.Program.Variable(histLuminance);
        CLPartialHistograms = new CLCalc.Program.Variable(PartialHistograms);
    }
    InitKernels();

    //Timers for each stage. The measured times are not read anywhere in this method
    System.Diagnostics.Stopwatch swTotal = new System.Diagnostics.Stopwatch();
    System.Diagnostics.Stopwatch swCopyBmp = new System.Diagnostics.Stopwatch();
    System.Diagnostics.Stopwatch swRescaling = new System.Diagnostics.Stopwatch();
    System.Diagnostics.Stopwatch swComputeHistPartial = new System.Diagnostics.Stopwatch();
    System.Diagnostics.Stopwatch swComputeHistConsolid = new System.Diagnostics.Stopwatch();
    System.Diagnostics.Stopwatch swHistIntegral = new System.Diagnostics.Stopwatch();
    swTotal.Start();
    swCopyBmp.Start();

    //Device images are recreated when the bitmap size changes, otherwise reused and overwritten
    if (CLbmp == null || CLbmp.Height != bmp.Height || CLbmp.Width != bmp.Width)
    {
        //NOTE(review): CLN/CLWidth/CLHeight are not rewritten on this path - confirm InitKernels
        //keeps them in sync with the new bitmap size
        CLbmp = new CLCalc.Program.Image2D(bmp);
        CLNewBmp = new CLCalc.Program.Image2D(bmp);
        CLPartialHistograms = new CLCalc.Program.Variable(PartialHistograms);
    }
    else
    {
        CLbmp.WriteBitmap(bmp);
        CLN.WriteToDevice(new int[] { NLumIntens });
        CLWidth.WriteToDevice(new int[] { bmp.Width });
        CLHeight.WriteToDevice(new int[] { bmp.Height });
    }
    swCopyBmp.Stop();
    swComputeHistPartial.Start();

    //Partial histograms: launched with bmp.Width work-items
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { CLbmp, CLPartialHistograms, CLHeight, CLN };
    kernelComputeHistograms.Execute(args, bmp.Width);
    CLCalc.Program.Sync();
    swComputeHistPartial.Stop();
    swComputeHistConsolid.Start();

    //Consolidation: launched with NLumIntens work-items, result copied back to the host
    args = new CLCalc.Program.MemoryObject[] { CLPartialHistograms, CLHistogram, CLHeight, CLN };
    kernelConsolidateHist.Execute(args, 
NLumIntens);
    CLHistogram.ReadFromDeviceTo(histLuminance);
    swComputeHistConsolid.Stop();
    swHistIntegral.Start();

    //Perform histogram integration - better performance in CPU
    //Compute histogram integrals in-place
    for (int i = 1; i < NLumIntens; i++)
    {
        histLuminance[i] += histLuminance[i - 1];
    }

    //Normalizes so that the last (largest) cumulative value becomes 0.9
    //NOTE(review): divides by zero when the histogram total is 0 - confirm that cannot happen
    float scale = 0.9f / histLuminance[NLumIntens - 1];
    //Scales histograms
    for (int i = 0; i < NLumIntens; i++)
    {
        histLuminance[i] *= scale;
    }
    //Writes histogram integral
    CLHistogram.WriteToDevice(histLuminance);
    swHistIntegral.Stop();
    swRescaling.Start();

    //Computes equalized image: one work-item per pixel, output read from CLNewBmp
    args = new CLCalc.Program.MemoryObject[] { CLbmp, CLNewBmp, CLHistogram, CLN };
    kernelPerformNormalization.Execute(args, new int [] { bmp.Width, bmp.Height });
    bmp = CLNewBmp.ReadBitmap();
    swRescaling.Stop();
    swTotal.Stop();
}
/// <summary>
/// Static constructor. Holds the OpenCL source of the thresholding and distance propagation
/// kernels, compiles it and creates the six kernels used by this class.
/// </summary>
static DynamicShading()
{
    #region OpenCL Source

    #region Thresholding
    string srcThresh = @"
__kernel void Threshold(__constant int * cfg,
                        __read_only image2d_t imgSrc,
                        __global uchar * byteInfo,
                        __write_only image2d_t imgThresh)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
                          CLK_ADDRESS_CLAMP | //Clamp to zeros
                          CLK_FILTER_NEAREST; //Don't interpolate

    int thresh = cfg[0];
    int2 coord = (int2)(get_global_id(0),get_global_id(1));
    uint4 pix = read_imageui(imgSrc, smp, coord);

    int pixBW = (int)pix.x+(int)pix.y+(int)pix.z;
    pixBW = pixBW > 3*thresh ? 255 : 0;

    byteInfo[coord.x+get_global_size(0)*coord.y] = pixBW;

    pix = (uint4)((uint)pixBW,(uint)pixBW,(uint)pixBW,255);
    write_imageui(imgThresh,coord,pix);
}

__kernel void RestoreBlackPixels(__constant int * cfg,
                                 __read_only image2d_t imgSrc,
                                 __read_only image2d_t imgRender,
                                 __write_only image2d_t dst)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
                          CLK_ADDRESS_CLAMP | //Clamp to zeros
                          CLK_FILTER_NEAREST; //Don't interpolate

    int thresh = cfg[0];
    int2 coord = (int2)(get_global_id(0),get_global_id(1));
    uint4 pix = read_imageui(imgSrc, smp, coord);

    int pixBW = (int)pix.x+(int)pix.y+(int)pix.z;

    if (pixBW <= 3*thresh) pix = (uint4)(0,0,0,255);
    else pix = read_imageui(imgRender, smp, coord);

    write_imageui(dst,coord,pix);
}
";
    #endregion

    #region Propagate distance to line
    // PropagateDist assigns the stroke weight (1E-9f) to stroke pixels. The statement used to be
    // the comparison "curW == 1E-9f;", which left the weight at 0 and made AddToTotalWeight
    // take the reciprocal of zero.
    string srcPropag = @"
__kernel void initWeight(__global float * weight)
{
    int2 coord = (int2)(get_global_id(0),get_global_id(1));
    int w = get_global_size(0);
    int idx = coord.x+w*coord.y;
    weight[idx] = 1e20f;
}

__kernel void initTotalWeight(__global float * weight)
{
    int2 coord = (int2)(get_global_id(0),get_global_id(1));
    int w = get_global_size(0);
    int idx = coord.x+w*coord.y;
    weight[idx] = 0.0f;
}

__kernel void AddToTotalWeight(__global float * totalWeight,
                               __global const float * weight,
                               __constant float * color,
                               __read_only image2d_t curImg,
                               __write_only image2d_t dstImg,
                               __global const uchar* byteInfo)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
                          CLK_ADDRESS_CLAMP | //Clamp to zeros
                          CLK_FILTER_NEAREST; //Don't interpolate

    int x = get_global_id(0);
    int y = get_global_id(1);
    int w = get_global_size(0);
    int idx = x+w*y;

    float totWeight = totalWeight[idx];
    float myWeight = native_recip(weight[idx]);
    myWeight = powr(myWeight, 1.6f);
    //myWeight = native_log(1.0f + myWeight);

    int2 coord = (int2)(x,y);
    uint4 pix = read_imageui(curImg, smp, coord);

    float4 curColor = (float4)((float)pix.x,(float)pix.y,(float)pix.z,255.0f);
    float4 newColor = (float4)((float)color[0],(float)color[1],(float)color[2],255.0f);

    newColor = (newColor * myWeight + curColor * totWeight)*native_recip(myWeight+totWeight);
    newColor = clamp(newColor, 0.0f, 255.0f);

    pix = (uint4)((uint)newColor.x,(uint)newColor.y,(uint)newColor.z,255);
    if (byteInfo[idx] == 0) pix = (uint4)(0,0,0,255);

    write_imageui(dstImg,coord,pix);
    totalWeight[idx] = totWeight + myWeight;
}

__kernel void PropagateDist(__global int * changed,
                            __read_only image2d_t imgStroke,
                            __global const uchar* byteInfo,
                            __global float * weight,
                            __write_only image2d_t imgDists)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
                          CLK_ADDRESS_CLAMP | //Clamp to zeros
                          CLK_FILTER_NEAREST; //Don't interpolate

    int2 coord = (int2)(get_global_id(0),get_global_id(1));
    int w = get_global_size(0);

    uint4 pix = read_imageui(imgStroke, smp, coord);
    int val = max((int)pix.z,max((int)pix.x,(int)pix.y));

    int idx = coord.x+w*coord.y;

    float curW = 0.0f;
    if (val > 0) curW = 1E-9f;
    else if (coord.x == 0 || coord.y == 0 || coord.x == w-1 || coord.y == get_global_size(1)-1 || byteInfo[idx]==0) curW = 1e20f;
    else
    {
        curW = weight[idx-1]+1;
        curW = fmin(curW, weight[idx+1] + 1);
        curW = fmin(curW, weight[idx+w] + 1);
        curW = fmin(curW, weight[idx-w] + 1);

        curW = fmin(curW, weight[idx-w-1] + 1.41421356237f);
        curW = fmin(curW, weight[idx-w+1] + 1.41421356237f);
        curW = fmin(curW, weight[idx+w-1] + 1.41421356237f);
        curW = fmin(curW, weight[idx+w+1] + 1.41421356237f);
    }

    if (weight[idx] != curW) changed[0] = 1;
    weight[idx] = curW;

    //float pixBW = clamp(curW,0.0f,255.0f);
    //uint4 pix2 = (uint4)((uint)pixBW,(uint)0,255-(uint)pixBW,255);
    //if (byteInfo[idx]==0) pix2 = (uint4)((uint)0,(uint)0,(uint)0,255);
    //write_imageui(imgDists,coord,pix2);
}
";
    #endregion

    #endregion

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    // Both source strings are compiled together as one program.
    CLCalc.Program.Compile(new string[] { srcThresh, srcPropag });

    kernelThreshold = new CLCalc.Program.Kernel("Threshold");
    kernelPropagateDist = new CLCalc.Program.Kernel("PropagateDist");
    kernelinitWeight = new CLCalc.Program.Kernel("initWeight");
    kernelinitTotalWeight = new CLCalc.Program.Kernel("initTotalWeight");
    kernelAddToTotalWeight = new CLCalc.Program.Kernel("AddToTotalWeight");
    kernelRestoreBlackPixels = new CLCalc.Program.Kernel("RestoreBlackPixels");
}
/// <summary>
/// Sample program: compiles the "bruteDeForce" kernel, uploads the alphabet, the cumulative
/// step table and the output buffers, runs 10 work-items and prints the value read back
/// from the match buffer.
/// </summary>
static void Main(string[] args)
{
    string brute = @"
        __kernel void
        bruteDeForce(global char * alphabet, global double * alphabetSize, global int * maxLen, global double * steps, global char * password, global int * match)
        {
            // Vector element index
            int i = get_global_id(0);
            int stepPointer = 0;
            char word[7];
            int pos = 0;
            if (i >= steps[stepPointer]){
                stepPointer++;
            }
            int j = 0;
            double sum = 0;
            for (; j <= stepPointer; j++){
                pos = (int)fmod((i - sum) / pow(alphabetSize[0], (double)j), alphabetSize[0]);
                sum = sum + pow((pos + 1),(double)j);
                word[(stepPointer-j)] = alphabet[pos];
            }
            word[j] = '\0';
        }
        ";

    //Initializes OpenCL Platforms and Devices and sets everything up
    CLCalc.InitCL();

    //Compiles the source codes. The source is a string array because the user may want
    //to split the source into many strings.
    CLCalc.Program.Compile(new string[] { brute });

    //Gets host access to the OpenCL bruteDeForce kernel
    CLCalc.Program.Kernel VectorSum = new OpenCLTemplate.CLCalc.Program.Kernel("bruteDeForce");

    int[] maxLen = { 3 };
    char[] password = new char[10];
    int[] match = { 0 };

    // char[] alphabet = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
    //'s', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
    //'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ','!','$','%','@','-','_'};
    char[] alphabet = new char[] { 'a', 'b', 'c' };

    //The kernel declares alphabetSize as double*, so the host buffer must hold doubles too;
    //uploading an int[] would make the kernel read past a 4-byte buffer and misinterpret its bits.
    double[] alphabetSize = { alphabet.Length };

    //steps[k] = number of candidate words with at most k + 1 characters
    double[] steps = new double[maxLen[0]];
    for (int i = 1; i <= maxLen[0]; i++)
    {
        if (i == 1)
        {
            steps[(i - 1)] = Math.Pow(alphabetSize[0], i);
        }
        else
        {
            steps[(i - 1)] = Math.Pow(alphabetSize[0], i) + steps[(i - 2)];
        }
    }

    //Creates the kernel arguments in the device memory
    OpenCLTemplate.CLCalc.Program.Variable varAlphabet = new OpenCLTemplate.CLCalc.Program.Variable(alphabet);
    OpenCLTemplate.CLCalc.Program.Variable varAlphabetSize = new OpenCLTemplate.CLCalc.Program.Variable(alphabetSize);
    OpenCLTemplate.CLCalc.Program.Variable varMaxLen = new OpenCLTemplate.CLCalc.Program.Variable(maxLen);
    OpenCLTemplate.CLCalc.Program.Variable varSteps = new OpenCLTemplate.CLCalc.Program.Variable(steps);
    OpenCLTemplate.CLCalc.Program.Variable varPassword = new OpenCLTemplate.CLCalc.Program.Variable(password);
    OpenCLTemplate.CLCalc.Program.Variable varMatch = new OpenCLTemplate.CLCalc.Program.Variable(match);

    //Arguments of the bruteDeForce kernel, in declaration order
    OpenCLTemplate.CLCalc.Program.Variable[] argsCL = new OpenCLTemplate.CLCalc.Program.Variable[] { varAlphabet, varAlphabetSize, varMaxLen, varSteps, varPassword, varMatch };
    int[] workers = new int[1] { 10 };

    //OpenCLTemplate.CLCalc.Program.DefaultCQ = 0;

    //Execute the kernel
    VectorSum.Execute(argsCL, workers);

    //Read the device buffers back to host memory
    //NOTE(review): the kernel as written never stores to password or match - confirm this is work in progress
    varMatch.ReadFromDeviceTo(match);
    varPassword.ReadFromDeviceTo(password);

    Console.WriteLine(match[0]);
    Console.ReadLine();
}
/// <summary>Constructor. Compiles the float ODE46 OpenCL sources and allocates the device
/// variables, kernels and argument lists when OpenCL is in use.</summary>
/// <param name="InitialIndepVarValue">Initial independent variable value</param>
/// <param name="StepSize">Desired step per integration pass</param>
/// <param name="InitialState">Initial state of system</param>
/// <param name="DerivativeCalculator">Function to calculate derivatives vector</param>
/// <exception cref="Exception">OpenCL acceleration is reported as NotUsingCL.</exception>
public floatODE46(float InitialIndepVarValue, float StepSize, float[] InitialState, DerivCalcDeleg DerivativeCalculator)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.NotUsingCL)
    {
        throw new Exception("OpenCL not available");
    }
    //NOTE(review): if the acceleration state is still Unknown here, nothing is initialized and no exception is thrown
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
    {
        ODE46Source Source = new ODE46Source();
        string[] s = new string[] { Source.floatStep2, Source.floatStep3, Source.floatStep4, Source.floatStep5, Source.floatStep6, Source.floatFinalizeCalc };
        CLCalc.Program.Compile(s);

        //Derivative calculator
        Derivs = DerivativeCalculator;

        //Scalars - all three device variables start from the same one-element host array
        float[] xx = new float[1] { InitialIndepVarValue };
        x = new CLCalc.Program.Variable(xx);
        xsav = new CLCalc.Program.Variable(xx);

        //Sets initial values to Device and local variables
        hdid = new CLCalc.Program.Variable(xx);
        currentX = InitialIndepVarValue;
        //NOTE(review): SetStep runs after hdid exists - presumably it writes the step to the device; confirm before reordering
        SetStep(StepSize);

        //Vectors - yy is a private copy of InitialState
        yy = new float[InitialState.Length];
        for (int i = 0; i < InitialState.Length; i++)
        {
            yy[i] = InitialState[i];
        }
        ysav = new CLCalc.Program.Variable(yy);
        //k1..k6 are separate device variables, each created from InitialState
        k1 = new CLCalc.Program.Variable(InitialState);
        k2 = new CLCalc.Program.Variable(InitialState);
        k3 = new CLCalc.Program.Variable(InitialState);
        k4 = new CLCalc.Program.Variable(InitialState);
        k5 = new CLCalc.Program.Variable(InitialState);
        k6 = new CLCalc.Program.Variable(InitialState);
        absError = new CLCalc.Program.Variable(new float[InitialState.Length]);
        y = new CLCalc.Program.Variable(yy);

        //Kernels - one Y and one X kernel per step 2..6, plus finalize and update
        KernelFinalizeCalc = new CLCalc.Program.Kernel("floatFinalizeCalc");
        KernelUpdateX = new CLCalc.Program.Kernel("floatUpdateX");
        KernelRK46YStep2 
= new CLCalc.Program.Kernel("floatYStep2");
        KernelRK46XStep2 = new CLCalc.Program.Kernel("floatXStep2");
        KernelRK46YStep3 = new CLCalc.Program.Kernel("floatYStep3");
        KernelRK46XStep3 = new CLCalc.Program.Kernel("floatXStep3");
        KernelRK46YStep4 = new CLCalc.Program.Kernel("floatYStep4");
        KernelRK46XStep4 = new CLCalc.Program.Kernel("floatXStep4");
        KernelRK46YStep5 = new CLCalc.Program.Kernel("floatYStep5");
        KernelRK46XStep5 = new CLCalc.Program.Kernel("floatXStep5");
        KernelRK46YStep6 = new CLCalc.Program.Kernel("floatYStep6");
        KernelRK46XStep6 = new CLCalc.Program.Kernel("floatXStep6");

        //Kernel arguments - prebuilt argument lists for the finalize, Y and X kernels
        ArgsFinalize = new CLCalc.Program.Variable[] { x, hdid, y, ysav, absError, k1, k2, k3, k4, k5, k6 };
        ArgsRK46Y = new CLCalc.Program.Variable[] { x, hdid, y, ysav, k1, k2, k3, k4, k5, k6 };
        ArgsRK46X = new CLCalc.Program.Variable[] { x, hdid, xsav };

        //Work sizes: one entry per state for the vector kernels, a single one for the scalar kernels
        NStates = new int[1] { InitialState.Length };
        NScalar = new int[1] { 1 };

        //Data retrieving - host buffers sized like the work sizes above
        yerr = new float[NStates[0]];
        xRet = new float[NScalar[0]];
    }
}
/// <summary>Sets up the isosurface calculator: allocates the host and device buffers and compiles the
/// marching cubes kernels. Variables created from an OpenGL context may be supplied for the CL variables
/// when using OpenCL/GL interop; pass NULL for any of them to have a plain CL variable created instead.</summary>
/// <param name="FuncValues">Values of the evaluated 3D function f(x,y,z). FuncValues=float[maxX,maxY,maxZ]</param>
/// <param name="CLEdgeCoords">OpenCL variable (float) that receives edge coordinates. Dimension has to be 9 * maxX * maxY * maxZ</param>
/// <param name="CLEdgeNormals">OpenCL variable (float) that receives edge normals. Dimension has to be 9 * maxX * maxY * maxZ</param>
/// <param name="CLElementArrayIndex">OpenCL variable (int) that receives the element array index. Dimension has to be 5 * 3 * (maxX - 1) * (maxY - 1) * (maxZ - 1)</param>
/// <exception cref="Exception">Thrown when OpenCL acceleration is not available.</exception>
private void InitMarchingCubes(float[, ,] FuncValues, CLCalc.Program.Variable CLEdgeCoords, CLCalc.Program.Variable CLEdgeNormals, CLCalc.Program.Variable CLElementArrayIndex)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        throw new Exception("OpenCL not available");
    }

    // Grid extents along each axis.
    int sizeX = FuncValues.GetLength(0);
    int sizeY = FuncValues.GetLength(1);
    int sizeZ = FuncValues.GetLength(2);
    max = new int[] { sizeX, sizeY, sizeZ };

    // Isolevel
    isoLevel = new float[] { 1.32746E-5f };
    varIsoLevel = new CLCalc.Program.Variable(isoLevel);

    // Step size and x0,y0,z0
    varStep = new CLCalc.Program.Variable(step);
    varInitVals = new CLCalc.Program.Variable(initVals);

    // Function values: allocate the device buffer first, then copy the samples into it.
    funcVals = new float[sizeX * sizeY * sizeZ];
    CLFuncVals = new CLCalc.Program.Variable(funcVals);
    SetFuncVals(FuncValues);

    // Edge coordinates - 3 coords * 3 possible directions * number of points
    edgeCoords = new float[9 * sizeX * sizeY * sizeZ];
    if (CLEdgeCoords == null)
    {
        varEdgeCoords = new CLCalc.Program.Variable(edgeCoords);
    }
    else
    {
        // Caller-supplied variable: adopt it and overwrite its contents with the fresh buffer.
        varEdgeCoords = CLEdgeCoords;
        varEdgeCoords.WriteToDevice(edgeCoords);
    }

    // 4 preliminary normals per edge - has to be averaged afterwards
    edgePrelimNormals = new float[36 * sizeX * sizeY * sizeZ];
    varEdgePrelimNormals = new CLCalc.Program.Variable(edgePrelimNormals);

    // Edge normals
    edgeNormals = new float[9 * sizeX * sizeY * sizeZ];
    if (CLEdgeNormals == null)
    {
        varEdgeNormals = new CLCalc.Program.Variable(edgeNormals);
    }
    else
    {
        varEdgeNormals = CLEdgeNormals;
        varEdgeNormals.WriteToDevice(edgeNormals);
    }

    // Number of cubes: (maxX-1)*(maxY-1)*(maxZ-1)
    // Marching cube algorithm: each cube can have 5 triangles drawn, 3 vertexes per triangle
    // q-th vertex of p-th triangle of the ijk-th cube: [(5*(i+(maxX-1)*j+k*(maxX-1)*(maxY-1))+p)*3+q]
    elementIndex = new int[5 * 3 * (sizeX - 1) * (sizeY - 1) * (sizeZ - 1)];
    if (CLElementArrayIndex == null)
    {
        varElemIndex = new CLCalc.Program.Variable(elementIndex);
    }
    else
    {
        varElemIndex = CLElementArrayIndex;
        varElemIndex.WriteToDevice(elementIndex);
    }

    // Edge remapping used to build the output; every entry starts at -1.
    edges = new int[edgeCoords.Length / 3];
    for (int edge = 0; edge < edges.Length; edge++)
    {
        edges[edge] = -1;
    }

    // Compile the code and create the kernels.
    var kernelSource = new CLMarchingCubesSrc();
    CLCalc.Program.Compile(new string[] { kernelSource.definitions, kernelSource.src });

    kernelInterpPts = new CLCalc.Program.Kernel("interpPts");
    kernelPolygonize = new CLCalc.Program.Kernel("Polygonize");
    kernelSmoothNormals = new CLCalc.Program.Kernel("SmoothNormals");
    kernelPolygonizeNoNormals = new CLCalc.Program.Kernel("PolygonizeNoNormals");
}
/// <summary>
/// Form load handler: initializes OpenCL for all device types, lists the detected platforms and devices
/// in the combo boxes, reads the registry entries and finally smoke-tests kernel compilation.
/// </summary>
/// <param name="sender">Event source (unused)</param>
/// <param name="e">Event data (unused)</param>
private void frmCLInfo_Load(object sender, EventArgs e)
{
    CLCalc.InitCL(ComputeDeviceTypes.All);
    bool openClReady = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;

    if (!openClReady)
    {
        // Show the failure in the platform list; the item just added is always selectable.
        cmbPlat.Items.Add("OpenCL ERROR");
        cmbPlat.SelectedIndex = 0;
    }
    else
    {
        foreach (ComputePlatform p in CLCalc.CLPlatforms)
        {
            cmbPlat.Items.Add(p.Name + " " + p.Profile + " " + p.Vendor + " " + p.Version);
        }

        if (cmbPlat.Items.Count > 0)
        {
            cmbPlat.SelectedIndex = 0;
        }

        foreach (ComputeDevice d in CLCalc.CLDevices)
        {
            // Both device lists show the same description.
            string description = d.Name + " " + d.Type + " " + d.Vendor + " " + d.Version;
            cmbDevices.Items.Add(description);
            cmbCurDevice.Items.Add(description);
        }

        if (cmbDevices.Items.Count > 0)
        {
            cmbDevices.SelectedIndex = 0;
            cmbCurDevice.SelectedIndex = CLCalc.Program.DefaultCQ;
        }
    }

    ReadImportantRegistryEntries();

    // Skip the smoke test when OpenCL failed to initialize, so the "OpenCL ERROR" entry above
    // is what the user ends up seeing.
    // NOTE(review): this presumes Compile needs a live OpenCL context and would otherwise throw
    // out of the load handler - confirm against CLCalc.Program.Compile.
    if (!openClReady)
    {
        return;
    }

    string s = @" kernel void teste() {}";
    CLCalc.Program.Compile(s);
    try
    {
        // Only the construction matters; the kernel object itself is not used.
        new CLCalc.Program.Kernel("teste");
    }
    catch
    {
        // NOTE(review): the dialog text is empty in the original; kept unchanged.
        MessageBox.Show("");
    }
}
/// <summary>Compiles the linear algebra sources and creates every CL kernel once.
/// Does nothing if the kernels already exist or OpenCL is not in use.</summary>
public static void Init()
{
    // Already initialized.
    if (kernelCholeskyDiagBlock != null)
    {
        return;
    }

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }

    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    // Checked again after the OpenCL initialization above, as the original does.
    if (kernelCholeskyDiagBlock != null)
    {
        return;
    }

    // Submatrix side: square root of the device's maximum work-group size, capped at 16.
    var device = CLCalc.Program.CommQueues[CLCalc.Program.DefaultCQ].Device;
    int largestSquareSide = (int)Math.Sqrt((double)device.MaxWorkGroupSize);
    SUBMATRIXSIZE = Math.Min(16, largestSquareSide);

    string strSubSize = SUBMATRIXSIZE.ToString();
    string strTotSize = (SUBMATRIXSIZE * (SUBMATRIXSIZE + 1) / 2).ToString();

    // Substitute the size placeholders into the block Cholesky source before compiling.
    var src = new LinalgSrc();
    string srcBlockChol = src.srcBlockCholesky
        .Replace("CONSTSUBMATRIXSIZE", strSubSize)
        .Replace("CONSTGLOBALSIZE", strTotSize);

    CLCalc.Program.Compile(new string[]
    {
        srcBlockChol,
        src.srcBkSubs,
        src.srcOperations,
        src.srcVecSum,
        src.srcpNorm,
        src.strFeasibFunc,
        src.srcLogistReg
    });

    kernelCholeskyDiagBlock = new CLCalc.Program.Kernel("CholeskyDiagBlock");
    kernelCholeskyComputePanel = new CLCalc.Program.Kernel("CholeskyComputePanel");
    kernelCholeskyForwardProp = new CLCalc.Program.Kernel("CholeskyForwardProp");

    kernelFwdUpperBackSubs = new CLCalc.Program.Kernel("FwdUpperBackSubs");
    kernelBkLowerBackSubs = new CLCalc.Program.Kernel("BkLowerBackSubs");
    kernelFwdPropag = new CLCalc.Program.Kernel("FwdPropag");
    kernelFwdPropag2 = new CLCalc.Program.Kernel("FwdPropag2");
    kernelBackPropag = new CLCalc.Program.Kernel("BackPropag");
    kernelBackPropag2 = new CLCalc.Program.Kernel("BackPropag2");

    kernelInPlaceSubtract = new CLCalc.Program.Kernel("InPlaceSubtract");
    kernelElemWiseAbs = new CLCalc.Program.Kernel("ElemWiseAbs");
    kernelInnerProd = new CLCalc.Program.Kernel("InnerProd");

    // Linear algebra
    kernelSymMatrVecMultiply = new CLCalc.Program.Kernel("SymMatrVecMultiply");
    kernelSymMatrMatrMultiply = new CLCalc.Program.Kernel("SymMatrMatrMultiply");
    kernelComputeAtWA = new CLCalc.Program.Kernel("ComputeAtWA");
    kernelComputeAinvHAt = new CLCalc.Program.Kernel("ComputeAinvHAt");
    kernelRegularMatrTranspMatrProd = new CLCalc.Program.Kernel("RegularMatrTranspMatrProd");
    kernelRegularMatrMatrProd = new CLCalc.Program.Kernel("RegularMatrMatrProd");
    kernelCopyBuffer = new CLCalc.Program.Kernel("CopyBuffer");
    kernelLinearComb = new CLCalc.Program.Kernel("LinearComb");
    kernelMatrVecProd = new CLCalc.Program.Kernel("MatrVecProd");
    kernelTranspMatrVecProdW = new CLCalc.Program.Kernel("TranspMatrVecProdW");
    kernelMatrVecProdSumVec = new CLCalc.Program.Kernel("MatrVecProdSumVec");
    kernelDiagVecProd = new CLCalc.Program.Kernel("DiagVecProd");
    kernelDiagTranspMatProd = new CLCalc.Program.Kernel("DiagTranspMatProd");
    kernelElemWiseProd = new CLCalc.Program.Kernel("ElemWiseProd");
    kernelElemWiseInv = new CLCalc.Program.Kernel("ElemWiseInv");
    kernelElemWiseInv2 = new CLCalc.Program.Kernel("ElemWiseInv2");
    kernelClear = new CLCalc.Program.Kernel("ClearResps");
    kernelPreSum = new CLCalc.Program.Kernel("PreSum");
    kernelCoalLocalSum = new CLCalc.Program.Kernel("CoalLocalSum");
    kernelHasPositiveEntry = new CLCalc.Program.Kernel("HasPositiveEntry");

    // pNorm minimization
    floatOptimization.CurveFitting.kernelpNorm = new CLCalc.Program.Kernel("pNorm");
    floatOptimization.CurveFitting.kerneldpNorm = new CLCalc.Program.Kernel("dpNorm");

    // Logistic regression: reuses the pNorm kernels created for curve fitting.
    floatOptimization.LogisticRegression.kernelComputeLogistRegParams = new CLCalc.Program.Kernel("ComputeLogistRegParams");
    floatOptimization.LogisticRegression.kernelpNorm = floatOptimization.CurveFitting.kernelpNorm;
    floatOptimization.LogisticRegression.kerneldpNorm = floatOptimization.CurveFitting.kerneldpNorm;

    // Feasibility
    floatOptimization.QuadraticProgramming.kernelgetLast = new CLCalc.Program.Kernel("getLast");
}
// NOTE(review): integrateM seeds maxv with -1e-8f while backTrack seeds its search with -1e8f;
// confirm whether the near-zero seed in integrateM is intentional or a typo.
// NOTE(review): the compile and the kernel/variable creation run whatever the acceleration state
// is after InitCL - confirm the behavior when OpenCL is unavailable.
/// <summary>
/// Static initializer: prepares the OpenCL program, device variables and kernels used by this class.
/// </summary>
/// <remarks>
/// Steps performed, in order:
/// 1. Calls CLCalc.InitCL() when the acceleration state is still Unknown.
/// 2. Builds the kernel source, replaces the "PXtoSearch" placeholder with the value of PIXELSTOSEARCH
///    and compiles it.
/// 3. Allocates the device variables CLthresh (1 float), CLx (1 int), CLDim (2 ints) and
///    CLpenalties (2 floats).
/// 4. Creates the kernels initIntM, integrateM, backTrack and incCounter.
///
/// Kernels in the embedded source:
/// - initIntM: for row y, reads the pixel at column 0, sums its x, y and z channels scaled by 1/(3*255)
///   and stores the result in intM[y], or 0 when it does not exceed threshold[0].
/// - integrateM: for column x[0], takes the largest value of the previous column within
///   +/-PIXELSTOSEARCH rows, each reduced by sqrt(1 + k*k) * penalties[0] (distance penalty) and, for
///   k != 0, by penalties[1] (change penalty); adds the thresholded intensity of the current pixel
///   and stores the sum in intM[y + H*x[0]], where H is the global work size.
/// - incCounter: increments x[0].
/// - backTrack: with W = Dimensions[0] and H = Dimensions[1], picks the row holding the largest value
///   in the last column, then walks back column by column choosing the largest entry within
///   +/-PIXELSTOSEARCH rows of the previously chosen row, writing each chosen row to path[x].
/// </remarks>
static LaserLineTrack() { if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown) { CLCalc.InitCL(); } #region Source string src = @" constant int PIXELSTOSEARCH = PXtoSearch; const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates CLK_ADDRESS_CLAMP | //Clamp to zeros CLK_FILTER_NEAREST; //Don't interpolate __kernel void initIntM(__read_only image2d_t img, __global float * intM, __constant float * threshold) { int y = get_global_id(0); uint4 imgColor = read_imageui(img, smp, (int2)(0,y)); float intens = ((float)imgColor.x + (float)imgColor.y + (float)imgColor.z) * 0.001307189542f; // *1.0f/(3*255) intM[y] = intens > threshold[0] ? intens : 0.0f; } //penalties[0] = distancePenalty, penalties[1] = changePenalty __kernel void integrateM(__read_only image2d_t img, __global float * intM, __constant float * threshold, __constant int * x, __constant float * penalties) { int y = get_global_id(0); int H = get_global_size(0); uint4 imgColor = read_imageui(img, smp, (int2)(x[0],y)); float intens = ((float)imgColor.x + (float)imgColor.y + (float)imgColor.z) * 0.001307189542f; // *1.0f/(3*255) float maxv = -1e-8f; //intM[H*(x[0] - 1) + y] - penalties[0]; for (int k = -PIXELSTOSEARCH; k <= PIXELSTOSEARCH; k++) { if (y + k >= 0 && y + k < H) maxv = fmax(maxv, intM[H*(x[0] - 1) + y + k] - sqrt(1.0f + k * k) * penalties[0] - (k != 0 ? penalties[1] : 0.0f)); } intM[y + H*x[0]] = maxv + (intens > threshold[0] ? 
intens : 0.0f); } __kernel void incCounter(__global int * x) { x[0]++; } __kernel void backTrack( __global float * intM, __global int * path, __constant int * Dimensions) { int W = Dimensions[0]; int H = Dimensions[1]; int idMax = 0; float valMax = intM[H*(W - 1)]; //find most amplified path for (int y = 0; y < H; y++) { if (valMax < intM[H*(W - 1) + y]) { valMax = intM[H*(W - 1) + y]; idMax = y; } } path[W - 1] = idMax; for (int x = W - 2; x >= 0; x--) { int y = path[x + 1]; float maxv = -1e8f; int idv = -1; for (int k = -PIXELSTOSEARCH; k <= PIXELSTOSEARCH; k++) { if (y + k >= 0 && y + k < H) { if (intM[H*x + y + k] > maxv) { maxv = intM[H* x + y + k]; idv = y + k; } } } path[x] = idv; } } "; #endregion CLCalc.Program.Compile(src.Replace("PXtoSearch", PIXELSTOSEARCH.ToString())); CLthresh = new CLCalc.Program.Variable(new float[1]); CLx = new CLCalc.Program.Variable(new int[1]); CLDim = new CLCalc.Program.Variable(new int[2]); CLpenalties = new CLCalc.Program.Variable(new float[2]); kernelinitIntM = new CLCalc.Program.Kernel("initIntM"); kernelintM = new CLCalc.Program.Kernel("integrateM"); kernelBackTrack = new CLCalc.Program.Kernel("backTrack"); kernelincCounter = new CLCalc.Program.Kernel("incCounter"); }