Exemple #1
0
        /// <summary>Returns the matrix product M1*M2, computed on the OpenCL device.</summary>
        /// <param name="M1">First matrix (p x q)</param>
        /// <param name="M2">Second matrix (q x r)</param>
        /// <returns>The product, rebuilt from the device result by VectorToMatrix</returns>
        /// <exception cref="ArgumentException">The column count of M1 differs from the row count of M2</exception>
        public float[,] MultiplyLocals(float[,] M1, float[,] M2)
        {
            //M1 is p x q, M2 is q x r
            int p = M1.GetLength(0);
            int q = M1.GetLength(1);
            int r = M2.GetLength(1);

            if (q != M2.GetLength(0)) throw new ArgumentException("Matrix dimensions do not match for multiplication");

            //p, q and r are passed by reference, so the helpers may adjust them
            float[] vecM1 = MatrixToVector(M1, ref p, ref q);
            float[] vecM2 = MatrixToVector(M2, ref q, ref r);
            float[] vecResp = new float[p * r];

            //Declared up front so the finally block can release whatever was actually created
            CLCalc.Program.Variable varResp = null;
            CLCalc.Program.Variable varM1 = null;
            CLCalc.Program.Variable varM2 = null;
            CLCalc.Program.Variable varQ = null;

            try
            {
                varResp = new CLCalc.Program.Variable(vecResp);
                varM1 = new CLCalc.Program.Variable(vecM1);
                varM2 = new CLCalc.Program.Variable(vecM2);

                //Inner dimension q, handed to the kernel as a one-element buffer
                varQ = new CLCalc.Program.Variable(new int[1] { q });

                CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[4] { varResp, varM1, varM2, varQ };
                int[] max = new int[2] { p, r };

                //NOTE(review): the local work size is fixed at 8x8 - confirm that p and r are always
                //compatible with it (e.g. padded by MatrixToVector) before relying on arbitrary sizes
                floatMatrixMultLocals.Execute(args, max, new int[] { 8, 8 });

                varResp.ReadFromDeviceTo(vecResp);
            }
            finally
            {
                //Release every device buffer, not only the result, even when a call above throws
                if (varQ != null) varQ.Dispose();
                if (varM2 != null) varM2.Dispose();
                if (varM1 != null) varM1.Dispose();
                if (varResp != null) varResp.Dispose();
            }

            return VectorToMatrix(vecResp, ref p, ref r);
        }
Exemple #2
0
        /// <summary>Benchmark driver: calls DoOCL with the floatVectorSum kernel <c>count</c> times,
        /// then DoCPU the same number of times, and prints the elapsed stopwatch ticks of each run.</summary>
        static void Main()
        {
            // OpenCL platforms and devices must be set up before any device object is created
            CLCalc.InitCL();

            // Host buffers: two operands plus a result, n elements each
            float[] v1 = new float[n];
            float[] v2 = new float[n];
            float[] vResult = new float[n];

            // Populate the operands
            int k = 0;
            while (k < n)
            {
                v1[k] = (float)k / 10;
                v2[k] = -(float)k / 9;
                k++;
            }

            // The compiler takes a string array so that the source may be split into several strings
            string[] sources = new string[] { vecSum };
            CLCalc.Program.Compile(sources);

            // Host-side handle to the compiled floatVectorSum kernel
            CLCalc.Program.Kernel sumKernel = new CLCalc.Program.Kernel("floatVectorSum");

            // Device copies of both operands; together they form the kernel argument list
            CLCalc.Program.Variable devV1 = new CLCalc.Program.Variable(v1);
            CLCalc.Program.Variable devV2 = new CLCalc.Program.Variable(v2);
            CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[] { devV1, devV2 };

            // One work-item per vector element
            int[] workItems = new int[1] { n };

            // Timed OpenCL run
            sw.Start();
            for (int pass = 0; pass < count; pass++)
            {
                DoOCL(sumKernel, kernelArgs, workItems);
            }
            sw.Stop();

            // Fetch the contents of the first device vector into the host result buffer
            devV1.ReadFromDeviceTo(vResult);

            Console.WriteLine("OpenCL: {0}", sw.ElapsedTicks);

            // Timed CPU run
            sw.Restart();
            for (int pass = 0; pass < count; pass++)
            {
                DoCPU(v1, v2, vResult);
            }
            sw.Stop();

            Console.WriteLine("CPU: {0}", sw.ElapsedTicks);

            PressAny();
        }
Exemple #3
0
        /// <summary>Creates an ImageData from a bitmap: stores its dimensions, reads its pixels into
        /// the local byte buffer and creates the matching OpenCL variable.</summary>
        /// <param name="bmp">Bitmap to read from</param>
        public ImageData(Bitmap bmp)
        {
            //OpenCL is initialized lazily, the first time its state is still unknown
            bool clStateUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
            if (clStateUnknown)
            {
                CLCalc.InitCL();
            }

            //Image dimensions
            width = bmp.Width;
            height = bmp.Height;

            //Three bytes per pixel
            Data = new byte[3 * width * height];

            //Fill the local buffer from the bitmap
            ReadToLocalData(bmp);

            //Create the device-side variable from the local buffer
            varData = new CLCalc.Program.Variable(Data);
        }
Exemple #4
0
        /// <summary>Computes the Discrete Fourier Transform of a double2 vector x whose length is a power of 16.
        /// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... Re[xn] Im[xn] }, n = power of 16 (Length = 2*pow(16,n))</summary>
        /// <param name="x">Interleaved real/imaginary input values</param>
        /// <returns>A new array, of the same length as x, read back from the device result</returns>
        public static double[] FFT16(double[] x)
        {
            //The cached device buffers are rebuilt when missing or when the input length changed
            bool buffersAreStale = CLx == null || CLx.OriginalVarLength != x.Length;
            if (buffersAreStale)
            {
                CLx = new CLCalc.Program.Variable(x);
                CLy = new CLCalc.Program.Variable(x);
            }

            //Upload the input
            CLx.WriteToDevice(x);

            //Transform in device memory
            CLy = FFT16(ref CLx);

            //Download the result
            double[] transformed = new double[x.Length];
            CLy.ReadFromDeviceTo(transformed);

            return transformed;
        }
Exemple #5
0
        /// <summary>Computes the Discrete Fourier Transform of a double2 vector x, already in device memory, whose length is a power of 16.
        /// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... Re[xn] Im[xn] }, n = power of 16 (Length = 2*pow(16,n))</summary>
        /// <param name="CLx">Device variable with the interleaved input. The passes ping-pong between it and
        /// an internal buffer, and the two may be swapped on return, so it no longer holds the input afterwards.</param>
        /// <returns>Device variable holding the transform, or null when OpenCL is not in use</returns>
        /// <exception cref="ArgumentException">The vector length is not 2*pow(16,n)</exception>
        public static CLCalc.Program.Variable FFT16(ref CLCalc.Program.Variable CLx)
        {
            //Without an active OpenCL device nothing can run; leave before touching device memory
            if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL) return null;

            int length = CLx.OriginalVarLength;

            //Valid lengths are 2*16^k = 2^(4k+1): a power of two whose exponent is 1 modulo 4.
            //Integer arithmetic is used because truncating Math.Log(length/2, 16) can land one below
            //the true exponent when the floating-point quotient falls just under an integer.
            bool validLength = length >= 2 && (length & (length - 1)) == 0;
            if (validLength)
            {
                int exponent = 0;
                for (int v = length; v > 1; v >>= 1) exponent++;
                validLength = (exponent & 3) == 1;
            }
            if (!validLength) throw new ArgumentException("Number of elements should be a power of 16 ( vector length should be 2*pow(16,n) )");

            //Scratch buffer must hold doubles, like the input; a float buffer would be half the byte size
            if (CLy == null || CLy.OriginalVarLength != length) CLy = new CLCalc.Program.Variable(new double[length]);

            if (kernelfft_radix16 == null)
            {
                InitKernels();
            }

            //Two argument orders so that consecutive passes alternate source and destination
            CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLx, CLy, CLp };
            CLCalc.Program.Variable[] args2 = new CLCalc.Program.Variable[] { CLy, CLx, CLp };
            bool usar2 = true;

            int[] p = new int[] { 1 };
            CLp.WriteToDevice(p);
            int n = length >> 5;

            //One radix-16 pass per iteration; p is multiplied by 16 after each pass
            while (p[0] <= n)
            {
                usar2 = !usar2;
                if (usar2)
                    kernelfft_radix16.Execute(args2, n);
                else
                    kernelfft_radix16.Execute(args, n);

                p[0] = p[0] << 4;
                CLp.WriteToDevice(p);
            }

            //When the last pass wrote into CLx (or no pass ran), swap so that CLy is the result
            if (usar2)
            {
                CLCalc.Program.Variable temp = CLx;
                CLx = CLy; CLy = temp;
            }

            return CLy;
        }
Exemple #6
0
        /// <summary>Applies the given filter to the image, either with the OpenCL kernels or with the CPU overload.</summary>
        /// <param name="imgDt">Image to be filtered; its Data is overwritten with the filtered values</param>
        /// <param name="Filter">Filter. [3*size*size]</param>
        /// <param name="useOpenCL">True to run the OpenCL kernels, false to call the CPU overload of ApplyFilter</param>
        /// <param name="useWorkDim2">With OpenCL, true launches the two-dimensional kernel; not read otherwise</param>
        public static void ApplyFilter(ImageData imgDt, float[] Filter, bool useOpenCL, bool useWorkDim2)
        {
            //Recover the filter side from the coefficient count and check that it is consistent
            int filterSide = (int)Math.Sqrt(Filter.Length / 3);
            bool lengthMatches = Filter.Length == 3 * filterSide * filterSide;
            if (!lengthMatches)
            {
                throw new Exception("Invalid filter");
            }

            if (useOpenCL)
            {
                //One-time class initialization
                if (!Initialized)
                {
                    Init(filterSide);
                }

                //Upload the filter coefficients
                varFilter.WriteToDevice(Filter);
            }

            //Result buffers are rebuilt whenever the image size differs from the cached one
            int valueCount = imgDt.Height * imgDt.Width * 3;
            if (FilteredVals == null || FilteredVals.Length != valueCount)
            {
                FilteredVals = new float[valueCount];
                varFiltered = new CLCalc.Program.Variable(FilteredVals);
            }

            int margin = (filterSide - 1) / 2;

            if (!useOpenCL)
            {
                //CPU path
                ApplyFilter(imgDt.Data, Filter, FilteredVals, new int[] { imgDt.Width }, imgDt.Height - filterSide);
            }
            else
            {
                //Upload the image width
                varWidth.WriteToDevice(new int[] { imgDt.Width });

                CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[] { imgDt.varData, varFilter, varFiltered, varWidth };

                int[] globalSize = useWorkDim2
                    ? new int[] { imgDt.Width - filterSide, imgDt.Height - filterSide }
                    : new int[] { imgDt.Height - filterSide };

                if (useWorkDim2)
                    kernelApplyFilterWorkDim2.Execute(kernelArgs, globalSize);
                else
                    kernelApplyFilter.Execute(kernelArgs, globalSize);

                //Download the filtered values
                varFiltered.ReadFromDeviceTo(FilteredVals);
            }

            //Copy the filtered values back into the image bytes, leaving the border untouched
            int rowLimit = imgDt.Height - margin - 1;
            int colLimit = imgDt.Width - margin - 1;
            for (int row = margin; row < rowLimit; row++)
            {
                int rowOffset = imgDt.Width * row;
                for (int col = margin; col < colLimit; col++)
                {
                    int first = 3 * (col + rowOffset);
                    for (int channel = 0; channel < 3; channel++)
                    {
                        imgDt.Data[first + channel] = (byte)FilteredVals[first + channel];
                    }
                }
            }

            //The byte image is written back to the device as well.
            //In the future this rewriting can be avoided
            //because byte_addressable will be widely available
            if (useOpenCL)
            {
                imgDt.varData.WriteToDevice(imgDt.Data);
            }
        }
Exemple #7
0
        /// <summary>One-time class setup: compiles the filter source, creates both kernels and
        /// allocates the filter and width device variables.</summary>
        /// <param name="FilterSize">Side of the filter; the filter variable gets 3*FilterSize*FilterSize floats</param>
        private static void Init(int FilterSize)
        {
            //Bring up OpenCL only when nobody has done so yet
            if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
            {
                CLCalc.InitCL();
            }

            //Build the OpenCL program from the filter source
            CLFilterSrc filterSource = new CLFilterSrc();
            CLCalc.Program.Compile(filterSource.src);

            //Kernel handles
            kernelApplyFilter = new CLCalc.Program.Kernel("ApplyFilter");
            kernelApplyFilterWorkDim2 = new CLCalc.Program.Kernel("ImgFilter");

            //Device storage for the filter coefficients
            int coefficientCount = 3 * FilterSize * FilterSize;
            varFilter = new CLCalc.Program.Variable(new float[coefficientCount]);

            //Device storage for the image width (single int)
            varWidth = new CLCalc.Program.Variable(new int[1]);

            Initialized = true;
        }
Exemple #8
0
            /// <summary>Builds a matrix from a two-dimensional array, allocating the matching
            /// OpenCL variables when OpenCL is enabled and in use.</summary>
            /// <param name="Vals">Matrix values</param>
            public floatMatrix(float[,] Vals)
            {
                //Dimensions and flat host storage
                nRows = Vals.GetLength(0);
                nCols = Vals.GetLength(1);
                Values = new float[nRows * nCols];

                bool allocateOnDevice = UseOpenCLIfAvailable
                    && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;

                if (allocateOnDevice)
                {
                    int[] dimensions = new int[] { nRows, nCols };

                    CLValues = new CLCalc.Program.Variable(Values);
                    CLDim = new CLCalc.Program.Variable(dimensions);
                    CLCoef = new CLCalc.Program.Variable(new float[1]);
                }

                //Store the supplied values
                SetValues(Vals);
            }
        /// <summary>Constructor. Tries to initialize OpenCL when its state is unknown and, when OpenCL
        /// is in use, allocates the control variables.</summary>
        public SparseLinalg()
        {
            bool clStateUnknown = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
            if (clStateUnknown)
            {
                try
                {
                    CLCalc.InitCL();
                }
                catch
                {
                    //Initialization failures are ignored on purpose; the check below decides what to allocate
                }
            }

            //Nothing to allocate unless OpenCL is actually in use
            if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
            {
                return;
            }

            //Dot product buffers
            dprod = new float[SparseLinalg.GLOBALWORKSIZE];
            dotProd = new CLCalc.Program.Variable(dprod);
            dotProdSum = new CLCalc.Program.Variable(new float[1]);

            //Single-int control values
            vLenBy4 = new CLCalc.Program.Variable(new int[1]);
            CLNonZeroElemsPerRow = new CLCalc.Program.Variable(new int[1]);
        }
Exemple #10
0
 /// <summary>Compiles the FFT source and creates the radix-16, radix-4 and conjugate kernels,
 /// plus the one-int device variable CLp.</summary>
 private static void InitKernels()
 {
     string s = new CLFFTSrc().s;

     //Initialize OpenCL only when its state is still unknown, as the other initializers do.
     //NOTE(review): callers may already own device variables at this point; calling InitCL again
     //would presumably recreate the OpenCL state underneath them - confirm against CLCalc.InitCL.
     if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown) CLCalc.InitCL();

     try
     {
         CLCalc.Program.Compile(s);
     }
     catch (System.Exception ex)
     {
         //Still best effort (kernel creation below is attempted regardless), but the failure
         //is traced instead of vanishing, so a later kernel-creation error can be diagnosed
         System.Diagnostics.Debug.WriteLine("FFT kernel compilation failed: " + ex);
     }

     kernelfft_radix16 = new CLCalc.Program.Kernel("fft_radix16");
     kernelfft_radix4 = new CLCalc.Program.Kernel("fft_radix4");
     kernelConjugate = new CLCalc.Program.Kernel("Conjugate");
     CLp = new CLCalc.Program.Variable(new int[1]);
 }
Exemple #11
0
        /// <summary>Computes the inverse Discrete Fourier Transform of a float2 vector x whose length is a power of 4.
        /// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... Re[xn] Im[xn] }, n = power of 4 (Length = 2*pow(4,n))</summary>
        /// <param name="x">Interleaved real/imaginary input values</param>
        /// <returns>A new array, of the same length as x, read back from the device result</returns>
        public static float[] iFFT4(float[] x)
        {
            //The cached device buffers are rebuilt when missing or when the input length changed
            bool buffersAreStale = CLx == null || CLx.OriginalVarLength != x.Length;
            if (buffersAreStale)
            {
                CLx = new CLCalc.Program.Variable(x);
                CLy = new CLCalc.Program.Variable(x);
            }

            //Upload the input
            CLx.WriteToDevice(x);

            //Inverse transform in device memory
            CLy = iFFT4(CLx);

            //Download the result
            float[] inverse = new float[x.Length];
            CLy.ReadFromDeviceTo(inverse);

            return inverse;
        }
Exemple #12
0
        /// <summary>Constructor. Compiles the double-precision step kernels and allocates the device
        /// variables, kernels and argument lists used by the integrator.</summary>
        /// <param name="InitialIndepVarValue">Initial independent variable value</param>
        /// <param name="StepSize">Desired step per integration pass</param>
        /// <param name="InitialState">Initial state of system</param>
        /// <param name="DerivativeCalculator">Function to calculate derivatives vector</param>
        public doubleODE46(double InitialIndepVarValue, double StepSize, double[] InitialState, DerivCalcDeleg DerivativeCalculator)
        {
            if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown) CLCalc.InitCL();

            if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.NotUsingCL)
            {
                throw new Exception("OpenCL not available");
            }

            //Any state other than UsingCL leaves the instance without device resources
            if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
            {
                return;
            }

            //Program source: the fp64 extension pragma followed by every step kernel
            ODE46Source Source = new ODE46Source();
            string[] s = new string[] { @"
                            #pragma OPENCL EXTENSION cl_khr_fp64 : enable
                            ", Source.doubleStep2, Source.doubleStep3, Source.doubleStep4, Source.doubleStep5, Source.doubleStep6, Source.doubleFinalizeCalc };
            CLCalc.Program.Compile(s);

            //Derivative callback
            Derivs = DerivativeCalculator;

            //Scalar device variables, all seeded with the initial independent value
            double[] initialX = new double[1] { InitialIndepVarValue };
            x = new CLCalc.Program.Variable(initialX);
            xsav = new CLCalc.Program.Variable(initialX);
            hdid = new CLCalc.Program.Variable(initialX);

            //Host-side independent value and step
            currentX = InitialIndepVarValue;
            SetStep(StepSize);

            //Private copy of the initial state
            yy = (double[])InitialState.Clone();

            //State-sized device vectors
            ysav = new CLCalc.Program.Variable(yy);
            k1 = new CLCalc.Program.Variable(InitialState);
            k2 = new CLCalc.Program.Variable(InitialState);
            k3 = new CLCalc.Program.Variable(InitialState);
            k4 = new CLCalc.Program.Variable(InitialState);
            k5 = new CLCalc.Program.Variable(InitialState);
            k6 = new CLCalc.Program.Variable(InitialState);
            absError = new CLCalc.Program.Variable(new double[InitialState.Length]);
            y = new CLCalc.Program.Variable(yy);

            //Kernel handles
            KernelFinalizeCalc = new CLCalc.Program.Kernel("doubleFinalizeCalc");
            KernelUpdateX = new CLCalc.Program.Kernel("doubleUpdateX");
            KernelRK46YStep2 = new CLCalc.Program.Kernel("doubleYStep2");
            KernelRK46XStep2 = new CLCalc.Program.Kernel("doubleXStep2");
            KernelRK46YStep3 = new CLCalc.Program.Kernel("doubleYStep3");
            KernelRK46XStep3 = new CLCalc.Program.Kernel("doubleXStep3");
            KernelRK46YStep4 = new CLCalc.Program.Kernel("doubleYStep4");
            KernelRK46XStep4 = new CLCalc.Program.Kernel("doubleXStep4");
            KernelRK46YStep5 = new CLCalc.Program.Kernel("doubleYStep5");
            KernelRK46XStep5 = new CLCalc.Program.Kernel("doubleXStep5");
            KernelRK46YStep6 = new CLCalc.Program.Kernel("doubleYStep6");
            KernelRK46XStep6 = new CLCalc.Program.Kernel("doubleXStep6");

            //Argument lists shared by the kernel launches
            ArgsFinalize = new CLCalc.Program.Variable[] { x, hdid, y, ysav, absError, k1, k2, k3, k4, k5, k6 };
            ArgsRK46Y = new CLCalc.Program.Variable[] { x, hdid, y, ysav, k1, k2, k3, k4, k5, k6 };
            ArgsRK46X = new CLCalc.Program.Variable[] { x, hdid, xsav };

            //Work sizes: one item per state for vectors, a single item for scalars
            NStates = new int[1] { InitialState.Length };
            NScalar = new int[1] { 1 };

            //Host buffers used to read results back
            yerr = new double[NStates[0]];
            xRet = new double[NScalar[0]];
        }
Exemple #13
0
            /// <summary>Creates a vector that shares the storage of a symmetric matrix, exposing its
            /// elements sequentially.</summary>
            /// <param name="symM">Symmetric matrix to use</param>
            public floatVector(floatSymPosDefMatrix symM)
            {
                //Host and device storage are shared with the matrix, not copied
                this.CLValues = symM.CLValues;
                this.Values = symM.Values;

                //The matrix may be modified through this vector, so any existing Cholesky
                //factorization is flagged as outdated; a later LinearSolve will need a new one
                symM.IsCholeskyFactorized = false;

                bool allocateOnDevice = UseOpenCLIfAvailable
                    && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;

                if (!allocateOnDevice)
                {
                    return;
                }

                CLCoef = new CLCalc.Program.Variable(new float[1]);
            }
Exemple #14
0
 /// <summary>Creates a vector from a copy of the given elements, allocating the matching OpenCL
 /// variables when OpenCL is enabled and in use.</summary>
 /// <param name="Vals">Vector elements</param>
 public floatVector(float[] Vals)
 {
     //Keep a private copy so that later changes to the caller's array do not affect this vector
     float[] copy = (float[])Vals.Clone();
     this.Values = copy;

     bool allocateOnDevice = UseOpenCLIfAvailable
         && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;

     if (!allocateOnDevice)
     {
         return;
     }

     CLValues = new CLCalc.Program.Variable(Values);
     CLCoef = new CLCalc.Program.Variable(new float[1]);
 }
Exemple #15
0
            /// <summary>Initializes OpenCL when its state is unknown and, when OpenCL is enabled and in use,
            /// allocates the device variables of this matrix, including a Cholesky buffer whose dimension
            /// is rounded up to a multiple of SUBMATRIXSIZE.</summary>
            private void LocalInitCL()
            {
                if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
                {
                    CLCalc.InitCL();
                }

                bool allocateOnDevice = UseOpenCLIfAvailable
                    && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;

                if (!allocateOnDevice)
                {
                    return;
                }

                CLoffSet = new CLCalc.Program.Variable(new int[1]);
                CLValues = new CLCalc.Program.Variable(this.Values);

                //Packed triangular storage for one SUBMATRIXSIZE block
                int packedBlockLength = (SUBMATRIXSIZE * (SUBMATRIXSIZE + 1)) >> 1;
                invL11 = new float[packedBlockLength];
                CLinvl11 = new CLCalc.Program.Variable(invL11);

                if (N % SUBMATRIXSIZE == 0)
                {
                    //Dimension already a multiple of the block size: a plain copy is enough
                    cholDec = (float[])this.Values.Clone();
                }
                else
                {
                    //Round the dimension up to the next multiple of the block size; the extra
                    //entries keep their default value of zero
                    int paddedN = SUBMATRIXSIZE * (N / SUBMATRIXSIZE + 1);
                    cholDec = new float[(paddedN * (paddedN + 1)) >> 1];

                    int k = 0;
                    while (k < Values.Length)
                    {
                        cholDec[k] = Values[k];
                        k++;
                    }
                }

                CLcholDec = new CLCalc.Program.Variable(cholDec);
                CLprevVals = new CLCalc.Program.Variable(new float[N]);

                //Right-hand side, intermediate solution and dimension
                CLb = new CLCalc.Program.Variable(new float[N]);
                CLy = new CLCalc.Program.Variable(new float[N]);
                CLn = new CLCalc.Program.Variable(new int[] { N });
            }
Exemple #16
0
            /// <summary>Backsubstitutes to solve a linear system with a matrix right hand size.
            /// Falls back to linsolveMatrix when OpenCL is disabled or not in use; otherwise runs a
            /// blocked forward substitution followed by a blocked backward substitution on the device,
            /// using the factor stored in CLcholDec, and copies the result into resp.CLValues.</summary>
            /// <param name="M">Right hand side matrix; its device values are copied into CLb</param>
            /// <param name="resp">Receives the solution in its device values.
            /// NOTE(review): unlike linsolveCL, resp is not created here when null - confirm callers always supply it.</param>
            private void LinsolveCLMatrix(floatMatrix M, ref floatMatrix resp)
            {
                //System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                //System.Diagnostics.Stopwatch sw1 = new System.Diagnostics.Stopwatch();
                //sw.Start();

                //number of RHS as multiple of SUBMATRIXSIZE
                //nRHSMult counts the whole groups of SUBMATRIXSIZE in M.Rows, nRHSleftOver is the remainder
                int nRHSMult = M.Rows / SUBMATRIXSIZE;
                int nRHSleftOver = M.Rows - SUBMATRIXSIZE*nRHSMult;

                //CPU fallback
                if (!UseOpenCLIfAvailable || CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
                {
                    linsolveMatrix(M, ref resp);
                    return;
                }

                //Copy elements to CLb
                //The work buffers are recreated only when they are missing or too small for M
                if (CLb == null || CLb.OriginalVarLength < M.Values.Length)
                {
                    CLb = new CLCalc.Program.Variable(M.Values);
                    CLy = new CLCalc.Program.Variable(M.Values);
                }

                kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { M.CLValues, CLb }, M.Values.Length);
                //Second launch dimension of the substitution kernels
                int nEqs = M.Rows;

                //Forward pass argument order: CLy comes before CLb
                CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLcholDec, CLy, CLb, CLoffSet, CLn };
                int[] offset = new int[1];

                //DEBUG
                //float[] yDebug = new float[M.Values.Length];
                //float[] bDebug = new float[M.Values.Length];
                //this.CLcholDec.ReadFromDeviceTo(cholDec);

                //Forward substitution
                //Walks the diagonal blocks top-down; the current block start is sent to the device in CLoffSet
                int i;
                for (i = 0; i < N; i += SUBMATRIXSIZE)
                {
                    offset[0] = i;
                    CLoffSet.WriteToDevice(offset);

                    //The last block may be smaller than SUBMATRIXSIZE
                    int size = Math.Min(SUBMATRIXSIZE, N - i);
                    kernelFwdUpperBackSubs.Execute(args, new int[] { size, nEqs }, new int[] { size, 1 });

                    ////DEBUG
                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);

                    //sw1.Start();
                    //propagation
                    //Only needed while rows remain below the current block
                    if (i + SUBMATRIXSIZE < N)
                    {
                        //Whole groups of SUBMATRIXSIZE right hand sides
                        if (nRHSMult > 0) kernelFwdPropag.Execute(args, new int[] { N - i - SUBMATRIXSIZE, nRHSMult * SUBMATRIXSIZE }, new int[] { 1, SUBMATRIXSIZE });
                        //Remaining right hand sides; the last array is presumably a global work offset - TODO confirm
                        if (nRHSleftOver > 0)
                            kernelFwdPropag2.Execute(args, new int[] { N - i - SUBMATRIXSIZE, nRHSleftOver }, new int[] { 1, nRHSleftOver }, new int[] { 0, nRHSMult * SUBMATRIXSIZE });
                    }
                    //OpenCLTemplate.CLCalc.Program.CommQueues[OpenCLTemplate.CLCalc.Program.DefaultCQ].Finish();
                    //sw1.Stop();

                    ////DEBUG
                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);
                }

                //Backward subst. Stores answer in CLb
                //Same variables as before with CLb and CLy exchanged
                args = new CLCalc.Program.Variable[] { CLcholDec, CLb, CLy, CLoffSet, CLn };
                //Backward substitution
                //Walks full-size blocks bottom-up, starting SUBMATRIXSIZE rows above the end
                for (i = N - SUBMATRIXSIZE; i >= 0; i -= SUBMATRIXSIZE)
                {
                    offset[0] = i;
                    CLoffSet.WriteToDevice(offset);

                    int size = SUBMATRIXSIZE;
                    kernelBkLowerBackSubs.Execute(args, new int[] { size, nEqs }, new int[] { size, 1 });

                    //Only needed while rows remain above the current block
                    if (i > 0)
                    {
                        //Propagation using __local storage
                        if (nRHSMult > 0) kernelBackPropag.Execute(args, new int[] { i, nRHSMult * SUBMATRIXSIZE }, new int[] { 1, SUBMATRIXSIZE });

                        //leftovers (not multiples of SUBMATRIXSIZE)
                        if (nRHSleftOver > 0)
                            kernelBackPropag2.Execute(args, new int[] { i, nRHSleftOver }, new int[] { 1, nRHSleftOver }, new int[] { 0, nRHSMult * SUBMATRIXSIZE });

                    }

                }
                //The loop exits with i negative; SUBMATRIXSIZE + i is the number of rows left at the top
                //(zero when N is a multiple of SUBMATRIXSIZE). They are solved as one smaller block at offset 0.
                if (SUBMATRIXSIZE + i > 0)
                {
                    offset[0] = 0; CLoffSet.WriteToDevice(offset);
                    kernelBkLowerBackSubs.Execute(args, new int[] { SUBMATRIXSIZE + i, nEqs }, new int[] { SUBMATRIXSIZE + i, 1 });
                }

                //Hand the solution held in CLb over to the caller's matrix
                kernelCopyBuffer.Execute(new CLCalc.Program.Variable[] { CLb, resp.CLValues }, resp.Values.Length);

                //OpenCLTemplate.CLCalc.Program.CommQueues[OpenCLTemplate.CLCalc.Program.DefaultCQ].Finish();
                //sw.Stop();
            }
Exemple #17
0
            /// <summary>Solves a linear system with a single right hand side vector. Falls back to linsolve
            /// when OpenCL is disabled or not in use; otherwise runs a blocked forward substitution followed
            /// by a blocked backward substitution on the device, using the factor stored in CLcholDec, and
            /// copies N values of the result into resp.CLValues.</summary>
            /// <param name="CLbb">Right hand side vector; its device values are copied into CLb</param>
            /// <param name="resp">Receives the solution; created with N elements when null</param>
            private void linsolveCL(floatVector CLbb, ref floatVector resp)
            {
                //CPU fallback
                if (!UseOpenCLIfAvailable || CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
                {
                    linsolve(CLbb.Values, ref resp);
                    return;
                }

                //int NMultiple = N;
                ////float[] bAugm;
                //if (N % SUBMATRIXSIZE != 0)
                //{
                //    NMultiple = N / SUBMATRIXSIZE;
                //    NMultiple = SUBMATRIXSIZE * (NMultiple + 1);
                //}
                ////bAugm = new float[NMultiple];
                ////for (int i = 0; i < bb.Length; i++) bAugm[i] = bb[i];

                if (resp == null) resp = new floatVector(new float[N]);

                //Copy elements to CLb
                //CLb is recreated only when it is missing or too small; CLy is not touched here
                if (CLb == null || CLb.OriginalVarLength < CLbb.Length) CLb = new CLCalc.Program.Variable(CLbb.Values);
                kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { CLbb.CLValues, CLb }, CLbb.Length);

                //Forward pass argument order: CLy comes before CLb
                CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLcholDec, CLy, CLb, CLoffSet, CLn };
                int[] offset = new int[1];

                //float[] yDebug = new float[N];
                //float[] bDebug = new float[N];

                //Forward substitution
                //Walks the diagonal blocks top-down; the current block start is sent to the device in CLoffSet
                int i;
                for (i = 0; i < N; i += SUBMATRIXSIZE)
                {
                    offset[0] = i;
                    CLoffSet.WriteToDevice(offset);

                    //The last block may be smaller than SUBMATRIXSIZE
                    int size = Math.Min(SUBMATRIXSIZE, N - i);
                    kernelFwdUpperBackSubs.Execute(args, new int[] { size }, new int[] { size });

                    ////DEBUG
                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);

                    //propagation
                    //Only needed while rows remain below the current block
                    if (i + SUBMATRIXSIZE < N)
                    {
                        kernelFwdPropag.Execute(args, N - i - SUBMATRIXSIZE);
                    }

                    ////DEBUG
                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);
                    //CLcholDec.ReadFromDeviceTo(cholDec);
                }

                //Backward subst. Stores answer in CLb
                //Same variables as before with CLb and CLy exchanged
                args = new CLCalc.Program.Variable[] { CLcholDec, CLb, CLy, CLoffSet, CLn };
                //Backward substitution
                //Walks full-size blocks bottom-up, starting SUBMATRIXSIZE rows above the end
                for (i = N - SUBMATRIXSIZE; i >= 0; i -= SUBMATRIXSIZE)
                {
                    offset[0] = i;
                    CLoffSet.WriteToDevice(offset);

                    int size = SUBMATRIXSIZE;
                    kernelBkLowerBackSubs.Execute(args, new int[] { size }, new int[] { size });

                    ////DEBUG
                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);

                    //Only needed while rows remain above the current block
                    if (i > 0)
                    {
                        kernelBackPropag.Execute(args, i);
                    }

                    //CLy.ReadFromDeviceTo(yDebug);
                    //CLb.ReadFromDeviceTo(bDebug);
                }
                //The loop exits with i negative; SUBMATRIXSIZE + i is the number of rows left at the top
                //(zero when N is a multiple of SUBMATRIXSIZE). They are solved as one smaller block at offset 0.
                if (SUBMATRIXSIZE + i > 0)
                {
                    offset[0] = 0; CLoffSet.WriteToDevice(offset);
                    kernelBkLowerBackSubs.Execute(args, new int[] { SUBMATRIXSIZE + i }, new int[] { SUBMATRIXSIZE + i });
                }
                //CLy.ReadFromDeviceTo(yDebug);
                //CLb.ReadFromDeviceTo(bDebug);

                //Hand the solution held in CLb over to the caller's vector
                kernelCopyBuffer.Execute(new CLCalc.Program.Variable[] { CLb, resp.CLValues }, N);
            }
Exemple #18
0
            /// <summary>Cholesky decomposition using OpenCL with Blocks. Refreshes the device buffer CLcholDec
            /// from the current matrix values, then processes one SUBMATRIXSIZE block column per iteration and
            /// finally marks the matrix as Cholesky-factorized.</summary>
            public void CLBlockCholesky()
            {
                //If matrix dimension is not a multiple of SUBMATRIXSIZE
                //pad with zeros.

                //NMultiple is N rounded up to the next multiple of SUBMATRIXSIZE
                int NMultiple = N;
                if (N % SUBMATRIXSIZE != 0)
                {
                    NMultiple = N / SUBMATRIXSIZE;
                    NMultiple = SUBMATRIXSIZE * (NMultiple + 1);
                }

                //Bring CLcholDec up to date: through the host array when the device copy of the matrix
                //is stale, otherwise with a device-side copy from CLValues
                if (!IsMatrixInClMemoryUpdated)
                {
                    for (int i = 0; i < Values.Length; i++) cholDec[i] = Values[i];
                    CLcholDec.WriteToDevice(cholDec);
                }
                else
                {
                    kernelCopyBuffer.Execute(new CLCalc.Program.MemoryObject[] { CLValues, CLcholDec }, CLValues.OriginalVarLength);
                }

                int SubMatrixSize = SUBMATRIXSIZE;
                int GlobalSize;

                //Important. Set offset to zero
                //NOTE(review): the host never rewrites CLoffSet inside the loop below; per the note further
                //down, the kernels appear to advance it on the device - confirm in the kernel source
                CLoffSet.WriteToDevice(new int[] { 0 });
                CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLcholDec, CLoffSet, CLinvl11 };

                //Number of entries in the packed triangle of one block, used as both global and local size
                GlobalSize = (SubMatrixSize * (SubMatrixSize + 1)) >> 1;
                for (int i = 0; i < NMultiple; i += SubMatrixSize)
                {
                    //Computes Cholesky factor L11 and its inverse
                    kernelCholeskyDiagBlock.Execute(args, new int[] { GlobalSize }, new int[] { GlobalSize });

                    //CLcholDec.ReadFromDeviceTo(cholDec);

                    //Computes column panel L21
                    //Note: offSet has been updated, kernel should use its value-1

                    //Number of submatrices to update: (N-i)/SubMatrixSize
                    //One is subtracted for the diagonal block handled just above
                    int nSubMatrices = (NMultiple - i) / SubMatrixSize - 1;

                    if (nSubMatrices > 0)
                    {
                        //Computes panels and updates main diagonals
                        kernelCholeskyComputePanel.Execute(args, new int[] { nSubMatrices * SubMatrixSize, SubMatrixSize }, new int[] { SubMatrixSize, SubMatrixSize });

                        //CLcholDec.ReadFromDeviceTo(cholDec);

                        //Still need to update nSubMatrices*(nSubMatrices-1)/2 full matrices in the Cholesky decomposition
                        //They start at indexes [i+SubMatrixSize i], and they are the offdiagonal block matrices

                        int totalSubMatricesToUpdate = ((nSubMatrices - 1) * nSubMatrices) >> 1;
                        if (totalSubMatricesToUpdate > 0)
                        {
                            kernelCholeskyForwardProp.Execute(args, new int[] { totalSubMatricesToUpdate * SubMatrixSize, SubMatrixSize }, new int[] { SubMatrixSize, SubMatrixSize });
                        }
                    }

                    //CLcholDec.ReadFromDeviceTo(cholDec);
                }

                //CLcholDec.ReadFromDeviceTo(cholDec);
                this.IsCholeskyFactorized = true;
            }
        /// <summary>Solves linear system Mx = b using conjugate gradient method. Doesn't try to improve the solution obtained.</summary>
        /// <remarks>
        /// At least one iteration is attempted even when the initial r.r is already within <paramref name="tol"/>.
        /// The loop ends when r.r is no longer greater than <paramref name="tol"/>, after n*MAXITER iterations,
        /// or when p.Ap is exactly zero (no step length can be computed, so the current x is kept).
        /// The work vectors r, p, Ap and temp are instance fields, reallocated when r is null or its length differs from b.Length.
        /// </remarks>
        /// <param name="M">Matrix M</param>
        /// <param name="b">Vector b</param>
        /// <param name="tol">Error tolerance, compared against r.r (dot product of the work vector r with itself)</param>
        /// <param name="x">Initial guess, updated in place. Replaced by a newly allocated CLImgVector of length b.Length when null or of a different length</param>
        public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
        {
            int n = b.Length;

            //Work size used for every vector kernel below: ceil(n / 4) for n >= 1
            //NOTE(review): presumably each work-item handles 4 packed components - confirm against the kernel sources
            int nBy4 = 1 + ((n - 1) >> 2);

            //Single-element device buffer used to pass the scalar coefficient to kernelMultiplyAdd
            if (lambda == null)
            {
                lambda = new float[1];
                CLlambda = new CLCalc.Program.Variable(lambda);
            }

            //(Re)allocate the cached work vectors when the system size changes
            if (r == null || r.Length != n)
            {
                r = new CLImgVector(n);
                p = new CLImgVector(n);
                Ap = new CLImgVector(n);
                temp = new CLImgVector(n);
            }
            if (temp == null) temp = new CLImgVector(n);

            if (x == null || x.Length != n) x = new CLImgVector(n);

            float alpha, beta, RDotROld, RDotR;

            //Initialization: Multiply(M, x, Ap), then kernelInitRP receives b, Ap, r and p
            //NOTE(review): presumably this sets r = p = b - M*x (standard CG start) - kernel source not visible here
            Multiply(M, x, Ap);

            CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
            kernelInitRP.Execute(args, nBy4);

            //Loop
            int count = 0;

            RDotR = DotProduct(r, r);

            //count < 1 forces one iteration regardless of the tolerance test
            while (count < 1 || ((RDotR > tol) && (count < n * MAXITER)))
            {
                RDotROld = RDotR;

                Multiply(M, p, Ap);

                //p.Ap is exactly zero when the search direction has vanished (e.g. the starting residual was
                //already zero). Dividing by it would yield NaN/Infinity and corrupt x, so stop with the current x.
                float pAp = DotProduct(Ap, p);
                if (pAp == 0) break;

                alpha = RDotROld / pAp;

                //Update x (presumably x = temp + alpha * p, with temp holding a copy of x)
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
                lambda[0] = alpha; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

                //Update r (presumably r = temp - alpha * Ap, with temp holding a copy of r)
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
                lambda[0] = -alpha; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

                RDotR = DotProduct(r, r);
                beta = RDotR / RDotROld;

                //Update p (presumably p = r + beta * temp, with temp holding a copy of p)
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
                lambda[0] = beta; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

                count++;
            }
        }
Exemple #20
0
                /// <summary>Calculates LU decomposition of M matrix</summary>
                /// <remarks>
                /// The factorization runs column by column on the device. Scratch device buffers created here
                /// (scaling vector, column index, dimension) are released before returning; the returned variable
                /// and <paramref name="varindx"/> are owned by the caller.
                /// </remarks>
                /// <param name="M">Matrix to decompose</param>
                /// <param name="n">Matrix dimension</param>
                /// <param name="varindx">Swap index</param>
                /// <returns>Device variable created from M after the LU kernels have been run on it</returns>
                private CLCalc.Program.Variable LUDecomp(double[,] M, int n, out CLCalc.Program.Variable varindx)
                {
                    //arguments and work_dim
                    CLCalc.Program.Variable[] args;
                    int[] max;

                    //Matrix to vector
                    double[] vecM = MatrixToVector(M, ref n, ref n);
                    CLCalc.Program.Variable varM = new Program.Variable(vecM);

                    //Scaling transformation
                    double[] vv = new double[n];
                    CLCalc.Program.Variable varvv = new Program.Variable(vv);
                    max = new int[1] { n };
                    args = new CLCalc.Program.Variable[] { varM, varvv };
                    doubleLUScale.Execute(args, max);

                    //In order LU factorization (Crout)
                    int[] J = new int[1] { 0 };
                    CLCalc.Program.Variable varJ = new Program.Variable(J);
                    int[] N = new int[1] { n };
                    CLCalc.Program.Variable varN = new Program.Variable(N);
                    int[] indx = new int[n];
                    varindx = new Program.Variable(indx);

                    args = new Program.Variable[] { varM, varJ, varN, varindx, varvv };
                    for (J[0] = 0; J[0] < n; J[0]++)
                    {
                        //Tell the kernels which column is being processed
                        varJ.WriteToDevice(J);

                        //Column 0 has no work for this kernel. A launch with zero work-items is rejected by
                        //OpenCL 1.x (CL_INVALID_GLOBAL_WORK_SIZE), so it is skipped, like DivByPivot below.
                        if (J[0] > 0)
                        {
                            max[0] = J[0];
                            doubleLUCalcBetas.Execute(args, max);
                        }

                        max[0] = n - J[0];
                        doubleLUCalcAlphas.Execute(args, max);

                        //Single work-item kernel (presumably selects the pivot for this column)
                        max[0] = 1;
                        doubleLUCalcPivo.Execute(args, max);

                        //n work-items (presumably performs the swap recorded in varindx)
                        max[0] = n;
                        doubleLUTrocaCols.Execute(args, max);

                        if (J[0] != n - 1)
                        {
                            max[0] = n - J[0] - 1;
                            doubleLUDivByPivot.Execute(args, max);
                        }
                    }

                    //Scratch buffers are only needed during the factorization; release their device memory.
                    //Releasing a buffer that queued commands still use is safe: OpenCL defers the deletion.
                    varvv.Dispose();
                    varJ.Dispose();
                    varN.Dispose();

                    return varM;
                }
Exemple #21
0
                /// <summary>Builds the spring/mass system: validates the array sizes, uploads the data to the device,
                /// compiles the kernels and runs the initial-length and node-connection kernels once.</summary>
                /// <param name="nMasses">Number of masses in the system</param>
                /// <param name="nConnections">Number of connections</param>
                /// <param name="Masses">Mass of each vertex (length nMasses)</param>
                /// <param name="InitialStateSpace">Position and velocity of vertexes (length 6*nMasses):
                /// [2*3*i] - posx, [2*(3*i+1)] - posy, [2*(3*i+2)] - posz,
                /// [1+2*3*i] - velx, [1+2*(3*i+1)] - vely, [1+2*(3*i+2)] - velz.
                /// Only the even (position) entries are read by this constructor.</param>
                /// <param name="Origins">Origin vertex of connections. Spring connects Origin[i] to Dests[i]</param>
                /// <param name="Dests">Destination vertex of connections. Spring connects Origin[i] to Dests[i]</param>
                /// <param name="SpringKs">Spring constant for each connection (nConnections)</param>
                /// <param name="Damp">Structural damping (relative-speed dependant) (nConnections)</param>
                /// <param name="GroundKs">Spring constant for each mass, connecting to ground (nMass)</param>
                /// <param name="GroundDamp">Absolute damping proportional to speed relative to Earth (nMass)</param>
                public floatDEM(int nMasses, int nConnections,
                    float[] Masses, float[] InitialStateSpace,
                    int[] Origins, int[] Dests, float[] SpringKs, float[] Damp, float[] GroundKs, float[] GroundDamp)
                {
                    #region Consistency check
                    //Every per-mass array must have nMasses entries, every per-connection array nConnections
                    if (Masses.Length != nMasses)
                    {
                        throw new Exception("Invalid Masses length (!=nMasses)");
                    }
                    if (InitialStateSpace.Length != 6 * nMasses)
                    {
                        throw new Exception("Invalid positions length (!=6*nMasses - x, y, z)");
                    }
                    if (Origins.Length != nConnections)
                    {
                        throw new Exception("Invalid Origins length (!=nConnections)");
                    }
                    if (Dests.Length != nConnections)
                    {
                        throw new Exception("Invalid Dests length (!=nConnections)");
                    }
                    if (SpringKs.Length != nConnections)
                    {
                        throw new Exception("Invalid SpringKs length (!=nConnections)");
                    }
                    if (GroundKs.Length != nMasses)
                    {
                        throw new Exception("Invalid GroundKs length (!=nMasses)");
                    }
                    if (Damp.Length != nConnections)
                    {
                        throw new Exception("Invalid Damp length (!=nConnections)");
                    }
                    if (GroundDamp.Length != nMasses)
                    {
                        throw new Exception("Invalid GroundDamp length (!=nMasses)");
                    }
                    #endregion

                    //Lazily bring up OpenCL if nobody has done it yet
                    bool clNotInitialized = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown;
                    if (clNotInitialized) CLCalc.InitCL();

                    #region Variables reading
                    //Global work sizes used by the kernels launched below
                    nConn = new int[1] { nConnections };
                    nM = new int[1] { nMasses };

                    //Inputs
                    m = new Program.Variable(Masses);

                    //Positions are the even entries of the interleaved position/velocity state vector
                    float[] startPositions = new float[3 * nMasses];
                    for (int idx = 0; idx < startPositions.Length; idx++)
                    {
                        startPositions[idx] = InitialStateSpace[2 * idx];
                    }

                    posOrig = new Program.Variable(startPositions);
                    origs = new Program.Variable(Origins);
                    dests = new Program.Variable(Dests);
                    k = new Program.Variable(SpringKs);
                    kGround = new Program.Variable(GroundKs);
                    c = new Program.Variable(Damp);
                    cGround = new Program.Variable(GroundDamp);

                    //Outputs
                    L0 = new Program.Variable(new float[nConnections]);
                    forces = new Program.Variable(new float[3 * nMasses]);
                    connForces = new Program.Variable(new float[3 * nConnections]);
                    nConnec = new Program.Variable(new int[1] { nConnections });

                    //30 slots per mass, all initialized to -1
                    int[] connectionSlots = new int[30 * nMasses];
                    for (int idx = 0; idx < connectionSlots.Length; idx++)
                    {
                        connectionSlots[idx] = -1;
                    }

                    nodesConnections = new Program.Variable(connectionSlots);
                    #endregion

                    #region Kernels initialization
                    DEMSource kernelSource = new DEMSource();
                    string[] programSources = new string[]
                    {
                        kernelSource.floatcalcL0,
                        kernelSource.floatresetForces,
                        kernelSource.floatcalcForces,
                        kernelSource.floatderivs,
                        kernelSource.floatcalcGroundForces,
                        kernelSource.floatcalcNodesConnections
                    };
                    CLCalc.Program.Compile(programSources);

                    KernelcalcL0 = new Program.Kernel("floatcalcL0");
                    argscalcL0 = new Program.Variable[] { posOrig, origs, dests, L0 };

                    KernelcalcNodesConnections = new Program.Kernel("floatcalcNodesConnections");
                    argscalcNodesConnections = new Program.Variable[] { nodesConnections, nConnec, origs, dests };

                    KernelresetForces = new Program.Kernel("floatresetForces");
                    argsresetForces = new Program.Variable[] { forces };

                    KernelcalcForces = new Program.Kernel("floatcalcForces");
                    KernelcalcGroundForces = new Program.Kernel("floatcalcGroundForces");

                    Kernelderivs = new Program.Kernel("floatderivs");
                    #endregion

                    //Initial lengths calculation, one work-item per connection
                    KernelcalcL0.Execute(argscalcL0, nConn);

                    //Connections calculation, one work-item per mass
                    KernelcalcNodesConnections.Execute(argscalcNodesConnections, nM);
                }
Exemple #22
0
            /// <summary>Sums the components of a vector using __local memory and coalesced access</summary>
            /// <remarks>
            /// When OpenCL is not in use the sum is accumulated on the host in double precision over CLv.Values.
            /// On the OpenCL path the partial-sum buffers CLv.CLresps and CLv.CLn are created on first use and
            /// cached on the vector.
            /// </remarks>
            /// <param name="CLv">Vector whose components should be summed</param>
            /// <returns>Sum of all components of <paramref name="CLv"/></returns>
            public static float SumVectorElements(floatVector CLv)
            {
                float resp = 0;
                if (UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
                {
                    /*
                     The idea here is to create a reduction in which the access pattern to the vectors is coalesced.
                     The first step is to reduce the number of non-summed items to a multiple of NWORKITEMS and then coalesce the access
                     */

                    int LOCALWORKSIZE = Math.Min(256, (int)CLCalc.Program.CommQueues[CLCalc.Program.DefaultCQ].Device.MaxWorkGroupSize);
                    int NWORKITEMS = 16 * LOCALWORKSIZE;

                    int n = CLv.Length;
                    float[] resps = new float[NWORKITEMS];
                    if (CLv.CLresps == null)
                    {
                        CLv.CLresps = new CLCalc.Program.Variable(resps);
                        CLv.CLn = new CLCalc.Program.Variable(new int[1]);
                    }

                    CLv.CLn.WriteToDevice(new int[] { n });
                    CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[] { CLv.CLValues, CLv.CLresps, CLv.CLn };

                    //Write n = k*NWORKITEMS + p. Preprocess to eliminate p`s and leave summation only to a multiple of NWORKITEMS
                    int k = n / NWORKITEMS;
                    int p = n - k * NWORKITEMS;

                    //Clears partial responses
                    kernelClear.Execute(args, NWORKITEMS);

                    //Sums the p last elements into the p first elements.
                    //p is 0 when n is an exact multiple of NWORKITEMS (including n == 0). There is nothing to
                    //fold then, and a launch with zero work-items is rejected by OpenCL 1.x, so skip it.
                    if (p > 0)
                    {
                        kernelPreSum.Execute(args, p);
                    }

                    //Use CLn to inform each work-item its workload. Each one will access and sum k numbers
                    CLv.CLn.WriteToDevice(new int[] { k });

                    kernelCoalLocalSum.Execute(args, new int[] { NWORKITEMS }, new int[] { LOCALWORKSIZE });

                    CLv.CLresps.ReadFromDeviceTo(resps);

                    //Serial part: add the first NWORKITEMS / LOCALWORKSIZE entries read back from the device
                    //(presumably one partial sum per work-group)
                    int imax = NWORKITEMS / LOCALWORKSIZE;
                    for (int i = 0; i < imax; i++) resp += resps[i];
                }
                else
                {
                    //Host fallback: accumulate in double to limit rounding error, then narrow once
                    double sum = 0;
                    for (int i = 0; i < CLv.Length; i++) sum += CLv.Values[i];
                    resp = (float)sum;
                }

                return resp;
            }
Exemple #23
0
        /// <summary>Runs the interpolation and polygonization kernels for the given iso level.
        /// When ComputeNormals is set, preliminary normals are also produced and then smoothed.</summary>
        /// <param name="isoLvl">Iso level to extract; uploaded to the device only when it differs from the cached value</param>
        public void CalcIsoSurface(float isoLvl)
        {
            //Refresh the device copy of the iso level only when it actually changed
            bool levelChanged = isoLvl != isoLevel[0];
            if (levelChanged)
            {
                isoLevel[0] = isoLvl;
                varIsoLevel.WriteToDevice(isoLevel);
            }

            //Interpolation, launched over the full max[0] x max[1] x max[2] range
            CLCalc.Program.Variable[] interpArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varInitVals, varStep };
            kernelInterpPts.Execute(interpArgs, max);

            //Polygonization range: one less than max along every axis
            int[] polygonizeRange = new int[] { max[0] - 1, max[1] - 1, max[2] - 1 };

            if (!ComputeNormals)
            {
                //Polygonization without normals
                CLCalc.Program.Variable[] plainArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varElemIndex };
                kernelPolygonizeNoNormals.Execute(plainArgs, polygonizeRange);
                return;
            }

            //Polygonization, also writing preliminary normals
            CLCalc.Program.Variable[] polygonizeArgs = new CLCalc.Program.Variable[] { CLFuncVals, varIsoLevel, varEdgeCoords, varEdgePrelimNormals, varElemIndex };
            kernelPolygonize.Execute(polygonizeArgs, polygonizeRange);

            //Normal smoothing: preliminary normals in, final edge normals out
            CLCalc.Program.Variable[] smoothArgs = new CLCalc.Program.Variable[] { varEdgePrelimNormals, varEdgeNormals };
            kernelSmoothNormals.Execute(smoothArgs, max);
        }
Exemple #24
0
                /// <summary>Initializes physics program: compiles the sources, creates the kernels, allocates the device
                /// buffers and prepares the kernel argument lists. Components indexes: [i] - x, [i+1] - y, [i+2] - z</summary>
                /// <param name="nParticles">Number of particles</param>
                public floatBodyPhysics(int nParticles)
                {
                    //Compile all program sources before any kernel is created
                    string[] sources = new string[] { CollisionAppliers, ForceAppliers, ConstAccelMotionEDOSolver };
                    Program.Compile(sources);

                    //Kernels
                    MotionStep = new Program.Kernel("constAccelStep");
                    Kernel_ApplyGravity = new Program.Kernel("ApplyGravity");
                    Kernel_FloorCollision = new Program.Kernel("FloorCollision");
                    Kernel_SelfCollision = new Program.Kernel("SelfCollision");
                    Kernel_WallCollision = new Program.Kernel("WallCollision");
                    Kernel_ResetForces = new Program.Kernel("ResetForces");
                    Kernel_ResetCloseNeighbors = new Program.Kernel("ResetCloseNeighbors");

                    //Host-side initial values for the scalar and gravity buffers
                    float[] initialTime = new float[1] { 0 };
                    float[] initialGravity = new float[3] { 0, 0, 0 };
                    step = new float[1] { 0 };

                    //Allocation size for velocities and positions (3 floats per particle)
                    float[] zeros3N = new float[nParticles * 3];

                    //Allocation size for per-particle characteristics;
                    //masses and collision sizes both start at 1
                    float[] onesN = new float[nParticles];
                    for (int idx = 0; idx < nParticles; idx++)
                    {
                        onesN[idx] = 1f;
                    }

                    //3 * nParticles entries
                    CL_pos = new CLCalc.Program.Variable(zeros3N);
                    CL_vel = new CLCalc.Program.Variable(zeros3N);
                    CL_forces = new CLCalc.Program.Variable(zeros3N);

                    //nParticles entries
                    closeNeighbors = new int[nParticles];
                    CL_closeNeighbors = new CLCalc.Program.Variable(closeNeighbors);
                    CL_masses = new CLCalc.Program.Variable(onesN);
                    CL_collisionSizes = new CLCalc.Program.Variable(onesN);

                    //Scalars
                    CL_t = new CLCalc.Program.Variable(initialTime);
                    CL_step = new CLCalc.Program.Variable(step);

                    //Gravity
                    CL_g = new CLCalc.Program.Variable(initialGravity);

                    //Kernel argument lists
                    stepArgs = new CLCalc.Program.Variable[] { CL_t, CL_step, CL_forces, CL_masses, CL_pos, CL_vel };
                    applyGravArgs = new CLCalc.Program.Variable[] { CL_forces, CL_masses, CL_g };
                    floorCollisionArgs = new CLCalc.Program.Variable[] { CL_vel, CL_pos, CL_collisionSizes };
                    //Wall collision shares the very same argument array as floor collision
                    wallCollisionArgs = floorCollisionArgs;
                    selfCollisionArgs = new CLCalc.Program.Variable[] { CL_vel, CL_pos, CL_masses, CL_forces, CL_closeNeighbors, CL_collisionSizes };
                    resetForcesArgs = new Program.Variable[] { CL_forces };
                    resetCloseNeighborsArgs = new Program.Variable[] { CL_closeNeighbors };

                    //Work sizes: per component, per particle and per particle pair
                    nArgs = new int[1] { nParticles * 3 };
                    nPartics = new int[1] { nParticles };
                    nPartics2 = new int[2] { nParticles, nParticles };
                }
Exemple #25
0
        /// <summary>Computes the inverse Discrete Fourier Transform of a float2 vector x whose length is a power of 4. 
        /// x = { Re[x0] Im[x0] Re[x1] Im[x1] ... Re[xn] Im[xn] }, n = power of 4 (Length = 2*pow(4,n))</summary>
        /// <remarks>
        /// Implemented as conjugate, forward FFT4, conjugate-and-scale by 1/N. The conjugate kernel is executed
        /// directly on <paramref name="CLx"/>, so the caller's device buffer is presumably modified by this call
        /// (kernel source not visible here). The result is also stored in the static CLy.
        /// </remarks>
        /// <param name="CLx">Device variable holding the interleaved real/imaginary input</param>
        /// <returns>Device variable holding the inverse transform</returns>
        public static CLCalc.Program.Variable iFFT4(CLCalc.Program.Variable CLx)
        {
            if (CLScale == null) CLScale = new CLCalc.Program.Variable(new float[1]);

            //Trick: DFT-1 (x) = DFT(x*)*/N;
            //First pass: conjugate with unit scale. One work-item per complex number (length / 2).
            float[] scale = new float[] { 1 };
            CLScale.WriteToDevice(scale);
            kernelConjugate.Execute(new CLCalc.Program.Variable[] { CLx, CLScale }, CLx.OriginalVarLength >> 1);

            CLy = FFT4(ref CLx);

            //Second pass: conjugate the transform and scale by 1/N, N being the number of complex entries
            scale[0] = 1 / (float)(CLx.OriginalVarLength >> 1);
            CLScale.WriteToDevice(scale);
            kernelConjugate.Execute(new CLCalc.Program.Variable[] { CLy, CLScale }, CLy.OriginalVarLength >> 1);
            return CLy;
        }
Exemple #26
0
 /// <summary>Creates the diagonal matrix D(v) whose main diagonal is the vector v.
 /// No copy is made: the host array and the OpenCL variable of v are shared with the new matrix.</summary>
 /// <param name="v">Reference vector</param>
 public floatDiag(floatVector v)
 {
     //Square matrix, as many rows and columns as v has elements
     int dimension = v.Length;
     nRows = dimension;
     nCols = dimension;

     //Share storage with v
     Values = v.Values;
     CLValues = v.CLValues;
 }
Exemple #27
0
            /// <summary>Runs kernelComputeAtWA on A, W and lambda, writing the result into AtA
            /// (transpose(A)*A weighted by W using OpenCL; lambda is the regularization term).</summary>
            /// <param name="A">Matrix A</param>
            /// <param name="W">Diagonal weight matrix</param>
            /// <param name="lambda">Regularization term</param>
            /// <param name="AtA">Destination matrix; (re)created when null or when its storage length is not Cols*(Cols+1)/2</param>
            /// <returns>The same instance left in <paramref name="AtA"/>, flagged as not Cholesky factorized</returns>
            private static floatSymPosDefMatrix AuxLSAtACL(floatMatrix A, floatDiag W, floatVector lambda, ref floatSymPosDefMatrix AtA)
            {
                //Storage length expected for the destination: Cols * (Cols + 1) / 2
                int packedLength = (A.Cols * (A.Cols + 1)) >> 1;

                bool canReuse = AtA != null && AtA.CLValues.OriginalVarLength == packedLength;
                if (!canReuse)
                {
                    AtA = new floatSymPosDefMatrix(new float[packedLength]);
                }

                //One work-item per stored entry of AtA
                CLCalc.Program.Variable[] kernelArgs = new CLCalc.Program.Variable[]
                {
                    A.CLValues, A.CLDim, W.CLValues, AtA.CLValues, lambda.CLValues
                };
                kernelComputeAtWA.Execute(kernelArgs, AtA.CLValues.OriginalVarLength);

                //Values in CL memory were just overwritten, so any previous Cholesky factorization is stale
                AtA.IsCholeskyFactorized = false;

                return AtA;
            }
        /// <summary>Initializes OpenCL, compiles the "run" source and executes the "runNN" kernel twice on random data
        /// (768 inputs into 10000 values, then those into 10 values), printing both result arrays and the timings.</summary>
        public OpenCLTest()
        {
            CLCalc.InitCL();
            List<string> buildlogs;
            CLCalc.Program.Compile(new[] { run }, out buildlogs);
            Random r = new Random();

            //Uniform random values in [-1, 1); arrays are generated in the order sample, hlW, olW
            Func<int, float[]> randomArray =
                count => Enumerable.Range(0, count).Select(i => (float)r.NextDouble() * 2 - 1).ToArray();

            float[] sample = randomArray(768);
            float[] hlW = randomArray(10000 * 769);
            float[] olW = randomArray(10001 * 10);
            float[] hlDest = new float[10000];
            float[] olDest = new float[10];

            //sw measures the whole run; sw2 accumulates only the host/device transfer phases
            var sw = Stopwatch.StartNew();
            var sw2 = Stopwatch.StartNew();
            var gSample = sample.ToGraphics();
            var ghlW = hlW.ToGraphics();
            var golW = olW.ToGraphics();
            var ghlDest = hlDest.ToGraphics();
            var golDest = olDest.ToGraphics();
            sw2.Stop();

            var kernel = new CLCalc.Program.Kernel("runNN");

            //First launch: sample -> hlDest using hlW, one work-item per hlDest entry
            var args = new[]
            {
                gSample,
                new CLCalc.Program.Variable(new[] { sample.Length }),
                ghlDest,
                new CLCalc.Program.Variable(new[] { hlDest.Length }),
                ghlW
            };
            kernel.Execute(args, hlDest.Length);

            //Second launch: hlDest -> olDest using olW, one work-item per olDest entry
            args = new CLCalc.Program.Variable[]
            {
                ghlDest,
                new CLCalc.Program.Variable(new[] { hlDest.Length }),
                golDest,
                new CLCalc.Program.Variable(new[] { olDest.Length }),
                golW
            };
            kernel.Execute(args, olDest.Length);

            //Resume the transfer timer for the read-back
            sw2.Start();
            ghlDest.ReadFromDeviceTo(hlDest);
            golDest.ReadFromDeviceTo(olDest);
            sw2.Stop();
            sw.Stop();

            Console.WriteLine("hlDest\n" + string.Join("\n", hlDest));
            Console.WriteLine("olDest\n" + string.Join("\n", olDest));

            Console.WriteLine("Total {0}ms", sw.ElapsedMilliseconds);
            Console.WriteLine("Memory {0}ms", sw2.ElapsedMilliseconds);
        }
Exemple #29
0
        /// <summary>Sets up the isosurface calculator: allocates host arrays and OpenCL variables, uploads the function
        /// values, compiles the marching cubes program and creates its kernels. You may pass variables created from a
        /// OpenGL context to the CL variables if you are using interop or NULL if not using OpenCL/GL interop.</summary>
        /// <param name="FuncValues">Values of the evaluated 3D function f(x,y,z). FuncValues=float[maxX,maxY,maxZ]</param>
        /// <param name="CLEdgeCoords">OpenCL variable (float) to hold edge coordinates. Dimension has to be 9 * maxX * maxY * maxZ</param>
        /// <param name="CLEdgeNormals">OpenCL variable (float) to hold edge normals. Dimension has to be 9 * maxX * maxY * maxZ</param>
        /// <param name="CLElementArrayIndex">OpenCL variable (int) to hold element array index. Dimension has to be 5 * 3 * (maxX - 1) * (maxY - 1) * (maxZ - 1)</param>
        /// <exception cref="Exception">OpenCL is not in use after initialization</exception>
        private void InitMarchingCubes(float[, ,] FuncValues, CLCalc.Program.Variable CLEdgeCoords, CLCalc.Program.Variable CLEdgeNormals, CLCalc.Program.Variable CLElementArrayIndex)
        {
            if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
            {
                CLCalc.InitCL();
            }

            //Nothing below works without OpenCL
            if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
            {
                throw new Exception("OpenCL not available");
            }

            //Reads maximum lengths
            int maxX = FuncValues.GetLength(0);
            int maxY = FuncValues.GetLength(1);
            int maxZ = FuncValues.GetLength(2);
            max = new int[] { maxX, maxY, maxZ };

            int pointCount = maxX * maxY * maxZ;
            int cubeCount = (maxX - 1) * (maxY - 1) * (maxZ - 1);

            #region Creating variables

            //Isolevel
            isoLevel = new float[1] { 1.32746E-5f };
            varIsoLevel = new CLCalc.Program.Variable(isoLevel);

            //Step size and x0,y0,z0
            varStep = new CLCalc.Program.Variable(step);
            varInitVals = new CLCalc.Program.Variable(initVals);

            //Create and copy function values
            funcVals = new float[pointCount];
            CLFuncVals = new CLCalc.Program.Variable(funcVals);
            SetFuncVals(FuncValues);

            //Edge coordinates - 3 coords * 3 possible directions * number of points
            //A caller-supplied variable is adopted and overwritten with the fresh host array
            edgeCoords = new float[9 * pointCount];
            if (CLEdgeCoords == null)
            {
                varEdgeCoords = new CLCalc.Program.Variable(edgeCoords);
            }
            else
            {
                varEdgeCoords = CLEdgeCoords;
                varEdgeCoords.WriteToDevice(edgeCoords);
            }

            //4 preliminary normals per edge - has to be averaged afterwards
            edgePrelimNormals = new float[36 * pointCount];
            varEdgePrelimNormals = new CLCalc.Program.Variable(edgePrelimNormals);

            //Edge normals
            edgeNormals = new float[9 * pointCount];
            if (CLEdgeNormals == null)
            {
                varEdgeNormals = new CLCalc.Program.Variable(edgeNormals);
            }
            else
            {
                varEdgeNormals = CLEdgeNormals;
                varEdgeNormals.WriteToDevice(edgeNormals);
            }

            //Number of cubes: (maxX-1)*(maxY-1)*(maxZ-1)
            //Marching cube algorithm: each cube can have 5 triangles drawn, 3 vertexes per triangle
            //q-th vertex of p-th triangle of the ijk-th cube: [(5*(i+(maxX-1)*j+k*(maxX-1)*(maxY-1))+p)*3+q]
            elementIndex = new int[15 * cubeCount];
            if (CLElementArrayIndex == null)
            {
                varElemIndex = new CLCalc.Program.Variable(elementIndex);
            }
            else
            {
                varElemIndex = CLElementArrayIndex;
                varElemIndex.WriteToDevice(elementIndex);
            }

            //Edge remapping to build output: one entry per 3 edge coordinates, all starting at -1
            edges = new int[edgeCoords.Length / 3];
            for (int e = 0; e < edges.Length; e++)
            {
                edges[e] = -1;
            }

            #endregion

            #region Compile code and create kernels

            CLMarchingCubesSrc marchingCubesSource = new CLMarchingCubesSrc();
            CLCalc.Program.Compile(new string[] { marchingCubesSource.definitions, marchingCubesSource.src });

            kernelInterpPts = new CLCalc.Program.Kernel("interpPts");
            kernelPolygonize = new CLCalc.Program.Kernel("Polygonize");
            kernelSmoothNormals = new CLCalc.Program.Kernel("SmoothNormals");
            kernelPolygonizeNoNormals = new CLCalc.Program.Kernel("PolygonizeNoNormals");

            #endregion
        }
Exemple #30
0
 /// <summary>Builds a diagonal matrix from its main diagonal. The input array is cloned,
 /// and an OpenCL variable is created from the clone when OpenCL is enabled and in use.</summary>
 /// <param name="Vals">Main diagonal elements</param>
 public floatDiag(float[] Vals)
 {
     //Keep a private copy so later changes to the caller's array do not affect the matrix
     float[] diagonalCopy = (float[])Vals.Clone();
     this.Values = diagonalCopy;

     bool useCL = UseOpenCLIfAvailable && CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
     if (useCL)
     {
         CLValues = new CLCalc.Program.Variable(Values);
     }

     //Square matrix: as many rows and columns as diagonal entries
     int dimension = Vals.Length;
     nRows = dimension;
     nCols = dimension;
 }