Exemple #1
0
        /// <summary>
        /// Smoke test: resets the device, creates a cuBLAS handle and destroys it again.
        /// </summary>
        /// <returns>
        /// "TestCublasHandle pass" when the collected status text equals the empty string;
        /// otherwise "TestCublasHandle fail: " followed by the collected status
        /// text or by the message of the exception that interrupted the test.
        /// </returns>
        static string TestCublasHandle()
        {
            string testName  = "TestCublasHandle";
            var    cuby      = new CublasClr.Cublas();
            IntPtr devHandle = new IntPtr();
            var    aa        = new CudaArray();

            try
            {
                var res = aa.ResetDevice();
                res = res + cuby.MakeCublasHandle(ref devHandle);
                res = res + cuby.DestroyCublasHandle(devHandle);
                if (res != String.Empty)
                {
                    return(testName + " fail: " + res);
                }
                return(testName + " pass");
            }
            catch (Exception ex)
            {
                // Keep the reason: a bare "fail" leaves nothing to diagnose with.
                return(testName + " fail: " + ex.Message);
            }
            finally
            {
                // Always leave the device in a clean state for the next test.
                aa.ResetDevice();
            }
        }
Exemple #2
0
        /// <summary>
        /// Uploads three matrices, multiplies them with GpuMatrixOps.Multiply and
        /// copies the product back to the host.
        /// </summary>
        /// <returns>
        /// The concatenated status strings of every step (empty when no step
        /// reported anything).
        /// </returns>
        static string TestcublasSgemm2()
        {
            uint      aw = 2;
            uint      bh = aw;
            uint      ah = 3;
            uint      bw = 3;
            uint      ch = ah;
            uint      cw = bw;
            GpuMatrix gpuA;
            GpuMatrix gpuB;
            GpuMatrix gpuC;

            var dataA = MatrixUtils.AA();
            var dataB = MatrixUtils.BB();
            var cRes  = new float[ch * cw];

            var cuby = new CublasClr.Cublas();
            var aa   = new CudaArray();

            var res = aa.ResetDevice();

            // NOTE(review): A is registered with C's dimensions (ch x cw) instead of
            // ah x aw. Left untouched because the length of MatrixUtils.AA() is not
            // visible from here - confirm which shape is intended.
            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuA,
                new Matrix <float>(_rows: ch, _cols: cw,
                                   host_data: ImmutableArray.Create(dataA),
                                   matrixFormat: MatrixFormat.Column_Major));

            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuB,
                new Matrix <float>(_rows: bh, _cols: bw,
                                   host_data: ImmutableArray.Create(dataB),
                                   matrixFormat: MatrixFormat.Column_Major));

            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuC,
                new Matrix <float>(_rows: ch, _cols: cw,
                                   host_data: ImmutableArray.Create(cRes),
                                   matrixFormat: MatrixFormat.Column_Major));

            IntPtr cublasHandle = new IntPtr();

            res = res + cuby.MakeCublasHandle(ref cublasHandle);

            try
            {
                GpuMatrix gpuProd;

                res = res + GpuMatrixOps.Multiply(
                    gmOut: out gpuProd,
                    cublasHandle: cublasHandle,
                    gmA: gpuA, gmB: gpuB, gmC: gpuC);

                GpuMatrix gpuSynched;

                res = res + GpuMatrixOps.CopyToHost(out gpuSynched, gpuProd);

                // TODO: compare gpuSynched against a CPU reference product.
            }
            finally
            {
                // Destroy the handle on every path; skip it when creation never
                // produced one.
                if (cublasHandle != IntPtr.Zero)
                {
                    res = res + cuby.DestroyCublasHandle(cublasHandle);
                }
            }

            // Report what actually happened instead of an unconditional empty string.
            return(res);
        }
Exemple #3
0
        /// <summary>
        /// Copies the host data of <paramref name="gmIn"/> to its device pointer.
        /// </summary>
        /// <param name="gmOut">
        /// On success, a new GpuMatrix with the same matrix and device pointer in
        /// state Synched; null when the copy is refused or fails.
        /// </param>
        /// <param name="gmIn">Matrix whose host data is uploaded.</param>
        /// <returns>An empty string on success, otherwise the error text.</returns>
        public static string CopyToDevice(out GpuMatrix gmOut, GpuMatrix gmIn)
        {
            bool deviceMissing = gmIn.DevHostState == DevHostState.DeviceNotAllocated;
            if (deviceMissing)
            {
                gmOut = null;
                return "Device data pointer not allocated";
            }

            var cuda       = new CudaArray();
            var hostValues = gmIn.Matrix.Data.ToArray();
            var status     = cuda.CopyFloatsToDevice(
                hostValues,
                gmIn.DevPtr,
                (uint)gmIn.Matrix.Data.Length);

            if (String.IsNullOrEmpty(status))
            {
                // Host and device now hold the same values.
                gmOut = new GpuMatrix(
                    matrix: gmIn.Matrix,
                    devPtr: gmIn.DevPtr,
                    devHostState: DevHostState.Synched);
                return String.Empty;
            }

            gmOut = null;
            return status;
        }
Exemple #4
0
        /// <summary>
        /// Records the grid geometry, creates the native wrapper objects, resets
        /// the device and allocates / fills the device buffers.
        /// </summary>
        /// <param name="inputs">Host values passed to CopyIntsToDevice for d_grid.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <param name="blockSize">Divisor used to derive the number of blocks per span.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(int[] inputs, uint span, uint blockSize)
        {
            // Geometry
            _span            = span;
            _block_size      = blockSize;
            _area            = _span * _span;
            _blocks_per_span = span / blockSize;
            _blockCount      = _blocks_per_span * _blocks_per_span;

            // Device pointers start out as the zero pointer.
            d_indexRands = IntPtr.Zero;
            d_grid       = IntPtr.Zero;

            // Native wrappers
            _cudaArray  = new CudaArray();
            _gridProcs  = new GridProcs();
            _randoProcs = new RandoProcs();

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.ResetDevice();

            status += _randoProcs.MakeGenerator32(SEED);
            status += _cudaArray.MallocIntsOnDevice(ref d_grid, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_energy, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_energyBlocks, _area / 1024);
            status += _cudaArray.CopyIntsToDevice(inputs, d_grid, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_indexRands, _blockCount);
            status += _cudaArray.MallocFloatsOnDevice(ref d_tempRands, _blockCount);
            status += _gridProcs.Runk_Energy4(d_energy, d_grid, _span);
            status += _cudaArray.MallocFloatsOnDevice(ref d_betas, 5);

            return status;
        }
Exemple #5
0
        /// <summary>
        /// Reads the device data of <paramref name="gmIn"/> back into a new host matrix.
        /// </summary>
        /// <param name="gmOut">
        /// On success, a new GpuMatrix wrapping a column-major matrix built from the
        /// downloaded values, with the same device pointer and state Synched;
        /// null when the copy is refused or fails.
        /// </param>
        /// <param name="gmIn">Matrix whose device data is downloaded.</param>
        /// <returns>An empty string on success, otherwise the error text.</returns>
        public static string CopyToHost(out GpuMatrix gmOut, GpuMatrix gmIn)
        {
            bool deviceMissing = gmIn.DevHostState == DevHostState.DeviceNotAllocated;
            if (deviceMissing)
            {
                gmOut = null;
                return "Device data pointer not allocated";
            }

            // Receiving buffer sized like the existing host data.
            var downloaded = new float[gmIn.Matrix.Data.Length];
            var cuda       = new CudaArray();
            var status     = cuda.CopyFloatsFromDevice(
                downloaded,
                gmIn.DevPtr,
                (uint)gmIn.Matrix.Data.Length);

            if (String.IsNullOrEmpty(status))
            {
                var hostMatrix = new Matrix<float>(
                    _rows: gmIn.Matrix.Rows,
                    _cols: gmIn.Matrix.Cols,
                    host_data: ImmutableArray.Create(downloaded),
                    matrixFormat: MatrixFormat.Column_Major);

                gmOut = new GpuMatrix(
                    matrix: hostMatrix,
                    devPtr: gmIn.DevPtr,
                    devHostState: DevHostState.Synched);
                return String.Empty;
            }

            gmOut = null;
            return status;
        }
Exemple #6
0
        /// <summary>
        /// Creates the native wrapper objects, derives grid / launch / memory sizes
        /// from <paramref name="span"/>, resets the device and allocates the grid
        /// and random-number buffers.
        /// </summary>
        /// <param name="inputs">Host values passed to CopyIntsToDevice for d_grid.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(int[] inputs, uint span)
        {
            // Native wrappers
            _cudaArray  = new CudaArray();
            _gridProcs  = new GridProcs();
            _randoProcs = new RandoProcs();

            // Grid dimensions
            _span = span;
            _area = _span * _span;

            // Launch configuration: block size is capped at MAXTHREADS.
            _blockSize = _span < MAXTHREADS ? _span : MAXTHREADS;
            _gridSize  = _area / _blockSize;

            // Byte sizes of the individual buffers
            _mem_N                 = sizeof(int) * _area;
            _mem_rand              = sizeof(double) * (3 * _area);
            _mem_1                 = sizeof(int) * 1;
            _mem_measured_quantity = sizeof(int) * _gridSize;
            _mem_measured_magnet   = sizeof(int) * _gridSize;

            // Device pointers start out as the zero pointer.
            d_rands = IntPtr.Zero;
            d_grid  = IntPtr.Zero;

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.ResetDevice();

            status += _randoProcs.MakeGenerator64(SEED);
            status += _cudaArray.MallocIntsOnDevice(ref d_grid, _area);
            status += _cudaArray.CopyIntsToDevice(inputs, d_grid, _area);
            status += _cudaArray.MallocFloatsOnDevice(ref d_rands, _area);

            return status;
        }
Exemple #7
0
        /// <summary>
        /// Records the grid geometry, creates the native wrapper objects, resets
        /// the device and allocates the two grids plus the energy, random and
        /// beta buffers. Both grids receive a copy of <paramref name="inputs"/>.
        /// </summary>
        /// <param name="inputs">Host values passed to CopyIntsToDevice for both grids.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(int[] inputs, uint span)
        {
            // Geometry
            _span = span;
            _area = _span * _span;

            // Device pointers start out as the zero pointer.
            d_rands        = IntPtr.Zero;
            d_gridA        = IntPtr.Zero;
            d_gridB        = IntPtr.Zero;
            d_energy       = IntPtr.Zero;
            d_energyBlocks = IntPtr.Zero;

            // Native wrappers
            _cudaArray  = new CudaArray();
            _gridProcs  = new GridProcs();
            _randoProcs = new RandoProcs();

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.ResetDevice();

            status += _randoProcs.MakeGenerator64(SEED);
            status += _cudaArray.MallocIntsOnDevice(ref d_gridA, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_energy, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_energyBlocks, _area / 1024);
            status += _cudaArray.MallocIntsOnDevice(ref d_gridB, _area);
            status += _cudaArray.CopyIntsToDevice(inputs, d_gridA, _area);
            status += _cudaArray.CopyIntsToDevice(inputs, d_gridB, _area);
            status += _cudaArray.MallocFloatsOnDevice(ref d_rands, _area);
            status += _cudaArray.MallocFloatsOnDevice(ref d_betas, 10);

            return status;
        }
        /// <summary>
        /// Allocates the device buffers of the learner context, creates the kernel
        /// manager and runs the "dlInitContext" kernel to fill the context buffer.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The number of attribute axes or of categorical axes exceeds the limit
        /// defined in GPUConstants.
        /// </exception>
        public void Initialize()
        {
            // Report the limit that was actually exceeded; the previous single
            // message was garbled and named attribute axes for both conditions.
            if (NumAttributeAxes > GPUConstants.MaxAttributeAxes)
            {
                throw new InvalidOperationException("Too many attribute axes");
            }
            if (NumCategoricalAxes > GPUConstants.MaxCategoricalAxes)
            {
                throw new InvalidOperationException("Too many categorical axes");
            }

            ContextBuffer   = new CudaArray <GPUDecisionLearnerContext>(1);
            DataPointBuffer = DataPointsToGpu(DataPoints);
            DataPointIds    = new CudaArray <int>(DataPoints.Count);
            Nodes           = new CudaArray <GPUNode>(GPUConstants.MaxTotalNodes);
            OpenNodeIds     = new CudaArray <int>(GPUConstants.MaxNodesAtSingleLevel);
            NextOpenNodeIds = new CudaArray <int>(GPUConstants.MaxNodesAtSingleLevel);

            // The context buffer is set as PrefixArguments of the kernel manager.
            Kernels = new KernelManager(CudaManager)
            {
                BlockSize       = GPUConstants.NumThreadsPerBlock,
                PrefixArguments = new[] { ContextBuffer },
            };

            Kernels["dlInitContext"].Arguments(
                DataPointBuffer,
                DataPoints.Count,
                TotalWeight,
                NumAttributeAxes,
                NumCategoricalAxes,
                DataPointIds,
                Nodes,
                OpenNodeIds,
                NextOpenNodeIds,
                (int)OpenNodeIds.Size).ExecuteTask();
        }
        /// <summary>
        /// Runs the "spcApplyOptimalSplit" kernel with the per-axis data points,
        /// the given splits and the sort keys.
        /// </summary>
        /// <param name="bestSplits">Device array of splits handed to the kernel.</param>
        public void ApplyOptimalSplit(CudaArray<GPUSplit> bestSplits)
        {
            var applySplit = Kernels["spcApplyOptimalSplit"]
                             .Arguments(_dataPointsPerAxis, bestSplits, _sortKeys);

            applySplit.Execute(NumSmallBlocks * GPUConstants.NumThreadsPerBlock);

#if DEBUGCUDA
            // Debug builds verify the result on the host.
            SanityCheckSplit(bestSplits);
#endif
        }
        /// <summary>
        /// Runs the "spcCopyDataPointsPerAxis" kernel followed by the
        /// "spcBestCategoricalSplitPerAxis" kernel.
        /// </summary>
        /// <param name="bestSplits">Device array handed to the second kernel.</param>
        /// <param name="writeSplitsStart">Start offset handed to the second kernel.</param>
        /// <returns>Context.NumCategoricalAxes.</returns>
        public int FindOptimalSplit(CudaArray<GPUSplit> bestSplits, int writeSplitsStart)
        {
            var copyPoints = Kernels["spcCopyDataPointsPerAxis"]
                             .Arguments(_dataPointsPerAxis);
            copyPoints.Execute(
                GPUConstants.NumThreadsPerBlock * NumSmallBlocks,
                GPUConstants.NumThreadsPerBlock);

            var bestPerAxis = Kernels["spcBestCategoricalSplitPerAxis"]
                              .Arguments(_dataPointsPerAxis, bestSplits, writeSplitsStart);
            bestPerAxis.Execute(
                GPUConstants.NumThreadsPerBlock * NumSmallBlocks,
                GPUConstants.NumThreadsPerBlock);

            return Context.NumCategoricalAxes;
        }
Exemple #11
0
        /// <summary>
        /// Entry point: calls the two error probes of CudaArray, then runs the
        /// four copy tests and prints each result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var cuda = new CudaArray();

            // The return values of the two probes are not used.
            cuda.TestRuntimeErr();
            cuda.TestCudaStatusErr();

            var intsToDevice = TestCopyIntsToDevice();
            Console.WriteLine(intsToDevice);

            var floatsToDevice = TestCopyFloatsToDevice();
            Console.WriteLine(floatsToDevice);

            var intsDeviceToDevice = TestCopyIntsDeviceToDevice();
            Console.WriteLine(intsDeviceToDevice);

            var floatsDeviceToDevice = TestCopyFloatsDeviceToDevice();
            Console.WriteLine(floatsDeviceToDevice);
        }
Exemple #12
0
        /// <summary>
        /// Replaces the palette image with the pixels of <paramref name="palette"/>
        /// and binds it to the palette texture reference of the iterate kernel.
        /// </summary>
        /// <param name="palette">Source palette; width and height must be positive.</param>
        /// <exception cref="ArgumentException">The palette has no pixels.</exception>
        public override void ApplyPalette(Palette palette)
        {
            context.Synchronize();
            mainStream.Synchronize();

            if (palette.Width <= 0 || palette.Height <= 0)
            {
                throw new ArgumentException("palette may not be empty.");
            }

            paletteImage.Free();

            paletteImage = CudaArray.Allocate(palette.Width, palette.Height, CudaArrayFormat.Float, 4);
            HostBuffer2D <Vec4> hostPaletteBuffer = HostBuffer2D <Vec4> .Alloc(palette.Width, palette.Height);

            try
            {
                Color col;

                Vec4 colorVec;

                for (int y = 0; y < palette.Height; y++)
                {
                    for (int x = 0; x < palette.Width; x++)
                    {
                        // Scale 8-bit channels to [0, 1]; alpha is always 1.
                        col      = palette.GetPixel(x, y);
                        colorVec = new Vec4(
                            (float)col.R / 255.0f,
                            (float)col.G / 255.0f,
                            (float)col.B / 255.0f,
                            1.0f);

                        hostPaletteBuffer[y, x] = colorVec;
                    }
                }

                CudaMem.Copy(hostPaletteBuffer, paletteImage);
            }
            finally
            {
                // Free the staging buffer even when filling or copying throws.
                hostPaletteBuffer.Free();
            }

            paletteTex.Array = paletteImage;
            paletteTex.SetFormat(CudaArrayFormat.Float, 4);
            paletteTex.AddressModeX = TexAddressMode.Clamp;
            paletteTex.AddressModeY = TexAddressMode.Clamp;
            paletteTex.FilterMode   = TexFilterMode.Linear;
            paletteTex.Flags        = TexFlags.NormalizedCoordinates;

            iterateKernel.SetTexRef(paletteTex);
            context.Synchronize();
        }
Exemple #13
0
        /// <summary>
        /// Allocates device memory for as many floats as <paramref name="gmIn"/>
        /// holds on the host.
        /// </summary>
        /// <param name="gmOut">
        /// On success, a new GpuMatrix with the same matrix, the new device pointer
        /// and state HostIsNewer; null when the allocation fails.
        /// </param>
        /// <param name="gmIn">Matrix that determines the allocation size.</param>
        /// <returns>An empty string on success, otherwise the error text.</returns>
        public static string AllocateOnDevice(out GpuMatrix gmOut, GpuMatrix gmIn)
        {
            IntPtr devicePointer = IntPtr.Zero;
            var    cuda          = new CudaArray();
            var    status        = cuda.MallocFloatsOnDevice(
                ref devicePointer,
                (uint)gmIn.Matrix.Data.Length);

            if (String.IsNullOrEmpty(status))
            {
                // Nothing has been uploaded yet, so the host copy is the newer one.
                gmOut = new GpuMatrix(
                    matrix: gmIn.Matrix,
                    devPtr: devicePointer,
                    devHostState: DevHostState.HostIsNewer);
                return String.Empty;
            }

            gmOut = null;
            return status;
        }
Exemple #14
0
        /// <summary>
        /// Round-trip test: uploads 1000 ints to buffer A, copies A to B on the
        /// device, downloads B and compares it with the original sequence.
        /// </summary>
        /// <returns>
        /// "... pass" when the sequences match and no call reported a status;
        /// otherwise a "... fail" / "... exception" message with the reason.
        /// </returns>
        static string TestCopyIntsDeviceToDevice()
        {
            string testName = "TestCopyIntsDeviceToDevice";
            uint   arrayLen = 1000;
            var    alist    = Enumerable.Range(4, (int)arrayLen).ToArray();
            var    aa       = new CudaArray();
            IntPtr devDataA = new System.IntPtr();
            IntPtr devDataB = new System.IntPtr();
            var    retlist  = new int[(int)arrayLen];

            try
            {
                var res = aa.ResetDevice();
                res = res + aa.MallocIntsOnDevice(ref devDataA, arrayLen);
                res = res + aa.MallocIntsOnDevice(ref devDataB, arrayLen);
                res = res + aa.CopyIntsToDevice(alist, devDataA, arrayLen);
                res = res + aa.CopyIntsDeviceToDevice(devDataB, devDataA, arrayLen);
                res = res + aa.CopyIntsFromDevice(retlist, devDataB, arrayLen);

                // Zero each pointer once released so the finally block does not
                // release the same allocation a second time.
                res      = res + aa.ReleaseDevicePtr(devDataA);
                devDataA = IntPtr.Zero;
                res      = res + aa.ReleaseDevicePtr(devDataB);
                devDataB = IntPtr.Zero;

                if (!alist.SequenceEqual(retlist))
                {
                    return(testName + " fail: sequences do not match");
                }

                if (res != String.Empty)
                {
                    return(testName + " fail: " + res);
                }
                return(testName + " pass");
            }
            catch (Exception ex)
            {
                return(testName + " exception " + ex.Message);
            }
            finally
            {
                // Only clean up what is still allocated (an exception may have
                // interrupted the try block before the releases above).
                if (devDataA != IntPtr.Zero)
                {
                    aa.ReleaseDevicePtr(devDataA);
                }
                if (devDataB != IntPtr.Zero)
                {
                    aa.ReleaseDevicePtr(devDataB);
                }
                aa.ResetDevice();
            }
        }
Exemple #15
0
        /// <summary>
        /// Records the grid geometry, creates the native wrapper objects and
        /// allocates the input and output buffers; the input buffer receives
        /// <paramref name="inputs"/>.
        /// </summary>
        /// <param name="inputs">Host values passed to CopyIntsToDevice for d_In.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(int[] inputs, uint span)
        {
            // Geometry and direction flag
            _span      = span;
            _area      = _span * _span;
            _backwards = false;

            // Device pointers start out as the zero pointer.
            d_In  = IntPtr.Zero;
            d_Out = IntPtr.Zero;

            // Native wrappers
            _cudaArray = new CudaArray();
            _gridProcs = new GridProcs();

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.MallocIntsOnDevice(ref d_In, _area);

            status += _cudaArray.CopyIntsToDevice(inputs, d_In, _area);
            status += _cudaArray.MallocIntsOnDevice(ref d_Out, _area);

            return status;
        }
        /// <summary>
        /// Host-side consistency check: for every open node with a categorical
        /// best split, verifies that the child ranges partition the parent range
        /// and that each data point lies in the range of the child its category
        /// sends it to.
        /// </summary>
        /// <param name="bestSplits">Device array holding one split per open node index.</param>
        private void SanityCheckSplit(CudaArray <GPUSplit> bestSplits)
        {
            GPUSplit[] bestSplitsArray        = bestSplits.Read();
            GPUDecisionLearnerContext context = Context.ContextBuffer.Read()[0];

            int[] dataPointIds = Context.DataPointIds.Read();
            for (int openNodeIndex = 0; openNodeIndex < context.NumOpenNodes; ++openNodeIndex)
            {
                int nodeId = Context.OpenNodeIds[openNodeIndex];
                if (nodeId == -1)
                {
                    continue;
                }

                // Only categorical splits are checked here.
                GPUSplit bestSplit = bestSplitsArray[openNodeIndex];
                if (bestSplit.SplitType != GPUConstants.SplitType_Categorical)
                {
                    continue;
                }

                // The children must cover the parent range back to back:
                // left = [start, start + leftLength), right follows directly.
                GPUNode parentNode = Context.Nodes[nodeId];
                GPUNode leftNode   = Context.Nodes[parentNode.LeftChild];
                GPUNode rightNode  = Context.Nodes[parentNode.RightChild];
                Assert.AreEqual(parentNode.RangeStart, leftNode.RangeStart);
                Assert.AreEqual(leftNode.RangeStart + leftNode.RangeLength, rightNode.RangeStart);
                Assert.AreEqual(parentNode.RangeLength, leftNode.RangeLength + rightNode.RangeLength);

                for (int i = 0; i < parentNode.RangeLength; ++i)
                {
                    int        index     = parentNode.RangeStart + i;
                    IDataPoint dataPoint = Context.DataPoints[dataPointIds[index]];
                    bool       goRight   = (dataPoint.Categories[bestSplit.Axis] & bestSplit.SplitCategories) != 0;

                    // Ranges are half-open, so the upper bound is exclusive. The
                    // former "<=" accepted the first index past the range, e.g. a
                    // left-side point sitting on the right node's first slot.
                    if (goRight)
                    {
                        Assert.IsTrue(rightNode.RangeStart <= index && index < rightNode.RangeStart + rightNode.RangeLength);
                    }
                    else
                    {
                        Assert.IsTrue(leftNode.RangeStart <= index && index < leftNode.RangeStart + leftNode.RangeLength);
                    }
                }
            }
        }
        /// <summary>
        /// One-time setup: initializes the context, allocates the split buffers,
        /// initializes both splitters and synchronizes the CUDA context.
        /// Subsequent calls return immediately.
        /// </summary>
        public void Initialize()
        {
            if (_initialized)
            {
                return;
            }
            _initialized = true;

            Context.Initialize();

            // Both split buffers are sized from the open-node capacity.
            var openNodeCapacity = Context.OpenNodeIds.Size;

            _allSplits  = new CudaArray<GPUSplit>(GPUConstants.MaxSplits * openNodeCapacity);
            _bestSplits = new CudaArray<GPUSplit>(openNodeCapacity);

            _attributeSplitter.Initialize();
            _categoricalSplitter.Initialize();

            Context.CudaManager.Context.Synchronize();
        }
        /// <summary>
        /// Runs "sumToOutputKernel" for every power-of-two combination of block
        /// count and threads per block from 1 to 1024 and checks that the result
        /// equals the host-side sum of the input numbers.
        /// </summary>
        public void TestSum()
        {
            int[] numbers     = Enumerable.Range(999, 10000).ToArray();
            int   expectedSum = numbers.Sum();

            using (CudaManager cudaManager = Provider.CudaManagerPool.GetCudaManagerForThread(Provider.Logger))
            using (CudaArray<int> numbersBuffer = numbers)
            using (CudaArray<int> outputBuffer = new[] { 0 })
            {
                KernelManager kernels = new KernelManager(cudaManager);

                for (int blockCount = 1; blockCount <= 1024; blockCount *= 2)
                {
                    for (int blockThreads = 1; blockThreads <= 1024; blockThreads *= 2)
                    {
                        // Reset the accumulator and confirm the reset took effect.
                        kernels["setIntKernel"].Arguments(outputBuffer, 0).ExecuteTask();
                        Assert.AreEqual(0, outputBuffer.Read()[0]);

                        kernels["sumToOutputKernel"]
                        .Arguments(numbersBuffer, numbers.Length, outputBuffer, SharedBuffer.Ints(blockThreads))
                        .Execute(blockCount * blockThreads, blockThreads);

                        Assert.AreEqual(expectedSum, outputBuffer.Read()[0]);
                    }
                }
            }
        }
Exemple #19
0
        /// <summary>
        /// Records the grid geometry, creates the native wrapper objects, resets
        /// the device, allocates the temperature / flip / random / threshold
        /// buffers, uploads both input arrays and reads the flip data back once.
        /// </summary>
        /// <param name="temp_inputs">Host floats passed to CopyFloatsToDevice for d_tempData.</param>
        /// <param name="flip_inputs">Host ints passed to CopyIntsToDevice for d_flipData.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <param name="blockSize">Divisor used to derive the number of blocks per span.</param>
        /// <param name="seed">Seed handed to MakeGenerator32.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(float[] temp_inputs, int[] flip_inputs, uint span, uint blockSize, int seed)
        {
            // Geometry
            _span            = span;
            _block_size      = blockSize;
            _area            = _span * _span;
            _blocks_per_span = span / blockSize;
            _blockCount      = _blocks_per_span * _blocks_per_span;

            // Device pointers start out as the zero pointer.
            d_flipData   = IntPtr.Zero;
            d_tempData   = IntPtr.Zero;
            d_flipRands  = IntPtr.Zero;
            d_indexRands = IntPtr.Zero;
            d_threshes   = IntPtr.Zero;

            // Native wrappers
            _cudaArray  = new CudaArray();
            _gridProcs  = new GridProcs();
            _randoProcs = new RandoProcs();

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.ResetDevice();

            status += _randoProcs.MakeGenerator32(seed);
            status += _cudaArray.MallocFloatsOnDevice(ref d_tempData, _area);
            status += _cudaArray.MallocFloatsOnDevice(ref d_heatBlocks, _area / 1024);
            status += _cudaArray.MallocIntsOnDevice(ref d_flipData, _area);

            status += _cudaArray.MallocIntsOnDevice(ref d_indexRands, _blockCount);
            status += _cudaArray.MallocIntsOnDevice(ref d_flipRands, _blockCount);
            status += _cudaArray.MallocFloatsOnDevice(ref d_threshes, _allTempSteps);

            status += _cudaArray.CopyIntsToDevice(flip_inputs, d_flipData, _area);
            status += _cudaArray.CopyFloatsToDevice(temp_inputs, d_tempData, _area);

            // Read the flip data back; only the status of the copy is kept, the
            // downloaded values themselves are not used afterwards.
            var flipReadback = new int[_area];
            status += _cudaArray.CopyIntsFromDevice(flipReadback, d_flipData, _area);

            return status;
        }
Exemple #20
0
        /// <summary>
        /// Records the grid geometry, creates the native wrapper objects, resets
        /// the device and allocates two float grids that both receive a copy of
        /// <paramref name="inputs"/>.
        /// </summary>
        /// <param name="inputs">Host floats passed to CopyFloatsToDevice for both grids.</param>
        /// <param name="span">Grid edge length; the area is span * span.</param>
        /// <returns>The concatenated status strings of all device calls.</returns>
        public static string Init(float[] inputs, uint span)
        {
            // Geometry
            _span = span;
            _area = _span * _span;

            // Device pointers start out as the zero pointer.
            d_gridA = IntPtr.Zero;
            d_gridB = IntPtr.Zero;

            // Native wrappers
            _cudaArray  = new CudaArray();
            _gridProcs  = new GridProcs();
            _randoProcs = new RandoProcs();

            // Every call appends its status text; the call order is significant.
            var status = _cudaArray.ResetDevice();

            status += _cudaArray.MallocFloatsOnDevice(ref d_gridA, _area);
            status += _cudaArray.MallocFloatsOnDevice(ref d_gridB, _area);
            status += _cudaArray.CopyFloatsToDevice(inputs, d_gridA, _area);
            status += _cudaArray.CopyFloatsToDevice(inputs, d_gridB, _area);

            return status;
        }
Exemple #21
0
        /// <summary>
        /// Creates a generator, fills a device buffer of 1000 floats through
        /// MakeNormalRands (arguments 0.0f and 1.0f), downloads the buffer and
        /// releases the resources.
        /// </summary>
        /// <returns>
        /// "TestMakeNormalRands pass" when no call reported a status; otherwise
        /// "TestMakeNormalRands fail: " followed by the collected status text or
        /// by the message of the exception that interrupted the test.
        /// </returns>
        static string TestMakeNormalRands()
        {
            string testName = "TestMakeNormalRands";
            var    rdo      = new RandoClr.RandoProcs();
            var    aa       = new CudaArray();
            uint   arrayLen = 1000;
            int    seed     = 1234;
            IntPtr devRando = new IntPtr();
            IntPtr devData  = new IntPtr();
            var    retlist  = new float[(int)arrayLen];

            try
            {
                var res = aa.ResetDevice();

                res = res + rdo.MakeGenerator64(ref devRando, seed);
                res = res + aa.MallocFloatsOnDevice(ref devData, arrayLen);
                res = res + rdo.MakeNormalRands(devData, devRando, arrayLen, 0.0f, 1.0f);
                res = res + aa.CopyFloatsFromDevice(retlist, devData, arrayLen);

                // Zero the pointer once released so the finally block does not
                // release the same allocation a second time.
                res     = res + aa.ReleaseDevicePtr(devData);
                devData = IntPtr.Zero;
                res     = res + rdo.DestroyGenerator(devRando);

                if (res != String.Empty)
                {
                    return(testName + " fail: " + res);
                }
                return(testName + " pass");
            }
            catch (Exception ex)
            {
                // Keep the reason: a bare "fail" leaves nothing to diagnose with.
                return(testName + " fail: " + ex.Message);
            }
            finally
            {
                // NOTE(review): the generator is only destroyed on the success
                // path above; no cleanup for it happens here.
                if (devData != IntPtr.Zero)
                {
                    aa.ReleaseDevicePtr(devData);
                }
                aa.ResetDevice();
            }
        }
Exemple #22
0
        /// <summary>
        /// Runs the four attribute-split kernels in sequence: copy, sort,
        /// accumulate frequencies and best split per axis.
        /// </summary>
        /// <param name="splits">Device array handed to the "spaBestSplitPerAxis" kernel.</param>
        /// <param name="writeSplitsStart">Start offset handed to the same kernel.</param>
        /// <returns>Context.NumAttributeAxes.</returns>
        public int FindOptimalSplit(CudaArray<GPUSplit> splits, int writeSplitsStart)
        {
            var copyPoints = Kernels["spaCopyDataPointsPerAxis"]
                             .Arguments(_sortedDataPointsPerAxis, _sortKeysPerAxis);
            copyPoints.Execute(
                NumSmallBlocks * GPUConstants.NumThreadsPerBlock,
                GPUConstants.NumThreadsPerBlock);

            var sortPoints = Kernels["spaSortDataPointsPerAxis"]
                             .Arguments(_sortedDataPointsPerAxis, _sortKeysPerAxis);
            sortPoints.Execute(
                NumSmallBlocks * GPUConstants.NumThreadsPerBlock,
                GPUConstants.NumThreadsPerBlock);

            var accumulate = Kernels["spaAccumulateFrequenciesPerAxis"]
                             .Arguments(_sortedDataPointsPerAxis, _cumulativeFrequenciesPerAxis);
            accumulate.Execute(
                NumSmallBlocks * GPUConstants.NumThreadsPerBlock,
                GPUConstants.NumThreadsPerBlock);
#if DEBUGCUDA
            // Debug builds verify the accumulated frequencies on the host.
            SanityCheckCumulativeFrequencies();
#endif

            var bestPerAxis = Kernels["spaBestSplitPerAxis"]
                              .Arguments(_sortedDataPointsPerAxis, _cumulativeFrequenciesPerAxis, splits, writeSplitsStart);
            bestPerAxis.Execute(
                NumSmallBlocks * GPUConstants.NumThreadsPerBlock,
                GPUConstants.NumThreadsPerBlock);

            return Context.NumAttributeAxes;
        }
Exemple #23
0
        /// <summary>
        /// Releases the device pointer of <paramref name="gmIn"/>.
        /// </summary>
        /// <param name="gmOut">
        /// On success, a new GpuMatrix with the same matrix, a zero device pointer
        /// and state DeviceNotAllocated; null when the release is refused or fails.
        /// </param>
        /// <param name="gmIn">Matrix whose device memory is released.</param>
        /// <returns>An empty string on success, otherwise the error text.</returns>
        public static string ClearOnDevice(out GpuMatrix gmOut, GpuMatrix gmIn)
        {
            bool alreadyCleared = gmIn.DevHostState == DevHostState.DeviceNotAllocated;
            if (alreadyCleared)
            {
                gmOut = null;
                return "Device data pointer already cleared";
            }

            var cuda   = new CudaArray();
            var status = cuda.ReleaseDevicePtr(gmIn.DevPtr);

            if (String.IsNullOrEmpty(status))
            {
                gmOut = new GpuMatrix(
                    matrix: gmIn.Matrix,
                    devPtr: IntPtr.Zero,
                    devHostState: DevHostState.DeviceNotAllocated);
                return String.Empty;
            }

            gmOut = null;
            return status;
        }
 /// <summary>
 /// Allocates the device arrays of the categorical splitter: per-axis data
 /// points, class frequencies per category, and one sort key per data point.
 /// </summary>
 public void Initialize()
 {
     var dataPointCount = Context.DataPoints.Count;

     _dataPointsPerAxis = new CudaArray<GPUCategoricalDataPoint>(
         dataPointCount * GPUConstants.MaxCategoricalAxes);

     _classFrequenciesPerCategory = new CudaArray<float>(
         GPUConstants.MaxClasses * GPUConstants.MaxCategoricalAxes);

     _sortKeys = new CudaArray<byte>(dataPointCount);
 }
        /// <summary>
        /// Creates an engine on <paramref name="device"/>: creates a context, loads
        /// the PTX module and its kernels, allocates the iterator / statistics /
        /// entropy buffers, uploads random seeds, creates the timing events,
        /// configures the kernel launch shapes and launches the iterator
        /// initialization kernel.
        /// </summary>
        /// <param name="device">Device to run on; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="device"/> is null.</exception>
        internal CudaFractalEngine(Device device)
        {
            if (device == null)
            {
                // ArgumentNullException derives from ArgumentException, so callers
                // catching the latter keep working.
                throw new ArgumentNullException("device", "Invalid device passed to CudaFractalEngine.");
            }

            this.device = device;

            context = device.CreateContext();

            // Two blocks per multiprocessor, clamped to the range 2..64.
            iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

            System.IO.MemoryStream ptxStream = new System.IO.MemoryStream(Kernels.KernelResources.kernels_ptx);

            module = context.LoadModule(ptxStream);

            initIteratorsKernel  = module.GetKernel("init_iterators_kernel");
            resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
            iterateKernel        = module.GetKernel("iterate_kernel");
            updateStatsKernel    = module.GetKernel("update_stats_kernel");
            resetOutputKernel    = module.GetKernel("reset_output_kernel");
            updateOutputKernel   = module.GetKernel("update_output_kernel");
            glOutputBufferID     = 0;

            mainStream = new Cuda.Stream();

            // Per-iterator state and statistics buffers.
            iterPosStateBuffer   = DeviceBuffer.Alloc(8, IteratorCount);
            iterColorStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);
            iterStatBuffer       = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeIterStatEntry)), IteratorCount);
            globalStatBuffer     = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeGlobalStatEntry)), 1);

            entropyXBuffer    = DeviceBuffer.Alloc(16, IteratorCount);
            entropyCBuffer    = DeviceBuffer.Alloc(4, IteratorCount);
            entropySeedBuffer = DeviceBuffer.Alloc(4, IteratorCount);

            // One seed below 65536 per iterator.
            uint[] seeds = new uint[IteratorCount];
            for (int i = 0; i < IteratorCount; i++)
            {
                seeds[i] = (uint)rand.Next(65536);
            }
            CudaMem.Copy(seeds, entropySeedBuffer);

            // No palette yet.
            paletteImage = CudaArray.Null;
            paletteTex   = module.GetTexRef("paletteTex");

            resetBeginEvt = new Event();
            resetEndEvt   = new Event();
            cycleIterEvt  = new Event();
            cycleStatEvt  = new Event();
            cycleEndEvt   = new Event();
            toneBeginEvt  = new Event();
            toneEndEvt    = new Event();

            // Launch configuration: the iterator kernels use the iterator grid,
            // the statistics kernel runs as a single thread.
            initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            initIteratorsKernel.SetGridDim(IterBlockCount, 1);
            initIteratorsKernel.SetSharedSize(0);
            resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
            resetIteratorsKernel.SetSharedSize(0);
            iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
            iterateKernel.SetGridDim(IterBlockCount, 1);
            iterateKernel.SetSharedSize(0);
            updateStatsKernel.SetBlockShape(1, 1, 1);
            updateStatsKernel.SetGridDim(1, 1);
            updateStatsKernel.SetSharedSize(0);

            initIteratorsKernel.Launch(entropyXBuffer.Ptr.RawPtr, entropyCBuffer.Ptr.RawPtr, entropySeedBuffer.Ptr.RawPtr);

            context.Synchronize();
        }
Exemple #26
0
        /// <summary>
        /// Multiplies two 5 x 5 matrices (MakeIdentity and MakeIdentiPoke data)
        /// with GpuMatrixOps.Multiply, copies the product back to the host and
        /// computes two CPU products with RowMajorMatrixMult.
        /// </summary>
        /// <returns>
        /// The concatenated status strings of every step (empty when no step
        /// reported anything).
        /// </returns>
        static string TestcublasSgemm1()
        {
            uint aw = 5;
            uint bh = aw;
            uint ah = 5;
            uint bw = 5;
            uint ch = ah;
            uint cw = bw;

            var cuby = new CublasClr.Cublas();
            var aa   = new CudaArray();
            var res  = aa.ResetDevice();

            var       dataA = MatrixUtils.MakeIdentity(rows: ah, cols: aw);
            GpuMatrix gpuA;

            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuA,
                new Matrix <float>(_rows: ah, _cols: aw,
                                   host_data: ImmutableArray.Create(dataA),
                                   matrixFormat: MatrixFormat.Column_Major));

            var       dataB = MatrixUtils.MakeIdentiPoke(rows: bh, cols: bw);
            GpuMatrix gpuB;

            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuB,
                new Matrix <float>(_rows: bh, _cols: bw,
                                   host_data: ImmutableArray.Create(dataB),
                                   matrixFormat: MatrixFormat.Column_Major));

            // Size the zero data from C's own dimensions (same values here, but it
            // stays correct if the shapes above are changed).
            var       dataC = MatrixUtils.MakeZeroes(rows: ch, cols: cw);
            GpuMatrix gpuC;

            res = res + GpuMatrixOps.SetupGpuMatrix(
                out gpuC,
                new Matrix <float>(_rows: ch, _cols: cw,
                                   host_data: ImmutableArray.Create(dataC),
                                   matrixFormat: MatrixFormat.Column_Major));

            IntPtr cublasHandle = new IntPtr();

            res = res + cuby.MakeCublasHandle(ref cublasHandle);

            try
            {
                GpuMatrix gpuProd;

                res = res + GpuMatrixOps.Multiply(
                    gmOut: out gpuProd,
                    cublasHandle: cublasHandle,
                    gmA: gpuA, gmB: gpuB, gmC: gpuC);

                GpuMatrix gpuSynched;

                res = res + GpuMatrixOps.CopyToHost(out gpuSynched, gpuProd);

                // NOTE(review): the CPU products below are computed but never
                // compared against gpuSynched - the test currently only reports
                // status strings.
                var cpuRes = new float[ah * bw];

                MatrixUtils.RowMajorMatrixMult(C: cpuRes, A: dataA, B: dataB, wA: aw, hA: ah, wB: bw);

                var cpuRes2 = new float[bh * aw];

                MatrixUtils.RowMajorMatrixMult(C: cpuRes2, A: dataB, B: dataA, wA: bw, hA: bh, wB: aw);
            }
            finally
            {
                // Destroy the handle on every path; skip it when creation never
                // produced one.
                if (cublasHandle != IntPtr.Zero)
                {
                    res = res + cuby.DestroyCublasHandle(cublasHandle);
                }
            }

            return(res);
        }
Exemple #27
0
 /// <summary>
 /// Creates the three per-axis <c>CudaArray</c> buffers used by this object.
 /// The sorted-data-point and sort-key buffers hold one element per
 /// (data point, attribute axis) pair; the cumulative-frequency buffer holds
 /// <c>GPUConstants.MaxClasses</c> elements per such pair.
 /// </summary>
 public void Initialize()
 {
     // Element count shared by all three buffers: data points times attribute axes.
     var pointAxisSlots = Context.DataPoints.Count * Context.NumAttributeAxes;

     _sortedDataPointsPerAxis = new CudaArray <GPUAttributeDataPoint>(pointAxisSlots);
     _sortKeysPerAxis = new CudaArray <float>(pointAxisSlots);

     // The frequency buffer is MaxClasses times larger than the other two.
     _cumulativeFrequenciesPerAxis = new CudaArray <float>(pointAxisSlots * GPUConstants.MaxClasses);
 }
Exemple #28
0
        /// <summary>
        /// Constructs the engine: creates a context on the first listed device,
        /// loads the kernel module from <c>CudaResources.kernels_ptx</c>, allocates the
        /// iterator/entropy/statistics buffers and publishes each one to the module
        /// under a constant of the same name, creates the timing events, configures
        /// the launch shape of the iterator and statistics kernels, and finally
        /// launches <c>init_iterators_kernel</c> and waits for the context to synchronize.
        /// </summary>
        public CudaFractalEngine()
        {
            // Bind to the first entry of Device.Devices and create a context on it.
            device  = Device.Devices[0];
            context = device.CreateContext();

            // Twice the multiprocessor count, passed through Util.Clamp with bounds 2 and 64.
            iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

            // The module image comes from the CudaResources.kernels_ptx byte resource.
            System.IO.MemoryStream ptxImage = new System.IO.MemoryStream(CudaResources.kernels_ptx);
            module = context.LoadModule(ptxImage);

            // Look up every kernel entry point by its name inside the module.
            initIteratorsKernel  = module.GetKernel("init_iterators_kernel");
            resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
            iterateKernel        = module.GetKernel("iterate_kernel");
            updateStatsKernel    = module.GetKernel("update_stats_kernel");
            resetOutputKernel    = module.GetKernel("reset_output_kernel");
            updateOutputKernel   = module.GetKernel("update_output_kernel");
            glOutputBufferID     = 0;

            mainStream = new Cuda.Stream();

            // Iterator state buffers, each IterBlockSize x IterBlockCount.
            // NOTE(review): the first Alloc argument looks like a per-element byte size — confirm.
            iterPosStateBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
            module.WriteConstant("iterPosStateBuffer", iterPosStateBuffer);
            iterColorStateBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
            module.WriteConstant("iterColorStateBuffer", iterColorStateBuffer);

            // Entropy buffers with the same 2D extent.
            entropyXBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropyXBuffer", entropyXBuffer);
            entropyCBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropyCBuffer", entropyCBuffer);
            entropySeedBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropySeedBuffer", entropySeedBuffer);

            // Fill a temporary host buffer with one rand.Next(65536) value per slot,
            // copy it into entropySeedBuffer, then free the host buffer.
            HostBuffer2D <uint> seedStaging = HostBuffer2D <uint> .Alloc(IterBlockSize, IterBlockCount);

            for (int row = 0; row < IterBlockCount; row++)
            {
                for (int col = 0; col < IterBlockSize; col++)
                {
                    seedStaging[row, col] = (uint)rand.Next(65536);
                }
            }

            CudaMem.Copy(seedStaging, entropySeedBuffer);
            seedStaging.Free();

            // Per-slot statistics buffers.
            dotCountBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
            module.WriteConstant("dotCountBuffer", dotCountBuffer);

            peakDensityBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("peakDensityBuffer", peakDensityBuffer);

            // Raw allocations for the aggregate values, each published to the module by name.
            totalIterCountMem = DevicePtr.AllocRaw(8);
            module.WriteConstant("totalIterCountMem", totalIterCountMem);
            totalDotCountMem = DevicePtr.AllocRaw(8);
            module.WriteConstant("totalDotCountMem", totalDotCountMem);
            densityMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("densityMem", densityMem);
            peakDensityMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("peakDensityMem", peakDensityMem);
            scaleConstantMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("scaleConstantMem", scaleConstantMem);

            // No palette image yet; only the texture reference is resolved here.
            paletteImage = CudaArray.Null;
            paletteTex   = module.GetTexRef("paletteTex");

            // One event per named phase boundary.
            resetBeginEvt = new Event();
            resetEndEvt   = new Event();
            cycleIterEvt  = new Event();
            cycleStatEvt  = new Event();
            cycleEndEvt   = new Event();
            toneBeginEvt  = new Event();
            toneEndEvt    = new Event();

            // The three iterator kernels share one launch shape:
            // IterBlockSize x 1 x 1 blocks, an IterBlockCount x 1 grid, shared size 0.
            initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            initIteratorsKernel.SetGridDim(IterBlockCount, 1);
            initIteratorsKernel.SetSharedSize(0);

            resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
            resetIteratorsKernel.SetSharedSize(0);

            iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
            iterateKernel.SetGridDim(IterBlockCount, 1);
            iterateKernel.SetSharedSize(0);

            // The statistics kernel is configured as a 1 x 1 x 1 block on a 1 x 1 grid.
            updateStatsKernel.SetBlockShape(1, 1, 1);
            updateStatsKernel.SetGridDim(1, 1);
            updateStatsKernel.SetSharedSize(0);

            // resetOutputKernel and updateOutputKernel receive no launch configuration here.

            // Run the iterator initialization once and wait for the context to synchronize.
            initIteratorsKernel.Launch();
            context.Synchronize();
        }