Exemplo n.º 1
0
 /// <summary>
 /// JPEG encoding step that combines the forward DCT, quantization and level shift.
 /// The source is expected as 8x8 macro blocks; the destination is expected to be
 /// laid out as 64x1 macro blocks.
 /// </summary>
 /// <param name="src">Source image.</param>
 /// <param name="dst">Destination image.</param>
 /// <param name="QuantFwdTable">Forward quantization table for JPEG encoding, in device memory.
 /// NOTE(review): earlier documentation named QuantInvTableInit() as the creator of this table;
 /// a forward table presumably comes from the forward-table initializer - confirm.</param>
 /// <param name="oSizeRoi">ROI size, handed to NPP unchanged (unit presumably macro blocks - confirm).</param>
 public static void DCTQuantFwd8x8LS(NPPImage_8uC1 src, NPPImage_16sC1 dst, CudaDeviceVariable<ushort> QuantFwdTable, NppiSize oSizeRoi)
 {
     NppStatus status = NPPNativeMethods.NPPi.ImageCompression.nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R(
         src.DevicePointer, src.Pitch,
         dst.DevicePointer, dst.Pitch,
         QuantFwdTable.DevicePointer,
         oSizeRoi);

     // Trace the native call and its status, then let the status check raise on failure.
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R", status));
     NPPException.CheckNppStatus(status, null);
 }
        /// <summary>
        /// Host-array overload of the pooling backward pass (single precision).
        /// Uploads <paramref name="srcData"/>, <paramref name="srcDiffData"/> and
        /// <paramref name="destData"/> into temporary device buffers, calls
        /// cudnnPoolingBackward and downloads the result into <paramref name="destDiffData"/>.
        /// </summary>
        /// <param name="pooling">Pooling descriptor.</param>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Host values uploaded as the source data.</param>
        /// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
        /// <param name="srcDiffData">Host values uploaded as the source differential.</param>
        /// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Host values uploaded as the destination data.</param>
        /// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
        /// <param name="destDiffData">Host array that receives the computed differential.</param>
        public void Backward(CudnnPoolingDescriptor pooling, CudnnTensorDescriptor srcTensor, float[] srcData, CudnnTensorDescriptor srcDiffTensor, float[] srcDiffData,
                                                             CudnnTensorDescriptor destTensor, float[] destData, CudnnTensorDescriptor destDiffTensor, float[] destDiffData)
        {
            Contract.Requires(pooling != null);
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Float, srcTensor, srcDiffTensor, destTensor, destDiffTensor);

            // One temporary device buffer per host array; all are released when the block exits.
            using (var deviceSrc = new CudaDeviceVariable<float>(srcData.Length))
            using (var deviceSrcDiff = new CudaDeviceVariable<float>(srcDiffData.Length))
            using (var deviceDest = new CudaDeviceVariable<float>(destData.Length))
            using (var deviceDestDiff = new CudaDeviceVariable<float>(destDiffData.Length))
            {
                // Only the three inputs are uploaded; the fourth buffer is written by the call.
                deviceSrc.CopyToDevice(srcData);
                deviceSrcDiff.CopyToDevice(srcDiffData);
                deviceDest.CopyToDevice(destData);

                Invoke(() => CudnnNativeMethods.cudnnPoolingBackward(
                    handle, pooling.Handle,
                    srcTensor.Handle, deviceSrc.DevicePointer,
                    srcDiffTensor.Handle, deviceSrcDiff.DevicePointer,
                    destTensor.Handle, deviceDest.DevicePointer,
                    destDiffTensor.Handle, deviceDestDiff.DevicePointer));

                deviceDestDiff.CopyToHost(destDiffData);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates the intersection device for <paramref name="scene"/>: sets up the ray-buffer
        /// queues, picks or creates the CUDA context, loads the "IntersectLBvh" kernel from a
        /// PTX file, allocates the ray/hit device buffers and captures the scene's vertices,
        /// triangle infos and BVH data.
        /// </summary>
        /// <param name="scene">Scene whose geometry is captured.</param>
        /// <param name="ctx">Context to reuse; when null a new one is created on the device
        /// returned by CudaContext.GetMaxGflopsDeviceId().</param>
        public CudaIntersectionDevice(RayEngineScene scene, NVContext ctx)
            : base(scene)
        {
            wallclock = new Stopwatch();
            todoRayBuffers = new ConcurrentQueue<Tuple<int, RayBuffer>>();
            doneRayBuffers = new List<ConcurrentQueue<RayBuffer>> { new ConcurrentQueue<RayBuffer>() };
            started = false;

            cudaContext = ctx ?? new NVContext { Context = new CudaContext(CudaContext.GetMaxGflopsDeviceId()) };

            // NOTE(review): the PTX location is an absolute, machine-specific path.
            using (var ptxReader = new StreamReader(@"G:\Git\RayDen\CudaMegaRay\x64\Release\kernel.cu.ptx"))
            {
                intersectKernel = cudaContext.Context.LoadKernelPTX(ptxReader.BaseStream, "IntersectLBvh");
            }

            rays = new CudaDeviceVariable<RayData>(RayBuffer.RayBufferSize);
            hits = new CudaDeviceVariable<RayHit>(RayBuffer.RayBufferSize);
            verts = scene.Vertices.ToArray();

            var triangleInfos = scene.Triangles.Select(triangle => triangle.GetInfo()).ToArray();
            var adapter = new BvhDataAdapter(scene);
            var bvhNodes = adapter.GetMpData();

            bvh = bvhNodes;
            trianglesCount = triangleInfos.Length;
            tris = triangleInfos;
            nodesCount = bvhNodes.Length;

            // The size report uses 32 bytes per BVH entry.
            Tracer.TraceLine("BVH Data Size {0:F3} MBytes", (bvhNodes.Length * 32f) / (1024f * 1024f));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes the function data: allocates <c>Xopt</c>, fills <c>MinusOnes</c> with -1
        /// for every dimension, computes <c>Factor</c>, and runs the "generateData" kernel,
        /// whose single-element output is stored in <c>Fopt</c>.
        /// </summary>
        protected override void Init()
        {
            var kernelFileName = KernelFile;
            var initKernel = Ctx.LoadKernel(kernelFileName, "generateData");
            Xopt = new CudaDeviceVariable<double>(DimensionsCount);

            long rseed = FunctionNumber + 10000 * InstanceNumber;

            double[] host_minus_ones = new double[DimensionsCount];

            for (int i = 0; i < DimensionsCount; i++)
            {
                host_minus_ones[i] = -1;
            }

            MinusOnes = host_minus_ones;

            Factor = Math.Max(1.0, Math.Sqrt(DimensionsCount)/8.0);

            // d_fopt is only a scratch buffer for the kernel's scalar result; dispose it
            // once the value has been read back so the device allocation is not leaked.
            using (var d_fopt = new CudaDeviceVariable<double>(1))
            {
                initKernel.Run(
                    DimensionsCount,
                    rseed,
                    FunctionNumber,
                    InstanceNumber,
                    Xopt.DevicePointer,
                    d_fopt.DevicePointer);

                // Converting the device variable to an array reads the value back to the host.
                double[] fopt_arr = d_fopt;

                Fopt = fopt_arr[0];
            }
        }
        /// <summary>
        /// Initializes the function data: allocates <c>B</c>, <c>M</c> and <c>Xopt</c> on the
        /// device and runs the "generateData" kernel, whose single-element output is stored
        /// in <c>Fopt</c>.
        /// </summary>
        protected override void Init()
        {
            var kernelFileName = KernelFile;
            var initKernel = Ctx.LoadKernel(kernelFileName, "generateData");
            B = new CudaDeviceVariable<double>(DimensionsCount);
            M = new CudaDeviceVariable<double>(DimensionsCount * DimensionsCount);
            Xopt = new CudaDeviceVariable<double>(DimensionsCount);

            long rseed = FunctionNumber + 10000 * InstanceNumber;

            // d_fopt is only a scratch buffer for the kernel's scalar result; dispose it
            // once the value has been read back so the device allocation is not leaked.
            using (var d_fopt = new CudaDeviceVariable<double>(1))
            {
                initKernel.Run(
                    DimensionsCount,
                    rseed,
                    FunctionNumber,
                    InstanceNumber,
                    M.DevicePointer,
                    B.DevicePointer,
                    Xopt.DevicePointer,
                    d_fopt.DevicePointer);

                // Converting the device variable to an array reads the value back to the host.
                double[] fopt_arr = d_fopt;

                Fopt = fopt_arr[0];
            }
        }
Exemplo n.º 6
0
        ////////////////////////////////////////////////////////////////////////////////
        // Occupancy-based launch configurator
        //
        // The launch configurator, cudaOccupancyMaxPotentialBlockSize and
        // cudaOccupancyMaxPotentialBlockSizeVariableSMem, suggests a block
        // size that achieves the best theoretical occupancy. It also returns
        // the minimum number of blocks needed to achieve the occupancy on the
        // whole device.
        //
        // This launch configurator is purely occupancy-based. It doesn't
        // translate directly to performance, but the suggestion should
        // nevertheless be a good starting point for further optimizations.
        //
        // This function configures the launch based on the "automatic"
        // argument, records the runtime, and reports occupancy and runtime.
        //
        // array      - device buffer handed to the kernel
        // arrayCount - number of elements; also used to derive the grid size
        // automatic  - true: use the occupancy calculator's suggested block size,
        //              false: use manualBlockSize
        // Returns 0 in all cases.
        ////////////////////////////////////////////////////////////////////////////////
        static int launchConfig(CudaDeviceVariable<int> array, int arrayCount, bool automatic)
        {
            int blockSize = 0;
            int minGridSize = 0;
            int gridSize;
            SizeT dynamicSMemUsage = 0;

            float elapsedTime;

            double potentialOccupancy;

            CudaOccupancy.cudaOccDeviceState state = new CudaOccupancy.cudaOccDeviceState();
            state.cacheConfig = CudaOccupancy.cudaOccCacheConfig.PreferNone;

            if (automatic)
            {
                CudaOccupancy.cudaOccMaxPotentialOccupancyBlockSize(ref minGridSize, ref blockSize, new CudaOccupancy.cudaOccDeviceProp(0), new CudaOccupancy.cudaOccFuncAttributes(kernel), state, dynamicSMemUsage);

                Console.WriteLine("Suggested block size: {0}", blockSize);
                Console.WriteLine("Minimum grid size for maximum occupancy: {0}", minGridSize);
            }
            else
            {
                // This block size is too small. Given limited number of
                // active blocks per multiprocessor, the number of active
                // threads will be limited, and thus unable to achieve maximum
                // occupancy.
                //
                blockSize = manualBlockSize;
            }

            // Round up
            //
            gridSize = (arrayCount + blockSize - 1) / blockSize;

            // Launch and profile
            //
            kernel.GridDimensions = gridSize;
            kernel.BlockDimensions = blockSize;
            elapsedTime = kernel.Run(array.DevicePointer, arrayCount);

            // Calculate occupancy
            //
            potentialOccupancy = reportPotentialOccupancy(blockSize, dynamicSMemUsage);

            // Occupancy is a fraction, hence the scaling to a percentage.
            Console.WriteLine("Potential occupancy: {0}%", potentialOccupancy * 100);

            // Report elapsed time
            //
            // The value is labelled "ms", so it is printed unscaled; the x100 factor
            // belongs only to the percentage above.
            Console.WriteLine("Elapsed time: {0}ms", elapsedTime);

            return 0;
        }
        /// <summary>
        /// Double-precision, device-memory overload that forwards to
        /// cudnnConvolutionBackwardBias after validating the arguments.
        /// </summary>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Source data in device memory.</param>
        /// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Destination data in device memory.</param>
        /// <param name="accumulate">Accumulation mode passed to the native call unchanged.</param>
        public void BackwardBias(CudnnTensorDescriptor srcTensor, CudaDeviceVariable<double> srcData, CudnnTensorDescriptor destTensor, CudaDeviceVariable<double> destData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, destTensor);

            Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardBias(
                handle,
                srcTensor.Handle, srcData.DevicePointer,
                destTensor.Handle, destData.DevicePointer,
                accumulate));
        }
Exemplo n.º 8
0
        // Parameterless constructor: obtains the draw-matrix and set kernels, publishes the
        // character-cell dimensions as kernel constants, fetches the shared character texture
        // and subscribes to TargetChanged.
        public MyMatrixObserver()
        {
            NbDecimals = 2;

            m_drawMatrixKernel = MyKernelFactory.Instance.Kernel(@"Observers\DrawMatrixKernel", true);
            m_setKernel = MyKernelFactory.Instance.Kernel(@"Common\SetKernel", true);

            // Character cell geometry used by the drawing kernel.
            m_drawMatrixKernel.SetConstantVariable(
                "D_CHARACTER_WIDTH",
                MyDrawStringHelper.CharacterWidth);
            m_drawMatrixKernel.SetConstantVariable(
                "D_CHARACTER_HEIGHT",
                MyDrawStringHelper.CharacterHeight);
            m_drawMatrixKernel.SetConstantVariable(
                "D_CHARACTER_SIZE",
                MyDrawStringHelper.CharacterWidth * MyDrawStringHelper.CharacterHeight);

            // The texture is requested for the device with index DevCount - 1.
            m_characters = MyMemoryManager.Instance.GetGlobalVariable<float>(
                "CHARACTERS_TEXTURE",
                MyKernelFactory.Instance.DevCount - 1,
                MyDrawStringHelper.LoadDigits);

            TargetChanged += MyMatrixObserver_TargetChanged;
        }
        /// <summary>
        /// Single-precision, device-memory overload that forwards to
        /// cudnnConvolutionBackwardData after validating the arguments.
        /// </summary>
        /// <param name="filter">Descriptor for <paramref name="filterData"/>.</param>
        /// <param name="filterData">Filter values in device memory.</param>
        /// <param name="diffTensor">Descriptor for <paramref name="diffData"/>.</param>
        /// <param name="diffData">Differential values in device memory.</param>
        /// <param name="convolution">Convolution descriptor.</param>
        /// <param name="gradient">Descriptor for <paramref name="gradientData"/>.</param>
        /// <param name="gradientData">Gradient buffer in device memory.</param>
        /// <param name="accumulate">Accumulation mode passed to the native call unchanged.</param>
        public void BackwardData(CudnnFilterDescriptor filter, CudaDeviceVariable<float> filterData, CudnnTensorDescriptor diffTensor, CudaDeviceVariable<float> diffData, CudnnConvolutionDescriptor convolution, CudnnTensorDescriptor gradient, CudaDeviceVariable<float> gradientData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(filter != null);
            Contract.Requires(filterData != null);
            Contract.Requires(diffTensor != null);
            Contract.Requires(diffData != null);
            Contract.Requires(convolution != null);
            Contract.Requires(gradient != null);
            Contract.Requires(gradientData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Float, filter, diffTensor, gradient);

            Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardData(
                handle,
                filter.Handle, filterData.DevicePointer,
                diffTensor.Handle, diffData.DevicePointer,
                convolution.Handle,
                gradient.Handle, gradientData.DevicePointer,
                accumulate));
        }
Exemplo n.º 10
0
		/// <summary>
		/// Prepares a graph cut for a graph of the given size: asks NPP for the required
		/// scratch-buffer size, allocates a device buffer of that many bytes and initializes
		/// the graph-cut state on top of it.
		/// </summary>
		/// <param name="size">Graph size</param>
		public GraphCut4(NppiSize size)
		{
			_size = size;

			// Step 1: query how large the scratch buffer has to be.
			int requiredBytes = 0;
			status = NPPNativeMethods.NPPi.ImageLabeling.nppiGraphcutGetSize(_size, ref requiredBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiGraphcutGetSize", status));
			NPPException.CheckNppStatus(status, this);

			// Step 2: allocate the buffer and initialize the state with it.
			_buffer = new CudaDeviceVariable<byte>(requiredBytes);
			_state = new NppiGraphcutState();

			status = NPPNativeMethods.NPPi.ImageLabeling.nppiGraphcutInitAlloc(_size, ref _state, _buffer.DevicePointer);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiGraphcutInitAlloc", status));
			NPPException.CheckNppStatus(status, this);
		}
Exemplo n.º 11
0
        /// <summary>
        /// Runs a cuBLAS-backed matrix operation on <paramref name="A"/> and <paramref name="B"/>
        /// and writes the outcome to <paramref name="Result"/>.
        /// <paramref name="Result"/> is zero-filled before the operation is dispatched.
        /// </summary>
        /// <param name="operation">Operation to perform; only Multiplication and DotProd are handled,
        /// anything else is logged as an error.</param>
        /// <param name="A">First operand (device memory).</param>
        /// <param name="ACount">Element count of A.</param>
        /// <param name="AColumnHint">Column count of A; ACount / AColumnHint is used as its row count.</param>
        /// <param name="B">Second operand (device memory).</param>
        /// <param name="BCount">Element count of B.</param>
        /// <param name="BColumnHint">Column count of B; BCount / BColumnHint is used as its row count.</param>
        /// <param name="Result">Output buffer (device memory).</param>
        /// <param name="ResultCount">Element count of Result; not read by this method.</param>
        /// <param name="ResultColumnHint">Passed as the last Gemm argument in the matrix-matrix case.</param>
        /// <param name="beta">Forwarded to the Gemv/Gemm calls as their beta argument.</param>
        public void Run(MatOperation operation, CudaDeviceVariable<float> A, int ACount, int AColumnHint, CudaDeviceVariable<float> B, int BCount, int BColumnHint, CudaDeviceVariable<float> Result, int ResultCount, int ResultColumnHint, float beta = 1.0f)
        {
            // Fill Result with the bit pattern of 0.0f.
            Result.Memset(BitConverter.ToUInt32(BitConverter.GetBytes(0.0f), 0));

            switch (operation)
            {
                case MatOperation.Multiplication:  // vectors/matrices always have to be in the correct dimensions!
                    if (BCount > 1 && ACount > 1 && BColumnHint == 1 && ACount / AColumnHint > 1 && BCount / BColumnHint == AColumnHint) // A * vecB: A has several rows, B is a single column
                    {
                        MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose because it does Ax row wise if x is a row vector
                            AColumnHint, ACount / AColumnHint, 1.0f,
                            A, AColumnHint,
                            B, 1,
                            beta, Result, 1);
                    }
                    else if (ACount > 1 && BCount > 1 && ACount / AColumnHint == 1 && BColumnHint > 1 && BCount / BColumnHint == AColumnHint)  // vecA * B: A is a single row, B has several columns
                    {
                        MyCublasFactory.Instance.Gemv(Operation.NonTranspose,  // no transpose here: B is the matrix operand and A the vector
                            BColumnHint, BCount / BColumnHint, 1.0f,
                            B, BColumnHint,
                            A, 1,
                            beta, Result, 1);
                    }
                    else if (ACount / AColumnHint == 1 && BColumnHint == 1 && ACount > 1 && BCount > 1) // trans(vecA) * vecB: single row times single column
                    {
                        // Delegates to the DotProd case below.
                        Run(MatOperation.DotProd, A, ACount, AColumnHint, B, BCount, BColumnHint, Result, ResultCount, ResultColumnHint, beta);
                    }
                    else if (ACount != 1 || BCount != 1)// A * B: general matrix multiplication; skipped when both operands hold a single element
                    {
                        MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.NonTranspose,
                            ACount / AColumnHint, BColumnHint, AColumnHint, 1.0f,
                            A, ACount / AColumnHint,
                            B, BCount / BColumnHint,
                            beta, Result, ResultColumnHint);
                    }
                    break;
                case MatOperation.DotProd:
                    // A is treated as an ACount x 1 operand and multiplied (transposed) with B.
                    MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose because it does Ax row wise if x is a row vector
                       ACount, 1, 1.0f,
                       A, ACount,
                       B, 1,
                       beta, Result, 1);
                    break;
                default:
                    MyLog.Writer.WriteLine(MyLogLevel.ERROR, "Trying to run cublas for undefined MatOperation");
                    break;
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Creates a single-channel float 3D CUDA array of the given dimensions and fills it
        /// with uniformly distributed random values produced by <c>randomDevice</c>.
        /// The generator is re-seeded from the current clock ticks on every call.
        /// </summary>
        /// <param name="width">Array width in elements.</param>
        /// <param name="height">Array height in elements.</param>
        /// <param name="depth">Array depth in elements.</param>
        /// <returns>The newly created array; its lifetime is not managed by this method.</returns>
        public CudaArray3D GenerateUniformArray(int width, int height, int depth)
        {
            int count = width * height * depth;

            // The linear staging buffer is only needed until its contents are copied into
            // the array; 'using' releases it even when generation or the copy throws.
            using (CudaDeviceVariable<float> randomVariable = new CudaDeviceVariable<float>(count))
            {
                CudaArray3D randomArray = new CudaArray3D(CUArrayFormat.Float, width, height, depth, CudaArray3DNumChannels.One, CUDAArray3DFlags.None);

                randomDevice.SetPseudoRandomGeneratorSeed((ulong)DateTime.Now.Ticks);
                randomDevice.GenerateUniform32(randomVariable.DevicePointer, count);

                randomArray.CopyFromDeviceToThis(randomVariable.DevicePointer, sizeof(float));

                return randomArray;
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Performs the backward activation pass by calling cudnnActivationBackward.
        /// </summary>
        /// <param name="mode">Activation mode.</param>
        /// <param name="alpha">Scaling factor passed by reference to the native call.</param>
        /// <param name="srcDesc">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Source data in device memory.</param>
        /// <param name="srcDiffDesc">Descriptor for <paramref name="srcDiffData"/>.</param>
        /// <param name="srcDiffData">Source differential in device memory.</param>
        /// <param name="destDesc">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Destination data in device memory.</param>
        /// <param name="beta">Scaling factor passed by reference to the native call.</param>
        /// <param name="destDiffDesc">Descriptor for <paramref name="destDiffData"/>.</param>
        /// <param name="destDiffData">Destination differential in device memory.</param>
        /// <exception cref="CudaDNNException">Thrown when the native call does not return Success.</exception>
        public void ActivationBackward(cudnnActivationMode mode,
										float alpha,
										TensorDescriptor srcDesc,
										CudaDeviceVariable<float> srcData,
										TensorDescriptor srcDiffDesc,
										CudaDeviceVariable<float> srcDiffData,
										TensorDescriptor destDesc,
										CudaDeviceVariable<float> destData,
										float beta,
										TensorDescriptor destDiffDesc,
										CudaDeviceVariable<float> destDiffData
										)
        {
            res = CudaDNNNativeMethods.cudnnActivationBackward(_handle, mode, ref alpha, srcDesc.Desc, srcData.DevicePointer, srcDiffDesc.Desc, srcDiffData.DevicePointer, destDesc.Desc, destData.DevicePointer, ref beta, destDiffDesc.Desc, destDiffData.DevicePointer);
            // Log the name of the function that was actually called (backward, not forward).
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnActivationBackward", res));
            if (res != cudnnStatus.Success) throw new CudaDNNException(res);
        }
        /// <summary>
        /// Loads the "generateData" kernel, allocates the two rotation matrices
        /// (DimensionsCount x DimensionsCount elements each) and <c>Xopt</c>, then runs the
        /// kernel with a seed derived from the function and instance numbers.
        /// </summary>
        protected override void Init()
        {
            var generateData = Ctx.LoadKernel(KernelFile, "generateData");

            int matrixElements = DimensionsCount * DimensionsCount;
            Rotation2 = new CudaDeviceVariable<double>(matrixElements);
            Rotation1 = new CudaDeviceVariable<double>(matrixElements);
            Xopt = new CudaDeviceVariable<double>(DimensionsCount);

            long seed = FunctionNumber + 10000 * InstanceNumber;

            generateData.Run(DimensionsCount, seed, Rotation1.DevicePointer, Rotation2.DevicePointer, Xopt.DevicePointer);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Draws a string whose character codes already reside in device memory into an image
        /// buffer, using the "DrawStringKernel" kernel and the shared character texture.
        /// </summary>
        /// <param name="inString">Device buffer holding the string.</param>
        /// <param name="x">Passed to the kernel as the x position.</param>
        /// <param name="y">Passed to the kernel as the y position.</param>
        /// <param name="bgColor">Background color constant.</param>
        /// <param name="fgColor">Foreground color constant.</param>
        /// <param name="image">Device pointer of the target image.</param>
        /// <param name="imageWidth">Image width constant.</param>
        /// <param name="imageHeight">Image height constant.</param>
        /// <param name="stringOffset">Offset, in float elements, into <paramref name="inString"/>.</param>
        /// <param name="stringLen">Number of characters to draw.</param>
        public static void DrawStringFromGPUMem(CudaDeviceVariable<float> inString, int x, int y, uint bgColor, uint fgColor, CUdeviceptr image, int imageWidth, int imageHeight, int stringOffset, int stringLen)
        {
            // Both the kernel and the texture are requested for device index DevCount - 1.
            MyCudaKernel drawKernel = MyKernelFactory.Instance.Kernel(
                MyKernelFactory.Instance.DevCount - 1,
                @"Observers\DrawStringKernel");
            CudaDeviceVariable<float> glyphs = MyMemoryManager.Instance.GetGlobalVariable<float>(
                "CHARACTERS_TEXTURE",
                MyKernelFactory.Instance.DevCount - 1,
                LoadDigits);

            // Colors and image geometry.
            drawKernel.SetConstantVariable("D_BG_COLOR", bgColor);
            drawKernel.SetConstantVariable("D_FG_COLOR", fgColor);
            drawKernel.SetConstantVariable("D_IMAGE_WIDTH", imageWidth);
            drawKernel.SetConstantVariable("D_IMAGE_HEIGHT", imageHeight);

            // Glyph geometry.
            drawKernel.SetConstantVariable("D_DIGIT_WIDTH", CharacterWidth);
            drawKernel.SetConstantVariable("D_DIGIT_SIZE", CharacterSize);
            drawKernel.SetConstantVariable("D_DIGITMAP_NBCHARS", CharacterMapNbChars);

            // Execution is set up for CharacterSize work items per character.
            drawKernel.SetupExecution(CharacterSize * stringLen);
            drawKernel.Run(
                image,
                glyphs.DevicePointer,
                x,
                y,
                inString.DevicePointer + sizeof(float) * stringOffset,
                stringLen);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Double-precision, device-memory overload that forwards to cudnnSoftmaxBackward
        /// after validating the arguments.
        /// </summary>
        /// <param name="algorithm">Softmax algorithm.</param>
        /// <param name="mode">Softmax mode.</param>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Source data in device memory.</param>
        /// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
        /// <param name="srcDiffData">Source differential in device memory.</param>
        /// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
        /// <param name="destDiffData">Destination differential in device memory.</param>
        public void Backward(CudnnSoftmaxAlgorithm algorithm, CudnnSoftmaxMode mode,
                             CudnnTensorDescriptor srcTensor, CudaDeviceVariable<double> srcData, CudnnTensorDescriptor srcDiffTensor, CudaDeviceVariable<double> srcDiffData,
                             CudnnTensorDescriptor destDiffTensor, CudaDeviceVariable<double> destDiffData)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, srcDiffTensor, destDiffTensor);

            Invoke(() => CudnnNativeMethods.cudnnSoftmaxBackward(
                handle, algorithm, mode,
                srcTensor.Handle, srcData.DevicePointer,
                srcDiffTensor.Handle, srcDiffData.DevicePointer,
                destDiffTensor.Handle, destDiffData.DevicePointer));
        }
Exemplo n.º 17
0
 /// <summary>
 /// Smoke test: loads the kernel named "kernel" from the given PTX image, runs it over a
 /// 16-element int buffer initialized to i - 2, and prints the buffer contents afterwards.
 /// </summary>
 /// <param name="ptxFile">PTX module image containing the kernel.</param>
 static void Test(byte[] ptxFile)
 {
     const int size = 16;

     // Dispose the context when done so the device allocation made below is released
     // together with it instead of lingering until process exit.
     using (var context = new CudaContext())
     {
         var kernel = context.LoadKernelPTX(ptxFile, "kernel");
         var memory = context.AllocateMemory(sizeof(int) * size);

         using (var gpuMemory = new CudaDeviceVariable<int>(memory))
         {
             var cpuMemory = new int[size];
             for (var i = 0; i < size; i++)
                 cpuMemory[i] = i - 2;
             gpuMemory.CopyToDevice(cpuMemory);

             // 4 blocks of 4 threads: one thread per element.
             kernel.BlockDimensions = 4;
             kernel.GridDimensions = 4;
             kernel.Run(memory);

             gpuMemory.CopyToHost(cpuMemory);
             for (var i = 0; i < size; i++)
                 Console.WriteLine("{0} = {1}", i, cpuMemory[i]);
         }
     }
 }
        /// <summary>
        /// Single-precision, device-memory overload that forwards to cudnnActivationBackward
        /// after validating the arguments.
        /// </summary>
        /// <param name="mode">Activation mode.</param>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Source data in device memory.</param>
        /// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
        /// <param name="srcDiffData">Source differential in device memory.</param>
        /// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Destination data in device memory.</param>
        /// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
        /// <param name="destDiffData">Destination differential in device memory.</param>
        public void Backward(CudnnActivationMode mode, CudnnTensorDescriptor srcTensor, CudaDeviceVariable<float> srcData, CudnnTensorDescriptor srcDiffTensor, CudaDeviceVariable<float> srcDiffData,
                                                       CudnnTensorDescriptor destTensor, CudaDeviceVariable<float> destData, CudnnTensorDescriptor destDiffTensor, CudaDeviceVariable<float> destDiffData)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Float, srcTensor, srcDiffTensor, destTensor, destDiffTensor);

            Invoke(() => CudnnNativeMethods.cudnnActivationBackward(
                handle, mode,
                srcTensor.Handle, srcData.DevicePointer,
                srcDiffTensor.Handle, srcDiffData.DevicePointer,
                destTensor.Handle, destData.DevicePointer,
                destDiffTensor.Handle, destDiffData.DevicePointer));
        }
        /// <summary>
        /// Host-array overload of the convolution bias backward pass (double precision).
        /// Uploads both arrays into temporary device buffers, calls
        /// cudnnConvolutionBackwardBias and downloads the result into <paramref name="destData"/>.
        /// </summary>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Host values uploaded as the source data.</param>
        /// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Host array holding the current destination values; it is
        /// overwritten with the result.</param>
        /// <param name="accumulate">Accumulation mode passed to the native call unchanged.</param>
        public void BackwardBias(CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor destTensor, double[] destData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, destTensor);

            using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
            using (var destDataGpu = new CudaDeviceVariable<double>(destData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);

                // The destination buffer is freshly allocated. When the caller asks for the
                // result to be accumulated, the existing host values must be on the device
                // first; otherwise the sum would start from uninitialized memory.
                destDataGpu.CopyToDevice(destData);

                Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardBias(handle, srcTensor.Handle, srcDataGpu.DevicePointer, destTensor.Handle, destDataGpu.DevicePointer, accumulate));
                destDataGpu.CopyToHost(destData);
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Creates the intersection device for <paramref name="scene"/>: sets up the ray-buffer
        /// collections, picks or creates the CUDA context, loads the "Intersect" kernel from a
        /// PTX file, allocates the ray/hit device buffers and captures the scene's vertices,
        /// triangle infos and BVH data (taken from the scene cache when caching is enabled
        /// and a cache is present, built otherwise).
        /// </summary>
        /// <param name="scene">Scene whose geometry is captured.</param>
        /// <param name="ctx">Context to reuse; when null a new one is created on the device
        /// returned by CudaContext.GetMaxGflopsDeviceId().</param>
        public DadeCudaIntersectionDevice(RayEngineScene scene, NVContext ctx)
            : base(scene)
        {
            this.scene = scene;
            wallclock = new Stopwatch();
            todoRayBuffers = new InputRayBufferCollection();
            doneRayBuffers = new OutputRayBufferCollection();
            started = false;

            cudaContext = ctx ?? new NVContext { Context = new CudaContext(CudaContext.GetMaxGflopsDeviceId()) };

            // NOTE(review): the PTX location is an absolute, machine-specific path.
            using (var ptxReader = new StreamReader(@"G:\Git\RayDen\CudaMegaRay\x64\Release\Intersection.cu.ptx"))
            {
                intersectKernel = cudaContext.Context.LoadKernelPTX(ptxReader.BaseStream, "Intersect");
            }

            rays = new CudaDeviceVariable<RayData>(RayBuffer.RayBufferSize);
            hits = new CudaDeviceVariable<RayHit>(RayBuffer.RayBufferSize);
            verts = scene.Vertices.ToArray();
            tris = scene.Triangles.Select(triangle => triangle.GetInfo()).ToArray();

            bool cacheUsable = GlobalConfiguration.Instance.UseSceneCaching && scene.Cache != null;
            if (!cacheUsable)
            {
                // No usable cache: build the BVH data from the scene.
                var builtNodes = new BvhDataAdapter(scene).BuildData();
                bvh = builtNodes;
                nodesCount = builtNodes.Length;
            }
            else
            {
                bvh = scene.Cache.BvhData;
                nodesCount = scene.Cache.BvhData.Length;
            }

            // The size report uses 32 bytes per BVH entry.
            Tracer.TraceLine("BVH Data Size {0:F3} MBytes", (nodesCount * 32f) / (1024f * 1024f));
        }
Exemplo n.º 21
0
        /// <summary>
        /// Double-precision, device-memory overload that forwards to cudnnPoolingBackward
        /// after validating the arguments.
        /// </summary>
        /// <param name="pooling">Pooling descriptor.</param>
        /// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
        /// <param name="srcData">Source data in device memory.</param>
        /// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
        /// <param name="srcDiffData">Source differential in device memory.</param>
        /// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
        /// <param name="destData">Destination data in device memory.</param>
        /// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
        /// <param name="destDiffData">Destination differential in device memory.</param>
        public void Backward(CudnnPoolingDescriptor pooling, CudnnTensorDescriptor srcTensor, CudaDeviceVariable<double> srcData, CudnnTensorDescriptor srcDiffTensor, CudaDeviceVariable<double> srcDiffData,
                                                             CudnnTensorDescriptor destTensor, CudaDeviceVariable<double> destData, CudnnTensorDescriptor destDiffTensor, CudaDeviceVariable<double> destDiffData)
        {
            Contract.Requires(pooling != null);
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, srcDiffTensor, destTensor, destDiffTensor);

            Invoke(() => CudnnNativeMethods.cudnnPoolingBackward(
                handle, pooling.Handle,
                srcTensor.Handle, srcData.DevicePointer,
                srcDiffTensor.Handle, srcDiffData.DevicePointer,
                destTensor.Handle, destData.DevicePointer,
                destDiffTensor.Handle, destDiffData.DevicePointer));
        }
Exemplo n.º 22
0
        /// <summary>
        /// Loads the "generateData" kernel, configures it for a single block in a single-block
        /// grid, allocates the rotation matrix, peak values, local optima and scale arrays on
        /// the device, and runs the kernel with a seed derived from the function and instance
        /// numbers.
        /// </summary>
        protected override void Init()
        {
            var generateData = Ctx.LoadKernel(KernelFile, "generateData");
            generateData.BlockDimensions = 1;
            generateData.GridDimensions = 1;

            Rotation = new CudaDeviceVariable<double>(DimensionsCount * DimensionsCount);
            PeakValues = new CudaDeviceVariable<double>(PeaksCount);
            XLocal = new CudaDeviceVariable<double>(DimensionsCount * PeaksCount);
            ArrScales = new CudaDeviceVariable<double>(DimensionsCount * PeaksCount);

            int seed = FunctionNumber + 10000 * InstanceNumber;

            generateData.Run(DimensionsCount, seed, Rotation.DevicePointer, PeaksCount, PeakValues.DevicePointer, XLocal.DevicePointer, ArrScales.DevicePointer);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Initializes the function data: allocates <c>Xopt</c>, <c>M</c> and <c>B</c> on the
        /// device, fills the 12-term coefficient tables (0.5^i and 3^i) assigned to <c>Ak</c>
        /// and <c>Bk</c> while adding the corresponding terms to <c>F0</c>, and runs the
        /// "generateData" kernel, whose single-element output is stored in <c>Fopt</c>.
        /// </summary>
        protected override void Init()
        {
            var kernelFileName = KernelFile;
            var initKernel = Ctx.LoadKernel(kernelFileName, "generateData");
            Xopt = new CudaDeviceVariable<double>(DimensionsCount);
            M = new CudaDeviceVariable<double>(DimensionsCount * DimensionsCount);
            B = new CudaDeviceVariable<double>(DimensionsCount);

            long rseed = FunctionNumber + 10000 * InstanceNumber;

            double[] h_ak = new double[12];
            double[] h_bk = new double[12];

            // NOTE(review): F0 is accumulated onto its current value, not reset here -
            // confirm Init is only ever called once per instance.
            for (int i = 0; i < 12; i++)
            {
                h_ak[i] = Math.Pow(0.5, i);
                h_bk[i] = Math.Pow(3.0, i);
                F0 += h_ak[i]*Math.Cos(Math.PI*h_bk[i]);
            }

            Ak = h_ak;
            Bk = h_bk;

            // d_fopt is only a scratch buffer for the kernel's scalar result; dispose it
            // once the value has been read back so the device allocation is not leaked.
            using (var d_fopt = new CudaDeviceVariable<double>(1))
            {
                initKernel.Run(
                    DimensionsCount,
                    rseed,
                    FunctionNumber,
                    InstanceNumber,
                    M.DevicePointer,
                    B.DevicePointer,
                    Xopt.DevicePointer,
                    d_fopt.DevicePointer);

                // Converting the device variable to an array reads the value back to the host.
                double[] fopt_arr = d_fopt;

                Fopt = fopt_arr[0];
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Computes the edge cues for <paramref name="image"/> in three kernel launches:
        /// a mean-edge-strength reduction into <paramref name="scratch_mem"/>, a final
        /// reduction over the per-block partial results, and the edge-cue kernel that writes
        /// the eight directional outputs.
        /// </summary>
        /// <param name="alpha">Forwarded to the edge-cue kernel.</param>
        /// <param name="image">Input image, bound to the "imageTex" texture of both kernels.</param>
        /// <param name="left_transposed">Output buffer (left, transposed layout).</param>
        /// <param name="right_transposed">Output buffer (right, transposed layout); its pitch / 4 is passed as the transposed pitch.</param>
        /// <param name="top">Output buffer (top); its pitch / 4 is passed as the regular pitch.</param>
        /// <param name="bottom">Output buffer (bottom).</param>
        /// <param name="topleft">Output buffer (top-left).</param>
        /// <param name="topright">Output buffer (top-right).</param>
        /// <param name="bottomleft">Output buffer (bottom-left).</param>
        /// <param name="bottomright">Output buffer (bottom-right).</param>
        /// <param name="width">Image width.</param>
        /// <param name="height">Image height.</param>
        /// <param name="scratch_mem">Scratch buffer shared by the three kernels.</param>
        public void EdgeCues(float alpha, CudaPitchedDeviceVariable<uchar4> image, CudaPitchedDeviceVariable<int> left_transposed, CudaPitchedDeviceVariable<int> right_transposed,
                             CudaPitchedDeviceVariable<int> top, CudaPitchedDeviceVariable<int> bottom, CudaPitchedDeviceVariable<int> topleft, CudaPitchedDeviceVariable<int> topright,
                             CudaPitchedDeviceVariable<int> bottomleft, CudaPitchedDeviceVariable<int> bottomright, int width, int height, CudaDeviceVariable<byte> scratch_mem)
        {
            // Bind the image to the reduction kernel's texture (create on first use, rebind afterwards).
            if (texref != null)
            {
                texref.Reset(image);
            }
            else
            {
                texref = new CudaTextureLinearPitched2D<uchar4>(MeanEdgeStrengthReductionKernel, "imageTex", CUAddressMode.Clamp, CUFilterMode.Point, CUTexRefSetFlags.ReadAsInteger, CUArrayFormat.UnsignedInt8, image);
            }

            // Same for the edge-cue kernel's texture.
            if (texref2 != null)
            {
                texref2.Reset(image);
            }
            else
            {
                texref2 = new CudaTextureLinearPitched2D<uchar4>(EdgeCuesKernel, "imageTex", CUAddressMode.Clamp, CUFilterMode.Point, CUTexRefSetFlags.ReadAsInteger, CUArrayFormat.UnsignedInt8, image);
            }

            // One grid cell per 32x32 pixel tile, rounded up.
            dim3 tileGrid = new dim3((width + 31) / 32, (height + 31) / 32, 1);
            dim3 smallBlock = new dim3(32, 4, 1);
            dim3 reductionBlock = new dim3(32, 8, 1);

            MeanEdgeStrengthReductionKernel.BlockDimensions = reductionBlock;
            MeanEdgeStrengthReductionKernel.GridDimensions = tileGrid;
            MeanEdgeStrengthFinalKernel.BlockDimensions = smallBlock;
            MeanEdgeStrengthFinalKernel.GridDimensions = new dim3(1, 1, 1);
            EdgeCuesKernel.BlockDimensions = smallBlock;
            EdgeCuesKernel.GridDimensions = tileGrid;

            // Pass 1: per-tile reduction into the scratch buffer.
            MeanEdgeStrengthReductionKernel.Run(width, height, scratch_mem.DevicePointer);

            // Pass 2: single-block reduction over the grid.x * grid.y partial results.
            MeanEdgeStrengthFinalKernel.Run(scratch_mem.DevicePointer, tileGrid.x * tileGrid.y);

            // Pass 3: edge cues; pitches are converted from bytes to 4-byte elements.
            EdgeCuesKernel.Run(
                alpha, scratch_mem.DevicePointer,
                left_transposed.DevicePointer, right_transposed.DevicePointer,
                top.DevicePointer, bottom.DevicePointer,
                topleft.DevicePointer, topright.DevicePointer,
                bottomleft.DevicePointer, bottomright.DevicePointer,
                (int)top.Pitch / 4, (int)right_transposed.Pitch / 4,
                width, height);
        }
Exemplo n.º 25
0
        /// <summary>
        /// Batched perspective warp. Forwards the complete batch list to nppiWarpPerspectiveBatch_16f_C4R.
        /// </summary>
        /// <param name="oSmallestSrcSize">Size in pixels of the entire smallest source image width and height, may be from different images.</param>
        /// <param name="oSrcRectROI">Region of interest in the source images (may overlap source image size width and height).</param>
        /// <param name="oDstRectROI">Region of interest in the destination images (may overlap destination image size width and height).</param>
        /// <param name="eInterpolation">Interpolation used for resampling. Currently limited to NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, or NPPI_INTER_SUPER.</param>
        /// <param name="pBatchList">Device memory list of NppiWarpAffineBatchCXR structures; its Size is passed on as the batch size.</param>
        public static void WarpPerspectiveBatch(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiRect oDstRectROI, InterpolationMode eInterpolation, CudaDeviceVariable <NppiWarpAffineBatchCXR> pBatchList)
        {
            const string nativeName = "nppiWarpPerspectiveBatch_16f_C4R";

            // Pointer and length of the device-side batch description.
            var batchPointer = pBatchList.DevicePointer;
            var batchSize    = pBatchList.Size;

            NppStatus status;
            status = NPPNativeMethods.NPPi.GeometricTransforms.nppiWarpPerspectiveBatch_16f_C4R(
                oSmallestSrcSize, oSrcRectROI, oDstRectROI, eInterpolation, batchPointer, batchSize);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, null);
        }
Exemplo n.º 26
0
            /// <summary>
            /// Loads all sprites of the world into the shared bitmap buffer, registers the resulting
            /// graphics prototypes with the world and uploads the initial agent position, the bitmaps
            /// and the map to the device.
            /// </summary>
            public override void Execute()
            {
                // Load bitmaps and pass them to the game objects.
                // 'offset' is the running write position inside Owner.Bitmaps; it is advanced by
                // whatever FillWithChannelFromBitmap returns for each sprite.
                int offset = 0;
                CudaDeviceVariable <float> devBitmaps = Owner.Bitmaps.GetDevice(Owner);

                // NOTE: the load order defines the layout of the bitmap buffer, keep it stable.
                Owner.m_tale_obstacle_g = LoadGraphicsPrototype("taleObstacle.png", devBitmaps, ref offset);
                Owner.m_tale_empty_g    = LoadGraphicsPrototype("taleEmpty.png", devBitmaps, ref offset);
                Owner.m_agent_g         = LoadGraphicsPrototype("agent.png", devBitmaps, ref offset);

                MyGraphicsPrototype doorOpened_g       = LoadGraphicsPrototype("doorOpened.png", devBitmaps, ref offset);
                MyGraphicsPrototype doorClosed_g       = LoadGraphicsPrototype("doorClosed.png", devBitmaps, ref offset);
                MyGraphicsPrototype doorControl_g      = LoadGraphicsPrototype("doorControl.png", devBitmaps, ref offset);
                MyGraphicsPrototype doorControlOff_g   = LoadGraphicsPrototype("doorControlOff.png", devBitmaps, ref offset);
                MyGraphicsPrototype lightsControl_g    = LoadGraphicsPrototype("lightsControl.png", devBitmaps, ref offset);
                MyGraphicsPrototype lightsControlOff_g = LoadGraphicsPrototype("lightsControlOff.png", devBitmaps, ref offset);
                MyGraphicsPrototype lightsOn_g         = LoadGraphicsPrototype("lightsOn.png", devBitmaps, ref offset);
                MyGraphicsPrototype lightsOff_g        = LoadGraphicsPrototype("lightsOff.png", devBitmaps, ref offset);

                // parse the map and instantiate all parts of the world
                Owner.World.registerGraphics(
                    Owner.m_tale_empty_g, Owner.m_tale_obstacle_g, Owner.m_agent_g,
                    doorOpened_g, doorClosed_g, doorControl_g, doorControlOff_g,
                    lightsControl_g, lightsControlOff_g, lightsOff_g, lightsOn_g);

                Owner.AgentPosX.Host[0] = Owner.World.GetAgent().GetPosition().x;
                Owner.AgentPosY.Host[0] = Owner.World.GetAgent().GetPosition().y;

                Owner.AgentPosX.SafeCopyToDevice();
                Owner.AgentPosY.SafeCopyToDevice();
                Owner.Bitmaps.SafeCopyToDevice();
                Array.Copy(Owner.World.GetArray(), Owner.MapTales.Host, Owner.MapTales.Count);
                Owner.MapTales.SafeCopyToDevice();

                Owner.PublishWorldSize();
            }

            /// <summary>
            /// Looks up one texture of the current texture set, writes it into the host side of the
            /// bitmap buffer at <paramref name="offset"/> and returns a prototype that points at the
            /// matching position of the device buffer.
            /// </summary>
            /// <param name="fileName">File name of the texture, appended to Owner.TEXTURE_SET to form the table key.</param>
            /// <param name="devBitmaps">Device side of the bitmap buffer.</param>
            /// <param name="offset">Write position inside the bitmap buffer; advanced by the value returned from FillWithChannelFromBitmap.</param>
            /// <returns>Prototype carrying the pixel size and the device address of the texture.</returns>
            private MyGraphicsPrototype LoadGraphicsPrototype(string fileName, CudaDeviceVariable <float> devBitmaps, ref int offset)
            {
                Bitmap bitmap = Owner.m_bitmapTable[Owner.TEXTURE_SET + fileName];

                // The device address has to be taken before the offset is advanced.
                MyGraphicsPrototype prototype = new MyGraphicsPrototype()
                {
                    PixelSize = new int2(bitmap.Width, bitmap.Height),
                    Bitmap    = devBitmaps.DevicePointer + devBitmaps.TypeSize * offset,
                };

                offset += FillWithChannelFromBitmap(bitmap, 0, Owner.Bitmaps.Host, offset);

                return prototype;
            }
Exemplo n.º 27
0
 /// <summary>
 /// Sizes the launch to the ROI of <paramref name="imgOut"/> and runs the kernel.
 /// </summary>
 /// <param name="imgIn">Input buffer; its device pointer is the first kernel argument.</param>
 /// <param name="imgOut">Output image; pointer, ROI width, ROI height and pitch are passed to the kernel.</param>
 /// <param name="maxVal">Value forwarded unchanged as the third kernel argument.</param>
 /// <returns>The value returned by the underlying Run call.</returns>
 public float RunSafe(CudaDeviceVariable <ushort> imgIn, NPPImage_32fC3 imgOut, float maxVal)
 {
     var roiWidth  = imgOut.WidthRoi;
     var roiHeight = imgOut.HeightRoi;

     // One compute element per ROI pixel.
     SetComputeSize((uint)roiWidth, (uint)roiHeight);

     float result = base.Run(imgIn.DevicePointer, imgOut.DevicePointer, maxVal, roiWidth, roiHeight, imgOut.Pitch);
     return result;
 }
Exemplo n.º 28
0
        /// <summary>
        /// Replaces the host array and the device buffer of this memory block with new ones
        /// holding <paramref name="newCount"/> elements.
        /// </summary>
        /// <param name="newCount">New number of elements.</param>
        /// <param name="copyData">When true, existing host data (up to the smaller of the old and the new count) and device data are copied into the new buffers.</param>
        /// <returns>True on success; false when allocating the new host array or the new device buffer threw.</returns>
        /// <exception cref="InvalidOperationException">The block is not dynamic (IsDynamic is false).</exception>
        public override bool Reallocate(int newCount, bool copyData = true)
        {
            // TODO(HonzaS): Some of the current models need this during Execute().
            // TODO(HonzaS): Research will have to switch to the new model, but there is no reason to forbid it now.

            //// TODO(HonzaS): The simulation should be accessible in a better way.
            //if (!Owner.Owner.SimulationHandler.Simulation.IsStepFinished)
            //    throw new InvalidOperationException("Reallocate called from Execute()");

            if (!IsDynamic)
            {
                MyLog.ERROR.WriteLine(
                    "Cannot reallocate a static memory block. Use the DynamicAttribute to mark a memory block as dynamic.");
                throw new InvalidOperationException("Cannot reallocate non-dynamic memory block.");
            }

            MyLog.DEBUG.WriteLine("Reallocating {0} from {1} to {2}", Name, Count, newCount);

            int oldCount = Count;

            // NOTE(review): Count is changed before any allocation is attempted and is not
            // restored on the 'return(false)' paths below, so a failed reallocation leaves
            // Count == newCount while Host/Device still have their previous size - confirm
            // whether callers rely on this.
            Count = newCount;

            // A block that was empty so far gets its device side allocated first; presumably
            // AllocateDevice() sizes the buffer from the Count assigned above - TODO confirm.
            if (oldCount == 0)
            {
                AllocateDevice();
            }

            // Make sure that both the host and device have enough memory. Allocate first.
            // If one of the allocations fails, return (moving out of scope will get rid of any allocated memory).
            // NOTE(review): the statement above holds for the managed host array; whether the
            // device memory is released when newDeviceMemory goes out of scope depends on
            // CudaDeviceVariable / AllocateMemory ownership semantics - verify.

            T[] newHostMemory;
            CudaDeviceVariable <T> newDeviceMemory;

            try
            {
                newHostMemory = new T[newCount];
            }
            catch
            {
                // Any exception (e.g. out of memory, negative newCount) is reported as failure.
                //MyLog.WARNING.WriteLine("Could not reallocate host memory.");
                return(false);
            }

            try
            {
                newDeviceMemory = new CudaDeviceVariable <T>(
                    MyKernelFactory.Instance.GetContextByGPU(Owner.GPU).AllocateMemory(
                        newCount * Marshal.SizeOf(typeof(T))));

                // The argument evaluates to 0u, i.e. the new device buffer is zero-filled.
                newDeviceMemory.Memset(BitConverter.ToUInt32(BitConverter.GetBytes(0), 0));
            }
            catch
            {
                // Any exception from the allocation or the memset is reported as failure.
                //MyLog.WARNING.WriteLine("Could not reallocate device memory.");
                return(false);
            }

            // Both the host and the device have enough memory for the reallocation.

            if (copyData)
            {
                // Copy the host data.
                Array.Copy(Host, newHostMemory, Math.Min(newCount, oldCount));

                // Copy the device data.
                // NOTE(review): no explicit element count is passed here; how many elements are
                // transferred when the old and new sizes differ is decided by CopyToDevice - verify.
                newDeviceMemory.CopyToDevice(Device[Owner.GPU]);
            }

            // This will get rid of the original host memory.
            Host = newHostMemory;

            // Explicit dispose so that if there's a reference anywhere, we'll find out.
            MyLog.DEBUG.WriteLine("Disposing device memory in Reallocate()");
            Device[Owner.GPU].Dispose();
            Device[Owner.GPU] = newDeviceMemory;

            return(true);
        }
Exemplo n.º 29
0
 /// <summary>
 /// Inverse DCT, de-quantization and level shift step of JPEG decoding.
 /// Input is expected in 64x1 macro blocks, output in 8x8 macro blocks.
 /// This is the newer variant of the primitive: the ROI is given in image pixels and
 /// the DCT coefficients are expected in zig-zag order.
 /// </summary>
 /// <param name="src">Source image.</param>
 /// <param name="dst">Destination image.</param>
 /// <param name="oSizeRoi">Roi size (in pixels).</param>
 /// <param name="QuantInvTable">Quantization table in zig-zag order.</param>
 public void DCTQuantInv8x8LS(NPPImage_16sC1 src, NPPImage_8uC1 dst, NppiSize oSizeRoi, CudaDeviceVariable <byte> QuantInvTable)
 {
     const string nativeName = "nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R_NEW";

     status = NPPNativeMethods.NPPi.CompressionDCT.nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R_NEW(
         src.DevicePointer, src.Pitch,
         dst.DevicePointer, dst.Pitch,
         QuantInvTable.DevicePointer,
         oSizeRoi,
         _state);

     // Trace the native call and hand its status to the common NPP status check.
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
     NPPException.CheckNppStatus(status, null);
 }
Exemplo n.º 30
0
 /// <summary>
 /// Convolution filter applied to the ROI of this image (nppiFilter32f_16f_C4R).
 /// </summary>
 /// <param name="dst">Destination image.</param>
 /// <param name="pKernel">Device array holding the kernel coefficients.<para/>
 /// Coefficients are expected to be stored in reverse order.</param>
 /// <param name="oKernelSize">Width and height of the rectangular kernel.</param>
 /// <param name="oAnchor">X and Y offsets of the kernel origin frame of reference.</param>
 public void Filter(NPPImage_16fC4 dst, CudaDeviceVariable <float> pKernel, NppiSize oKernelSize, NppiPoint oAnchor)
 {
     const string nativeName = "nppiFilter32f_16f_C4R";

     status = NPPNativeMethods.NPPi.Convolution.nppiFilter32f_16f_C4R(
         _devPtrRoi, _pitch,
         dst.DevicePointerRoi, dst.Pitch,
         _sizeRoi,
         pKernel.DevicePointer, oKernelSize, oAnchor);

     // Trace the native call and hand its status to the common NPP status check.
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
     NPPException.CheckNppStatus(status, this);
 }
Exemplo n.º 31
0
        /// <summary>
        /// Label markers image generation for a batch of images; one fixed destination ROI is applied to all of them.
        /// </summary>
        /// <param name="pSrcBatchList">Device memory list of source image descriptors, oSize element is ignored. Its Size is passed on as the batch size.</param>
        /// <param name="pDstBatchList">Device memory list of destination image descriptors, oSize element is ignored.</param>
        /// <param name="oSizeROI">Region-of-Interest (ROI).</param>
        /// <param name="eNorm">Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity.</param>
        public static void LabelMarkersUFBatch(CudaDeviceVariable <NppiImageDescriptor> pSrcBatchList, CudaDeviceVariable <NppiImageDescriptor> pDstBatchList,
                                               NppiSize oSizeROI, NppiNorm eNorm)
        {
            const string nativeName = "nppiLabelMarkersUFBatch_32u_C1R";

            var sourceList      = pSrcBatchList.DevicePointer;
            var destinationList = pDstBatchList.DevicePointer;
            var batchSize       = pSrcBatchList.Size;

            NppStatus status;
            status = NPPNativeMethods.NPPi.LabelMarkers.nppiLabelMarkersUFBatch_32u_C1R(
                sourceList, destinationList, batchSize, oSizeROI, eNorm);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, pSrcBatchList);
        }
Exemplo n.º 32
0
		/// <summary>
		/// Convolution filter applied to the ROI of this image (nppiFilter32f_32s_C4R).
		/// </summary>
		/// <param name="dst">Destination image.</param>
		/// <param name="pKernel">Device array holding the kernel coefficients.<para/>
		/// Coefficients are expected to be stored in reverse order.</param>
		/// <param name="oKernelSize">Width and height of the rectangular kernel.</param>
		/// <param name="oAnchor">X and Y offsets of the kernel origin frame of reference.</param>
		public void Filter(NPPImage_32sC4 dst, CudaDeviceVariable<float> pKernel, NppiSize oKernelSize, NppiPoint oAnchor)
		{
			const string nativeName = "nppiFilter32f_32s_C4R";

			status = NPPNativeMethods.NPPi.Convolution.nppiFilter32f_32s_C4R(
				_devPtrRoi, _pitch,
				dst.DevicePointerRoi, dst.Pitch,
				_sizeRoi,
				pKernel.DevicePointer, oKernelSize, oAnchor);

			// Trace the native call and hand its status to the common NPP status check.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
			NPPException.CheckNppStatus(status, this);
		}
Exemplo n.º 33
0
		/// <summary>
		/// Image average relative error between the ROI of this image and <paramref name="src2"/>.
		/// </summary>
		/// <param name="src2">2nd source image.</param>
		/// <param name="pError">Device variable receiving the computed error.</param>
		/// <param name="buffer">User-allocated scratch buffer; must not be smaller than the size returned by AverageRelativeErrorGetBufferHostSize().</param>
		public void AverageRelativeError(NPPImage_32sC4 src2, CudaDeviceVariable<double> pError, CudaDeviceVariable<byte> buffer)
		{
			const string nativeName = "nppiAverageRelativeError_32s_C4R";

			// Refuse scratch buffers that are smaller than the reported requirement.
			int requiredSize = AverageRelativeErrorGetBufferHostSize();
			bool bufferTooSmall = requiredSize > buffer.Size;
			if (bufferTooSmall)
			{
				throw new NPPException("Provided buffer is too small.");
			}

			status = NPPNativeMethods.NPPi.AverageRelativeError.nppiAverageRelativeError_32s_C4R(
				_devPtrRoi, _pitch,
				src2.DevicePointerRoi, src2.Pitch,
				_sizeRoi,
				pError.DevicePointer, buffer.DevicePointer);

			// Trace the native call and hand its status to the common NPP status check.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
			NPPException.CheckNppStatus(status, this);
		}
Exemplo n.º 34
0
 /// <summary>
 /// 32-bit float convolution filter with border control (nppiFilterBorder32f_16f_C4R).
 /// </summary>
 /// <param name="dest">Destination image.</param>
 /// <param name="pKernel">Device array holding the kernel coefficients. Coefficients are expected to be stored in reverse order.</param>
 /// <param name="nKernelSize">Width and height of the rectangular kernel.</param>
 /// <param name="oAnchor">X and Y offsets of the kernel origin frame of reference relative to the source pixel.</param>
 /// <param name="eBorderType">The border type operation to be applied at source image border boundaries.</param>
 public void FilterBorder(NPPImage_16fC4 dest, CudaDeviceVariable <float> pKernel, NppiSize nKernelSize, NppiPoint oAnchor, NppiBorderType eBorderType)
 {
     const string nativeName = "nppiFilterBorder32f_16f_C4R";

     // The source is described by its base pointer, original size and ROI offset;
     // the destination by its ROI pointer and ROI size.
     status = NPPNativeMethods.NPPi.FilterBorder32f.nppiFilterBorder32f_16f_C4R(
         _devPtr, _pitch, _sizeOriginal, _pointRoi,
         dest.DevicePointerRoi, dest.Pitch, dest.SizeRoi,
         pKernel.DevicePointer, nKernelSize, oAnchor,
         eBorderType);

     // Trace the native call and hand its status to the common NPP status check.
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
     NPPException.CheckNppStatus(status, this);
 }
Exemplo n.º 35
0
		/// <summary>
		/// Filters the image with a separable Gaussian filter kernel using caller supplied floating point coefficients.
		/// </summary>
		/// <param name="dst">Destination image.</param>
		/// <param name="Kernel">Device array of nFilterTaps kernel coefficients which sum to 1.0F, where nFilterTaps = 2 * ((int)((float)ceil(radius) + 0.5F) ) + 1.
		/// Its Size is passed on as the tap count.</param>
		public void FilterGauss(NPPImage_8uC4 dst, CudaDeviceVariable<float> Kernel)
		{
			const string nativeName = "nppiFilterGaussAdvanced_8u_C4R";

			status = NPPNativeMethods.NPPi.FixedFilters.nppiFilterGaussAdvanced_8u_C4R(
				_devPtrRoi, _pitch,
				dst.DevicePointerRoi, dst.Pitch,
				_sizeRoi,
				Kernel.Size, Kernel.DevicePointer);

			// Trace the native call and hand its status to the common NPP status check.
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
			NPPException.CheckNppStatus(status, this);
		}
 /// <summary>
 /// Synchronous copy from device to host. Convenience overload that forwards the
 /// device pointer of <paramref name="devicePtr"/> to the pointer based overload.
 /// </summary>
 /// <param name="devicePtr">Device variable to copy from.</param>
 public void SynchronCopyToHost(CudaDeviceVariable <T> devicePtr)
 {
     var source = devicePtr.DevicePointer;

     SynchronCopyToHost(source);
 }
Exemplo n.º 37
0
        /// <summary>
        /// Image average relative error between the ROI of this image and <paramref name="src2"/>.
        /// </summary>
        /// <param name="src2">2nd source image.</param>
        /// <param name="pError">Device variable receiving the computed error.</param>
        /// <param name="buffer">User-allocated scratch buffer; must not be smaller than the size returned by AverageRelativeErrorGetBufferHostSize().</param>
        public void AverageRelativeError(NPPImage_32uC1 src2, CudaDeviceVariable <double> pError, CudaDeviceVariable <byte> buffer)
        {
            const string nativeName = "nppiAverageRelativeError_32u_C1R";

            // Refuse scratch buffers that are smaller than the reported requirement.
            int requiredSize = AverageRelativeErrorGetBufferHostSize();
            bool bufferTooSmall = requiredSize > buffer.Size;

            if (bufferTooSmall)
            {
                throw new NPPException("Provided buffer is too small.");
            }

            status = NPPNativeMethods.NPPi.AverageRelativeError.nppiAverageRelativeError_32u_C1R(
                _devPtrRoi, _pitch,
                src2.DevicePointerRoi, src2.Pitch,
                _sizeRoi,
                pError.DevicePointer, buffer.DevicePointer);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, this);
        }
Exemplo n.º 38
0
        /// <summary>
        /// In-place color twist for a batch of images (nppiColorTwistBatch32f_16f_C4IR).
        /// An input 4x5 color twist matrix with floating-point coefficient values, including a constant vector in its last column,
        /// is applied within ROI. The color twist matrix can vary per image. The same ROI is applied to each image.
        /// </summary>
        /// <param name="nMin">Minimum clamp value.</param>
        /// <param name="nMax">Maximum saturation and clamp value.</param>
        /// <param name="oSizeROI">Region of interest, applied to every image of the batch.</param>
        /// <param name="pBatchList">Device memory list of NppiColorTwistBatchCXR structures; its Size is passed on as the batch size.</param>
        public static void ColorTwistBatchIC(float nMin, float nMax, NppiSize oSizeROI, CudaDeviceVariable <NppiColorTwistBatchCXR> pBatchList)
        {
            const string nativeName = "nppiColorTwistBatch32f_16f_C4IR";

            var batchPointer = pBatchList.DevicePointer;
            var batchSize    = pBatchList.Size;

            NppStatus status;
            status = NPPNativeMethods.NPPi.ColorTwistBatch.nppiColorTwistBatch32f_16f_C4IR(
                nMin, nMax, oSizeROI, batchPointer, batchSize);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, pBatchList);
        }
Exemplo n.º 39
0
 /// <summary>
 /// 1 channel 32-bit to 32-bit unsigned integer label markers image generation.
 /// </summary>
 /// <param name="dest">Destination image.</param>
 /// <param name="eNorm">Type of pixel connectivity test to use, nppiNormInf will use 8 way connectivity and nppiNormL1 will use 4 way connectivity.</param>
 /// <param name="pBuffer">Device memory scratch buffer at least as large as the value returned by the corresponding LabelMarkersUFGetBufferSize call.</param>
 public void LabelMarkersUF(NPPImage_32uC1 dest, NppiNorm eNorm, CudaDeviceVariable <byte> pBuffer)
 {
     const string nativeName = "nppiLabelMarkersUF_32u_C1R";

     status = NPPNativeMethods.NPPi.LabelMarkers.nppiLabelMarkersUF_32u_C1R(
         _devPtrRoi, _pitch,
         dest.DevicePointerRoi, dest.Pitch,
         _sizeRoi,
         eNorm,
         pBuffer.DevicePointer);

     // Trace the native call and hand its status to the common NPP status check.
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
     NPPException.CheckNppStatus(status, this);
 }
Exemplo n.º 40
0
        /// <summary>
        /// simpleCUBLAS test: multiplies two pseudo-random N x N matrices once on the host
        /// (simple_sgemm) and once on the device (CudaBlas.Gemm) and compares both results
        /// through the relative L2 error ||C_ref - C|| / ||C_ref||.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            int   N     = 275;
            float alpha = 1.0f;
            float beta  = 0.0f;
            int   n2    = N * N;

            /* Initialize CUBLAS */
            Console.WriteLine("simpleCUBLAS test running.");

            // The using statements release the CUBLAS handle and every device buffer on all
            // exit paths, including the early return for a zero reference norm and exceptions.
            using (CudaBlas handle = new CudaBlas())
            {
                /* Allocate host memory for the matrices */
                float[] h_A     = new float[n2];
                float[] h_B     = new float[n2];
                float[] h_C_ref = new float[n2];

                // Fixed seed keeps the test data reproducible.
                Random rand = new Random(0);

                /* Fill the matrices with test data */
                for (int i = 0; i < n2; i++)
                {
                    h_A[i] = (float)rand.NextDouble();
                    h_B[i] = (float)rand.NextDouble();
                }

                /* Allocate device memory for the matrices */
                using (CudaDeviceVariable <float> d_A = new CudaDeviceVariable <float>(n2))
                using (CudaDeviceVariable <float> d_B = new CudaDeviceVariable <float>(n2))
                using (CudaDeviceVariable <float> d_C = new CudaDeviceVariable <float>(n2))
                {
                    /* Initialize the device matrices with the host matrices */
                    d_A.CopyToDevice(h_A);
                    d_B.CopyToDevice(h_B);

                    /* Performs operation using plain C code */
                    simple_sgemm(N, alpha, h_A, h_B, beta, h_C_ref);

                    /* Performs operation using cublas */
                    handle.Gemm(Operation.NonTranspose, Operation.NonTranspose, N, N, N, alpha, d_A, N, d_B, N, beta, d_C, N);

                    /* Read back the result from device memory (conversion to a host array) */
                    float[] h_C = d_C;

                    /* Check result against reference */
                    float error_norm = 0;
                    float ref_norm   = 0;

                    for (int i = 0; i < n2; ++i)
                    {
                        float diff = h_C_ref[i] - h_C[i];
                        error_norm += diff * diff;
                        ref_norm   += h_C_ref[i] * h_C_ref[i];
                    }

                    // Both accumulators hold sums of squares; take the square root of each so
                    // that the quotient below is a ratio of two L2 norms.
                    error_norm = (float)Math.Sqrt((double)error_norm);
                    ref_norm   = (float)Math.Sqrt((double)ref_norm);

                    if (Math.Abs(ref_norm) < 1e-7)
                    {
                        Console.WriteLine("!!!! reference norm is 0");
                        return;
                    }

                    if (error_norm / ref_norm < 1e-6f)
                    {
                        Console.WriteLine("simpleCUBLAS test passed.");
                    }
                    else
                    {
                        Console.WriteLine("simpleCUBLAS test failed.");
                    }
                }
            }
        }
Exemplo n.º 41
0
        /// <summary>
        /// Returns the device address of the element at <paramref name="offset"/> inside the
        /// device variable of the given GPU.
        /// </summary>
        /// <param name="GPU">GPU whose device variable is looked up through GetDevice.</param>
        /// <param name="offset">Element index; it is scaled by the element size of the device variable.</param>
        /// <returns>The offset device pointer, or default(CUdeviceptr) when GetDevice returns null.</returns>
        public override CUdeviceptr GetDevicePtr(int GPU, int offset)
        {
            CudaDeviceVariable <T> deviceVar = GetDevice(GPU);

            // Nothing is available for this GPU: hand back the default pointer.
            if (deviceVar == null)
            {
                return(default(CUdeviceptr));
            }

            return(deviceVar.DevicePointer + offset * deviceVar.TypeSize);
        }
Exemplo n.º 42
0
        /// <summary>
        /// Image maximum error between the ROI of this image and <paramref name="src2"/>.
        /// </summary>
        /// <param name="src2">2nd source image.</param>
        /// <param name="pError">Device variable receiving the computed error.</param>
        /// <param name="buffer">User-allocated scratch buffer; must not be smaller than the size returned by MaxErrorGetBufferHostSize.</param>
        /// <param name="nppStreamCtx">NPP stream context.</param>
        public void MaxError(NPPImage_32fcC2 src2, CudaDeviceVariable <double> pError, CudaDeviceVariable <byte> buffer, NppStreamContext nppStreamCtx)
        {
            const string nativeName = "nppiMaximumError_32fc_C2R_Ctx";

            // Refuse scratch buffers that are smaller than the reported requirement.
            int requiredSize = MaxErrorGetBufferHostSize(nppStreamCtx);
            bool bufferTooSmall = requiredSize > buffer.Size;

            if (bufferTooSmall)
            {
                throw new NPPException("Provided buffer is too small.");
            }

            status = NPPNativeMethods_Ctx.NPPi.MaximumError.nppiMaximumError_32fc_C2R_Ctx(
                _devPtrRoi, _pitch,
                src2.DevicePointerRoi, src2.Pitch,
                _sizeRoi,
                pError.DevicePointer, buffer.DevicePointer,
                nppStreamCtx);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, this);
        }
Exemplo n.º 43
0
        /// <summary>
        /// Huffman encoding step of JPEG encoding.<para/>
        /// Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
        /// </summary>
        /// <param name="pSrc">Source images; the first three entries are used.</param>
        /// <param name="restartInterval">Restart Interval, see JPEG standard.</param>
        /// <param name="Ss">Start Coefficient, see JPEG standard.</param>
        /// <param name="Se">End Coefficient, see JPEG standard.</param>
        /// <param name="Ah">Bit Approximation High, see JPEG standard.</param>
        /// <param name="Al">Bit Approximation Low, see JPEG standard.</param>
        /// <param name="pDst">Byte-stuffed huffman encoded JPEG scan.</param>
        /// <param name="nLength">Byte length of the huffman encoded JPEG scan.</param>
        /// <param name="pHuffmanTableDC">DC Huffman table.</param>
        /// <param name="pHuffmanTableAC">AC Huffman table.</param>
        /// <param name="oSizeROI">ROI</param>
        /// <param name="buffer">Scratch buffer</param>
        public static void EncodeHuffmanScan(NPPImage_16sC1[] pSrc, int restartInterval, int Ss, int Se, int Ah, int Al,
                                             CudaDeviceVariable <byte> pDst, ref int nLength, NppiEncodeHuffmanSpec[] pHuffmanTableDC, NppiEncodeHuffmanSpec[] pHuffmanTableAC, NppiSize[] oSizeROI, CudaDeviceVariable <byte> buffer)
        {
            const string nativeName = "nppiEncodeHuffmanScan_JPEG_8u16s_P3R";
            const int    planeCount = 3;

            // Collect device pointer and pitch of each of the three source planes.
            CUdeviceptr[] planePointers = new CUdeviceptr[planeCount];
            int[]         planePitches  = new int[planeCount];

            for (int plane = 0; plane < planeCount; plane++)
            {
                planePointers[plane] = pSrc[plane].DevicePointer;
                planePitches[plane]  = pSrc[plane].Pitch;
            }

            NppStatus status = NPPNativeMethods.NPPi.CompressionDCT.nppiEncodeHuffmanScan_JPEG_8u16s_P3R(
                planePointers, planePitches,
                restartInterval, Ss, Se, Ah, Al,
                pDst.DevicePointer, ref nLength,
                pHuffmanTableDC, pHuffmanTableAC,
                oSizeROI, buffer.DevicePointer);

            // Trace the native call and hand its status to the common NPP status check.
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeName, status));
            NPPException.CheckNppStatus(status, null);
        }
Exemplo n.º 44
0
 /// <summary>
 /// Continues back propagation in the previous layer, passing this layer's _dx on to it.
 /// </summary>
 /// <param name="groundTrouth">Not read by this implementation.</param>
 /// <returns>Whatever the previous layer's BackPropagation returns.</returns>
 public override CudaDeviceVariable <float> BackPropagation(CudaDeviceVariable <float> groundTrouth)
 {
     var upstreamResult = _previousLayer.BackPropagation(_dx);

     return upstreamResult;
 }
Exemplo n.º 45
0
        /// <summary>
        /// Sets up an empty flow map: all values are 0 except for a single value of 1 at <paramref name="pos"/>.
        /// Also loads the two ensemble slices for the start time, binds them as CUDA textures and
        /// registers the flow map texture with CUDA.
        /// </summary>
        /// <param name="pos">Position of the single non-zero value; addressed as pos.X + pos.Y * _width.</param>
        /// <param name="startTime">Time step whose data is loaded; stored as current and start time.</param>
        public void SetupPoint(Int2 pos, int startTime)
        {
            // ~~~~~~~~~~~~~~ Copy relevant data ~~~~~~~~~~~~~~ \\
            // Count up when advection was executed.
            CurrentTime = startTime;
            _startTime  = startTime;

            // ~~~~~~~~~~~~ Load ensemble ~~~~~~~~~~~~ \\
            // Only the slices for the start time are loaded.
            ScalarField t1X;
            ScalarField t1Y;
            LoaderNCF   ncFile = RedSea.Singleton.GetLoaderNCF(_startTime);

            try
            {
                t1X = ncFile.LoadFieldSlice(_ensembleRanges[0]);
                t1Y = ncFile.LoadFieldSlice(_ensembleRanges[1]);
            }
            finally
            {
                // Close the loader even when reading a slice fails.
                ncFile.Close();
            }

            // ~~~~~~~~~~~~~~ Copy relevant data ~~~~~~~~~~~~~~ \\
            // Keep for plane creation and size reference.
            _ensembleGrid = t1X.Grid as RectlinearGrid;
            // Mapper for binding the SlimDX texture to CUDA easily.
            _cudaDxMapper = new CudaGraphicsInteropResourceCollection();
            // Tell CUDA which value is a border.
            _texInvalidValue = t1X.InvalidValue ?? float.MaxValue;

            // ~~~~~~~~~~~~ Fill CUDA resources ~~~~~~~~~~~~ \\
            // All members are above each other.
            int vHeight = _height * _numMembers;

            // vX, t=1
            _t1X = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
            _t1X.CopyFromHostToThis <float>(t1X.Data);
            new CudaTextureArray2D(_advectParticlesKernel, "vX_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1X);

            // vY, t=1
            _t1Y = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
            _t1Y.CopyFromHostToThis <float>(t1Y.Data);
            new CudaTextureArray2D(_advectParticlesKernel, "vY_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1Y);

            // ~~~~~~~~~~~~~ Create texture ~~~~~~~~~~~~~~~~~~~~ \\
            // Create texture. Completely zero, except for one point.
            Texture2DDescription desc = new Texture2DDescription
            {
                ArraySize         = 1,
                BindFlags         = BindFlags.ShaderResource,
                CpuAccessFlags    = CpuAccessFlags.None,
                Format            = Format.R32_Float,
                Width             = _width,
                Height            = _height,
                MipLevels         = 1,
                OptionFlags       = ResourceOptionFlags.None,
                SampleDescription = new SampleDescription(1, 0),
                Usage             = ResourceUsage.Default
            };

            // A freshly created array is already zero-initialized.
            float[] zeros = new float[_width * _height];

            // Allocate the pong buffer exactly once and upload the zeros explicitly. Assigning
            // the host array to the field instead would replace (and orphan) this allocation.
            _pongFlowMap = new CudaDeviceVariable <float>(zeros.Length);
            _pongFlowMap.CopyToDevice(zeros);

            // Add one pixel for integration.
            zeros[pos.X + pos.Y * _width] = 1;
            DataRectangle texData = new DataRectangle(_width * sizeof(float), new DataStream(zeros, true, true));

            // Create texture.
            FlowMap = new Texture2D(_device, desc, texData);

            // ~~~~~~~~~ Make textures mappable to CUDA ~~~~~~~~~~ \\
            _cudaDxMapper.Add(new CudaDirectXInteropResource(FlowMap.ComPointer, CUGraphicsRegisterFlags.None, CudaContext.DirectXVersion.D3D11));

            // The resource only has to stay mapped while the texture reference is set up.
            _cudaDxMapper.MapAllResources();
            CudaArray2D lastFlowMap = _cudaDxMapper[0].GetMappedArray2D(0, 0);

            new CudaTextureArray2D(_advectParticlesKernel, "flowMap", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, lastFlowMap);
            _cudaDxMapper.UnmapAllResources();
        }
Exemplo n.º 46
0
 /// <summary>
 /// Copies the supplied device data into the ground-truth buffer.
 /// </summary>
 /// <param name="data">Device variable passed to <c>_groundTrouthData.CopyToDevice</c>.</param>
 public void SetGroundTrouth(CudaDeviceVariable<float> data) => _groundTrouthData.CopyToDevice(data);
Exemplo n.º 47
0
 /// <summary>
 /// Configures the compute size for a square patch and runs the kernel on it.
 /// </summary>
 /// <param name="imgIn">Input image on the device.</param>
 /// <param name="imgOut">Output image on the device.</param>
 /// <param name="patchSize">Edge length of the square patch; used as both width and height.</param>
 /// <param name="blackPoint">Forwarded unchanged to the kernel.</param>
 /// <param name="scale">Forwarded unchanged to the kernel.</param>
 /// <returns>The value returned by <c>base.Run</c>.</returns>
 public float RunSafe(CudaDeviceVariable<float> imgIn, CudaDeviceVariable<float3> imgOut, int patchSize, float3 blackPoint, float3 scale)
 {
     // Presumably row pitches in bytes: 4 = sizeof(float), 12 = sizeof(float3) -- TODO confirm against the kernel.
     int inputPitch = patchSize * 4;
     int outputPitch = patchSize * 12;

     SetComputeSize((uint)patchSize, (uint)patchSize, 1);

     float result = base.Run(
         patchSize,
         patchSize,
         imgIn.DevicePointer,
         inputPitch,
         imgOut.DevicePointer,
         outputPitch,
         blackPoint,
         scale);

     return result;
 }
Exemplo n.º 48
0
        /// <summary>
        /// Executes one observer step: lazily (re)initializes the device buffers, optionally
        /// re-centers the view on the center of gravity, runs the force-based layout kernels
        /// (zero forces, springs, repulsion, apply forces) and then fills the texture and
        /// vertex buffers used for rendering (texture, connections, cubes, winners).
        /// </summary>
        protected override void Execute()
        {
            if (!initializedFlag)
            {
                // Release buffers left over from a previous initialization (initializedFlag is
                // cleared again by ONE_SHOT_RESTART below); without this every restart would
                // leak the old device allocations.
                if (m_d_Force != null)
                {
                    m_d_Force.Dispose();
                }
                if (m_d_ActiveConnectionsCount != null)
                {
                    m_d_ActiveConnectionsCount.Dispose();
                }
                if (m_d_CenterOfGravity != null)
                {
                    m_d_CenterOfGravity.Dispose();
                }

                // Three floats per cell.
                m_d_Force = new CudaDeviceVariable<float>(Target.MAX_CELLS * 3);

                m_d_ActiveConnectionsCount = new CudaDeviceVariable<int>(1);
                m_d_CenterOfGravity        = new CudaDeviceVariable<float>(3);

                //initialize vertices position
                InitCoordinatesAndVelocity();
                initializedFlag = true;
                ViewMode        = ViewMethod.Orbit_3D;

                // Default translation: shift by half of the coordinate range along X and Z.
                float translationValue = 0.50f * (COORDINATES_MAX - COORDINATES_MIN);
                m_Translation = new Vector3(-translationValue, 0, -translationValue);

                m_zeroTextureKernel.SetupExecution(TextureHeight * TextureWidth);

                m_zeroTextureKernel.Run(
                    VBODevicePointer,
                    TextureHeight * TextureWidth
                    );
            }


            if (TRANSLATE_TO_CENTER == Option.True)
            {
                m_centerOfGravityKernel.SetupExecution(1);

                m_centerOfGravityKernel.Run(
                    m_d_PointsCoordinates.DevicePointer,
                    m_d_CenterOfGravity.DevicePointer,
                    Target.ActivityFlag,
                    Target.MAX_CELLS
                    );

                // Read the computed center back and translate every drawn shape by its negation.
                float[] m_h_centerOfGravity = new float[3];
                m_d_CenterOfGravity.CopyToHost(m_h_centerOfGravity);

                m_Translation = new Vector3(-m_h_centerOfGravity[0], -m_h_centerOfGravity[1], -m_h_centerOfGravity[2]);

                m_Connections.Translation     = m_Translation;
                m_ReferenceFields.Translation = m_Translation;
                m_WinnerOne.Translation       = m_Translation;
                m_WinnerTwo.Translation       = m_Translation;
            }

            // PHYSICS PART
            // set forces to zero
            m_setForcesToZeroKernel.SetupExecution(Target.MAX_CELLS * 3);

            m_setForcesToZeroKernel.Run(
                m_d_Force.DevicePointer,
                Target.MAX_CELLS
                );

            // spring force computation
            m_springKernel.SetupExecution(Target.MAX_CELLS);

            m_springKernel.Run(
                Target.ActivityFlag,
                Target.ConnectionMatrix,
                m_d_PointsCoordinates.DevicePointer,
                SPRING_STRENGTH,
                m_d_Force.DevicePointer,
                Target.MAX_CELLS
                );


            // repulsion force computation
            m_repulsionKernel.SetupExecution(Target.MAX_CELLS);

            m_repulsionKernel.Run(
                REPULSION,
                REPULSION_DISTANCE,
                m_d_Force.DevicePointer,
                m_d_PointsCoordinates.DevicePointer,
                Target.ActivityFlag,
                Target.MAX_CELLS
                );


            // applying forces to the points
            m_useForceKernel.SetupExecution(Target.MAX_CELLS * 3);

            m_useForceKernel.Run(
                m_d_Force.DevicePointer,
                FORCE_FACTOR,
                m_d_PointsCoordinates.DevicePointer,
                Target.MAX_CELLS
                );



            // GRAPHICS PART
            // COPY AND PROCESS TEXTURE
            m_copyAndProcessTextureKernel.SetupExecution(Target.ReferenceVector.Count);

            m_copyAndProcessTextureKernel.Run(
                Target.ReferenceVector,
                Target.INPUT_SIZE,
                Target.Input.ColumnHint,
                TextureWidth,
                VBODevicePointer,
                Target.MAX_CELLS,
                Target.ReferenceVector.Count
                );



            // CONNECTIONS
            // Reset the device-side counter before the kernel runs; it is read back below.
            m_d_ActiveConnectionsCount.CopyToDevice(0);

            m_copyConnectionsCoordinatesKernel.SetupExecution(Target.MAX_CELLS * Target.MAX_CELLS);

            m_copyConnectionsCoordinatesKernel.Run(
                Target.ConnectionMatrix,
                m_d_PointsCoordinates.DevicePointer,
                VertexVBODevicePointer,
                m_d_ActiveConnectionsCount.DevicePointer,
                Target.MAX_CELLS
                );

            // Two vertices per active connection.
            m_d_ActiveConnectionsCount.CopyToHost(m_h_ActiveConnectionsCount);
            m_Connections.VertexCount = 2 * m_h_ActiveConnectionsCount[0];

            // REFERENCE VECTORS (CUBES)

            /*
             * m_computeCubesKernel.m_kernel.SetupExecution(Target.MAX_CELLS
             *  );
             *
             *
             * .Run(
             *  m_computeCubesKernel,
             *  m_d_PointsCoordinates.DevicePointer,
             *  VertexVBODevicePointer,
             *  m_ReferenceFields.VertexOffset,
             *  TEXTURE_SIDE,
             *  Target.ActivityFlag,
             *  Target.Input.ColumnHint,
             *  Target.MAX_CELLS
             *  );
             */

            /*
             * m_cubeCoordinatesKernel.m_kernel.SetupExecution(Target.MAX_CELLS * 72
             *  );
             *
             * .Run(
             *  m_cubeCoordinatesKernel,
             *  VertexVBODevicePointer,
             *  m_d_CubeOperation.DevicePointer,
             *  m_ReferenceFields.VertexOffset,
             *  Target.ActivityFlag,
             *  TEXTURE_SIDE,
             *  m_d_PointsCoordinates.DevicePointer,
             *  Target.MAX_CELLS
             *  );
             *
             * m_cubeTextureKernel.m_kernel.SetupExecution(Target.MAX_CELLS * 48
             *  );
             *
             * .Run(
             *  m_cubeTextureKernel,
             *  VertexVBODevicePointer,
             *  m_ReferenceFields.TexCoordOffset,
             *  m_d_CubeTexCoordinates.DevicePointer,
             *  TEXTURE_SIDE,
             *  Target.Input.ColumnHint,
             *  Target.ActivityFlag,
             *  Target.MAX_CELLS
             *  );
             */

            m_computeCubes2Kernel.SetupExecution(Target.MAX_CELLS * 6);

            m_computeCubes2Kernel.Run(
                m_d_PointsCoordinates.DevicePointer,
                VertexVBODevicePointer,
                m_ReferenceFields.VertexOffset,
                TEXTURE_SIDE,
                m_d_CubeOperation.DevicePointer,
                m_d_CubeTexCoordinates.DevicePointer,
                Target.ActivityFlag,
                (float)Target.Input.ColumnHint,
                Target.MAX_CELLS
                );



            /*
             * m_computeQuadsKernel.m_kernel.SetupExecution(
             *  Target.MAX_CELLS
             *  );
             *
             * m_computeQuadsKernel.Run(
             *  m_d_PointsCoordinates.DevicePointer,
             *  VertexVBODevicePointer,
             *  m_ReferenceFields.VertexOffset,
             *  TEXTURE_SIDE,
             *  Target.ActivityFlag,
             *  Target.Input.ColumnHint,
             *  Target.MAX_CELLS
             *  );
             */

            // The same kernel is run twice: once for the first winner, once for the second.
            m_winnersKernel.SetupExecution(Target.MAX_CELLS);

            m_winnersKernel.Run(
                Target.WinnerOne,
                VertexVBODevicePointer,
                m_WinnerOne.VertexOffset,
                m_d_PointsCoordinates.DevicePointer,
                TEXTURE_SIDE,
                Target.MAX_CELLS
                );

            m_winnersKernel.SetupExecution(Target.MAX_CELLS);

            m_winnersKernel.Run(
                Target.WinnerTwo,
                VertexVBODevicePointer,
                m_WinnerTwo.VertexOffset,
                m_d_PointsCoordinates.DevicePointer,
                TEXTURE_SIDE,
                Target.MAX_CELLS
                );

            // One-shot restart: force re-initialization on the next call, then clear the request.
            if (ONE_SHOT_RESTART == Option.True)
            {
                initializedFlag = false;
                TriggerReset();
                ONE_SHOT_RESTART = Option.False;
            }
        }
        /// <summary>
        /// Dot product of two four-channel 32-bit unsigned images; the alpha channel is ignored.
        /// </summary>
        /// <param name="src2">2nd source image</param>
        /// <param name="pDp">Device memory receiving the computed dot product of the two images. (3 * sizeof(double))</param>
        /// <param name="buffer">Allocated device scratch memory; must be at least as large as the size
        /// reported by <c>DotProdGetBufferHostSize(nppStreamCtx)</c>, which is what this method checks.</param>
        /// <param name="nppStreamCtx">NPP stream context.</param>
        /// <exception cref="NPPException">The buffer is too small, or the NPP call reported an error status.</exception>
        public void ADotProduct(NPPImage_32uC4 src2, CudaDeviceVariable<double> pDp, CudaDeviceVariable<byte> buffer, NppStreamContext nppStreamCtx)
        {
            const string nativeCall = "nppiDotProd_32u64f_AC4R_Ctx";

            // NOTE(review): the size comes from DotProdGetBufferHostSize although the native call is the
            // AC4R variant -- confirm whether an ADotProdGetBufferHostSize(nppStreamCtx) overload should be used.
            int requiredBytes = DotProdGetBufferHostSize(nppStreamCtx);
            if (requiredBytes > buffer.Size)
            {
                throw new NPPException("Provided buffer is too small.");
            }

            status = NPPNativeMethods_Ctx.NPPi.DotProd.nppiDotProd_32u64f_AC4R_Ctx(
                _devPtrRoi, _pitch,
                src2.DevicePointerRoi, src2.Pitch,
                _sizeRoi,
                pDp.DevicePointer, buffer.DevicePointer,
                nppStreamCtx);

            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, status));
            NPPException.CheckNppStatus(status, this);
        }
Exemplo n.º 50
0
        /// <summary>
        /// Returns the device address of the element at <paramref name="offset"/> in the device
        /// variable obtained for index <c>DevCount - 1</c>.
        /// </summary>
        /// <param name="callee">Not used by this implementation.</param>
        /// <param name="offset">Element offset; multiplied by the variable's <c>TypeSize</c>.</param>
        /// <returns>The offset device pointer, or <c>default(CUdeviceptr)</c> when no device variable is available.</returns>
        public override CUdeviceptr GetDevicePtr(MyAbstractObserver callee, int offset)
        {
            var deviceIndex = MyKernelFactory.Instance.DevCount - 1;
            CudaDeviceVariable<T> deviceVar = GetDevice(deviceIndex);

            // Nothing available for that index: hand back a default pointer.
            if (deviceVar == null)
            {
                return default(CUdeviceptr);
            }

            return deviceVar.DevicePointer + offset * deviceVar.TypeSize;
        }
 /// <summary>
 /// Synchronous copy from device to host. Forwards to the overload that takes a raw device pointer.
 /// </summary>
 /// <param name="devicePtr">Device variable whose <c>DevicePointer</c> is used as the source</param>
 /// <param name="offsetSrc">Offset to source pointer in bytes</param>
 /// <param name="offsetDest">Offset to destination pointer in bytes</param>
 /// <param name="aSizeInBytes">Bytes to copy</param>
 public void SynchronCopyToHost(CudaDeviceVariable<T> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
 {
     var sourcePointer = devicePtr.DevicePointer;
     SynchronCopyToHost(sourcePointer, offsetSrc, offsetDest, aSizeInBytes);
 }
Exemplo n.º 52
0
        /// <summary>
        /// Image resize batch for variable ROI (wraps <c>nppiResizeBatch_16f_C4R_Advanced</c>).
        /// </summary>
        /// <param name="nMaxWidth">Forwarded to NPP as <c>nMaxWidth</c> (presumably the maximum destination ROI width over the batch; see the NPP documentation).</param>
        /// <param name="nMaxHeight">Forwarded to NPP as <c>nMaxHeight</c> (presumably the maximum destination ROI height over the batch; see the NPP documentation).</param>
        /// <param name="pBatchSrc">Device memory holding the list of source image descriptors.</param>
        /// <param name="pBatchDst">Device memory holding the list of destination image descriptors.</param>
        /// <param name="pBatchROI">Device pointer to NppiResizeBatchROI_Advanced list of per-image variable ROIs. User needs to initialize this structure and copy it to device.</param>
        /// <param name="nBatchSize">Number of entries of the batch lists to process.</param>
        /// <param name="eInterpolation">The type of eInterpolation to perform resampling.</param>
        public static void ResizeBatchAdvanced(int nMaxWidth, int nMaxHeight, CudaDeviceVariable<NppiImageDescriptor> pBatchSrc, CudaDeviceVariable<NppiImageDescriptor> pBatchDst,
                                               CudaDeviceVariable<NppiResizeBatchROI_Advanced> pBatchROI, uint nBatchSize, InterpolationMode eInterpolation)
        {
            // Honour the caller-supplied batch size; previously the parameter was silently ignored
            // and the element count of pBatchDst was passed instead.
            NppStatus status = NPPNativeMethods.NPPi.GeometricTransforms.nppiResizeBatch_16f_C4R_Advanced(nMaxWidth, nMaxHeight, pBatchSrc.DevicePointer, pBatchDst.DevicePointer,
                                                                                                          pBatchROI.DevicePointer, nBatchSize, eInterpolation);

            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiResizeBatch_16f_C4R_Advanced", status));
            NPPException.CheckNppStatus(status, null);
        }
Exemplo n.º 53
0
		/// <summary>
		/// Applies a separable Gaussian filter with user-supplied floating point coefficients,
		/// using the requested border handling (wraps <c>nppiFilterGaussAdvancedBorder_8u_AC4R</c>).
		/// </summary>
		/// <param name="dst">Destination-Image</param>
		/// <param name="Kernel">Device array of nFilterTaps kernel coefficients which sum to 1.0F, where nFilterTaps =  2 * ((int)((float)ceil(radius) + 0.5F) ) + 1. Its element count is passed as the tap count.</param>
		/// <param name="eBorderType">The border type operation to be applied at source image border boundaries.</param>
		public void FilterGaussBorderA(NPPImage_8uC4 dst, CudaDeviceVariable<float> Kernel, NppiBorderType eBorderType)
		{
			const string nativeCall = "nppiFilterGaussAdvancedBorder_8u_AC4R";

			status = NPPNativeMethods.NPPi.FilterGaussBorder.nppiFilterGaussAdvancedBorder_8u_AC4R(
				_devPtr, _pitch, _sizeOriginal, _pointRoi,
				dst.DevicePointerRoi, dst.Pitch, _sizeRoi,
				Kernel.Size, Kernel.DevicePointer,
				eBorderType);

			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, status));
			NPPException.CheckNppStatus(status, this);
		}
 /// <summary>
 /// Asynchronous copy from device to host. Forwards to the overload that takes a raw device pointer.
 /// </summary>
 /// <param name="deviceVar">Device variable whose <c>DevicePointer</c> is used as the source</param>
 /// <param name="stream">Stream passed on to the pointer-based overload</param>
 public void AsyncCopyFromDevice(CudaDeviceVariable<T> deviceVar, CUstream stream)
 {
     var sourcePointer = deviceVar.DevicePointer;
     AsyncCopyFromDevice(sourcePointer, stream);
 }
Exemplo n.º 55
0
		/// <summary>
		/// Four-channel 32-bit signed image DotProd. Buffer is internally allocated and freed. Ignoring alpha channel.
		/// </summary>
		/// <param name="src2">2nd source image</param>
		/// <param name="pDp">Pointer to the computed dot product of the two images. (3 * sizeof(double))</param>
		/// <exception cref="NPPException">The NPP call reported an error status.</exception>
		public void ADotProduct(NPPImage_32sC4 src2, CudaDeviceVariable<double> pDp)
		{
			int bufferSize = DotProdGetBufferHostSize();

			// 'using' guarantees the scratch buffer is released even if something between
			// allocation and the status check throws (e.g. a null argument).
			using (CudaDeviceVariable<byte> buffer = new CudaDeviceVariable<byte>(bufferSize))
			{
				status = NPPNativeMethods.NPPi.DotProd.nppiDotProd_32s64f_AC4R(_devPtrRoi, _pitch, src2.DevicePointerRoi, src2.Pitch, _sizeRoi, pDp.DevicePointer, buffer.DevicePointer);
				Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiDotProd_32s64f_AC4R", status));
			}

			// Status is checked after the buffer has been freed, as before.
			NPPException.CheckNppStatus(status, this);
		}
 /// <summary>
 /// Asynchronous partial copy from device to host. Forwards to the overload that takes a raw device pointer.
 /// </summary>
 /// <param name="deviceVar">Device variable whose <c>DevicePointer</c> is used as the source</param>
 /// <param name="offsetSrc">Offset to source pointer in bytes</param>
 /// <param name="offsetDest">Offset to destination pointer in bytes</param>
 /// <param name="aSizeInBytes">Bytes to copy</param>
 /// <param name="stream">Stream passed on to the pointer-based overload</param>
 public void AsyncCopyFromDevice(CudaDeviceVariable<T> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
 {
     var sourcePointer = deviceVar.DevicePointer;
     AsyncCopyFromDevice(sourcePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
 }
Exemplo n.º 57
0
		/// <summary>
		/// image maximum relative error. User buffer is internally allocated and freed.
		/// </summary>
		/// <param name="src2">2nd source image</param>
		/// <param name="pError">Pointer to the computed error.</param>
		/// <exception cref="NPPException">The NPP call reported an error status.</exception>
		public void MaximumRelativeError(NPPImage_32sC4 src2, CudaDeviceVariable<double> pError)
		{
			int bufferSize = MaximumRelativeErrorGetBufferHostSize();

			// 'using' guarantees the scratch buffer is released even if something between
			// allocation and the status check throws (e.g. a null argument).
			using (CudaDeviceVariable<byte> buffer = new CudaDeviceVariable<byte>(bufferSize))
			{
				status = NPPNativeMethods.NPPi.MaximumRelativeError.nppiMaximumRelativeError_32s_C4R(_devPtrRoi, _pitch, src2.DevicePointerRoi, src2.Pitch, _sizeRoi, pError.DevicePointer, buffer.DevicePointer);
				Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiMaximumRelativeError_32s_C4R", status));
			}

			// Status is checked after the buffer has been freed, as before.
			NPPException.CheckNppStatus(status, this);
		}
Exemplo n.º 58
0
        /// <summary>
        /// Executes one plot-observer step: decides whether this step is sampled (period and delay),
        /// updates the plotted value bounds according to <c>BoundPolicy</c>, refreshes canvas, colors
        /// and coordinates when needed, and then draws using the selected <c>DisplayMethod</c>.
        /// </summary>
        /// <exception cref="NotImplementedException">The current <c>DisplayMethod</c> has no case in the switch.</exception>
        protected override void Execute()
        {
            // A new value is available when the simulation step changed since the last completed run.
            bool newValueIsAvailable = m_lastSimulationStep != SimulationStep;

            // Not a sampling step: count it and skip drawing.
            if (m_currentRealTimeStep % Period != 0 && newValueIsAvailable)
            {
                m_currentRealTimeStep++;
                return;
            }

            m_currentSamplingTimeStep = (m_currentRealTimeStep / Period);

            // Still inside the initial delay: count the step and skip drawing.
            if (m_currentSamplingTimeStep < m_delay && newValueIsAvailable)
            {
                m_currentRealTimeStep++;
                return;
            }
            bool mustBeUpdated = false;

            if (BoundPolicy == MyBoundPolicy.AUTO)
            {
                // Update the new min / max
                bool newBounds = false;
                Target.SafeCopyToHost();
                // NOTE: this early return (and the NaN one below) leaves m_currentRealTimeStep and
                // m_lastSimulationStep untouched.
                if (Target.Count == 0)
                {
                    return;
                }
                // Inspect Count values, read from the host copy at c * Stride + Offset.
                for (int c = 0; c < Count; c++)
                {
                    double value = Target.Host[c * Stride + Offset];
                    if (m_isDirty)
                    {
                        if (double.IsNaN(value))
                        {
                            // Cant decide bounds
                            return;
                        }
                        // First value
                        // NOTE(review): m_isDirty is only cleared at the end of this method, so this branch
                        // runs for every c and the bounds end up derived from the last value only -- confirm intended.
                        m_plotCurrentValueMax = value + 0.01f;
                        m_plotCurrentValueMin = value - 0.01f;
                    }
                    else
                    {
                        if (!double.IsNaN(value)) // Change bounds only if values are real
                        {
                            // Next value
                            if (value > m_plotCurrentValueMax)
                            {
                                m_plotCurrentValueMax = value;
                                newBounds             = true;
                            }
                            else if (value < m_plotCurrentValueMin)
                            {
                                m_plotCurrentValueMin = value;
                                newBounds             = true;
                            }

                            // Widen the range by 10% on both sides and publish the new bounds.
                            // NOTE(review): newBounds is never reset inside the loop, so once it is set the
                            // widening is re-applied for every following non-NaN value -- confirm intended.
                            if (newBounds)
                            {
                                double range = m_plotCurrentValueMax - m_plotCurrentValueMin;
                                m_plotCurrentValueMax += range * 0.1;
                                m_plotCurrentValueMin -= range * 0.1;

                                m_boundMin = (float)m_plotCurrentValueMin;
                                m_boundMax = (float)m_plotCurrentValueMax;
                                OnRuntimePropertyChanged();
                            }
                        }
                    }
                }
                mustBeUpdated = newBounds;
            }
            else if (BoundPolicy == MyBoundPolicy.MANUAL)
            {
                // Manual policy: take the bounds straight from the configured properties.
                m_plotCurrentValueMin = BoundMin;
                m_plotCurrentValueMax = BoundMax;
            }

            //MyLog.DEBUG.WriteLine("min " + m_plotCurrentValueMin + " max " + m_plotCurrentValueMax);


            if (m_isDirty)
            {
                // Set a blank canvas
                // (presumably wraps the existing VBO device pointer rather than allocating -- TODO confirm)
                m_canvas      = new CudaDeviceVariable <uint>(VBODevicePointer);
                mustBeUpdated = true;
            }



            if (m_colorsAreDirty)
            {
                UpdateColorsToGpu();
            }

            // Redraw the coordinates whenever the bounds, the canvas or the colors changed.
            if (mustBeUpdated || m_colorsAreDirty)
            {
                drawCoordinates();
                m_colorsAreDirty = false;
                mustBeUpdated    = true;
            }

            // Dispatch to the drawing routine of the selected display method.
            switch (DisplayMethod)
            {
            case MyDisplayMethod.CYCLE:
                runMethodCycle(mustBeUpdated);
                break;

            case MyDisplayMethod.SCALE:
                runMethodScale(mustBeUpdated);
                break;

            case MyDisplayMethod.SCROLL:
                runMethodScroll(mustBeUpdated);
                break;

            // Add new methods here

            default:
                throw new NotImplementedException();
            }

            // Bookkeeping for the next call: clear the dirty flag, advance the real-time step only
            // when a new value was consumed, and remember the simulation step just handled.
            m_isDirty = false;
            if (newValueIsAvailable)
            {
                m_currentRealTimeStep++;
            }
            m_lastSimulationStep = SimulationStep;
        }
Exemplo n.º 59
0
		/// <summary>
		/// Four channel 32-bit signed convolution filter with border control, ignoring alpha channel.<para/>
		/// General purpose 2D convolution filter using floating-point weights with border control.<para/>
		/// Pixels under the mask are multiplied by the respective weights in the mask and the
		/// results are summed. If any portion of the mask overlaps the source image boundary,
		/// the requested border type operation is applied to all mask pixels which fall outside
		/// of the source image.<para/>
		/// </summary>
		/// <param name="dest">Destination image; its ROI size is the one passed to NPP.</param>
		/// <param name="pKernel">Device array holding the kernel coefficients. Coefficients are expected to be stored in reverse order</param>
		/// <param name="nKernelSize">Width and Height of the rectangular kernel.</param>
		/// <param name="oAnchor">X and Y offsets of the kernel origin frame of reference relative to the source pixel.</param>
		/// <param name="eBorderType">The border type operation to be applied at source image border boundaries.</param>
		public void FilterBorderA(NPPImage_32sC4 dest, CudaDeviceVariable<float> pKernel, NppiSize nKernelSize, NppiPoint oAnchor, NppiBorderType eBorderType)
		{
			const string nativeCall = "nppiFilterBorder32f_32s_AC4R";

			status = NPPNativeMethods.NPPi.FilterBorder32f.nppiFilterBorder32f_32s_AC4R(
				_devPtr, _pitch, _sizeOriginal, _pointRoi,
				dest.DevicePointerRoi, dest.Pitch, dest.SizeRoi,
				pKernel.DevicePointer, nKernelSize, oAnchor,
				eBorderType);

			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, nativeCall, status));
			NPPException.CheckNppStatus(status, this);
		}
Exemplo n.º 60
0
            /// <summary>
            /// Fetches the device-side cluster centers for the owner and runs the descriptor kernel on them.
            /// </summary>
            public override void Execute()
            {
                // Device-side view of the owner's SLIC cluster centers.
                CudaDeviceVariable<MySLICClusterCenterObject> clusterCenters = Owner.SLICClusterCenters.GetDevice(Owner);

                // Fill the descriptor.
                m_kernel_desc.Run(
                    clusterCenters.DevicePointer,
                    Owner.SP_xy,
                    Owner.SP_desc,
                    Owner.nSegs,
                    Owner.SP_xy.ColumnHint,
                    Owner.SP_desc.ColumnHint,
                    Owner.InputDimX,
                    Owner.InputDimY);
            }