/// <summary>
/// Creates a single-mip, single-layer 2D image in the context's depth/stencil format,
/// backs it with device-local memory and wraps it (with a depth+stencil view) in a texture.
/// </summary>
/// <param name="ctx">Context providing the device, depth/stencil format and memory properties.</param>
/// <param name="width">Width of the image extent.</param>
/// <param name="height">Height of the image extent.</param>
/// <returns>A <see cref="Texture2D"/> owning the image, its memory and its view.</returns>
internal static Texture2D DepthStencil(Context ctx, int width, int height)
{
    var imageInfo = new ImageCreateInfo
    {
        ImageType = ImageType.Image2D,
        Format = ctx.DepthStencilFormat,
        Extent = new Extent3D(width, height, 1),
        MipLevels = 1,
        ArrayLayers = 1,
        Samples = SampleCounts.Count1,
        Tiling = ImageTiling.Optimal,
        Usage = ImageUsages.DepthStencilAttachment | ImageUsages.TransferSrc
    };
    Image depthImage = ctx.Device.CreateImage(imageInfo);

    // Pick a device-local memory type that satisfies the image's requirements.
    MemoryRequirements requirements = depthImage.GetMemoryRequirements();
    int memoryTypeIndex = ctx.MemoryProperties.MemoryTypes.IndexOf(
        requirements.MemoryTypeBits, MemoryProperties.DeviceLocal);
    DeviceMemory backing = ctx.Device.AllocateMemory(
        new MemoryAllocateInfo(requirements.Size, memoryTypeIndex));
    depthImage.BindMemory(backing);

    // The view covers both aspects, first mip level and first array layer only.
    var range = new ImageSubresourceRange(ImageAspects.Depth | ImageAspects.Stencil, 0, 1, 0, 1);
    ImageView depthView = depthImage.CreateView(new ImageViewCreateInfo(ctx.DepthStencilFormat, range));

    return new Texture2D(
        ctx, depthImage, backing, depthView, ctx.DepthStencilFormat, new Vector2I(width, height), false);
}
/// <summary>
/// Creates a buffer sized for <paramref name="count"/> elements of <c>T</c>, allocates
/// memory with the requested properties and binds it to the buffer.
/// </summary>
/// <param name="name">Name stored on the instance.</param>
/// <param name="graphics">Graphics object supplying the device and the memory type lookup.</param>
/// <param name="count">Number of elements; the byte size is <c>count * Interop.SizeOf&lt;T&gt;()</c>.</param>
/// <param name="usages">Buffer usage flags.</param>
/// <param name="memoryProperties">Memory properties the backing allocation must have.</param>
/// <param name="flags">Buffer creation flags.</param>
/// <param name="sharingMode">Queue sharing mode.</param>
/// <param name="queueFamilyIndices">Queue family indices forwarded to the create info.</param>
public VKBuffer(
    string name,
    Graphics graphics,
    long count,
    BufferUsages usages,
    MemoryProperties memoryProperties,
    BufferCreateFlags flags = BufferCreateFlags.None,
    SharingMode sharingMode = SharingMode.Exclusive,
    int[] queueFamilyIndices = null)
{
    Name = name;
    Graphics = graphics;
    Count = count;
    Size = count * Interop.SizeOf<T>();
    Usages = usages;

    var bufferInfo = new BufferCreateInfo(
        size: Size,
        usages: usages,
        flags: flags,
        sharingMode: sharingMode,
        queueFamilyIndices: queueFamilyIndices);
    Buffer = Graphics.Device.CreateBuffer(bufferInfo);

    // Allocate exactly what the driver asks for, from a type matching the requested properties.
    var requirements = Buffer.GetMemoryRequirements();
    var allocation = new MemoryAllocateInfo(
        requirements.Size,
        graphics.GetMemoryTypeIndex(requirements.MemoryTypeBits, memoryProperties));
    DeviceMemory = Graphics.Device.AllocateMemory(allocation);

    Buffer.BindMemory(DeviceMemory);
}
/// <inheritdoc cref="IDisposable.Dispose"/>
/// <remarks>
/// Safe to call more than once: both fields are released through null-conditional calls
/// and then cleared, so later calls are no-ops.
/// </remarks>
public void Dispose()
{
    _mapped?.Dispose();
    _mapped = null;

    // Previously an unconditional call, which threw NullReferenceException on a second Dispose().
    _memory?.Dispose();
    _memory = null;
}
/// <summary>
/// Maps the buffer memory, copies <paramref name="bitmapSize"/> bytes from
/// <paramref name="scan0"/> into it and unmaps the memory again.
/// </summary>
/// <param name="scan0">Pointer to the source bitmap data.</param>
/// <param name="bitmapSize">Size value forwarded to <c>Copy</c>.</param>
/// <param name="bufferMem">Device memory to map and write.</param>
/// <param name="memRequirements">Memory requirements; its <c>Size</c> is the mapped range length.</param>
void CopyBitmapToBuffer(IntPtr scan0, int bitmapSize, DeviceMemory bufferMem, MemoryRequirements memRequirements)
{
    var map = device.MapMemory(bufferMem, 0, memRequirements.Size);
    try
    {
        Copy(scan0, map, bitmapSize);
    }
    finally
    {
        // Always unmap, so a failed copy does not leave the allocation mapped.
        device.UnmapMemory(bufferMem);
    }
}
/// <summary>
/// Test fixture setup: resolves the shell and services from the application container,
/// registers the device as a connectable item with a fresh <see cref="DeviceMemory"/>, and
/// caches the view models and the read command used by the tests.
/// </summary>
public ConnectionTests()
{
    var unityContainer = Program.GetApp().Container.Resolve(typeof(IUnityContainer)) as IUnityContainer;
    _typesContainer = new TypesContainer(unityContainer);

    _device = Program.GetDevice();
    _configuration = _device.DeviceFragments
        .First(fragment => fragment.StrongName == "Configuration") as IDeviceConfiguration;

    _shell = _typesContainer.Resolve<ShellViewModel>();
    _deviceViewModelFactory = _typesContainer.Resolve<IDeviceViewModelFactory>();

    var deviceMemory = new DeviceMemory();
    _typesContainer.Resolve<IDevicesContainerService>().AddConnectableItem(_device);
    _device.DeviceMemory = deviceMemory;

    // All view models below hang off the first device view model of the project browser.
    var firstDeviceViewModel = _shell.ProjectBrowserViewModel.DeviceViewModels[0];
    _deviceViewModel = firstDeviceViewModel;

    _configurationFragmentViewModel = firstDeviceViewModel.FragmentViewModels
        .First(model => model.NameForUiKey == "Configuration") as RuntimeConfigurationViewModel;
    _measuringMonitorViewModel = firstDeviceViewModel.FragmentViewModels
        .First(model => model.NameForUiKey == "MeasuringMonitor") as MeasuringMonitorViewModel;

    // The "read" command lives in the "Device" option group of the configuration fragment.
    var deviceOptionGroup = _configurationFragmentViewModel.FragmentOptionsViewModel
        .FragmentOptionGroupViewModels
        .First(model => model.NameKey == "Device");
    _readCommand = deviceOptionGroup.FragmentOptionCommandViewModels
        .First(model => model.TitleKey == ApplicationGlobalNames.UiCommandStrings.READ_STRING_KEY)
        .OptionCommand as RelayCommand;
}
//[CudnnMnistFCF]
/// <summary>
/// Forward pass of a fully connected layer for a single-sample batch: the bias is copied
/// into the destination and the matrix-vector product is then accumulated on top of it.
/// </summary>
/// <param name="ip">Layer providing the output count, weights (<c>DataD</c>) and bias (<c>BiasD</c>).</param>
/// <param name="nchw">Input dimensions; only <c>N == 1</c> is supported.</param>
/// <param name="srcData">Device memory holding the input vector.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
/// <exception cref="Exception">Thrown when <c>nchw.N</c> is not 1.</exception>
public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    if (nchw.N != 1)
    {
        throw new Exception("Not Implemented");
    }

    var inputLength = nchw.C * nchw.H * nchw.W;
    var outputLength = ip.Outputs;
    Resize(ref dstData, outputLength);

    // cuMemcpyDtoD is a raw CUDA API call, so it has to run inside worker.EvalAction.
    // A ref parameter cannot be captured by the lambda, hence the local copy of the reference.
    var destination = dstData;
    _worker.EvalAction(() => CUDAInterop.cuMemcpyDtoD(
        destination.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(outputLength * sizeof(float))));

    // The cuBLAS wrapper already evaluates on the worker, so no extra EvalAction is needed.
    // beta = 1 makes Sgemv add its result to the bias values copied above.
    const float alpha = 1.0f;
    const float beta = 1.0f;
    _cublas.Sgemv(
        CUBLASInterop.cublasOperation_t.CUBLAS_OP_T,
        inputLength, outputLength,
        alpha, ip.DataD.Ptr, inputLength,
        srcData.Ptr, 1,
        beta, dstData.Ptr, 1);

    // NOTE(review): nchw is passed by value; these writes reach the caller only if nchw_t is a reference type.
    nchw.H = 1;
    nchw.W = 1;
    nchw.C = outputLength;
}
/// <summary>
/// Creates an image, binds freshly allocated memory to it and creates (then disposes)
/// a color image view, once with the default allocator and once with <c>CustomAllocator</c>.
/// </summary>
public void BindMemoryAndCreateView()
{
    using (Image image = CreateImage())
    {
        PhysicalDeviceMemoryProperties memoryProperties = PhysicalDevice.GetMemoryProperties();
        MemoryRequirements requirements = image.GetMemoryRequirements();

        // A property mask of 0 is passed to the memory type lookup.
        int memoryTypeIndex = memoryProperties.MemoryTypes.IndexOf(requirements.MemoryTypeBits, 0);
        var allocateInfo = new MemoryAllocateInfo(requirements.Size, memoryTypeIndex);

        using (DeviceMemory memory = Device.AllocateMemory(allocateInfo))
        {
            image.BindMemory(memory);

            var viewInfo = new ImageViewCreateInfo
            {
                Format = Format.B8G8R8A8UNorm,
                ViewType = ImageViewType.Image2D,
                SubresourceRange = new ImageSubresourceRange
                {
                    AspectMask = ImageAspects.Color,
                    LayerCount = 1,
                    LevelCount = 1
                }
            };

            using (image.CreateView(viewInfo))
            {
            }

            using (image.CreateView(viewInfo, CustomAllocator))
            {
            }
        }
    }
}
/// <summary>
/// Creates a chunk: records one free block spanning the whole chunk, selects a memory type
/// for the requested location and allocates <paramref name="size"/> bytes from the device.
/// </summary>
/// <param name="logicalDevice">Device used for the allocation.</param>
/// <param name="hostDevice">Host device used to look up the memory type.</param>
/// <param name="location">Device location selects device-local memory, anything else host-visible memory.</param>
/// <param name="supportedMemoryTypesFilter">Filter forwarded to the memory type lookup.</param>
/// <param name="size">Total chunk size in bytes.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="logicalDevice"/> or <paramref name="hostDevice"/> is null.
/// </exception>
internal Chunk(
    Device logicalDevice,
    HostDevice hostDevice,
    Location location,
    int supportedMemoryTypesFilter,
    long size = 128 * ByteUtils.MEGABYTE_TO_BYTE)
{
    if (logicalDevice == null)
        throw new ArgumentNullException(nameof(logicalDevice));
    if (hostDevice == null)
        throw new ArgumentNullException(nameof(hostDevice));

    this.logicalDevice = logicalDevice;
    this.location = location;
    totalSize = size;

    // Initially the whole chunk is one free block.
    freeBlocks.Add(new Block(container: this, offset: 0, size: size));

    // Choose the memory type matching the requested location.
    MemoryProperties requiredProperties = location == Location.Device
        ? MemoryProperties.DeviceLocal
        : MemoryProperties.HostVisible;
    memoryTypeIndex = hostDevice.GetMemoryType(
        properties: requiredProperties,
        supportedTypesFilter: supportedMemoryTypesFilter);

    // Allocate the backing memory for the entire chunk up front.
    var allocateInfo = new MemoryAllocateInfo(
        allocationSize: size,
        memoryTypeIndex: memoryTypeIndex);
    memory = logicalDevice.AllocateMemory(allocateInfo);
}
/// <summary>
/// Checks whether or not the Cuda features are currently supported.
/// </summary>
/// <returns>
/// <c>true</c> when a default GPU exists, cuDNN reports itself available and a small test
/// kernel produces the expected values; <c>false</c> otherwise, including on any exception.
/// </returns>
public static bool IsGpuAccelerationSupported()
{
    const int sampleLength = 1024;
    try
    {
        // CUDA test, followed by the cuDNN availability check.
        Gpu gpu = Gpu.Default;
        if (gpu == null || !Dnn.IsAvailable)
        {
            return false;
        }

        using (DeviceMemory<float> deviceSample = gpu.AllocateDevice<float>(sampleLength))
        {
            deviceptr<float> ptr = deviceSample.Ptr;
            void Kernel(int i) => ptr[i] = i;

            // JIT test: write each index into the device buffer, then read it back.
            Alea.Parallel.GpuExtension.For(gpu, 0, sampleLength, Kernel);
            float[] actual = Gpu.CopyToHost(deviceSample);

            float[] expected = Enumerable.Range(0, sampleLength).Select<int, float>(i => i).ToArray();
            return expected.ContentEquals(actual);
        }
    }
    catch
    {
        // Missing .dll or other errors
        return false;
    }
}
/// <summary>
/// Copies the contents of the mapped device memory into the pinned host resource.
/// When host and device strides match, a single block copy of <c>HostSize</c> bytes is done;
/// otherwise <c>Count</c> elements of <c>HostStride</c> bytes are copied one by one, advancing
/// the source by <c>DeviceStride</c>.
/// </summary>
/// <remarks>
/// The pinned handle is freed and the memory unmapped even when a copy throws.
/// </remarks>
public void Read()
{
    IntPtr srcPtr = DeviceMemory.Map(0, HostSize);
    try
    {
        GCHandle handle = GCHandle.Alloc(HostResource, GCHandleType.Pinned);
        try
        {
            IntPtr dstPtr = handle.AddrOfPinnedObject();
            if (HostStride == DeviceStride)
            {
                System.Buffer.MemoryCopy(
                    srcPtr.ToPointer(), dstPtr.ToPointer(), HostSize, HostSize);
            }
            else
            {
                // Strides differ: copy element by element, skipping the device-side padding.
                var srcWalk = (byte*)srcPtr;
                var dstWalk = (byte*)dstPtr;
                for (int i = 0; i < Count; i++)
                {
                    System.Buffer.MemoryCopy(srcWalk, dstWalk, HostStride, HostStride);
                    srcWalk += DeviceStride;
                    dstWalk += HostStride;
                }
            }
        }
        finally
        {
            // Release the pin before unmapping, matching the original release order.
            handle.Free();
        }
    }
    finally
    {
        DeviceMemory.Unmap();
    }
}
/// <summary>
/// Adds the layer's bias to <paramref name="data"/> via cuDNN <c>AddTensor</c> in
/// <c>CUDNN_ADD_SAME_C</c> mode, with the bias described as a 1 x c x 1 x 1 tensor.
/// </summary>
/// <param name="dstTensorDesc">Descriptor of the destination tensor.</param>
/// <param name="layer">Layer whose <c>BiasD</c> buffer supplies the bias values.</param>
/// <param name="c">Channel count used for the bias descriptor.</param>
/// <param name="data">Device memory updated in place.</param>
public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory<float> data)
{
    const float alpha = 1.0f;
    const float beta = 1.0f;

    _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);

    _cudnn.AddTensor(
        CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C,
        alpha, _biasTensorDesc, layer.BiasD.Ptr,
        beta, dstTensorDesc, data.Ptr);
}
/// <summary>
/// Tears down the native device: disposes the empty texel buffer, waits for the device to
/// go idle, hands the upload buffer to the resource collector, disposes the collector and
/// the descriptor pools, and finally destroys the copy command pool and the device itself.
/// </summary>
private unsafe void ReleaseDevice()
{
    EmptyTexelBuffer.Dispose();
    EmptyTexelBuffer = null;

    // Block until every queue has finished its work.
    nativeDevice.WaitIdle();

    // Destroys all remaining fences.
    GetCompletedValue();

    // Queue the upload buffer and its memory for destruction.
    if (nativeUploadBuffer != SharpVulkan.Buffer.Null)
    {
        NativeDevice.UnmapMemory(nativeUploadBufferMemory);

        nativeResourceCollector.Add(lastCompletedFence, nativeUploadBuffer);
        nativeResourceCollector.Add(lastCompletedFence, nativeUploadBufferMemory);

        nativeUploadBuffer = SharpVulkan.Buffer.Null;
        nativeUploadBufferMemory = DeviceMemory.Null;
    }

    // Release everything that was waiting on a fence.
    nativeResourceCollector.Dispose();
    DescriptorPools.Dispose();

    nativeDevice.DestroyCommandPool(NativeCopyCommandPool);
    nativeDevice.Destroy();
}
/// <summary>
/// Allocates a storage buffer plus one descriptor set and updates the set with a single
/// <see cref="WriteDescriptorSet"/> pointing at that buffer.
/// </summary>
public void UpdateSetsDescriptorWrite()
{
    const int bufferSize = 256;

    var storageBinding = new DescriptorSetLayoutBinding(0, DescriptorType.StorageBuffer, 1);
    var layoutCreateInfo = new DescriptorSetLayoutCreateInfo(storageBinding);

    var poolSizes = new[] { new DescriptorPoolSize(DescriptorType.StorageBuffer, 1) };
    var poolCreateInfo = new DescriptorPoolCreateInfo(
        1, poolSizes, DescriptorPoolCreateFlags.FreeDescriptorSet);

    using (Buffer buffer = Device.CreateBuffer(new BufferCreateInfo(bufferSize, BufferUsages.StorageBuffer)))
    using (DeviceMemory memory = Device.AllocateMemory(new MemoryAllocateInfo(bufferSize, 0)))
    using (DescriptorSetLayout layout = Device.CreateDescriptorSetLayout(layoutCreateInfo))
    using (DescriptorPool pool = Device.CreateDescriptorPool(poolCreateInfo))
    using (DescriptorSet set = pool.AllocateSets(new DescriptorSetAllocateInfo(1, layout))[0])
    {
        // The validation layer wants the requirements queried before memory is bound.
        buffer.GetMemoryRequirements();
        buffer.BindMemory(memory);

        var bufferInfos = new[] { new DescriptorBufferInfo(buffer) };
        var write = new WriteDescriptorSet(
            set, 0, 0, 1, DescriptorType.StorageBuffer, bufferInfo: bufferInfos);

        pool.UpdateSets(new[] { write });
    }
}
/// <summary>
/// Builds all host and GPU buffers needed for images of the given size so that later calls
/// do not have to reallocate: histograms, per-pixel images, the LBP pattern lookup table,
/// neighbor coordinates, launch parameters, per-scale filters and scale-space images.
/// </summary>
/// <param name="size">Image size; per-pixel buffers hold <c>Width * Height</c> elements.</param>
private AdaptiveLBP(Size size)
{
    this.size = size;
    int pixelCount = size.Width * size.Height;

    // Histograms and per-pixel working images.
    hist = new int[numSubuniformPatterns * numVarBins];
    hist2 = new int[numSubuniformPatterns * numVarBins];
    lbpImageGPU = worker.Malloc<short>(pixelCount);
    varImageGPU = worker.Malloc<short>(pixelCount);
    histGPU = worker.Malloc<int>(hist.Length);
    floatImageGPU = worker.Malloc<float>(pixelCount);

    // Precompute the subuniform bin of every LBP pattern and upload the table.
    subuniformBins = new short[(short)Math.Pow(2, numNeighbors)];
    for (int pattern = 0; pattern < subuniformBins.Length; pattern++)
    {
        subuniformBins[pattern] = GetPatternNum(pattern);
    }
    subuniformBinsGPU = worker.Malloc(subuniformBins);

    // Unit-circle offsets of the neighbors, evenly spaced by angle.
    neighborCoordinateX = new float[numNeighbors];
    neighborCoordinateY = new float[numNeighbors];
    for (int k = 0; k < numNeighbors; k++)
    {
        double angle = 2.0 * PI * (double)k / (double)numNeighbors;
        neighborCoordinateX[k] = (float)Math.Cos(angle);
        neighborCoordinateY[k] = (float)Math.Sin(angle);
    }
    neighborCoordinateXGPU = worker.Malloc(neighborCoordinateX);
    neighborCoordinateYGPU = worker.Malloc(neighborCoordinateY);
    varBinsGPU = worker.Malloc(varBins);

    // Launch configuration: 8x8 thread blocks, grid rounded up to cover the image.
    var blockDims = new dim3(8, 8);
    var gridDims = new dim3(
        Common.divup(size.Width, blockDims.x),
        Common.divup(size.Height, blockDims.y));
    lp = new LaunchParam(gridDims, blockDims);

    // One Laplacian-of-Gaussian filter per scale (generated with argument scale + 1).
    for (int scale = 0; scale < numScales; scale++)
    {
        float[,] kernel = LaplacianOfGaussian.Generate(scale + 1);
        filters[scale] = Utils.Flatten(kernel);
        filtersGPU[scale] = worker.Malloc(filters[scale]);
        filterSizes[scale] = (kernel.GetLength(0) - 1) / 2;
    }

    // Scale-space images, plus a device array holding their pointers.
    var scalePointers = new deviceptr<float>[numScales];
    for (int scale = 0; scale < numScales; scale++)
    {
        scaledImages[scale] = worker.Malloc<float>(pixelCount);
        scalePointers[scale] = scaledImages[scale].Ptr;
    }
    scaledImagePointers = worker.Malloc(scalePointers);

    pixelScaleImage = worker.Malloc<short>(pixelCount);
}
/// <summary>
/// Wraps an existing image and its backing memory; the arguments are stored as-is.
/// </summary>
/// <param name="manager">Buffer manager associated with the image.</param>
/// <param name="image">The image handle.</param>
/// <param name="memory">Device memory for the image.</param>
/// <param name="format">Format of the image.</param>
public VulkanImage(BufferManager manager, Image image, DeviceMemory memory, Format format)
{
    this.format = format;
    this.memory = memory;
    this.image = image;
    this.manager = manager;
}
/// <summary>
/// Creates an optimally tiled B8G8R8A8UNorm image usable as color attachment, sampled image
/// and transfer source/destination, backs it with device-local memory and wraps it in a texture.
/// </summary>
/// <param name="g">Graphics object whose context supplies the device and memory properties.</param>
/// <param name="width">Width of the image extent.</param>
/// <param name="height">Height of the image extent.</param>
/// <returns>A <see cref="Texture2D"/> owning the image, its memory and its color view.</returns>
public static Texture2D RenderTarget(Graphics g, int width, int height)
{
    const Format format = Format.B8G8R8A8UNorm;

    var imageInfo = new ImageCreateInfo
    {
        ImageType = ImageType.Image2D,
        Format = format,
        MipLevels = 1,
        ArrayLayers = 1,
        Samples = SampleCounts.Count1,
        Tiling = ImageTiling.Optimal,
        SharingMode = SharingMode.Exclusive,
        InitialLayout = ImageLayout.Undefined,
        Extent = new Extent3D(width, height, 1),
        Usage = ImageUsages.Sampled
            | ImageUsages.TransferDst
            | ImageUsages.TransferSrc
            | ImageUsages.ColorAttachment
    };
    Image target = g.Context.Device.CreateImage(imageInfo);

    // Back the image with device-local memory.
    MemoryRequirements requirements = target.GetMemoryRequirements();
    int memoryTypeIndex = g.Context.MemoryProperties.MemoryTypes.IndexOf(
        requirements.MemoryTypeBits, MemoryProperties.DeviceLocal);
    DeviceMemory backing = g.Context.Device.AllocateMemory(
        new MemoryAllocateInfo(requirements.Size, memoryTypeIndex));
    target.BindMemory(backing);

    // Color view over the first mip level and first array layer.
    var colorRange = new ImageSubresourceRange(ImageAspects.Color, 0, 1, 0, 1);
    ImageView colorView = target.CreateView(new ImageViewCreateInfo(format, colorRange));

    // NOTE(review): the sampler is created but never used or disposed in this method —
    // confirm whether CreateSampler has a side effect that is needed, otherwise it leaks.
    var sampler = VKHelper.CreateSampler(g.Context, Filter.Linear, Filter.Linear, SamplerMipmapMode.Linear);

    return new Texture2D(
        g.Context, target, backing, colorView, format, new Vector2I(width, height), true);
}
/// <summary>
/// Allocates <c>NativeMemory</c> from the first memory type that is allowed by
/// <paramref name="memoryRequirements"/> and has all requested property flags.
/// Does nothing when memory is already allocated or the required size is zero.
/// </summary>
/// <param name="memoryProperties">Property flags the chosen memory type must contain.</param>
/// <param name="memoryRequirements">Size and allowed memory type bits of the resource.</param>
/// <exception cref="System.InvalidOperationException">
/// No memory type satisfies both the requirements and the requested properties.
/// </exception>
protected unsafe void AllocateMemory(MemoryPropertyFlags memoryProperties, MemoryRequirements memoryRequirements)
{
    if (NativeMemory != DeviceMemory.Null)
        return;

    if (memoryRequirements.Size == 0)
        return;

    var allocateInfo = new MemoryAllocateInfo
    {
        StructureType = StructureType.MemoryAllocateInfo,
        AllocationSize = memoryRequirements.Size,
    };

    PhysicalDeviceMemoryProperties physicalDeviceMemoryProperties;
    GraphicsDevice.NativePhysicalDevice.GetMemoryProperties(out physicalDeviceMemoryProperties);

    // Bit i of typeBits tells whether memory type i may back this resource.
    var typeBits = memoryRequirements.MemoryTypeBits;
    var memoryTypeFound = false;
    for (uint i = 0; i < physicalDeviceMemoryProperties.MemoryTypeCount; i++)
    {
        if ((typeBits & 1) == 1)
        {
            // Type is available, does it match user properties?
            var memoryType = *((MemoryType*)&physicalDeviceMemoryProperties.MemoryTypes + i);
            if ((memoryType.PropertyFlags & memoryProperties) == memoryProperties)
            {
                allocateInfo.MemoryTypeIndex = i;
                memoryTypeFound = true;
                break;
            }
        }
        typeBits >>= 1;
    }

    // Previously a failed search silently fell through with MemoryTypeIndex == 0,
    // allocating from a type that may not be valid for the resource.
    if (!memoryTypeFound)
    {
        throw new System.InvalidOperationException(
            "No device memory type satisfies the memory requirements and the requested property flags.");
    }

    NativeMemory = GraphicsDevice.NativeDevice.AllocateMemory(ref allocateInfo);
}
/// <summary>
/// Creates a D16Unorm depth image matching the back buffer size, allocates and binds its
/// memory, transitions it to the depth/stencil attachment layout and creates a depth view.
/// </summary>
/// <exception cref="Exception">No suitable memory type was found for the image.</exception>
public void CreateDepth()
{
    ImageCreateInfo imageInfo = new ImageCreateInfo
    {
        ImageType = ImageType.Image2D,
        Format = Format.D16Unorm,
        Extent = new Extent3D
        {
            Width = BackBufferWidth,
            Height = BackBufferHeight,
            Depth = 1,
        },
        MipLevels = 1,
        ArrayLayers = 1,
        Samples = (uint)SampleCountFlags.Count1,
        Tiling = ImageTiling.Optimal,
        Usage = (uint)ImageUsageFlags.DepthStencilAttachment,
        Flags = 0,
    };
    Image image = Device.CreateImage(imageInfo, null);

    MemoryRequirements memReq = Device.GetImageMemoryRequirements(image);
    uint memTypeIndex;
    if (!TryGetMemoryTypeFromProperties(memReq.MemoryTypeBits, 0, out memTypeIndex))
    {
        throw new Exception("Failed to create back buffer");
    }

    MemoryAllocateInfo allocInfo = new MemoryAllocateInfo
    {
        // Was hard-coded to 0; the allocation must be as large as the image requires.
        AllocationSize = memReq.Size,
        MemoryTypeIndex = memTypeIndex,
    };
    DeviceMemory imageMem = Device.AllocateMemory(allocInfo, null);
    Device.BindImageMemory(image, imageMem, 0);

    SetImageLayout(
        image,
        ImageAspectFlags.Depth,
        ImageLayout.Undefined,
        ImageLayout.DepthStencilAttachmentOptimal,
        0);

    ImageViewCreateInfo imageViewInfo = new ImageViewCreateInfo
    {
        Image = image,
        Format = imageInfo.Format,
        SubresourceRange = new ImageSubresourceRange
        {
            AspectMask = (uint)ImageAspectFlags.Depth,
            BaseMipLevel = 0,
            LevelCount = 1,
            BaseArrayLayer = 0,
            LayerCount = 1,
        },
        Flags = 0,
        ViewType = ImageViewType.View2D,
    };

    // NOTE(review): image, imageMem and imageView are locals that are never stored or
    // destroyed here — confirm whether they should be kept in fields.
    ImageView imageView = Device.CreateImageView(imageViewInfo, null);
}
/// <summary>
/// Checks that the GPU fully connected backward-data pass produces the same result as
/// the CPU implementation for a 231-to-125 layer and a random batch of 400 samples.
/// </summary>
public unsafe void FullyConnectedBackwardData()
{
    FullyConnectedLayer fc = new FullyConnectedLayer(
        TensorInfo.Linear(231),
        125,
        ActivationType.Sigmoid,
        WeightsInitializationMode.GlorotUniform,
        BiasInitializationMode.Gaussian);
    Tensor dy = CreateRandomTensor(400, fc.OutputInfo.Size);

    fixed (float* pw = fc.Weights, pb = fc.Biases)
    {
        Tensor.Reshape(pw, fc.InputInfo.Size, fc.OutputInfo.Size, out Tensor w);
        Tensor.Reshape(pb, 1, fc.OutputInfo.Size, out Tensor b);

        // Reference result computed on the CPU.
        Tensor.New(dy.Entities, fc.InputInfo.Size, out Tensor dx1);
        CpuDnn.FullyConnectedBackwardData(w, dy, dx1);

        // Same computation on the GPU, copied back into dx2 for comparison.
        Gpu gpu = Gpu.Default;
        using (DeviceMemory<float>
            dy_gpu = gpu.AllocateDevice(dy),
            w_gpu = gpu.AllocateDevice(w),
            dx_gpu = gpu.AllocateDevice<float>(dx1.Size))
        {
            Dnn.Get(gpu).FullyConnectedBackwardData(
                dy.Entities, fc.InputInfo.Size, fc.OutputInfo.Size,
                dy_gpu.Ptr, w_gpu.Ptr, dx_gpu.Ptr);
            dx_gpu.CopyToHost(dx1.Entities, dx1.Length, out Tensor dx2);

            Assert.IsTrue(dx1.ContentEquals(dx2));
            Tensor.Free(dy, dx1, dx2);
        }
    }
}
/// <summary>
/// Creates a device-local vertex buffer for <paramref name="count"/> instances of
/// <typeparamref name="T"/> together with a host-visible, host-coherent staging buffer of the
/// same size, a fence and a primary command buffer, and wraps them in a <see cref="VKBuffer"/>.
/// </summary>
/// <typeparam name="T">Element type; its size comes from <c>Interop.SizeOf</c>.</typeparam>
/// <param name="g">Graphics object whose context supplies the device and command pool.</param>
/// <param name="count">Number of elements.</param>
/// <returns>The wrapped buffer.</returns>
public static VKBuffer InstanceInfo<T>(Graphics g, int count) where T : struct
{
    // Widen before multiplying so large counts cannot overflow 32-bit arithmetic.
    long size = (long)count * Interop.SizeOf<T>();

    // Create a staging buffer that is writable by host.
    var stagingBuffer = g.Context.Device.CreateBuffer(new BufferCreateInfo(size, BufferUsages.TransferSrc));
    MemoryRequirements stagingReq = stagingBuffer.GetMemoryRequirements();
    int stagingMemoryTypeIndex = g.Context.MemoryProperties.MemoryTypes.IndexOf(
        stagingReq.MemoryTypeBits,
        MemoryProperties.HostVisible | MemoryProperties.HostCoherent);
    DeviceMemory stagingMemory = g.Context.Device.AllocateMemory(
        new MemoryAllocateInfo(stagingReq.Size, stagingMemoryTypeIndex));
    stagingBuffer.BindMemory(stagingMemory);

    // Create a device local buffer where the vertex data will be copied and which will be used for rendering.
    VulkanCore.Buffer buffer = g.Context.Device.CreateBuffer(
        new BufferCreateInfo(size, BufferUsages.VertexBuffer | BufferUsages.TransferDst));
    MemoryRequirements req = buffer.GetMemoryRequirements();
    int memoryTypeIndex = g.Context.MemoryProperties.MemoryTypes.IndexOf(
        req.MemoryTypeBits, MemoryProperties.DeviceLocal);
    DeviceMemory memory = g.Context.Device.AllocateMemory(new MemoryAllocateInfo(req.Size, memoryTypeIndex));
    buffer.BindMemory(memory);

    return new VKBuffer(
        g.Context,
        buffer,
        memory,
        g.Context.Device.CreateFence(),
        count,
        size,
        true,
        stagingBuffer,
        stagingMemory,
        g.Context.GraphicsCommandPool.AllocateBuffers(
            new CommandBufferAllocateInfo(CommandBufferLevel.Primary, 1))[0]);
}
/// <summary>
/// Checks that the GPU fully connected forward pass produces the same result as the
/// CPU implementation for a 231-to-125 layer and a random batch of 400 samples.
/// </summary>
public unsafe void FullyConnectedForward()
{
    FullyConnectedLayer fc = new FullyConnectedLayer(
        TensorInfo.Linear(231),
        125,
        ActivationType.Sigmoid,
        WeightsInitializationMode.GlorotUniform,
        BiasInitializationMode.Gaussian);
    Tensor x = CreateRandomTensor(400, fc.InputInfo.Size);

    fixed (float* pw = fc.Weights, pb = fc.Biases)
    {
        Tensor.Reshape(pw, fc.InputInfo.Size, fc.OutputInfo.Size, out Tensor w);
        Tensor.Reshape(pb, 1, fc.OutputInfo.Size, out Tensor b);

        // Reference result computed on the CPU.
        Tensor.New(x.Entities, fc.OutputInfo.Size, out Tensor y1);
        CpuDnn.FullyConnectedForward(x, w, b, y1);

        // Same computation on the GPU, copied back into y2 for comparison.
        Gpu gpu = Gpu.Default;
        using (DeviceMemory<float>
            x_gpu = gpu.AllocateDevice(x),
            w_gpu = gpu.AllocateDevice(w),
            b_gpu = gpu.AllocateDevice(b),
            y_gpu = gpu.AllocateDevice<float>(y1.Size))
        {
            Dnn.Get(gpu).FullyConnectedForward(
                x.Entities, x.Length, y1.Length,
                x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
            y_gpu.CopyToHost(y1.Entities, y1.Length, out Tensor y2);

            Assert.IsTrue(y1.ContentEquals(y2));
            Tensor.Free(x, y1, y2);
        }
    }
}
/// <summary>
/// Stores the given image, memory, view and format on the new instance.
/// </summary>
/// <param name="image">The image handle.</param>
/// <param name="memory">Device memory for the image.</param>
/// <param name="view">View onto the image.</param>
/// <param name="format">Format of the image.</param>
private VulkanImage(Image image, DeviceMemory memory, ImageView view, Format format)
{
    Format = format;
    View = view;
    Memory = memory;
    Image = image;
}
//[/CudnnMnistFCF]
//[CudnnMnistCF]
/// <summary>
/// Forward convolution through cuDNN followed by the bias addition. The output dimensions
/// are queried from the convolution descriptor and the destination is resized to fit.
/// </summary>
/// <param name="conv">Layer providing filter counts, kernel size, weights and bias.</param>
/// <param name="nchw">Input dimensions; overwritten locally with the output dimensions.</param>
/// <param name="srcData">Device memory holding the input tensor.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
    _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);

    // Find the dimensions of the convolution output:
    // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
    int outN, outC, outH, outW;
    _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out outN, out outC, out outH, out outW);
    nchw.N = outN;
    nchw.C = outC;
    nchw.H = outH;
    nchw.W = outW;

    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    var algorithm = _cudnn.GetConvolutionForwardAlgorithm(
        _srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc,
        CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
        (IntPtr)0);

    Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);

    // The workspace lives only for the duration of this call.
    var workspaceBytes = _cudnn.GetConvolutionForwardWorkspaceSize(
        _srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc, algorithm);
    using (var workspace = _worker.Malloc<byte>(workspaceBytes.ToInt32()))
    {
        const float alpha = 1.0f;
        const float beta = 0.0f;
        _cudnn.ConvolutionForward(
            alpha, _srcTensorDesc, srcData.Ptr,
            _filterDesc, conv.DataD.Ptr,
            _convDesc, algorithm, workspace.Ptr, workspaceBytes,
            beta, _dstTensorDesc, dstData.Ptr);
        AddBias(_dstTensorDesc, conv, outC, dstData);
    }
}
/// <summary>
/// Maps the first <paramref name="size"/> bytes of the buffer memory, copies that many
/// bytes from <paramref name="data"/> into it and unmaps the memory again.
/// </summary>
/// <param name="bufferMem">Device memory to map and write.</param>
/// <param name="size">Number of bytes to map and copy.</param>
/// <param name="data">Source bytes; must hold at least <paramref name="size"/> bytes.</param>
void CopyArrayToBuffer(DeviceMemory bufferMem, DeviceSize size, byte[] data)
{
    var map = device.MapMemory(bufferMem, 0, size);
    try
    {
        Marshal.Copy(data, 0, map, (int)((ulong)size));
    }
    finally
    {
        // Always unmap, so a failed copy (e.g. data shorter than size) does not leave the memory mapped.
        device.UnmapMemory(bufferMem);
    }
}
/// <summary>
/// Records a minimal draw: builds a one-subpass render pass, a 2x2 color image with
/// framebuffer and a rasterizer-discard graphics pipeline, then records begin render pass,
/// bind pipeline, a 3-vertex draw and end render pass into the command buffer.
/// </summary>
public void CmdDraw()
{
    const Format colorFormat = Format.B8G8R8A8UNorm;
    const int side = 2;

    var subpasses = new[]
    {
        new SubpassDescription(
            new[] { new AttachmentReference(0, ImageLayout.ColorAttachmentOptimal) })
    };
    var attachments = new[]
    {
        new AttachmentDescription { Format = colorFormat, Samples = SampleCounts.Count1 }
    };
    var renderPassCreateInfo = new RenderPassCreateInfo(subpasses, attachments);

    var imageCreateInfo = new ImageCreateInfo
    {
        Usage = ImageUsages.ColorAttachment,
        Format = colorFormat,
        Extent = new Extent3D(side, side, 1),
        ImageType = ImageType.Image2D,
        MipLevels = 1,
        ArrayLayers = 1,
        Samples = SampleCounts.Count1
    };
    var imageViewCreateInfo = new ImageViewCreateInfo(
        colorFormat,
        new ImageSubresourceRange(ImageAspects.Color, 0, 1, 0, 1));

    using (ShaderModule vertexShader = Device.CreateShaderModule(new ShaderModuleCreateInfo(ReadAllBytes("Shader.vert.spv"))))
    using (ShaderModule fragmentShader = Device.CreateShaderModule(new ShaderModuleCreateInfo(ReadAllBytes("Shader.frag.spv"))))
    using (PipelineLayout pipelineLayout = Device.CreatePipelineLayout())
    using (RenderPass renderPass = Device.CreateRenderPass(renderPassCreateInfo))
    using (Image image = Device.CreateImage(imageCreateInfo))
    {
        // The image needs device-local memory bound before a view can be created on it.
        MemoryRequirements requirements = image.GetMemoryRequirements();
        int memoryTypeIndex = PhysicalDeviceMemoryProperties.MemoryTypes.IndexOf(
            requirements.MemoryTypeBits, MemoryProperties.DeviceLocal);

        using (DeviceMemory imageMemory = Device.AllocateMemory(new MemoryAllocateInfo(requirements.Size, memoryTypeIndex)))
        {
            image.BindMemory(imageMemory);

            using (ImageView imageView = image.CreateView(imageViewCreateInfo))
            using (Framebuffer framebuffer = renderPass.CreateFramebuffer(
                new FramebufferCreateInfo(new[] { imageView }, side, side)))
            using (Pipeline pipeline = Device.CreateGraphicsPipeline(new GraphicsPipelineCreateInfo(
                pipelineLayout,
                renderPass,
                0,
                new[]
                {
                    new PipelineShaderStageCreateInfo(ShaderStages.Vertex, vertexShader, "main"),
                    new PipelineShaderStageCreateInfo(ShaderStages.Fragment, fragmentShader, "main")
                },
                new PipelineInputAssemblyStateCreateInfo(),
                new PipelineVertexInputStateCreateInfo(),
                new PipelineRasterizationStateCreateInfo
                {
                    RasterizerDiscardEnable = true,
                    LineWidth = 1.0f
                })))
            {
                var renderArea = new Rect2D(0, 0, side, side);

                CommandBuffer.Begin();
                CommandBuffer.CmdBeginRenderPass(new RenderPassBeginInfo(framebuffer, renderArea));
                CommandBuffer.CmdBindPipeline(PipelineBindPoint.Graphics, pipeline);
                CommandBuffer.CmdDraw(3);
                CommandBuffer.CmdEndRenderPass();
                CommandBuffer.End();
            }
        }
    }
}
/// <summary>
/// Captures the device memory, the physical address and the "fonts" directory under the
/// device's system path.
/// </summary>
/// <param name="Device">Device whose memory and virtual file system are used.</param>
/// <param name="PhysicalAddress">Physical address stored on the instance.</param>
public SharedFontManager(Switch Device, long PhysicalAddress)
{
    string systemPath = Device.VFs.GetSystemPath();

    this.PhysicalAddress = PhysicalAddress;
    Memory = Device.Memory;
    FontsPath = Path.Combine(systemPath, "fonts");
}
/// <summary>
/// Get a Windows HANDLE for a memory object.
/// </summary>
/// <param name="memory">The memory object from which the handle will be exported.</param>
/// <param name="handleType">The type of handle requested.</param>
/// <returns>The Windows handle representing the underlying resources of the device memory object.</returns>
/// <exception cref="VulkanException">Vulkan returns an error code.</exception>
public static IntPtr GetWin32HandleKhx(this DeviceMemory memory, ExternalMemoryHandleTypesKhx handleType)
{
    // The native call writes the exported handle through the pointer argument.
    IntPtr exported;
    VulkanException.ThrowForInvalidResult(
        vkGetMemoryWin32HandleKHX(memory.Parent, memory, handleType, &exported));
    return exported;
}
/// <summary>
/// Get a POSIX file descriptor for a memory object.
/// </summary>
/// <param name="memory">The memory object from which the handle will be exported.</param>
/// <param name="handleType">The type of handle requested.</param>
/// <returns>A file descriptor representing the underlying resources of the device memory object.</returns>
/// <exception cref="VulkanException">Vulkan returns an error code.</exception>
public static int GetFdKhx(this DeviceMemory memory, ExternalMemoryHandleTypesKhx handleType)
{
    // The native call writes the exported descriptor through the pointer argument.
    int descriptor;
    VulkanException.ThrowForInvalidResult(
        vkGetMemoryFdKHX(memory.Parent, memory, handleType, &descriptor));
    return descriptor;
}
/// <summary>
/// Adds the layer's bias to <paramref name="data"/> via cuDNN <c>AddTensor</c> in
/// <c>CUDNN_ADD_SAME_C</c> mode, with the bias described as a 1 x c x 1 x 1 tensor.
/// </summary>
/// <param name="dstTensorDesc">Descriptor of the destination tensor.</param>
/// <param name="layer">Layer whose <c>BiasD</c> buffer supplies the bias values.</param>
/// <param name="c">Channel count used for the bias descriptor.</param>
/// <param name="data">Device memory updated in place.</param>
public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory<float> data)
{
    const float alpha = 1.0f;
    const float beta = 1.0f;

    _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);

    _cudnn.AddTensor(
        CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C,
        alpha, _biasTensorDesc, layer.BiasD.Ptr,
        beta, dstTensorDesc, data.Ptr);
}
/// <summary>
/// Maps a range of the destination memory, copies <paramref name="size"/> bytes from the
/// start of <paramref name="source"/> into it and unmaps the memory again.
/// </summary>
/// <param name="this">Device owning the memory.</param>
/// <param name="source">Source bytes; must hold at least <paramref name="size"/> bytes.</param>
/// <param name="destinationBufferMemory">Memory to map and write.</param>
/// <param name="offset">Offset of the mapped range within the memory.</param>
/// <param name="size">Number of bytes to map and copy.</param>
/// <param name="mapFlags">Flags forwarded to <c>MapMemory</c>.</param>
public static void CopyToBufferMemory(this Device @this, byte[] source, DeviceMemory destinationBufferMemory, DeviceSize offset, DeviceSize size, uint mapFlags)
{
    var mappedMemoryPointer = @this.MapMemory(destinationBufferMemory, offset, size, mapFlags);
    try
    {
        Marshal.Copy(source, 0, mappedMemoryPointer, (int)(uint)size);
    }
    finally
    {
        // Always unmap, so a failed copy does not leave the memory mapped.
        @this.UnmapMemory(destinationBufferMemory);
    }
}
/// <summary>
/// Adds <paramref name="memory"/> to the repository and re-sorts the repository with
/// <c>MComp</c>, all while holding the lock on the repository.
/// </summary>
/// <param name="memory">The device memory being added to the repository.</param>
private void Release(DeviceMemory<byte> memory)
{
    lock (_repo)
    {
        // Keep the repository ordered after every insertion.
        _repo.Add(memory);
        _repo.Sort(MComp);
    }
}
/// <summary>
/// Waits for the device to become idle, then releases every Vulkan object held by this
/// instance and clears the corresponding fields. The device, surface and instance go last.
/// </summary>
private void TearDown()
{
    device.WaitIdle();

    // Synchronization primitives.
    renderFinishedSemaphore.Dispose();
    renderFinishedSemaphore = null;
    imageAvailableSemaphore.Dispose();
    imageAvailableSemaphore = null;

    // Vertex data.
    device.FreeMemory(vertexBufferMemory);
    vertexBufferMemory = null;
    vertexBuffer.Dispose();
    vertexBuffer = null;

    commandPool.Dispose();
    commandPool = null;

    foreach (var frameBuffer in frameBuffers)
    {
        frameBuffer.Dispose();
    }
    frameBuffers = null;

    // Shaders and pipeline state.
    fragShader.Dispose();
    fragShader = null;
    vertShader.Dispose();
    vertShader = null;
    pipeline.Dispose();
    pipeline = null;
    pipelineLayout.Dispose();
    pipelineLayout = null;

    foreach (var imageView in swapChainImageViews)
    {
        imageView.Dispose();
    }
    swapChainImageViews = null;

    renderPass.Dispose();
    renderPass = null;
    swapChain.Dispose();
    swapChain = null;

    // Device, surface and instance are released last.
    device.Dispose();
    device = null;
    surface.Dispose();
    surface = null;
    instance.Dispose();
    instance = null;
}
//[CudnnMnistFCF]
/// <summary>
/// Forward pass of a fully connected layer for a single-sample batch: the bias is copied
/// into the destination and the matrix-vector product is then accumulated on top of it.
/// </summary>
/// <param name="ip">Layer providing the output count, weights (<c>DataD</c>) and bias (<c>BiasD</c>).</param>
/// <param name="nchw">Input dimensions; only <c>N == 1</c> is supported.</param>
/// <param name="srcData">Device memory holding the input vector.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
/// <exception cref="Exception">Thrown when <c>nchw.N</c> is not 1.</exception>
public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    if (nchw.N != 1)
    {
        throw new Exception("Not Implemented");
    }

    var inputLength = nchw.C * nchw.H * nchw.W;
    var outputLength = ip.Outputs;
    Resize(ref dstData, outputLength);

    // cuMemcpyDtoD is a raw CUDA API call, so it has to run inside worker.EvalAction.
    // A ref parameter cannot be captured by the lambda, hence the local copy of the reference.
    var destination = dstData;
    _worker.EvalAction(() => CUDAInterop.cuMemcpyDtoD(
        destination.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(outputLength * sizeof(float))));

    // The cuBLAS wrapper already evaluates on the worker, so no extra EvalAction is needed.
    // beta = 1 makes Sgemv add its result to the bias values copied above.
    const float alpha = 1.0f;
    const float beta = 1.0f;
    _cublas.Sgemv(
        CUBLASInterop.cublasOperation_t.CUBLAS_OP_T,
        inputLength, outputLength,
        alpha, ip.DataD.Ptr, inputLength,
        srcData.Ptr, 1,
        beta, dstData.Ptr, 1);

    // NOTE(review): nchw is passed by value; these writes reach the caller only if nchw_t is a reference type.
    nchw.H = 1;
    nchw.W = 1;
    nchw.C = outputLength;
}
/// <summary>
/// Binds <paramref name="memory"/> to <paramref name="image"/> at the given offset and
/// runs <c>CheckError</c> on the native result.
/// </summary>
/// <param name="image">Image to bind memory to.</param>
/// <param name="memory">Memory to bind.</param>
/// <param name="memoryOffset">Offset into <paramref name="memory"/>.</param>
public unsafe void BindImageMemory(Image image, DeviceMemory memory, ulong memoryOffset)
{
    var result = vkBindImageMemory(this, image, memory, memoryOffset);
    result.CheckError();
}
/// <summary>
/// Frees <paramref name="memory"/> by forwarding to the native <c>vkFreeMemory</c>.
/// </summary>
/// <param name="memory">Memory to free.</param>
/// <param name="allocator">Optional allocation callbacks; null by default.</param>
public unsafe void FreeMemory(DeviceMemory memory, AllocationCallbacks* allocator = null)
    => vkFreeMemory(this, memory, allocator);
/// <summary>
/// Queries the committed size of <paramref name="memory"/> through the native
/// <c>vkGetDeviceMemoryCommitment</c>.
/// </summary>
/// <param name="memory">Memory to query.</param>
/// <returns>The value the native call wrote, in bytes.</returns>
public unsafe ulong GetMemoryCommitment(DeviceMemory memory)
{
    // The native call writes its result through the pointer argument.
    ulong committed;
    vkGetDeviceMemoryCommitment(this, memory, &committed);
    return committed;
}
/// <summary>
/// Maps a range of <paramref name="memory"/> and returns the host pointer written by the
/// native <c>vkMapMemory</c>; the native result is passed through <c>CheckError</c>.
/// </summary>
/// <param name="memory">Memory to map.</param>
/// <param name="offset">Offset of the range within the memory.</param>
/// <param name="size">Size of the range.</param>
/// <param name="flags">Mapping flags.</param>
/// <returns>Pointer to the mapped range.</returns>
public unsafe IntPtr MapMemory(DeviceMemory memory, ulong offset, ulong size, MemoryMapFlags flags)
{
    IntPtr mapped;
    var result = vkMapMemory(this, memory, offset, size, flags, &mapped);
    result.CheckError();
    return mapped;
}
// Extern declaration with no managed body; returns a Result code. The name matches the
// Vulkan command vkBindImageMemory — presumably bound by an attribute outside this view (confirm).
internal static unsafe extern Result vkBindImageMemory(Device device, Image image, DeviceMemory memory, ulong memoryOffset);
/// <summary>
/// Creates the vertex buffer for a single triangle (three vertices of six floats each),
/// uploads the vertex data through a host mapping and sets up the matching vertex
/// attribute and binding descriptions.
/// </summary>
/// <remarks>
/// Returns early, leaving the attribute and binding descriptions untouched, when the
/// driver reports a zero-size memory requirement.
/// </remarks>
private void CreateVertexBuffer()
{
    // Six floats per vertex; the two attributes below read three floats each.
    var vertices = new[,]
    {
        {  0.0f, -0.5f, 0.5f, 1.0f, 0.0f, 0.0f },
        {  0.5f,  0.5f, 0.5f, 0.0f, 1.0f, 0.0f },
        { -0.5f,  0.5f, 0.5f, 0.0f, 0.0f, 1.0f },
    };

    var createInfo = new BufferCreateInfo
    {
        StructureType = StructureType.BufferCreateInfo,
        Usage = BufferUsageFlags.VertexBuffer,
        Size = (ulong)(sizeof(float) * vertices.Length)
    };
    vertexBuffer = device.CreateBuffer(ref createInfo);

    MemoryRequirements memoryRequirements;
    device.GetBufferMemoryRequirements(vertexBuffer, out memoryRequirements);

    if (memoryRequirements.Size == 0)
        return;

    var allocateInfo = new MemoryAllocateInfo
    {
        StructureType = StructureType.MemoryAllocateInfo,
        AllocationSize = memoryRequirements.Size,
        // The data is written through a mapping and never flushed explicitly, so the
        // memory must be host-coherent as well as host-visible for the writes to be
        // guaranteed visible to the device.
        MemoryTypeIndex = MemoryTypeFromProperties(
            memoryRequirements.MemoryTypeBits,
            MemoryPropertyFlags.HostVisible | MemoryPropertyFlags.HostCoherent)
    };
    vertexBufferMemory = device.AllocateMemory(ref allocateInfo);

    var mapped = device.MapMemory(vertexBufferMemory, 0, (ulong)createInfo.Size, MemoryMapFlags.None);
    fixed (float* source = &vertices[0, 0])
        Utilities.CopyMemory(mapped, new IntPtr(source), (int)createInfo.Size);
    device.UnmapMemory(vertexBufferMemory);

    device.BindBufferMemory(vertexBuffer, vertexBufferMemory, 0);

    vertexAttributes = new[]
    {
        new VertexInputAttributeDescription
        {
            Binding = 0,
            Location = 0,
            Format = Format.R32G32B32SFloat,
            Offset = 0
        },
        new VertexInputAttributeDescription
        {
            Binding = 0,
            Location = 1,
            Format = Format.R32G32B32SFloat,
            Offset = sizeof(float) * 3
        },
    };

    vertexBindings = new[]
    {
        new VertexInputBindingDescription
        {
            Binding = 0,
            InputRate = VertexInputRate.Vertex,
            Stride = (uint)(sizeof(float) * vertices.GetLength(1))
        }
    };
}
// Extern declaration with no managed body and no return value. The name matches the
// Vulkan command vkUnmapMemory — presumably bound by an attribute outside this view (confirm).
internal static unsafe extern void vkUnmapMemory(Device device, DeviceMemory memory);
/// <summary>
/// Ensures <paramref name="buffer"/> holds at least <paramref name="length"/> elements.
/// A buffer that is too small is disposed and replaced by a new allocation from the
/// worker; the old contents are not copied over.
/// </summary>
/// <param name="buffer">Buffer to grow; replaced when it is too small.</param>
/// <param name="length">Minimum number of elements required.</param>
public void Resize(ref DeviceMemory<float> buffer, int length)
{
    if (buffer.Length < length)
    {
        buffer.Dispose();
        buffer = _worker.Malloc<float>(length);
    }
}
//[/CudnnMnistSF]
//[CudnnMnistAF]
/// <summary>
/// Applies the cuDNN ReLU activation to <paramref name="srcData"/>, writing into
/// <paramref name="dstData"/>, which has the same dimensions as the source.
/// </summary>
/// <param name="nchw">Tensor dimensions shared by source and destination.</param>
/// <param name="srcData">Device memory holding the input tensor.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
public void ActivationForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    const float alpha = 1.0f;
    const float beta = 0.0f;

    Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);

    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

    _cudnn.ActivationForward(
        CUDNNInterop.cudnnActivationMode_t.CUDNN_ACTIVATION_RELU,
        alpha, _srcTensorDesc, srcData.Ptr,
        beta, _dstTensorDesc, dstData.Ptr);
}
//[/CudnnMnistPF]
//[CudnnMnistSF]
/// <summary>
/// Applies the cuDNN softmax (accurate algorithm, channel mode) to
/// <paramref name="srcData"/>, writing into <paramref name="dstData"/>, which has the same
/// dimensions as the source.
/// </summary>
/// <param name="nchw">Tensor dimensions shared by source and destination.</param>
/// <param name="srcData">Device memory holding the input tensor.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
public void SoftmaxForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    const float alpha = 1.0f;
    const float beta = 0.0f;

    Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);

    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

    _cudnn.SoftmaxForward(
        CUDNNInterop.cudnnSoftmaxAlgorithm_t.CUDNN_SOFTMAX_ACCURATE,
        CUDNNInterop.cudnnSoftmaxMode_t.CUDNN_SOFTMAX_MODE_CHANNEL,
        alpha, _srcTensorDesc, srcData.Ptr,
        beta, _dstTensorDesc, dstData.Ptr);
}
//[/CudnnMnistCF]
//[CudnnMnistPF]
/// <summary>
/// Applies 2x2 max pooling with stride 2 and no padding through cuDNN, halving the
/// height and width of the tensor.
/// </summary>
/// <param name="nchw">Input dimensions; H and W are halved locally for the output.</param>
/// <param name="srcData">Device memory holding the input tensor.</param>
/// <param name="dstData">Device memory receiving the output; passed to <c>Resize</c> first.</param>
public void PoolForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    const float alpha = 1.0f;
    const float beta = 0.0f;

    _poolingDesc.Set2D(CUDNNInterop.cudnnPoolingMode_t.CUDNN_POOLING_MAX, 2, 2, 0, 0, 2, 2);

    // Describe the source with the incoming size, then halve H and W for the destination.
    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    nchw.H /= 2;
    nchw.W /= 2;
    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

    Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);

    _cudnn.PoolingForward(
        _poolingDesc,
        alpha, _srcTensorDesc, srcData.Ptr,
        beta, _dstTensorDesc, dstData.Ptr);
}
//[/CudnnMnistFCF]

//[CudnnMnistCF]
/// <summary>
/// Runs the cuDNN convolution forward pass for <paramref name="conv"/> from
/// <paramref name="srcData"/> into <paramref name="dstData"/>, then calls <c>AddBias</c>
/// on the result.
/// </summary>
/// <param name="conv">Layer supplying the filter shape (<c>Outputs</c>, <c>Inputs</c>, <c>KernelDim</c>) and the filter data (<c>DataD</c>).</param>
/// <param name="nchw">
/// Input tensor dimensions; overwritten with the convolution output dimensions.
/// NOTE(review): whether the caller observes that change depends on whether
/// <c>nchw_t</c> is a reference type — confirm against its declaration.
/// </param>
/// <param name="srcData">Device buffer holding the input values.</param>
/// <param name="dstData">Device buffer receiving the output; grown via <c>Resize</c> when too small.</param>
public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
{
    const float alpha = 1.0f;
    const float beta = 0.0f;

    _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
    _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
    _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);

    // Find the dimensions of the convolution output:
    // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
    int outN, outC, outH, outW;
    _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out outN, out outC, out outH, out outW);
    nchw.N = outN;
    nchw.C = outC;
    nchw.H = outH;
    nchw.W = outW;

    _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

    var algorithm = _cudnn.GetConvolutionForwardAlgorithm(
        _srcTensorDesc,
        _filterDesc,
        _convDesc,
        _dstTensorDesc,
        CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
        IntPtr.Zero);

    Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);

    var workspaceBytes = _cudnn.GetConvolutionForwardWorkspaceSize(
        _srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc, algorithm);

    // The scratch buffer stays alive until both the convolution and the bias step have been issued.
    using (var scratch = _worker.Malloc<byte>(workspaceBytes.ToInt32()))
    {
        _cudnn.ConvolutionForward(
            alpha,
            _srcTensorDesc,
            srcData.Ptr,
            _filterDesc,
            conv.DataD.Ptr,
            _convDesc,
            algorithm,
            scratch.Ptr,
            workspaceBytes,
            beta,
            _dstTensorDesc,
            dstData.Ptr);

        AddBias(_dstTensorDesc, conv, outC, dstData);
    }
}
/// <summary>
/// Externally implemented entry point named <c>vkBindBufferMemory</c>.
/// NOTE(review): presumably bound to the Vulkan function of the same name; the
/// import attribute is outside this view, so confirm the binding there.
/// </summary>
/// <param name="device">Device handle forwarded to the native call.</param>
/// <param name="buffer">Buffer handle forwarded to the native call.</param>
/// <param name="memory">Device-memory handle forwarded to the native call.</param>
/// <param name="memoryOffset">Offset value forwarded to the native call.</param>
/// <returns>The <c>Result</c> value reported by the native call.</returns>
internal static unsafe extern Result vkBindBufferMemory(Device device, Buffer buffer, DeviceMemory memory, ulong memoryOffset);
/// <summary>
/// Calls <c>vkBindBufferMemory</c> with this instance as the device and passes the
/// returned result to <c>CheckError</c>.
/// NOTE(review): <c>CheckError</c> presumably throws on a failure result — confirm
/// against its definition.
/// </summary>
/// <param name="buffer">Buffer handle to bind.</param>
/// <param name="memory">Device-memory handle to bind the buffer to.</param>
/// <param name="memoryOffset">Offset value forwarded unchanged to the native call.</param>
public unsafe void BindBufferMemory(Buffer buffer, DeviceMemory memory, ulong memoryOffset)
{
    var bindResult = vkBindBufferMemory(this, buffer, memory, memoryOffset);
    bindResult.CheckError();
}
//[/LockPositions]

/// <summary>
/// Creates the 800x600 simulation window and prepares everything the simulation needs:
/// simulation constants, the GPU worker, the queue of simulators, two OpenGL array buffers
/// registered with CUDA, and the initial body velocities and positions uploaded to the device.
/// Finishes by calling <c>Help</c> and <c>Description</c>.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the size OpenGL reports for a freshly created buffer differs from the requested size.
/// </exception>
public SimWindow() : base(800, 600, GraphicsMode.Default, "Gravitational n-body simulation")
{
    const float clusterScale = 1.0f;
    const float velocityScale = 1.0f;

    _numBodies = 256*64;
    _deltaTime = 0.001f;
    _softeningSquared = 0.00125f;
    _damping = 0.9995f;

    //[CreateWorker]
    _worker = Worker.CreateByFunc(Generate);
    //[/CreateWorker]

    _stopwatch = Stopwatch.StartNew();
    _fpsCalcLag = 128;
    _frameCounter = 0;

    //[CreateSimulatros]
    _simulators = new Queue<ISimulator>();
    var target = GPUModuleTarget.Worker(_worker);

    // Every module created here is released through _disposeSimulators below.
    var dynamicModule = new GpuDynamicSimulatorModule(target);
    var dynamic64 = dynamicModule.Create(64);
    var dynamic128 = dynamicModule.Create(128);
    var dynamic256 = dynamicModule.Create(256);
    var dynamic512 = dynamicModule.Create(512);

    var static64 = new GpuStaticSimulatorModule64(target);
    var static128 = new GpuStaticSimulatorModule128(target);
    var static256 = new GpuStaticSimulatorModule256(target);
    var static512 = new GpuStaticSimulatorModule512(target);

    var enqueueOrder = new ISimulator[]
    {
        // First, enqueue one simulator with block size 256 so performance can be compared with the C code.
        static256,
        // Then the dynamic block size simulators.
        dynamic64, dynamic128, dynamic256, dynamic512,
        // Then the static block size simulators (256 appears a second time on purpose).
        static64, static128, static256, static512
    };
    foreach (var queued in enqueueOrder)
    {
        _simulators.Enqueue(queued);
    }

    // We do not enqueue any cpu simulator as it is much too slow.
    //_simulators.Enqueue(new CpuSimulator(_worker, _numBodies));

    _disposeSimulators = () =>
    {
        dynamicModule.Dispose();
        static64.Dispose();
        static128.Dispose();
        static256.Dispose();
        static512.Dispose();
    };

    _simulator = _simulators.Dequeue();
    //[/CreateSimulatros]

    //[CreateBuffers]
    _buffers = new uint[2];
    for (var slot = 0; slot < _buffers.Length; slot++)
    {
        _buffers[slot] = 0;
    }
    GL.GenBuffers(_buffers.Length, _buffers);

    // One float4 per body; the same byte count is requested and then verified for every buffer.
    var bufferBytes = Microsoft.FSharp.Core.Operators.SizeOf<float4>()*_numBodies;
    foreach (var bufferId in _buffers)
    {
        GL.BindBuffer(BufferTarget.ArrayBuffer, bufferId);
        GL.BufferData(BufferTarget.ArrayBuffer, (IntPtr) bufferBytes, IntPtr.Zero, BufferUsageHint.DynamicDraw);

        var reportedBytes = 0;
        unsafe
        {
            GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, &reportedBytes);
        }
        if (reportedBytes != bufferBytes)
        {
            throw new Exception("Pixel Buffer Object allocation failed!");
        }

        GL.BindBuffer(BufferTarget.ArrayBuffer, 0);
        CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(bufferId));
    }

    _resources = new IntPtr[_buffers.Length];
    for (var slot = 0; slot < _buffers.Length; slot++)
    {
        var handle = IntPtr.Zero;
        unsafe
        {
            CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsGLRegisterBuffer(&handle, _buffers[slot], 0u));
        }
        _resources[slot] = handle;
    }
    //[/CreateBuffers]

    //[FinalizeGL]
    _vel = _worker.Malloc<float4>(_numBodies);

    float4[] hostPositions, hostVelocities;
    BodyInitializer.Initialize(new BodyInitializer3(), clusterScale, velocityScale, _numBodies,
        out hostPositions, out hostVelocities);

    var unspecified = Microsoft.FSharp.Core.FSharpOption<int>.None;
    _worker.Scatter(hostVelocities, _vel.Ptr, unspecified, unspecified);
    LockPos((pos0, pos1) => _worker.Scatter(hostPositions, pos1, unspecified, unspecified));

    Help();
    Description();
    //[/FinalizeGL]
}
/// <summary>
/// Externally implemented entry point named <c>vkAllocateMemory</c>.
/// NOTE(review): presumably bound to the Vulkan function of the same name; the
/// import attribute is outside this view, so confirm the binding there.
/// </summary>
/// <param name="device">Device handle forwarded to the native call.</param>
/// <param name="allocateInfo">Pointer to the allocation description.</param>
/// <param name="allocator">Pointer to allocation callbacks. NOTE(review): presumably may be null — confirm.</param>
/// <param name="memory">Pointer to a <c>DeviceMemory</c> handle. NOTE(review): presumably written by the native call — confirm.</param>
/// <returns>The <c>Result</c> value reported by the native call.</returns>
internal static unsafe extern Result vkAllocateMemory(Device device, MemoryAllocateInfo* allocateInfo, AllocationCallbacks* allocator, DeviceMemory* memory);
/// <summary>
/// Externally implemented entry point named <c>vkMapMemory</c>.
/// NOTE(review): presumably bound to the Vulkan function of the same name; the
/// import attribute is outside this view, so confirm the binding there.
/// </summary>
/// <param name="device">Device handle forwarded to the native call.</param>
/// <param name="memory">Device-memory handle forwarded to the native call.</param>
/// <param name="offset">Offset value forwarded to the native call.</param>
/// <param name="size">Size value forwarded to the native call.</param>
/// <param name="flags">Mapping flags forwarded to the native call.</param>
/// <param name="data">Pointer to an <c>IntPtr</c>. NOTE(review): presumably receives the mapped address — confirm.</param>
/// <returns>The <c>Result</c> value reported by the native call.</returns>
internal static unsafe extern Result vkMapMemory(Device device, DeviceMemory memory, ulong offset, ulong size, MemoryMapFlags flags, IntPtr* data);
/// <summary>
/// Calls <c>vkUnmapMemory</c> with this instance as the device and
/// <paramref name="memory"/> as the memory handle.
/// </summary>
/// <param name="memory">Device-memory handle forwarded to the native call.</param>
public unsafe void UnmapMemory(DeviceMemory memory)
{
    vkUnmapMemory(this, memory);
}
/// <summary>
/// Externally implemented entry point named <c>vkFreeMemory</c>.
/// NOTE(review): presumably bound to the Vulkan function of the same name; the
/// import attribute is outside this view, so confirm the binding there.
/// </summary>
/// <param name="device">Device handle forwarded to the native call.</param>
/// <param name="memory">Device-memory handle forwarded to the native call.</param>
/// <param name="allocator">Pointer to allocation callbacks. NOTE(review): presumably may be null — confirm.</param>
internal static unsafe extern void vkFreeMemory(Device device, DeviceMemory memory, AllocationCallbacks* allocator);
/// <summary>
/// Externally implemented entry point named <c>vkGetDeviceMemoryCommitment</c>.
/// NOTE(review): presumably bound to the Vulkan function of the same name; the
/// import attribute is outside this view, so confirm the binding there.
/// </summary>
/// <param name="device">Device handle forwarded to the native call.</param>
/// <param name="memory">Device-memory handle forwarded to the native call.</param>
/// <param name="committedMemoryInBytes">Pointer to a <c>ulong</c>. NOTE(review): presumably receives the committed byte count — confirm.</param>
internal static unsafe extern void vkGetDeviceMemoryCommitment(Device device, DeviceMemory memory, ulong* committedMemoryInBytes);