Beispiel #1
0
        /// <summary>
        /// Creates a single-sample, optimally tiled 2D image in the context's depth/stencil format,
        /// binds newly allocated device-local memory to it and wraps it in a <see cref="Texture2D"/>.
        /// </summary>
        /// <param name="ctx">Context providing the device, the depth/stencil format and the memory properties.</param>
        /// <param name="width">Width used for the image extent and the texture size.</param>
        /// <param name="height">Height used for the image extent and the texture size.</param>
        /// <returns>A texture wrapping the image, its memory and a depth + stencil view.</returns>
        internal static Texture2D DepthStencil(Context ctx, int width, int height)
        {
            var depthFormat = ctx.DepthStencilFormat;

            // One mip level, one array layer, usable as depth/stencil attachment and as a transfer source.
            var imageInfo = new ImageCreateInfo
            {
                ImageType = ImageType.Image2D,
                Format = depthFormat,
                Extent = new Extent3D(width, height, 1),
                MipLevels = 1,
                ArrayLayers = 1,
                Samples = SampleCounts.Count1,
                Tiling = ImageTiling.Optimal,
                Usage = ImageUsages.DepthStencilAttachment | ImageUsages.TransferSrc
            };
            Image depthImage = ctx.Device.CreateImage(imageInfo);

            // Pick a device-local memory type compatible with the image and back the image with it.
            MemoryRequirements requirements = depthImage.GetMemoryRequirements();
            int memoryTypeIndex = ctx.MemoryProperties.MemoryTypes.IndexOf(
                requirements.MemoryTypeBits,
                MemoryProperties.DeviceLocal);
            var allocateInfo = new MemoryAllocateInfo(requirements.Size, memoryTypeIndex);
            DeviceMemory depthMemory = ctx.Device.AllocateMemory(allocateInfo);
            depthImage.BindMemory(depthMemory);

            // The view covers both the depth and the stencil aspect of mip 0 / layer 0.
            var aspects = ImageAspects.Depth | ImageAspects.Stencil;
            var viewInfo = new ImageViewCreateInfo(depthFormat, new ImageSubresourceRange(aspects, 0, 1, 0, 1));
            ImageView depthView = depthImage.CreateView(viewInfo);

            return new Texture2D(ctx, depthImage, depthMemory, depthView, depthFormat, new Vector2I(width, height), false);
        }
Beispiel #2
0
        /// <summary>
        /// Creates a buffer large enough for <paramref name="count"/> elements of <c>T</c>, allocates
        /// memory that satisfies the buffer's requirements and binds that memory to the buffer.
        /// </summary>
        /// <param name="name">Value stored in <c>Name</c>.</param>
        /// <param name="graphics">Graphics object whose device creates the buffer and allocates the memory.</param>
        /// <param name="count">Number of elements; the byte size is <c>count * Interop.SizeOf&lt;T&gt;()</c>.</param>
        /// <param name="usages">Buffer usage flags.</param>
        /// <param name="memoryProperties">Properties the selected memory type has to provide.</param>
        /// <param name="flags">Buffer creation flags.</param>
        /// <param name="sharingMode">Sharing mode forwarded to the buffer create info.</param>
        /// <param name="queueFamilyIndices">Queue family indices forwarded to the buffer create info.</param>
        public VKBuffer(
            string name,
            Graphics graphics,
            long count,
            BufferUsages usages,
            MemoryProperties memoryProperties,
            BufferCreateFlags flags = BufferCreateFlags.None,
            SharingMode sharingMode = SharingMode.Exclusive,
            int[] queueFamilyIndices = null)
        {
            Name = name;
            Graphics = graphics;
            Count = count;
            Size = count * Interop.SizeOf<T>();
            Usages = usages;

            // Create the buffer object itself.
            var bufferInfo = new BufferCreateInfo(
                size: Size,
                usages: usages,
                flags: flags,
                sharingMode: sharingMode,
                queueFamilyIndices: queueFamilyIndices);
            Buffer = Graphics.Device.CreateBuffer(bufferInfo);

            // Allocate memory of the size and type the buffer asks for, then attach it.
            var requirements = Buffer.GetMemoryRequirements();
            var memoryTypeIndex = graphics.GetMemoryTypeIndex(requirements.MemoryTypeBits, memoryProperties);
            DeviceMemory = Graphics.Device.AllocateMemory(new MemoryAllocateInfo(requirements.Size, memoryTypeIndex));
            Buffer.BindMemory(DeviceMemory);
        }
 /// <inheritdoc cref="IDisposable.Dispose"/>
 /// <remarks>
 /// Disposes the mapped view (if any) and the underlying memory, then clears both references.
 /// Calling this method more than once is safe: later calls find both fields null and do nothing.
 /// </remarks>
 public void Dispose()
 {
     _mapped?.Dispose();
     _mapped = null;

     // Null-conditional: _memory is cleared below, so an unconditional call would throw
     // NullReferenceException on a second Dispose().
     _memory?.Dispose();
     _memory = null;
 }
Beispiel #4
0
        /// <summary>
        /// Maps <paramref name="bufferMem"/>, copies <paramref name="bitmapSize"/> bytes from
        /// <paramref name="scan0"/> into the mapping and unmaps the memory again.
        /// </summary>
        /// <param name="scan0">Pointer to the first byte of the source bitmap data.</param>
        /// <param name="bitmapSize">Number of bytes handed to <c>Copy</c>.</param>
        /// <param name="bufferMem">Device memory to map and fill.</param>
        /// <param name="memRequirements">Requirements whose <c>Size</c> is used as the length of the mapping.</param>
        void CopyBitmapToBuffer(IntPtr scan0, int bitmapSize, DeviceMemory bufferMem, MemoryRequirements memRequirements)
        {
            var map = device.MapMemory(bufferMem, 0, memRequirements.Size);

            try
            {
                Copy(scan0, map, bitmapSize);
            }
            finally
            {
                // Always release the mapping, even when the copy fails.
                device.UnmapMemory(bufferMem);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Test fixture setup: resolves the shell and factory from the application container, registers the
        /// device under test as a connectable item, gives it a fresh <c>DeviceMemory</c> and looks up the view
        /// models and the read command used by the tests.
        /// </summary>
        public ConnectionTests()
        {
            var unityContainer = Program.GetApp().Container.Resolve(typeof(IUnityContainer)) as IUnityContainer;
            _typesContainer = new TypesContainer(unityContainer);

            _device = Program.GetDevice();
            _configuration = _device.DeviceFragments.First(fragment => fragment.StrongName == "Configuration")
                as IDeviceConfiguration;

            _shell = _typesContainer.Resolve<ShellViewModel>();
            _deviceViewModelFactory = _typesContainer.Resolve<IDeviceViewModelFactory>();
            var deviceMemory = new DeviceMemory();

            _typesContainer.Resolve<IDevicesContainerService>().AddConnectableItem(_device);
            _device.DeviceMemory = deviceMemory;

            // The first device view model is the one under test; fetch it once and reuse it.
            var deviceViewModel = _shell.ProjectBrowserViewModel.DeviceViewModels[0];
            _deviceViewModel = deviceViewModel;

            _configurationFragmentViewModel = deviceViewModel.FragmentViewModels
                .First(model => model.NameForUiKey == "Configuration") as RuntimeConfigurationViewModel;
            _measuringMonitorViewModel = deviceViewModel.FragmentViewModels
                .First(model => model.NameForUiKey == "MeasuringMonitor") as MeasuringMonitorViewModel;

            // Read command: option group "Device" -> command whose title key is READ_STRING_KEY.
            _readCommand = _configurationFragmentViewModel.FragmentOptionsViewModel.FragmentOptionGroupViewModels
                .First(model => model.NameKey == "Device").FragmentOptionCommandViewModels
                .First(model => model.TitleKey == ApplicationGlobalNames.UiCommandStrings.READ_STRING_KEY)
                .OptionCommand as RelayCommand;
        }
Beispiel #6
0
        //[CudnnMnistFCF]
        /// <summary>
        /// Forward pass of a fully connected layer for a single-sample batch.
        /// </summary>
        /// <remarks>
        /// <paramref name="dstData"/> is resized to <c>ip.Outputs</c> elements and pre-filled with the layer
        /// bias; the matrix-vector product is then accumulated on top of it (beta = 1). On return
        /// <paramref name="nchw"/> is updated to C = <c>ip.Outputs</c>, H = 1, W = 1.
        /// </remarks>
        /// <param name="ip">Layer providing the output count, the weights (<c>DataD</c>) and the bias (<c>BiasD</c>).</param>
        /// <param name="nchw">Input dimensions; only N == 1 is handled.</param>
        /// <param name="srcData">Input vector on the device.</param>
        /// <param name="dstData">Output vector on the device; resized by this call.</param>
        /// <exception cref="NotImplementedException">Thrown when <c>nchw.N</c> is not 1.</exception>
        public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            if (nchw.N != 1)
            {
                // More specific than a bare Exception; existing catch (Exception) handlers still match.
                throw new NotImplementedException("Not Implemented");
            }
            var dimX = nchw.C * nchw.H * nchw.W;
            var dimY = ip.Outputs;

            Resize(ref dstData, dimY);

            const float alpha = 1.0f;
            const float beta  = 1.0f;

            // cuMemcpyDtoD is a raw CUDA API call, so it has to be guarded with worker.Eval.
            // A ref parameter cannot be captured by a lambda, hence the local copy.
            var output = dstData;

            _worker.EvalAction(() => CUDAInterop.cuMemcpyDtoD(output.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(dimY * sizeof(float))));

            // This cuBLAS call does not need worker.Eval: the cuBLAS wrapper is a thin layer over the
            // raw API and already goes through worker.Eval itself.
            _cublas.Sgemv(CUBLASInterop.cublasOperation_t.CUBLAS_OP_T, dimX, dimY, alpha, ip.DataD.Ptr, dimX,
                          srcData.Ptr, 1, beta, dstData.Ptr, 1);

            nchw.H = 1;
            nchw.W = 1;
            nchw.C = dimY;
        }
Beispiel #7
0
        /// <summary>
        /// Creates an image, binds freshly allocated memory to it and creates (then immediately disposes)
        /// a colour view of it, once with the default allocator and once with <c>CustomAllocator</c>.
        /// </summary>
        public void BindMemoryAndCreateView()
        {
            using (Image testImage = CreateImage())
            {
                PhysicalDeviceMemoryProperties physicalMemProps = PhysicalDevice.GetMemoryProperties();
                MemoryRequirements requirements = testImage.GetMemoryRequirements();

                // No particular memory property is requested (0); any type allowed by the image will do.
                var allocateInfo = new MemoryAllocateInfo(
                    requirements.Size,
                    physicalMemProps.MemoryTypes.IndexOf(requirements.MemoryTypeBits, 0));

                using (DeviceMemory imageMemory = Device.AllocateMemory(allocateInfo))
                {
                    testImage.BindMemory(imageMemory);

                    var colorRange = new ImageSubresourceRange
                    {
                        AspectMask = ImageAspects.Color,
                        LayerCount = 1,
                        LevelCount = 1
                    };
                    var viewInfo = new ImageViewCreateInfo
                    {
                        Format = Format.B8G8R8A8UNorm,
                        ViewType = ImageViewType.Image2D,
                        SubresourceRange = colorRange
                    };

                    // Each view only needs to be created and released successfully.
                    using (testImage.CreateView(viewInfo))
                    {
                    }
                    using (testImage.CreateView(viewInfo, CustomAllocator))
                    {
                    }
                }
            }
        }
Beispiel #8
0
        /// <summary>
        /// Creates a chunk of <paramref name="size"/> bytes: registers one free block spanning the whole
        /// chunk, selects a memory type for the requested location and allocates the backing memory.
        /// </summary>
        /// <param name="logicalDevice">Device used to allocate the memory.</param>
        /// <param name="hostDevice">Device queried for the memory type index.</param>
        /// <param name="location">Device location selects device-local memory, anything else host-visible memory.</param>
        /// <param name="supportedMemoryTypesFilter">Filter forwarded to the memory type lookup.</param>
        /// <param name="size">Size of the chunk in bytes.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="logicalDevice"/> or <paramref name="hostDevice"/> is null.
        /// </exception>
        internal Chunk(
            Device logicalDevice,
            HostDevice hostDevice,
            Location location,
            int supportedMemoryTypesFilter,
            long size = 128 * ByteUtils.MEGABYTE_TO_BYTE)
        {
            if (logicalDevice == null)
            {
                throw new ArgumentNullException(nameof(logicalDevice));
            }
            if (hostDevice == null)
            {
                throw new ArgumentNullException(nameof(hostDevice));
            }

            this.logicalDevice = logicalDevice;
            this.location = location;
            totalSize = size;

            // Initially the whole chunk is one free block.
            var wholeChunk = new Block(container: this, offset: 0, size: size);
            freeBlocks.Add(wholeChunk);

            // Choose the memory properties that correspond to the requested location.
            MemoryProperties requiredProperties;
            if (location == Location.Device)
            {
                requiredProperties = MemoryProperties.DeviceLocal;
            }
            else
            {
                requiredProperties = MemoryProperties.HostVisible;
            }
            memoryTypeIndex = hostDevice.GetMemoryType(
                properties: requiredProperties,
                supportedTypesFilter: supportedMemoryTypesFilter);

            // Allocate the backing memory for the chunk.
            var allocateInfo = new MemoryAllocateInfo(
                allocationSize: size,
                memoryTypeIndex: memoryTypeIndex);
            memory = logicalDevice.AllocateMemory(allocateInfo);
        }
Beispiel #9
0
            /// <summary>
            /// Probes whether GPU acceleration can be used: a default GPU must exist, cuDNN must report
            /// itself available, and a small kernel must run and produce the expected values.
            /// </summary>
            /// <returns>
            /// <c>true</c> when every probe succeeds; <c>false</c> otherwise, including when any step throws.
            /// </returns>
            public static bool IsGpuAccelerationSupported()
            {
                const int sampleLength = 1024;

                try
                {
                    // CUDA device first, then cuDNN
                    Gpu device = Gpu.Default;
                    if (device == null || !Dnn.IsAvailable)
                    {
                        return false;
                    }

                    using (DeviceMemory<float> deviceSample = device.AllocateDevice<float>(sampleLength))
                    {
                        deviceptr<float> samplePtr = deviceSample.Ptr;
                        void Fill(int i) => samplePtr[i] = i;

                        // Running the kernel exercises the JIT path
                        Alea.Parallel.GpuExtension.For(device, 0, sampleLength, Fill);

                        float[] hostSample = Gpu.CopyToHost(deviceSample);
                        float[] expected = Enumerable.Range(0, sampleLength).Select<int, float>(i => i).ToArray();
                        return expected.ContentEquals(hostSample);
                    }
                }
                catch
                {
                    // A missing .dll or any other failure means acceleration is unavailable
                    return false;
                }
            }
        /// <summary>
        /// Copies the mapped contents of <c>DeviceMemory</c> into <c>HostResource</c>.
        /// </summary>
        /// <remarks>
        /// When the host and device strides are equal, one copy of <c>HostSize</c> bytes is performed.
        /// Otherwise <c>Count</c> elements are copied one at a time, <c>HostStride</c> bytes each, advancing
        /// the source by <c>DeviceStride</c> and the destination by <c>HostStride</c>.
        /// The pinned handle is freed and the memory is unmapped even when a step in between throws.
        /// </remarks>
        public void Read()
        {
            IntPtr srcPtr = DeviceMemory.Map(0, HostSize);
            try
            {
                GCHandle handle = GCHandle.Alloc(HostResource, GCHandleType.Pinned);
                try
                {
                    IntPtr dstPtr = handle.AddrOfPinnedObject();

                    if (HostStride == DeviceStride)
                    {
                        // Identical layout on both sides: copy everything at once.
                        System.Buffer.MemoryCopy(
                            srcPtr.ToPointer(),
                            dstPtr.ToPointer(),
                            HostSize,
                            HostSize);
                    }
                    else
                    {
                        // Different strides: copy element by element.
                        var srcWalk = (byte *)srcPtr;
                        var dstWalk = (byte *)dstPtr;
                        for (int i = 0; i < Count; i++)
                        {
                            System.Buffer.MemoryCopy(srcWalk, dstWalk, HostStride, HostStride);
                            srcWalk += DeviceStride;
                            dstWalk += HostStride;
                        }
                    }
                }
                finally
                {
                    // Unpin the host resource no matter how the copy ended.
                    handle.Free();
                }
            }
            finally
            {
                // The mapping must not outlive this call, even on failure.
                DeviceMemory.Unmap();
            }
        }
Beispiel #11
0
 /// <summary>
 /// Adds the bias tensor of <paramref name="layer"/> to <paramref name="data"/> with cuDNN's
 /// <c>AddTensor</c> in <c>CUDNN_ADD_SAME_C</c> mode.
 /// </summary>
 /// <param name="dstTensorDesc">Descriptor of the tensor stored in <paramref name="data"/>.</param>
 /// <param name="layer">Layer whose <c>BiasD</c> buffer is added.</param>
 /// <param name="c">Channel count used for the bias descriptor.</param>
 /// <param name="data">Device buffer that receives the result.</param>
 public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory<float> data)
 {
     // Both scaling factors (alpha and beta) passed to AddTensor are 1.
     const float unit = 1.0f;

     // Describe the bias as a 4D tensor with dimensions (1, c, 1, 1).
     _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);

     _cudnn.AddTensor(
         CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C,
         unit,
         _biasTensorDesc,
         layer.BiasD.Ptr,
         unit,
         dstTensorDesc,
         data.Ptr);
 }
Beispiel #12
0
        /// <summary>
        /// Tears down the native device: disposes the empty texel buffer, waits for the device to go idle,
        /// hands the upload buffer (if any) to the resource collector, disposes the collector and the
        /// descriptor pools, then destroys the copy command pool and the device itself.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the statements depend on this exact order (the device is destroyed last);
        /// do not reorder without checking every resource's lifetime.
        /// </remarks>
        private unsafe void ReleaseDevice()
        {
            EmptyTexelBuffer.Dispose();
            EmptyTexelBuffer = null;

            // Wait for all queues to be idle
            nativeDevice.WaitIdle();

            // Destroy all remaining fences
            // NOTE(review): presumably GetCompletedValue() also refreshes lastCompletedFence, which is used below - confirm.
            GetCompletedValue();

            // Mark upload buffer for destruction
            if (nativeUploadBuffer != SharpVulkan.Buffer.Null)
            {
                // Unmap first, then queue the buffer and its memory on the collector under lastCompletedFence.
                NativeDevice.UnmapMemory(nativeUploadBufferMemory);
                nativeResourceCollector.Add(lastCompletedFence, nativeUploadBuffer);
                nativeResourceCollector.Add(lastCompletedFence, nativeUploadBufferMemory);

                // Reset the handles so the upload buffer is not queued a second time.
                nativeUploadBuffer       = SharpVulkan.Buffer.Null;
                nativeUploadBufferMemory = DeviceMemory.Null;
            }

            // Release fenced resources
            nativeResourceCollector.Dispose();
            DescriptorPools.Dispose();

            // The command pool has to go before the device that owns it.
            nativeDevice.DestroyCommandPool(NativeCopyCommandPool);
            nativeDevice.Destroy();
        }
        /// <summary>
        /// Creates a storage buffer with bound memory plus a descriptor set, and updates binding 0 of the
        /// set with a single storage-buffer descriptor write.
        /// </summary>
        public void UpdateSetsDescriptorWrite()
        {
            const int bufferSize = 256;

            // Layout: one storage buffer at binding 0.
            var storageBinding = new DescriptorSetLayoutBinding(0, DescriptorType.StorageBuffer, 1);
            var layoutCreateInfo = new DescriptorSetLayoutCreateInfo(storageBinding);

            // Pool: room for one set holding one storage buffer descriptor; sets may be freed individually.
            var poolSizes = new[] { new DescriptorPoolSize(DescriptorType.StorageBuffer, 1) };
            var poolCreateInfo = new DescriptorPoolCreateInfo(
                1,
                poolSizes,
                DescriptorPoolCreateFlags.FreeDescriptorSet);

            using (Buffer buffer = Device.CreateBuffer(new BufferCreateInfo(bufferSize, BufferUsages.StorageBuffer)))
            using (DeviceMemory memory = Device.AllocateMemory(new MemoryAllocateInfo(bufferSize, 0)))
            using (DescriptorSetLayout layout = Device.CreateDescriptorSetLayout(layoutCreateInfo))
            using (DescriptorPool pool = Device.CreateDescriptorPool(poolCreateInfo))
            using (DescriptorSet set = pool.AllocateSets(new DescriptorSetAllocateInfo(1, layout))[0])
            {
                // Querying the requirements before binding is required to satisfy the validation layer.
                buffer.GetMemoryRequirements();
                buffer.BindMemory(memory);

                var bufferInfos = new[] { new DescriptorBufferInfo(buffer) };
                var write = new WriteDescriptorSet(
                    set, 0, 0, 1, DescriptorType.StorageBuffer,
                    bufferInfo: bufferInfos);

                pool.UpdateSets(new[] { write });
            }
        }
Beispiel #14
0
        /// <summary>
        /// Allocates the host and GPU buffers needed for images of the given size, precomputes the lookup
        /// tables (subuniform bins, neighbour coordinates), and prepares the launch parameters, the
        /// per-scale filters and the scale-space images.
        /// </summary>
        /// <param name="size">Image dimensions; <c>Width * Height</c> elements are allocated per image buffer.</param>
        private AdaptiveLBP(Size size)
        {
            this.size = size;

            var pixelCount = size.Width * size.Height;

            // initialize data structures to avoid reallocating with every call
            hist          = new int[numSubuniformPatterns * numVarBins];
            hist2         = new int[numSubuniformPatterns * numVarBins];
            lbpImageGPU   = worker.Malloc<short>(pixelCount);
            varImageGPU   = worker.Malloc<short>(pixelCount);
            histGPU       = worker.Malloc<int>(hist.Length);
            floatImageGPU = worker.Malloc<float>(pixelCount);

            // precompute the subuniform bin for each LBP pattern, and push it to the GPU.
            // There are 2^numNeighbors patterns; an integer shift avoids the round trip through double and
            // the narrowing cast to short, which overflows once numNeighbors reaches 15.
            subuniformBins = new short[1 << numNeighbors];
            for (int i = 0; i < subuniformBins.Length; i++)
            {
                subuniformBins[i] = GetPatternNum(i);
            }
            subuniformBinsGPU = worker.Malloc(subuniformBins);

            // neighbour offsets: cosine and sine of the i-th of numNeighbors equally spaced angles
            neighborCoordinateX = new float[numNeighbors];
            neighborCoordinateY = new float[numNeighbors];
            for (int i = 0; i < numNeighbors; i++)
            {
                double angle = 2.0 * PI * (double)i / (double)numNeighbors;
                neighborCoordinateX[i] = (float)Math.Cos(angle);
                neighborCoordinateY[i] = (float)Math.Sin(angle);
            }
            neighborCoordinateXGPU = worker.Malloc(neighborCoordinateX);
            neighborCoordinateYGPU = worker.Malloc(neighborCoordinateY);

            varBinsGPU = worker.Malloc(varBins);

            // initialize CUDA parameters: 8x8 thread blocks, grid rounded up to cover the whole image
            var blockDims = new dim3(8, 8);
            var gridDims  = new dim3(Common.divup(size.Width, blockDims.x), Common.divup(size.Height, blockDims.y));

            lp = new LaunchParam(gridDims, blockDims);

            // create filters: one Laplacian-of-Gaussian filter per scale, flattened and uploaded
            for (int i = 0; i < numScales; i++)
            {
                float[,] filter = LaplacianOfGaussian.Generate(i + 1);
                filters[i]      = Utils.Flatten(filter);
                filtersGPU[i]   = worker.Malloc(filters[i]);
                filterSizes[i]  = (filter.GetLength(0) - 1) / 2;
            }

            // allocate space for scale space images and upload the table of their device pointers
            deviceptr<float>[] tempPointers = new deviceptr<float>[numScales];
            for (int i = 0; i < numScales; i++)
            {
                scaledImages[i] = worker.Malloc<float>(pixelCount);
                tempPointers[i] = scaledImages[i].Ptr;
            }
            scaledImagePointers = worker.Malloc(tempPointers);
            pixelScaleImage     = worker.Malloc<short>(pixelCount);
        }
Beispiel #15
0
 /// <summary>
 /// Stores the manager, the image, its memory and its format in the corresponding fields.
 /// </summary>
 /// <param name="manager">Buffer manager kept in the <c>manager</c> field.</param>
 /// <param name="image">Image kept in the <c>image</c> field.</param>
 /// <param name="memory">Device memory kept in the <c>memory</c> field.</param>
 /// <param name="format">Format kept in the <c>format</c> field.</param>
 public VulkanImage(BufferManager manager, Image image, DeviceMemory memory, Format format)
 {
     this.manager = manager;

     this.image = image;
     this.memory = memory;
     this.format = format;
 }
Beispiel #16
0
        /// <summary>
        /// Creates a single-sample, optimally tiled B8G8R8A8UNorm image that can be sampled, used as a
        /// colour attachment and used as a transfer source or destination, binds device-local memory to it
        /// and wraps it in a <see cref="Texture2D"/>.
        /// </summary>
        /// <param name="g">Graphics object whose context provides the device and the memory properties.</param>
        /// <param name="width">Width used for the image extent and the texture size.</param>
        /// <param name="height">Height used for the image extent and the texture size.</param>
        /// <returns>A texture wrapping the image, its memory and a colour view.</returns>
        public static Texture2D RenderTarget(Graphics g, int width, int height)
        {
            const Format format = Format.B8G8R8A8UNorm;

            // Create optimal tiled target image.
            Image image = g.Context.Device.CreateImage(new ImageCreateInfo
            {
                ImageType     = ImageType.Image2D,
                Format        = format,
                MipLevels     = 1,
                ArrayLayers   = 1,
                Samples       = SampleCounts.Count1,
                Tiling        = ImageTiling.Optimal,
                SharingMode   = SharingMode.Exclusive,
                InitialLayout = ImageLayout.Undefined,
                Extent        = new Extent3D(width, height, 1),
                Usage         = ImageUsages.Sampled | ImageUsages.TransferDst | ImageUsages.TransferSrc | ImageUsages.ColorAttachment
            });

            // Back the image with device-local memory.
            MemoryRequirements imageMemReq = image.GetMemoryRequirements();
            int imageHeapIndex             = g.Context.MemoryProperties.MemoryTypes.IndexOf(
                imageMemReq.MemoryTypeBits, MemoryProperties.DeviceLocal);
            DeviceMemory memory = g.Context.Device.AllocateMemory(new MemoryAllocateInfo(imageMemReq.Size, imageHeapIndex));

            image.BindMemory(memory);

            // Create image view covering the colour aspect of mip 0 / layer 0.
            var subresourceRange = new ImageSubresourceRange(ImageAspects.Color, 0, 1, 0, 1);
            ImageView view = image.CreateView(new ImageViewCreateInfo(format, subresourceRange));

            // No sampler is created here: the previous code built one that was neither handed to the
            // texture nor disposed, so it only leaked.
            return new Texture2D(g.Context, image, memory, view, format, new Vector2I(width, height), true);
        }
        /// <summary>
        /// Allocates native device memory for this resource unless memory is already allocated or the
        /// requested size is zero, and stores the result in <c>NativeMemory</c>.
        /// </summary>
        /// <param name="memoryProperties">Property flags that the chosen memory type must all have.</param>
        /// <param name="memoryRequirements">Size and allowed memory type bits for the allocation.</param>
        protected unsafe void AllocateMemory(MemoryPropertyFlags memoryProperties, MemoryRequirements memoryRequirements)
        {
            // Already allocated: nothing to do.
            if (NativeMemory != DeviceMemory.Null)
                return;

            // Nothing to allocate.
            if (memoryRequirements.Size == 0)
                return;

            var allocateInfo = new MemoryAllocateInfo
            {
                StructureType = StructureType.MemoryAllocateInfo,
                AllocationSize = memoryRequirements.Size,
            };

            PhysicalDeviceMemoryProperties physicalDeviceMemoryProperties;
            GraphicsDevice.NativePhysicalDevice.GetMemoryProperties(out physicalDeviceMemoryProperties);

            // Walk the type bits from the lowest bit upwards; bit i set means memory type i is allowed.
            var typeBits = memoryRequirements.MemoryTypeBits;
            for (uint i = 0; i < physicalDeviceMemoryProperties.MemoryTypeCount; i++)
            {
                if ((typeBits & 1) == 1)
                {
                    // Type is available, does it match user properties?
                    // The i-th entry is read through a pointer to the MemoryTypes member.
                    // NOTE(review): presumably MemoryTypes is laid out as consecutive MemoryType values - confirm.
                    var memoryType = *((MemoryType*)&physicalDeviceMemoryProperties.MemoryTypes + i);
                    if ((memoryType.PropertyFlags & memoryProperties) == memoryProperties)
                    {
                        // First allowed type that has every requested property wins.
                        allocateInfo.MemoryTypeIndex = i;
                        break;
                    }
                }
                typeBits >>= 1;
            }

            // NOTE(review): when no type matches, MemoryTypeIndex is never assigned in this method and the
            // allocation below is still attempted - confirm whether that fallback is intended.
            NativeMemory = GraphicsDevice.NativeDevice.AllocateMemory(ref allocateInfo);
        }
Beispiel #18
0
        /// <summary>
        /// Creates a D16Unorm depth image matching the back buffer size, allocates and binds memory for it,
        /// transitions it to the depth/stencil attachment layout and creates a 2D view of its depth aspect.
        /// </summary>
        /// <exception cref="Exception">Thrown when no memory type satisfies the image's requirements.</exception>
        public void CreateDepth()
        {
            ImageCreateInfo imageInfo = new ImageCreateInfo
            {
                ImageType = ImageType.Image2D,
                Format    = Format.D16Unorm,
                Extent    = new Extent3D
                {
                    Width  = BackBufferWidth,
                    Height = BackBufferHeight,
                    Depth  = 1,
                },
                MipLevels   = 1,
                ArrayLayers = 1,
                Samples     = (uint)SampleCountFlags.Count1,
                Tiling      = ImageTiling.Optimal,
                Usage       = (uint)ImageUsageFlags.DepthStencilAttachment,
                Flags       = 0,
            };

            Image image = Device.CreateImage(imageInfo, null);
            MemoryRequirements memReq = Device.GetImageMemoryRequirements(image);

            uint memTypeIndex;

            if (!TryGetMemoryTypeFromProperties(memReq.MemoryTypeBits, 0, out memTypeIndex))
            {
                throw new Exception("Failed to create back buffer");
            }

            MemoryAllocateInfo allocInfo = new MemoryAllocateInfo
            {
                // Allocate exactly what the image reports it needs; a zero-sized allocation is invalid
                // and could never back the image.
                AllocationSize  = memReq.Size,
                MemoryTypeIndex = memTypeIndex,
            };

            DeviceMemory imageMem = Device.AllocateMemory(allocInfo, null);

            Device.BindImageMemory(image, imageMem, 0);

            SetImageLayout(image, ImageAspectFlags.Depth, ImageLayout.Undefined, ImageLayout.DepthStencilAttachmentOptimal, 0);

            ImageViewCreateInfo imageViewInfo = new ImageViewCreateInfo
            {
                Image            = image,
                Format           = imageInfo.Format,
                SubresourceRange = new ImageSubresourceRange
                {
                    AspectMask     = (uint)ImageAspectFlags.Depth,
                    BaseMipLevel   = 0,
                    LevelCount     = 1,
                    BaseArrayLayer = 0,
                    LayerCount     = 1,
                },
                Flags    = 0,
                ViewType = ImageViewType.View2D,
            };

            // NOTE(review): the image, its memory and this view are only held in locals here - confirm
            // where they are meant to be stored and released.
            ImageView imageView = Device.CreateImageView(imageViewInfo, null);
        }
        /// <summary>
        /// Checks that the GPU implementation of the fully connected backward-data pass produces the same
        /// result as the CPU implementation for a random upstream gradient.
        /// </summary>
        public unsafe void FullyConnectedBackwardData()
        {
            FullyConnectedLayer layer = new FullyConnectedLayer(TensorInfo.Linear(231), 125, ActivationType.Sigmoid, WeightsInitializationMode.GlorotUniform, BiasInitializationMode.Gaussian);
            Tensor upstream = CreateRandomTensor(400, layer.OutputInfo.Size);

            fixed (float* weightsPtr = layer.Weights, biasesPtr = layer.Biases)
            {
                Tensor.Reshape(weightsPtr, layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor weights);
                Tensor.Reshape(biasesPtr, 1, layer.OutputInfo.Size, out Tensor biases);

                // Reference result computed on the CPU
                Tensor.New(upstream.Entities, layer.InputInfo.Size, out Tensor cpuResult);
                CpuDnn.FullyConnectedBackwardData(weights, upstream, cpuResult);

                Gpu gpu = Gpu.Default;

                using (DeviceMemory<float> upstreamDevice = gpu.AllocateDevice(upstream))
                using (DeviceMemory<float> weightsDevice = gpu.AllocateDevice(weights))
                using (DeviceMemory<float> resultDevice = gpu.AllocateDevice<float>(cpuResult.Size))
                {
                    // Same computation on the GPU, copied back to the host for comparison
                    Dnn.Get(gpu).FullyConnectedBackwardData(upstream.Entities, layer.InputInfo.Size, layer.OutputInfo.Size, upstreamDevice.Ptr, weightsDevice.Ptr, resultDevice.Ptr);
                    resultDevice.CopyToHost(cpuResult.Entities, cpuResult.Length, out Tensor gpuResult);

                    Assert.IsTrue(cpuResult.ContentEquals(gpuResult));
                    Tensor.Free(upstream, cpuResult, gpuResult);
                }
            }
        }
Beispiel #20
0
        /// <summary>
        /// Creates a device-local vertex buffer for <paramref name="count"/> elements of
        /// <typeparamref name="T"/> together with a host-visible, host-coherent staging buffer of the same
        /// size, and wraps both in a <see cref="VKBuffer"/>.
        /// </summary>
        /// <typeparam name="T">Element type; its size determines the buffer size.</typeparam>
        /// <param name="g">Graphics object whose context provides the device, memory properties and command pool.</param>
        /// <param name="count">Number of elements the buffer has to hold.</param>
        /// <returns>The buffer wrapper, including a new fence and one primary command buffer.</returns>
        public static VKBuffer InstanceInfo<T>(Graphics g, int count) where T : struct
        {
            // Widen before multiplying so that a large count cannot overflow 32-bit arithmetic.
            long size = (long)count * Interop.SizeOf<T>();

            // Create a staging buffer that is writable by host.
            var stagingBuffer             = g.Context.Device.CreateBuffer(new BufferCreateInfo(size, BufferUsages.TransferSrc));
            MemoryRequirements stagingReq = stagingBuffer.GetMemoryRequirements();
            int stagingMemoryTypeIndex    = g.Context.MemoryProperties.MemoryTypes.IndexOf(
                stagingReq.MemoryTypeBits,
                MemoryProperties.HostVisible | MemoryProperties.HostCoherent);
            DeviceMemory stagingMemory = g.Context.Device.AllocateMemory(new MemoryAllocateInfo(stagingReq.Size, stagingMemoryTypeIndex));

            stagingBuffer.BindMemory(stagingMemory);

            // Create a device local buffer where the data will be copied and which will be used for rendering.
            VulkanCore.Buffer  buffer = g.Context.Device.CreateBuffer(new BufferCreateInfo(size, BufferUsages.VertexBuffer | BufferUsages.TransferDst));
            MemoryRequirements req    = buffer.GetMemoryRequirements();
            int memoryTypeIndex       = g.Context.MemoryProperties.MemoryTypes.IndexOf(
                req.MemoryTypeBits,
                MemoryProperties.DeviceLocal);
            DeviceMemory memory = g.Context.Device.AllocateMemory(new MemoryAllocateInfo(req.Size, memoryTypeIndex));

            buffer.BindMemory(memory);

            // Fence first, then the command buffer - the same order in which the arguments were evaluated before.
            var fence         = g.Context.Device.CreateFence();
            var commandBuffer = g.Context.GraphicsCommandPool.AllocateBuffers(
                new CommandBufferAllocateInfo(CommandBufferLevel.Primary, 1))[0];

            return new VKBuffer(g.Context, buffer, memory, fence, count, size, true, stagingBuffer, stagingMemory, commandBuffer);
        }
        /// <summary>
        /// Checks that the GPU implementation of the fully connected forward pass produces the same result
        /// as the CPU implementation for a random input.
        /// </summary>
        public unsafe void FullyConnectedForward()
        {
            FullyConnectedLayer layer = new FullyConnectedLayer(TensorInfo.Linear(231), 125, ActivationType.Sigmoid, WeightsInitializationMode.GlorotUniform, BiasInitializationMode.Gaussian);
            Tensor input = CreateRandomTensor(400, layer.InputInfo.Size);

            fixed (float* weightsPtr = layer.Weights, biasesPtr = layer.Biases)
            {
                Tensor.Reshape(weightsPtr, layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor weights);
                Tensor.Reshape(biasesPtr, 1, layer.OutputInfo.Size, out Tensor biases);

                // Reference result computed on the CPU
                Tensor.New(input.Entities, layer.OutputInfo.Size, out Tensor cpuResult);
                CpuDnn.FullyConnectedForward(input, weights, biases, cpuResult);

                Gpu gpu = Gpu.Default;

                using (DeviceMemory<float> inputDevice = gpu.AllocateDevice(input))
                using (DeviceMemory<float> weightsDevice = gpu.AllocateDevice(weights))
                using (DeviceMemory<float> biasesDevice = gpu.AllocateDevice(biases))
                using (DeviceMemory<float> resultDevice = gpu.AllocateDevice<float>(cpuResult.Size))
                {
                    // Same computation on the GPU, copied back to the host for comparison
                    Dnn.Get(gpu).FullyConnectedForward(input.Entities, input.Length, cpuResult.Length, inputDevice.Ptr, weightsDevice.Ptr, biasesDevice.Ptr, resultDevice.Ptr);
                    resultDevice.CopyToHost(cpuResult.Entities, cpuResult.Length, out Tensor gpuResult);

                    Assert.IsTrue(cpuResult.ContentEquals(gpuResult));
                    Tensor.Free(input, cpuResult, gpuResult);
                }
            }
        }
Beispiel #22
0
 /// <summary>
 /// Wraps an existing image together with its memory, view and format.
 /// </summary>
 /// <param name="image">Value assigned to <c>Image</c>.</param>
 /// <param name="memory">Value assigned to <c>Memory</c>.</param>
 /// <param name="view">Value assigned to <c>View</c>.</param>
 /// <param name="format">Value assigned to <c>Format</c>.</param>
 private VulkanImage(Image image, DeviceMemory memory, ImageView view, Format format)
 {
     Image = image;
     Memory = memory;

     View = view;
     Format = format;
 }
Beispiel #23
0
        //[/CudnnMnistFCF]

        //[CudnnMnistCF]
        /// <summary>
        /// Runs a cuDNN forward convolution (cross-correlation mode, no padding, stride 1) of
        /// <paramref name="srcData"/> with the filters of <paramref name="conv"/>, then adds the layer bias.
        /// </summary>
        /// <remarks>
        /// <paramref name="nchw"/> is overwritten with the output dimensions and
        /// <paramref name="dstData"/> is resized to hold N * C * H * W output elements.
        /// </remarks>
        /// <param name="conv">Layer providing the filter dimensions, the weights (<c>DataD</c>) and the bias.</param>
        /// <param name="nchw">Input dimensions on entry, output dimensions on return.</param>
        /// <param name="srcData">Input tensor on the device.</param>
        /// <param name="dstData">Output tensor on the device; resized by this call.</param>
        public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
            _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);

            // Find the dimensions of the convolution output:
            // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
            int outN, outC, outH, outW;
            _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out outN, out outC, out outH, out outW);

            nchw.N = outN;
            nchw.C = outC;
            nchw.H = outH;
            nchw.W = outW;
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

            var algorithm = _cudnn.GetConvolutionForwardAlgorithm(
                _srcTensorDesc,
                _filterDesc,
                _convDesc,
                _dstTensorDesc,
                CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
                IntPtr.Zero);

            Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);
            var workspaceBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc, algorithm);

            // The workspace only lives for the duration of the convolution and the bias addition.
            using (var workspace = _worker.Malloc<byte>(workspaceBytes.ToInt32()))
            {
                const float alpha = 1.0f;
                const float beta = 0.0f;

                _cudnn.ConvolutionForward(
                    alpha, _srcTensorDesc, srcData.Ptr,
                    _filterDesc, conv.DataD.Ptr,
                    _convDesc, algorithm,
                    workspace.Ptr, workspaceBytes,
                    beta, _dstTensorDesc, dstData.Ptr);
                AddBias(_dstTensorDesc, conv, outC, dstData);
            }
        }
Beispiel #24
0
        /// <summary>
        /// Maps <paramref name="bufferMem"/>, copies the first <paramref name="size"/> bytes of
        /// <paramref name="data"/> into the mapping and unmaps the memory again.
        /// </summary>
        /// <param name="bufferMem">Device memory to map and fill.</param>
        /// <param name="size">Length of the mapping and number of bytes copied.</param>
        /// <param name="data">Source bytes; must contain at least <paramref name="size"/> bytes.</param>
        void CopyArrayToBuffer(DeviceMemory bufferMem, DeviceSize size, byte[] data)
        {
            var map = device.MapMemory(bufferMem, 0, size);

            try
            {
                Marshal.Copy(data, 0, map, (int)((ulong)size));
            }
            finally
            {
                // Marshal.Copy throws on a null or too-short array; release the mapping regardless.
                device.UnmapMemory(bufferMem);
            }
        }
Beispiel #25
0
        public void CmdDraw()
        {
            var renderPassCreateInfo = new RenderPassCreateInfo(new[] { new SubpassDescription(
                                                                            new[] { new AttachmentReference(0, ImageLayout.ColorAttachmentOptimal) }) },
                                                                new[] { new AttachmentDescription {
                                                                            Format = Format.B8G8R8A8UNorm, Samples = SampleCounts.Count1
                                                                        } });
            var imageCreateInfo = new ImageCreateInfo
            {
                Usage       = ImageUsages.ColorAttachment,
                Format      = Format.B8G8R8A8UNorm,
                Extent      = new Extent3D(2, 2, 1),
                ImageType   = ImageType.Image2D,
                MipLevels   = 1,
                ArrayLayers = 1,
                Samples     = SampleCounts.Count1
            };
            var imageViewCreateInfo = new ImageViewCreateInfo(
                Format.B8G8R8A8UNorm,
                new ImageSubresourceRange(ImageAspects.Color, 0, 1, 0, 1));

            using (ShaderModule vertexShader = Device.CreateShaderModule(new ShaderModuleCreateInfo(ReadAllBytes("Shader.vert.spv"))))
                using (ShaderModule fragmentShader = Device.CreateShaderModule(new ShaderModuleCreateInfo(ReadAllBytes("Shader.frag.spv"))))
                    using (PipelineLayout pipelineLayout = Device.CreatePipelineLayout())
                        using (RenderPass renderPass = Device.CreateRenderPass(renderPassCreateInfo))
                            using (Image image = Device.CreateImage(imageCreateInfo))
                            {
                                MemoryRequirements imageMemReq = image.GetMemoryRequirements();
                                int memTypeIndex = PhysicalDeviceMemoryProperties.MemoryTypes.IndexOf(imageMemReq.MemoryTypeBits, MemoryProperties.DeviceLocal);
                                using (DeviceMemory imageMemory = Device.AllocateMemory(new MemoryAllocateInfo(imageMemReq.Size, memTypeIndex)))
                                {
                                    image.BindMemory(imageMemory);
                                    using (ImageView imageView = image.CreateView(imageViewCreateInfo))
                                        using (Framebuffer framebuffer = renderPass.CreateFramebuffer(new FramebufferCreateInfo(new[] { imageView }, 2, 2)))
                                            using (Pipeline pipeline = Device.CreateGraphicsPipeline(new GraphicsPipelineCreateInfo(
                                                                                                         pipelineLayout,
                                                                                                         renderPass,
                                                                                                         0,
                                                                                                         new[]
                                            {
                                                new PipelineShaderStageCreateInfo(ShaderStages.Vertex, vertexShader, "main"),
                                                new PipelineShaderStageCreateInfo(ShaderStages.Fragment, fragmentShader, "main")
                                            },
                                                                                                         new PipelineInputAssemblyStateCreateInfo(),
                                                                                                         new PipelineVertexInputStateCreateInfo(),
                                                                                                         new PipelineRasterizationStateCreateInfo {
                                                RasterizerDiscardEnable = true, LineWidth = 1.0f
                                            })))
                                            {
                                                CommandBuffer.Begin();
                                                CommandBuffer.CmdBeginRenderPass(new RenderPassBeginInfo(framebuffer, new Rect2D(0, 0, 2, 2)));
                                                CommandBuffer.CmdBindPipeline(PipelineBindPoint.Graphics, pipeline);
                                                CommandBuffer.CmdDraw(3);
                                                CommandBuffer.CmdEndRenderPass();
                                                CommandBuffer.End();
                                            }
                                }
                            }
        }
Beispiel #26
0
        /// <summary>
        /// Creates the font manager: stores the given physical address, takes the memory
        /// object exposed by <paramref name="Device"/>, and sets the fonts path to the
        /// "fonts" folder below the device's system path.
        /// </summary>
        /// <param name="Device">Device whose <c>Memory</c> and <c>VFs</c> members are read.</param>
        /// <param name="PhysicalAddress">Value stored in <c>PhysicalAddress</c>.</param>
        public SharedFontManager(Switch Device, long PhysicalAddress)
        {
            this.PhysicalAddress = PhysicalAddress;
            Memory               = Device.Memory;

            string systemPath = Device.VFs.GetSystemPath();
            FontsPath         = Path.Combine(systemPath, "fonts");
        }
        /// <summary>
        /// Exports a Windows HANDLE that refers to a device memory object.
        /// </summary>
        /// <param name="memory">The memory object to export; its <c>Parent</c> is passed as the first native argument.</param>
        /// <param name="handleType">The kind of handle being requested.</param>
        /// <returns>The handle value written by <c>vkGetMemoryWin32HandleKHX</c>.</returns>
        /// <exception cref="VulkanException">Vulkan returns an error code.</exception>
        public static IntPtr GetWin32HandleKhx(this DeviceMemory memory, ExternalMemoryHandleTypesKhx handleType)
        {
            IntPtr win32Handle;

            // The native call reports failure through its Result; turn that into an exception.
            VulkanException.ThrowForInvalidResult(
                vkGetMemoryWin32HandleKHX(memory.Parent, memory, handleType, &win32Handle));

            return win32Handle;
        }
        /// <summary>
        /// Exports a POSIX file descriptor that refers to a device memory object.
        /// </summary>
        /// <param name="memory">The memory object to export; its <c>Parent</c> is passed as the first native argument.</param>
        /// <param name="handleType">The kind of handle being requested.</param>
        /// <returns>The file descriptor written by <c>vkGetMemoryFdKHX</c>.</returns>
        /// <exception cref="VulkanException">Vulkan returns an error code.</exception>
        public static int GetFdKhx(this DeviceMemory memory, ExternalMemoryHandleTypesKhx handleType)
        {
            int descriptor;

            // The native call reports failure through its Result; turn that into an exception.
            VulkanException.ThrowForInvalidResult(
                vkGetMemoryFdKHX(memory.Parent, memory, handleType, &descriptor));

            return descriptor;
        }
Beispiel #29
0
        /// <summary>
        /// Configures the bias tensor descriptor as 1 x <paramref name="c"/> x 1 x 1 and calls
        /// <c>AddTensor</c> in <c>CUDNN_ADD_SAME_C</c> mode with the layer's bias pointer and
        /// <paramref name="data"/>, using 1 for both scaling factors.
        /// </summary>
        /// <param name="dstTensorDesc">Descriptor passed alongside <paramref name="data"/>.</param>
        /// <param name="layer">Layer whose <c>BiasD</c> pointer is passed to <c>AddTensor</c>.</param>
        /// <param name="c">Second dimension used for the bias descriptor.</param>
        /// <param name="data">Device memory whose pointer is passed as the last argument.</param>
        public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory <float> data)
        {
            // alpha and beta are both 1 in this call.
            const float one = 1.0f;

            _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);

            _cudnn.AddTensor(
                CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C,
                one, _biasTensorDesc, layer.BiasD.Ptr,
                one, dstTensorDesc, data.Ptr);
        }
Beispiel #30
0
        /// <summary>
        /// Maps a range of <paramref name="destinationBufferMemory"/>, copies
        /// <paramref name="size"/> bytes from the start of <paramref name="source"/> into the
        /// mapped range, and unmaps the memory again.
        /// </summary>
        /// <param name="this">Device used to map and unmap the memory.</param>
        /// <param name="source">Managed bytes to copy; copying starts at index 0.</param>
        /// <param name="destinationBufferMemory">Memory object that is mapped and written.</param>
        /// <param name="offset">Offset passed to <c>MapMemory</c>.</param>
        /// <param name="size">Size passed to <c>MapMemory</c>; also the number of bytes copied.</param>
        /// <param name="mapFlags">Flags passed to <c>MapMemory</c>.</param>
        public static void CopyToBufferMemory(this Device @this, byte[] source, DeviceMemory destinationBufferMemory, DeviceSize offset, DeviceSize size, uint mapFlags)
        {
            var mappedMemoryPointer = @this.MapMemory(destinationBufferMemory, offset, size, mapFlags);

            try
            {
                Marshal.Copy(source, 0, mappedMemoryPointer, (int)(uint)size);
            }
            finally
            {
                // Marshal.Copy throws for a null or too-short source. Unmap in every case so a
                // failed copy does not leave the memory object mapped.
                @this.UnmapMemory(destinationBufferMemory);
            }
        }
Beispiel #31
0
 /// <summary>
 /// Adds <paramref name="memory"/> to the repository and re-sorts the repository with
 /// <c>MComp</c>, holding the repository lock for both steps.
 /// </summary>
 /// <param name="memory">The memory object being added to the repository.</param>
 private void Release(DeviceMemory <byte> memory)
 {
     var repository = _repo;

     lock (repository)
     {
         repository.Add(memory);
         repository.Sort(MComp);
     }
 }
Beispiel #32
0
        /// <summary>
        /// Waits for the device to become idle, then releases every object held by this
        /// instance in a fixed order, clearing each field right after its object is released.
        /// </summary>
        private void TearDown()
        {
            device.WaitIdle();

            // Semaphores.
            renderFinishedSemaphore.Dispose();
            renderFinishedSemaphore = null;
            imageAvailableSemaphore.Dispose();
            imageAvailableSemaphore = null;

            // Vertex data: the memory is freed first, then the buffer is disposed.
            device.FreeMemory(vertexBufferMemory);
            vertexBufferMemory = null;
            vertexBuffer.Dispose();
            vertexBuffer = null;

            commandPool.Dispose();
            commandPool = null;

            foreach (var target in frameBuffers)
            {
                target.Dispose();
            }
            frameBuffers = null;

            // Shaders and pipeline objects.
            fragShader.Dispose();
            fragShader = null;
            vertShader.Dispose();
            vertShader = null;
            pipeline.Dispose();
            pipeline = null;
            pipelineLayout.Dispose();
            pipelineLayout = null;

            foreach (var view in swapChainImageViews)
            {
                view.Dispose();
            }
            swapChainImageViews = null;

            renderPass.Dispose();
            renderPass = null;

            swapChain.Dispose();
            swapChain = null;

            // Device, surface and instance go last, in that order.
            device.Dispose();
            device = null;
            surface.Dispose();
            surface = null;
            instance.Dispose();
            instance = null;
        }
Beispiel #33
0
        //[CudnnMnistFCF]
        /// <summary>
        /// Fully connected forward step for a single sample. The layer bias is copied into
        /// <paramref name="dstData"/>, then <c>Sgemv</c> is called with the transposed layer
        /// weights and beta = 1, so the matrix-vector product is added on top of the bias.
        /// </summary>
        /// <param name="ip">Layer supplying <c>Outputs</c>, <c>BiasD</c> and <c>DataD</c>.</param>
        /// <param name="nchw">
        /// Input dimensions; <c>N</c> must be 1. On return <c>C</c> is set to the layer's
        /// output count and <c>H</c>/<c>W</c> to 1.
        /// </param>
        /// <param name="srcData">Input vector of length C*H*W.</param>
        /// <param name="dstData">Output buffer; replaced via <c>Resize</c> if it is too small.</param>
        /// <exception cref="NotImplementedException"><c>nchw.N</c> is not 1.</exception>
        public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            if (nchw.N != 1)
            {
                throw new NotImplementedException("Not Implemented");
            }

            var dimX = nchw.C * nchw.H * nchw.W;
            var dimY = ip.Outputs;
            Resize(ref dstData, dimY);

            const float alpha = 1.0f;
            const float beta = 1.0f;

            // cuMemcpyDtoD is a raw CUDA API call, so it is guarded with worker.Eval.
            // A ref parameter cannot be captured by a lambda, hence the local copy.
            // The returned status is checked with cuSafeCall so that a failed copy is
            // reported instead of being silently ignored.
            var output = dstData;
            _worker.EvalAction(() => CUDAInterop.cuSafeCall(
                CUDAInterop.cuMemcpyDtoD(output.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(dimY * sizeof(float)))));

            // This cublas call doesn't need worker.Eval because cublas is a thin wrapper
            // for the raw API and it already has worker.Eval.
            _cublas.Sgemv(CUBLASInterop.cublasOperation_t.CUBLAS_OP_T, dimX, dimY, alpha, ip.DataD.Ptr, dimX,
                srcData.Ptr, 1, beta, dstData.Ptr, 1);

            nchw.H = 1;
            nchw.W = 1;
            nchw.C = dimY;
        }
Beispiel #34
0
 /// <summary>
 /// Calls <c>vkBindImageMemory</c> for this device with the given image, memory and offset,
 /// and runs <c>CheckError</c> on the returned result.
 /// </summary>
 /// <param name="image">Image passed to the native call.</param>
 /// <param name="memory">Memory object passed to the native call.</param>
 /// <param name="memoryOffset">Offset passed to the native call.</param>
 public unsafe void BindImageMemory(Image image, DeviceMemory memory, ulong memoryOffset)
     => vkBindImageMemory(this, image, memory, memoryOffset).CheckError();
Beispiel #35
0
 /// <summary>
 /// Calls <c>vkFreeMemory</c> for this device with the given memory object and allocator.
 /// </summary>
 /// <param name="memory">Memory object passed to the native call.</param>
 /// <param name="allocator">Allocation callbacks pointer passed through; defaults to null.</param>
 public unsafe void FreeMemory(DeviceMemory memory, AllocationCallbacks* allocator = null)
     => vkFreeMemory(this, memory, allocator);
Beispiel #36
0
 /// <summary>
 /// Calls <c>vkGetDeviceMemoryCommitment</c> for this device and the given memory object.
 /// </summary>
 /// <param name="memory">Memory object passed to the native call.</param>
 /// <returns>The value the native call writes through its output pointer.</returns>
 public unsafe ulong GetMemoryCommitment(DeviceMemory memory)
 {
     ulong commitment;

     vkGetDeviceMemoryCommitment(this, memory, &commitment);

     return commitment;
 }
Beispiel #37
0
 /// <summary>
 /// Calls <c>vkMapMemory</c> for this device, runs <c>CheckError</c> on the result, and
 /// returns the pointer the native call wrote.
 /// </summary>
 /// <param name="memory">Memory object passed to the native call.</param>
 /// <param name="offset">Offset passed to the native call.</param>
 /// <param name="size">Size passed to the native call.</param>
 /// <param name="flags">Map flags passed to the native call.</param>
 /// <returns>The pointer written by <c>vkMapMemory</c>.</returns>
 public unsafe IntPtr MapMemory(DeviceMemory memory, ulong offset, ulong size, MemoryMapFlags flags)
 {
     IntPtr mappedAddress;

     Result mapResult = vkMapMemory(this, memory, offset, size, flags, &mappedAddress);
     mapResult.CheckError();

     return mappedAddress;
 }
Beispiel #38
0
 // Extern declaration for vkBindImageMemory; it has no managed body and returns a Result.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern Result vkBindImageMemory(Device device, Image image, DeviceMemory memory, ulong memoryOffset);
Beispiel #39
0
        /// <summary>
        /// Creates the vertex buffer for a single triangle, allocates host-visible memory for
        /// it, uploads the vertex data through a mapping, binds the memory to the buffer, and
        /// sets up the matching vertex attribute and binding descriptions.
        /// </summary>
        /// <remarks>
        /// Each vertex is six floats: three for the attribute at location 0 followed by three
        /// for the attribute at location 1. If the buffer reports a memory requirement of zero
        /// bytes the method returns right after creating the buffer.
        /// </remarks>
        private void CreateVertexBuffer()
        {
            var vertices = new[,]
            {
                {  0.0f, -0.5f,  0.5f, 1.0f, 0.0f, 0.0f },
                {  0.5f,  0.5f,  0.5f, 0.0f, 1.0f, 0.0f },
                { -0.5f,  0.5f,  0.5f, 0.0f, 0.0f, 1.0f },
            };

            var createInfo = new BufferCreateInfo
            {
                StructureType = StructureType.BufferCreateInfo,
                Usage = BufferUsageFlags.VertexBuffer,
                // Length of a 2D array is the total element count (rows * columns).
                Size = (ulong)(sizeof(float) * vertices.Length)
            };
            vertexBuffer = device.CreateBuffer(ref createInfo);

            MemoryRequirements memoryRequirements;
            device.GetBufferMemoryRequirements(vertexBuffer, out memoryRequirements);

            if (memoryRequirements.Size == 0)
                return;

            var allocateInfo = new MemoryAllocateInfo
            {
                StructureType = StructureType.MemoryAllocateInfo,
                AllocationSize = memoryRequirements.Size,
                // The data is written through a mapping and never flushed explicitly, so the
                // memory must be host-coherent as well as host-visible; otherwise the writes
                // are not guaranteed to become visible to the device.
                MemoryTypeIndex = MemoryTypeFromProperties(
                    memoryRequirements.MemoryTypeBits,
                    MemoryPropertyFlags.HostVisible | MemoryPropertyFlags.HostCoherent)
            };
            vertexBufferMemory = device.AllocateMemory(ref allocateInfo);

            // Upload: map, copy the managed array into the mapping, unmap.
            var mapped = device.MapMemory(vertexBufferMemory, 0, (ulong)createInfo.Size, MemoryMapFlags.None);
            fixed (float* source = &vertices[0, 0]) Utilities.CopyMemory(mapped, new IntPtr(source), (int)createInfo.Size);
            device.UnmapMemory(vertexBufferMemory);

            device.BindBufferMemory(vertexBuffer, vertexBufferMemory, 0);

            vertexAttributes = new []
            {
                new VertexInputAttributeDescription { Binding = 0, Location = 0, Format = Format.R32G32B32SFloat, Offset = 0 },
                new VertexInputAttributeDescription { Binding = 0, Location = 1, Format = Format.R32G32B32SFloat, Offset = sizeof(float) * 3 },
            };

            vertexBindings = new []
            {
                // Stride is one row of the vertex array: GetLength(1) floats per vertex.
                new VertexInputBindingDescription { Binding = 0, InputRate = VertexInputRate.Vertex, Stride = (uint)(sizeof(float) * vertices.GetLength(1)) }
            };
        }
Beispiel #40
0
 // Extern declaration for vkUnmapMemory; it has no managed body and returns nothing.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern void vkUnmapMemory(Device device, DeviceMemory memory);
Beispiel #41
0
 /// <summary>
 /// Makes sure <paramref name="buffer"/> holds at least <paramref name="length"/> elements.
 /// A buffer that is already large enough is left untouched; otherwise it is disposed and
 /// replaced by a new allocation of exactly <paramref name="length"/> elements from the worker.
 /// </summary>
 /// <param name="buffer">Buffer to check; reassigned when it is too small.</param>
 /// <param name="length">Required minimum number of elements.</param>
 public void Resize(ref DeviceMemory<float> buffer, int length)
 {
     if (buffer.Length < length)
     {
         // The old contents are not carried over to the replacement buffer.
         buffer.Dispose();
         buffer = _worker.Malloc<float>(length);
     }
 }
Beispiel #42
0
        //[/CudnnMnistSF]

        //[CudnnMnistAF]
        /// <summary>
        /// Runs the cuDNN activation forward call in <c>CUDNN_ACTIVATION_RELU</c> mode from
        /// <paramref name="srcData"/> into <paramref name="dstData"/>. Source and destination
        /// descriptors are both set to the dimensions in <paramref name="nchw"/>.
        /// </summary>
        /// <param name="nchw">Tensor dimensions; read only.</param>
        /// <param name="srcData">Input device memory.</param>
        /// <param name="dstData">Output device memory; replaced via <c>Resize</c> if it is too small.</param>
        public void ActivationForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            var elementCount = nchw.N * nchw.C * nchw.H * nchw.W;
            Resize(ref dstData, elementCount);

            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

            // alpha = 1 and beta = 0 for this call.
            _cudnn.ActivationForward(
                CUDNNInterop.cudnnActivationMode_t.CUDNN_ACTIVATION_RELU,
                1.0f, _srcTensorDesc, srcData.Ptr,
                0.0f, _dstTensorDesc, dstData.Ptr);
        }
Beispiel #43
0
        //[/CudnnMnistPF]

        //[CudnnMnistSF]
        /// <summary>
        /// Runs the cuDNN softmax forward call (<c>CUDNN_SOFTMAX_ACCURATE</c>,
        /// <c>CUDNN_SOFTMAX_MODE_CHANNEL</c>) from <paramref name="srcData"/> into
        /// <paramref name="dstData"/>. Source and destination descriptors are both set to
        /// the dimensions in <paramref name="nchw"/>.
        /// </summary>
        /// <param name="nchw">Tensor dimensions; read only.</param>
        /// <param name="srcData">Input device memory.</param>
        /// <param name="dstData">Output device memory; replaced via <c>Resize</c> if it is too small.</param>
        public void SoftmaxForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            var elementCount = nchw.N * nchw.C * nchw.H * nchw.W;
            Resize(ref dstData, elementCount);

            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

            // alpha = 1 and beta = 0 for this call.
            _cudnn.SoftmaxForward(
                CUDNNInterop.cudnnSoftmaxAlgorithm_t.CUDNN_SOFTMAX_ACCURATE,
                CUDNNInterop.cudnnSoftmaxMode_t.CUDNN_SOFTMAX_MODE_CHANNEL,
                1.0f, _srcTensorDesc, srcData.Ptr,
                0.0f, _dstTensorDesc, dstData.Ptr);
        }
Beispiel #44
0
        //[/CudnnMnistCF]

        //[CudnnMnistPF]
        /// <summary>
        /// Runs the cuDNN pooling forward call in <c>CUDNN_POOLING_MAX</c> mode, with the
        /// pooling descriptor set through <c>Set2D(..., 2, 2, 0, 0, 2, 2)</c>, from
        /// <paramref name="srcData"/> into <paramref name="dstData"/>.
        /// </summary>
        /// <param name="nchw">
        /// Input dimensions. <c>H</c> and <c>W</c> are halved (integer division) in place and
        /// then describe the output.
        /// </param>
        /// <param name="srcData">Input device memory.</param>
        /// <param name="dstData">Output device memory; replaced via <c>Resize</c> if it is too small.</param>
        public void PoolForward(nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            _poolingDesc.Set2D(CUDNNInterop.cudnnPoolingMode_t.CUDNN_POOLING_MAX, 2, 2, 0, 0, 2, 2);

            // The source descriptor uses the incoming size, the destination the halved size.
            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            nchw.H = nchw.H / 2;
            nchw.W = nchw.W / 2;
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

            var outputCount = nchw.N * nchw.C * nchw.H * nchw.W;
            Resize(ref dstData, outputCount);

            // alpha = 1 and beta = 0 for this call.
            _cudnn.PoolingForward(
                _poolingDesc,
                1.0f, _srcTensorDesc, srcData.Ptr,
                0.0f, _dstTensorDesc, dstData.Ptr);
        }
Beispiel #45
0
        //[/CudnnMnistFCF]

        //[CudnnMnistCF]
        /// <summary>
        /// Runs the cuDNN convolution forward call (<c>CUDNN_CROSS_CORRELATION</c>) from
        /// <paramref name="srcData"/> into <paramref name="dstData"/> using the filter data of
        /// <paramref name="conv"/>, then adds the layer bias through <c>AddBias</c>.
        /// </summary>
        /// <param name="conv">Layer supplying <c>Outputs</c>, <c>Inputs</c>, <c>KernelDim</c> and <c>DataD</c>.</param>
        /// <param name="nchw">Input dimensions; overwritten with the output dimensions reported by the convolution descriptor.</param>
        /// <param name="srcData">Input device memory.</param>
        /// <param name="dstData">Output device memory; replaced via <c>Resize</c> if it is too small.</param>
        public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
            _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);

            // Ask the descriptor for the dimensions of the convolution output:
            // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
            int outN, outC, outH, outW;
            _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out outN, out outC, out outH, out outW);
            nchw.N = outN;
            nchw.C = outC;
            nchw.H = outH;
            nchw.W = outW;
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);

            var algo = _cudnn.GetConvolutionForwardAlgorithm(
                _srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc,
                CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
                IntPtr.Zero);

            Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);
            var workspaceBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc, algo);

            // The workspace is disposed at the end of the using block, after the bias is added.
            using (var scratch = _worker.Malloc<byte>(workspaceBytes.ToInt32()))
            {
                // alpha = 1 and beta = 0 for this call.
                _cudnn.ConvolutionForward(
                    1.0f, _srcTensorDesc, srcData.Ptr,
                    _filterDesc, conv.DataD.Ptr,
                    _convDesc, algo, scratch.Ptr, workspaceBytes,
                    0.0f, _dstTensorDesc, dstData.Ptr);
                AddBias(_dstTensorDesc, conv, outC, dstData);
            }
        }
Beispiel #46
0
 // Extern declaration for vkBindBufferMemory; it has no managed body and returns a Result.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern Result vkBindBufferMemory(Device device, Buffer buffer, DeviceMemory memory, ulong memoryOffset);
Beispiel #47
0
 /// <summary>
 /// Calls <c>vkBindBufferMemory</c> for this device with the given buffer, memory and offset,
 /// and runs <c>CheckError</c> on the returned result.
 /// </summary>
 /// <param name="buffer">Buffer passed to the native call.</param>
 /// <param name="memory">Memory object passed to the native call.</param>
 /// <param name="memoryOffset">Offset passed to the native call.</param>
 public unsafe void BindBufferMemory(Buffer buffer, DeviceMemory memory, ulong memoryOffset)
     => vkBindBufferMemory(this, buffer, memory, memoryOffset).CheckError();
        //[/LockPositions]

        /// <summary>
        /// Creates the 800x600 simulation window. The constructor sets the simulation
        /// parameters, creates the worker and the queue of GPU simulators, allocates two GL
        /// array buffers sized for <c>_numBodies</c> float4 values and registers them with
        /// CUDA, and uploads the initial velocities and positions.
        /// </summary>
        /// <exception cref="Exception">
        /// The size GL reports for a buffer differs from the size that was requested.
        /// </exception>
        public SimWindow() : base(800, 600, GraphicsMode.Default, "Gravitational n-body simulation")
        {
            _numBodies = 256*64;
            const float clusterScale = 1.0f;
            const float velocityScale = 1.0f;
            _deltaTime = 0.001f;
            _softeningSquared = 0.00125f;
            _damping = 0.9995f;
            //[CreateWorker]
            _worker = Worker.CreateByFunc(Generate);
            //[/CreateWorker]

            _stopwatch = Stopwatch.StartNew();
            _fpsCalcLag = 128;
            _frameCounter = 0;

            //[CreateSimulatros]
            _simulators = new Queue<ISimulator>();
            var target = GPUModuleTarget.Worker(_worker);

            var simulatorGpuDynamicBlockSizeModule = new GpuDynamicSimulatorModule(target);         // need dispose
            var simulatorGpuDynamicBlockSize64 = simulatorGpuDynamicBlockSizeModule.Create(64);
            var simulatorGpuDynamicBlockSize128 = simulatorGpuDynamicBlockSizeModule.Create(128);
            var simulatorGpuDynamicBlockSize256 = simulatorGpuDynamicBlockSizeModule.Create(256);
            var simulatorGpuDynamicBlockSize512 = simulatorGpuDynamicBlockSizeModule.Create(512);

            var simulatorGpuStaticBlockSizeModule64 = new GpuStaticSimulatorModule64(target);       // need dispose
            var simulatorGpuStaticBlockSizeModule128 = new GpuStaticSimulatorModule128(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule256 = new GpuStaticSimulatorModule256(target);     // need dispose
            var simulatorGpuStaticBlockSizeModule512 = new GpuStaticSimulatorModule512(target);     // need dispose

            // First, enqueue one simulator with block size 256 so we can compare with C code for performance.
            // Note that the same static 256 module is enqueued a second time further down.
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);

            // Enqueue several dynamic block size simulators.
            _simulators.Enqueue(simulatorGpuDynamicBlockSize64);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize128);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize256);
            _simulators.Enqueue(simulatorGpuDynamicBlockSize512);

            // Enqueue several static block size simulators.
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule64);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule128);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule256);
            _simulators.Enqueue(simulatorGpuStaticBlockSizeModule512);

            // We do not enqueue any cpu simulator as it is much too slow.
            //_simulators.Enqueue(new CpuSimulator(_worker, _numBodies));

            // Disposal is deferred: the delegate captures the five module objects created
            // above (the ones marked "need dispose") and disposes them when invoked.
            _disposeSimulators = () =>
            {
                simulatorGpuDynamicBlockSizeModule.Dispose();
                simulatorGpuStaticBlockSizeModule64.Dispose();
                simulatorGpuStaticBlockSizeModule128.Dispose();
                simulatorGpuStaticBlockSizeModule256.Dispose();
                simulatorGpuStaticBlockSizeModule512.Dispose();
            };

            // Start with the first simulator in the queue (the static 256 one enqueued first).
            _simulator = _simulators.Dequeue();
            //[/CreateSimulatros]

            //[CreateBuffers]
            _buffers = new uint[2];
            // A new array is already zero-filled; this loop only restates that explicitly.
            for (var i = 0; i < _buffers.Length; i++)
            {
                _buffers[i] = 0;
            }
            GL.GenBuffers(_buffers.Length, _buffers);
            foreach (var buffer in _buffers)
            {
                GL.BindBuffer(BufferTarget.ArrayBuffer, buffer);
                GL.BufferData(BufferTarget.ArrayBuffer,
                              (IntPtr) (Microsoft.FSharp.Core.Operators.SizeOf<float4>()*_numBodies), 
                              IntPtr.Zero, BufferUsageHint.DynamicDraw);
                // Read the buffer size back from GL and compare it with the requested size.
                var size = 0;
                unsafe
                {
                    GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, &size);
                }
                if (size != Microsoft.FSharp.Core.Operators.SizeOf<float4>()*_numBodies)
                {
                    throw new Exception("Pixel Buffer Object allocation failed!");
                }
                GL.BindBuffer(BufferTarget.ArrayBuffer, 0);
                // NOTE(review): each buffer is registered with CUDA here and again through
                // cuGraphicsGLRegisterBuffer in the next loop; confirm both calls are needed.
                CUDAInterop.cuSafeCall(CUDAInterop.cuGLRegisterBufferObject(buffer));
            }

            // One CUDA graphics resource handle is stored per GL buffer.
            _resources = new IntPtr[_buffers.Length];
            for (var i = 0; i < _buffers.Length; i++)
            {
                var res = IntPtr.Zero;
                unsafe
                {
                    CUDAInterop.cuSafeCall(CUDAInterop.cuGraphicsGLRegisterBuffer(&res, _buffers[i], 0u));
                }
                _resources[i] = res;
            }
            //[/CreateBuffers]

            //[FinalizeGL]
            _vel = _worker.Malloc<float4>(_numBodies);

            // Generate the initial host-side positions and velocities, then upload them.
            float4[] hpos, hvel;
            BodyInitializer.Initialize(new BodyInitializer3(), clusterScale, velocityScale, _numBodies, 
                                       out hpos, out hvel);
            _worker.Scatter(hvel, _vel.Ptr, Microsoft.FSharp.Core.FSharpOption<int>.None,
                Microsoft.FSharp.Core.FSharpOption<int>.None);
            // Only the second pointer handed to the callback (pos1) receives the positions.
            LockPos(
                (pos0, pos1) =>
                    _worker.Scatter(hpos, pos1, Microsoft.FSharp.Core.FSharpOption<int>.None,
                        Microsoft.FSharp.Core.FSharpOption<int>.None));

            Help();
            Description();
            //[/FinalizeGL]
        }
Beispiel #49
0
 // Extern declaration for vkAllocateMemory; it has no managed body and returns a Result.
 // The last parameter is a pointer through which a DeviceMemory value is passed.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern Result vkAllocateMemory(Device device, MemoryAllocateInfo* allocateInfo, AllocationCallbacks* allocator, DeviceMemory* memory);
Beispiel #50
0
 // Extern declaration for vkMapMemory; it has no managed body and returns a Result.
 // The last parameter is a pointer to an IntPtr.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern Result vkMapMemory(Device device, DeviceMemory memory, ulong offset, ulong size, MemoryMapFlags flags, IntPtr* data);
Beispiel #51
0
 /// <summary>
 /// Calls <c>vkUnmapMemory</c> for this device with the given memory object.
 /// </summary>
 /// <param name="memory">Memory object passed to the native call.</param>
 public unsafe void UnmapMemory(DeviceMemory memory)
     => vkUnmapMemory(this, memory);
Beispiel #52
0
 // Extern declaration for vkFreeMemory; it has no managed body and returns nothing.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern void vkFreeMemory(Device device, DeviceMemory memory, AllocationCallbacks* allocator);
Beispiel #53
0
 // Extern declaration for vkGetDeviceMemoryCommitment; it has no managed body and returns
 // nothing. The last parameter is a pointer to a ulong.
 // The attribute that binds it to a native library is not visible in this excerpt.
 internal static unsafe extern void vkGetDeviceMemoryCommitment(Device device, DeviceMemory memory, ulong* committedMemoryInBytes);