Esempio n. 1
0
        /// <summary cref="Accelerator.EstimateGroupSizeInternal(
        /// Kernel, int, int, out int)"/>
        protected override int EstimateGroupSizeInternal(
            Kernel kernel,
            int dynamicSharedMemorySizeInBytes,
            int maxGroupSize,
            out int minGridSize)
        {
            if (dynamicSharedMemorySizeInBytes > 0)
            {
                throw new ArgumentOutOfRangeException(
                          nameof(dynamicSharedMemorySizeInBytes));
            }

            if (maxGroupSize < 1)
            {
                maxGroupSize = MaxNumThreadsPerGroup;
            }

            var clKernel            = kernel as CLKernel;
            var workGroupSizeNative = CurrentAPI.GetKernelWorkGroupInfo <IntPtr>(
                clKernel.KernelPtr,
                DeviceId,
                CLKernelWorkGroupInfoType.CL_KERNEL_WORK_GROUP_SIZE);
            int workGroupSize = workGroupSizeNative.ToInt32();

            workGroupSize = IntrinsicMath.Min(workGroupSize, maxGroupSize);
            minGridSize   = IntrinsicMath.DivRoundUp(MaxNumThreads, workGroupSize);

            return(workGroupSize);
        }
Esempio n. 2
0
        public void CopyFrom(
            AcceleratorStream stream,
            T[][][] source,
            Index3 sourceOffset,
            Index3 targetOffset,
            Index3 extent)
        {
            if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
                extent.X > source.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(extent));
            }

            if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
                sourceOffset.X >= extent.X ||
                sourceOffset.Y >= extent.Y ||
                sourceOffset.Z >= extent.Z)
            {
                throw new ArgumentOutOfRangeException(nameof(sourceOffset));
            }

            var tempBuffer = new T[extent.Size];

            for (int i = 0; i < extent.X; ++i)
            {
                var subData = source[i + sourceOffset.X];
                if (subData == null)
                {
                    continue;
                }

                for (int j = 0; j < extent.Y; ++j)
                {
                    var subSubData = subData[j + sourceOffset.Y];
                    if (subSubData == null)
                    {
                        continue;
                    }

                    // Skip entries that are out of bounds
                    for (
                        int k = 0, e = IntrinsicMath.Min(subSubData.Length, extent.Z);
                        k < e;
                        ++k)
                    {
                        var targetIdx = new Index3(i, j, k).ComputeLinearIndex(extent);
                        tempBuffer[targetIdx] = subSubData[k + sourceOffset.Z];
                    }
                }
            }

            buffer.CopyFrom(
                stream,
                tempBuffer,
                0,
                targetOffset,
                extent.Size);
        }
Esempio n. 3
0
        /// <summary cref="Accelerator.EstimateMaxActiveGroupsPerMultiprocessor(Kernel, int, int)"/>
        protected override int EstimateMaxActiveGroupsPerMultiprocessorInternal(
            Kernel kernel,
            int groupSize,
            int dynamicSharedMemorySizeInBytes)
        {
            if (dynamicSharedMemorySizeInBytes > 0)
            {
                throw new ArgumentOutOfRangeException(nameof(dynamicSharedMemorySizeInBytes));
            }

            groupSize = IntrinsicMath.Min(groupSize, MaxNumThreadsPerGroup);
            return(MaxNumThreadsPerGroup / groupSize);
        }
Esempio n. 4
0
        /// <summary>
        /// Init memory information.
        /// </summary>
        private void InitMemoryInfo()
        {
            // Resolve memory size
            MemorySize = CurrentAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

            // Resolve max shared memory per block
            MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
                CurrentAPI.GetDeviceInfo <long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
                int.MaxValue);

            // Resolve total constant memory
            MaxConstantMemory = (int)CurrentAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE);
        }
Esempio n. 5
0
        /// <summary>
        /// Constructs a new OpenCL accelerator.
        /// </summary>
        /// <param name="context">The ILGPU context.</param>
        /// <param name="acceleratorId">The accelerator id.</param>
        public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
            : base(context, AcceleratorType.OpenCL)
        {
            if (acceleratorId == null)
            {
                throw new ArgumentNullException(nameof(acceleratorId));
            }

            PlatformId = acceleratorId.PlatformId;
            DeviceId   = acceleratorId.DeviceId;

            PlatformName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_NAME);

            VendorName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_VENDOR);

            // Create new context
            CLException.ThrowIfFailed(
                CLAPI.CreateContext(DeviceId, out contextPtr));

            // Resolve device info
            Name = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_NAME);

            MemorySize = CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Max grid size
            int workItemDimensions = IntrinsicMath.Max(CLAPI.GetDeviceInfo <int>(
                                                           DeviceId,
                                                           CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
            var workItemSizes = new IntPtr[workItemDimensions];

            CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                workItemSizes);
            MaxGridSize = new Index3(
                workItemSizes[0].ToInt32(),
                workItemSizes[1].ToInt32(),
                workItemSizes[2].ToInt32());

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo <IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

            // Resolve max shared memory per block
            MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo <long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
                int.MaxValue);

            // Resolve total constant memory
            MaxConstantMemory = (int)CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE);

            // Resolve clock rate
            ClockRate = CLAPI.GetDeviceInfo <int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

            // Resolve number of multiprocessors
            NumMultiprocessors = CLAPI.GetDeviceInfo <int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

            // Result max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

            InitVendorFeatures();
            InitSubGroupSupport(acceleratorId);

            Bind();
            DefaultStream = CreateStreamInternal();
            base.Backend  = new CLBackend(Context, Backends.Backend.OSPlatform, Vendor);
        }
Esempio n. 6
0
        public void CopyFrom(
            AcceleratorStream stream,
            T[][] source,
            LongIndex2 sourceOffset,
            LongIndex2 targetOffset,
            LongIndex2 extent)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (sourceOffset.X < 0 || sourceOffset.Y < 0 ||
                sourceOffset.X >= source.LongLength)
            {
                throw new ArgumentOutOfRangeException(nameof(sourceOffset));
            }

            if (targetOffset.X < 0 || targetOffset.Y < 0 ||
                targetOffset.X >= Extent.X ||
                targetOffset.Y >= Extent.Y)
            {
                throw new ArgumentOutOfRangeException(nameof(targetOffset));
            }

            if (extent.X < 0 || extent.Y < 0 ||
                sourceOffset.X + extent.X > source.LongLength ||
                targetOffset.X + extent.X > Extent.X ||
                targetOffset.Y + extent.Y > Extent.Y)
            {
                throw new ArgumentOutOfRangeException(nameof(extent));
            }

            var tempBuffer = new T[extent.Size];

            for (long i = 0; i < extent.X; ++i)
            {
                var subData = source[i + sourceOffset.X];
                if (subData == null)
                {
                    continue;
                }

                // Skip entries that are out of bounds
                for (
                    long j = 0, e = IntrinsicMath.Min(subData.LongLength, extent.Y);
                    j < e;
                    ++j)
                {
                    var targetIdx = new LongIndex2(i, j).
                                    ComputeLinearIndex(extent);
                    tempBuffer[targetIdx] = subData[j + sourceOffset.Y];
                }
            }

            buffer.CopyFrom(
                stream,
                tempBuffer,
                0,
                targetOffset,
                extent.Size);
        }
Esempio n. 7
0
 public static ushort Min(ushort first, ushort second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 8
0
 public static byte Min(byte first, byte second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 9
0
 public static long Min(long first, long second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 10
0
 public static int Min(int first, int second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 11
0
 public static float Min(float first, float second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 12
0
 public static double Min(double first, double second) =>
 IntrinsicMath.Min(first, second);
Esempio n. 13
0
 public static uint Min(uint first, uint second) =>
 IntrinsicMath.Min(first, second);