Beispiel #1
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Max</c> for two <see cref="sbyte"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
 public static sbyte Max(sbyte first, sbyte second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #2
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for a <see cref="short"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static short Abs(short value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #3
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for a <see cref="long"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static long Abs(long value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #4
0
        /// <summary>
        /// Constructs a new OpenCL accelerator. The constructor copies the ids
        /// from <paramref name="acceleratorId"/>, creates an OpenCL context for
        /// the device, queries the device properties exposed by this instance,
        /// and finally binds the accelerator, creates its default stream and
        /// initializes it with a new <c>CLBackend</c>.
        /// </summary>
        /// <param name="context">The ILGPU context.</param>
        /// <param name="acceleratorId">The accelerator id.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="acceleratorId"/> is null.
        /// </exception>
        public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
            : base(context, AcceleratorType.OpenCL)
        {
            if (acceleratorId == null)
            {
                throw new ArgumentNullException(nameof(acceleratorId));
            }

            // Take over the platform/device identification from the given id
            PlatformId = acceleratorId.PlatformId;
            DeviceId   = acceleratorId.DeviceId;
            CVersion   = acceleratorId.CVersion;

            // Resolve platform info
            PlatformName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_NAME);

            VendorName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_VENDOR);

            // Create new context (the status returned by CreateContext is passed
            // to CLException.ThrowIfFailed, which presumably throws on an error
            // status - confirm against CLException)
            CLException.ThrowIfFailed(
                CLAPI.CreateContext(DeviceId, out contextPtr));

            // Resolve device info
            Name = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_NAME);

            MemorySize = CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Max grid size: the array is sized to at least three entries
            // because indices 0..2 are read unconditionally below
            int workItemDimensions = IntrinsicMath.Max(CLAPI.GetDeviceInfo <int>(
                                                           DeviceId,
                                                           CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), 3);
            var workItemSizes = new IntPtr[workItemDimensions];

            CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                workItemSizes);
            MaxGridSize = new Index3(
                workItemSizes[0].ToInt32(),
                workItemSizes[1].ToInt32(),
                workItemSizes[2].ToInt32());

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo <IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

            // Resolve max shared memory per block (the 64-bit device value is
            // limited to int.MaxValue before it is narrowed to int)
            MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo <long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
                int.MaxValue);

            // Resolve total constant memory
            // NOTE(review): this reads CL_DEVICE_MAX_PARAMETER_SIZE and narrows
            // the 64-bit result to int without the int.MaxValue limit that is
            // applied to the shared memory size above - confirm this is intended
            MaxConstantMemory = (int)CLAPI.GetDeviceInfo <long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE);

            // Resolve clock rate
            ClockRate = CLAPI.GetDeviceInfo <int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

            // Resolve number of multiprocessors
            NumMultiprocessors = CLAPI.GetDeviceInfo <int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

            // Resolve max number of threads per multiprocessor (the value is
            // taken over from the max number of threads per group)
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

            // Vendor- and sub-group-specific setup (implemented elsewhere)
            InitVendorFeatures();
            InitSubGroupSupport(acceleratorId);

            // Bind this accelerator, create the default stream and initialize
            // the accelerator with a new OpenCL backend
            Bind();
            DefaultStream = CreateStreamInternal();
            Init(new CLBackend(Context, Vendor));
        }
Beispiel #5
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for a <see cref="float"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static float Abs(float value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #6
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="ushort"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static ushort Min(ushort first, ushort second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #7
0
 /// <summary>
 /// Checks whether the given signed integer is a power of two. The check is
 /// performed on the result of <c>IntrinsicMath.Abs(value)</c> using the
 /// unsigned overload; <see cref="long.MinValue"/> always yields false.
 /// </summary>
 /// <param name="value">The integer value to test.</param>
 /// <returns>True, if the given integer is a power of two.</returns>
 public static bool IsPowerOf2(long value)
 {
     // long.MinValue is rejected before the absolute value is taken
     if (value == long.MinValue)
     {
         return false;
     }

     return IsPowerOf2((ulong)IntrinsicMath.Abs(value));
 }
Beispiel #8
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="double"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static double Min(double first, double second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #9
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="int"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static int Clamp(int value, int min, int max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #10
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="double"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static double Clamp(double value, double min, double max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #11
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="float"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static float Clamp(float value, float min, float max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #12
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Max</c> for two <see cref="ulong"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
 public static ulong Max(ulong first, ulong second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #13
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Max</c> for two <see cref="uint"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
 public static uint Max(uint first, uint second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #14
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Max</c> for two <see cref="short"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Max(first, second)</c>.</returns>
 public static short Max(short first, short second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #15
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="long"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static long Min(long first, long second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #16
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="long"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static long Clamp(long value, long min, long max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #17
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="byte"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static byte Min(byte first, byte second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #18
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="byte"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static byte Clamp(byte value, byte min, byte max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #19
0
        /// <summary>
        /// Entry point for a single processing thread. The thread sets up its
        /// thread, warp and group contexts once and then loops: it waits for a
        /// task, synchronizes with the other threads on the processor barrier,
        /// runs the kernel launcher for its share of the grid, synchronizes
        /// again and (on the main thread only) reports the finished task to the
        /// accelerator. The loop ends when no further task is handed out.
        /// </summary>
        /// <param name="arg">
        /// The absolute thread index (unboxed via a direct cast to int).
        /// </param>
        private void ExecuteThread(object arg)
        {
            // Get the current thread information
            int absoluteThreadIndex = (int)arg;
            int threadIdx           = absoluteThreadIndex % MaxNumThreadsPerMultiprocessor;

            // The thread with relative index 0 reports task completion below
            bool isMainThread = threadIdx == 0;

            // Setup a new thread context for this thread and initialize the lane index
            int laneIdx       = threadIdx % WarpSize;
            int warpIdx       = threadIdx / WarpSize;
            var threadContext = new CPURuntimeThreadContext(laneIdx, warpIdx)
            {
                LinearGroupIndex = threadIdx
            };

            threadContext.MakeCurrent();

            // Setup the current warp context as it always stays the same
            // NOTE(review): isMainWarpThread is assigned but never read in this
            // method
            bool isMainWarpThread = threadIdx == 0;
            var  warpContext      = warpContexts[warpIdx];

            warpContext.MakeCurrent();

            // Setup the current group context as it always stays the same
            groupContext.MakeCurrent();

            CPUAcceleratorTask task = null;

            for (; ;)
            {
                // Get a new task to execute (if any); a false result ends the
                // thread's processing loop
                if (!Accelerator.WaitForTask(ref task))
                {
                    break;
                }

                // Setup the current group index
                threadContext.GroupIndex = Index3D.ReconstructIndex(
                    threadIdx,
                    task.GroupDim);

                // Wait for all threads of all multiprocessors to arrive here
                Thread.MemoryBarrier();
                processorBarrier.SignalAndWait();

                try
                {
                    // If we are an active group thread (threads with an index
                    // beyond the group size skip the kernel but still take part
                    // in the barrier in the outer finally block)
                    int groupSize = task.GroupDim.Size;
                    if (threadIdx < groupSize)
                    {
                        try
                        {
                            var launcher = task.KernelExecutionDelegate;

                            // Split the grid into different chunks that will be processed
                            // by the available multiprocessors
                            int linearGridDim = task.GridDim.Size;
                            int gridChunkSize = IntrinsicMath.DivRoundUp(
                                linearGridDim,
                                Accelerator.NumMultiprocessors);
                            int gridOffset    = gridChunkSize * ProcessorIndex;
                            int linearUserDim = task.TotalUserDim.Size;
                            // NOTE(review): the upper bound is not limited to
                            // linearGridDim; only the globalIndex check below
                            // prevents launches beyond the user dimension
                            for (
                                int i = gridOffset, e = gridOffset + gridChunkSize;
                                i < e;
                                ++i)
                            {
                                BeginThreadProcessing();
                                try
                                {
                                    // Setup the current grid index
                                    threadContext.GridIndex = Index3D.ReconstructIndex(
                                        i,
                                        task.GridDim);

                                    // Invoke the actual kernel launcher
                                    int globalIndex = i * groupSize + threadIdx;
                                    if (globalIndex < linearUserDim)
                                    {
                                        launcher(task, globalIndex);
                                    }
                                }
                                finally
                                {
                                    // Always paired with BeginThreadProcessing,
                                    // even if the launcher throws
                                    EndThreadProcessing();
                                }
                            }
                        }
                        finally
                        {
                            // This thread has already finished processing
                            FinishThreadProcessing();
                        }
                    }
                }
                finally
                {
                    // Wait for all threads of all multiprocessors to arrive here
                    processorBarrier.SignalAndWait();

                    // If we reach this point and we are the main thread, notify the
                    // parent accelerator instance
                    if (isMainThread)
                    {
                        Accelerator.FinishTaskProcessing();
                    }
                }
            }
        }
Beispiel #20
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="ushort"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static ushort Clamp(ushort value, ushort min, ushort max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
        /// <summary>
        /// Copies a region of the given jagged 3D array into this buffer. The
        /// region is first gathered into a dense temporary array of
        /// <c>extent.Size</c> elements, which is then handed to the underlying
        /// buffer's <c>CopyFrom</c>. Source entries that do not exist (null
        /// sub-arrays, or innermost rows that end before the requested range)
        /// are skipped and therefore keep the default value of
        /// <typeparamref name="T"/> in the temporary array.
        /// </summary>
        /// <param name="stream">The stream passed to the underlying copy.</param>
        /// <param name="source">The jagged source array.</param>
        /// <param name="sourceOffset">The offset into the source array.</param>
        /// <param name="targetOffset">The offset into this buffer.</param>
        /// <param name="extent">The number of elements to copy per dimension.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="source"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown if an offset is negative, if <paramref name="targetOffset"/>
        /// lies outside of this buffer, if the X source offset lies outside of
        /// <paramref name="source"/>, or if <paramref name="extent"/> is
        /// negative or exceeds the source (X) or target bounds.
        /// </exception>
        public void CopyFrom(
            AcceleratorStream stream,
            T[][][] source,
            LongIndex3 sourceOffset,
            LongIndex3 targetOffset,
            LongIndex3 extent)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (sourceOffset.X < 0 || sourceOffset.Y < 0 || sourceOffset.Z < 0 ||
                sourceOffset.X >= source.LongLength)
            {
                throw new ArgumentOutOfRangeException(nameof(sourceOffset));
            }

            if (targetOffset.X < 0 || targetOffset.Y < 0 || targetOffset.Z < 0 ||
                targetOffset.X >= Extent.X ||
                targetOffset.Y >= Extent.Y ||
                targetOffset.Z >= Extent.Z)
            {
                throw new ArgumentOutOfRangeException(nameof(targetOffset));
            }

            if (extent.X < 0 || extent.Y < 0 || extent.Z < 0 ||
                sourceOffset.X + extent.X > source.LongLength ||
                targetOffset.X + extent.X > Extent.X ||
                targetOffset.Y + extent.Y > Extent.Y ||
                targetOffset.Z + extent.Z > Extent.Z)
            {
                throw new ArgumentOutOfRangeException(nameof(extent));
            }

            // Dense staging buffer; entries that are skipped below keep default(T)
            var tempBuffer = new T[extent.Size];

            for (long i = 0; i < extent.X; ++i)
            {
                var subData = source[i + sourceOffset.X];
                if (subData == null)
                {
                    continue;
                }

                // NOTE(review): the Y dimension of the jagged source is not
                // bounds-checked; a sub-array shorter than
                // sourceOffset.Y + extent.Y raises an IndexOutOfRangeException
                for (long j = 0; j < extent.Y; ++j)
                {
                    var subSubData = subData[j + sourceOffset.Y];
                    if (subSubData == null)
                    {
                        continue;
                    }

                    // Skip entries that are out of bounds. Reading starts at
                    // sourceOffset.Z, so only (length - sourceOffset.Z) elements
                    // of this row are available; the bound is non-positive (and
                    // the loop does not run) if the row ends before the offset.
                    long availableInRow = subSubData.LongLength - sourceOffset.Z;
                    for (
                        long k = 0, e = IntrinsicMath.Min(
                            availableInRow,
                            extent.Z);
                        k < e;
                        ++k)
                    {
                        var targetIdx = new LongIndex3(i, j, k).
                                        ComputeLinearIndex(extent);
                        tempBuffer[targetIdx] = subSubData[k + sourceOffset.Z];
                    }
                }
            }

            buffer.CopyFrom(
                stream,
                tempBuffer,
                0,
                targetOffset,
                extent.Size);
        }
Beispiel #22
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="uint"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static uint Clamp(uint value, uint min, uint max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #23
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for a <see cref="double"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static double Abs(double value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #24
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Clamp</c> for <see cref="ulong"/> values.
 /// </summary>
 /// <param name="value">The value to clamp.</param>
 /// <param name="min">The lower bound passed to the clamp operation.</param>
 /// <param name="max">The upper bound passed to the clamp operation.</param>
 /// <returns>The result of <c>IntrinsicMath.Clamp(value, min, max)</c>.</returns>
 public static ulong Clamp(ulong value, ulong min, ulong max)
 {
     return IntrinsicMath.Clamp(value, min, max);
 }
Beispiel #25
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for an <see cref="sbyte"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static sbyte Abs(sbyte value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #26
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="float"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static float Min(float first, float second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #27
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Abs</c> for an <see cref="int"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>The result of <c>IntrinsicMath.Abs(value)</c>.</returns>
 public static int Abs(int value)
 {
     return IntrinsicMath.Abs(value);
 }
Beispiel #28
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.Min</c> for two <see cref="int"/> values.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The result of <c>IntrinsicMath.Min(first, second)</c>.</returns>
 public static int Min(int first, int second)
 {
     return IntrinsicMath.Min(first, second);
 }
Beispiel #29
0
 /// <summary>
 /// Delegates to <c>IntrinsicMath.TrailingZeroCount</c> for a
 /// <see cref="uint"/> value.
 /// </summary>
 /// <param name="value">The input value.</param>
 /// <returns>
 /// The result of <c>IntrinsicMath.TrailingZeroCount(value)</c>.
 /// </returns>
 public static int TrailingZeroCount(uint value)
 {
     return IntrinsicMath.TrailingZeroCount(value);
 }
Beispiel #30
0
 /// <summary>
 /// Checks whether the given signed integer is a power of two. Every input
 /// except <see cref="long.MinValue"/> is passed through
 /// <c>IntrinsicMath.Abs</c> and tested with the unsigned overload;
 /// <see cref="long.MinValue"/> always yields false.
 /// </summary>
 /// <param name="value">The integer value to test.</param>
 /// <returns>True, if the given integer is a power of two.</returns>
 public static bool IsPowerOf2(long value)
 {
     if (value != long.MinValue)
     {
         return IsPowerOf2((ulong)IntrinsicMath.Abs(value));
     }

     // long.MinValue is never reported as a power of two
     return false;
 }