Esempio n. 1
0
        /// <summary>
        /// Allocates a pinned host buffer through a temporary CPU accelerator and copies
        /// the contents of a freshly allocated buffer on the given accelerator into it.
        /// </summary>
        /// <param name="accelerator">The current accelerator.</param>
        /// <param name="dataSize">The number of elements to copy.</param>
        static void PerformPinnedCopyToCPUAccelerator(Accelerator accelerator, int dataSize)
        {
            // Buffers that are allocated through the CPUAccelerator class are pinned
            // automatically, which enables async transfers via AcceleratorStreams.
            using (var hostAccelerator = new CPUAccelerator(accelerator.Context))
            using (var hostBuffer = hostAccelerator.Allocate<int>(dataSize))
            {
                var copyStream = accelerator.DefaultStream;

                // The buffer on the target device has the same length as the host buffer.
                using (var deviceBuffer = accelerator.Allocate<int>(hostBuffer.Length))
                {
                    // Start the copy via the CopyTo function of the device buffer so that
                    // the operation is issued on the stream of the associated accelerator.
                    deviceBuffer.CopyTo(copyStream, hostBuffer, 0);

                    //
                    // Other operations could be performed here...
                    //

                    // Block until the copy operation has finished.
                    copyStream.Synchronize();
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Constructs a new CPU 2D texture array.
        /// </summary>
        /// <param name="accelerator">The target accelerator.</param>
        /// <param name="d3dDevice">The target DX device.</param>
        /// <param name="texture">The target DX texture.</param>
        /// <param name="bufferFlags">The used buffer flags.</param>
        /// <param name="viewFlags">The used view flags.</param>
        internal CPUDirectXTexture2DArray(
            CPUAccelerator accelerator,
            Device d3dDevice,
            Texture2D texture,
            DirectXBufferFlags bufferFlags,
            DirectXViewFlags viewFlags)
            : base(accelerator, d3dDevice, texture, bufferFlags, viewFlags)
        {
            var sourceDescription = texture.Description;

            // The staging texture mirrors the dimensions, format and array size of the
            // source texture, has a single mip level and is CPU readable and writable.
            stagingTexture = new Texture2D(
                d3dDevice,
                new Texture2DDescription
                {
                    Width = sourceDescription.Width,
                    Height = sourceDescription.Height,
                    ArraySize = sourceDescription.ArraySize,
                    Format = sourceDescription.Format,
                    MipLevels = 1,
                    SampleDescription = new global::SharpDX.DXGI.SampleDescription(1, 0),
                    Usage = ResourceUsage.Staging,
                    CpuAccessFlags = CpuAccessFlags.Read | CpuAccessFlags.Write,
                    BindFlags = BindFlags.None,
                    OptionFlags = ResourceOptionFlags.None
                });
        }
        /// <summary>
        /// Runs <c>PaintKernel</c> for a single template on a CPU accelerator and returns
        /// the content of the output buffer.
        /// </summary>
        /// <param name="tempate">The template that is forwarded to the kernel.</param>
        /// <param name="series">The series values; the result has one entry per element.</param>
        /// <param name="clusters">
        /// The clusters as a jagged array. The length of the first row determines how many
        /// elements are read from every row.
        /// </param>
        /// <param name="error">The error value that is forwarded to the kernel.</param>
        /// <returns>The output buffer as an array.</returns>
        private static int[] Process(Template tempate, float[] series, float[][] clusters, float error)
        {
            int rowCount = clusters.Length;
            int columnCount = clusters[0].Length;

            // Convert the jagged input into a dense 2D array that can be copied in one go.
            var denseClusters = new float[rowCount, columnCount];
            for (int row = 0; row < rowCount; ++row)
            {
                float[] sourceRow = clusters[row];
                for (int column = 0; column < columnCount; ++column)
                {
                    denseClusters[row, column] = sourceRow[column];
                }
            }

            using (var context = new Context())
            using (var accelerator = new CPUAccelerator(context))
            {
                var paintKernel = accelerator.LoadAutoGroupedStreamKernel<Index, Template, ArrayView<float>, ArrayView2D<float>, ArrayView<int>, float>(PaintKernel);

                int seriesLength = series.Count();

                using (var seriesBuffer = accelerator.Allocate<float>(seriesLength))
                using (var clustersBuffer = accelerator.Allocate<float>(rowCount, columnCount))
                using (var resultBuffer = accelerator.Allocate<int>(seriesLength))
                {
                    var origin = new Index2(0, 0);

                    seriesBuffer.CopyFrom(series, 0, 0, seriesLength);
                    clustersBuffer.CopyFrom(denseClusters, origin, origin, new Index2(rowCount, columnCount));

                    // One kernel instance per cluster row.
                    paintKernel(rowCount, tempate, seriesBuffer, clustersBuffer, resultBuffer, error);

                    // Wait for the kernel before reading the results back.
                    accelerator.Synchronize();

                    return resultBuffer.GetAsArray();
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Enumerates all available accelerators and writes the device information of
        /// each of them to the console, followed by the information of a manually
        /// configured CPU accelerator.
        /// </summary>
        static void Main()
        {
            // The main context owns all accelerators that are created below.
            using (var context = new Context())
            {
                foreach (var id in Accelerator.Accelerators)
                {
                    // Every accelerator has to be disposed before the global context is
                    // disposed, hence the scoped using statement per accelerator.
                    using (var current = Accelerator.Create(context, id))
                    {
                        Console.WriteLine($"AcceleratorId: {id.AcceleratorType}, {current.Name}");
                        PrintAcceleratorInfo(current);
                        Console.WriteLine();
                    }
                }

                // Accelerators can also be created manually with custom settings:
                // a CPU accelerator with 4 threads and the highest thread priority.
                using (var customCpu = new CPUAccelerator(context, 4, ThreadPriority.Highest))
                {
                    PrintAcceleratorInfo(customCpu);
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Runs the reduction samples on every non-CPU accelerator and afterwards on a
        /// custom CPU accelerator.
        /// </summary>
        /// <param name="args">The command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            using (var context = new Context())
            {
                // All available accelerators except the CPU ones.
                var nonCpuIds = Accelerator.Accelerators.Where(
                    id => id.AcceleratorType != AcceleratorType.CPU);

                foreach (var id in nonCpuIds)
                {
                    using (var accelerator = Accelerator.Create(context, id))
                    {
                        Console.WriteLine($"Performing operations on {accelerator}");

                        Reduce(accelerator);
                        AtomicReduce(accelerator);
                    }
                }

                // Custom CPU accelerator with a warp size > 1.
                using (var accelerator = new CPUAccelerator(context, 4, 4))
                {
                    Console.WriteLine($"Performing operations on {accelerator}");

                    Reduce(accelerator);
                    AtomicReduce(accelerator);
                }
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Constructs a new memory-buffer cache with an optional initial allocation.
 /// </summary>
 /// <param name="accelerator">
 /// The associated accelerator to allocate memory on.
 /// </param>
 /// <param name="initialLength">
 /// The initial length of the buffer in bytes. No buffer is allocated if this value
 /// is not positive.
 /// </param>
 public CPUMemoryBufferCache(CPUAccelerator accelerator, long initialLength)
     : base(accelerator)
 {
     // Nothing to allocate up front for non-positive lengths.
     if (initialLength <= 0)
     {
         return;
     }

     cache = accelerator.Allocate1D<byte>(initialLength);
 }
Esempio n. 7
0
        /// <summary>
        /// Creates a new CPU accelerator based on the configuration provided via
        /// the environment variable <see cref="CPUKindEnvVariable"/>.
        /// </summary>
        /// <param name="context">The parent context to use.</param>
        /// <returns>The created (parallel) CPU accelerator instance.</returns>
        /// <remarks>
        /// If the environment variable does not exist or does not contain a valid kind
        /// (specified by the <see cref="CPUAcceleratorKind"/> enumeration), this
        /// function creates a simulator compatible with the kind
        /// <see cref="CPUAcceleratorKind.Default"/>.
        /// </remarks>
        private static CPUAccelerator CreateCPUAccelerator(Context context)
        {
            var cpuConfig = Environment.GetEnvironmentVariable(CPUKindEnvVariable);

            // Enum.TryParse also succeeds for numeric strings (e.g. "42") and for
            // comma-separated combinations that do not map to a declared member.
            // Such values have to be rejected explicitly to honor the documented
            // fallback to the default kind.
            bool isValidKind =
                Enum.TryParse(cpuConfig, out CPUAcceleratorKind kind) &&
                Enum.IsDefined(typeof(CPUAcceleratorKind), kind);
            if (!isValidKind)
            {
                kind = CPUAcceleratorKind.Default;
            }

            return CPUAccelerator.Create(context, kind, CPUAcceleratorMode.Parallel);
        }
        /// <summary>
        /// Runs <c>PaintKernel2d</c> on a CPU accelerator for several templates at once
        /// and returns the content of the output buffer.
        /// </summary>
        /// <param name="tempate">
        /// The templates; one template per entry of <paramref name="clusterCollection"/>.
        /// </param>
        /// <param name="clusterCollection">
        /// The clusters of every template as jagged arrays. Templates with fewer clusters
        /// than the largest one are padded with zeros. The length of the very first
        /// cluster determines how many points are read from every cluster.
        /// </param>
        /// <param name="series">The series values; the result has one entry per element.</param>
        /// <param name="error">The error value that is forwarded to the kernel.</param>
        /// <returns>The output buffer as an array.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tempate"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The number of templates differs from the number of cluster sets.
        /// </exception>
        public static int[] Process(IList<Template> tempate,
                                    IList<float[][]> clusterCollection,
                                    float[] series,
                                    float error)
        {
            if (tempate == null)
            {
                throw new ArgumentNullException(nameof(tempate));
            }

            var templatesCount   = clusterCollection.Count;
            if (tempate.Count != templatesCount)
            {
                throw new ArgumentException(
                    "The number of templates must match the number of cluster sets.",
                    nameof(tempate));
            }

            var clustersCount    = clusterCollection.Max(x => x.Length);
            var pointsInClusters = clusterCollection[0][0].Length;
            var seriesLength     = series.Length;

            // Flatten the jagged clusters into a dense 3D array; missing clusters of
            // shorter templates keep their default value of zero.
            var clsts = new float[templatesCount, clustersCount, pointsInClusters];
            for (int k = 0; k < templatesCount; k++)
            {
                var clusters = clusterCollection[k];
                for (int i = 0; i < clusters.Length; i++)
                {
                    for (int j = 0; j < pointsInClusters; j++)
                    {
                        clsts[k, i, j] = clusters[i][j];
                    }
                }
            }

            var templates = tempate.ToArray();

            using (var context = new Context())
            using (var accelerator = new CPUAccelerator(context))
            {
                var paintKernel = accelerator.LoadAutoGroupedStreamKernel<Index2, ArrayView<Template>, ArrayView<float>, ArrayView3D<float>, ArrayView<int>, float>(PaintKernel2d);

                using (var seriesBuffer = accelerator.Allocate<float>(seriesLength))
                using (var templatesBuffer = accelerator.Allocate<Template>(templatesCount))
                using (var clustersBuffer = accelerator.Allocate<float>(templatesCount, clustersCount, pointsInClusters))
                using (var buffer = accelerator.Allocate<int>(seriesLength))
                {
                    seriesBuffer.CopyFrom(series, 0, 0, seriesLength);

                    // Upload the templates; without this copy the kernel would read
                    // a buffer that was allocated but never filled.
                    templatesBuffer.CopyFrom(templates, 0, 0, templatesCount);

                    clustersBuffer.CopyFrom(clsts, new Index3(0, 0, 0), new Index3(0, 0, 0), new Index3(templatesCount, clustersCount, pointsInClusters));

                    // One kernel instance per (template, cluster) pair.
                    paintKernel(new Index2(templatesCount, clustersCount), templatesBuffer, seriesBuffer, clustersBuffer, buffer, error);

                    // Wait for the kernel before reading the results back.
                    accelerator.Synchronize();

                    return buffer.GetAsArray();
                }
            }
        }
Esempio n. 9
0
        /// <summary cref="DisposeBase.Dispose(bool)"/>
        /// <remarks>
        /// Owned members are released only when <paramref name="disposing"/> is true;
        /// the base implementation is invoked in every case.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                // NOTE(review): the accelerator is released before the remaining context
                // members, presumably because it depends on them - keep this order.
                CPUAccelerator.Dispose();

                // Code-generation infrastructure
                codeGenerationSemaphore.Dispose();
                IRContext.Dispose();

                // Frontend and default backend
                ILFrontend.Dispose();
                DefautltILBackend.Dispose();

                // Debug information and type information
                DebugInformationManager.Dispose();
                TypeContext.Dispose();
            }
            base.Dispose(disposing);
        }
Esempio n. 10
0
        /// <summary>
        /// Constructs a new CPU buffer for DX interop together with its CPU memory and
        /// a DX staging buffer.
        /// </summary>
        /// <param name="accelerator">The target accelerator.</param>
        /// <param name="d3dDevice">The target DX device.</param>
        /// <param name="buffer">The target DX buffer.</param>
        /// <param name="bufferFlags">The buffer flags.</param>
        /// <param name="viewFlags">The registration flags.</param>
        internal CPUDirectXBuffer(
            CPUAccelerator accelerator,
            Device d3dDevice,
            Buffer buffer,
            DirectXBufferFlags bufferFlags,
            DirectXViewFlags viewFlags)
            : base(accelerator, d3dDevice, buffer, bufferFlags, viewFlags)
        {
            // CPU memory that holds Length elements.
            cpuMemory = Accelerator.Allocate<T, Index1>(Length);

            // Staging buffer of the same size that is CPU readable and writable.
            stagingBuffer = new Buffer(
                D3DDevice,
                new BufferDescription
                {
                    Usage = ResourceUsage.Staging,
                    SizeInBytes = ElementSize * Length,
                    StructureByteStride = ElementSize,
                    CpuAccessFlags = CpuAccessFlags.Read | CpuAccessFlags.Write,
                    BindFlags = BindFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                });
        }
Esempio n. 11
0
        /// <summary>
        /// Shows how to work with array views. Operations on array views are
        /// supported on all accelerators.
        /// </summary>
        static void Main()
        {
            // Main context, a CPU accelerator (all operations are performed in CPU
            // memory here) and a buffer of 1024 integers.
            using (var context = new Context())
            using (var accelerator = new CPUAccelerator(context))
            using (var buffer = accelerator.Allocate<int>(1024))
            {
                // A view to the whole buffer.
                ArrayView<int> wholeView = buffer.View;

                // Accessing an array view that points to memory which is not accessible
                // in the current context triggers an invalid access exception. For
                // instance, array views that point to CUDA memory are inaccessible from
                // the CPU by default (and vice-versa). This restriction does not matter
                // here since all operations are performed in CPU memory.

                // Unsafe operations on array views.
                UnsafeAccess(wholeView);

                // SubView access.
                SubViewAccess(wholeView);

                // VariableView access.
                VariableViewAccess(wholeView);

                // Unsafe operations on variable views.
                UnsafeVariableViewAccess(wholeView);
            }
        }
 /// <summary>
 /// Constructs a new CPU DX-interop accelerator.
 /// </summary>
 /// <param name="accelerator">The target CPU accelerator.</param>
 /// <param name="d3dDevice">The target DX device.</param>
 /// <remarks>
 /// Both arguments are forwarded to the base constructor; no further
 /// initialization is performed here.
 /// </remarks>
 internal CPUDirectXAccelerator(CPUAccelerator accelerator, Device d3dDevice)
     : base(accelerator, d3dDevice)
 {
 }
Esempio n. 13
0
        /// <summary>
        /// Constructs a new ILGPU main context.
        /// </summary>
        /// <param name="builder">The parent builder instance.</param>
        /// <param name="devices">
        /// The array of accelerator descriptions. If it is default or empty, a single
        /// default CPU device is registered instead.
        /// </param>
        /// <remarks>
        /// Several members receive a reference to this (partially constructed) context,
        /// so the order of the initialization steps below should not be changed lightly.
        /// </remarks>
        internal Context(
            Builder builder,
            ImmutableArray <Device> devices)
        {
            InstanceId     = InstanceId.CreateNew();
            TargetPlatform = Backend.RuntimePlatform;
            RuntimeSystem  = new RuntimeSystem();
            Properties     = builder.InstantiateProperties();

            // Initialize verifier (a no-op verifier is used unless enabled in the builder)
            Verifier = builder.EnableVerifier ? Verifier.Instance : Verifier.Empty;

            // Initialize main contexts
            TypeContext = new IRTypeContext(this);
            IRContext   = new IRContext(this);

            // Initialize intrinsic manager
            IntrinsicManager = builder.IntrinsicManager;

            // Create frontend; it only receives the debug-information manager if debug
            // symbols are enabled and null otherwise
            DebugInformationManager frontendDebugInformationManager =
                Properties.DebugSymbolsMode > DebugSymbolsMode.Disabled
                ? DebugInformationManager
                : null;

            // NOTE(review): the extra argument 1 presumably limits the frontend to a
            // single code-generation thread when parallel generation is disabled - confirm
            ILFrontend = builder.EnableParallelCodeGenerationInFrontend
                ? new ILFrontend(this, frontendDebugInformationManager)
                : new ILFrontend(this, frontendDebugInformationManager, 1);

            // Create default IL backend
            DefautltILBackend = new DefaultILBackend(this);

            // Initialize default transformer
            ContextTransformer = Optimizer.CreateTransformer(
                Properties.OptimizationLevel,
                TransformerConfiguration.Transformed,
                Properties.InliningMode);

            // Initialize the default CPU device (implicit device, parallel mode,
            // lowest thread priority)
            CPUAccelerator = new CPUAccelerator(
                this,
                CPUDevice.Implicit,
                CPUAcceleratorMode.Parallel,
                ThreadPriority.Lowest);

            // Initialize all devices
            Devices = devices;
            if (devices.IsDefaultOrEmpty)
            {
                // Add a default CPU device
                Devices = ImmutableArray.Create <Device>(CPUDevice.Default);
            }

            // Create a mapping that groups all devices by their accelerator type
            deviceMapping = new Dictionary <AcceleratorType, List <Device> >(Devices.Length);
            foreach (var device in Devices)
            {
                if (!deviceMapping.TryGetValue(device.AcceleratorType, out var devs))
                {
                    // First device of this accelerator type: create its list
                    devs = new List <Device>(8);
                    deviceMapping.Add(device.AcceleratorType, devs);
                }
                devs.Add(device);
            }
        }
Esempio n. 14
0
 /// <summary>
 /// Constructs a new memory-buffer cache.
 /// </summary>
 /// <param name="accelerator">
 /// The associated accelerator to allocate memory on.
 /// </param>
 /// <remarks>
 /// Delegates to the two-argument constructor with an initial length of 0.
 /// </remarks>
 public CPUMemoryBufferCache(CPUAccelerator accelerator)
     : this(accelerator, 0)
 {
 }
 /// <summary>
 /// Creates the DX-interop extension for the given CPU accelerator.
 /// </summary>
 /// <param name="accelerator">The target CPU accelerator.</param>
 /// <returns>A new CPU DX-interop accelerator that uses the current DX device.</returns>
 public DirectXInteropAccelerator CreateCPUExtension(CPUAccelerator accelerator) =>
     new CPUDirectXAccelerator(accelerator, D3DDevice);
Esempio n. 16
0
 /// <summary>
 /// Creates the radix-sort-pairs provider implementation for the given CPU accelerator.
 /// </summary>
 /// <param name="accelerator">The target CPU accelerator.</param>
 /// <returns>A new CPU-specific provider implementation.</returns>
 public RadixSortPairsProviderImplementation CreateCPUExtension(CPUAccelerator accelerator) =>
     new CPU.CPURadixSortPairsProviderImplementation(accelerator);
        //Note: This program runs a *lot* faster in Release mode than Debug mode (because bounds checking is disabled in ILGPU).
        /// <summary>
        /// Searches a fixed number range in batches for numbers with a given chain
        /// length, using a Cuda accelerator if one is available and a CPU accelerator
        /// otherwise, and prints the smallest number that was found.
        /// </summary>
        /// <param name="args">The command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            //needed for the other method (Parallel.For)
            //int maxLength = 0;
            //long iterations = 0;

            //As a note, it takes around 2 hours on CPU (Core i7 4790K) to search ~120B numbers
            long min = 0;
            long max = 113373373373;

            //Length of an array of longs.
            //Since longs are int64, multiply by ~8 for actual memory use in bytes
            const long allocatedMemory = 200000000;

            //Cache to store results that work in. As this grows, so does the time to search it for good results.
            const int resultCacheSize = 100;

            //Stores the minimum value for the specific depth
            long minForMax = long.MaxValue;

            //The chain length to search for:
            //E.G. 100 (OneHundred) -> 10 (Ten) -> 3 (Three) -> 5 (Five) -> 4 [end] is of length 4.
            const int chainLength = 8;

            //Include punctuation in the count or not.
            //E.G., With punctuation 137 -> "One Hundred and Thirty-Seven" (28 characters)
            //& Without punctuation, 137 -> "OneHundredandThirtySeven" (24 characters)
            const bool includePunctuation = false;

            //Stop when one number is found with specified chain length (obviously invalidates the percentage count)
            const bool stopAtOneFound = false;

            //min is increased as the program runs, so we need a copy of its original value for calculating the % done.
            long originalMin = min;

            //Size of the whole search range; constant for the entire run.
            long total = max - originalMin;

            using (var context = new Context())
            {
                Accelerator acc;

                try
                {
                    acc = new CudaAccelerator(context);
                }
                catch (Exception)
                {
                    //no cuda: deliberately fall back to the CPU accelerator
                    acc = new CPUAccelerator(context);
                }

                //The accelerator has to be released before the context is disposed.
                using (acc)
                {
                    Console.WriteLine("Performing ops on " + acc.Name + ". " + acc.NumMultiprocessors.ToString() + " processors.");

                    //Set up two kernels to get the data
                    var searchKernel = acc.LoadAutoGroupedStreamKernel<Index, ArrayView<UInt64>, long, long, bool>(SearchForChain);
                    var resultKernel = acc.LoadAutoGroupedStreamKernel<Index, ArrayView<UInt64>, ArrayView<UInt64>>(FindNonZero);

                    using (var buffa = acc.Allocate<UInt64>((int)allocatedMemory))
                    using (var buffb = acc.Allocate<UInt64>(resultCacheSize))
                    {
                        //Loop while we haven't gone over the maximum value in search range
                        while (min < max)
                        {
                            //Search for numbers first (Kernel)
                            searchKernel((int)allocatedMemory, buffa.View, min, chainLength, !includePunctuation);
                            acc.Synchronize();

                            //Read back array to find nonzero entries (Kernel)
                            resultKernel(resultCacheSize, buffb, buffa);
                            acc.Synchronize();

                            var  arr   = buffb.GetAsArray();
                            bool found = false;

                            //Read back the results array for nonzero entries (Normal .net)
                            for (int i = 0; i < arr.Length; i++)
                            {
                                if (arr[i] != 0)
                                {
                                    found = true;
                                    Console.WriteLine(arr[i]);
                                    if (arr[i] < (ulong)minForMax)
                                    {
                                        minForMax = (long)arr[i];
                                    }
                                }
                            }

                            //break if we have found a number and had to stop at one found
                            if (found && stopAtOneFound)
                            {
                                break;
                            }
                            min += allocatedMemory;

                            //The last batch may reach beyond max; clamp so that the
                            //reported progress never exceeds 100%.
                            long diff = Math.Min(min, max) - originalMin;

                            //For displaying the percentage complete
                            Console.WriteLine((((decimal)diff / (decimal)total) * 100).ToString() + "% complete");
                        }
                    }
                }
            }


            //The code commented below is for doing this via a Parallel.for.

            /*
             * Parallel.For(min,max,(i)=> {
             *  int chainLength = searchGPU(i,true);
             *  if (chainLength > maxLength)
             *  {
             *      maxLength = chainLength;
             *      Console.WriteLine(NumberToString(i) + "   <=>   (" + i.ToString() + ") gave chain length: " + chainLength.ToString());
             *      minForMax = long.MaxValue;
             *  }
             *
             *  if (chainLength == maxLength && Math.Abs(i) < Math.Abs(minForMax))
             *  {
             *      minForMax = i;
             *      Console.WriteLine(i.ToString() + " was a better candidate for " + chainLength.ToString());
             *  }
             *
             *  iterations++;
             *  if (iterations % ((max - min) / 10000) == 0)
             *  {
             *
             *      decimal percent = (decimal)iterations / (decimal)((max - min));
             *
             *      Console.WriteLine((percent * 100)+" Percent done.");
             *  }
             * });*/

            if (minForMax == long.MaxValue)
            {
                //minForMax still holds its sentinel: nothing was found, so do not
                //report the sentinel as if it were a result.
                Console.WriteLine("No number with chain length " + chainLength.ToString() + " was found.");
            }
            else
            {
                Console.WriteLine(NumberToString(minForMax) + "   <=>   (" + minForMax.ToString() + ") gave chain length: " + chainLength.ToString());
            }
        }
Esempio n. 18
0
 /// <summary>
 /// Creates the scan provider implementation for the given CPU accelerator.
 /// </summary>
 /// <param name="accelerator">The target CPU accelerator.</param>
 /// <returns>A new CPU-specific provider implementation.</returns>
 public ScanProviderImplementation CreateCPUExtension(CPUAccelerator accelerator) =>
     new CPU.CPUScanProviderImplementation(accelerator);