Beispiel #1
0
 /// <summary>
 /// Stores the supplied context, kernel, source codes and defines in the instance fields.
 /// </summary>
 /// <param name="context">Compute context assigned to <c>_context</c>.</param>
 /// <param name="kernel">Kernel assigned to <c>_kernel</c>.</param>
 /// <param name="sourcecodes">Source code strings assigned to <c>_sourcecodes</c>.</param>
 /// <param name="defines">Name/value defines assigned to <c>_defines</c>.</param>
 private RenderKernel(ComputeContext context, ComputeKernel kernel, string[] sourcecodes, Dictionary<string, string> defines)
 {
     // Inputs first, then the OpenCL objects; the assignments are independent.
     this._sourcecodes = sourcecodes;
     this._defines = defines;
     this._context = context;
     this._kernel = kernel;
 }
Beispiel #2
0
        /// <summary>
        /// Creates a kernel wrapper from the owning program and the underlying compute kernel.
        /// </summary>
        /// <param name="program">Program stored in <c>Program</c>; required to be non-null.</param>
        /// <param name="kernel">Kernel stored in <c>ComputeKernel</c>; required to be non-null.</param>
        internal OpenCLKernel(OpenCLProgram program, ComputeKernel kernel)
        {
            // Preconditions: both arguments must be supplied.
            Contract.Requires(program != null);
            Contract.Requires(kernel != null);

            this.Program = program;
            this.ComputeKernel = kernel;
        }
 /// <summary>
 /// Passes the position, look-at and up vectors to the kernel as three consecutive value arguments.
 /// </summary>
 /// <param name="kernel">Kernel whose arguments are set.</param>
 /// <param name="useDouble">When true the vectors are passed as <c>Vector4d</c>, otherwise as <c>Vector4</c>.</param>
 /// <param name="startIndex">Index of the first argument to set; advanced by three on return.</param>
 public override void ApplyToKernel(ComputeKernel kernel, bool useDouble, ref int startIndex)
 {
     if (!useDouble)
     {
         // Single precision: narrow each vector through Vector3 before widening to Vector4.
         kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_position));
         kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_lookat));
         kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_up));
         return;
     }

     // Double precision: build the Vector4d values directly from the stored fields.
     kernel.SetValueArgument(startIndex++, new Vector4d(_position));
     kernel.SetValueArgument(startIndex++, new Vector4d(_lookat));
     kernel.SetValueArgument(startIndex++, new Vector4d(_up));
 }
Beispiel #4
0
 /// <summary>
 /// Passes <c>_x</c>, <c>_y</c> and <c>_zoom</c> to the kernel as three consecutive value arguments.
 /// </summary>
 /// <param name="kernel">Kernel whose arguments are set.</param>
 /// <param name="isDouble">When true the fields are passed unchanged, otherwise each is cast to <c>float</c>.</param>
 /// <param name="startIndex">Index of the first argument to set; advanced by three on return.</param>
 public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int startIndex)
 {
     if (isDouble)
     {
         // Fields are passed with their declared type.
         kernel.SetValueArgument(startIndex++, _x);
         kernel.SetValueArgument(startIndex++, _y);
         kernel.SetValueArgument(startIndex++, _zoom);
         return;
     }

     // Single precision path: narrow every value to float.
     kernel.SetValueArgument(startIndex++, (float)_x);
     kernel.SetValueArgument(startIndex++, (float)_y);
     kernel.SetValueArgument(startIndex++, (float)_zoom);
 }
Beispiel #5
0
 /// <summary>
 /// Builds the program for <c>device</c>, creates the requested kernel(s) and the command queue.
 /// </summary>
 /// <param name="kernelNames">
 /// Kernel names. With exactly one name that kernel is stored in <c>kernel</c>; with more than
 /// one name all kernels of the program are created and stored in <c>kernels</c>, keyed by function name.
 /// </param>
 /// <exception cref="ArgumentException">No kernel name was supplied.</exception>
 public void SetupDevice(params string[] kernelNames) {
     // Previously an empty list failed late with an index-out-of-range on kernelNames[0].
     if (kernelNames == null || kernelNames.Length == 0) {
         throw new ArgumentException("At least one kernel name is required.", "kernelNames");
     }
     try {
         this.program.Build(new[] { device }, string.Empty, null, IntPtr.Zero);
     }
     catch (Exception) {
         // Report the log of the device the program was actually built for,
         // not of the first device of the first platform.
         Tracer.TraceLine(this.program.GetBuildLog(device));
         throw;
     }
     if (kernelNames.Length > 1)
     {
         kernels = program.CreateAllKernels().ToDictionary(item => item.FunctionName);
     }
     else
     {
         kernel = program.CreateKernel(kernelNames[0]);
     }
     // NOTE(review): the queue is bound to context.Devices[0], which is only the build
     // device if the context was created for that device alone - confirm.
     commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
 }
Beispiel #6
0
        /// <summary>
        /// Runs the queen tasks in batches on the device until no task is left in progress,
        /// then prints the elapsed seconds and the summed solution count.
        /// </summary>
        /// <param name="context">Context used for the command queue and the per-batch task buffer.</param>
        /// <param name="kernel">Kernel executed for every batch; argument 0 receives the task buffer.</param>
        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done = new List<QueenTask>();

            ComputeEventList eventList = new ComputeEventList();
            var sw = new Stopwatch();

            // NOTE(review): the second device of the context is hard-coded here - confirm this is intended.
            // The queue is now disposed when the search ends instead of being leaked.
            using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
            {
                Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

                QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

                sw.Start();

                while (inProgress.Any())
                {
                    // A fresh buffer is allocated for every batch; release it once the batch
                    // has been read back so device memory does not accumulate.
                    using (var taskBuffer =
                        new ComputeBuffer<QueenTask>(context,
                            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                            inProgress))
                    {
                        kernel.SetMemoryArgument(0, taskBuffer);
                        commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                        for (int i = 0; i < 12; i++)
                            commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);

                        commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);

                        // All enqueued work must complete before the buffer is disposed.
                        commands.Finish();
                    }

                    inProgress = GetNextAssignment(inProgress, todos, done);
                }

                sw.Stop();
            }

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                            .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }
		/// <summary>
		/// Selects the first OpenCL platform and its first context device, compiles the
		/// source in <c>MD5_OPENCL_FILE</c> and creates the "crackMD5" kernel.
		/// Exits the process with code 1 when no OpenCL platform is available.
		/// </summary>
		public OpenCLPasswordMatcher ()
		{
			if (ComputePlatform.Platforms.Count == 0) {
				Console.WriteLine ("Cound not find any OpenCL platforms");
				Environment.Exit (1);
			}

			var platform = ComputePlatform.Platforms [0];

			logger.Info ("Found {0} computing devices:", platform.Devices.Count);

			foreach (var d in platform.Devices) {
				logger.Info ("* {0}", d.Name);
			}

			Context = new ComputeContext (ComputeDeviceTypes.All,
				new ComputeContextPropertyList (platform), null, IntPtr.Zero);

			Device = Context.Devices [0];

			logger.Info ("Using first device.");

			// load opencl source; the reader is closed even if reading fails
			string clSource;
			using (StreamReader streamReader = new StreamReader (MD5_OPENCL_FILE)) {
				clSource = streamReader.ReadToEnd ();
			}

			// create program with opencl source
			ComputeProgram program = new ComputeProgram (Context, clSource);

			// compile opencl source
			try {
				program.Build (null, null, null, IntPtr.Zero);
			} catch (Exception) {
				logger.Error ("Build log: " + program.GetBuildLog(Device));
				// rethrow without resetting the stack trace
				throw;
			}

			// load chosen kernel from program
			Kernel = program.CreateKernel ("crackMD5");
		}
Beispiel #8
0
            /// <summary>
            /// Copies the frame into a freshly pinned host array, wraps it in a device buffer and
            /// sets all six kernel arguments (frame buffer, rows, cols, max buffer, min buffer, window value).
            /// </summary>
            /// <param name="frame">Frame whose pixels are copied to the host array.</param>
            /// <param name="ctx">Context used to allocate the device buffers.</param>
            /// <param name="k">Kernel whose arguments 0-5 are set.</param>
            /// <param name="WindowValue">Value passed as kernel argument 5.</param>
            public static void UpdateArguments(Mat frame, ComputeContext ctx, ComputeKernel k, int WindowValue)
            {
                // NOTE(review): reallocation is triggered by the pixel count, while the buffers are
                // sized by the column count - a frame with the same area but different width keeps
                // the old buffers. Confirm whether that can happen.
                if (frame.Width * frame.Height != Length)
                {
                    // Release the previous per-column buffers before replacing them.
                    if (minBufferCB != null)
                    {
                        minBufferCB.Dispose();
                    }

                    if (maxBufferCB != null)
                    {
                        maxBufferCB.Dispose();
                    }

                    // Allocate memory.
                    maxBuffer = new int[frame.Cols];
                    minBuffer = new int[frame.Cols];

                    minBufferCB = new ComputeBuffer <int>(ctx, ComputeMemoryFlags.WriteOnly, minBuffer.Length);
                    maxBufferCB = new ComputeBuffer <int>(ctx, ComputeMemoryFlags.WriteOnly, maxBuffer.Length);

                    Length = frame.Width * frame.Height;
                }

                // The device buffer references the pinned array (UseHostPointer), so it has to be
                // released before the pin is lifted, not after.
                if (frameBuffer != null)
                {
                    frameBuffer.Dispose();
                }

                if (frameDataHandler.IsAllocated)
                {
                    frameDataHandler.Free();
                }

                frameData        = new byte[frame.Width * frame.Height];
                frameDataHandler = GCHandle.Alloc(frameData, GCHandleType.Pinned);

                // Set the memory allocation parameters.
                frameBuffer = new ComputeBuffer <byte>(ctx,
                                                       ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, frame.Width * frame.Height, frameDataHandler.AddrOfPinnedObject());


                frame.CopyTo(frameData);
                k.SetMemoryArgument(0, frameBuffer);
                k.SetValueArgument <int>(1, frame.Rows);
                k.SetValueArgument <int>(2, frame.Cols);
                k.SetMemoryArgument(3, maxBufferCB);
                k.SetMemoryArgument(4, minBufferCB);
                k.SetValueArgument <int>(5, WindowValue);
            }
Beispiel #9
0
        /// <summary>
        /// Compile the kernel with a map of preprocessor defines, a collection of
        /// name-value pairs. Any previously compiled program and kernel are disposed first.
        /// </summary>
        ///
        /// <param name="options">A map of preprocessor defines; each entry becomes
        /// <c>-D key=value</c>. Null or empty compiles without options.</param>
        public void Compile(IDictionary <String, String> options)
        {
            // Clear out any old kernel and program. Each is checked on its own: after a failed
            // build the program exists while the kernel does not, which used to throw here.
            if (this.kernel != null)
            {
                this.kernel.Dispose();
                this.kernel = null;
            }

            if (this.program != null)
            {
                this.program.Dispose();
                this.program = null;
            }

            // Create the program from the source code
            this.program = new ComputeProgram(this.context, this.cl);

            // Assemble "-D key=value" pairs separated by single spaces
            string buildOptions = null;

            if (options != null && options.Count > 0)
            {
                StringBuilder builder = new StringBuilder();

                foreach (KeyValuePair <String, String> obj in options)
                {
                    if (builder.Length > 0)
                    {
                        builder.Append(" ");
                    }
                    builder.Append("-D ");
                    builder.Append(obj.Key);
                    builder.Append("=");
                    builder.Append(obj.Value);
                }

                buildOptions = builder.ToString();
            }

            this.program.Build(null, buildOptions, null, IntPtr.Zero);

            // Create the kernel
            this.kernel = Program.CreateKernel(this.kernelName);
        }
        /// <summary>
        /// Takes the Ethash program and its DAG/search kernels from the shared tables keyed by
        /// (device index, local work size), building and registering them when no entry exists.
        /// </summary>
        private void BuildEthashProgram()
        {
            ComputeDevice computeDevice = OpenCLDevice.GetComputeDevice();

            // Best-effort lock as before: a timeout or a failed wait does not abort the build.
            try { mProgramArrayMutex.WaitOne(5000); } catch (Exception) { }

            try
            {
                // NOTE(review): arrays compare by reference unless the tables were created with a
                // custom comparer - confirm, otherwise the lookup below can never hit.
                long[] key = new long[] { DeviceIndex, mEthashLocalWorkSizeArray[0] };

                if (mEthashProgramArray.ContainsKey(key))
                {
                    mEthashProgram      = mEthashProgramArray[key];
                    mEthashDAGKernel    = mEthashDAGKernelArray[key];
                    mEthashSearchKernel = mEthashSearchKernelArray[key];
                }
                else
                {
                    mEthashProgram = BuildProgram("ethash_lbry", mEthashLocalWorkSizeArray[0], "-O1", "", "");
                    mEthashProgramArray[key]      = mEthashProgram;
                    mEthashDAGKernelArray[key]    = mEthashDAGKernel = mEthashProgram.CreateKernel("GenerateDAG");
                    mEthashSearchKernelArray[key] = mEthashSearchKernel = mEthashProgram.CreateKernel("search");
                }
            }
            finally
            {
                // Release even when the build or kernel creation throws; previously the mutex
                // stayed held in that case. Releasing an unowned mutex is still ignored.
                try { mProgramArrayMutex.ReleaseMutex(); } catch (Exception) { }
            }
        }
Beispiel #11
0
        /// <summary>
        /// Takes the Ethash program and its DAG/search kernels from the shared tables keyed by
        /// (device index, local work size). On a miss the program is compiled from
        /// <c>Kernels\ethash_lbry.cl</c>, logged and registered together with both kernels.
        /// </summary>
        private void BuildEthashProgram()
        {
            ComputeDevice computeDevice = OpenCLDevice.GetComputeDevice();

            // Best-effort lock as before: a timeout or a failed wait does not abort the build.
            try { mProgramArrayMutex.WaitOne(5000); } catch (Exception) { }

            try
            {
                // NOTE(review): arrays compare by reference unless the tables were created with a
                // custom comparer - confirm, otherwise the lookup below can never hit.
                long[] key = new long[] { DeviceIndex, mEthashLocalWorkSizeArray[0] };

                if (mEthashProgramArray.ContainsKey(key))
                {
                    mEthashProgram      = mEthashProgramArray[key];
                    mEthashDAGKernel    = mEthashDAGKernelArray[key];
                    mEthashSearchKernel = mEthashSearchKernelArray[key];
                }
                else
                {
                    String source = System.IO.File.ReadAllText(@"Kernels\ethash_lbry.cl");
                    mEthashProgram = new ComputeProgram(Context, source);
                    MainForm.Logger(@"Loaded Kernels\ethash_lbry.cl for Device #" + DeviceIndex + ".");
                    // Vendor-specific prefix followed by the include path and the work size define.
                    String buildOptions = (OpenCLDevice.GetVendor() == "AMD"    ? "-O1 " :
                                           OpenCLDevice.GetVendor() == "NVIDIA" ? "" : // "-cl-nv-opt-level=1 -cl-nv-maxrregcount=256 " :
                                           "")
                                          + " -IKernels -DWORKSIZE=" + mEthashLocalWorkSizeArray[0];
                    try
                    {
                        mEthashProgram.Build(OpenCLDevice.DeviceList, buildOptions, null, IntPtr.Zero);
                    }
                    catch (Exception)
                    {
                        MainForm.Logger(mEthashProgram.GetBuildLog(computeDevice));
                        throw;
                    }
                    MainForm.Logger("Built Ethash program for Device #" + DeviceIndex + ".");
                    MainForm.Logger("Build options: " + buildOptions);
                    mEthashProgramArray[key]      = mEthashProgram;
                    mEthashDAGKernelArray[key]    = mEthashDAGKernel = mEthashProgram.CreateKernel("GenerateDAG");
                    mEthashSearchKernelArray[key] = mEthashSearchKernel = mEthashProgram.CreateKernel("search");
                }
            }
            finally
            {
                // Release even when reading, building or kernel creation throws; previously the
                // mutex stayed held after the rethrow. Releasing an unowned mutex is still ignored.
                try { mProgramArrayMutex.ReleaseMutex(); } catch (Exception) { }
            }
        }
        // --- GrassComponent ---
        /// <summary>
        /// Creates the tree material, binds its buffers and textures, and uploads one quad
        /// vector per billboard side (front, side, top) for every entry of <c>TreeTextures</c>.
        /// </summary>
        private void InitializeMaterial()
        {
            const string quadKernelName = "FindQuad";

            _treeMaterial = new Material(TreeShader);

            // Buffer bindings
            _treeMaterial.SetBuffer(ShaderID.treeBuffer, _megaBuffer);
            TestMat.SetBuffer("_Buffer", _closeMegaBuffer);

            // Texture bindings; the Perlin noise texture is used for both noise and color variance
            _treeMaterial.SetTexture(ShaderID.treeTexture, _treeTextures);
            _treeMaterial.SetTexture(ShaderID.perlinNoise, PerlinNoise);
            _treeMaterial.SetTexture(ShaderID.colorVariance, PerlinNoise);

            _treeMaterial.SetVectorArray(ShaderID.minMaxWidthHeight, _minMaxWidthHeight);

            var quadList   = new List <Vector4>();
            var quadBuffer = new ComputeBuffer(1, sizeof(float) * 4, ComputeBufferType.Append);
            var quadKernel = new ComputeKernel(quadKernelName, ComputeShader);

            foreach (TerrainTextures terrain in TreeTextures)
            {
                // Fetch all three billboard textures before any quad is computed
                var frontTexture = GetBillboardTexture(Sides.Front, terrain.FeatureTexture);
                var sideTexture  = GetBillboardTexture(Sides.Side, terrain.FeatureTexture);
                var topTexture   = GetBillboardTexture(Sides.Top, terrain.FeatureTexture);

                // Each result is divided by the width of its texture
                quadList.Add(CalcSide(frontTexture, quadKernel, quadBuffer) / frontTexture.width);
                quadList.Add(CalcSide(sideTexture, quadKernel, quadBuffer) / sideTexture.width);
                quadList.Add(CalcSide(topTexture, quadKernel, quadBuffer) / topTexture.width);
            }

            quadBuffer.SafeRelease();
            _treeMaterial.SetVectorArray(ShaderID.quads, quadList.ToArray());
        }
 /// <summary>
 /// Builds the program in <c>CalculateKernel</c>, runs its "Calc" kernel once as a task on the
 /// first GPU of the first platform with two four-element rows, then waits for a key press.
 /// </summary>
 /// <param name="args">Command line arguments (not used).</param>
 static void Main(string[] args)
     {
         int[] r1 = { 1, 2, 3, 4 };
         int[] r2 = { 4, 3, 2, 1 };
         int rowSize = r1.Length;

         // First platform, context over all of its GPU devices
         var platform = ComputePlatform.Platforms[0];
         var properties = new ComputeContextPropertyList(platform);
         var context = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);

         // Command queue on the first GPU found
         var queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

         // Create the program from source, compile it and pick the kernel
         var program = new ComputeProgram(context, CalculateKernel);
         program.Build(null, null, null, IntPtr.Zero);
         var kernel = program.CreateKernel("Calc");

         // Both input rows are exposed to the device as read-only views of the host arrays
         var inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
         var row1Buffer = new ComputeBuffer<int>(context, inputFlags, r1);
         var row2Buffer = new ComputeBuffer<int>(context, inputFlags, r2);

         // Arguments: the two rows followed by the row length
         kernel.SetMemoryArgument(0, row1Buffer);
         kernel.SetMemoryArgument(1, row2Buffer);
         kernel.SetValueArgument(2, rowSize);

         // Run the kernel as a task and block until it has completed
         queue.ExecuteTask(kernel, null);
         queue.Finish();

         Console.WriteLine("Finished");
         Console.ReadKey();
     }
Beispiel #14
0
        /// <summary>
        /// Builds the six OpenCL programs, creates their kernels and prints how many
        /// milliseconds each of the two phases took.
        /// </summary>
        public void InitializeComponents()
        {
            // Every program is built with the same option string
            const string buildOptions = "-g";

            time3 = Stopwatch.StartNew();

            // Phase 1: create and build the programs
            p_dataInitialization = new ComputeProgram(context, s_dataInitialization);
            p_dataInitialization.Build(devices, buildOptions, null, IntPtr.Zero);

            p_reorder = new ComputeProgram(context, s_reorder);
            p_reorder.Build(devices, buildOptions, null, IntPtr.Zero);

            p_elementCount = new ComputeProgram(context, s_elementCount);
            p_elementCount.Build(devices, buildOptions, null, IntPtr.Zero);

            p_prefixSum = new ComputeProgram(context, s_prefixSum);
            p_prefixSum.Build(devices, buildOptions, null, IntPtr.Zero);

            p_ccArrayCreation = new ComputeProgram(context, s_ccArrayCreation);
            p_ccArrayCreation.Build(devices, buildOptions, null, IntPtr.Zero);

            p_radixSort = new ComputeProgram(context, s_radixSort);
            p_radixSort.Build(devices, buildOptions, null, IntPtr.Zero);

            p_count = time3.ElapsedMilliseconds;
            k_start = time3.ElapsedMilliseconds;

            // Phase 2: one kernel per single-purpose program ...
            k_dataInitialization = p_dataInitialization.CreateKernel("dataInitialization");
            k_reorder            = p_reorder.CreateKernel("reorder");
            k_elementCount       = p_elementCount.CreateKernel("elementCount");
            k_prefixSum          = p_prefixSum.CreateKernel("prefixSum");
            k_ccArrayCreation    = p_ccArrayCreation.CreateKernel("ccArrayCreation");

            // ... and four kernels from the radix sort program
            kernel_block_sort    = p_radixSort.CreateKernel("clBlockSort");
            kernel_block_scan    = p_radixSort.CreateKernel("clBlockScan");
            kernel_block_prefix  = p_radixSort.CreateKernel("clBlockPrefix");
            kernel_reorder       = p_radixSort.CreateKernel("clReorder");

            k_count = time3.ElapsedMilliseconds - k_start;

            Console.WriteLine("TIME SPENT INITIALIZING AND BUILDING PROGRAMS: " + p_count + "ms");
            Console.WriteLine("TIME SPENT CREATING KERNELS: " + k_count + "ms");
        }
Beispiel #15
0
        /// <summary>
        /// Executes the compiled kernel with the specified function name and reads every
        /// buffer created for the arguments back into the corresponding argument array.
        /// </summary>
        /// <typeparam name="TSource">The element type of the array arguments.</typeparam>
        /// <param name="functionName">Name of the kernel function.</param>
        /// <param name="args">The kernel arguments. The length of the first argument of type
        /// <typeparamref name="TSource"/>[] is used as the global work size (1 when there is none).</param>
        /// <exception cref="ExecutionException">
        /// No compiled kernel has the given name, or setting up / running the kernel failed.
        /// </exception>
        public override void Execute <TSource>(string functionName, params object[] args)
        {
            ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));

            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            // The queue is created only after the lookup succeeded; it used to be created first
            // and was never disposed when the kernel was missing.
            using (ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None))
            {
                try
                {
                    var ndobject = (TSource[])args.FirstOrDefault(x => (x.GetType() == typeof(TSource[])));

                    long length = ndobject != null ? ndobject.Length : 1;

                    var buffers = BuildKernelArguments <TSource>(args, kernel, length);
                    commands.Execute(kernel, null, new long[] { length }, null, null);

                    foreach (var item in buffers)
                    {
                        // Blocking read back into the caller's array, then release the device buffer
                        TSource[] r = (TSource[])args[item.Key];
                        commands.ReadFromBuffer(item.Value, ref r, true, null);
                        item.Value.Dispose();
                    }

                    commands.Finish();
                }
                catch (Exception ex)
                {
                    throw new ExecutionException(ex.Message);
                }
            }
        }
        /// <summary>
        /// Loads and builds <c>Kernels\ethash.cl</c> for the device, creates the DAG and search
        /// kernels and starts the background miner thread.
        /// </summary>
        /// <param name="aDevice">Device forwarded to the base constructor.</param>
        /// <param name="aDeviceIndex">Device index forwarded to the base constructor and used in log messages.</param>
        /// <param name="aStratum">Stratum stored in <c>mStratum</c>.</param>
        public OpenCLEthashMiner(ComputeDevice aDevice, int aDeviceIndex, EthashStratum aStratum)
            : base(aDevice, aDeviceIndex, "Ethash")
        {
            mStratum        = aStratum;
            mGlobalWorkSize = 4096 * mLocalWorkSize * Device.MaxComputeUnits;

            // Program from source. Binary alternative left by the original author:
            //mProgram = new ComputeProgram(this.Context, new List<byte[]> { System.IO.File.ReadAllBytes(@"BinaryKernels\ethash-newEllesmeregw192l8.bin") }, new List<ComputeDevice> { Device });
            string source = System.IO.File.ReadAllText(@"Kernels\ethash.cl");
            mProgram = new ComputeProgram(this.Context, source);
            MainForm.Logger("Loaded ethash program for Device #" + aDeviceIndex + ".");

            // Build for this device only, with the local work size as a define
            var deviceList = new List <ComputeDevice> { Device };
            mProgram.Build(deviceList, "-DWORKSIZE=" + mLocalWorkSize, null, IntPtr.Zero);
            MainForm.Logger("Built ethash program for Device #" + aDeviceIndex + ".");

            mDAGKernel = mProgram.CreateKernel("GenerateDAG");
            MainForm.Logger("Created DAG kernel for Device #" + aDeviceIndex + ".");

            mSearchKernel = mProgram.CreateKernel("search");
            MainForm.Logger("Created search kernel for Device #" + aDeviceIndex + ".");

            // Background thread running MinerThread
            mMinerThread = new Thread(new ThreadStart(MinerThread)) { IsBackground = true };
            mMinerThread.Start();
        }
Beispiel #17
0
        /// <summary>
        /// Sets one kernel argument by reflection: arrays are wrapped in a
        /// <c>ComputeBuffer</c> of their element type, everything else is passed by value.
        /// </summary>
        /// <param name="kernel">Kernel whose argument is set.</param>
        /// <param name="index">Argument index.</param>
        /// <param name="arg">Argument value; must not be null.</param>
        /// <exception cref="ArgumentException"><paramref name="arg"/> is null.</exception>
        void Setargument(ComputeKernel kernel, int index, object arg)
        {
            if (arg == null)
            {
                throw new ArgumentException("Argument " + index + " is null");
            }

            Type argType = arg.GetType();

            if (!argType.IsArray)
            {
                // Scalar: call the generic SetValueArgument closed over the runtime type
                var setValue = typeof(ComputeKernel).GetMethod("SetValueArgument").MakeGenericMethod(argType);
                setValue.Invoke(kernel, new object[] { index, arg });
                return;
            }

            // Array: construct ComputeBuffer<element type>(context, flags, array)
            Type bufferType = typeof(ComputeBuffer <>).MakeGenericType(argType.GetElementType());
            object[] constructorArgs =
            {
                context,
                ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                arg
            };
            var messageBuffer = (ComputeMemory)Activator.CreateInstance(bufferType, constructorArgs);

            kernel.SetMemoryArgument(index, messageBuffer);
        }
Beispiel #18
0
        // 26 ms 4096x4096@512 iter with 1024 cores
        /// <summary>
        /// Creates the context and command queue on the first GPU of the first platform,
        /// compiles the OpenCL source file, picks its "mandel" kernel and allocates the
        /// message and gradient buffers over the host arrays.
        /// </summary>
        /// <param name="sourceFile">Path of the OpenCL source file to compile.</param>
        static void SetupCUDA(string sourceFile)
        {
            // First platform; context over all of its GPU devices.
            // (This allocation carried a "LEAK" remark from the original author.)
            platform = ComputePlatform.Platforms[0];
            var properties = new ComputeContextPropertyList(platform);
            context = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);

            // Command queue on the first GPU found
            queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            // Read the complete OpenCL source
            string clSource;
            using (var streamReader = new StreamReader(sourceFile))
            {
                clSource = streamReader.ReadToEnd();
            }

            // Create and compile the program, then load the kernel
            program = new ComputeProgram(context, clSource);
            program.Build(null, null, null, IntPtr.Zero);
            kernel = program.CreateKernel("mandel");

            // Device views of the host arrays: message is written, gradient is read
            messageBuffer = new ComputeBuffer <int>(context,
                                                    ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message);
            gradientBuffer = new ComputeBuffer <int>(context,
                                                     ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, gradient);
        }
Beispiel #19
0
        /// <summary>
        /// Disposes every OpenCL object that is currently set (image, result, color map,
        /// command queue, kernel, program), clears the references and marks the state dirty.
        /// </summary>
        void DestroyClBuffers()
        {
            // Memory objects
            if (clImage != null) { clImage.Dispose(); }
            clImage = null;

            if (result != null) { result.Dispose(); }
            result = null;

            if (cmap != null) { cmap.Dispose(); }
            cmap = null;

            // Queue, then kernel, then program
            if (clCommands != null) { clCommands.Dispose(); }
            clCommands = null;

            if (clKernel != null) { clKernel.Dispose(); }
            clKernel = null;

            if (clProgram != null) { clProgram.Dispose(); }
            clProgram = null;

            clDirty = true;
        }
Beispiel #20
0
        /// <summary>
        /// Builds the OpenCL program registered under the type name of
        /// <paramref name="kernelInstance"/> and returns its "Run" kernel with all int buffers,
        /// then all float buffers, bound as consecutive memory arguments.
        /// </summary>
        /// <param name="kernelInstance">Object whose type name selects the kernel to load.</param>
        /// <returns>The prepared kernel, or null when hardware acceleration is disabled.</returns>
        public ComputeKernel CreateKernel(object kernelInstance)
        {
            string kernelName = kernelInstance.GetType().Name;

            if (!HardwareAccelerationEnabled)
            {
                return null;
            }

            IKernel program = KernelManager.LoadKernel(kernelName);

            // Create and build the opencl program.
            var computeProgram = new ComputeProgram(_context, program.Code);
            computeProgram.Build(null, null, null, IntPtr.Zero);

            // Create the kernel function and bind the buffers in key order.
            ComputeKernel kernel = computeProgram.CreateKernel("Run");
            int argumentIndex = 0;

            foreach (string key in _intComputeBuffers.Keys)
            {
                kernel.SetMemoryArgument(argumentIndex++, _intComputeBuffers[key]);
            }

            foreach (string key in _floatComputeBuffers.Keys)
            {
                kernel.SetMemoryArgument(argumentIndex++, _floatComputeBuffers[key]);
            }

            return kernel;
        }
Beispiel #21
0
 /// <summary>
 /// Compiles a new kernel from the stored context, source codes and defines, disposes the
 /// current state, refreshes <c>_useDouble</c> from the "UseDouble" define and installs the new kernel.
 /// </summary>
 public void Recompile()
 {
     lock (_kernelLock)
     {
         // Compile first so the old kernel is only dropped once a replacement exists.
         var replacement = Compile(_context, _sourcecodes, _defines);
         Dispose();

         // Double precision is enabled when the define parses as a non-zero integer.
         string rawValue;
         int parsedValue;
         _useDouble = _defines.TryGetValue("UseDouble", out rawValue)
                      && int.TryParse(rawValue, out parsedValue)
                      && parsedValue != 0;

         _kernel = replacement;
     }
 }
 /// <summary>
 /// Release this kernel: disposes the kernel and the program, if set, and clears both references.
 /// </summary>
 ///
 public virtual void Release()
 {
     // Each object is checked on its own; a program without a kernel (for example after a
     // failed build) used to throw here and leave both references in place.
     if (this.kernel != null)
     {
         this.kernel.Dispose();
         this.kernel = null;
     }

     if (this.program != null)
     {
         this.program.Dispose();
         this.program = null;
     }
 }
 /// <summary>
 /// Applies the base arguments, then passes <c>Fov</c>, three times <c>MoveSpeed</c> and
 /// <c>Frame</c> as the next three value arguments.
 /// </summary>
 /// <param name="kernel">Kernel whose arguments are set.</param>
 /// <param name="isDouble">When false, <c>Fov</c> and <c>MoveSpeed</c> are cast to <c>float</c> first.</param>
 /// <param name="param">Index of the next argument to set; advanced past every argument written.</param>
 public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int param)
 {
     base.ApplyToKernel(kernel, isDouble, ref param);

     if (!isDouble)
     {
         kernel.SetValueArgument(param++, (float)Fov);
         kernel.SetValueArgument(param++, (float)MoveSpeed * 3);
     }
     else
     {
         kernel.SetValueArgument(param++, Fov);
         kernel.SetValueArgument(param++, MoveSpeed * 3);
     }

     // The frame value is passed unchanged in both precisions.
     kernel.SetValueArgument(param++, Frame);
 }
Beispiel #24
0
 ///// <summary>Number of arguments</summary>
 //private int nArgs = 0;
 ///// <summary>Gets how many arguments this kernel has</summary>
 //public int NumberOfArguments { get { return nArgs; } }
 /// <summary>Creates a new Kernel by asking <c>Prog</c> to create the kernel with the given name</summary>
 /// <param name="KernelName">Name passed to <c>Prog.CreateKernel</c>; presumably the kernel function name in the compiled source (confirm against Prog)</param>
 public Kernel(string KernelName)
 {
     kernel = Prog.CreateKernel(KernelName);
 }
Beispiel #25
0
 /// <summary>
 /// Enqueues the kernel on <c>Queue</c> over a two-dimensional range.
 /// </summary>
 /// <param name="kernel">Kernel to execute.</param>
 /// <param name="global1">Global work size of the first dimension.</param>
 /// <param name="global2">Global work size of the second dimension.</param>
 /// <param name="local1">Local work size of the first dimension.</param>
 /// <param name="local2">Local work size of the second dimension.</param>
 /// <param name="events">Event collection forwarded to <c>Queue.Execute</c>.</param>
 public void Exec2D(ComputeKernel kernel, long global1, long global2, long local1, long local2, ICollection <ComputeEventBase> events)
 {
     long[] globalSize = { global1, global2 };
     long[] localSize = { local1, local2 };

     // No global work offset is used.
     Queue.Execute(kernel, null, globalSize, localSize, events);
 }
Beispiel #26
0
 /// <summary>
 /// Enqueues the kernel on <c>Queue</c> over a one-dimensional range without an event list.
 /// </summary>
 /// <param name="kernel">Kernel to execute.</param>
 /// <param name="global">Global work size.</param>
 /// <param name="local">Local work size.</param>
 public void Exec1D(ComputeKernel kernel, long global, long local)
 {
     long[] globalSize = { global };
     long[] localSize = { local };

     // No global work offset and no events are used.
     Queue.Execute(kernel, null, globalSize, localSize, null);
 }
        /// <summary>
        /// Prepares everything OpenCL related: platform, context, queue, program,
        /// the four kernels and the three buffers over the given host arrays.
        /// </summary>
        /// <param name="result">Host array backing the write-only result buffer.</param>
        /// <param name="left">Host array backing the read-only left operand buffer.</param>
        /// <param name="right">Host array backing the read-only right operand buffer.</param>
        /// <exception cref="ApplicationException">The program failed to build; the message contains the build log of the first device.</exception>
        static void InitializeOpenCL(Real[] result, Real[] left, Real[] right)
        {
            // Take the first platform
            var platform = ComputePlatform.Platforms[0];
            Console.WriteLine("プラットフォーム:{0} ({1})", platform.Name, platform.Version);

            // Create a context over the GPU devices of that platform
            var properties = new ComputeContextPropertyList(platform);
            var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);

            // Devices available in the context
            var devices = context.Devices;
            Console.WriteLine("デバイス数:{0}", devices.Count);

            // Create the queue on the first device
            queue = new ComputeCommandQueue(context, devices[0], ComputeCommandQueueFlags.None);

            // Print the name and vendor of every available device
            foreach (var device in devices)
            {
                Console.WriteLine("* {0} ({1})", device.Name, device.Vendor);
            }

            // Create the program
            var program = new ComputeProgram(context, Properties.Resources.HeavyWorkItem);

            // Try to build it
            try
            {
                string realTypeName = ((typeof(Real) == typeof(Double)) ? "double" : "float");
                string buildOptions = string.Format(
                    " -D REAL={0} -D REALV={0}{1} -D VLOADN=vload{1} -D VSTOREN=vstore{1} -D COUNT_PER_WORKITEM={2} -Werror",
                    realTypeName, VECTOR_COUNT, COUNT_PER_WORKITEM);

                program.Build(devices, buildOptions, null, IntPtr.Zero);
            }
            catch(BuildProgramFailureComputeException ex)
            {
                // On failure, attach the build log and throw
                throw new ApplicationException(string.Format("{0}\n{1}", ex.Message, program.GetBuildLog(devices[0])), ex);
            }

            // Create the kernels
            addOneElement =  program.CreateKernel("AddOneElement");
            addOneVector = program.CreateKernel("AddOneVector");
            addMoreElement = program.CreateKernel("AddMoreElement");
            addMoreVector = program.CreateKernel("AddMoreVector");

            // Create the buffers: both operands are read-only, the result is write-only
            var inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
            bufferLeft = new ComputeBuffer<Real>(context, inputFlags, left);
            bufferRight = new ComputeBuffer<Real>(context, inputFlags, right);
            bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, result);
        }
Beispiel #28
0
        /// <summary>
        /// Attempts to initialize OpenCL for the selected GPU. Does nothing when the kernel
        /// already exists.
        /// </summary>
        /// <exception cref="InvalidOperationException">No OpenCL device has the handle of the selected GPU.</exception>
        /// <exception cref="Exception">The program failed to build; the message is the build log and the
        /// inner exception is the original compute exception.</exception>
        private void InitializeOpenCL()
        {
            // only initialize once
            if (clKernel != null)
                return;

            // select the device we've been instructed to use
            clDevice = ComputePlatform.Platforms
                .SelectMany(i => i.Devices)
                .SingleOrDefault(i => i.Handle.Value == Gpu.CLDeviceHandle.Value);

            // SingleOrDefault yields null when nothing matches; fail with a clear message
            // instead of a null dereference on clDevice.Platform below
            if (clDevice == null)
                throw new InvalidOperationException("No OpenCL device matches the handle of the selected GPU.");

            // context we'll be working underneath
            clContext = new ComputeContext(new ComputeDevice[] { clDevice }, new ComputeContextPropertyList(clDevice.Platform), null, IntPtr.Zero);

            // queue to control device
            clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

            // buffers to store kernel output
            clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
            clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

            // kernel code
            string kernelCode;
            using (var rdr = new StreamReader(GetType().Assembly.GetManifestResourceStream("BitMaker.Miner.Gpu.DiabloMiner.cl")))
                kernelCode = rdr.ReadToEnd();

            clProgram = new ComputeProgram(clContext, kernelCode);

            try
            {
                // build kernel for device
                clProgram.Build(new ComputeDevice[] { clDevice }, "-D WORKSIZE=" + clDevice.MaxWorkGroupSize, null, IntPtr.Zero);
            }
            catch (ComputeException ex)
            {
                // surface the build log, keeping the original failure as inner exception
                throw new Exception(clProgram.GetBuildLog(clDevice), ex);
            }

            clKernel = clProgram.CreateKernel("search");
        }
Beispiel #29
0
        /// <summary>
        /// Entry point for a standard work thread. Initializes OpenCL, processes work items until
        /// cancellation is requested, and then releases every OpenCL resource held by this instance.
        /// </summary>
        /// <remarks>
        /// The release happens in a <c>finally</c> block so the resources are also freed when the work
        /// loop ends with an exception other than <see cref="OperationCanceledException"/>; that
        /// exception still propagates to the caller.
        /// </remarks>
        private void WorkThread()
        {
            InitializeOpenCL();

            try
            {
                // continue working until canceled
                while (!cts.IsCancellationRequested)
                    Work(Context.GetWork(this, GetType().Name));
            }
            catch (OperationCanceledException)
            {
                // cancellation is the expected way out of the loop
            }
            finally
            {
                // let outstanding commands complete before tearing anything down
                clQueue.Finish();

                clKernel.Dispose();
                clKernel = null;

                clBuffer0.Dispose();
                clBuffer0 = null;

                clBuffer1.Dispose();
                clBuffer1 = null;

                clQueue.Dispose();
                clQueue = null;

                clDevice = null;

                clProgram.Dispose();
                clProgram = null;

                clContext.Dispose();
                clContext = null;
            }
        }
Beispiel #30
0
 /// <summary>
 /// Applies this object's values to <paramref name="kernel"/> as kernel arguments.
 /// </summary>
 /// <param name="kernel">Kernel that receives the arguments.</param>
 /// <param name="useDouble">
 /// NOTE(review): presumably selects double- versus single-precision argument values - confirm against the implementations.
 /// </param>
 /// <param name="startIndex">
 /// Passed by reference. NOTE(review): presumably the index of the first argument slot to fill,
 /// advanced past the slots an implementation consumes - confirm against the implementations.
 /// </param>
 public abstract void ApplyToKernel(ComputeKernel kernel, bool useDouble, ref int startIndex);
Beispiel #31
0
        /// <summary>
        /// Attempts to initialize OpenCL for the selected GPU: creates the context, command queue,
        /// buffers and program, builds the program with the options derived from <c>Gpu</c>, and
        /// creates the "search" kernel. Does nothing when the kernel has already been created.
        /// </summary>
        /// <exception cref="Exception">
        /// The program failed to build. The message is the device build log and the inner exception
        /// is the original <see cref="ComputeException"/>.
        /// </exception>
        internal void InitializeOpenCL()
        {
            // only initialize once
            if (clKernel != null)
                return;

            // unused memory so Cloo doesn't break with a null ptr
            var userDataPtr = Marshal.AllocCoTaskMem(512);

            try
            {
                clDevice = Gpu.CLDevice;

                // context we'll be working underneath
                clContext = new ComputeContext(
                    new[] { clDevice },
                    new ComputeContextPropertyList(clDevice.Platform),
                    (p1, p2, p3, p4) => { },
                    userDataPtr);

                // queue to control device
                clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

                // buffers to store kernel output
                // NOTE(review): ReadOnly describes access from the kernel's side; if the kernel writes its
                // results into these buffers, WriteOnly or ReadWrite is likely intended - confirm against the kernel source.
                clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
                clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

                // obtain the program
                clProgram = new ComputeProgram(clContext, Gpu.GetSource());

                // compiler options: each capability of the GPU becomes a preprocessor define
                var b = new StringBuilder();
                if (Gpu.WorkSize > 0)
                    b.Append(" -D WORKSIZE=").Append(Gpu.WorkSize);
                if (Gpu.HasBitAlign)
                    b.Append(" -D BITALIGN");
                if (Gpu.HasBfiInt)
                    b.Append(" -D BFIINT");

                try
                {
                    // build kernel for device
                    clProgram.Build(new[] { clDevice }, b.ToString(), (p1, p2) => { }, userDataPtr);
                }
                catch (ComputeException e)
                {
                    // surface the compiler output, keeping the original failure as the inner exception
                    throw new Exception(clProgram.GetBuildLog(clDevice), e);
                }

                clKernel = clProgram.CreateKernel("search");
            }
            finally
            {
                Marshal.FreeCoTaskMem(userDataPtr);
            }
        }
Beispiel #32
0
                /// <summary>Binds this variable to a kernel as one of its memory arguments</summary>
                /// <param name="ArgIndex">Position of the argument being set</param>
                /// <param name="Kernel">Kernel whose argument is being set</param>
                /// <exception cref="Exception">
                /// This is a <c>Variable</c> created from an OpenGL buffer that has not been acquired in OpenCL
                /// </exception>
                public void SetAsArgument(int ArgIndex, ComputeKernel Kernel)
                {
                    // Only Variable instances carry the OpenGL-interop state checked here
                    Variable bufferVariable = this as Variable;

                    bool usedWithoutAcquire = bufferVariable != null
                        && bufferVariable.CreatedFromGLBuffer
                        && !bufferVariable.AcquiredInOpenCL;

                    if (usedWithoutAcquire)
                    {
                        throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
                    }

                    Kernel.SetMemoryArgument(ArgIndex, VarPointer);
                }
Beispiel #33
0
        bool useGPU = true; // GPU code enabled (from commandline)

        #endregion Fields

        #region Methods

        // initialize renderer: takes in command line parameters passed by template code.
        // Stores the parameters, creates the accumulator, scene and camera, fills the table of
        // random numbers and, when the GPU path is enabled, builds the OpenCL program from
        // "../../kernel.cl" and binds the kernel arguments. A ComputeException during the OpenCL
        // setup is reported on the console and disables the GPU path (useGPU = false).
        public void Init( int rt, bool gpu, int platformIdx )
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera( screen.width, screen.height );

            // Generate randoms
            Console.Write("Generating randoms....\t");

            randoms = new float[1000];
            Random r = RTTools.GetRNG();
            for (int i = 0; i < randoms.Length; i++)
            {
                randoms[i] = (float)r.NextDouble();
            }

            // the value is not used; the call is kept because it advances the generator state
            r.Next();

            Console.WriteLine("Done!");

            // initialize required opencl things if gpu is used
            if (!useGPU)
            {
                return;
            }

            // 'using' closes the file even when reading throws
            string clSource;
            using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            // NOTE(review): gpuPlatform is stored above but the first platform is always used here -
            // confirm whether ComputePlatform.Platforms[gpuPlatform] was intended.
            platform = ComputePlatform.Platforms[0];
            context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
            queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            program = new ComputeProgram(context, clSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                kernel = program.CreateKernel("Main");

                // convert the scene once so the buffer contents and the length passed as argument 4
                // come from the same array
                var sceneData = scene.toCL();

                sceneBuffer = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, sceneData);
                rndBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
                cameraBuffer = new ComputeBuffer<Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
                outputBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
                skydome = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

                kernel.SetMemoryArgument(0, outputBuffer);
                kernel.SetValueArgument(1, screen.width);
                kernel.SetValueArgument(2, screen.height);
                kernel.SetMemoryArgument(3, sceneBuffer);
                kernel.SetValueArgument(4, sceneData.Length);
                kernel.SetMemoryArgument(5, skydome);
                kernel.SetMemoryArgument(6, cameraBuffer);
                kernel.SetMemoryArgument(7, rndBuffer);
            }
            catch (ComputeException)
            {
                // report the build log, wait for the user, then fall back to the non-GPU path
                Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
                Console.ReadLine();
                useGPU = false;
            }
        }
Beispiel #34
0
        /// <summary>
        /// Sets the stored int, float and double value arguments on <paramref name="kernel"/>, in that
        /// order, then executes the kernel through <c>_commands</c> and waits for it to finish.
        /// </summary>
        /// <param name="kernel">Kernel to configure and run.</param>
        /// <param name="count">Value forwarded to <c>_commands.Execute</c>.</param>
        public void RunKernel(ComputeKernel kernel, int count)
        {
            // value arguments start after the slots counted for the int and float compute buffers
            int slot = _intComputeBuffers.Count + _floatComputeBuffers.Count;

            foreach (string name in _intArguments.Keys)
            {
                kernel.SetValueArgument(slot++, _intArguments[name]);
            }

            foreach (string name in _floatArguments.Keys)
            {
                kernel.SetValueArgument(slot++, _floatArguments[name]);
            }

            foreach (string name in _doubleArguments.Keys)
            {
                kernel.SetValueArgument(slot++, _doubleArguments[name]);
            }

            _commands.Execute(kernel, count);
            _commands.Finish();
        }
Beispiel #35
0
 /// <summary>
 /// Disposes the kernel's program and the kernel itself, then clears the kernel reference.
 /// Runs under <c>_kernelLock</c>; does nothing when no kernel is held.
 /// </summary>
 public void Dispose()
 {
     lock (_kernelLock)
     {
         if (_kernel == null)
         {
             return;
         }

         _kernel.Program.Dispose();
         _kernel.Dispose();
         _kernel = null;
     }
 }
        // 26 ms 4096x4096@512 iter with 1024 cores
        /// <summary>
        /// Builds the "mandel" kernel from Mandel3.cl, runs it in 32x32 tiles over an N x N range on
        /// the first GPU of the first platform, and reads the result back into
        /// <paramref name="message"/>.
        /// </summary>
        /// <param name="ymin">Passed to the kernel as argument 2.</param>
        /// <param name="xmin">Passed to the kernel as argument 3.</param>
        /// <param name="width">Passed to the kernel as argument 4.</param>
        /// <param name="message">Host array backing the output buffer; receives the kernel output.</param>
        /// <returns>Elapsed milliseconds for kernel execution plus read-back.</returns>
        static long Method05(float ymin, float xmin, float width, int[] message)
        {
            // pick first platform
            ComputePlatform platform = ComputePlatform.Platforms[0];

            // load opencl source; 'using' closes the file even when reading throws
            string clSource;
            using (StreamReader streamReader = new StreamReader("Mandel3.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            // Every OpenCL object below is disposed when its scope ends, in reverse order of creation.
            // create context with all gpu devices
            using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
                                                               new ComputeContextPropertyList(platform), null, IntPtr.Zero))
            // create a command queue with first gpu found
            using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
                                                                       context.Devices[0], ComputeCommandQueueFlags.None))
            // create program with opencl source
            using (ComputeProgram program = new ComputeProgram(context, clSource))
            {
                // compile opencl source
                program.Build(null, null, null, IntPtr.Zero);

                int messageSize = message.Length;

                // load chosen kernel from program
                using (ComputeKernel kernel = program.CreateKernel("mandel"))
                // allocate a memory buffer with the message
                using (ComputeBuffer<int> messageBuffer = new ComputeBuffer<int>(context,
                                                                                 ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message))
                {
                    kernel.SetMemoryArgument(0, messageBuffer);
                    kernel.SetValueArgument(1, N);
                    kernel.SetValueArgument(2, ymin);
                    kernel.SetValueArgument(3, xmin);
                    kernel.SetValueArgument(4, width);
                    kernel.SetValueArgument(5, MaxIter);

                    var watch = System.Diagnostics.Stopwatch.StartNew();

                    // Execute kernel, one 32x32 tile per enqueue
                    for (var i = 0; i < N / 32; ++i)
                    {
                        for (var j = 0; j < N / 32; ++j)
                        {
                            queue.Execute(kernel, new long[] { i * 32, j * 32 }, new long[] { 32, 32 }, null, null);
                        }
                    }

                    // Read data back
                    unsafe
                    {
                        fixed (int* retPtr = message)
                        {
                            // the read is non-blocking, so Finish must run while the array is still pinned
                            queue.Read(messageBuffer,
                                       false, 0,
                                       messageSize,
                                       new IntPtr(retPtr),
                                       null);

                            queue.Finish();
                        }
                    }

                    watch.Stop();
                    return watch.ElapsedMilliseconds;
                }
            }
        }
Beispiel #37
0
 /// <summary>
 /// Enqueues <paramref name="kernel"/> on <c>Queue</c> over a two-dimensional range with no
 /// global offset and no event list.
 /// </summary>
 /// <param name="kernel">Kernel to execute.</param>
 /// <param name="global1">Global work size in the first dimension.</param>
 /// <param name="global2">Global work size in the second dimension.</param>
 /// <param name="local1">Local work size in the first dimension.</param>
 /// <param name="local2">Local work size in the second dimension.</param>
 public void Exec2D(ComputeKernel kernel, long global1, long global2, long local1, long local2)
 {
     long[] globalSize = { global1, global2 };
     long[] localSize = { local1, local2 };

     Queue.Execute(kernel, null, globalSize, localSize, null);
 }
Beispiel #38
0
        /// <summary>
        /// Runs the "ComputePoints" kernel from kernel.cl on the first GPU of the Intel OpenCL
        /// platform <paramref name="numberOfIterations"/> times and returns the average wall-clock
        /// time per iteration. Each iteration uploads the depth pixels, executes the kernel and reads
        /// the four point buffers back.
        /// </summary>
        /// <param name="depthPixels">Depth input; one work item is launched per element.</param>
        /// <param name="width">Kernel argument 6.</param>
        /// <param name="inverseRotatedFx">Kernel argument 7.</param>
        /// <param name="rotatedCx">Kernel argument 8.</param>
        /// <param name="inverseRotatedFy">Kernel argument 9.</param>
        /// <param name="rotatedCy">Kernel argument 10.</param>
        /// <param name="bedTransformationM">Bed transform; elements [0..2, 0..2] are read.</param>
        /// <param name="bedTransformationb">Bed offset; elements [0..2, 0] are read.</param>
        /// <param name="floorTransformationM">Floor transform; elements [0..2, 0..2] are read.</param>
        /// <param name="floorTransformationb">Floor offset; elements [0..2, 0] are read.</param>
        /// <param name="numberOfIterations">Number of timed iterations.</param>
        /// <returns>Average milliseconds per iteration.</returns>
        static float ComputeAverageGPUTime(
            ushort[] depthPixels,
            int width,
            float inverseRotatedFx,
            float rotatedCx,
            float inverseRotatedFy,
            float rotatedCy,
            Matrix bedTransformationM,
            Matrix bedTransformationb,
            Matrix floorTransformationM,
            Matrix floorTransformationb,
            int numberOfIterations)
        {
            // pick the device platform
            ComputePlatform intelGPU = ComputePlatform.Platforms.First(n => n.Name.Contains("Intel"));

            string kernelSource;
            using (StreamReader sr = new StreamReader("kernel.cl"))
            {
                kernelSource = sr.ReadToEnd();
            }

            int pointCount = depthPixels.Length;

            Point3D[] outProjectivePoints = new Point3D[pointCount];
            Point3D[] outRealPoints       = new Point3D[pointCount];
            Point3D[] outBedPoints        = new Point3D[pointCount];
            Point3D[] outFloorPoints      = new Point3D[pointCount];

            // two 3x4 affine transforms, each row laid out as three matrix entries followed by the offset
            float[] affines = new float[24];

            // do bed affines first because that's what assembly code expects
            int z = 0;

            for (int b = 0; b < 3; b++)
            {
                for (int c = 0; c < 3; c++)
                {
                    affines[z++] = bedTransformationM[b, c];
                }
                affines[z++] = bedTransformationb[b, 0];
            }

            // do floor affines next because that's what assembly code expects
            for (int b = 0; b < 3; b++)
            {
                for (int c = 0; c < 3; c++)
                {
                    affines[z++] = floorTransformationM[b, c];
                }
                affines[z++] = floorTransformationb[b, 0];
            }

            const ComputeMemoryFlags inputFlags  = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
            const ComputeMemoryFlags outputFlags = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer;

            // Every OpenCL object below is disposed when its scope ends, in reverse order of creation.
            using (ComputeContext context = new ComputeContext(
                       ComputeDeviceTypes.Gpu,                   // use the gpu
                       new ComputeContextPropertyList(intelGPU), // use the intel openCL platform
                       null,
                       IntPtr.Zero))
            // the command queue is the queue of commands sent to the "device" (GPU)
            using (ComputeCommandQueue commandQueue = new ComputeCommandQueue(
                       context,                        // the compute context
                       context.Devices[0],             // first device matching the context specifications
                       ComputeCommandQueueFlags.None)) // no special flags
            // create the "program"
            using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
            {
                // compile.
                program.Build(null, null, null, IntPtr.Zero);

                // each output buffer is backed by the host array of the same name
                using (ComputeKernel kernel = program.CreateKernel("ComputePoints"))
                using (ComputeBuffer<float> affinesBuffer = new ComputeBuffer<float>(context, inputFlags, affines))
                using (ComputeBuffer<Point3D> projectivePointsBuffer = new ComputeBuffer<Point3D>(context, outputFlags, outProjectivePoints))
                using (ComputeBuffer<Point3D> realPointsBuffer = new ComputeBuffer<Point3D>(context, outputFlags, outRealPoints))
                using (ComputeBuffer<Point3D> bedPointsBuffer = new ComputeBuffer<Point3D>(context, outputFlags, outBedPoints))
                using (ComputeBuffer<Point3D> floorPointsBuffer = new ComputeBuffer<Point3D>(context, outputFlags, outFloorPoints))
                {
                    // arguments 2-5 are the four distinct output buffers that are read back below;
                    // NOTE(review): order projective/real/bed/floor assumed from the buffer naming - confirm against kernel.cl
                    kernel.SetMemoryArgument(1, affinesBuffer);
                    kernel.SetMemoryArgument(2, projectivePointsBuffer);
                    kernel.SetMemoryArgument(3, realPointsBuffer);
                    kernel.SetMemoryArgument(4, bedPointsBuffer);
                    kernel.SetMemoryArgument(5, floorPointsBuffer);

                    kernel.SetValueArgument<int>(6, width);
                    kernel.SetValueArgument<float>(7, inverseRotatedFx);
                    kernel.SetValueArgument<float>(8, rotatedCx);
                    kernel.SetValueArgument<float>(9, inverseRotatedFy);
                    kernel.SetValueArgument<float>(10, rotatedCy);

                    Stopwatch sw = new Stopwatch();

                    sw.Start();
                    for (int c = 0; c < numberOfIterations; c++)
                    {
                        // the depth upload is part of the measured work; the buffer is released at the
                        // end of each iteration instead of accumulating
                        using (ComputeBuffer<ushort> depthPointsBuffer = new ComputeBuffer<ushort>(context, inputFlags, depthPixels))
                        {
                            kernel.SetMemoryArgument(0, depthPointsBuffer);

                            commandQueue.Execute(kernel, new long[] { 0 }, new long[] { pointCount }, null, null);

                            unsafe
                            {
                                fixed (Point3D* projectivePointsPtr = outProjectivePoints)
                                fixed (Point3D* realPointsPtr = outRealPoints)
                                fixed (Point3D* bedPointsPtr = outBedPoints)
                                fixed (Point3D* floorPointsPtr = outFloorPoints)
                                {
                                    // the reads are non-blocking, so Finish must run while the arrays are still pinned
                                    commandQueue.Read(projectivePointsBuffer, false, 0, pointCount, new IntPtr(projectivePointsPtr), null);
                                    commandQueue.Read(realPointsBuffer, false, 0, pointCount, new IntPtr(realPointsPtr), null);
                                    commandQueue.Read(bedPointsBuffer, false, 0, pointCount, new IntPtr(bedPointsPtr), null);
                                    commandQueue.Read(floorPointsBuffer, false, 0, pointCount, new IntPtr(floorPointsPtr), null);
                                    commandQueue.Finish();
                                }
                            }
                        }
                    }
                    sw.Stop();

                    return sw.ElapsedMilliseconds / (numberOfIterations * 1.0f);
                }
            }
        }
Beispiel #39
0
        /// <summary>
        /// Prepare OpenCL program, data buffers, etc.
        /// Rebuilds the program, kernel, command queue and buffers when the state is marked dirty;
        /// tears them down when there is no texture or OpenCL is unchecked.
        /// </summary>
        /// <param name="dirty">When true, marks the OpenCL state as needing a rebuild.</param>
        public void PrepareClBuffers(bool dirty = true)
        {
            clDirty |= dirty;

            bool openClUsable = texName != 0 && checkOpenCL.Checked;
            if (!openClUsable)
            {
                DestroyClBuffers();
                return;
            }

            // nothing changed since the last successful preparation
            if (!clDirty)
            {
                return;
            }

            DestroyClBuffers();

            if (clContext == null)
            {
                SetupClContext();
            }

            // the setup above may have failed to produce a context
            if (clContext == null)
            {
                Util.Log("OpenCL error");
                clImage = null;
                clDirty = true;
                return;
            }

            GL.BindTexture(TextureTarget.Texture2D, 0);
            GL.Finish();

            try
            {
                // OpenCL C source:
                string sourceFile = CanUseDouble ? "mandel.cl" : "mandelSingle.cl";
                string source = ClInfo.ReadSourceFile(sourceFile, "090opencl");
                if (string.IsNullOrEmpty(source))
                {
                    return;
                }

                // program & kernel:
                clProgram = new ComputeProgram(clContext, source);
                clProgram.Build(clContext.Devices, null, null, IntPtr.Zero);

                string kernelName = (checkDouble.Checked && CanUseDouble) ? "mandelDouble" : "mandelSingle";
                clKernel = clProgram.CreateKernel(kernelName);
                clCommands = new ComputeCommandQueue(clContext, clContext.Devices[0], ComputeCommandQueueFlags.None);

                // global sizes computed from the texture size and groupSize
                globalWidth = (texWidth + groupSize - 1) & -groupSize;
                globalHeight = (texHeight + groupSize - 1) & -groupSize;

                // buffers:
                // 1. colormap array
                cmap = new ComputeBuffer<byte>(clContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, colormap);

                // 2. CL image for OpenGL interop (only when requested)
                if (checkInterop.Checked)
                {
                    clImage = ComputeImage2D.CreateFromGLTexture2D(clContext, ComputeMemoryFlags.ReadWrite, (int)TextureTarget.Texture2D, 0, texName);
                    if (clImage == null)
                    {
                        Util.Log("OpenCL cannot reference OpenGL texture!");
                    }
                }

                // 3. CL output array
                result = new ComputeBuffer<byte>(clContext, ComputeMemoryFlags.ReadWrite, texWidth * texHeight * 4);

                // synced..
                clDirty = false;
            }
            catch (Exception ex)
            {
                Util.LogFormat("OpenCL build error: {0}", ex.Message);
                clImage = null;
                clDirty = true;
            }
        }
        /// <summary>
        /// Builds the kernel arguments: array inputs are copied into device memory objects and bound
        /// as memory arguments, all other inputs are bound by value. Argument indices follow the
        /// order of <paramref name="inputs"/>.
        /// </summary>
        /// <param name="inputs">The inputs, one per kernel argument.</param>
        /// <param name="kernel">The kernel whose arguments are set.</param>
        /// <param name="length">The length (not used by this method).</param>
        /// <param name="returnInputVariable">The return result (not used by this method).</param>
        /// <returns>The memory objects created for array inputs, keyed by argument index.</returns>
        private Dictionary <int, GenericArrayMemory> BuildKernelArguments(object[] inputs, ComputeKernel kernel, long length, int?returnInputVariable = null)
        {
            Dictionary <int, GenericArrayMemory> result = new Dictionary <int, GenericArrayMemory>();
            int i = 0;

            foreach (var item in inputs)
            {
                if (item.GetType().IsArray)
                {
                    // Pin only while the memory object is created: CopyHostPointer copies the host data
                    // at creation time, so the pin can be released right afterwards.
                    GCHandle arrayPin = GCHandle.Alloc(item, GCHandleType.Pinned);
                    GenericArrayMemory mem;
                    try
                    {
                        mem = new GenericArrayMemory(_context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, item);
                    }
                    finally
                    {
                        arrayPin.Free();
                    }

                    kernel.SetMemoryArgument(i, mem);
                    result.Add(i, mem);
                }
                else
                {
                    int size = Marshal.SizeOf(item);

                    // OpenCL copies the argument value during the set-argument call, so the pinned
                    // handle is only needed for the duration of that call.
                    GCHandle valuePin = GCHandle.Alloc(item, GCHandleType.Pinned);
                    try
                    {
                        kernel.SetArgument(i, new IntPtr(size), valuePin.AddrOfPinnedObject());
                    }
                    finally
                    {
                        valuePin.Free();
                    }
                }

                i++;
            }

            return(result);
        }
        /// <summary>
        /// Initializes local fields and the underlying compute context. When no context has been
        /// assigned yet, one is created on the first device that reports the cl_khr_fp64 extension.
        /// Then builds Mandelbrot.cl, creates the "Mandelbrot" and "ToBitmap" kernels, allocates the
        /// result and bitmap buffers, binds them to the kernels and creates the command queue.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No OpenCL device supporting cl_khr_fp64 is available.
        /// </exception>
        public void Initialize()
        {
            if (this.context == null)
            {
                var devices = ComputePlatform.Platforms
                    .SelectMany(a => a.Devices)
                    .Where(a => a.Extensions.Contains("cl_khr_fp64"))
                    .Take(1)
                    .ToArray();

                // fail with a descriptive error instead of an IndexOutOfRangeException on devices[0]
                if (devices.Length == 0)
                {
                    throw new InvalidOperationException("No OpenCL device supporting cl_khr_fp64 was found.");
                }

                ComputeContextPropertyList list = new ComputeContextPropertyList(devices[0].Platform);
                this.context = new ComputeContext(devices, list, null, IntPtr.Zero);
            }

            this.program = new ComputeProgram(this.context, File.ReadAllText("Mandelbrot.cl"));

            this.program.Build(null, null, null, IntPtr.Zero);

            this.mandelbrot = this.program.CreateKernel("Mandelbrot");
            this.toBitmap = this.program.CreateKernel("ToBitmap");

            int pixelCount = this.ImageWidth * this.ImageHeight;
            this.resultBuffer = new ComputeBuffer<int>(this.context, ComputeMemoryFlags.ReadWrite, pixelCount);
            this.bitmapBuffer = new ComputeBuffer<byte>(this.context, ComputeMemoryFlags.ReadWrite, pixelCount * 4);

            // the result buffer is bound to both kernels
            this.mandelbrot.SetMemoryArgument(7, this.resultBuffer);
            this.toBitmap.SetMemoryArgument(1, this.resultBuffer);
            this.toBitmap.SetMemoryArgument(2, this.bitmapBuffer);

            // queue on the first fp64-capable device of the context, ordered by device type
            var queueDevice = this.context.Devices
                .OrderBy(a => a.Type)
                .First(a => a.Extensions.Contains("cl_khr_fp64"));
            this.commandQueue = new ComputeCommandQueue(this.context, queueDevice, ComputeCommandQueueFlags.None);
        }
Beispiel #42
0
    // Sets up the OpenCL context, queue, program and the three kernels, allocates the grid
    // counter/index arrays and their device buffers, and binds the arguments that do not change.
    void Awake()
    {
        // context and queue on the first CPU device of the first platform
        ComputePlatform firstPlatform = ComputePlatform.Platforms[0];
        ComputeContextPropertyList contextProperties = new ComputeContextPropertyList(firstPlatform);
        _context = new ComputeContext(ComputeDeviceTypes.Cpu, contextProperties, null, System.IntPtr.Zero);
        _queue = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);

        // program: log the compiler output on failure, then let the exception continue
        string programSource = System.IO.File.ReadAllText(clProgramPath);
        _program = new ComputeProgram(_context, programSource);
        try
        {
            _program.Build(null, null, null, System.IntPtr.Zero);
        }
        catch (BuildProgramFailureComputeException)
        {
            Debug.Log(_program.GetBuildLog(_context.Devices[0]));
            throw;
        }

        _events = new ComputeEventList();
        _updateGridKernel = _program.CreateKernel(clUpdateGridKernelName);
        _updateBoidsKernel = _program.CreateKernel(clUpdateBoidsKernelName);
        _boundaryKernel = _program.CreateKernel(clBoundaryKernelName);

        // one counter per grid cell, maxIndices index slots per cell
        _pointCounters = new int[nGridPartitions * nGridPartitions * nGridPartitions];
        _pointIndices = new int[_pointCounters.Length * maxIndices];

        _pointCountersBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointCounters.Length);
        _pointIndicesBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointIndices.Length);

        _gridInfo = new GridInfo
        {
            worldOrigin = gridbounds.min,
            worldSize = gridbounds.size,
            cellSize = gridbounds.size * (1f / nGridPartitions),
            nGridPartitions = nGridPartitions,
            maxIndices = maxIndices
        };

        // boundary kernel
        _boundaryKernel.SetValueArgument(1, _gridInfo);

        // grid update kernel
        _updateGridKernel.SetMemoryArgument(1, _pointCountersBuffer);
        _updateGridKernel.SetMemoryArgument(2, _pointIndicesBuffer);
        _updateGridKernel.SetValueArgument(3, _gridInfo);

        // boids update kernel
        _updateBoidsKernel.SetMemoryArgument(2, _pointCountersBuffer);
        _updateBoidsKernel.SetMemoryArgument(3, _pointIndicesBuffer);
        _updateBoidsKernel.SetValueArgument(4, _gridInfo);
    }
Beispiel #43
0
        // initialize renderer: takes in command line parameters passed by template code.
        // Stores the parameters, derives the tile layout, creates the accumulator, scene and
        // camera, builds the OpenCL program from "../../program.cl", uploads the buffers and binds
        // all kernel arguments. A build failure is logged with the build log and then rethrown.
        public void Init(int rt, bool gpu, int platformIdx)
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            //Determine tile width and height
            tileCount = GreatestDiv(screen.width, screen.height);
            tileWidth = screen.width / tileCount;
            tileHeight = screen.height / tileCount;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera(screen.width, screen.height);

            //Init OpenCL
            ComputePlatform platform = ComputePlatform.Platforms[gpuPlatform];
            context = new ComputeContext(
                ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(platform),
                null,
                IntPtr.Zero
                );

            // 'using' closes the file even when reading throws
            string clSource;
            using (var streamReader = new StreamReader("../../program.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            ComputeProgram program = new ComputeProgram(context, clSource);

            //try to compile
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
            }
            catch
            {
                Console.Write("error in kernel code:\n");
                Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
                // No kernel can be created from a program that failed to build, so propagate the
                // build failure itself rather than continuing to CreateKernel.
                throw;
            }
            kernel = program.CreateKernel("device_function");

            //setup RNG: one seed per pixel
            rngSeed = new int[screen.width * screen.height];
            Random r = RTTools.GetRNG();
            for (int i = 0; i < rngSeed.Length; i++)
            {
                rngSeed[i] = r.Next();
            }

            //import buffers etc to GPU
            Vector3[] sphereOrigins = Scene.GetOrigins;
            float[] sphereRadii = Scene.GetRadii;

            var FlagRW = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
            var FlagR = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;

            rngBuffer = new ComputeBuffer<int>(context, FlagRW, rngSeed);
            screenPixels = new ComputeBuffer<int>(context, FlagRW, screen.pixels);
            skyBox = new ComputeBuffer<float>(context, FlagR, scene.skybox);
            originBuffer = new ComputeBuffer<Vector3>(context, FlagR, sphereOrigins);
            radiusBuffer = new ComputeBuffer<float>(context, FlagR, sphereRadii);
            accBuffer = new ComputeBuffer<Vector3>(context, FlagRW, accumulator);

            kernel.SetValueArgument(0, camera.p1);
            kernel.SetValueArgument(1, camera.p2);
            kernel.SetValueArgument(2, camera.p3);
            kernel.SetValueArgument(3, camera.up);
            kernel.SetValueArgument(4, camera.right);
            kernel.SetValueArgument(5, camera.pos);
            kernel.SetValueArgument(6, camera.lensSize);
            kernel.SetValueArgument(7, (float)screen.width);
            kernel.SetValueArgument(8, (float)screen.height);
            kernel.SetMemoryArgument(9, rngBuffer);
            kernel.SetMemoryArgument(10, screenPixels);
            kernel.SetMemoryArgument(11, skyBox);
            kernel.SetMemoryArgument(12, originBuffer);
            kernel.SetMemoryArgument(13, radiusBuffer);
            kernel.SetMemoryArgument(14, accBuffer);

            queue = new ComputeCommandQueue(context, context.Devices[0], 0);

            // one work item per pixel
            workSize = new long[] { screen.width * screen.height };
        }
Beispiel #44
0
 /// <summary>
 /// Applies the base class arguments, then sets <c>Frame</c> as the next kernel argument.
 /// </summary>
 /// <param name="kernel">Kernel that receives the arguments.</param>
 /// <param name="isDouble">Forwarded unchanged to the base implementation.</param>
 /// <param name="startIndex">Next free argument index; advanced by one for the <c>Frame</c> slot after the base call.</param>
 public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int startIndex)
 {
     base.ApplyToKernel(kernel, isDouble, ref startIndex);

     // claim the slot first so the index is advanced before the argument is set
     int frameSlot = startIndex++;
     kernel.SetValueArgument(frameSlot, Frame);
 }
Beispiel #45
0
        /// <summary>
        /// Runs <c>Test1</c>, then builds the "vector_add" kernel from Foom/CL/testProg.txt on the
        /// first GPU of the first platform, executes it over 1024 elements, prints the first ten
        /// results and the time spent waiting for the queue, and finally waits for Enter.
        /// </summary>
        static void Main(string[] args)
        {
            //Test2();

            Test1();

            ComputePlatform plat = ComputePlatform.Platforms[0];

            Console.WriteLine("Plat:" + plat.Name);

            ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(plat), null, IntPtr.Zero);

            ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            // 'using' closes the file even when reading throws
            string clSrc;
            using (StreamReader rs = new StreamReader("Foom/CL/testProg.txt"))
            {
                clSrc = rs.ReadToEnd();
            }

            ComputeProgram prog = new ComputeProgram(context, clSrc);

            prog.Build(null, null, null, IntPtr.Zero);

            Console.WriteLine("BS:" + prog.GetBuildStatus(context.Devices[0]).ToString());
            Console.WriteLine("Info:" + prog.GetBuildLog(context.Devices[0]));

            ComputeKernel kern = prog.CreateKernel("vector_add");

            int[] data = new int[1024];

            for (int i = 0; i < data.Length; i++)
            {
                data[i] = 100;
            }

            // input buffer initialized from data, output buffer of the same element count
            ComputeBuffer <int> b1 = new ComputeBuffer <int>(context, ComputeMemoryFlags.CopyHostPointer, data);

            ComputeBuffer <int> b2 = new ComputeBuffer <int>(context, ComputeMemoryFlags.WriteOnly, 1024);

            kern.SetMemoryArgument(0, b1);
            kern.SetMemoryArgument(1, b2);

            long[] wo = { 0 };    // global offset
            long[] ws = { 1024 }; // global work size
            long[] tc = { 16 };   // local work size

            queue.Execute(kern, wo, ws, tc, null);

            // measures only the wait for the already enqueued work to complete
            int c = Environment.TickCount;

            queue.Finish();

            c = Environment.TickCount - c;

            queue.ReadFromBuffer <int>(b2, ref data, true, null);

            for (int i = 0; i < 10; i++)
            {
                Console.WriteLine("C:" + data[i]);
            }

            Console.WriteLine("Done:" + c);

            // keep the console open without spinning a CPU core at 100%
            Console.ReadLine();
        }
Beispiel #46
0
        /// <summary>
        /// Executes the specified kernel function name.
        /// </summary>
        /// <param name="functionName">Name of the function.</param>
        /// <param name="args">
        /// Kernel arguments. The global work size is taken from the first plain array
        /// argument; when there is none, it is taken from the <c>XArray</c> arguments
        /// (output arrays first, otherwise the first <c>XArray</c>).
        /// </param>
        /// <exception cref="ExecutionException">
        /// Thrown when no compiled kernel matches <paramref name="functionName"/>, or when
        /// argument setup, kernel launch or result read-back fails (the message of the
        /// underlying exception is forwarded).
        /// </exception>
        public override void Execute(string functionName, params object[] args)
        {
            ValidateArgs(functionName, args);

            // Resolve the kernel BEFORE allocating the command queue so that an unknown
            // function name does not leak a native queue.
            ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

            try
            {
                Array       ndobject    = (Array)args.FirstOrDefault(x => (x.GetType().IsArray));
                List <long> length      = new List <long>();
                long        totalLength = 0;
                if (ndobject == null)
                {
                    // No plain array argument: derive the work size from the XArray arguments.
                    var xarrayList = args.Where(x => (x.GetType().Name == "XArray" || x.GetType().BaseType.Name == "XArray")).ToList();
                    foreach (var item in xarrayList)
                    {
                        var xarrayobj = (XArray)item;
                        if (xarrayobj.Direction == Direction.Output)
                        {
                            // NOTE(review): with several element-wise output arrays this appends one
                            // dimension per array - confirm that is the intended work size.
                            totalLength = xarrayobj.Count;
                            if (!xarrayobj.IsElementWise)
                            {
                                length = xarrayobj.Sizes.ToList();
                            }
                            else
                            {
                                length.Add(totalLength);
                            }
                        }
                    }

                    // No (non-empty) output array found: fall back to the first XArray argument.
                    if (totalLength == 0)
                    {
                        var xarrayobj = (XArray)xarrayList[0];
                        totalLength = xarrayobj.Count;
                        if (!xarrayobj.IsElementWise)
                        {
                            length = xarrayobj.Sizes.ToList();
                        }
                        else
                        {
                            length.Add(totalLength);
                        }
                    }
                }
                else
                {
                    // One work dimension per array rank.
                    totalLength = ndobject.Length;
                    for (int i = 0; i < ndobject.Rank; i++)
                    {
                        length.Add(ndobject.GetLength(i));
                    }
                }

                var method = KernelFunctions.FirstOrDefault(x => (x.Name == functionName));

                var buffers = BuildKernelArguments(method, args, kernel, totalLength);
                commands.Execute(kernel, null, length.ToArray(), null, null);

                // Copy results back into every InOut/Out argument and release its device buffer.
                for (int i = 0; i < args.Length; i++)
                {
                    if (args[i].GetType().IsArray)
                    {
                        var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                        if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                        {
                            Array r = (Array)args[i];
                            commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                        }

                        buffers[i].Dispose();
                    }
                    else if (args[i].GetType().Name == "XArray" || args[i].GetType().BaseType.Name == "XArray")
                    {
                        var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                        if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                        {
                            XArray r = (XArray)args[i];
                            commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                        }

                        buffers[i].Dispose();
                    }
                }
            }
            catch (Exception ex)
            {
                throw new ExecutionException(ex.Message);
            }
            finally
            {
                // Dispose the queue even when draining it fails.
                try
                {
                    commands.Finish();
                }
                finally
                {
                    commands.Dispose();
                }
            }
        }
Beispiel #47
0
        /// <summary>
        /// Creates a new <c>ClearKernel</c>, stores it in <c>_kernel</c>, and asks the
        /// given proxy to create the matching compute kernel, stored in <c>_computeKernel</c>.
        /// </summary>
        /// <param name="clProxy">Proxy whose <c>CreateKernel</c> is called with the new kernel object.</param>
        public void Load(OpenCLProxy clProxy)
        {
            _kernel = new ClearKernel();

            _computeKernel = clProxy.CreateKernel(_kernel);
        }
Beispiel #48
0
 /// <summary>
 /// Uploads <c>Output</c> to the computing device, then creates the kernel named
 /// <c>KernelName</c> from the application's program and stores it in <c>_kernel</c>.
 /// </summary>
 private void PrepareResources()
 {
     Output.UploadToComputingDevice();

     var clProgram = OpenClApplication.Program;
     _kernel = clProgram.CreateKernel(KernelName);
 }
        // initialize renderer: takes in command line parameters passed by template code
        //   rt          - stored in runningTime
        //   gpu         - stored in useGPU
        //   platformIdx - index into ComputePlatform.Platforms used to create the OpenCL context
        // Note: the OpenCL context, program and command queue are created regardless of 'gpu'.
        public void Init(int rt, bool gpu, int platformIdx)
        {
            // pass command line parameters
            runningTime = rt;
            useGPU = gpu;
            gpuPlatform = platformIdx;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera(screen.width, screen.height);
            rngQueue = new ConcurrentQueue<Random>();

            // number of tiles needed to cover the screen, rounded up
            xtiles = (int)Math.Ceiling((float)screen.width / TILESIZE);
            ytiles = (int)Math.Ceiling((float)screen.height / TILESIZE);

#if DEBUG
            // precompute the factorial table: factorials[i] = factorials[i - 1] * i
            RTTools.factorials[0] = Vector<float>.One;
            for (int i = 1; i < RTTools.TERMS * 2; i++)
                RTTools.factorials[i] = RTTools.factorials[i - 1] * i;
            //for (int i = 0; i < RTTools.TERMS; i++)
            //    RTTools.atanStuff[i] = (new Vector<float>((float)Math.Pow(2, 2 * i)) * (RTTools.factorials[i] * RTTools.factorials[i])) / RTTools.factorials[2 * i + 1];
#endif

#region OpenCL related things

            randNums = new float[screen.width * screen.height + 25];

            // 'using' releases the file handle even if reading the kernel source fails
            string clSource;
            using (var streamReader = new StreamReader("../../assets/GPUCode.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            platform = ComputePlatform.Platforms[gpuPlatform];
            context = new ComputeContext(ComputeDeviceTypes.Gpu,
                new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            program = new ComputeProgram(context, clSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                kernel = program.CreateKernel("Test");
            }
            catch
            {
                // Development aid: print the build log and break into the debugger.
                // Execution continues afterwards, so 'kernel' may be left unassigned.
                Console.Write("error in kernel code:\n");
                Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
                Debugger.Break();
            }

            eventList = new ComputeEventList();
            commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
#endregion
        }
Beispiel #50
0
 /// <summary>
 /// Fetches the kernel source registered under <c>FUNCTION_NAME</c>, creates a program
 /// from it, and stores that program's "MaxPoolingForward" kernel in <c>ForwardKernel</c>.
 /// </summary>
 public void CreateKernel()
 {
     var kernelSource = Weaver.GetKernelSource(FUNCTION_NAME);
     var poolingProgram = Weaver.CreateProgram(kernelSource);

     ForwardKernel = poolingProgram.CreateKernel("MaxPoolingForward");
 }
        /// <summary>
        /// Compile the kernel with a map of preprocessor defines, a collection of
        /// name-value pairs. Any previously compiled program and kernel are disposed first.
        /// </summary>
        ///
        /// <param name="options">
        /// A map of preprocessor defines; each entry is passed to the build as
        /// <c>-D key=value</c>. A null or empty map builds without options.
        /// </param>
        public void Compile(IDictionary<String, String> options)
        {
            // clear out any old kernel and program

            // The kernel is checked independently of the program: if an earlier Compile
            // failed during Build, a program exists but no kernel was ever created.
            if (this.kernel != null)
            {
                this.kernel.Dispose();
                this.kernel = null;
            }

            if (this.program != null)
            {
                this.program.Dispose();
                this.program = null;
            }

            // Create the program from the source code

            this.program = new ComputeProgram(this.context, this.cl);

            // Assemble the "-D key=value" options, separated by single spaces.
            String buildOptions = null;
            if (options != null && options.Count > 0)
            {
                StringBuilder builder = new StringBuilder();
                foreach (KeyValuePair<String, String> obj in options)
                {
                    if (builder.Length > 0)
                    {
                        builder.Append(" ");
                    }
                    builder.Append("-D ");
                    builder.Append(obj.Key);
                    builder.Append("=");
                    builder.Append(obj.Value);
                }

                buildOptions = builder.ToString();
            }

            this.program.Build(null, buildOptions, null, IntPtr.Zero);

            // Create the kernel
            // NOTE(review): 'Program' is presumably the property exposing this.program - confirm.
            this.kernel = Program.CreateKernel(this.kernelName);
        }
Beispiel #52
0
        // initialize renderer: takes in command line parameters passed by template code
        //   rt          - stored in runningTime
        //   gpu         - stored in useGPU; when true the OpenCL context, program, kernel and buffers are created
        //   platformIdx - stored in gpuPlatform; index into ComputePlatform.Platforms used for the context
        // If the OpenCL setup throws a ComputeException, the build log is printed and useGPU is reset to false.
        public void Init(int rt, bool gpu, int platformIdx)
        {
            // pass command line parameters
            runningTime = rt;
            useGPU      = gpu;
            gpuPlatform = platformIdx;
            // initialize accumulator
            accumulator = new Vector3[screen.width * screen.height];
            ClearAccumulator();
            // setup scene
            scene = new Scene();
            // setup camera
            camera = new Camera(screen.width, screen.height);

            // Generate randoms
            Console.Write("Generating randoms....\t");

            randoms = new float[1000];
            Random r = RTTools.GetRNG();

            for (int i = 0; i < randoms.Length; i++)
            {
                randoms[i] = (float)r.NextDouble();
            }

            // The value is not used; the draw is kept so the generator ends up in the
            // same state as before this cleanup.
            r.Next();

            Console.WriteLine("Done!");

            // the opencl objects are only required if the gpu is used
            if (!useGPU)
            {
                return;
            }

            // 'using' releases the file handle even if reading the kernel source fails
            string clSource;
            using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
            {
                clSource = streamReader.ReadToEnd();
            }

            // honour the platform index requested on the command line instead of always using platform 0
            platform = ComputePlatform.Platforms[gpuPlatform];
            context  = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
            queue    = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            program = new ComputeProgram(context, clSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                kernel = program.CreateKernel("Main");

                // convert the scene once; the same array backs the buffer and supplies the element count
                var sceneData = scene.toCL();

                sceneBuffer  = new ComputeBuffer <Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, sceneData);
                rndBuffer    = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
                cameraBuffer = new ComputeBuffer <Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
                outputBuffer = new ComputeBuffer <int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
                skydome      = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

                kernel.SetMemoryArgument(0, outputBuffer);
                kernel.SetValueArgument(1, screen.width);
                kernel.SetValueArgument(2, screen.height);
                kernel.SetMemoryArgument(3, sceneBuffer);
                kernel.SetValueArgument(4, sceneData.Length);
                kernel.SetMemoryArgument(5, skydome);
                kernel.SetMemoryArgument(6, cameraBuffer);
                kernel.SetMemoryArgument(7, rndBuffer);
            }
            catch (ComputeException)
            {
                // report the build log, wait for the user, then fall back to the non-gpu path
                Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
                Console.ReadLine();
                useGPU = false;
            }
        }
Beispiel #53
0
        /// <summary>
        /// Pre-selects the compute kernels for every layer of <paramref name="model"/> and
        /// caches them in <c>m_CompiledLayers</c>, keyed by layer.
        /// </summary>
        /// <remarks>
        /// The work is skipped when the hash computed from the model and input shapes equals
        /// the cached hash. Otherwise tensors held by previously compiled instructions are
        /// disposed and the cache is rebuilt. Layers without inputs, or whose first input /
        /// output shape is unknown, are not compiled.
        /// </remarks>
        /// <param name="model">Model whose layers are compiled.</param>
        /// <param name="inputShapes">Input shapes by name, passed to the hash and to shape analysis.</param>
        public virtual void PrepareModel(Model model, IDictionary <string, TensorShape> inputShapes)
        {
            var modelHash = CalcModelWithInputsHashCode(model, inputShapes);

            // Same model and input shapes as the last call: the cached layers are still valid.
            if (modelHash == m_CachedModelHash)
            {
                return;
            }

            // Release tensors owned by the previous compilation before discarding it.
            m_CachedModelHash = modelHash;
            foreach (var l in m_CompiledLayers)
            {
                foreach (var i in l.Value.instructions)
                {
                    // NOTE(review): many instructions below are created without setting 'tensors';
                    // if the field then stays null this Length access would throw - confirm the
                    // default value of CompiledInstruction.tensors.
                    if (i.tensors.Length == 0)
                    {
                        continue;
                    }
                    foreach (var t in i.tensors)
                    {
                        t.Dispose();
                    }
                }
            }
            m_CompiledLayers.Clear();

            IDictionary <string, TensorShape?> shapesByName;

            ModelAnalyzer.ListTemporaryTensorShapes(model, inputShapes, out shapesByName);

            foreach (var l in model.layers)
            {
                if (m_CompiledLayers.ContainsKey(l))
                {
                    continue; // already compiled
                }
                if (l.inputs.Length == 0)
                {
                    continue; // don't need to compile layers without inputs, so far all of them are CPU only
                }
                // Shape of the first input or of the output is unknown: nothing to select a kernel for.
                if (shapesByName[l.inputs[0]] == null || shapesByName[l.name] == null)
                {
                    continue;
                }

                // X: shape of the layer's first input, O: shape of the layer's output.
                var X = shapesByName[l.inputs[0]].Value;
                var O = shapesByName[l.name].Value;

                // Branches that need several instructions register their own CompiledLayer and
                // 'continue'; the remaining branches only assign 'kernel' and fall through to the
                // single-instruction registration at the bottom of the loop.
                ComputeKernel kernel = new ComputeKernel();
                if (l.type == Layer.Type.Dense)
                {
                    var instructions = new List <CompiledInstruction>();
                    var itemSize     = 4; // @TODO: itemSizeInBytes == 2 | float16
                    kernel = BestKernel(ComputeKernelLibrary.Dense(X, l.datasets[0].shape, O, itemSize >> 2));
                    instructions.Add(new CompiledInstruction {
                        kernel = kernel, shape = O
                    });

                    if (ShouldFlattenInputForDenseLayer(X))
                    {
                        var flattenedShape = X.Flatten();
                        var flattenKernel  = BestKernel(ComputeKernelLibrary.ReshapeFromNHWCModel(flattenedShape));
                        instructions.Add(new CompiledInstruction {
                            kernel = flattenKernel, shape = flattenedShape
                        });
                    }

                    // FusedActivation
                    // A separate activation instruction is added only when the activation is not supported as fused.
                    var fusedActivation = (Layer.FusedActivation)l.activation;
                    if (!IsFusedActivationSupported(fusedActivation))
                    {
                        var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                        instructions.Add(new CompiledInstruction {
                            kernel = activationKernel, shape = O
                        });
                    }

                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = instructions.ToArray(), shape = O
                    });
                    continue;
                }
                else if (
                    l.type == Layer.Type.Conv2D)
                {
                    Assert.IsNotNull(l.stride);
                    Assert.IsNotNull(l.pad);
                    var instructions = new List <CompiledInstruction>();

                    // Conv2D
                    // Kernels whose name starts with "Conv2DWinograd_2x2_3x3" additionally get prepared tensors.
                    kernel = BestKernel(ComputeKernelLibrary.Conv2D(X, l.datasets[0].shape, O, l.stride, l.pad));
                    if (kernel.func.kernelName.StartsWith("Conv2DWinograd_2x2_3x3"))
                    {
                        instructions.Add(new CompiledInstruction {
                            kernel = kernel, shape = O, tensors = PrepareConv2dWinograd(model, l)
                        });
                    }
                    else
                    {
                        instructions.Add(new CompiledInstruction {
                            kernel = kernel, shape = O
                        });
                    }

                    // FusedActivation
                    var fusedActivation = (Layer.FusedActivation)l.activation;
                    if (!IsFusedActivationSupported(fusedActivation))
                    {
                        var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                        instructions.Add(new CompiledInstruction {
                            kernel = activationKernel, shape = O
                        });
                    }

                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = instructions.ToArray(), shape = O
                    });
                    continue;
                }
                else if (
                    l.type == Layer.Type.DepthwiseConv2D)
                {
                    kernel = BestKernel(
                        ComputeKernelLibrary.DepthwiseConv2D(X, l.datasets[0].shape, O));
                }
                else if (
                    l.type == Layer.Type.Conv2DTrans)
                {
                    var outputAdjustment = l.pool;
                    var stride           = l.stride;

                    var K   = l.datasets[0].shape;
                    var B   = l.datasets[1].shape;
                    // Padding for the stride-1 convolution below: kernel extent minus the layer's pad minus one, per side.
                    var pad = new int[]
                    {
                        K.kernelWidth - l.pad[0] - 1, K.kernelHeight - l.pad[1] - 1,
                        K.kernelWidth - l.pad[2] - 1, K.kernelHeight - l.pad[3] - 1
                    };

                    // NOTE(review): both spatial dimensions use stride[0]; stride[1] is never read - confirm intended.
                    var XpaddedShape = new TensorShape(X.batch, stride[0] * (X.height - 1) + 1 + outputAdjustment[0], stride[0] * (X.width - 1) + 1 + outputAdjustment[1], X.channels);

                    var kernelFill = CompileKernel(new ComputeKernelLibrary.Entry("Conv2DTransPadFill", (X.channels, X.width, X.height), 1.0f, 0));

                    var kernelConv = BestKernel(
                        ComputeKernelLibrary.Conv2D(XpaddedShape, K, O, new int[] { 1, 1 }, pad));
                    bool isConvWinograd = (kernelConv.func.kernelName.StartsWith("Conv2DWinograd_2x2_3x3"));

                    // Three instructions: the pad-fill kernel, an entry carrying only prepared tensors,
                    // and the convolution over the padded shape.
                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = new CompiledInstruction[]
                        {
                            new CompiledInstruction {
                                kernel = kernelFill, shape = XpaddedShape
                            },
                            new CompiledInstruction {
                                shape = K, tensors = PrepareConv2DTrans(model, l)
                            },
                            new CompiledInstruction {
                                kernel = kernelConv, shape = O, tensors = isConvWinograd ? PrepareConv2dWinograd(model, l) : null
                            }
                        }, shape = O
                    });

                    continue;
                }
                else if (
                    l.type == Layer.Type.Upsample2D)
                {
                    // axis is treated as upsample point/bilinear flag
                    var bilinear = l.axis > 0;
                    kernel = BestKernel(
                        ComputeKernelLibrary.Upsample2D(X, O, l.pool, bilinear));
                }
                else if (
                    l.type == Layer.Type.MaxPool2D ||
                    l.type == Layer.Type.AvgPool2D)
                {
                    // The kernel name is the layer type name itself.
                    var kernelName = l.type.ToString();

                    Assert.IsNotNull(l.pool);
                    Assert.IsNotNull(l.stride);
                    Assert.IsNotNull(l.pad);
                    kernel = BestKernel(
                        ComputeKernelLibrary.Pool2D(X, O, kernelName));
                }
                else if (
                    l.type == Layer.Type.GlobalMaxPool2D ||
                    l.type == Layer.Type.GlobalAvgPool2D)
                {
                    // Substring(6) drops the leading "Global" from the type name,
                    // e.g. "GlobalMaxPool2D" -> "MaxPool2D" + "Reduce".
                    var poolKernelName   = l.type.ToString().Substring(6) + "Reduce";
                    var globalKernelName = l.type.ToString();

                    var instructions = new List <CompiledInstruction>();
                    var Xr           = X;
                    // Add reduce instructions until height * width drops below 64,
                    // then finish with the global pooling kernel.
                    while (Xr.height * Xr.width >= 64)
                    {
                        var lastLength = Xr.length;
                        var pool       = new[] { 8, 8 };
                        var stride     = pool;
                        var pad        = new[] { 0, 0, 0, 0 };

                        var Oshape     = Xr.ApplyPool(pool, stride, pad, ceilMode: true);
                        var Or         = new TensorShape(Oshape.batch, IDivC(Oshape.height, 2), IDivC(Oshape.width, 2), Oshape.channels);
                        var poolKernel = BestKernel(
                            ComputeKernelLibrary.Pool2DReduce(Xr, Or, poolKernelName));

                        instructions.Add(new CompiledInstruction {
                            kernel = poolKernel, shape = Or
                        });

                        Xr = Or;
                        // Each step must shrink the tensor, otherwise this loop would not terminate.
                        Assert.IsTrue(Xr.length < lastLength);
                    }

                    var globalKernel = BestKernel(
                        ComputeKernelLibrary.GlobalPool2D(Xr, O, globalKernelName));

                    instructions.Add(new CompiledInstruction {
                        kernel = globalKernel, shape = O
                    });

                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = instructions.ToArray(), shape = O
                    });

                    continue;
                }
                else if (
                    l.type == Layer.Type.ScaleBias)
                {
                    kernel = BestKernel(
                        ComputeKernelLibrary.ScaleBias(X, O));
                }
                else if (
                    l.type == Layer.Type.Normalization)
                {
                    // GlobalAvgVariancePool2D
                    var poolKernelName   = "AvgVariancePool2DReduce";
                    var globalKernelName = "GlobalAvgVariancePool2D";

                    var instructions = new List <CompiledInstruction>();
                    var Xr           = X;
                    // Same reduction scheme as the global pooling branch above, using the average/variance kernels.
                    while (Xr.height * Xr.width >= 64)
                    {
                        var lastLength = Xr.length;
                        var pool       = new[] { 8, 8 };
                        var stride     = pool;
                        var pad        = new[] { 0, 0, 0, 0 };

                        var Oshape     = Xr.ApplyPool(pool, stride, pad, ceilMode: true);
                        var Or         = new TensorShape(Oshape.batch, IDivC(Oshape.height, 2), IDivC(Oshape.width, 2), Oshape.channels);
                        var poolKernel = BestKernel(
                            ComputeKernelLibrary.PoolAvgVar2D(Xr, Or, poolKernelName));

                        instructions.Add(new CompiledInstruction {
                            kernel = poolKernel, shape = Or
                        });

                        Xr = Or;
                        Assert.IsTrue(Xr.length < lastLength);
                    }

                    // Output of the global step has height 2 and width 1 per batch and channel
                    // (presumably mean and variance, going by the variable name).
                    var meanVariance = new TensorShape(Xr.batch, 2, 1, Xr.channels);
                    var globalKernel = BestKernel(
                        ComputeKernelLibrary.GlobalPool2D(Xr, meanVariance, globalKernelName));
                    instructions.Add(new CompiledInstruction {
                        kernel = globalKernel, shape = meanVariance
                    });

                    // ScaleBias
                    // Scale (S) and bias (B) must each hold exactly one value per input channel.
                    var S = l.datasets[0].shape;
                    var B = l.datasets[1].shape;
                    Assert.AreEqual(X.channels, B.channels); Assert.AreEqual(X.channels, S.channels);
                    Assert.AreEqual(B.length, B.channels); Assert.AreEqual(S.length, S.channels);
                    var normlizationKernel = BestKernel(ComputeKernelLibrary.NormalizationTail(X, O));
                    instructions.Add(new CompiledInstruction {
                        kernel = normlizationKernel, shape = O
                    });

                    // FusedActivation
                    var fusedActivation = (Layer.FusedActivation)l.activation;
                    if (!IsFusedActivationSupported(fusedActivation))
                    {
                        var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                        instructions.Add(new CompiledInstruction {
                            kernel = activationKernel, shape = O
                        });
                    }
                    else
                    {
                        // An instruction without a kernel is still appended, so the instruction
                        // count is the same in both cases.
                        instructions.Add(new CompiledInstruction {
                            shape = O
                        });
                    }

                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = instructions.ToArray(), shape = O
                    });
                    continue;
                }
                else if (
                    l.type == Layer.Type.Add ||
                    l.type == Layer.Type.Sub ||
                    l.type == Layer.Type.Mul ||
                    l.type == Layer.Type.Div ||
                    l.type == Layer.Type.Pow ||
                    l.type == Layer.Type.Min ||
                    l.type == Layer.Type.Max ||
                    l.type == Layer.Type.Mean
                    )
                {
                    // Kernel name is "Broadcast" followed by the layer type name.
                    var kernelName = "Broadcast" + l.type;
                    kernel = BestKernel(
                        ComputeKernelLibrary.Broadcast(X, O, kernelName));
                }
                else if (
                    l.type == Layer.Type.Concat)
                {
                    var instructions = new List <CompiledInstruction>();

                    // One instruction per input, in input order.
                    foreach (var input in l.inputs)
                    {
                        var I = shapesByName[input];

                        if (I == null)
                        {
                            // Unknown input shape: add an empty instruction so positions still match l.inputs.
                            instructions.Add(new CompiledInstruction {
                            });
                            continue;
                        }
                        var kernelI = BestKernel(ComputeKernelLibrary.Copy(I.Value, O));

                        instructions.Add(new CompiledInstruction {
                            kernel = kernelI, shape = I.Value
                        });
                    }

                    m_CompiledLayers.Add(l, new CompiledLayer {
                        instructions = instructions.ToArray(), shape = O
                    });
                    continue;
                }
                // Activations
                else if (l.type == Layer.Type.Activation)
                {
                    if (l.activation == Layer.Activation.Softmax)
                    {
                        kernel = BestKernel(
                            ComputeKernelLibrary.Softmax(X, O));
                    }
                    else if (l.activation == Layer.Activation.LogSoftmax)
                    {
                        kernel = BestKernel(
                            ComputeKernelLibrary.LogSoftmax(X, O));
                    }
                    else if (l.activation == Layer.Activation.PRelu)
                    {
                        kernel = BestKernel(
                            ComputeKernelLibrary.PRelu(X, O));
                    }
                    else if (l.activation != Layer.Activation.None)
                    {
                        var kernelName = l.activation.ToString();
                        kernel = BestKernel(
                            ComputeKernelLibrary.Activation(X, O, kernelName));
                    }
                }

                // Single-instruction layers. Layer types not handled above (and Activation.None)
                // reach this point with the default-constructed 'kernel'.
                m_CompiledLayers.Add(l, new CompiledLayer {
                    instructions = new CompiledInstruction[]
                    {
                        new CompiledInstruction {
                            kernel = kernel, shape = O
                        }
                    }, shape = O
                });
            }
        }
Beispiel #54
0
        override unsafe protected void MinerThread()
        {
            Random r = new Random();

            UInt32[]      output        = new UInt32[256];
            ComputeDevice computeDevice = Device.GetComputeDevice();

            MarkAsAlive();

            MainForm.Logger("Miner thread for Device #" + DeviceIndex + " started.");

            ComputeProgram program;

            try { mProgramArrayMutex.WaitOne(); } catch (Exception) { }
            if (mProgramArray.ContainsKey(new long[] { DeviceIndex, mLocalWorkSize }))
            {
                program = mProgramArray[new long[] { DeviceIndex, mLocalWorkSize }];
            }
            else
            {
                String source = System.IO.File.ReadAllText(@"Kernels\ethash.cl");
                program = new ComputeProgram(Context, source);
                MainForm.Logger("Loaded ethash program for Device #" + DeviceIndex + ".");
                String buildOptions = (Device.Vendor == "AMD"
                                          ? "-O1 "
                                          : Device.Vendor == "NVIDIA"
                                              ? ""
                                              : // "-cl-nv-opt-level=1 -cl-nv-maxrregcount=256 " :
                                       "")
                                      + " -IKernels -DWORKSIZE=" + mLocalWorkSize;
                try
                {
                    program.Build(Device.DeviceList, buildOptions, null, IntPtr.Zero);
                }
                catch (Exception)
                {
                    MainForm.Logger(program.GetBuildLog(computeDevice));
                    throw;
                }
                MainForm.Logger("Built cryptonight program for Device #" + DeviceIndex + ".");
                MainForm.Logger("Built options: " + buildOptions);
                mProgramArray[new long[] { DeviceIndex, mLocalWorkSize }] = program;
            }
            try { mProgramArrayMutex.ReleaseMutex(); } catch (Exception) { }

            while (!Stopped)
            {
                MarkAsAlive();

                try
                {
                    // Wait for the first job to arrive.
                    int elapsedTime = 0;
                    while ((mStratum == null || mStratum.GetJob() == null) && elapsedTime < 5000)
                    {
                        Thread.Sleep(10);
                        elapsedTime += 10;
                    }
                    if (mStratum == null || mStratum.GetJob() == null)
                    {
                        MainForm.Logger("Stratum server failed to send a new job.");
                        //throw new TimeoutException("Stratum server failed to send a new job.");
                        return;
                    }

                    int  epoch   = -1;
                    long DAGSize = 0;
                    ComputeBuffer <byte> DAGBuffer = null;

                    using (ComputeKernel DAGKernel = program.CreateKernel("GenerateDAG"))
                        using (ComputeKernel searchKernel = program.CreateKernel("search"))
                            using (ComputeBuffer <UInt32> outputBuffer = new ComputeBuffer <UInt32>(Context, ComputeMemoryFlags.ReadWrite, 256))
                                using (ComputeBuffer <byte> headerBuffer = new ComputeBuffer <byte>(Context, ComputeMemoryFlags.ReadOnly, 32))
                                {
                                    MarkAsAlive();

                                    System.Diagnostics.Stopwatch consoleUpdateStopwatch = new System.Diagnostics.Stopwatch();
                                    EthashStratum.Work           work;

                                    while (!Stopped && (work = mStratum.GetWork()) != null)
                                    {
                                        String poolExtranonce      = mStratum.PoolExtranonce;
                                        byte[] extranonceByteArray = Utilities.StringToByteArray(poolExtranonce);
                                        byte   localExtranonce     = work.LocalExtranonce;
                                        UInt64 startNonce          = (UInt64)localExtranonce << (8 * (7 - extranonceByteArray.Length));
                                        for (int i = 0; i < extranonceByteArray.Length; ++i)
                                        {
                                            startNonce |= (UInt64)extranonceByteArray[i] << (8 * (7 - i));
                                        }
                                        startNonce += (ulong)r.Next(0, int.MaxValue) & (0xfffffffffffffffful >> (extranonceByteArray.Length * 8 + 8));
                                        String jobID      = work.GetJob().ID;
                                        String headerhash = work.GetJob().Headerhash;
                                        String seedhash   = work.GetJob().Seedhash;
                                        double difficulty = mStratum.Difficulty;

                                        fixed(byte *p = Utilities.StringToByteArray(headerhash))
                                        Queue.Write <byte>(headerBuffer, true, 0, 32, (IntPtr)p, null);

                                        if (epoch != work.GetJob().Epoch)
                                        {
                                            if (DAGBuffer != null)
                                            {
                                                DAGBuffer.Dispose();
                                                DAGBuffer = null;
                                            }
                                            epoch = work.GetJob().Epoch;
                                            DAGCache cache = new DAGCache(epoch, work.GetJob().Seedhash);
                                            DAGSize = Utilities.GetDAGSize(epoch);

                                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                                            sw.Start();
                                            fixed(byte *p = cache.GetData())
                                            {
                                                long globalWorkSize = DAGSize / 64;

                                                globalWorkSize /= 8;
                                                if (globalWorkSize % mLocalWorkSize > 0)
                                                {
                                                    globalWorkSize += mLocalWorkSize - globalWorkSize % mLocalWorkSize;
                                                }

                                                ComputeBuffer <byte> DAGCacheBuffer = new ComputeBuffer <byte>(Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, cache.GetData().Length, (IntPtr)p);

                                                DAGBuffer = new ComputeBuffer <byte>(Context, ComputeMemoryFlags.ReadWrite, globalWorkSize * 8 * 64 /* DAGSize */); // With this, we can remove a conditional statement in the DAG kernel.

                                                DAGKernel.SetValueArgument <UInt32>(0, 0);
                                                DAGKernel.SetMemoryArgument(1, DAGCacheBuffer);
                                                DAGKernel.SetMemoryArgument(2, DAGBuffer);
                                                DAGKernel.SetValueArgument <UInt32>(3, (UInt32)cache.GetData().Length / 64);
                                                DAGKernel.SetValueArgument <UInt32>(4, 0xffffffffu);

                                                for (long start = 0; start < DAGSize / 64; start += globalWorkSize)
                                                {
                                                    Queue.Execute(DAGKernel, new long[] { start }, new long[] { globalWorkSize }, new long[] { mLocalWorkSize }, null);
                                                    Queue.Finish();
                                                    if (Stopped || !mStratum.GetJob().ID.Equals(jobID))
                                                    {
                                                        break;
                                                    }
                                                }
                                                DAGCacheBuffer.Dispose();
                                                if (Stopped || !mStratum.GetJob().ID.Equals(jobID))
                                                {
                                                    break;
                                                }
                                            }
                                            sw.Stop();
                                            MainForm.Logger("Generated DAG for Epoch #" + epoch + " (" + (long)sw.Elapsed.TotalMilliseconds + "ms).");
                                        }

                                        consoleUpdateStopwatch.Start();

                                        while (!Stopped && mStratum.GetJob().ID.Equals(jobID) && mStratum.PoolExtranonce.Equals(poolExtranonce))
                                        {
                                            MarkAsAlive();

                                            // Get a new local extranonce if necessary.
                                            if ((startNonce & (0xfffffffffffffffful >> (extranonceByteArray.Length * 8 + 8)) + (ulong)mGlobalWorkSize) >= ((ulong)0x1 << (64 - (extranonceByteArray.Length * 8 + 8))))
                                            {
                                                break;
                                            }

                                            UInt64 target = (UInt64)((double)0xffff0000U / difficulty);
                                            searchKernel.SetMemoryArgument(0, outputBuffer);                    // g_output
                                            searchKernel.SetMemoryArgument(1, headerBuffer);                    // g_header
                                            searchKernel.SetMemoryArgument(2, DAGBuffer);                       // _g_dag
                                            searchKernel.SetValueArgument <UInt32>(3, (UInt32)(DAGSize / 128)); // DAG_SIZE
                                            searchKernel.SetValueArgument <UInt64>(4, startNonce);              // start_nonce
                                            searchKernel.SetValueArgument <UInt64>(5, target);                  // target
                                            searchKernel.SetValueArgument <UInt32>(6, 0xffffffffu);             // isolate

                                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                                            sw.Start();
                                            fixed(UInt32 *p = output)
                                            {
                                                output[255] = 0; // output[255] is used as an atomic counter.
                                                Queue.Write <UInt32>(outputBuffer, true, 0, 256, (IntPtr)p, null);
                                                Queue.Execute(searchKernel, new long[] { 0 }, new long[] { mGlobalWorkSize }, new long[] { mLocalWorkSize }, null);
                                                Queue.Read <UInt32>(outputBuffer, true, 0, 256, (IntPtr)p, null);
                                            }
                                            sw.Stop();
                                            mSpeed = ((double)mGlobalWorkSize) / sw.Elapsed.TotalSeconds;
                                            if (consoleUpdateStopwatch.ElapsedMilliseconds >= 10 * 1000)
                                            {
                                                MainForm.Logger("Device #" + DeviceIndex + ": " + String.Format("{0:N2} Mh/s", mSpeed / (1000000)));
                                                consoleUpdateStopwatch.Restart();
                                            }
                                            if (mStratum.GetJob().ID.Equals(jobID))
                                            {
                                                for (int i = 0; i < output[255]; ++i)
                                                {
                                                    mStratum.Submit(GatelessGateDevice, work.GetJob(), startNonce + (UInt64)output[i]);
                                                }
                                            }
                                            startNonce += (UInt64)mGlobalWorkSize;
                                        }
                                    }
                                }

                    if (DAGBuffer != null)
                    {
                        DAGBuffer.Dispose();
                        DAGBuffer = null;
                    }
                }
                catch (Exception ex)
                {
                    MainForm.Logger("Exception in miner thread: " + ex.Message + ex.StackTrace);
                    MainForm.Logger("Restarting miner thread...");
                }
            }

            MarkAsDone();
        }
Beispiel #55
0
        /// <summary>
        /// Binds each element of <paramref name="inputs"/> to the kernel argument with the same index.
        /// </summary>
        /// <remarks>
        /// An element whose runtime type is exactly <c>TSource[]</c> is copied into a new read/write
        /// buffer (created with <c>CopyHostPointer</c>) and bound as a memory argument. A primitive
        /// element is unboxed to <typeparamref name="TSource"/> and bound by value, so its boxed type
        /// must be exactly <typeparamref name="TSource"/>. Any other element is not bound, but it still
        /// occupies its argument index.
        /// If binding fails part-way through, every buffer created so far is disposed before the
        /// exception is rethrown, so no device memory is leaked.
        /// </remarks>
        /// <typeparam name="TSource">The element type of the array arguments and the type of the scalar arguments.</typeparam>
        /// <param name="inputs">The argument values, in kernel argument order.</param>
        /// <param name="kernel">The kernel whose arguments are set.</param>
        /// <param name="length">Not used by this implementation.</param>
        /// <param name="returnInputVariable">Not used by this implementation.</param>
        /// <returns>The buffers that were created, keyed by kernel argument index. The caller owns them.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="inputs"/> is null.</exception>
        /// <exception cref="System.ArgumentException">An element of <paramref name="inputs"/> is null.</exception>
        private static Dictionary <int, ComputeBuffer <TSource> > BuildKernelArguments <TSource>(object[] inputs, ComputeKernel kernel, long length, int?returnInputVariable = null) where TSource : struct
        {
            if (inputs == null)
            {
                throw new System.ArgumentNullException("inputs");
            }

            Dictionary <int, ComputeBuffer <TSource> > result = new Dictionary <int, ComputeBuffer <TSource> >();

            try
            {
                for (int i = 0; i < inputs.Length; i++)
                {
                    object item = inputs[i];
                    if (item == null)
                    {
                        throw new System.ArgumentException("Kernel argument " + i + " is null.", "inputs");
                    }

                    if (item.GetType() == typeof(TSource[]))
                    {
                        var buffer = new ComputeBuffer <TSource>(_context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, ((TSource[])item));

                        // Register the buffer before binding it so that the cleanup below also
                        // covers a failure inside SetMemoryArgument.
                        result.Add(i, buffer);
                        kernel.SetMemoryArgument(i, buffer);
                    }
                    else if (item.GetType().IsPrimitive)
                    {
                        kernel.SetValueArgument(i, (TSource)item);
                    }
                }
            }
            catch
            {
                // Do not leak the buffers allocated for earlier arguments.
                foreach (ComputeBuffer <TSource> created in result.Values)
                {
                    created.Dispose();
                }

                throw;
            }

            return(result);
        }
Beispiel #56
0
        /// <summary>
        /// Runs <paramref name="forwardKernel"/> over a single input and passes the index array it
        /// produces to <paramref name="getForwardResult"/>.
        /// </summary>
        /// <remarks>
        /// The output extents are derived from the input shape, kernel size, stride and padding; when
        /// <paramref name="coverAll"/> is set, an extra (stride - 1) is added before dividing by the stride.
        /// The call blocks until the kernel has finished and the indices have been read back.
        /// </remarks>
        /// <returns>Whatever <paramref name="getForwardResult"/> returns for the computed indices.</returns>
        public static NdArray <Real> SingleInputForward(NdArray <Real> input, int kernelWidth, int kernelHeight, int strideX, int strideY, int padX, int padY, bool coverAll, List <int[]> outputIndicesList, Func <NdArray <Real>, int[], int, int, List <int[]>, IFunction <Real>, NdArray <Real> > getForwardResult, ComputeKernel forwardKernel, IFunction <Real> maxPooling2d)
        {
            int outputHeight;
            int outputWidth;

            if (coverAll)
            {
                outputHeight = (int)Math.Floor((input.Shape[1] - kernelHeight + padY * 2.0f + strideY - 1.0f) / strideY) + 1;
                outputWidth  = (int)Math.Floor((input.Shape[2] - kernelWidth + padX * 2.0f + strideX - 1.0f) / strideX) + 1;
            }
            else
            {
                outputHeight = (int)Math.Floor((input.Shape[1] - kernelHeight + padY * 2.0f) / strideY) + 1;
                outputWidth  = (int)Math.Floor((input.Shape[2] - kernelWidth + padX * 2.0f) / strideX) + 1;
            }

            // One index per (batch, channel, y, x) output element.
            int[] indices = new int[input.Shape[0] * outputHeight * outputWidth * input.BatchCount];

            using (ComputeBuffer <Real> inputBuffer = new ComputeBuffer <Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, input.Data))
            {
                using (ComputeBuffer <int> indexBuffer = new ComputeBuffer <int>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, indices.Length))
                {
                    // Memory arguments.
                    forwardKernel.SetMemoryArgument(0, inputBuffer);
                    forwardKernel.SetMemoryArgument(1, indexBuffer);

                    // Output extents followed by the three input shape entries.
                    forwardKernel.SetValueArgument(2, outputHeight);
                    forwardKernel.SetValueArgument(3, outputWidth);
                    forwardKernel.SetValueArgument(4, input.Shape[0]);
                    forwardKernel.SetValueArgument(5, input.Shape[1]);
                    forwardKernel.SetValueArgument(6, input.Shape[2]);

                    // Window geometry. Note the argument order: height before width for the
                    // kernel size, X before Y for the stride, Y before X for the padding.
                    forwardKernel.SetValueArgument(7, kernelHeight);
                    forwardKernel.SetValueArgument(8, kernelWidth);
                    forwardKernel.SetValueArgument(9, strideX);
                    forwardKernel.SetValueArgument(10, strideY);
                    forwardKernel.SetValueArgument(11, padY);
                    forwardKernel.SetValueArgument(12, padX);

                    long[] globalWorkSize = new long[] { input.BatchCount * input.Shape[0], outputHeight, outputWidth };
                    OpenCL.CommandQueue.Execute(forwardKernel, null, globalWorkSize, null, null);

                    OpenCL.CommandQueue.Finish();
                    OpenCL.CommandQueue.ReadFromBuffer(indexBuffer, ref indices, true, null);
                }
            }

            return getForwardResult(input, indices, outputWidth, outputHeight, outputIndicesList, maxPooling2d);
        }
        /// <summary>
        /// Creates the OpenCL context and command queue, builds the terrain-generation, quadtree and
        /// vertex-winder programs, allocates the per-chunk device buffers, and binds the kernel
        /// arguments that are known at construction time.
        /// </summary>
        /// <remarks>
        /// The platform index is hard-coded (1 when CPU_DEBUG is defined, otherwise 0) and only the
        /// first device of that platform is used. Kernel arguments that are not set here are left
        /// unbound by this constructor.
        /// </remarks>
        public TerrainGen()
        {
            #if CPU_DEBUG
            var platform = ComputePlatform.Platforms[1];
            #else
            var platform = ComputePlatform.Platforms[0];
            #endif
            _devices = new List<ComputeDevice>();
            _devices.Add(platform.Devices[0]);
            _properties = new ComputeContextPropertyList(platform);
            _context = new ComputeContext(_devices, _properties, null, IntPtr.Zero);
            _cmdQueue = new ComputeCommandQueue(_context, _devices[0], ComputeCommandQueueFlags.None);

            #region setup generator kernel
            // NOTE(review): the hash-change result is overwritten on the next line, so every program
            // below is always compiled from source and the binary-loading branches are never taken.
            bool loadFromSource = Gbl.HasRawHashChanged[Gbl.RawDir.Scripts];
            loadFromSource = true;
            _chunkWidthInBlocks = Gbl.LoadContent<int>("TGen_ChunkWidthInBlocks");
            // A grid of N blocks per side has N + 1 vertices per side.
            _chunkWidthInVerts = _chunkWidthInBlocks + 1;
            _blockWidth = Gbl.LoadContent<int>("TGen_BlockWidthInMeters");
            float lacunarity = Gbl.LoadContent<float>("TGen_Lacunarity");
            float gain = Gbl.LoadContent<float>("TGen_Gain");
            int octaves = Gbl.LoadContent<int>("TGen_Octaves");
            float offset = Gbl.LoadContent<float>("TGen_Offset");
            float hScale = Gbl.LoadContent<float>("TGen_HScale");
            float vScale = Gbl.LoadContent<float>("TGen_VScale");

            // Eight generator settings are uploaded as one constants buffer. The slot order below is
            // fixed by this array (offset comes before octaves); values loaded as int above share the
            // array with the float settings.
            _genConstants = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly, 8);
            var genArr = new[]{
                lacunarity,
                gain,
                offset,
                octaves,
                hScale,
                vScale,
                _blockWidth,
                _chunkWidthInBlocks
            };

            // Non-blocking write (third argument is false); the Finish() at the end of the
            // constructor waits for the queue.
            _cmdQueue.WriteToBuffer(genArr, _genConstants, false, null);
            if (loadFromSource){
                _generationPrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_Generator"));
            #if CPU_DEBUG
                _generationPrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\GenTerrain.cl", null, IntPtr.Zero); //use option -I + scriptDir for header search
            #else
                _generationPrgm.Build(null, "", null, IntPtr.Zero);//use option -I + scriptDir for header search
            #endif
                // Store the freshly built binaries under the same key the else-branch loads from.
                Gbl.SaveBinary(_generationPrgm.Binaries, "TGen_Generator");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_Generator");
                _generationPrgm = new ComputeProgram(_context, binary, _devices);
                _generationPrgm.Build(null, "", null, IntPtr.Zero);
            }
            //loadFromSource = false;

            _terrainGenKernel = _generationPrgm.CreateKernel("GenTerrain");
            _normalGenKernel = _generationPrgm.CreateKernel("GenNormals");

            //despite the script using float3 for these fields, we need to consider it to be float4 because the
            //implementation is basically a float4 wrapper that uses zero for the last variable
            // Four elements per vertex for geometry, normals, binormals and tangents; two per vertex for UVs.
            _geometry = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _normals = new ComputeBuffer<ushort>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts * _chunkWidthInVerts * 4);
            _binormals = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _tangents = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _uvCoords = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*2);

            // Arguments 1 and 2 of both generator kernels are not bound here
            // (presumably supplied per invocation elsewhere - confirm against the call sites).
            _terrainGenKernel.SetMemoryArgument(0, _genConstants);
            _terrainGenKernel.SetMemoryArgument(3, _geometry);
            _terrainGenKernel.SetMemoryArgument(4, _uvCoords);

            _normalGenKernel.SetMemoryArgument(0, _genConstants);
            _normalGenKernel.SetMemoryArgument(3, _geometry);
            _normalGenKernel.SetMemoryArgument(4, _normals);
            _normalGenKernel.SetMemoryArgument(5, _binormals);
            _normalGenKernel.SetMemoryArgument(6, _tangents);

            #endregion

            #region setup quadtree kernel

            // Same build-or-load pattern as the generator program above.
            if (loadFromSource){
                _qTreePrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_QTree"));
            #if CPU_DEBUG
                _qTreePrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\Quadtree.cl", null, IntPtr.Zero);
            #else
                _qTreePrgm.Build(null, "", null, IntPtr.Zero);
            #endif
                Gbl.SaveBinary(_qTreePrgm.Binaries, "TGen_QTree");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_QTree");
                _qTreePrgm = new ComputeProgram(_context, binary, _devices);
                _qTreePrgm.Build(null, "", null, IntPtr.Zero);
            }

            _qTreeKernel = _qTreePrgm.CreateKernel("QuadTree");
            _crossCullKernel = _qTreePrgm.CreateKernel("CrossCull");

            // One byte per vertex.
            _activeVerts = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts);

            _dummy = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, 50);
            // rawNormals is left at its default (all zero) and used to clear the normals buffer;
            // _emptyVerts is filled with 1 and used to initialise the active-vertex buffer.
            var rawNormals = new ushort[_chunkWidthInVerts * _chunkWidthInVerts * 4];
            _emptyVerts = new byte[_chunkWidthInVerts*_chunkWidthInVerts];
            for (int i = 0; i < _emptyVerts.Length; i++){
                _emptyVerts[i] = 1;
            }
            _cmdQueue.WriteToBuffer(rawNormals, _normals, true, null);
            _cmdQueue.WriteToBuffer(_emptyVerts, _activeVerts, true, null);

            // Argument 0 of both quadtree kernels is not bound here.
            _qTreeKernel.SetValueArgument(1, _chunkWidthInBlocks);
            _qTreeKernel.SetMemoryArgument(2, _normals);
            _qTreeKernel.SetMemoryArgument(3, _activeVerts);
            _qTreeKernel.SetMemoryArgument(4, _dummy);

            _crossCullKernel.SetValueArgument(1, _chunkWidthInBlocks);
            _crossCullKernel.SetMemoryArgument(2, _normals);
            _crossCullKernel.SetMemoryArgument(3, _activeVerts);
            _crossCullKernel.SetMemoryArgument(4, _dummy);

            #endregion

            #region setup winding kernel

            // Same build-or-load pattern as the generator program above.
            if (loadFromSource){
                _winderPrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_VertexWinder"));
            #if CPU_DEBUG
                _winderPrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\VertexWinder.cl", null, IntPtr.Zero);
            #else
                _winderPrgm.Build(null, "", null, IntPtr.Zero);
            #endif
                Gbl.SaveBinary(_winderPrgm.Binaries, "TGen_VertexWinder");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_VertexWinder");
                _winderPrgm = new ComputeProgram(_context, binary, _devices);
                _winderPrgm.Build(null, "", null, IntPtr.Zero);
            }

            _winderKernel = _winderPrgm.CreateKernel("VertexWinder");
            // Eight index slots per block.
            _indicies = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, (_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8);

            _winderKernel.SetMemoryArgument(0, _activeVerts);
            _winderKernel.SetMemoryArgument(1, _indicies);

            // A new int[] is already zero-filled, so the loop below restates the default values.
            _emptyIndices = new int[(_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8];
            for (int i = 0; i < (_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8; i++){
                _emptyIndices[i] = 0;
            }
            _cmdQueue.WriteToBuffer(_emptyIndices, _indicies, true, null);

            #endregion

            if (loadFromSource){
                Gbl.AllowMD5Refresh[Gbl.RawDir.Scripts] = true;
            }

            // Wait for everything queued above before the constructor returns.
            _cmdQueue.Finish();
        }
Beispiel #58
0
        /// <summary>
        /// Reads every file listed in <c>CLSourcePaths</c>, builds them as a single OpenCL program
        /// and creates the "render" kernel from it.
        /// </summary>
        /// <remarks>
        /// When no source paths are configured, a trace message is written and the method returns
        /// without touching the program or kernel fields. A missing source file, a build failure,
        /// invalid build options or an invalid binary terminates the process with exit code -1
        /// (the build log is printed first for the three build-related failures).
        /// </remarks>
        protected virtual void buildOpenCLProgram()
        {
            if (CLSourcePaths == null)
            {
                System.Diagnostics.Trace.Write("No CL source defined.\n");
                return;
            }

            String[] sourceArray = new String[CLSourcePaths.Length];
            try
            {
                for (int i = 0; i < CLSourcePaths.Length; i++)
                {
                    // Dispose the reader so the file handle is released immediately instead of
                    // staying open until the finalizer runs.
                    using (StreamReader sourceReader = new StreamReader(CLSourcePaths[i]))
                    {
                        sourceArray[i] = sourceReader.ReadToEnd();
                    }
                }
            }
            catch (FileNotFoundException e)
            {
                System.Diagnostics.Trace.Write("Can't find: " + e.FileName + "\n");
                Environment.Exit(-1);
            }

            // Build and compile the OpenCL program
            _renderKernel = null;
            _renderProgram = new ComputeProgram(_commandQueue.Context, sourceArray);
            try
            {
                // build the program
                _renderProgram.Build(null, "-cl-nv-verbose", null, IntPtr.Zero);

                // create a reference to the kernel function
                _renderKernel = _renderProgram.CreateKernel("render");
            }
            catch (BuildProgramFailureComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
            catch (InvalidBuildOptionsComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
            catch (InvalidBinaryComputeException)
            {
                printBuildLog();

                Environment.Exit(-1);
            }
        }
Beispiel #59
0
        /// <summary>
        /// Binds each element of <paramref name="inputs"/> to the kernel argument with the same index.
        /// </summary>
        /// <remarks>
        /// Arrays, and objects whose type or direct base type is named "XArray", are wrapped in a
        /// read/write <c>GenericArrayMemory</c> and bound as memory arguments. The memory is created
        /// with <c>AllocateHostPointer</c> when the matching parameter of <paramref name="method"/> is
        /// <c>IOMode.Out</c>, and with <c>CopyHostPointer</c> otherwise. Every other value is pinned,
        /// passed by value using its marshalled size, and unpinned again straight after the call.
        /// </remarks>
        /// <param name="method">The kernel function description; supplies the IO mode of each parameter.</param>
        /// <param name="inputs">The argument values, in kernel argument order.</param>
        /// <param name="kernel">The kernel whose arguments are set.</param>
        /// <param name="length">Not used by this implementation.</param>
        /// <param name="returnInputVariable">Not used by this implementation.</param>
        /// <returns>The memory objects that were created, keyed by kernel argument index.</returns>
        private Dictionary <int, GenericArrayMemory> BuildKernelArguments(KernelFunction method, object[] inputs, ComputeKernel kernel, long length, int?returnInputVariable = null)
        {
            Dictionary <int, GenericArrayMemory> result = new Dictionary <int, GenericArrayMemory>();

            for (int i = 0; i < inputs.Length; i++)
            {
                object item = inputs[i];
                var itemType = item.GetType();

                bool isArray = itemType.IsArray;

                // BaseType is null for System.Object itself, so guard before reading its name.
                bool isXArray = !isArray
                                && (itemType.Name == "XArray"
                                    || (itemType.BaseType != null && itemType.BaseType.Name == "XArray"));

                if (isArray || isXArray)
                {
                    // Output-only parameters need no initial copy of the host data.
                    var mode = method.Parameters.ElementAt(i).Value.IOMode;
                    var flag = ComputeMemoryFlags.ReadWrite;
                    if (mode == IOMode.Out)
                    {
                        flag |= ComputeMemoryFlags.AllocateHostPointer;
                    }
                    else
                    {
                        flag |= ComputeMemoryFlags.CopyHostPointer;
                    }

                    GenericArrayMemory mem = isArray
                                             ? new GenericArrayMemory(_context, flag, (Array)item)
                                             : new GenericArrayMemory(_context, flag, (XArray)item);
                    kernel.SetMemoryArgument(i, mem);
                    result.Add(i, mem);
                }
                else
                {
                    int size = Marshal.SizeOf(item);
                    GCHandle pinned = GCHandle.Alloc(item, GCHandleType.Pinned);
                    try
                    {
                        kernel.SetArgument(i, new IntPtr(size), pinned.AddrOfPinnedObject());
                    }
                    finally
                    {
                        // clSetKernelArg copies the value during the call, so the pin can be
                        // released right away; leaving it allocated leaks a pinned object per call.
                        pinned.Free();
                    }
                }
            }

            return(result);
        }
 /// <summary>
 /// Creates a calculator around an existing OpenCL context, program and kernel.
 /// The three objects are stored as given; nothing is built, validated or copied here.
 /// </summary>
 /// <param name="context">The compute context to keep.</param>
 /// <param name="prg">The compute program to keep.</param>
 /// <param name="krnl">The compute kernel to keep.</param>
 public OpenCLCalculator(ComputeContext context, ComputeProgram prg, ComputeKernel krnl)
 {
     // Plain field captures; the assignments are independent of one another.
     _krnl = krnl;
     _prg = prg;
     _context = context;
 }