public static void ForGpu(int fromInclusiveX, int toExclusiveX, int fromInclusiveY, int toExclusiveY, Action <int, int> action, OpenCLNet.Device device) { HlGraphEntry hlGraphEntry = GetHlGraph(action.Method, 2, device); System.Diagnostics.Debug.Assert(hlGraphEntry.fromInclusiveLocation != null && hlGraphEntry.fromInclusiveLocation.Count == 2); System.Diagnostics.Debug.Assert(hlGraphEntry.toExclusiveLocation != null && hlGraphEntry.toExclusiveLocation.Count == 2); using (InvokeContext ctx = new InvokeContext(hlGraphEntry.HlGraph)) { if (hlGraphEntry.fromInclusiveLocation.Count > 0) { ctx.PutArgument(hlGraphEntry.fromInclusiveLocation[0], fromInclusiveX); } if (hlGraphEntry.toExclusiveLocation.Count > 0) { ctx.PutArgument(hlGraphEntry.toExclusiveLocation[0], toExclusiveX); } if (hlGraphEntry.fromInclusiveLocation.Count > 1) { ctx.PutArgument(hlGraphEntry.fromInclusiveLocation[1], fromInclusiveY); } if (hlGraphEntry.toExclusiveLocation.Count > 1) { ctx.PutArgument(hlGraphEntry.toExclusiveLocation[1], toExclusiveY); } DoInvoke(new int[] { toExclusiveX - fromInclusiveX, toExclusiveY - fromInclusiveY }, action.Target, hlGraphEntry, ctx, device); } }
private static HlGraphEntry ConstructKernelHlGraphEntry(MethodInfo Method, int GidParamCount) { HlGraphEntry CacheEntry = ConstructHlGraphEntry(Method, GidParamCount, true, "Cil2OpenCL_Kernel_Seq{0}"); GenerateOpenCLSource(CacheEntry); return(CacheEntry); }
internal static HlGraphEntry ConstructRelatedHlGraphEntry(MethodInfo Method, HighLevel.HlGraph ParentGraph, HlGraphCache RelatedGraphCache) { HlGraphEntry CacheEntry = ConstructHlGraphEntry(Method, 0, false, "Cil2OpenCL_Sub_Seq{0}"); RelatedGraphCache.SetValue(IntPtr.Zero, Method, CacheEntry); ParentGraph.RelatedGraphs[Method] = CacheEntry; GenerateOpenCLSource(CacheEntry); return(CacheEntry); }
internal void SetValue(IntPtr deviceId, MethodInfo methodInfo, HlGraphEntry hlGraphEntry) { lock (hlGraphCache) { if (!hlGraphCache.ContainsKey(deviceId)) { hlGraphCache[deviceId] = new Dictionary <MethodInfo, HlGraphEntry>(); } hlGraphCache[deviceId][methodInfo] = hlGraphEntry; } }
internal bool TryGetValue(IntPtr deviceId, MethodInfo methodInfo, out HlGraphEntry hlGraphEntry) { lock (hlGraphCache) { if (hlGraphCache.ContainsKey(deviceId) && hlGraphCache[deviceId].TryGetValue(methodInfo, out hlGraphEntry)) { return(true); } else { hlGraphEntry = null; return(false); } } }
private static void DoInvoke(int[] WorkSize, object Target, HlGraphEntry CacheEntry, InvokeContext ctx, OpenCLNet.Device device) { HighLevel.HlGraph HLgraph = CacheEntry.HlGraph; foreach (KeyValuePair <FieldInfo, HighLevel.ArgumentLocation> Entry in HLgraph.StaticFieldMap) { ctx.PutArgument(Entry.Value, Entry.Key.GetValue(null)); } SetArguments(ctx, Target, HLgraph.RootPathEntry); /* * foreach (KeyValuePair<FieldInfo, HighLevel.ArgumentLocation> Entry in HLgraph.ThisFieldMap) * { * ctx.PutArgument(Entry.Value, Entry.Key.GetValue(Target)); * } * foreach (KeyValuePair<FieldInfo, Dictionary<FieldInfo, HighLevel.ArgumentLocation>> Entry in HLgraph.OuterThisFieldMap) { * object RealThis = Entry.Key.GetValue(Target); * foreach (KeyValuePair<FieldInfo, HighLevel.ArgumentLocation> SubEntry in Entry.Value) { * ctx.PutArgument(SubEntry.Value, SubEntry.Key.GetValue(RealThis)); * } * }*/ foreach (KeyValuePair <HighLevel.ArgumentLocation, HighLevel.ArrayInfo> Entry in HLgraph.MultiDimensionalArrayInfo) { System.Diagnostics.Debug.Assert(Entry.Key.Index >= 0 && Entry.Key.Index < ctx.Arguments.Count); InvokeArgument BaseArrayArg = ctx.Arguments[Entry.Key.Index]; System.Diagnostics.Debug.Assert(BaseArrayArg != null && BaseArrayArg.Value != null && BaseArrayArg.Value.GetType() == Entry.Key.DataType); System.Diagnostics.Debug.Assert(Entry.Key.DataType.IsArray && Entry.Key.DataType.GetArrayRank() == Entry.Value.DimensionCount); System.Diagnostics.Debug.Assert(BaseArrayArg.Value is Array); Array BaseArray = (System.Array)BaseArrayArg.Value; long BaseFactor = 1; for (int Dimension = 1; Dimension < Entry.Value.DimensionCount; Dimension++) { int ThisDimensionLength = BaseArray.GetLength(Entry.Value.DimensionCount - 1 - (Dimension - 1)); BaseFactor *= ThisDimensionLength; ctx.PutArgument(Entry.Value.ScaleArgument[Dimension], (int)BaseFactor); } } ctx.Complete(); OpenCLInterop.CallOpenCLNet(WorkSize, CacheEntry, ctx, HLgraph, device); }
internal static void CallOpenCLNet(int[] WorkSize, HlGraphEntry CacheEntry, InvokeContext ctx, HighLevel.HlGraph HLgraph, OpenCLNet.Device device) { // We can invoke the kernel using the arguments from ctx now :) if (device == null) { device = GetFirstGpu(); if (device == null) { device = GetFirstCpu(); } if (device == null) { throw new ArgumentNullException("device"); } } OpenCLNet.Platform Platform = device.Platform; OpenCLNet.Context context; OpenCLNet.Program program; lock (CacheEntry) { context = CacheEntry.Context; if (context == null) { IntPtr[] properties = new IntPtr[] { new IntPtr((long)OpenCLNet.ContextProperties.PLATFORM), Platform.PlatformID, IntPtr.Zero, }; context = CacheEntry.Context = Platform.CreateContext(properties, new OpenCLNet.Device[] { device }, null, IntPtr.Zero); } program = CacheEntry.Program; if (program == null) { StringBuilder source = new StringBuilder(); source.Append(GetOpenCLSourceHeader(Platform, device, CacheEntry)); source.Append(CacheEntry.Source); source.Append(GetOpenCLSourceFooter(Platform, device)); program = context.CreateProgramWithSource(source.ToString()); try { program.Build(); } catch (Exception ex) { string err = program.GetBuildLog(device); throw new Exception(err, ex); } CacheEntry.Program = program; } } using (CallContext CallContext = new CallContext(context, device, OpenCLNet.CommandQueueProperties.PROFILING_ENABLE, program.CreateKernel(HLgraph.MethodName))) { OpenCLNet.CommandQueue commandQueue = CallContext.CommandQueue; for (int i = 0; i < ctx.Arguments.Count; i++) { ctx.Arguments[i].WriteToKernel(CallContext, i); } //OpenCLNet.Event StartEvent, EndEvent; //commandQueue.EnqueueMarker(out StartEvent); IntPtr[] GlobalWorkSize = new IntPtr[WorkSize.Length]; for (int i = 0; i < WorkSize.Length; i++) { GlobalWorkSize[i] = new IntPtr(WorkSize[i]); } commandQueue.EnqueueNDRangeKernel(CallContext.Kernel, (uint)GlobalWorkSize.Length, null, GlobalWorkSize, null); for (int i = 0; i < ctx.Arguments.Count; i++) { ctx.Arguments[i].ReadFromKernel(CallContext, i); } commandQueue.Finish(); //commandQueue.EnqueueMarker(out EndEvent); //commandQueue.Finish(); //ulong StartTime, EndTime; //StartEvent.GetEventProfilingInfo(OpenCLNet.ProfilingInfo.QUEUED, out StartTime); //EndEvent.GetEventProfilingInfo(OpenCLNet.ProfilingInfo.END, out EndTime); } }
private static string GetOpenCLSourceHeader(OpenCLNet.Platform platform, OpenCLNet.Device device, HlGraphEntry KernelGraphEntry) { StringBuilder result = new System.Text.StringBuilder(); result.AppendLine("// BEGIN GENERATED OpenCL"); setExtensionIfAvailable(result, device, "cl_amd_fp64"); setExtensionIfAvailable(result, device, "cl_khr_fp64"); setExtensionIfAvailable(result, device, "cl_khr_global_int32_base_atomics"); setExtensionIfAvailable(result, device, "cl_khr_global_int32_extended_atomics"); setExtensionIfAvailable(result, device, "cl_khr_local_int32_base_atomics"); setExtensionIfAvailable(result, device, "cl_khr_local_int32_extended_atomics"); if (KernelGraphEntry.HlGraph.RandomStateLocation != null) { result.AppendLine(); result.AppendLine("// Source: http://www.doc.ic.ac.uk/~dt10/research/rngs-gpu-mwc64x.html"); result.AppendLine("uint MWC64X(uint2 *state)"); result.AppendLine("{"); result.AppendLine(" enum{ A=4294883355U };"); result.AppendLine(" uint x=(*state).x, c=(*state).y; // Unpack the state"); result.AppendLine(" uint res=x^c; // Calculate the result"); result.AppendLine(" uint hi=mul_hi(x,A); // Step the RNG"); result.AppendLine(" x=x*A+c;"); result.AppendLine(" c=hi+(x<c);"); result.AppendLine(" *state=(uint2)(x,c); // Pack the state back up"); result.AppendLine(" return res; // Return the next result"); result.AppendLine("}"); } return(result.ToString()); }
private static void GenerateOpenCLSource(HlGraphEntry CacheEntry) { // Non-kernel methods include just their own code, but kernel methods include everything required if (!CacheEntry.HlGraph.IsKernel || CacheEntry.HlGraph.RelatedGraphs.Count == 0) { CacheEntry.Source = getOpenCLSource(CacheEntry.HlGraph); return; } // No recursion allowed, so we can get away with a topological sort of all involved functions // with no prototypes beforehand. // The following code has been adapted from // http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py // "Tarjan's algorithm and topological sorting implementation in Python" by Paul Harrison // Step 1: get list of all involved methods List <HighLevel.HlGraph> Nodes = new List <HighLevel.HlGraph>(); Nodes.Add(CacheEntry.HlGraph); for (int i = 0; i < Nodes.Count; i++) { HighLevel.HlGraph Entry = Nodes[i]; foreach (HlGraphEntry SubEntry in Entry.RelatedGraphs.Values) { if (!Nodes.Contains(SubEntry.HlGraph)) { Nodes.Add(SubEntry.HlGraph); } } } // Step 2: topological sort Dictionary <HighLevel.HlGraph, int> count = new Dictionary <HighLevel.HlGraph, int>(); foreach (HighLevel.HlGraph Current in Nodes) { count[Current] = 0; } foreach (HighLevel.HlGraph Current in Nodes) { foreach (HlGraphEntry SuccessorEntry in Current.RelatedGraphs.Values) { count[SuccessorEntry.HlGraph]++; } } List <HighLevel.HlGraph> Ready = new List <HighLevel.HlGraph>(); List <HighLevel.HlGraph> Result = new List <HighLevel.HlGraph>(Nodes.Count); Ready.Add(CacheEntry.HlGraph); while (Ready.Count > 0) { HighLevel.HlGraph Current = Ready[Ready.Count - 1]; Ready.RemoveAt(Ready.Count - 1); Result.Add(Current); System.Diagnostics.Debug.Assert(count[Current] == 0); foreach (HlGraphEntry Successor in Current.RelatedGraphs.Values) { count[Successor.HlGraph]--; if (count[Successor.HlGraph] == 0) { Ready.Add(Successor.HlGraph); } } } // Step 3: check for recursions. If there is any strongly-connected component, count[s] // will never reach zero, so the Ready list runs empty without all functions being // inserted into the Result list. if (Result.Count != Nodes.Count) { throw new InvalidOperationException("Unable to compute topological sort of functions. Recursions are not supported."); } // Generate code for all HlGraphs in the Result list, in reverse order Result.Reverse(); CacheEntry.Source = getOpenCLSource(Result); }
private static HlGraphEntry ConstructHlGraphEntry(MethodInfo Method, int GidParamCount, bool IsKernel, string NameTemplate) { TextWriter writer = System.Console.Out; string MethodName = string.Format(NameTemplate, HlGraphSequenceNumber++); HighLevel.HlGraph HLgraph = new HighLevel.HlGraph(Method, MethodName); HLgraph.IsKernel = IsKernel; HLgraph.ValueTypeMap = m_ValueTypeMap; if (!IsKernel && Method.DeclaringType.IsValueType && ((Method.CallingConvention & CallingConventions.HasThis) != 0)) { System.Diagnostics.Debug.Assert(HLgraph.Arguments.Count > 0); System.Diagnostics.Debug.Assert(HLgraph.Arguments[0].DataType.IsByRef && HLgraph.Arguments[0].DataType.GetElementType() == Method.DeclaringType); HLgraph.KeepThis = true; } if (DumpCode > 3) { WriteCode(HLgraph, writer); } // Optimize it (just some copy propagation and dead assignment elimination to get rid of // CIL stack accesses) HLgraph.Optimize(); if (DumpCode > 4) { WriteCode(HLgraph, writer); } // Convert all expression trees into something OpenCL can understand HLgraph.ConvertForOpenCl(subGraphCache); System.Diagnostics.Debug.Assert(HLgraph.KeepThis || !HLgraph.HasThisParameter); // Change the real first arguments (the "int"s of the Action<> method) to local variables // which get their value from OpenCL's built-in get_global_id() routine. // NOTE: ConvertArgumentToLocal removes the specified argument, so both calls need to specify // an ArgumentId of zero!!! List <HighLevel.LocalVariableLocation> IdLocation = new List <HighLevel.LocalVariableLocation>(); for (int i = 0; i < GidParamCount; i++) { IdLocation.Add(HLgraph.ConvertArgumentToLocal(0)); } // Add fromInclusive and toExclusive as additional parameters List <HighLevel.ArgumentLocation> StartIdLocation = new List <HighLevel.ArgumentLocation>(); List <HighLevel.ArgumentLocation> EndIdLocation = new List <HighLevel.ArgumentLocation>(); for (int i = 0; i < GidParamCount; i++) { StartIdLocation.Add(HLgraph.InsertArgument(i * 2 + 0, "fromInclusive" + i, typeof(int), false)); EndIdLocation.Add(HLgraph.InsertArgument(i * 2 + 1, "toExclusive" + i, typeof(int), false)); } // "i0 = get_global_id(0) + fromInclusive0;" for (int i = 0; i < GidParamCount; i++) { HLgraph.CanonicalStartBlock.Instructions.Insert(i, new HighLevel.AssignmentInstruction( new HighLevel.LocationNode(IdLocation[i]), new HighLevel.AddNode( new HighLevel.CallNode(typeof(OpenClFunctions).GetMethod("get_global_id", new Type[] { typeof(uint) }), new HighLevel.IntegerConstantNode(i)), new HighLevel.LocationNode(StartIdLocation[i]) ) ) ); } // "if (i0 >= toExclusive0) return;" if (GidParamCount > 0) { HighLevel.BasicBlock ReturnBlock = null; foreach (HighLevel.BasicBlock BB in HLgraph.BasicBlocks) { if (BB.Instructions.Count == 1 && BB.Instructions[0].InstructionType == HighLevel.InstructionType.Return) { ReturnBlock = BB; break; } } if (ReturnBlock == null) { ReturnBlock = new HighLevel.BasicBlock("CANONICAL_RETURN_BLOCK"); ReturnBlock.Instructions.Add(new HighLevel.ReturnInstruction(null)); HLgraph.BasicBlocks.Add(ReturnBlock); } ReturnBlock.LabelNameUsed = true; for (int i = 0; i < GidParamCount; i++) { HLgraph.CanonicalStartBlock.Instructions.Insert(GidParamCount + i, new HighLevel.ConditionalBranchInstruction( new HighLevel.GreaterEqualsNode( new HighLevel.LocationNode(IdLocation[i]), new HighLevel.LocationNode(EndIdLocation[i]) ), ReturnBlock ) ); } } // Create the argument to pass the random seed, if necessary if (!object.ReferenceEquals(HLgraph.RandomStateLocation, null) && HLgraph.RandomStateLocation.LocationType == HighLevel.LocationType.LocalVariable) { // This can only happen for kernels. All nested routines get a pointer to // the kernel's rnd_state instead System.Diagnostics.Debug.Assert(HLgraph.IsKernel); System.Diagnostics.Debug.Assert(object.ReferenceEquals(HLgraph.RandomSeedArgument, null)); HLgraph.RandomSeedArgument = HLgraph.CreateArgument("rnd_seed", typeof(uint), false); if (GidParamCount > 0) { HighLevel.Node LocalSeed = null; for (int i = 0; i < GidParamCount; i++) { if (LocalSeed == null) { LocalSeed = new HighLevel.LocationNode(IdLocation[i]); } else { LocalSeed = new HighLevel.AddNode( new HighLevel.LocationNode(IdLocation[i]), new HighLevel.MulNode( LocalSeed, new HighLevel.IntegerConstantNode(0x10000) /* TODO: what is a good factor here ??? */ ) ); } } HLgraph.CanonicalStartBlock.Instructions.Add(new HighLevel.AssignmentInstruction( new HighLevel.NamedFieldNode(new HighLevel.LocationNode(HLgraph.RandomStateLocation), "x", typeof(uint)), new HighLevel.AddNode( LocalSeed, new HighLevel.IntegerConstantNode(1) ) ) ); } else { HLgraph.CanonicalStartBlock.Instructions.Add(new HighLevel.AssignmentInstruction( new HighLevel.NamedFieldNode(new HighLevel.LocationNode(HLgraph.RandomStateLocation), "x", typeof(uint)), new HighLevel.IntegerConstantNode(1) ) ); } HLgraph.CanonicalStartBlock.Instructions.Add(new HighLevel.AssignmentInstruction( new HighLevel.NamedFieldNode(new HighLevel.LocationNode(HLgraph.RandomStateLocation), "y", typeof(uint)), new HighLevel.LocationNode(HLgraph.RandomSeedArgument) )); // Perform TWO warmup rounds, so our not-so-random start states // get a chance to really inflict changes to all 32 bits of // generated random numbers. HLgraph.CanonicalStartBlock.Instructions.Add(new HighLevel.AssignmentInstruction( null, new HighLevel.CallNode( typeof(OpenClFunctions).GetMethod("rnd"), new HighLevel.AddressOfNode( new HighLevel.LocationNode(HLgraph.RandomStateLocation) ) ) ) ); HLgraph.CanonicalStartBlock.Instructions.Add(new HighLevel.AssignmentInstruction( null, new HighLevel.CallNode( typeof(OpenClFunctions).GetMethod("rnd"), new HighLevel.AddressOfNode( new HighLevel.LocationNode(HLgraph.RandomStateLocation) ) ) ) ); } if (DumpCode > 5) { WriteCode(HLgraph, writer); } // Update location usage information HLgraph.AnalyzeLocationUsage(); // Finally, add the graph to the cache HlGraphEntry CacheEntry = new HlGraphEntry(HLgraph, StartIdLocation, EndIdLocation); return(CacheEntry); }