/// <summary>
/// Copies a range of elements from emulated device memory into a host array.
/// </summary>
/// <typeparam name="T">Element type, forwarded to <c>DoCopy</c>.</typeparam>
/// <param name="devArray">Array handle that is resolved to its backing storage through <c>GetDeviceMemory</c>.</param>
/// <param name="devOffset">Offset into the device array; added to the offset already recorded for the handle.</param>
/// <param name="hostArray">The host array that receives the elements.</param>
/// <param name="hostOffset">Offset into the host array at which writing starts.</param>
/// <param name="count">Number of elements to copy.</param>
protected override void DoCopyFromDevice<T>(Array devArray, int devOffset, Array hostArray, int hostOffset, int count)
{
    var source = (EmuDevicePtrEx)GetDeviceMemory(devArray);

    // The handle may itself point part-way into its backing array, so both offsets add up.
    DoCopy<T>(
        source.DevPtr,
        source.Offset + devOffset,
        hostArray,
        hostOffset,
        count);
}
/// <summary>
/// Copies the contents of one preallocated device array into another.
/// The number of elements copied is the smaller of the two arrays' total sizes.
/// </summary>
/// <typeparam name="T">Blittable type.</typeparam>
/// <param name="srcDevArray">The source device array.</param>
/// <param name="dstDevArray">The destination device array.</param>
public override void CopyOnDevice<T>(T[] srcDevArray, T[] dstDevArray)
{
    var source = (EmuDevicePtrEx)GetDeviceMemory(srcDevArray);
    var target = (EmuDevicePtrEx)GetDeviceMemory(dstDevArray);

    // Never copy more than either side holds.
    int length = Math.Min(source.TotalSize, target.TotalSize);

    Array.Copy(source.DevPtr, source.Offset, target.DevPtr, target.Offset, length);
}
/// <summary>
/// Copies a range of elements from a device array on this instance to a device array
/// that is registered with <paramref name="peer"/>.
/// </summary>
/// <typeparam name="T">Element type of the arrays.</typeparam>
/// <param name="srcDevArray">The source device array, resolved through this instance.</param>
/// <param name="srcOffset">Offset into the source array; added to the source handle's own offset.</param>
/// <param name="peer">The instance whose <c>GetDeviceMemory</c> resolves <paramref name="dstDevArray"/>.</param>
/// <param name="dstDevArray">The destination device array.</param>
/// <param name="dstOffet">Offset into the destination array; added to the destination handle's own offset.</param>
/// <param name="count">Number of elements to copy.</param>
protected override void DoCopyDeviceToDevice<T>(Array srcDevArray, int srcOffset, GPGPU peer, Array dstDevArray, int dstOffet, int count)
{
    var source = (EmuDevicePtrEx)GetDeviceMemory(srcDevArray);
    var target = (EmuDevicePtrEx)peer.GetDeviceMemory(dstDevArray);

    Array.Copy(
        source.DevPtr,
        source.Offset + srcOffset,
        target.DevPtr,
        target.Offset + dstOffet,
        count);
}
/// <summary>
/// Resets a range of a device array to the default value of its element type
/// (via <see cref="Array.Clear(Array, int, int)"/>).
/// </summary>
/// <typeparam name="T">Element type of the array.</typeparam>
/// <param name="devArray">The device array; passed to <c>VerifyOnGPU</c> before it is touched.</param>
/// <param name="offset">Offset of the first element to clear, relative to the start of the device array.</param>
/// <param name="count">
/// Number of elements to clear. 0 (the default) means "everything from
/// <paramref name="offset"/> to the end of the device array".
/// </param>
protected override void DoSet<T>(Array devArray, int offset = 0, int count = 0)
{
    VerifyOnGPU(devArray);
    EmuDevicePtrEx ptrEx = (EmuDevicePtrEx)GetDeviceMemory(devArray);

    if (count == 0)
    {
        // Only the elements after the offset remain. Using the full TotalSize here would
        // run 'offset' elements past the end of the logical array (throwing, or wiping
        // neighbouring data when the handle is a view into a larger backing array).
        count = ptrEx.TotalSize - offset;
    }

    Array.Clear(ptrEx.DevPtr, ptrEx.Offset + offset, count);
}
/// <summary>
/// Creates a new three-dimensional view onto the storage behind an existing device array.
/// A fresh, empty <c>T[,,]</c> is registered as the handle for the view and returned.
/// </summary>
/// <typeparam name="T">Element type of the source array.</typeparam>
/// <typeparam name="U">Element type of the view; must be the same type as <typeparamref name="T"/>.</typeparam>
/// <param name="offset">Offset recorded for the new view.</param>
/// <param name="devArray">The existing device array whose backing storage is shared.</param>
/// <param name="x">First dimension passed to the new view.</param>
/// <param name="y">Second dimension passed to the new view.</param>
/// <param name="z">Third dimension passed to the new view.</param>
/// <returns>The empty array that now acts as the handle for the view.</returns>
/// <exception cref="CudafyHostException">Thrown when <typeparamref name="T"/> and <typeparamref name="U"/> differ.</exception>
protected override Array DoCast<T, U>(int offset, Array devArray, int x, int y, int z)
{
    if (typeof(T) == typeof(U))
    {
        var original = (EmuDevicePtrEx)GetDeviceMemory(devArray);

        // The view shares the original backing array; only offset and dimensions differ.
        var view = new EmuDevicePtrEx(offset, original.DevPtr, x, y, z);

        // The zero-sized array carries no data; it only serves as the key for the view.
        T[,,] handle = new T[0, 0, 0];
        AddToDeviceMemory(handle, view);
        return handle;
    }

    throw new CudafyHostException(CudafyHostException.csX_NOT_SUPPORTED, "Casting between types on Emulator");
}
// protected override void DoLaunch(dim3 gridSize, dim3 blockSize, int streamId, KernelMethodInfo gpuMethodInfo, params object[] arguments) // { // if (streamId > -1 && !_streams.ContainsKey(streamId)) // _streams.Add(streamId, streamId); // MethodInfo mi = gpuMethodInfo.Method; // if (mi == null) // throw new CudafyHostException(CudafyHostException.csX_NOT_SET, gpuMethodInfo.Name); // bool isStatic = mi.IsStatic; // object instance = isStatic ? null : Activator.CreateInstance(mi.DeclaringType); // if (gpuMethodInfo.IsDummy) // { // mi.Invoke(instance, arguments); // return; // } // //List<Type> paramTypes = new List<Type>(); // //mi.Parameters().ToList().ForEach(p => paramTypes.Add(p.ParameterType)); // MethodInvoker imi = null; // //mi.DeclaringType.DelegateForCallMethod(mi.Name, // //typeof(GThread), typeof(byte[]), typeof(long), typeof(uint[])); //mi.DelegateForCallMethod(); // StaticMethodInvoker smi = null; // if(isStatic) // smi = mi.DelegateForCallStaticMethod(); // else // imi = mi.DelegateForCallMethod(); // GGrid grid = new GGrid(gridSize); // for (int x = 0; x < gridSize.x; x++) // { // for (int y = 0; y < gridSize.y; y++) // { // int totalSize = blockSize.x * blockSize.y * blockSize.z; // Thread[] threads = new Thread[totalSize]; // IAsyncResult[] ars = new IAsyncResult[totalSize]; // GBlock blk2lnch = new GBlock(grid, blockSize, x, y); // int tCtr = 0; // for (int tx = 0; tx < blockSize.x; tx++) // { // for (int ty = 0; ty < blockSize.y; ty++) // { // GThread ht = new GThread(tx, ty, blk2lnch); // object[] pList = BuildParameterList(mi, ht, arguments); //#warning OPTIMIZATION if there is no synchronize then start and join threads in multiple of processor count - check this in disassembly and put flag in gpuMethodInfo // //threads[tCtr] = new Thread(() => // //{ // IAsyncResult ar = null; // if(isStatic) // ar = smi.BeginInvoke(pList, null, null); // else // ar = imi.BeginInvoke(instance, pList, null, null); // //if (mi.IsStatic) // // mi.Call(pList); 
//                        //else
//                        //    mi.Call(instance, pList);
//                        // });
//                        //mi.Call(instance, pList);
//                        //threads[tCtr].Name = string.Format("Grid_{0}_{1}_Thread_{2}_{3}", x, y, tx, ty);
//                        //threads[tCtr].Start();
//                        //if (ctr % 16 == 0)
//                        //    Console.WriteLine("Ctr=" + ctr.ToString());
//                        //ctr++;
//                        ars[tCtr] = ar;
//                        tCtr++;
//                    }
//                }
//                for (int i = 0; i < totalSize; i++)
//                {
//                    //threads[i].Join();
//                    //Console.WriteLine("Thread {0} exited.", threads[i].Name);
//                    if (isStatic)
//                        smi.EndInvoke(ars[i]);
//                    else
//                        imi.EndInvoke(ars[i]);
//                }
//            }
//        }
//    }

/// <summary>
/// Translates the arguments supplied by the caller into the argument list for a kernel method.
/// </summary>
/// <remarks>
/// For every parameter of <paramref name="mi"/>:
/// <list type="bullet">
/// <item><description>A <c>GThread</c> parameter gets a placeholder <c>GThread</c> instance.</description></item>
/// <item><description>A non-array parameter that is given an array argument receives element 0 of the argument's backing storage.</description></item>
/// <item><description>An array parameter receives the backing array itself when the handle has offset 0 and the ranks match;
/// otherwise a temporary copy is created and recorded in <paramref name="dic"/>.</description></item>
/// <item><description>Any other argument is passed through unchanged.</description></item>
/// </list>
/// A <c>GThread</c> found among the user arguments is discarded and the same parameter is matched
/// against the next user argument.
/// </remarks>
/// <param name="mi">The kernel method whose parameters are matched.</param>
/// <param name="userArgs">The arguments supplied by the caller.</param>
/// <param name="dic">Receives a map from each temporary array to the device pointer it was copied from.</param>
/// <returns>The argument list that was built.</returns>
/// <exception cref="CudafyHostException">Thrown for <c>out</c> and <c>ref</c> parameters.</exception>
private object[] BuildParameterList2(MethodInfo mi, object[] userArgs, out Dictionary<Array, EmuDevicePtrEx> dic)
{
    dic = new Dictionary<Array, EmuDevicePtrEx>();

    var kernelArgs = new List<object>();
    ParameterInfo[] parameters = mi.GetParameters();
    int nextUserArg = 0;

    for (int p = 0; p < parameters.Length; p++)
    {
        ParameterInfo parameter = parameters[p];
        Type parameterType = parameter.ParameterType;

        if (parameterType == typeof(GThread))
        {
            // Placeholder only; no block is attached to it here.
            kernelArgs.Add(new GThread(0, 0, null));
            continue;
        }

        if (nextUserArg >= userArgs.Length)
        {
            // No user arguments left for this parameter.
            continue;
        }

        object arg = userArgs[nextUserArg++];

        if (arg is GThread)
        {
            // Drop the user-supplied GThread and retry this parameter with the next argument.
            p--;
            continue;
        }

        if (!parameterType.IsArray && arg.GetType().IsArray && !parameter.IsOut && !parameterType.IsByRef)
        {
            // Scalar parameter fed from a device array: pass the first element of the backing storage.
            // NOTE(review): index 0 is used regardless of the handle's Offset - confirm that is intended.
            var scalarSource = (EmuDevicePtrEx)GetDeviceMemory(arg);
            kernelArgs.Add(scalarSource.DevPtr.GetValue(0));
            continue;
        }

        if (parameter.IsOut)
        {
            throw new CudafyHostException(CudafyHostException.csPARAMETER_PASSED_BY_REFERENCE_X_NOT_CURRENTLY_SUPPORTED, "out");
        }

        if (parameterType.IsByRef)
        {
            throw new CudafyHostException(CudafyHostException.csPARAMETER_PASSED_BY_REFERENCE_X_NOT_CURRENTLY_SUPPORTED, "ref");
        }

        if (!arg.GetType().IsArray)
        {
            kernelArgs.Add(arg);
            continue;
        }

        var devicePtr = (EmuDevicePtrEx)GetDeviceMemory(arg);

        if (devicePtr.Offset == 0 && devicePtr.DevPtr.Rank == parameterType.GetArrayRank())
        {
            // The backing array already has the shape the kernel expects; share it directly.
            kernelArgs.Add(devicePtr.DevPtr);
        }
        else
        {
            // Offset view or rank mismatch: hand the kernel a temporary copy and remember
            // where it came from through 'dic'.
            Type elementType = parameterType.GetElementType();
            Array tempArray = Array.CreateInstance(elementType, devicePtr.GetDimensions());
            DoCopy(devicePtr.DevPtr, devicePtr.Offset, tempArray, 0, devicePtr.TotalSize, elementType);
            kernelArgs.Add(tempArray);
            dic.Add(tempArray, devicePtr);
        }
    }

    return kernelArgs.ToArray();
}
/// <summary>
/// Launches a kernel method on the emulator.
/// </summary>
/// <remarks>
/// Dummy methods are invoked once, directly, with device arrays replaced by their backing arrays.
/// For all other methods the blocks of the grid are executed one after another; within a block
/// every (x, y) thread position runs on its own <see cref="Thread"/>, and all threads of the block
/// are joined before the next block starts. After the last block, temporary arrays created by
/// <c>BuildParameterList2</c> are copied back into the device memory they were taken from.
/// </remarks>
/// <param name="gridSize">Size of the grid.</param>
/// <param name="blockSize">Size of the block.</param>
/// <param name="streamId">Stream id, or -1 for non-async.</param>
/// <param name="gpuMethodInfo">The gpu method info.</param>
/// <param name="arguments">The arguments.</param>
/// <exception cref="CudafyHostException">
/// Thrown when the method is not set, when a dummy function is given a device array with a
/// non-zero offset, or when a 3D grid or block size is requested.
/// </exception>
protected override void DoLaunch(dim3 gridSize, dim3 blockSize, int streamId, KernelMethodInfo gpuMethodInfo, params object[] arguments)
{
    if (streamId > -1 && !_streams.ContainsKey(streamId))
    {
        _streams.Add(streamId, streamId);
    }

    MethodInfo mi = gpuMethodInfo.Method;
    if (mi == null)
    {
        throw new CudafyHostException(CudafyHostException.csX_NOT_SET, gpuMethodInfo.Name);
    }

    object instance = mi.IsStatic ? null : Activator.CreateInstance(mi.DeclaringType);

    if (gpuMethodInfo.IsDummy)
    {
        object[] argsCopy = new object[arguments.Length];
        for (int i = 0; i < arguments.Length; i++)
        {
            object arg = arguments[i];

            // A null argument has no runtime type to inspect; it is handed through unchanged
            // instead of failing with a NullReferenceException.
            EmuDevicePtrEx v = null;
            if (arg != null && arg.GetType().IsArray)
            {
                v = TryGetDeviceMemory(arg) as EmuDevicePtrEx;
            }

            if (v == null)
            {
                // Not a registered device array: pass the argument as it is.
                argsCopy[i] = arg;
            }
            else if (v.Offset == 0)
            {
                argsCopy[i] = v.DevPtr;
            }
            else
            {
                throw new CudafyHostException(CudafyHostException.csX_NOT_CURRENTLY_SUPPORTED, "Offsets in arrays passed to dummy functions");
            }
        }

        mi.Invoke(instance, argsCopy);
        return;
    }

    // Reject unsupported launch shapes before any device data is copied into temporaries.
    if (gridSize.z > 1)
    {
        throw new CudafyHostException(CudafyHostException.csX_NOT_SUPPORTED, "3D grid sizes");
    }

    if (blockSize.z > 1)
    {
        throw new CudafyHostException(CudafyHostException.csX_NOT_SUPPORTED, "3D block sizes");
    }

    GGrid grid = new GGrid(gridSize);
    Dictionary<Array, EmuDevicePtrEx> dic;
    object[] pList = BuildParameterList2(mi, arguments, out dic);
    int pListLen = pList.Length;

    // Threads are created for the x/y extent only, so the thread array is sized to match.
    // Including blockSize.z here made a z of 0 produce an empty array and an
    // IndexOutOfRangeException on the first thread.
    int threadsPerBlock = blockSize.x * blockSize.y;

    for (int x = 0; x < gridSize.x; x++)
    {
        for (int y = 0; y < gridSize.y; y++)
        {
            Thread[] threads = new Thread[threadsPerBlock];
            GBlock blk2lnch = new GBlock(grid, blockSize, x, y);
            int tCtr = 0;

            for (int tx = 0; tx < blockSize.x; tx++)
            {
                for (int ty = 0; ty < blockSize.y; ty++)
                {
                    GThread ht = new GThread(tx, ty, blk2lnch);

                    // Every thread gets its own argument array in which the GThread
                    // placeholders are replaced by this thread's GThread.
                    object[] pListCopy = new object[pListLen];
                    for (int pc = 0; pc < pListLen; pc++)
                    {
                        if (pList[pc] is GThread)
                        {
                            pListCopy[pc] = ht;
                        }
                        else
                        {
                            pListCopy[pc] = pList[pc];
                        }
                    }

#warning OPTIMIZATION if there is no synchronize then start and join threads in multiple of processor count - check this in disassembly and put flag in gpuMethodInfo
                    threads[tCtr] = new Thread(() =>
                    {
                        mi.Invoke(instance, pListCopy);
                    });
                    threads[tCtr].Name = string.Format("Grid_{0}_{1}_Thread_{2}_{3}", x, y, tx, ty);
                    threads[tCtr].Start();
                    tCtr++;
                }
            }

            // Wait for the whole block before moving on to the next one.
            for (int i = 0; i < threadsPerBlock; i++)
            {
                threads[i].Join();
            }
        }
    }

    // Copy temporaries back into the device memory they were created from.
    // pList still contains the GThread placeholders: when one is read here it is skipped
    // and the same parameter is matched against the next entry.
    int iArgs = 0;
    ParameterInfo[] piArray = mi.GetParameters();
    for (int iParams = 0; iParams < piArray.Length; iParams++)
    {
        ParameterInfo pi = piArray[iParams];
        if (pi.ParameterType == typeof(GThread) || iArgs >= pList.Length)
        {
            continue;
        }

        object o = pList[iArgs++];
        if (o is GThread)
        {
            iParams--;
            continue;
        }

        // Only arrays recorded in 'dic' are temporaries that need to be written back.
        Array tempArray = o as Array;
        EmuDevicePtrEx ptrEx;
        if (tempArray != null && dic.TryGetValue(tempArray, out ptrEx))
        {
            DoCopy(tempArray, 0, ptrEx.DevPtr, ptrEx.Offset, ptrEx.TotalSize, pi.ParameterType.GetElementType());
        }
    }
}