/// <summary>
/// Sets up fitness evaluation for dimension reduction: stores the collaborators, allocates the
/// device buffers and loads the "fitnessFunction" and "countVectorsIndeces" kernels.
/// </summary>
/// <param name="context">CUDA context the kernels are loaded into.</param>
/// <param name="accuracyFunc">Accuracy implementation kept for later use.</param>
/// <param name="popSize">Population size; used as the block dimension of both kernels and to size the device buffers.</param>
/// <param name="genLength">Genome length; uploaded as the "genLength" constant of the index kernel.</param>
public DimensionReductionFitness(
    CudaContext context,
    IDimensionAccuracy accuracyFunc,
    int popSize,
    int genLength
)
{
    this.context = context;
    this.accuracyFunc = accuracyFunc;
    this.popSize = popSize;

    // Device buffer holding popSize ints.
    deviceVectorSizes = new CudaDeviceVariable<int>(popSize);

    // Fitness kernel: a single block of popSize threads.
    var fitness = context.LoadKernel(
        "kernels/dimensionsReductions.ptx",
        "fitnessFunction"
    );
    fitnessKernel = fitness;
    fitness.GridDimensions = 1;
    fitness.BlockDimensions = popSize;

    Alpha = 0.7f;

    // Index kernel: same launch shape, plus the genome length constant.
    var indexKernel = context.LoadKernel("kernels/Common.ptx", "countVectorsIndeces");
    sizeAndIndecesKernel = indexKernel;
    indexKernel.SetConstantVariable("genLength", genLength);
    indexKernel.GridDimensions = 1;
    indexKernel.BlockDimensions = popSize;

    // Device buffer holding genLength ints for each of the popSize individuals.
    populationIndeces = new CudaDeviceVariable<int>(genLength * popSize);
}
/// <summary>
/// Builds a layer from a <see cref="FileLayer"/> description: copies its type and size, allocates
/// the host arrays and loads the "Forward", "Backprop", "Clear" and "Activate" kernels from kernel.ptx.
/// </summary>
/// <param name="fl">Layer description supplying type, size and prevSize.</param>
/// <param name="ctx">CUDA context used to load the kernels; also stored on the layer.</param>
public Layer(FileLayer fl, ref CudaContext ctx)
{
    this.ctx = ctx;
    type = fl.type;
    size = fl.size;

    // data, bias and error each hold fl.size.Mul floats.
    var cellCount = fl.size.Mul;
    data = new float[cellCount];
    bias = new float[cellCount];
    error = new float[cellCount];

    // Grid comes from this layer's size, block from the previous layer's size.
    var grid = new dim3(size.x, size.y, size.z);
    var block = new dim3(fl.prevSize.x, fl.prevSize.y, fl.prevSize.z);

    forward = ctx.LoadKernel("kernel.ptx", "Forward");
    forward.GridDimensions = grid;
    forward.BlockDimensions = block;

    back = ctx.LoadKernel("kernel.ptx", "Backprop");
    back.GridDimensions = grid;
    back.BlockDimensions = block;

    // Clear and Activate only get a grid; their block dimensions are left untouched here.
    clear = ctx.LoadKernel("kernel.ptx", "Clear");
    clear.GridDimensions = grid;

    activate = ctx.LoadKernel("kernel.ptx", "Activate");
    activate.GridDimensions = grid;
}
/// <summary>
/// Creates the CUDA context on device 0, loads the velocity-update and transpose kernels,
/// generates the initial host-side swarm state, uploads it to the device and calls <c>Init()</c>.
/// </summary>
/// <remarks>
/// HostNeighbors stores two entries per particle: slot 2*p holds the left neighbour of particle p
/// and slot 2*p+1 the right neighbour, both wrapping around at the ends of the particle range.
/// </remarks>
protected void InitContext()
{
    var size = ParticlesCount * DimensionsCount;
    var threadsNum = 32;
    // NOTE(review): integer division - when ParticlesCount is not a multiple of 32 the remainder
    // gets no thread, and ParticlesCount < 32 yields zero blocks. Left unchanged because rounding
    // up is only safe if the kernels bounds-check their index - confirm against the kernels.
    var blocksNum = ParticlesCount / threadsNum;

    Ctx = new CudaContext(0);

    UpdateVelocity = Ctx.LoadKernel("update_velocity_kernel.ptx", "updateVelocityKernel");
    UpdateVelocity.GridDimensions = blocksNum;
    UpdateVelocity.BlockDimensions = threadsNum;

    Transpose = Ctx.LoadKernel(KernelFile, "transposeKernel");
    Transpose.GridDimensions = blocksNum;
    Transpose.BlockDimensions = threadsNum;

    HostPositions = Random.RandomVector(size, -5.0, 5.0);
    HostVelocities = Random.RandomVector(size, -2.0, 2.0);
    HostPersonalBests = (double[])HostPositions.Clone();
    HostPersonalBestValues = Enumerable.Repeat(double.MaxValue, ParticlesCount).ToArray();

    // Iterate over particles (not array slots) so that every stored neighbour is a valid
    // particle index in [0, ParticlesCount) and the wrap-around happens at the real ends.
    HostNeighbors = new int[ParticlesCount * 2];
    for (var particle = 0; particle < ParticlesCount; particle++)
    {
        var slot = particle * 2;
        HostNeighbors[slot] = particle == 0 ? ParticlesCount - 1 : particle - 1;
        HostNeighbors[slot + 1] = particle == ParticlesCount - 1 ? 0 : particle + 1;
    }

    // Assigning host arrays to the device members uploads them.
    DevicePositions = HostPositions;
    DeviceVelocities = HostVelocities;
    DevicePersonalBests = HostPersonalBests;
    DevicePersonalBestValues = HostPersonalBestValues;
    DeviceNeighbors = HostNeighbors;

    Init();
}
/// <summary>
/// Prepares accuracy evaluation for vector reduction: allocates the device buffers, runs
/// <c>Neabours.CalculateNeabours</c> once (wrapped in a profiler section) and configures the
/// "calculateAccuracy" kernel.
/// </summary>
/// <param name="context">CUDA context used to load kernels.</param>
/// <param name="teaching">Teaching data set resident on the device.</param>
/// <param name="test">Test data set resident on the device.</param>
/// <param name="popSize">Population size; sizes the accuracy buffer and the grid's y dimension.</param>
public VectorReductionAccuracy(CudaContext context, DeviceDataSet<int> teaching, DeviceDataSet<int> test, int popSize)
{
    this.context = context;
    this.teaching = teaching;
    this.test = test;
    this.popSize = popSize;

    // One int per (teaching vector, test vector) pair and one float per individual.
    calculatedNeabours = new CudaDeviceVariable<int>(teaching.length * test.length);
    deviceAccuracy = new CudaDeviceVariable<float>(popSize);

    const string profilerSection = "calculate neabours";
    Profiler.Start(profilerSection);
    Neabours.CalculateNeabours(context, teaching, test, calculatedNeabours, ThreadsPerBlock);
    Profiler.Stop(profilerSection);

    var kernel = context.LoadKernel("kernels/VectorReduction.ptx", "calculateAccuracy");
    accuracyKernel = kernel;

    // x covers the test vectors in ThreadsPerBlock-sized blocks, y has one row per individual.
    dim3 grid = new dim3();
    grid.x = (uint)(test.length / ThreadsPerBlock + 1);
    grid.y = (uint)popSize;
    grid.z = 1;
    kernel.GridDimensions = grid;
    kernel.BlockDimensions = ThreadsPerBlock;

    kernel.SetConstantVariable("testVectorsCount", test.length);
    kernel.SetConstantVariable("teachingVectorsCount", teaching.length);
    kernel.SetConstantVariable("attributeCount", teaching.attributeCount);
    kernel.SetConstantVariable("genLength", teaching.length);

    K = 3;
    CountToPass = 2;
}
/// <summary>
/// Launches the "calculateNearestNeabours" kernel for the given data sets and then hands the
/// distance buffer and <paramref name="calculatedNeabours"/> to <c>Thrust.sort_by_key_multiple</c>.
/// </summary>
/// <typeparam name="T">Element type of the data sets.</typeparam>
/// <param name="context">CUDA context used to load the kernel.</param>
/// <param name="teaching">Teaching data set resident on the device.</param>
/// <param name="test">Test data set resident on the device.</param>
/// <param name="calculatedNeabours">Device buffer passed to the kernel and to the sort call.</param>
/// <param name="threadsPerBlock">Block dimension of the kernel launch.</param>
public static void CalculateNeabours<T>(
    CudaContext context,
    DeviceDataSet<T> teaching,
    DeviceDataSet<T> test,
    CudaDeviceVariable<int> calculatedNeabours,
    int threadsPerBlock
) where T : struct
{
    var teachingCount = teaching.length;
    var testCount = test.length;

    var neighbourKernel = context.LoadKernel("kernels/VectorReduction.ptx", "calculateNearestNeabours");
    neighbourKernel.GridDimensions = testCount / threadsPerBlock + 1;
    neighbourKernel.BlockDimensions = threadsPerBlock;
    neighbourKernel.SetConstantVariable("testVectorsCount", testCount);
    neighbourKernel.SetConstantVariable("teachingVectorsCount", teachingCount);
    neighbourKernel.SetConstantVariable("attributeCount", teaching.attributeCount);

    // Scratch buffer with one float per (teaching, test) pair; released when this method exits.
    var distances = new CudaDeviceVariable<float>(teachingCount * testCount);
    try
    {
        neighbourKernel.Run(
            teaching.vectors.DevicePointer,
            test.vectors.DevicePointer,
            distances.DevicePointer,
            calculatedNeabours.DevicePointer
        );

        // Presumably orders each test vector's neighbour indices by distance - confirm against Thrust wrapper.
        Thrust.sort_by_key_multiple(distances, calculatedNeabours, teachingCount, testCount);
    }
    finally
    {
        distances.Dispose();
    }
}
/// <summary>
/// Testing managed CUDA call: loads the "VecAdd" kernel, uploads two host vectors, runs the
/// kernel and copies the result back to the host.
/// </summary>
/// <remarks>
/// The context and all device buffers are released deterministically when the method exits,
/// including when the kernel launch throws.
/// </remarks>
private static void RunCudaWithAKernel()
{
    // C# Cuda code to call kernel
    int N = 50000;
    int deviceID = 0;

    using (CudaContext ctx = new CudaContext(deviceID))
    {
        CudaKernel kernel = ctx.LoadKernel("kernel_x64.ptx", "VecAdd");
        int numOfThreads = 256;
        // Round up so that every one of the N elements is covered by a thread.
        kernel.GridDimensions = (N + numOfThreads - 1) / numOfThreads;
        kernel.BlockDimensions = numOfThreads;

        // allocate memory in host (not gpu)
        var h_A = InitWithData(N, numOfThreads * 4);
        var h_B = InitWithData(N, numOfThreads);

        // Allocate vectors in device memory and copy from host to device.
        // The device buffers are disposed before the context that owns them.
        using (CudaDeviceVariable<float> d_A = h_A)
        using (CudaDeviceVariable<float> d_B = h_B)
        using (CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(N))
        {
            //Invoke kernel
            kernel.Run(d_A.DevicePointer, d_B.DevicePointer, d_C.DevicePointer, N);
            Console.WriteLine("kernel has runeth");

            //Copy from memory of device to host.
            float[] h_C = d_C;
        }
    }
}
/// <summary>
/// Loads the four layer kernels from kernel.ptx by name and assigns their launch dimensions.
/// </summary>
/// <param name="forwardName">Entry point of the forward kernel.</param>
/// <param name="backName">Entry point of the backward kernel.</param>
/// <param name="clrName">Entry point of the clear kernel.</param>
/// <param name="activeName">Entry point of the activation kernel.</param>
/// <param name="kernelSize">Block dimensions applied to the forward and backward kernels.</param>
private void generateKernels(string forwardName, string backName, string clrName, string activeName, dim3 kernelSize)
{
    // Every kernel uses a grid matching this layer's size.
    var grid = new dim3(size.x, size.y, size.z);

    forward = ctx.LoadKernel("kernel.ptx", forwardName);
    forward.GridDimensions = grid;
    forward.BlockDimensions = kernelSize;

    back = ctx.LoadKernel("kernel.ptx", backName);
    back.GridDimensions = grid;
    back.BlockDimensions = kernelSize;

    // Load the clear kernel by its own name; it was previously loaded with activeName,
    // which made "clear" a second copy of the activation kernel and left clrName unused.
    clear = ctx.LoadKernel("kernel.ptx", clrName);
    clear.GridDimensions = grid;

    activate = ctx.LoadKernel("kernel.ptx", activeName);
    activate.GridDimensions = grid;
}
/// <summary>
/// Test CUDA kernel for complex multiplication: fills two arrays of <paramref name="N"/> complex
/// values, runs "ComplexMultCUDA" on them and prints the last element read back from the second buffer.
/// </summary>
/// <param name="N">Number of complex elements; also used as the grid dimension (one thread per block).</param>
/// <remarks>
/// The context and device buffers are always released, including on the early return taken when
/// the upload fails.
/// </remarks>
public void test(int N)
{
    using (CudaContext ctx = new CudaContext())
    {
        CudaKernel kernel = ctx.LoadKernel("kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = N;
        kernel.BlockDimensions = 1;

        double2[] a = new double2[N];
        double2[] b = new double2[N];
        for (int i = 0; i < N; i++)
        {
            a[i].x = 1;
            a[i].y = 3;
            b[i].x = 2;
            b[i].y = 2;
        }

        CudaDeviceVariable<double2> d_a = null;
        CudaDeviceVariable<double2> d_b = null;
        try
        {
            try
            {
                // Assigning a host array allocates device memory and uploads the data.
                d_a = a;
                d_b = b;
            }
            catch (Exception e)
            {
                Console.WriteLine("{0} Exception caught.", e);
                return;
            }

            kernel.Run(d_a.DevicePointer, d_b.DevicePointer, N);

            // The result is read back from the second buffer.
            double2[] c = d_b;
            Console.WriteLine("C.last()={0}+i{1}", c.Last().x, c.Last().y);
        }
        finally
        {
            // Release device memory before the enclosing using disposes the context.
            if (d_b != null)
            {
                d_b.Dispose();
            }
            if (d_a != null)
            {
                d_a.Dispose();
            }
        }
    }
}
/// <summary>
/// Evaluates the accuracy obtained with every teaching vector selected (a genome of all ones):
/// runs a single-genome launch of "calculateAccuracy" followed by the "RMSE" kernel and returns
/// the single float read back from the device.
/// </summary>
/// <returns>The value left in the one-element result buffer after the "RMSE" kernel.</returns>
public float BaseAccuracy()
{
    // Same kernel as the population evaluation, but with a grid of exactly one genome row (y = 1).
    var baseKernel = context.LoadKernel("kernels/VectorReduction.ptx", "calculateAccuracy");
    dim3 gridDimension = new dim3()
    {
        x = (uint)(test.length / ThreadsPerBlock + 1),
        y = (uint)1,
        z = 1
    };
    baseKernel.GridDimensions = gridDimension;
    baseKernel.BlockDimensions = ThreadsPerBlock;
    baseKernel.SetConstantVariable("testVectorsCount", test.length);
    baseKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
    baseKernel.SetConstantVariable("attributeCount", teaching.attributeCount);
    baseKernel.SetConstantVariable("genLength", teaching.length);

    var BaseRMSEKernel = context.LoadKernel("kernels/VectorReduction.ptx", "RMSE");
    BaseRMSEKernel.GridDimensions = 1;
    BaseRMSEKernel.BlockDimensions = 1;
    BaseRMSEKernel.SetConstantVariable("testVectorsCount", test.length);

    // Genome with every teaching vector switched on.
    byte[] gen = new byte[teaching.length];
    for (int i = 0; i < gen.Length; i++)
    {
        gen[i] = 1;
    }

    using (CudaDeviceVariable<byte> deviceGen = gen)
    // Start from a defined value (0) instead of uninitialised device memory.
    using (CudaDeviceVariable<float> baseAccuracy = new float[] { 0f })
    {
        // Launch the kernel configured above. Previously the population-sized accuracyKernel was
        // launched here against this single-genome buffer, and its output went to deviceAccuracy
        // while the value returned below was read from baseAccuracy, which nothing had written.
        // NOTE(review): assumes "calculateAccuracy" writes row 0 of its last argument and "RMSE"
        // post-processes that same buffer in place - confirm against VectorReduction.ptx.
        baseKernel.Run(
            test.classes.DevicePointer,
            teaching.classes.DevicePointer,
            deviceGen.DevicePointer,
            calculatedNeabours.DevicePointer,
            baseAccuracy.DevicePointer
        );

        BaseRMSEKernel.Run(baseAccuracy.DevicePointer);

        float[] host = baseAccuracy;
        return host[0];
    }
}
/// <summary>
/// Filters <paramref name="x"/> with the impulse response <paramref name="h"/> on the GPU:
/// forward real-to-complex FFT of both signals, "ComplexMultCUDA" on the spectra, inverse FFT,
/// then returns a slice of the result converted to float.
/// </summary>
/// <param name="x">Input samples; copied into a zero-padded buffer of N + h.Count - 1 elements.</param>
/// <param name="h">Impulse response; copied into a zero-padded buffer of the same length.</param>
/// <param name="N">Nominal signal length; the transform length is N + h.Count - 1.</param>
/// <returns>
/// x.Count samples starting at offset 500 of the result, or <c>null</c> when the impulse
/// response could not be uploaded or transformed.
/// </returns>
/// <remarks>
/// All GPU resources (context, FFT plans, device buffers) are released on every exit path.
/// </remarks>
public List<float> hypotesis(List<double> x, List<double> h, int N)
{
    string path = Path.GetDirectoryName(mv.plugins[0].filename);
    int fftLength = N + h.Count - 1;

    CudaContext ctx = new CudaContext();
    CudaDeviceVariable<double> d_x = null;
    CudaDeviceVariable<double> d_h = null;
    CudaDeviceVariable<double> d_y = null;
    CudaDeviceVariable<double2> d_X = null;
    CudaDeviceVariable<double2> d_H = null;
    CudaFFTPlan1D planForward = null;
    CudaFFTPlan1D planInverse = null;
    try
    {
        CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = (int)Math.Ceiling((double)fftLength / 1024);
        kernel.BlockDimensions = 1024;

        // Zero-padded host copies of both signals.
        double[] temp_h = new double[fftLength];
        double[] temp_x = new double[fftLength];
        h.CopyTo(temp_h, 0);
        x.CopyTo(temp_x, 0);

        d_X = new CudaDeviceVariable<double2>(fftLength);
        d_H = new CudaDeviceVariable<double2>(fftLength);
        d_y = new CudaDeviceVariable<double>(fftLength);
        planForward = new CudaFFTPlan1D(fftLength, cufftType.D2Z, 1);
        planInverse = new CudaFFTPlan1D(fftLength, cufftType.Z2D, 1);

        try
        {
            // The assignment allocates d_h and uploads temp_h; no separate pre-allocation is needed.
            d_h = temp_h;
            planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
        }
        catch (Exception exp)
        {
            mainView.log(exp, "CUDA error: Impulse response FFT", this);
            return null;
        }

        try
        {
            d_x = temp_x;
            planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
            kernel.Run(d_H.DevicePointer, d_X.DevicePointer, fftLength);
            planInverse.Exec(d_X.DevicePointer, d_y.DevicePointer);
        }
        catch (Exception exp)
        {
            // Best effort (unchanged): the failure is logged and whatever is in d_y is still returned.
            mainView.log(exp, "Cuda error: kernel run cuda error", this);
        }

        double[] temp_y = d_y;

        // NOTE(review): the fixed offset 500 differs from the h.Count / 2 used by the sibling
        // FIR routines and makes GetRange throw when 500 + x.Count exceeds the result length - confirm.
        return Array.ConvertAll<double, float>(temp_y, d => (float)d).ToList().GetRange(500, x.Count);
    }
    finally
    {
        // Plans and device memory must be released before the context that owns them.
        if (planInverse != null) { planInverse.Dispose(); }
        if (planForward != null) { planForward.Dispose(); }
        if (d_y != null) { d_y.Dispose(); }
        if (d_H != null) { d_H.Dispose(); }
        if (d_X != null) { d_X.Dispose(); }
        if (d_h != null) { d_h.Dispose(); }
        if (d_x != null) { d_x.Dispose(); }
        ctx.Dispose();
    }
}
/// <summary>
/// Loads the "countVectors" kernel from kernels/Common.ptx, records the vector count and genome
/// length, and configures the kernel to launch with a single block.
/// </summary>
/// <param name="context">CUDA context the kernel is loaded into.</param>
/// <param name="vectorCount">Value assigned to <c>VectorCount</c>.</param>
/// <param name="genLength">Value assigned to <c>GenLength</c>.</param>
public CountVectorKernel(CudaContext context, int vectorCount, int genLength)
{
    this.context = context;

    var countKernel = context.LoadKernel("kernels/Common.ptx", "countVectors");
    kernel = countKernel;

    // Assigned after the kernel exists, as in the original ordering
    // (TODO confirm whether these members touch the kernel when set).
    VectorCount = vectorCount;
    GenLength = genLength;

    // Only the grid is fixed here; the block dimensions are not set by this constructor.
    countKernel.GridDimensions = 1;
}
/// <summary>
/// Loads the "genetic" kernel from kernels/evolutionary2.ptx and configures it to launch as one
/// block of popSize threads with popSize floats of dynamic shared memory.
/// </summary>
public void LoadKernels()
{
    var geneticKernel = context.LoadKernel("kernels/evolutionary2.ptx", "genetic");
    performGeneticAlgorythm = geneticKernel;

    // Dynamic shared memory size in bytes: one float per individual.
    uint sharedBytes = (uint)(sizeof(float) * popSize);

    geneticKernel.GridDimensions = 1;
    geneticKernel.BlockDimensions = popSize;
    geneticKernel.DynamicSharedMemory = sharedBytes;
}
/// <summary>
/// Runs the "geneticKnn" kernel once for a single individual that uses every attribute and
/// returns the resulting accumulator divided by the number of test vectors.
/// </summary>
/// <returns>The first element of the kernel's accuracy output divided by <c>test.length</c>.</returns>
public float BaseAccuracy()
{
    var knnKernel = context.LoadKernel
    (
        "kernels/dimensionsReductions.ptx",
        "geneticKnn"
    );

    // A single row in y because only one individual is evaluated.
    dim3 grid = new dim3();
    grid.x = (uint)(test.vectors.Size / ThreadsPerBlock) + 1;
    grid.y = 1;
    grid.z = 1;
    knnKernel.GridDimensions = grid;
    knnKernel.BlockDimensions = ThreadsPerBlock;

    knnKernel.SetConstantVariable("atributeCount", test.attributeCount);
    knnKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
    knnKernel.SetConstantVariable("testVectorsCount", test.length);
    knnKernel.SetConstantVariable("popSize", 1);
    knnKernel.SetConstantVariable("k", K);
    knnKernel.SetConstantVariable("countToPass", CountToPass);
    knnKernel.DynamicSharedMemory = (uint)(test.attributeCount * sizeof(float));

    // Host-side inputs for the single individual: it selects all attributes, indices 0..attributeCount-1.
    int[] vectorSizes = { test.attributeCount };
    int[] indeces = Enumerable.Range(0, test.attributeCount).ToArray();
    float[] acc = { 0f };
    byte[] inCashe = { 0 };

    // Each assignment from a host array allocates a device buffer and uploads the data.
    using (CudaDeviceVariable<int> deviceIndeces = indeces)
    using (CudaDeviceVariable<int> deviceVectorSizesLocal = vectorSizes)
    using (CudaDeviceVariable<float> accuracy = acc)
    using (var heapMem = new CudaDeviceVariable<HeapData>(K))
    using (CudaDeviceVariable<byte> deviceIsInCashe = inCashe)
    {
        knnKernel.Run(
            test.vectors.DevicePointer,
            test.classes.DevicePointer,
            teaching.vectors.DevicePointer,
            teaching.classes.DevicePointer,
            deviceVectorSizesLocal.DevicePointer,
            deviceIndeces.DevicePointer,
            deviceIsInCashe.DevicePointer,
            heapMem.DevicePointer,
            accuracy.DevicePointer
        );

        float[] res = accuracy;
        return res[0] / test.length;
    }
}
/// <summary>
/// Creates a layer of the given size with freshly allocated host arrays and only the "Clear"
/// kernel loaded.
/// </summary>
/// <param name="size">Layer dimensions; <c>size.Mul</c> gives the length of each host array.</param>
/// <param name="ctx">CUDA context used to load the kernel.</param>
public Layer(Int3 size, CudaContext ctx)
{
    this.size = size;

    var cellCount = size.Mul;
    data = new float[cellCount];
    bias = new float[cellCount];
    error = new float[cellCount];

    // NOTE(review): ctx is only used to load the kernel here and is not stored on the layer by
    // this constructor - confirm that is intended.
    var clearKernel = ctx.LoadKernel("kernel.ptx", "Clear");
    clear = clearKernel;
    clearKernel.GridDimensions = new dim3(size.x, size.y, size.z);
}
/// <summary>
/// Creates a layer connected to <paramref name="prev"/>: allocates the host arrays, generates the
/// weights and loads the "Forward", "Backprop", "Clear", "Activate", "SoftmaxSigma" and
/// "SoftmaxFinal" kernels from kernel.ptx.
/// </summary>
/// <param name="size">Dimensions of this layer.</param>
/// <param name="prev">Preceding layer; its size supplies the block dimensions of the forward and backward kernels.</param>
/// <param name="ctx">CUDA context used to load the kernels; also stored on the layer.</param>
/// <param name="type">Layer type identifier stored on the layer.</param>
public Layer(Int3 size, Layer prev, ref CudaContext ctx, int type)
{
    this.ctx = ctx;
    this.type = type;
    this.size = size;

    var cellCount = size.Mul;
    data = new float[cellCount];
    bias = new float[cellCount];
    error = new float[cellCount];

    generateWeights(size, prev.size, kernelType.fullyConnected);

    // Launch shapes: one from this layer's size, one from the previous layer's size.
    var ownShape = new dim3(size.x, size.y, size.z);
    var prevShape = new dim3(prev.size.x, prev.size.y, prev.size.z);

    forward = ctx.LoadKernel("kernel.ptx", "Forward");
    forward.GridDimensions = ownShape;
    forward.BlockDimensions = prevShape;

    back = ctx.LoadKernel("kernel.ptx", "Backprop");
    back.GridDimensions = ownShape;
    back.BlockDimensions = prevShape;

    clear = ctx.LoadKernel("kernel.ptx", "Clear");
    clear.GridDimensions = ownShape;

    activate = ctx.LoadKernel("kernel.ptx", "Activate");
    activate.GridDimensions = ownShape;

    SoftmaxSigma = ctx.LoadKernel("kernel.ptx", "SoftmaxSigma");
    SoftmaxSigma.GridDimensions = ownShape;

    // Unlike the kernels above, SoftmaxFinal receives the layer shape as its block dimensions.
    SoftmaxFinal = ctx.LoadKernel("kernel.ptx", "SoftmaxFinal");
    SoftmaxFinal.BlockDimensions = ownShape;

    SoftmaxVal = new float[] { 0 };
}
/// <summary>
/// Filters <paramref name="x"/> with the impulse response <paramref name="h"/> on the GPU using
/// in-place complex FFTs: forward transform of both zero-padded signals, "ComplexMultCUDA" on the
/// spectra, inverse transform, then returns the real parts of a slice of the result.
/// </summary>
/// <param name="x">Input samples.</param>
/// <param name="h">Impulse response.</param>
/// <returns>
/// x.Count samples starting at offset h.Count / 2 of the result, or <c>null</c> when the signals
/// could not be uploaded to the device.
/// </returns>
/// <remarks>
/// The context, FFT plan and device buffers are released on every exit path.
/// </remarks>
public List<float> CUDA_FIR(List<float> x, List<double> h)
{
    int fftLength = x.Count + h.Count - 1;

    //alloc data to cuda format
    double2[] temp_x = new double2[fftLength];
    double2[] temp_h = new double2[fftLength];

    //data copy (imaginary parts stay zero)
    for (int i = 0; i < x.Count; i++)
    {
        temp_x[i].x = x[i];
    }
    for (int i = 0; i < h.Count; i++)
    {
        temp_h[i].x = h[i];
    }

    CudaContext ctx = new CudaContext();
    CudaDeviceVariable<double2> d_x = null;
    CudaDeviceVariable<double2> d_h = null;
    CudaFFTPlan1D plan1D = null;
    try
    {
        plan1D = new CudaFFTPlan1D(fftLength, cufftType.Z2Z, 1);

        CudaKernel kernel = ctx.LoadKernel("kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = (int)Math.Ceiling((double)fftLength / 1024);
        kernel.BlockDimensions = 1024;

        try
        {
            d_x = temp_x;
            d_h = temp_h;
        }
        catch (Exception)
        {
            // Upload failed: callers receive null (unchanged); the finally block releases whatever was allocated.
            return null;
        }

        plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
        plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);
        kernel.Run(d_h.DevicePointer, d_x.DevicePointer, fftLength);
        plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);

        double2[] temp_y = d_x;
        return temp_y.Select(data => (float)data.x).ToList().GetRange(h.Count / 2, x.Count);
    }
    finally
    {
        // Plan and device memory must be released before the context that owns them.
        if (plan1D != null) { plan1D.Dispose(); }
        if (d_h != null) { d_h.Dispose(); }
        if (d_x != null) { d_x.Dispose(); }
        ctx.Dispose();
    }
}
/// <summary>
/// Runs the "findNeighbours" kernel over the data set, collects the neighbour labels and the
/// nearest-enemy distances on the host, sorts the data with <c>SortDataDesc</c> and returns the
/// result of <c>proccesData</c>.
/// </summary>
/// <param name="context">CUDA context used to load the kernel and passed on to <c>proccesData</c>.</param>
/// <param name="data">Data set whose vectors and classes are uploaded to the device.</param>
/// <returns>The array produced by <c>proccesData</c>.</returns>
private int[] ApplyRest(CudaContext context, CudaDataSet<int> data)
{
    int vectorsCount = data.Vectors.GetLength(0);
    int attributeCount = data.Vectors.GetLength(1);

    var kernel = context.LoadKernel("kernels/drop3.ptx", "findNeighbours");
    kernel.GridDimensions = vectorsCount / ThreadsPerBlock + 1;
    kernel.BlockDimensions = ThreadsPerBlock;

    using (CudaDeviceVariable<int> d_classes = data.Classes)
    using (CudaDeviceVariable<float> vectors = data.Vectors.Raw)
    // CasheSize heap entries per vector.
    using (var heapMemory = new CudaDeviceVariable<HeapData>(vectorsCount * CasheSize))
    using (var nearestEnemyDistances = new CudaDeviceVariable<float>(vectorsCount))
    {
        kernel.Run(
            vectors.DevicePointer,
            vectorsCount,
            attributeCount,
            CasheSize,
            d_classes.DevicePointer,
            heapMemory.DevicePointer,
            nearestEnemyDistances.DevicePointer
        );

        // Copy the kernel outputs back to the host.
        float[] hostNearestEnemy = nearestEnemyDistances;
        var Neighbors = new FlattArray<HeapData>(heapMemory, CasheSize);

        // Keep only the label of each heap entry, one row per vector.
        var nearestNeighbors = new int[vectorsCount][];
        for (int i = 0; i < vectorsCount; i++)
        {
            nearestNeighbors[i] = new int[CasheSize];
            for (int j = 0; j < CasheSize; j++)
            {
                nearestNeighbors[i][j] = Neighbors[i, j].label;
            }
        }

        HostDataset host = data.ToHostDataSet();
        SortDataDesc(host, nearestNeighbors, hostNearestEnemy);

        return proccesData(context, host, nearestNeighbors);
    }
}
/// <summary>
/// Sets up fitness evaluation for vector reduction: stores the collaborators, creates the
/// vector-counting helper, allocates the size buffer and loads the "fitnessFunction" kernel.
/// </summary>
/// <param name="context">CUDA context used to load kernels.</param>
/// <param name="accuracyCalc">Accuracy implementation kept for later use.</param>
/// <param name="popSize">Population size; used as the kernel's block dimension and to size the buffer.</param>
/// <param name="teachingCount">Number of teaching vectors; passed to the counting helper as the genome length.</param>
public VectorReductionFitness(CudaContext context, IVectorReductionAccuracy accuracyCalc, int popSize, int teachingCount)
{
    this.context = context;
    this.accuracyCalc = accuracyCalc;
    this.popSize = popSize;
    this.teachingCount = teachingCount;

    countVectorsKernel = new CountVectorKernel(context, popSize, teachingCount);
    vectorSizes = new CudaDeviceVariable<int>(popSize);

    var fitness = context.LoadKernel("kernels/VectorReduction.ptx", "fitnessFunction");
    fitnessKernel = fitness;

    Alpha = 0.7f;

    // A single block with one thread per individual.
    fitness.BlockDimensions = popSize;
    fitness.GridDimensions = 1;
}
/// <summary>
/// Prepares neighbour search over <paramref name="vectors"/>: records its dimensions, loads the
/// "calculateDistances" kernel and allocates the host and device buffers.
/// </summary>
/// <param name="context">CUDA context used to load the kernel.</param>
/// <param name="vectors">Flattened vectors; dimension 0 is stored as the vector count, dimension 1 as the attribute count.</param>
/// <param name="countToFind">Length of the heap array.</param>
public NeighborFinder(CudaContext context, FlattArray<float> vectors, int countToFind)
{
    var rowCount = vectors.GetLength(0);

    heap = new Data[countToFind];
    this.vectorCount = rowCount;
    this.attrCount = vectors.GetLength(1);
    this.context = context;

    // 256 threads per block, with enough blocks to cover every vector.
    var distanceKernel = context.LoadKernel("kernels/drop3.ptx", "calculateDistances");
    kernel = distanceKernel;
    distanceKernel.GridDimensions = rowCount / 256 + 1;
    distanceKernel.BlockDimensions = 256;

    // One result slot and one byte flag per vector.
    results = new float[rowCount];
    deviceVectors = vectors.Raw;
    deviceResult = new CudaDeviceVariable<float>(rowCount);
    deviceIsInDataSet = new CudaDeviceVariable<byte>(rowCount);
}
/// <summary>
/// Runs the "enn" kernel over the data set and returns the data set filtered by the per-vector
/// flags the kernel produced.
/// </summary>
/// <param name="data">Data set whose vectors and classes are uploaded to the device.</param>
/// <param name="context">CUDA context used to load the kernel.</param>
/// <returns>The result of <c>data.Filter</c> applied to the indexes derived from the kernel output.</returns>
CudaDataSet<int> Enn(CudaDataSet<int> data, CudaContext context)
{
    var kernel = context.LoadKernel("kernels/kernel.ptx", "enn");
    kernel.GridDimensions = data.Vectors.GetLength(0) / ThreadsPerBlock + 1;
    kernel.BlockDimensions = ThreadsPerBlock;

    using (CudaDeviceVariable<float> vectors = data.Vectors.Raw)
    using (CudaDeviceVariable<int> classes = data.Classes)
    // K heap entries per vector and one result byte per vector.
    using (var heapMemory = new CudaDeviceVariable<HeapData>(data.Vectors.GetLength(0) * K))
    using (var result = new CudaDeviceVariable<byte>(data.Vectors.GetLength(0)))
    {
        kernel.Run(
            vectors.DevicePointer,
            data.Vectors.GetLength(0),
            data.Vectors.GetLength(1),
            classes.DevicePointer,
            K,
            EnnCountToPass,
            heapMemory.DevicePointer,
            result.DevicePointer
        );

        // Copy the flags back; createIndexesToStay turns them into the indexes to keep.
        byte[] hostResult = result;
        return data.Filter(hostResult.createIndexesToStay());
    }
}
/// <summary>
/// Filters a long signal <paramref name="x"/> with the impulse response <paramref name="h"/> on
/// the GPU in chunks of N = 2000000 samples. Each zero-padded chunk is transformed, multiplied
/// with the transformed impulse response by "ComplexMultCUDA" and transformed back; the chunk
/// results are combined by <c>OverlapAdd</c>.
/// </summary>
/// <param name="x">Input samples.</param>
/// <param name="h">Impulse response.</param>
/// <returns>
/// x.Count samples starting at offset h.Count / 2 of the combined result, or <c>null</c> when
/// uploading the impulse response or a chunk fails.
/// </returns>
/// <remarks>
/// The context, FFT plan and device buffers are released on every exit path.
/// </remarks>
public List<float> CUDA_FIR_long(List<float> x, List<double> h)
{
    string path = Path.GetDirectoryName(mv.plugins[0].filename);
    int N = 2000000;
    int fftLength = N + h.Count - 1;

    // Every chunk consumes N input samples, so ceil(x.Count / N) chunks are needed to cover the
    // whole input. Dividing the padded lengths instead undercounts and drops tail samples.
    // At least one chunk is always processed.
    int chunkCount = Math.Max(1, (int)Math.Ceiling((double)x.Count / N));

    //alloc data to cuda format
    double2[][] temp_x = new double2[chunkCount][];
    double2[] temp_h = new double2[fftLength];
    double2[][] temp_y = new double2[chunkCount][];

    //data copy (imaginary parts and the padding tail stay zero)
    System.Threading.Tasks.Parallel.For(0, chunkCount, j =>
    {
        temp_x[j] = new double2[fftLength];
        for (int i = 0; (j * N + i) < x.Count && i < N; i++)
        {
            temp_x[j][i].x = x[j * N + i];
        }
    });
    for (int i = 0; i < h.Count; i++)
    {
        temp_h[i].x = h[i];
    }

    CudaContext ctx = new CudaContext();
    CudaDeviceVariable<double2> d_h = null;
    CudaFFTPlan1D plan1D = null;
    try
    {
        plan1D = new CudaFFTPlan1D(fftLength, cufftType.Z2Z, 1);

        CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = (int)Math.Ceiling((double)fftLength / 1024);
        kernel.BlockDimensions = 1024;

        try
        {
            d_h = temp_h;
        }
        catch (Exception)
        {
            // Upload failed: callers receive null (unchanged).
            return null;
        }
        plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);

        for (int g = 0; g < chunkCount; g++)
        {
            CudaDeviceVariable<double2> d_x = null;
            try
            {
                try
                {
                    d_x = temp_x[g];
                }
                catch (Exception e)
                {
                    mainView.log(e, "cuda alloc data error", this);
                    return null;
                }

                try
                {
                    plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
                    kernel.Run(d_h.DevicePointer, d_x.DevicePointer, fftLength);
                    plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);
                }
                catch (Exception exp)
                {
                    // Best effort (unchanged): log and still read back the chunk buffer.
                    mainView.log(exp, "kernel run cuda error", this);
                }

                temp_y[g] = d_x;
            }
            finally
            {
                // Release this chunk's device buffer even when returning early.
                if (d_x != null) { d_x.Dispose(); }
            }
        }

        return OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count);
    }
    finally
    {
        // Plan and device memory must be released before the context that owns them.
        if (d_h != null) { d_h.Dispose(); }
        if (plan1D != null) { plan1D.Dispose(); }
        ctx.Dispose();
    }
}
/// <summary>
/// Creates the CUDA context on device 0, loads the velocity-update and transpose kernels,
/// generates the initial host-side swarm state, uploads it to the device and calls <c>Init()</c>.
/// </summary>
/// <remarks>
/// HostNeighbors stores two entries per particle: slot 2*p holds the left neighbour of particle p
/// and slot 2*p+1 the right neighbour, both wrapping around at the ends of the particle range.
/// </remarks>
protected void InitContext()
{
    var size = ParticlesCount * DimensionsCount;
    var threadsNum = 32;
    // NOTE(review): integer division - when ParticlesCount is not a multiple of 32 the remainder
    // gets no thread, and ParticlesCount < 32 yields zero blocks. Left unchanged because rounding
    // up is only safe if the kernels bounds-check their index - confirm against the kernels.
    var blocksNum = ParticlesCount / threadsNum;

    Ctx = new CudaContext(0);

    UpdateVelocity = Ctx.LoadKernel("update_velocity_kernel.ptx", "updateVelocityKernel");
    UpdateVelocity.GridDimensions = blocksNum;
    UpdateVelocity.BlockDimensions = threadsNum;

    Transpose = Ctx.LoadKernel(KernelFile, "transposeKernel");
    Transpose.GridDimensions = blocksNum;
    Transpose.BlockDimensions = threadsNum;

    HostPositions = Random.RandomVector(size, -5.0, 5.0);
    HostVelocities = Random.RandomVector(size, -2.0, 2.0);
    HostPersonalBests = (double[])HostPositions.Clone();
    HostPersonalBestValues = Enumerable.Repeat(double.MaxValue, ParticlesCount).ToArray();

    // Walk the particles rather than the array slots: using the slot index as a particle index
    // produced self-neighbours and indices outside [0, ParticlesCount).
    HostNeighbors = new int[ParticlesCount * 2];
    var lastParticle = ParticlesCount - 1;
    for (var particle = 0; particle < ParticlesCount; particle++)
    {
        int left = particle == 0 ? lastParticle : particle - 1;
        int right = particle == lastParticle ? 0 : particle + 1;

        HostNeighbors[2 * particle] = left;
        HostNeighbors[2 * particle + 1] = right;
    }

    // Assigning host arrays to the device members uploads them.
    DevicePositions = HostPositions;
    DeviceVelocities = HostVelocities;
    DevicePersonalBests = HostPersonalBests;
    DevicePersonalBestValues = HostPersonalBestValues;
    DeviceNeighbors = HostNeighbors;

    Init();
}
/// <summary>
/// Filters a long signal <paramref name="x"/> with the impulse response <paramref name="h"/> on
/// the GPU in chunks of <paramref name="N"/> samples using real-to-complex FFTs. Each zero-padded
/// chunk is transformed, multiplied with the transformed impulse response by "ComplexMultCUDA"
/// and transformed back; the chunk results are combined by <c>OverlapAdd</c>.
/// </summary>
/// <param name="x">Input samples.</param>
/// <param name="h">Impulse response.</param>
/// <param name="N">Number of input samples per chunk; the transform length is N + h.Count - 1.</param>
/// <returns>
/// x.Count samples starting at offset h.Count / 2 of the combined result, or <c>null</c> when
/// the impulse response could not be uploaded or transformed.
/// </returns>
/// <remarks>
/// The context, FFT plans and device buffers are released on every exit path.
/// </remarks>
public List<float> hypotesis_long(List<double> x, List<double> h, int N)
{
    string path = Path.GetDirectoryName(mv.plugins[0].filename);
    int fftLength = N + h.Count - 1;

    // Every chunk consumes N input samples, so ceil(x.Count / N) chunks are needed to cover the
    // whole input. Dividing the padded lengths instead undercounts and drops tail samples.
    // At least one chunk is always processed.
    int blocks = Math.Max(1, (int)Math.Ceiling((double)x.Count / N));

    double[][] temp_y = new double[blocks][];
    double[] temp_h = new double[fftLength];
    double[] temp_x = new double[fftLength];
    h.CopyTo(temp_h, 0);

    CudaContext ctx = new CudaContext();
    CudaDeviceVariable<double> d_h = null;
    CudaDeviceVariable<double2> d_X = null;
    CudaDeviceVariable<double2> d_H = null;
    CudaFFTPlan1D planForward = null;
    CudaFFTPlan1D planInverse = null;
    try
    {
        CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = (int)Math.Ceiling((double)fftLength / 1024);
        kernel.BlockDimensions = 1024;

        d_X = new CudaDeviceVariable<double2>(fftLength);
        d_H = new CudaDeviceVariable<double2>(fftLength);
        planForward = new CudaFFTPlan1D(fftLength, cufftType.D2Z, 1);
        planInverse = new CudaFFTPlan1D(fftLength, cufftType.Z2D, 1);

        try
        {
            // The assignment allocates d_h and uploads temp_h; no separate pre-allocation is needed.
            d_h = temp_h;
            planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
        }
        catch (Exception exp)
        {
            mainView.log(exp, "CUDA error: Impulse response FFT", this);
            return null;
        }

        for (int g = 0; g < blocks; g++)
        {
            // Number of real samples in this chunk; the last chunk may be shorter than N.
            int P = Math.Min(N, x.Count - N * g);

            // Reset the staging buffer so a short final chunk is zero-padded instead of carrying
            // samples left over from the previous chunk.
            Array.Clear(temp_x, 0, temp_x.Length);
            x.CopyTo(N * g, temp_x, 0, P);

            CudaDeviceVariable<double> d_x = null;
            try
            {
                try
                {
                    d_x = temp_x;
                    planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                    kernel.Run(d_H.DevicePointer, d_X.DevicePointer, fftLength);
                    planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                }
                catch (Exception exp)
                {
                    // Best effort (unchanged): log and keep going with whatever the buffer holds.
                    mainView.log(exp, "Cuda error: kernel run cuda error", this);
                }

                double[] chunkResult;
                if (d_x != null)
                {
                    chunkResult = d_x;
                }
                else
                {
                    // The upload itself failed; contribute silence for this chunk.
                    chunkResult = new double[fftLength];
                }
                temp_y[g] = chunkResult;
            }
            finally
            {
                // Release this chunk's device buffer; previously one buffer leaked per chunk.
                if (d_x != null) { d_x.Dispose(); }
            }
        }

        return OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count);
    }
    finally
    {
        // Plans and device memory must be released before the context that owns them.
        if (planInverse != null) { planInverse.Dispose(); }
        if (planForward != null) { planForward.Dispose(); }
        if (d_H != null) { d_H.Dispose(); }
        if (d_X != null) { d_X.Dispose(); }
        if (d_h != null) { d_h.Dispose(); }
        ctx.Dispose();
    }
}
/// <summary>
/// Filters a long signal <paramref name="xx"/> with the impulse response <paramref name="h"/> on
/// the GPU using overlapping chunks. The input is prefixed with h.Count - 1 zeros, cut into
/// windows of N + h.Count - 1 samples that advance by <paramref name="N"/>, and each window is
/// transformed, multiplied with the transformed impulse response by "ComplexMultCUDA" and
/// transformed back; the window results are combined by <c>OverlapSave</c>.
/// </summary>
/// <param name="xx">Input samples.</param>
/// <param name="h">Impulse response.</param>
/// <param name="N">Window advance in samples; the transform length is N + h.Count - 1.</param>
/// <returns>
/// xx.Count samples starting at offset h.Count / 2 of the combined result, or <c>null</c> when
/// the impulse response could not be uploaded or transformed.
/// </returns>
/// <remarks>
/// The context, FFT plans and device buffers are released on every exit path.
/// </remarks>
public List<float> hypotesis_long_save(List<double> xx, List<double> h, int N)
{
    int fftLength = N + h.Count - 1;
    int overlap = h.Count - 1;

    // Number of windows; the tiny epsilon makes an exact multiple of N round up to one extra window.
    int n = (int)Math.Ceiling((double)(xx.Count + 0.000000000001) / N);

    // Padded input: overlap leading zeros, the samples, then zeros up to the end of the last window.
    double[] padded = new double[n * fftLength - (n - 1) * overlap];
    xx.CopyTo(padded, overlap);

    string path = Path.GetDirectoryName(mv.plugins[0].filename);

    double[][] temp_y = new double[n][];
    double[] temp_h = new double[fftLength];
    double[] temp_x = new double[fftLength];
    h.CopyTo(temp_h, 0);

    CudaContext ctx = new CudaContext();
    CudaDeviceVariable<double> d_h = null;
    CudaDeviceVariable<double2> d_H = null;
    CudaFFTPlan1D planForward = null;
    CudaFFTPlan1D planInverse = null;
    try
    {
        CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");
        kernel.GridDimensions = (int)Math.Ceiling((double)fftLength / 1024);
        kernel.BlockDimensions = 1024;

        d_H = new CudaDeviceVariable<double2>(fftLength);
        planForward = new CudaFFTPlan1D(fftLength, cufftType.D2Z, 1);
        planInverse = new CudaFFTPlan1D(fftLength, cufftType.Z2D, 1);

        try
        {
            // The assignment allocates d_h and uploads temp_h; no separate pre-allocation is needed.
            d_h = temp_h;
            planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
        }
        catch (Exception exp)
        {
            mainView.log(exp, "CUDA error: Impulse response FFT", this);
            return null;
        }

        for (int g = 0; g < n; g++)
        {
            // Window g starts at N * g in the padded input and spans the full transform length,
            // so consecutive windows overlap by h.Count - 1 samples.
            Array.Copy(padded, N * g, temp_x, 0, fftLength);

            CudaDeviceVariable<double> d_x = null;
            CudaDeviceVariable<double2> d_X = null;
            try
            {
                d_X = new CudaDeviceVariable<double2>(fftLength);

                try
                {
                    d_x = temp_x;
                    planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                    kernel.Run(d_H.DevicePointer, d_X.DevicePointer, fftLength);
                    planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                }
                catch (Exception exp)
                {
                    // Best effort (unchanged): log and keep going with whatever the buffer holds.
                    mainView.log(exp, "Cuda error: kernel run cuda error", this);
                }

                double[] windowResult;
                if (d_x != null)
                {
                    windowResult = d_x;
                }
                else
                {
                    // The upload itself failed; contribute silence for this window.
                    windowResult = new double[fftLength];
                }
                temp_y[g] = windowResult;
            }
            finally
            {
                if (d_x != null) { d_x.Dispose(); }
                if (d_X != null) { d_X.Dispose(); }
            }
        }

        return OverlapSave(temp_y, h.Count, fftLength).GetRange(h.Count / 2, xx.Count);
    }
    finally
    {
        // Plans and device memory must be released before the context that owns them.
        if (planForward != null) { planForward.Dispose(); }
        if (planInverse != null) { planInverse.Dispose(); }
        if (d_h != null) { d_h.Dispose(); }
        if (d_H != null) { d_H.Dispose(); }
        ctx.Dispose();
    }
}
/// <summary>
/// Prepares accuracy evaluation for dimension reduction: loads and configures the "geneticKnn",
/// "saveToCashe" and "readCashe" kernels, creates the cache tree root and allocates the
/// per-individual device buffers.
/// </summary>
/// <param name="context">CUDA context used to load kernels.</param>
/// <param name="teaching">Teaching data set resident on the device.</param>
/// <param name="test">Test data set resident on the device.</param>
/// <param name="popSize">Population size; sizes the grids, the "popSize" kernel constants and the buffers.</param>
public DimensionReductionAccuracy(
    CudaContext context,
    DeviceDataSet<int> teaching,
    DeviceDataSet<int> test,
    int popSize
)
{
    this.popSize = popSize;
    this.teaching = teaching;
    this.test = test;
    this.context = context;

    accuracyKernel = context.LoadKernel
    (
        "kernels/dimensionsReductions.ptx",
        "geneticKnn"
    );
    // x covers the test data in ThreadsPerBlock-sized blocks, y has one row per individual.
    accuracyKernel.GridDimensions = new dim3()
    {
        x = (uint)(test.vectors.Size / ThreadsPerBlock) + 1,
        y = (uint)popSize,
        z = 1
    };
    accuracyKernel.BlockDimensions = ThreadsPerBlock;

    K = 3;
    CountToPass = 2;

    accuracyKernel.SetConstantVariable("atributeCount", test.attributeCount);
    accuracyKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
    accuracyKernel.SetConstantVariable("testVectorsCount", test.length);
    accuracyKernel.SetConstantVariable("popSize", popSize);
    accuracyKernel.DynamicSharedMemory = (uint)(test.attributeCount * sizeof(float));

    saveCasheKernel = context.LoadKernel(
        "kernels/dimensionsReductions.ptx",
        "saveToCashe"
    );
    saveCasheKernel.GridDimensions = (popSize * 32) / ThreadsPerBlock + 1;
    saveCasheKernel.BlockDimensions = ThreadsPerBlock;
    saveCasheKernel.SetConstantVariable("atributeCount", teaching.attributeCount);
    // The "popSize" constant must carry the population size, as it does for geneticKnn above;
    // it was previously set to teaching.attributeCount (copy of the line before it).
    saveCasheKernel.SetConstantVariable("popSize", popSize);

    readCasheKernel = context.LoadKernel(
        "kernels/dimensionsReductions.ptx",
        "readCashe"
    );
    readCasheKernel.GridDimensions = 1;
    readCasheKernel.BlockDimensions = popSize;
    readCasheKernel.SetConstantVariable("atributeCount", teaching.attributeCount);

    // Empty cache tree: unlocked mutex and no children.
    casheTreeRoot = new Node()
    {
        mutex = 0,
        one = (IntPtr)0,
        zero = (IntPtr)0,
    };

    isInCashe = new CudaDeviceVariable<byte>(popSize);
    accuracy = new CudaDeviceVariable<float>(popSize);
}