Example #1
    public DimensionReductionFitness(
        CudaContext context,
        IDimensionAccuracy accuracyFunc,
        int popSize,
        int genLength
        )
    {
        this.accuracyFunc = accuracyFunc;
        this.popSize      = popSize;
        this.context      = context;

        deviceVectorSizes = new CudaDeviceVariable <int>(popSize);


        fitnessKernel = context.LoadKernel(
            "kernels/dimensionsReductions.ptx",
            "fitnessFunction"
            );
        fitnessKernel.GridDimensions  = 1;
        fitnessKernel.BlockDimensions = popSize;
        Alpha = 0.7f;

        sizeAndIndecesKernel = context.LoadKernel("kernels/Common.ptx", "countVectorsIndeces");
        sizeAndIndecesKernel.SetConstantVariable("genLength", genLength);
        sizeAndIndecesKernel.GridDimensions  = 1;
        sizeAndIndecesKernel.BlockDimensions = popSize;
        populationIndeces = new CudaDeviceVariable <int>(genLength * popSize);
    }
Example #2
        public Layer(FileLayer fl, ref CudaContext ctx)
        {
            this.ctx = ctx;

            type = fl.type;
            size = fl.size;

            data  = new float[fl.size.Mul];
            bias  = new float[fl.size.Mul];
            error = new float[fl.size.Mul];

            forward = ctx.LoadKernel("kernel.ptx", "Forward");
            forward.GridDimensions  = new dim3(size.x, size.y, size.z);
            forward.BlockDimensions = new dim3(fl.prevSize.x, fl.prevSize.y, fl.prevSize.z);

            back = ctx.LoadKernel("kernel.ptx", "Backprop");
            back.GridDimensions  = new dim3(size.x, size.y, size.z);
            back.BlockDimensions = new dim3(fl.prevSize.x, fl.prevSize.y, fl.prevSize.z);

            clear = ctx.LoadKernel("kernel.ptx", "Clear");
            clear.GridDimensions = new dim3(size.x, size.y, size.z);

            activate = ctx.LoadKernel("kernel.ptx", "Activate");
            activate.GridDimensions = new dim3(size.x, size.y, size.z);
        }
Example #3
        protected void InitContext()
        {
            var size = ParticlesCount * DimensionsCount;

            var threadsNum = 32;
            var blocksNum  = ParticlesCount / threadsNum;

            Ctx = new CudaContext(0);

            UpdateVelocity = Ctx.LoadKernel("update_velocity_kernel.ptx", "updateVelocityKernel");
            UpdateVelocity.GridDimensions  = blocksNum;
            UpdateVelocity.BlockDimensions = threadsNum;

            Transpose = Ctx.LoadKernel(KernelFile, "transposeKernel");
            Transpose.GridDimensions  = blocksNum;
            Transpose.BlockDimensions = threadsNum;

            HostPositions          = Random.RandomVector(size, -5.0, 5.0);
            HostVelocities         = Random.RandomVector(size, -2.0, 2.0);
            HostPersonalBests      = (double[])HostPositions.Clone();
            HostPersonalBestValues = Enumerable.Repeat(double.MaxValue, ParticlesCount).ToArray();

            HostNeighbors = new int[ParticlesCount * 2];

            // Ring topology: HostNeighbors[2p] and HostNeighbors[2p + 1] hold the
            // indices of particle p's left and right neighbours.
            for (var i = 0; i < ParticlesCount * 2; i += 2)
            {
                var p = i / 2;

                var left  = p == 0 ? ParticlesCount - 1 : p - 1;
                var right = p == ParticlesCount - 1 ? 0 : p + 1;

                HostNeighbors[i]     = left;
                HostNeighbors[i + 1] = right;
            }

            DevicePositions          = HostPositions;
            DeviceVelocities         = HostVelocities;
            DevicePersonalBests      = HostPersonalBests;
            DevicePersonalBestValues = HostPersonalBestValues;
            DeviceNeighbors          = HostNeighbors;

            Init();
        }
Example #4
    public VectorReductionAccuracy(CudaContext context, DeviceDataSet <int> teaching, DeviceDataSet <int> test, int popSize)
    {
        this.teaching = teaching;
        this.test     = test;
        this.popSize  = popSize;
        this.context  = context;

        calculatedNeabours = new CudaDeviceVariable <int>(teaching.length * test.length);
        deviceAccuracy     = new CudaDeviceVariable <float>(popSize);

        Profiler.Start("calculate neabours");
        Neabours.CalculateNeabours(context, teaching, test, calculatedNeabours, ThreadsPerBlock);
        Profiler.Stop("calculate neabours");


        accuracyKernel = context.LoadKernel("kernels/VectorReduction.ptx", "calculateAccuracy");
        dim3 gridDimension = new dim3()
        {
            x = (uint)(test.length / ThreadsPerBlock + 1),
            y = (uint)popSize,
            z = 1
        };

        accuracyKernel.GridDimensions  = gridDimension;
        accuracyKernel.BlockDimensions = ThreadsPerBlock;

        accuracyKernel.SetConstantVariable("testVectorsCount", test.length);
        accuracyKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
        accuracyKernel.SetConstantVariable("attributeCount", teaching.attributeCount);
        accuracyKernel.SetConstantVariable("genLength", teaching.length);

        K           = 3;
        CountToPass = 2;
    }
Example #5
    public static void CalculateNeabours <T>
        (CudaContext context,
        DeviceDataSet <T> teaching,
        DeviceDataSet <T> test,
        CudaDeviceVariable <int> calculatedNeabours,
        int threadsPerBlock
        ) where T : struct
    {
        var kernel = context.LoadKernel("kernels/VectorReduction.ptx", "calculateNearestNeabours");

        kernel.GridDimensions  = test.length / threadsPerBlock + 1;
        kernel.BlockDimensions = threadsPerBlock;

        kernel.SetConstantVariable("testVectorsCount", test.length);
        kernel.SetConstantVariable("teachingVectorsCount", teaching.length);
        kernel.SetConstantVariable("attributeCount", teaching.attributeCount);

        using (var deviceDistanceMemory =
                   new CudaDeviceVariable <float>(teaching.length * test.length))
        {
            kernel.Run(
                teaching.vectors.DevicePointer,
                test.vectors.DevicePointer,
                deviceDistanceMemory.DevicePointer,
                calculatedNeabours.DevicePointer
                );
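            // Sort each test vector's neighbour indices in calculatedNeabours by the
            // distances computed above, so its nearest teaching vectors come first.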
            Thrust.sort_by_key_multiple(deviceDistanceMemory, calculatedNeabours, teaching.length, test.length);
        }
    }
Example #6
        // Testing managed CUDA call
        private static void RunCudaWithAKernel()
        {
            // C# Cuda code to call kernel

            int         N            = 50000;
            int         deviceID     = 0;
            CudaContext ctx          = new CudaContext(deviceID);
            CudaKernel  kernel       = ctx.LoadKernel("kernel_x64.ptx", "VecAdd");
            int         numOfThreads = 256;

            kernel.GridDimensions  = (N + numOfThreads - 1) / numOfThreads;
            kernel.BlockDimensions = numOfThreads;

            // allocate memory in host (not gpu)
            var h_A = InitWithData(N, numOfThreads * 4);
            var h_B = InitWithData(N, numOfThreads);

            // Allocate vectors in device memory and copy from host to device.
            CudaDeviceVariable <float> d_A = h_A;
            CudaDeviceVariable <float> d_B = h_B;
            CudaDeviceVariable <float> d_C = new CudaDeviceVariable <float>(N);

            //Invoke kernel
            kernel.Run(d_A.DevicePointer, d_B.DevicePointer, d_C.DevicePointer, N);

            Console.WriteLine("kernel has runeth");
            // Copy the result from device memory back to the host.
            float[] h_C = d_C;
        }
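Only the C# host side is shown above; the kernel itself lives in kernel_x64.ptx. A minimal CUDA source consistent with this launch configuration and with the kernel.Run(d_A, d_B, d_C, N) argument list might look like the sketch below; it is an assumption based on the standard vector-add sample, not the actual kernel shipped with the example:

    // Hypothetical "VecAdd" entry point: one thread per element,
    // guarded against the final, partially filled block.
    extern "C" __global__ void VecAdd(const float *A, const float *B, float *C, int N)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < N)
        {
            C[i] = A[i] + B[i];
        }
    }

Compiling such a file with nvcc -ptx produces the module that CudaContext.LoadKernel loads; the extern "C" linkage keeps the entry-point name unmangled so it matches the second argument of LoadKernel.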
Example #7
        private void generateKernels(string forwardName, string backName, string clrName, string activeName, dim3 kernelSize)
        {
            forward = ctx.LoadKernel("kernel.ptx", forwardName);
            forward.GridDimensions  = new dim3(size.x, size.y, size.z);
            forward.BlockDimensions = kernelSize;

            back = ctx.LoadKernel("kernel.ptx", backName);
            back.GridDimensions  = new dim3(size.x, size.y, size.z);
            back.BlockDimensions = kernelSize;

            clear = ctx.LoadKernel("kernel.ptx", clrName);
            clear.GridDimensions = new dim3(size.x, size.y, size.z);

            activate = ctx.LoadKernel("kernel.ptx", activeName);
            activate.GridDimensions = new dim3(size.x, size.y, size.z);
        }
Example #8
        //Test CUDA kernel for complex multiplication
        public void test(int N)
        {
            CudaContext ctx    = new CudaContext();
            CudaKernel  kernel = ctx.LoadKernel("kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = N;
            kernel.BlockDimensions = 1;
            double2[] a = new double2[N];
            double2[] b = new double2[N];
            double2[] c = new double2[N];
            for (int i = 0; i < N; i++)
            {
                a[i].x = 1;
                a[i].y = 3;
                b[i].x = 2;
                b[i].y = 2;
            }

            CudaDeviceVariable <double2> d_a = null;
            CudaDeviceVariable <double2> d_b = null;

            try
            {
                d_a = a;
                d_b = b;
            }
            catch (Exception e)
            {
                Console.WriteLine("{0} Exception caught.", e);
                return;
            }
            kernel.Run(d_a.DevicePointer, d_b.DevicePointer, N);
            c = d_b;
            Console.WriteLine("C.last()={0}+i{1}", c.Last().x, c.Last().y);
        }
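Again only the host code is shown. Because the result is read back from d_b (c = d_b), the ComplexMultCUDA kernel presumably overwrites its second operand with the element-wise product. Below is a sketch under that assumption, not the repository's actual kernel source:

    // Hypothetical element-wise complex multiply; double2 carries (re, im).
    // b[i] is replaced by a[i] * b[i], matching the host code that reads
    // the product back from the second buffer.
    extern "C" __global__ void ComplexMultCUDA(const double2 *a, double2 *b, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
        {
            double2 x = a[i];
            double2 y = b[i];
            double2 r;
            r.x = x.x * y.x - x.y * y.y;   // real part
            r.y = x.x * y.y + x.y * y.x;   // imaginary part
            b[i] = r;
        }
    }

The FFT-filtering examples further down load the same entry point to multiply a transformed signal with a transformed impulse response in the frequency domain.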
Example #9
    public float BaseAccuracy()
    {
        var  baseKernel    = context.LoadKernel("kernels/VectorReduction.ptx", "calculateAccuracy");
        dim3 gridDimension = new dim3()
        {
            x = (uint)(test.length / ThreadsPerBlock + 1),
            y = (uint)1,
            z = 1
        };

        baseKernel.GridDimensions  = gridDimension;
        baseKernel.BlockDimensions = ThreadsPerBlock;

        baseKernel.SetConstantVariable("testVectorsCount", test.length);
        baseKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
        baseKernel.SetConstantVariable("attributeCount", teaching.attributeCount);
        baseKernel.SetConstantVariable("genLength", teaching.length);

        var BaseRMSEKernel = context.LoadKernel("kernels/VectorReduction.ptx", "RMSE");

        BaseRMSEKernel.GridDimensions  = 1;
        BaseRMSEKernel.BlockDimensions = 1;
        BaseRMSEKernel.SetConstantVariable("testVectorsCount", test.length);

        byte[] gen = new byte[teaching.length];
        for (int i = 0; i < gen.Length; i++)
        {
            gen[i] = 1;
        }

        using (CudaDeviceVariable <byte> deviceGen = gen)
            using (CudaDeviceVariable <float> baseAccuracy = new CudaDeviceVariable <float>(1))
            {
                // Run the single-gene baseline kernel configured above.
                baseKernel.Run(
                    test.classes.DevicePointer,
                    teaching.classes.DevicePointer,
                    deviceGen.DevicePointer,
                    calculatedNeabours.DevicePointer,
                    baseAccuracy.DevicePointer
                    );

                BaseRMSEKernel.Run(baseAccuracy.DevicePointer);

                float[] host = baseAccuracy;
                return(host[0]);
            }
    }
Example #10
        public List <float> hypotesis(List <double> x, List <double> h, int N)
        {
            //int N = 2000000;
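            // FFT-based convolution: x and h are transformed with cuFFT (D2Z plans),
            // multiplied in the frequency domain by the ComplexMultCUDA kernel, and
            // transformed back (Z2D) to obtain the filtered output.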

            string      path   = Path.GetDirectoryName(mv.plugins[0].filename);
            CudaContext ctx    = new CudaContext();
            CudaKernel  kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = (int)Math.Ceiling((double)(N + h.Count - 1) / 1024);
            kernel.BlockDimensions = 1024;

            double[] temp_y = new double[N + h.Count - 1];
            double[] temp_h = new double[N + h.Count - 1];
            double[] temp_x = new double[N + h.Count - 1];

            double2[] temp_x2 = new double2[N + h.Count - 1];


            h.ToArray().CopyTo(temp_h, 0);
            x.ToArray().CopyTo(temp_x, 0);

            CudaDeviceVariable <double>  d_x = null;
            CudaDeviceVariable <double2> d_X = new CudaDeviceVariable <double2>(N + h.Count - 1);

            CudaDeviceVariable <double>  d_h = new CudaDeviceVariable <double>(N + h.Count - 1);
            CudaDeviceVariable <double2> d_H = new CudaDeviceVariable <double2>(N + h.Count - 1);

            CudaDeviceVariable <double> d_y = new CudaDeviceVariable <double>(N + h.Count - 1);


            CudaFFTPlan1D planForward = new CudaFFTPlan1D(N + h.Count - 1, cufftType.D2Z, 1);
            CudaFFTPlan1D planInverse = new CudaFFTPlan1D(N + h.Count - 1, cufftType.Z2D, 1);

            try
            {
                d_h = temp_h;
                planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
            }
            catch (Exception exp)
            {
                mainView.log(exp, "CUDA error: Impulse response FFT", this);
                return(null);
            }

            try
            {
                d_x = temp_x;
                planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                kernel.Run(d_H.DevicePointer, d_X.DevicePointer, N + h.Count - 1);
                planInverse.Exec(d_X.DevicePointer, d_y.DevicePointer);
            }
            catch (Exception exp)
            {
                mainView.log(exp, "Cuda error: kernel run cuda error", this);
            }
            temp_y = d_y;

            return(Array.ConvertAll <double, float>(temp_y, d => (float)d).ToList().GetRange(500, x.Count));
        }
Example #11
 public CountVectorKernel(CudaContext context, int vectorCount, int genLength)
 {
     this.context          = context;
     kernel                = context.LoadKernel("kernels/Common.ptx", "countVectors");
     VectorCount           = vectorCount;
     GenLength             = genLength;
     kernel.GridDimensions = 1;
 }
Example #12
 public void LoadKernels()
 {
     performGeneticAlgorythm = context.LoadKernel("kernels/evolutionary2.ptx", "genetic");
     performGeneticAlgorythm.GridDimensions      = 1;
     performGeneticAlgorythm.BlockDimensions     = popSize;
     performGeneticAlgorythm.DynamicSharedMemory =
         (uint)(sizeof(float) * popSize);
 }
Example #13
    public float BaseAccuracy()
    {
        var kernel = context.LoadKernel
                     (
            "kernels/dimensionsReductions.ptx",
            "geneticKnn"
                     );

        kernel.GridDimensions = new dim3()
        {
            x = (uint)(test.vectors.Size / ThreadsPerBlock) + 1,
            y = 1,
            z = 1
        };
        kernel.BlockDimensions = ThreadsPerBlock;

        kernel.SetConstantVariable("atributeCount", test.attributeCount);
        kernel.SetConstantVariable("teachingVectorsCount", teaching.length);
        kernel.SetConstantVariable("testVectorsCount", test.length);
        kernel.SetConstantVariable("popSize", 1);
        kernel.SetConstantVariable("k", K);
        kernel.SetConstantVariable("countToPass", CountToPass);

        kernel.DynamicSharedMemory = (uint)(test.attributeCount * sizeof(float));

        var vectorSizes = new int[1];

        vectorSizes[0] = test.attributeCount;

        var indeces = Enumerable.Range(0, test.attributeCount).ToArray();
        var acc     = new float[] { 0f };
        var inCashe = new byte[] { 0 };

        using (CudaDeviceVariable <int> deviceIndeces = indeces)
            using (CudaDeviceVariable <int> deviceVectorSizesLocal = vectorSizes)
                using (CudaDeviceVariable <float> accuracy = acc)
                    using (var heapMem = new CudaDeviceVariable <HeapData>(K))
                        using (CudaDeviceVariable <byte> deviceIsInCashe = inCashe)
                        {
                            kernel.Run(
                                test.vectors.DevicePointer,
                                test.classes.DevicePointer,
                                teaching.vectors.DevicePointer,
                                teaching.classes.DevicePointer,
                                deviceVectorSizesLocal.DevicePointer,
                                deviceIndeces.DevicePointer,
                                deviceIsInCashe.DevicePointer,
                                heapMem.DevicePointer,
                                accuracy.DevicePointer
                                );

                            float[] res = accuracy;
                            return(res[0] / test.length);
                        }
    }
Example #14
        public Layer(Int3 size, CudaContext ctx)
        {
            this.size = size;

            data  = new float[size.Mul];
            bias  = new float[size.Mul];
            error = new float[size.Mul];

            clear = ctx.LoadKernel("kernel.ptx", "Clear");
            clear.GridDimensions = new dim3(size.x, size.y, size.z);
        }
Example #15
        public Layer(Int3 size, Layer prev, ref CudaContext ctx, int type)
        {
            this.ctx = ctx;

            this.type = type;
            this.size = size;

            data  = new float[size.Mul];
            bias  = new float[size.Mul];
            error = new float[size.Mul];

            generateWeights(size, prev.size, kernelType.fullyConnected);

            forward = ctx.LoadKernel("kernel.ptx", "Forward");
            forward.GridDimensions  = new dim3(size.x, size.y, size.z);
            forward.BlockDimensions = new dim3(prev.size.x, prev.size.y, prev.size.z);

            back = ctx.LoadKernel("kernel.ptx", "Backprop");
            back.GridDimensions  = new dim3(size.x, size.y, size.z);
            back.BlockDimensions = new dim3(prev.size.x, prev.size.y, prev.size.z);

            clear = ctx.LoadKernel("kernel.ptx", "Clear");
            clear.GridDimensions = new dim3(size.x, size.y, size.z);

            activate = ctx.LoadKernel("kernel.ptx", "Activate");
            activate.GridDimensions = new dim3(size.x, size.y, size.z);

            SoftmaxSigma = ctx.LoadKernel("kernel.ptx", "SoftmaxSigma");
            SoftmaxSigma.GridDimensions = new dim3(size.x, size.y, size.z);

            SoftmaxFinal = ctx.LoadKernel("kernel.ptx", "SoftmaxFinal");
            SoftmaxFinal.BlockDimensions = new dim3(size.x, size.y, size.z);

            SoftmaxVal = new float[] { 0 };
        }
Example #16
        public List <float> CUDA_FIR(List <float> x, List <double> h)
        {
            CudaContext ctx = new CudaContext();

            //alloc data to cuda format
            double2[] temp_x = new double2[x.Count + h.Count - 1];
            double2[] temp_h = new double2[x.Count + h.Count - 1];
            double2[] temp_y = new double2[x.Count + h.Count - 1];

            //data copy
            for (int i = 0; i < x.Count; i++)
            {
                temp_x[i].x = x[i];
            }
            for (int i = 0; i < h.Count; i++)
            {
                temp_h[i].x = h[i];
            }


            CudaDeviceVariable <double2> d_x = null;
            CudaDeviceVariable <double2> d_h = null;


            CudaFFTPlan1D plan1D = new CudaFFTPlan1D(x.Count + h.Count - 1, cufftType.Z2Z, 1);
            CudaKernel    kernel = ctx.LoadKernel("kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = (int)Math.Ceiling((double)(x.Count + h.Count - 1) / 1024);
            kernel.BlockDimensions = 1024;

            try
            {
                d_x = temp_x;
                d_h = temp_h;
            }
            catch (Exception e)
            {
                //("{0} Exception caught.", e);
                return(null);
            }

            plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
            plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);
            kernel.Run(d_h.DevicePointer, d_x.DevicePointer, x.Count + h.Count - 1);
            plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);
            temp_y = d_x;


            return(temp_y.Select(data => (float)data.x).ToList().GetRange(h.Count / 2, x.Count));
        }
Example #17
    private int[] ApplyRest(CudaContext context, CudaDataSet <int> data)
    {
        int vectorsCount   = data.Vectors.GetLength(0);
        int attributeCount = data.Vectors.GetLength(1);

        var kernel = context.LoadKernel("kernels/drop3.ptx", "findNeighbours");

        kernel.GridDimensions  = data.Vectors.GetLength(0) / ThreadsPerBlock + 1;
        kernel.BlockDimensions = ThreadsPerBlock;

        using (CudaDeviceVariable <int> d_classes = data.Classes)
            using (CudaDeviceVariable <float> vectors = data.Vectors.Raw)
                using (var heapMemory = new CudaDeviceVariable <HeapData>(data.Vectors.GetLength(0) * CasheSize))
                    using (var nearestEnemyDistances = new CudaDeviceVariable <float>(data.Vectors.GetLength(0)))
                    {
                        kernel.Run(
                            vectors.DevicePointer,
                            data.Vectors.GetLength(0),
                            data.Vectors.GetLength(1),
                            CasheSize,
                            d_classes.DevicePointer,
                            heapMemory.DevicePointer,
                            nearestEnemyDistances.DevicePointer
                            );



                        float[]   hostNearestEnemy = nearestEnemyDistances;
                        float[][] hostVectors      = data.Vectors.To2d();

                        var Neighbors        = new FlattArray <HeapData>(heapMemory, CasheSize);
                        var nearestNeighbors = new int[vectorsCount][];
                        for (int i = 0; i < vectorsCount; i++)
                        {
                            nearestNeighbors[i] = new int[CasheSize];

                            for (int j = 0; j < CasheSize; j++)
                            {
                                nearestNeighbors[i][j] = Neighbors[i, j].label;
                            }
                        }

                        HostDataset host = data.ToHostDataSet();
                        SortDataDesc(host, nearestNeighbors, hostNearestEnemy);


                        return(proccesData(context, host, nearestNeighbors));
                    }
    }
Example #18
    public VectorReductionFitness(CudaContext context, IVectorReductionAccuracy accuracyCalc, int popSize, int teachingCount)
    {
        this.teachingCount = teachingCount;
        this.accuracyCalc  = accuracyCalc;
        this.popSize       = popSize;
        this.context       = context;
        countVectorsKernel = new CountVectorKernel(context, popSize, teachingCount);
        vectorSizes        = new CudaDeviceVariable <int>(popSize);


        fitnessKernel = context.LoadKernel("kernels/VectorReduction.ptx", "fitnessFunction");
        Alpha         = 0.7f;
        fitnessKernel.BlockDimensions = popSize;
        fitnessKernel.GridDimensions  = 1;
    }
Example #19
    public NeighborFinder(CudaContext context, FlattArray <float> vectors, int countToFind)
    {
        heap             = new Data[countToFind];
        this.vectorCount = vectors.GetLength(0);
        this.attrCount   = vectors.GetLength(1);
        this.context     = context;

        kernel = context.LoadKernel("kernels/drop3.ptx", "calculateDistances");
        kernel.GridDimensions  = vectors.GetLength(0) / 256 + 1;
        kernel.BlockDimensions = 256;
        results = new float[vectors.GetLength(0)];

        deviceVectors     = vectors.Raw;
        deviceResult      = new CudaDeviceVariable <float>(vectors.GetLength(0));
        deviceIsInDataSet = new CudaDeviceVariable <byte>(vectors.GetLength(0));
    }
Example #20
    CudaDataSet <int> Enn(CudaDataSet <int> data, CudaContext context)
    {
        var kernel = context.LoadKernel("kernels/kernel.ptx", "enn");

        kernel.GridDimensions  = data.Vectors.GetLength(0) / ThreadsPerBlock + 1;
        kernel.BlockDimensions = ThreadsPerBlock;


        using (CudaDeviceVariable <float> vectors = data.Vectors.Raw)
            using (CudaDeviceVariable <int> classes = data.Classes)
                using (var heapMemory = new CudaDeviceVariable <HeapData>(data.Vectors.GetLength(0) * K))
                    using (var result = new CudaDeviceVariable <byte>(data.Vectors.GetLength(0)))
                    {
                        kernel.Run(
                            vectors.DevicePointer,
                            data.Vectors.GetLength(0),
                            data.Vectors.GetLength(1),
                            classes.DevicePointer,
                            K,
                            EnnCountToPass,
                            heapMemory.DevicePointer,
                            result.DevicePointer
                            );

                        byte[]     hostResult = result;
                        List <int> indeces    = new List <int>();
                        for (int i = 0; i < hostResult.Length; i++)
                        {
                            if (hostResult[i] == 1)
                            {
                                indeces.Add(i);
                            }
                        }

                        return(data.Filter(hostResult.createIndexesToStay()));
                    }
    }
Example #21
        public List <float> CUDA_FIR_long(List <float> x, List <double> h)
        {
            CudaContext ctx  = new CudaContext();
            string      path = Path.GetDirectoryName(mv.plugins[0].filename);


            int N = 2000000;
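            // Overlap-add block convolution: the input is processed in chunks of N
            // samples, each chunk is convolved with h via cuFFT and ComplexMultCUDA,
            // and OverlapAdd() stitches the partial results together.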

            //alloc data to cuda format
            double2[][] temp_x = new double2[(int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1))][];
            double2[]   temp_h = new double2[N + h.Count - 1];
            double2[][] temp_y = new double2[(int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1))][];


            //data copy

            System.Threading.Tasks.Parallel.For(0, (int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1)), j => {
                temp_x[j] = new double2[N + h.Count - 1];
                temp_y[j] = new double2[N + h.Count - 1];
                for (int i = 0; (j * N + i) < x.Count && i < N; i++)
                {
                    temp_x[j][i].x = x[j * N + i];
                }
            });

            for (int i = 0; i < h.Count; i++)
            {
                temp_h[i].x = h[i];
            }



            CudaDeviceVariable <double2> d_x = null;
            CudaDeviceVariable <double2> d_h = null;


            CudaFFTPlan1D plan1D = new CudaFFTPlan1D(N + h.Count - 1, cufftType.Z2Z, 1);

            CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = (int)Math.Ceiling((double)(N + h.Count - 1) / 1024);
            kernel.BlockDimensions = 1024;

            try
            {
                d_h = temp_h;
            }
            catch (Exception e)
            {
                //("{0} Exception caught.", e);
                return(null);
            }
            plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);

            for (int g = 0; g < (int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1)); g++)
            {
                try
                {
                    d_x = temp_x[g];
                }
                catch (Exception e)
                {
                    mainView.log(e, "cuda alloc data error", this);
                    return(null);
                }

                try
                {
                    plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
                    kernel.Run(d_h.DevicePointer, d_x.DevicePointer, N + h.Count - 1);
                    plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "kernel run cuda error", this);
                }
                temp_y[g] = d_x;

                //this.Invoke((MethodInvoker)delegate
                //{
                //    progressBar1.Value = (int)(50/ (int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1)))*g;

                //});
                d_x.Dispose();
            }
            d_h.Dispose();

            plan1D.Dispose();

            return(OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count));
        }
Example #23
        public List <float> hypotesis_long(List <double> x, List <double> h, int N)
        {
            //int N = 2000000;

            string      path   = Path.GetDirectoryName(mv.plugins[0].filename);
            CudaContext ctx    = new CudaContext();
            CudaKernel  kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = (int)Math.Ceiling((double)(N + h.Count - 1) / 1024);
            kernel.BlockDimensions = 1024;

            int blocks = (int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1));

            double[][] temp_y = new double[blocks][];
            double[]   temp_h = new double[N + h.Count - 1];
            double[]   temp_x = new double[N + h.Count - 1];


            h.ToArray().CopyTo(temp_h, 0);


            CudaDeviceVariable <double>  d_x = null;
            CudaDeviceVariable <double2> d_X = new CudaDeviceVariable <double2>(N + h.Count - 1);

            CudaDeviceVariable <double>  d_h = new CudaDeviceVariable <double>(N + h.Count - 1);
            CudaDeviceVariable <double2> d_H = new CudaDeviceVariable <double2>(N + h.Count - 1);

            //CudaDeviceVariable<double> d_y = new CudaDeviceVariable<double>(N + h.Count - 1);


            CudaFFTPlan1D planForward = new CudaFFTPlan1D(N + h.Count - 1, cufftType.D2Z, 1);
            CudaFFTPlan1D planInverse = new CudaFFTPlan1D(N + h.Count - 1, cufftType.Z2D, 1);

            try
            {
                d_h = temp_h;
                planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
            }
            catch (Exception exp)
            {
                mainView.log(exp, "CUDA error: Impulse response FFT", this);
                return(null);
            }

            for (int g = 0; g < blocks; g++)
            {
                int P = N;
                if (x.Count - N * g < N)
                {
                    P = x.Count - N * g;
                }

                x.GetRange(N * g, P).ToArray().CopyTo(temp_x, 0);

                try
                {
                    d_x = temp_x;
                    planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                    kernel.Run(d_H.DevicePointer, d_X.DevicePointer, N + h.Count - 1);
                    planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "Cuda error: kernel run cuda error", this);
                }

                temp_y[g] = d_x;
            }

            return(OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count));
        }
Example #24
        public List <float> hypotesis_long_save(List <double> xx, List <double> h, int N)
        {
            int n = (int)Math.Ceiling((double)(xx.Count() + 0.000000000001) / N);

            double[] temp_data = new double[n * (N + h.Count - 1) - (n - 1) * (h.Count - 1)];
            xx.CopyTo(temp_data, h.Count - 1);
            List <double> x = temp_data.ToList();
            //int N = 2000000;
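            // Overlap-save block convolution: consecutive blocks of the padded input
            // overlap by h.Count - 1 samples; each block is filtered in the frequency
            // domain and OverlapSave() discards the wrapped-around samples.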

            string      path   = Path.GetDirectoryName(mv.plugins[0].filename);
            CudaContext ctx    = new CudaContext();
            CudaKernel  kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

            kernel.GridDimensions  = (int)Math.Ceiling((double)(N + h.Count - 1) / 1024);
            kernel.BlockDimensions = 1024;

            int blocks = (int)Math.Ceiling((double)(x.Count + h.Count - 1) / (N + h.Count - 1));

            double[][] temp_y = new double[n][];
            double[]   temp_h = new double[N + h.Count - 1];
            double[]   temp_x = new double[N + h.Count - 1];


            h.ToArray().CopyTo(temp_h, 0);


            CudaDeviceVariable <double> d_x = null;



            CudaDeviceVariable <double>  d_h = new CudaDeviceVariable <double>(N + h.Count - 1);
            CudaDeviceVariable <double2> d_H = new CudaDeviceVariable <double2>(N + h.Count - 1);

            //CudaDeviceVariable<double> d_y = new CudaDeviceVariable<double>(N + h.Count - 1);


            CudaFFTPlan1D planForward = new CudaFFTPlan1D(N + h.Count - 1, cufftType.D2Z, 1);
            CudaFFTPlan1D planInverse = new CudaFFTPlan1D(N + h.Count - 1, cufftType.Z2D, 1);

            try
            {
                d_h = temp_h;
                planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
            }
            catch (Exception exp)
            {
                mainView.log(exp, "CUDA error: Impulse response FFT", this);
                return(null);
            }


            for (int g = 0; g < n; g++)
            {
                CudaDeviceVariable <double2> d_X = new CudaDeviceVariable <double2>(N + h.Count - 1);
                int P = N + h.Count - 1;
                //if (x.Count - P * g < P) P = x.Count - P * g;
                int L = h.Count - 1;
                if (g == 0)
                {
                    L = 0;
                }

                x.CopyTo(P * g - L * g, temp_x, 0, P);

                try
                {
                    d_x = temp_x;
                    planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                    kernel.Run(d_H.DevicePointer, d_X.DevicePointer, N + h.Count - 1);
                    planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "Cuda error: kernel run cuda error", this);
                }

                temp_y[g] = d_x;
                d_x.Dispose();
                d_X.Dispose();
            }
            planForward.Dispose();
            planInverse.Dispose();
            d_x.Dispose();

            d_h.Dispose();
            d_H.Dispose();
            ctx.Dispose();

            return(OverlapSave(temp_y, h.Count, N + h.Count - 1).GetRange(h.Count / 2, xx.Count));
        }
Example #25
    public DimensionReductionAccuracy(
        CudaContext context,
        DeviceDataSet <int> teaching,
        DeviceDataSet <int> test,
        int popSize
        )
    {
        this.popSize  = popSize;
        this.teaching = teaching;
        this.test     = test;
        this.context  = context;

        accuracyKernel = context.LoadKernel
                         (
            "kernels/dimensionsReductions.ptx",
            "geneticKnn"
                         );

        accuracyKernel.GridDimensions = new dim3()
        {
            x = (uint)(test.vectors.Size / ThreadsPerBlock) + 1,
            y = (uint)popSize,
            z = 1
        };
        accuracyKernel.BlockDimensions = ThreadsPerBlock;

        K           = 3;
        CountToPass = 2;
        accuracyKernel.SetConstantVariable("atributeCount", test.attributeCount);
        accuracyKernel.SetConstantVariable("teachingVectorsCount", teaching.length);
        accuracyKernel.SetConstantVariable("testVectorsCount", test.length);
        accuracyKernel.SetConstantVariable("popSize", popSize);
        accuracyKernel.DynamicSharedMemory = (uint)(test.attributeCount * sizeof(float));


        saveCasheKernel = context.LoadKernel(
            "kernels/dimensionsReductions.ptx",
            "saveToCashe"
            );
        saveCasheKernel.GridDimensions  = (popSize * 32) / ThreadsPerBlock + 1;
        saveCasheKernel.BlockDimensions = ThreadsPerBlock;
        saveCasheKernel.SetConstantVariable("atributeCount", teaching.attributeCount);
        saveCasheKernel.SetConstantVariable("popSize", teaching.attributeCount);


        readCasheKernel = context.LoadKernel(
            "kernels/dimensionsReductions.ptx",
            "readCashe"
            );
        readCasheKernel.GridDimensions  = 1;
        readCasheKernel.BlockDimensions = popSize;
        readCasheKernel.SetConstantVariable("atributeCount", teaching.attributeCount);


        casheTreeRoot = new Node()
        {
            mutex = 0,
            one   = (IntPtr)0,
            zero  = (IntPtr)0,
        };

        isInCashe = new CudaDeviceVariable <byte>(popSize);
        accuracy  = new CudaDeviceVariable <float>(popSize);
    }