Esempio n. 1
0
        /// <summary>
        /// Pushes one buffer of audio through the GPU: uploads the raw samples, launches the
        /// "GetSamples{T}" kernel, calls <c>Process</c> on the intermediate buffer, launches the
        /// "PutSamples{T}" kernel and downloads the result into the buffer exposed by
        /// <paramref name="output"/>.
        /// </summary>
        /// <typeparam name="T">Sample value type; its type name is appended to the kernel names.</typeparam>
        /// <param name="input">Audio description forwarded unchanged to <c>Process</c>.</param>
        /// <param name="samples">Pointer to the source samples that are copied to the device.</param>
        /// <param name="channels">Channel count; <paramref name="length"/> is divided by it to obtain the per-channel sample count.</param>
        /// <param name="length">Element count used for the device copies and buffer lookups.</param>
        /// <param name="output">Media sample whose pointer receives the processed data.</param>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when a step inside the guarded
        /// section threw (the exception is written to <see cref="Trace"/>).
        /// </returns>
        /// <remarks>
        /// <c>UpdateGpuResources</c> and the division by <paramref name="channels"/> run before the
        /// guarded section, so exceptions raised there propagate to the caller.
        /// </remarks>
        private bool ProcessInternal <T>(IAudioDescription input, IntPtr samples, short channels, int length, IMediaSample output) where T : struct
        {
            UpdateGpuResources(length);

            var samplesPerChannel = length / channels;

            try
            {
                var devRaw       = GetDevInputSamples <T>(length);
                var devWorking   = GetDevNormSamples(channels, samplesPerChannel);
                var devProcessed = GetDevOutputSamples <T>(length);

                // Kernel names are derived from the sample type's name.
                var readKernel  = string.Format("GetSamples{0}", typeof(T).Name);
                var writeKernel = string.Format("PutSamples{0}", typeof(T).Name);

                // Upload and run the "GetSamples" kernel into the working buffer.
                Gpu.CopyToDevice(samples, 0, devRaw, 0, length);
                Gpu.Launch(AudioProc.THREAD_COUNT, 1, readKernel, devRaw, devWorking);

                Process(input, devWorking, channels, samplesPerChannel);

                // Fetch the destination pointer, run the "PutSamples" kernel and download.
                IntPtr destination;
                output.GetPointer(out destination);
                Gpu.Launch(AudioProc.THREAD_COUNT, 1, writeKernel, devWorking, devProcessed);
                Gpu.CopyFromDevice(devProcessed, 0, destination, 0, length);

                return true;
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex);
                return false;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Computes the input gradient of a 2D convolution on the GPU by launching the
        /// <c>GpuConv2DInputGradient</c> kernel and then summing the per-block partial results
        /// on the host into <paramref name="inputGradients"/>.
        /// </summary>
        /// <param name="gradient">Tensor whose values and shape are uploaded as the kernel's gradient input.</param>
        /// <param name="rotKernels">Kernel tensor uploaded to the device (presumably already rotated, per its name — confirm with the caller).</param>
        /// <param name="stride">Stride forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
        /// <param name="inputGradients">
        /// Destination tensor. Partial sums are added with <c>+=</c>, so they accumulate on top of
        /// whatever <c>inputGradients.Values</c> already contains.
        /// </param>
        /// <remarks>
        /// <c>Gpu.FreeAll()</c> runs in a <c>finally</c> block so device memory is released even
        /// when a copy, the launch or the synchronize throws.
        /// </remarks>
        public override void Conv2DInputGradient(Tensor gradient, Tensor rotKernels, int stride, int paddingX, int paddingY, Tensor inputGradients)
        {
            GpuShape[] shapes = new[] { new GpuShape(gradient.Shape),
                                        new GpuShape(rotKernels.Shape),
                                        new GpuShape(inputGradients.Shape),
                                        new GpuShape(rotKernels.Width, rotKernels.Height, 1, rotKernels.BatchSize) };

            int threadsRequiredPerResultElem = rotKernels.BatchSize * rotKernels.Height * rotKernels.Width;

            // Computed once and used for both the partials array and the launch grid.
            int blocksPerResultElem = GetBlocksNum(threadsRequiredPerResultElem);

            // One row per result element, one column per block contributing to it.
            float[,] resultPartials = new float[inputGradients.Length, blocksPerResultElem];

            try
            {
                float[]    devGradient       = Gpu.CopyToDevice(gradient.Values);
                float[]    devRotKernels     = Gpu.CopyToDevice(rotKernels.Values);
                GpuShape[] devShapes         = Gpu.CopyToDevice(shapes);
                float[,]   devResultPartials = Gpu.Allocate(resultPartials);

                // simulate
                //GpuConv2DInputGradient(GetSimulatedThread(blockSize, new dim3(bx, by, bz), new dim3(tx, ty, tz)), gradient.Values, rotKernels.Values, resultPartials, shapes, paddingX, paddingY, stride);

                Gpu.Launch(new dim3(inputGradients.Length, blocksPerResultElem), THREADS_PER_BLOCK).GpuConv2DInputGradient(devGradient, devRotKernels, devResultPartials, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResultPartials, resultPartials);
            }
            finally
            {
                // Always release device allocations, including on failure.
                Gpu.FreeAll();
            }

            // Reduce the per-block partial sums on the host.
            for (int k = 0; k < resultPartials.GetLength(0); ++k)
            {
                for (int partialId = 0; partialId < resultPartials.GetLength(1); ++partialId)
                {
                    inputGradients.Values[k] += resultPartials[k, partialId];
                }
            }
        }
Esempio n. 3
0
        //public override void Add(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuAdd(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Sub(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuSub(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Mul(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.BatchSize * t1.Depth * t1.Height * t2.Width;
        //    GpuShape[] shapes = new [] { new GpuShape(t1.Shape), new GpuShape(t2.Shape), new GpuShape(result.Shape) };

        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);
        //    GpuShape[] devShapes = Gpu.CopyToDevice(shapes);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuMul(devT1, devT2, devResult, devShapes);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        /// <summary>
        /// Runs a 2D convolution on the GPU by launching the <c>GpuConv2D</c> kernel and copying
        /// the device result back into <c>result.Values</c>.
        /// </summary>
        /// <param name="t">Input tensor; its values and shape are uploaded to the device.</param>
        /// <param name="kernels">Kernel tensor; its values and shape are uploaded to the device.</param>
        /// <param name="stride">Stride forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
        /// <param name="result">Destination tensor; its <c>Values</c> array receives the kernel output.</param>
        /// <remarks>
        /// <c>Gpu.FreeAll()</c> runs in a <c>finally</c> block so device memory is released even
        /// when a copy, the launch or the synchronize throws.
        /// </remarks>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, int paddingX, int paddingY, Tensor result)
        {
            int threadsRequired = t.BatchSize * kernels.BatchSize * result.Width * result.Height;

            GpuShape[] shapes = new[] { new GpuShape(t.Shape), new GpuShape(kernels.Shape), new GpuShape(result.Shape) };

            try
            {
                float[]    devT       = Gpu.CopyToDevice(t.Values);
                float[]    devKernels = Gpu.CopyToDevice(kernels.Values);
                float[]    devResult  = Gpu.Allocate(result.Values);
                GpuShape[] devShapes  = Gpu.CopyToDevice(shapes);

                Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuConv2D(devT, devKernels, devResult, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResult, result.Values);
            }
            finally
            {
                // Always release device allocations, including on failure.
                Gpu.FreeAll();
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Uploads raw samples to the device, launches the "GetSamples{T}" kernel and copies the
 /// kernel output back into a newly allocated <c>[channels, length / channels]</c> array.
 /// </summary>
 /// <typeparam name="T">Sample value type; its type name is appended to the kernel name.</typeparam>
 /// <param name="samples">Pointer to the source samples that are copied to the device.</param>
 /// <param name="channels">First dimension of the returned array.</param>
 /// <param name="length">Element count used for the device copy; divided by <paramref name="channels"/> for the second dimension.</param>
 /// <returns>The downloaded array, or <c>null</c> if any GPU step threw (the message is traced).</returns>
 private float[,] GetInputSamplesCpu <T>(IntPtr samples, int channels, int length) where T : struct
 {
     var samplesPerChannel = length / channels;
     var hostSamples       = new float[channels, samplesPerChannel];

     try
     {
         var devRaw     = GetDevInputSamples <T>(length);
         var devResult  = GetDevNormSamples(channels, samplesPerChannel);
         var kernelName = string.Format("GetSamples{0}", typeof(T).Name);

         Gpu.CopyToDevice(samples, 0, devRaw, 0, length);
         Gpu.Launch(AudioProc.THREAD_COUNT, 1, kernelName, devRaw, devResult);
         Gpu.CopyFromDevice(devResult, hostSamples);

         return hostSamples;
     }
     catch (Exception ex)
     {
         Trace.WriteLine(ex.Message);
         return null;
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Uploads a <c>[channels, sampleCount]</c> array to the device, launches the
        /// "PutSamples{T}" kernel and copies the kernel output to <paramref name="output"/>.
        /// </summary>
        /// <typeparam name="T">Sample value type; its type name is appended to the kernel name.</typeparam>
        /// <param name="samples">Source array; dimension 0 is read as the channel count, dimension 1 as the per-channel sample count.</param>
        /// <param name="output">Destination pointer receiving <c>channels * sampleCount</c> elements.</param>
        /// <returns><c>true</c> on success; <c>false</c> if any GPU step threw (the message is traced).</returns>
        private bool PutOutputSamplesCpu <T>(float[,] samples, IntPtr output) where T : struct
        {
            var samplesPerChannel = samples.GetLength(1);
            var channelCount      = samples.GetLength(0);
            var totalElements     = samplesPerChannel * channelCount;

            try
            {
                var devSource  = GetDevNormSamples(channelCount, samplesPerChannel);
                var devResult  = GetDevOutputSamples <T>(totalElements);
                var kernelName = string.Format("PutSamples{0}", typeof(T).Name);

                Gpu.CopyToDevice(samples, devSource);
                Gpu.Launch(AudioProc.THREAD_COUNT, 1, kernelName, devSource, devResult);
                Gpu.CopyFromDevice(devResult, 0, output, 0, totalElements);

                return true;
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex.Message);
                return false;
            }
        }
            /// <summary>
            /// Applies compression in three stages: the <c>GetOverDecibels</c> kernel fills a device
            /// buffer, a host-side loop runs each value through the attack/release followers
            /// (updating <c>m_EnvdB</c>), and the <c>ApplyGains</c> kernel is launched with the
            /// resulting values.
            /// </summary>
            /// <param name="samples">Sample buffer handed to both kernels.</param>
            /// <param name="sampleCount">Number of values processed by the host-side loop.</param>
            /// <param name="thresholddB">Threshold in dB passed to <c>GetOverDecibels</c>.</param>
            /// <param name="ratio">Ratio passed to <c>ApplyGains</c>.</param>
            /// <param name="makeupGaindB">Make-up gain in dB; converted with <c>Decibels.ToLinear</c> before being passed to <c>ApplyGains</c>.</param>
            private void Compress(float[,] samples, int sampleCount, float thresholddB, float ratio, float makeupGaindB)
            {
                const int launchSize = 512;

                var linearMakeupGain = Decibels.ToLinear(makeupGaindB);

                AllocateGpuResources(sampleCount);
                var devBuffer = m_DevOverdBs;

                // Stage 1: run the GetOverDecibels kernel and download its output.
                Gpu.Launch(launchSize, 1).GetOverDecibels(samples, thresholddB, devBuffer);
                var envelope = new float[sampleCount];
                Gpu.CopyFromDevice(devBuffer, envelope);

                // Stage 2: envelope following. Each step depends on the previous m_EnvdB,
                // so this part is serial and cannot be done on the GPU.
                for (var index = 0; index < sampleCount; ++index)
                {
                    var current = envelope[index];

                    // Assumes that a positive delta means attack and a negative delta means
                    // release; this holds for both linear and log values.
                    if (current > m_EnvdB)
                    {
                        m_Attack.Run(current, ref m_EnvdB);
                    }
                    else
                    {
                        m_Release.Run(current, ref m_EnvdB);
                    }

                    envelope[index] = m_EnvdB;
                }

                // Stage 3: upload the followed envelope and run the ApplyGains kernel.
                Gpu.CopyToDevice(envelope, devBuffer);
                Gpu.Launch(launchSize, 1).ApplyGains(samples, devBuffer, ratio, linearMakeupGain);
            }
 /// <summary>
 /// Copies <c>CPUValues</c> into <c>GPUValues</c> and then <c>CPUIndices</c> into
 /// <c>GPUIndices</c> using <c>Gpu.CopyToDevice</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably the GPU* members are already allocated on the device with
 /// matching sizes before this is called — confirm against the allocation code.
 /// </remarks>
 public void CopyToGPU()
 {
     Gpu.CopyToDevice(CPUValues, GPUValues);
     Gpu.CopyToDevice(CPUIndices, GPUIndices);
 }
Esempio n. 8
0
 /// <summary>
 /// Copies <c>CPUArray</c> into <c>GPUArray</c> using <c>Gpu.CopyToDevice</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably <c>GPUArray</c> is already allocated on the device with a
 /// matching size before this is called — confirm against the allocation code.
 /// </remarks>
 public void CopyToGpu()
 {
     Gpu.CopyToDevice(CPUArray, GPUArray);
 }