/// <summary>
/// Uploads <paramref name="samples"/> to the device, launches the "GetSamples{T}" kernel,
/// hands the result to <c>Process</c>, launches the "PutSamples{T}" kernel and finally
/// downloads the result into the buffer obtained from <paramref name="output"/>.
/// </summary>
/// <typeparam name="T">Sample value type; its type name is appended to the kernel names.</typeparam>
/// <param name="input">Audio description forwarded unchanged to <c>Process</c>.</param>
/// <param name="samples">
/// Pointer to the source samples. It is overwritten with the pointer returned by
/// <c>output.GetPointer</c> before the download step.
/// </param>
/// <param name="channels">Channel count; divides <paramref name="length"/> to get the per-channel sample count.</param>
/// <param name="length">Element count used for the device buffers and for both copies.</param>
/// <param name="output">Media sample whose buffer receives the processed data.</param>
/// <returns>
/// <c>true</c> when every step inside the guarded section completed; <c>false</c> when an
/// exception was thrown there (the exception is written to <see cref="Trace"/>).
/// </returns>
private bool ProcessInternal<T>(IAudioDescription input, IntPtr samples, short channels, int length, IMediaSample output) where T : struct
{
    // Both of these run outside the guarded section, so their failures propagate to the caller.
    UpdateGpuResources(length);
    var samplesPerChannel = length / channels;

    try
    {
        var rawInputOnDevice = GetDevInputSamples<T>(length);
        var normalizedOnDevice = GetDevNormSamples(channels, samplesPerChannel);
        var rawOutputOnDevice = GetDevOutputSamples<T>(length);

        // Upload, then convert the raw samples with the type-specific kernel.
        Gpu.CopyToDevice(samples, 0, rawInputOnDevice, 0, length);
        Gpu.Launch(AudioProc.THREAD_COUNT, 1, string.Format("GetSamples{0}", typeof(T).Name), rawInputOnDevice, normalizedOnDevice);

        Process(input, normalizedOnDevice, channels, samplesPerChannel);

        // From here on 'samples' refers to the output buffer, not the input one.
        output.GetPointer(out samples);
        Gpu.Launch(AudioProc.THREAD_COUNT, 1, string.Format("PutSamples{0}", typeof(T).Name), normalizedOnDevice, rawOutputOnDevice);
        Gpu.CopyFromDevice(rawOutputOnDevice, 0, samples, 0, length);

        return true;
    }
    catch (Exception ex)
    {
        Trace.WriteLine(ex);
        return false;
    }
}
/// <summary>
/// Launches the <c>GpuConv2DInputGradient</c> kernel and adds the partial sums it produces
/// into <c>inputGradients.Values</c>.
/// </summary>
/// <remarks>
/// The kernel writes one partial result per (result element, block) pair into a 2D buffer;
/// the partials of each row are then summed on the host. Values are accumulated with
/// <c>+=</c>, so whatever <paramref name="inputGradients"/> already holds is kept.
/// Device memory is released with <c>Gpu.FreeAll()</c> in a <c>finally</c> block so that a
/// failing copy, launch or synchronize does not leave the buffers allocated.
/// </remarks>
/// <param name="gradient">Gradient tensor uploaded to the device.</param>
/// <param name="rotKernels">Kernel tensor uploaded to the device (rotated, judging by the name - confirm with caller).</param>
/// <param name="stride">Stride forwarded to the kernel.</param>
/// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
/// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
/// <param name="inputGradients">Tensor whose <c>Values</c> receive the accumulated result.</param>
public override void Conv2DInputGradient(Tensor gradient, Tensor rotKernels, int stride, int paddingX, int paddingY, Tensor inputGradients)
{
    GpuShape[] shapes = new[]
    {
        new GpuShape(gradient.Shape),
        new GpuShape(rotKernels.Shape),
        new GpuShape(inputGradients.Shape),
        new GpuShape(rotKernels.Width, rotKernels.Height, 1, rotKernels.BatchSize)
    };

    // Host-side sizing is done before anything is allocated on the device.
    int threadsRequiredPerResultElem = rotKernels.BatchSize * rotKernels.Height * rotKernels.Width;
    int blocksPerResultElem = GetBlocksNum(threadsRequiredPerResultElem);
    float[,] resultPartials = new float[inputGradients.Length, blocksPerResultElem];

    try
    {
        float[] devGradient = Gpu.CopyToDevice(gradient.Values);
        float[] devRotKernels = Gpu.CopyToDevice(rotKernels.Values);
        GpuShape[] devShapes = Gpu.CopyToDevice(shapes);
        float[,] devResultPartials = Gpu.Allocate(resultPartials);

        // simulate
        //GpuConv2DInputGradient(GetSimulatedThread(blockSize, new dim3(bx, by, bz), new dim3(tx, ty, tz)), gradient.Values, rotKernels.Values, resultPartials, shapes, paddingX, paddingY, stride);

        Gpu.Launch(new dim3(inputGradients.Length, blocksPerResultElem), THREADS_PER_BLOCK).GpuConv2DInputGradient(devGradient, devRotKernels, devResultPartials, devShapes, paddingX, paddingY, stride);
        Gpu.Synchronize();

        Gpu.CopyFromDevice(devResultPartials, resultPartials);
    }
    finally
    {
        // Runs on success and on failure alike; previously skipped when an exception was thrown.
        Gpu.FreeAll();
    }

    // Reduce the per-block partial sums on the host.
    for (int k = 0; k < resultPartials.GetLength(0); ++k)
    {
        for (int partialId = 0; partialId < resultPartials.GetLength(1); ++partialId)
        {
            inputGradients.Values[k] += resultPartials[k, partialId];
        }
    }
}
//public override void Add(Tensor t1, Tensor t2, Tensor result)
//{
//    int threadsRequired = result.Length;

//    float[] devT1 = Gpu.CopyToDevice(t1.Values);
//    float[] devT2 = Gpu.CopyToDevice(t2.Values);
//    float[] devResult = Gpu.Allocate(result.Values);

//    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuAdd(devT1, devT2, devResult);
//    Gpu.Synchronize();

//    Gpu.CopyFromDevice(devResult, result.Values);
//    Gpu.FreeAll();
//}

//public override void Sub(Tensor t1, Tensor t2, Tensor result)
//{
//    int threadsRequired = result.Length;

//    float[] devT1 = Gpu.CopyToDevice(t1.Values);
//    float[] devT2 = Gpu.CopyToDevice(t2.Values);
//    float[] devResult = Gpu.Allocate(result.Values);

//    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuSub(devT1, devT2, devResult);
//    Gpu.Synchronize();

//    Gpu.CopyFromDevice(devResult, result.Values);
//    Gpu.FreeAll();
//}

//public override void Mul(Tensor t1, Tensor t2, Tensor result)
//{
//    int threadsRequired = result.BatchSize * t1.Depth * t1.Height * t2.Width;
//    GpuShape[] shapes = new [] { new GpuShape(t1.Shape), new GpuShape(t2.Shape), new GpuShape(result.Shape) };

//    float[] devT1 = Gpu.CopyToDevice(t1.Values);
//    float[] devT2 = Gpu.CopyToDevice(t2.Values);
//    float[] devResult = Gpu.Allocate(result.Values);
//    GpuShape[] devShapes = Gpu.CopyToDevice(shapes);

//    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuMul(devT1, devT2, devResult, devShapes);
//    Gpu.Synchronize();

//    Gpu.CopyFromDevice(devResult, result.Values);
//    Gpu.FreeAll();
//}

/// <summary>
/// Launches the <c>GpuConv2D</c> kernel for <paramref name="t"/> and <paramref name="kernels"/>
/// and copies the device result into <c>result.Values</c>.
/// </summary>
/// <remarks>
/// The launch is sized for <c>t.BatchSize * kernels.BatchSize * result.Width * result.Height</c>
/// threads. Device memory is released with <c>Gpu.FreeAll()</c> in a <c>finally</c> block so
/// that a failing copy, launch or synchronize does not leave the buffers allocated.
/// </remarks>
/// <param name="t">Input tensor uploaded to the device.</param>
/// <param name="kernels">Kernel tensor uploaded to the device.</param>
/// <param name="stride">Stride forwarded to the kernel.</param>
/// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
/// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
/// <param name="result">Tensor whose <c>Values</c> are overwritten with the kernel output.</param>
public override void Conv2D(Tensor t, Tensor kernels, int stride, int paddingX, int paddingY, Tensor result)
{
    int threadsRequired = t.BatchSize * kernels.BatchSize * result.Width * result.Height;
    GpuShape[] shapes = new[] { new GpuShape(t.Shape), new GpuShape(kernels.Shape), new GpuShape(result.Shape) };

    try
    {
        float[] devT = Gpu.CopyToDevice(t.Values);
        float[] devKernels = Gpu.CopyToDevice(kernels.Values);
        float[] devResult = Gpu.Allocate(result.Values);
        GpuShape[] devShapes = Gpu.CopyToDevice(shapes);

        Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuConv2D(devT, devKernels, devResult, devShapes, paddingX, paddingY, stride);
        Gpu.Synchronize();

        Gpu.CopyFromDevice(devResult, result.Values);
    }
    finally
    {
        // Runs on success and on failure alike; previously skipped when an exception was thrown.
        Gpu.FreeAll();
    }
}
/// <summary>
/// Uploads <paramref name="samples"/> to the device, launches the "GetSamples{T}" kernel and
/// downloads its output into a newly allocated <c>[channels, length / channels]</c> array.
/// </summary>
/// <typeparam name="T">Sample value type; its type name is appended to the kernel name.</typeparam>
/// <param name="samples">Pointer to the source samples.</param>
/// <param name="channels">Channel count; first dimension of the returned array.</param>
/// <param name="length">Element count used for the upload; divided by <paramref name="channels"/> for the second dimension.</param>
/// <returns>
/// The downloaded array, or <c>null</c> when an exception was thrown in the guarded section
/// (only the exception message is written to <see cref="Trace"/>).
/// </returns>
private float[,] GetInputSamplesCpu<T>(IntPtr samples, int channels, int length) where T : struct
{
    // Computed and allocated outside the guarded section, so failures here propagate to the caller.
    var samplesPerChannel = length / channels;
    var hostBuffer = new float[channels, samplesPerChannel];

    try
    {
        var rawOnDevice = GetDevInputSamples<T>(length);
        var normalizedOnDevice = GetDevNormSamples(channels, samplesPerChannel);

        Gpu.CopyToDevice(samples, 0, rawOnDevice, 0, length);
        Gpu.Launch(AudioProc.THREAD_COUNT, 1, string.Format("GetSamples{0}", typeof(T).Name), rawOnDevice, normalizedOnDevice);
        Gpu.CopyFromDevice(normalizedOnDevice, hostBuffer);

        return hostBuffer;
    }
    catch (Exception ex)
    {
        Trace.WriteLine(ex.Message);
        return null;
    }
}
/// <summary>
/// Uploads <paramref name="samples"/> to the device, launches the "PutSamples{T}" kernel and
/// copies the kernel output to <paramref name="output"/>.
/// </summary>
/// <typeparam name="T">Sample value type; its type name is appended to the kernel name.</typeparam>
/// <param name="samples">Host array; dimension 0 is read as the channel count, dimension 1 as the per-channel sample count.</param>
/// <param name="output">Pointer that receives <c>channels * sampleCount</c> elements.</param>
/// <returns>
/// <c>true</c> on success; <c>false</c> when an exception was thrown in the guarded section
/// (only the exception message is written to <see cref="Trace"/>).
/// </returns>
private bool PutOutputSamplesCpu<T>(float[,] samples, IntPtr output) where T : struct
{
    var channelCount = samples.GetLength(0);
    var samplesPerChannel = samples.GetLength(1);
    var totalElements = samplesPerChannel * channelCount;

    try
    {
        var normalizedOnDevice = GetDevNormSamples(channelCount, samplesPerChannel);
        var rawOnDevice = GetDevOutputSamples<T>(totalElements);

        Gpu.CopyToDevice(samples, normalizedOnDevice);
        Gpu.Launch(AudioProc.THREAD_COUNT, 1, string.Format("PutSamples{0}", typeof(T).Name), normalizedOnDevice, rawOnDevice);
        Gpu.CopyFromDevice(rawOnDevice, 0, output, 0, totalElements);

        return true;
    }
    catch (Exception ex)
    {
        Trace.WriteLine(ex.Message);
        return false;
    }
}
/// <summary>
/// Runs the compressor in three stages: the <c>GetOverDecibels</c> kernel fills a device buffer,
/// the host passes each value through <c>m_Attack</c> or <c>m_Release</c> (updating
/// <c>m_EnvdB</c>), and the <c>ApplyGains</c> kernel is launched with the smoothed values.
/// </summary>
/// <param name="samples">Sample array handed to both kernels (presumably a device-side array - confirm with caller).</param>
/// <param name="sampleCount">Number of values downloaded, smoothed and uploaded again.</param>
/// <param name="thresholddB">Threshold forwarded to <c>GetOverDecibels</c>.</param>
/// <param name="ratio">Ratio forwarded to <c>ApplyGains</c>.</param>
/// <param name="makeupGaindB">Make-up gain in dB; converted with <c>Decibels.ToLinear</c> before being forwarded to <c>ApplyGains</c>.</param>
private void Compress(float[,] samples, int sampleCount, float thresholddB, float ratio, float makeupGaindB)
{
    const int launchThreads = 512;

    var linearMakeupGain = Decibels.ToLinear(makeupGaindB);

    AllocateGpuResources(sampleCount);
    var deviceEnvelope = m_DevOverdBs;

    // Stage 1 (GPU): fill the device buffer, then bring it back to the host.
    Gpu.Launch(launchThreads, 1).GetOverDecibels(samples, thresholddB, deviceEnvelope);

    var envelope = new float[sampleCount];
    Gpu.CopyFromDevice(deviceEnvelope, envelope);

    // Stage 2 (host): this bit is serial, can't be done on GPU -
    // each step reads the m_EnvdB value left behind by the previous one.
    for (int n = 0; n < sampleCount; n++)
    {
        var level = envelope[n];

        // Assumes positive delta = attack, negative delta = release;
        // good for linear & log values.
        if (level > m_EnvdB)
        {
            m_Attack.Run(level, ref m_EnvdB);
        }
        else
        {
            m_Release.Run(level, ref m_EnvdB);
        }

        envelope[n] = m_EnvdB;
    }

    // Stage 3 (GPU): upload the smoothed values and launch the gain kernel.
    Gpu.CopyToDevice(envelope, deviceEnvelope);
    Gpu.Launch(launchThreads, 1).ApplyGains(samples, deviceEnvelope, ratio, linearMakeupGain);
}
/// <summary>
/// Copies <c>CPUValues</c> into <c>GPUValues</c> and then <c>CPUIndices</c> into
/// <c>GPUIndices</c> using <c>Gpu.CopyToDevice</c>.
/// </summary>
public void CopyToGPU()
{
    // Values first, indices second - same order as before.
    Gpu.CopyToDevice(CPUValues, GPUValues);
    Gpu.CopyToDevice(CPUIndices, GPUIndices);
}
/// <summary>
/// Copies <c>CPUArray</c> into <c>GPUArray</c> using <c>Gpu.CopyToDevice</c>.
/// </summary>
public void CopyToGpu() => Gpu.CopyToDevice(CPUArray, GPUArray);