/// <summary>
/// Builds a log-binned spectrogram of <paramref name="samples"/>. The signal is normalized in place,
/// split into frames of <c>configuration.WdftSize</c> samples taken every <c>configuration.Overlap</c>
/// samples, each frame is weighted by the window, transformed with a forward FFT and reduced with
/// <c>ExtractLogBins</c>.
/// </summary>
/// <param name="samples">Audio samples. The array is passed to <c>NormalizeInPlace</c> before analysis.</param>
/// <param name="windowFunction">Provides the window coefficients for a frame of <c>WdftSize</c> samples.</param>
/// <param name="configuration">Supplies <c>WdftSize</c>, <c>Overlap</c> and <c>LogBins</c>.</param>
/// <returns>
/// One row per analysed frame. An empty array is returned when the input is too short to yield a frame.
/// </returns>
public float[][] CreateLogSpectrogram(
    float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    NormalizeInPlace(samples);

    int width = (samples.Length - configuration.WdftSize) / configuration.Overlap; /*width of the image*/
    if (width < 0)
    {
        // Integer division truncates toward zero, so an input only slightly shorter than one frame
        // already produced width == 0 (empty result), while a much shorter input produced a negative
        // width and crashed in the array allocation below. Treat both cases the same way.
        width = 0;
    }

    float[][] frames = new float[width][];
    float[] complexSignal = new float[2 * configuration.WdftSize]; /*even - Re, odd - Img*/
    double[] window = windowFunction.GetWindow(configuration.WdftSize);
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);

    for (int i = 0; i < width; i++)
    {
        // Frame i starts i * Overlap samples into the signal
        // (e.g. 2048-sample frames every 64 samples).
        int frameStart = i * configuration.Overlap;
        for (int j = 0; j < configuration.WdftSize; j++)
        {
            // Real part: windowed sample. Imaginary part: reset, because the buffer is reused
            // between frames and still holds the previous transform output.
            complexSignal[2 * j] = (float)(window[j] * samples[frameStart + j]);
            complexSignal[(2 * j) + 1] = 0;
        }

        // FFT transform for gathering the spectrum
        Fourier.FFT(complexSignal, configuration.WdftSize, FourierDirection.Forward);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    return frames;
}
/// <summary>
/// Reconstructs one time-domain frame from column <paramref name="column"/> of <paramref name="m"/>
/// and overlap-adds it into <paramref name="signal"/> starting at <c>hopsize * column</c>.
/// </summary>
/// <param name="m">Matrix whose columns are read with <c>GetColumn</c>.</param>
/// <param name="column">Index of the column (frame) to invert.</param>
/// <param name="signal">Output buffer that the reconstructed frame is accumulated into.</param>
/// <param name="winsize">Window size; the inverse-transform buffer holds <c>2 * winsize</c> values.</param>
/// <param name="hopsize">Distance in samples between the start of consecutive frames.</param>
public void ComputeInverseComirvaMatrixUsingLomontRealFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize)
{
    double[] columnValues = m.GetColumn(column);
    int binCount = columnValues.Length;

    // Extend the column with its mirror image: the first half is a straight copy, the second half
    // holds the same values in reverse order (slot [binCount] itself stays zero).
    double[] mirrored = new double[binCount * 2];
    Array.Copy(columnValues, mirrored, binCount);
    for (int k = 1; k < binCount; k++)
    {
        mirrored[binCount + k] = columnValues[binCount - k];
    }

    // Input layout expected by the inverse transform, per the original author:
    // r0, r(n/2), r1, i1, r2, i2 ...
    // Only the real slots are populated; the imaginary slots keep their default of zero.
    double[] packed = new double[winsize * 2];
    packed[0] = mirrored[0];
    packed[1] = mirrored[winsize / 2];
    for (int k = 1; k < mirrored.Length; k++)
    {
        packed[2 * k] = mirrored[k];
    }

    lomonFFT.RealFFT(packed, false);

    double[] smoothing = win.GetWindow();
    int sampleCount = packed.Length / 2;
    int frameOffset = hopsize * column;
    double normalizer = Math.Sqrt(winsize);

    for (int n = 0; n < sampleCount; n++)
    {
        // Take just the real part, normalize, and smooth with the window once more
        // (the window was also applied when doing the forward FFT).
        double realPart = packed[2 * n] / normalizer;
        double windowed = realPart * smoothing[n];

        // Overlap-add. The factor 5 compensates for the volume drop caused by the second smoothing pass.
        signal[n + frameOffset] = signal[n + frameOffset] + windowed * 5;
    }
}
/// <summary>
/// Reads a file as mono samples, normalizes them in place and computes a magnitude spectrogram.
/// Frames of <paramref name="wdftSize"/> samples are taken every <paramref name="overlap"/> samples.
/// </summary>
/// <param name="pathToFilename">Path of the audio file, forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="windowFunction">
/// Provides the window coefficients; it is indexed up to <c>wdftSize - 1</c>, so it must supply at
/// least <paramref name="wdftSize"/> values.
/// </param>
/// <param name="sampleRate">Sample rate forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="overlap">Number of samples between the start of consecutive frames.</param>
/// <param name="wdftSize">Number of samples per frame (transform size).</param>
/// <returns>
/// One row of <c>wdftSize / 2</c> magnitudes per frame. An empty array is returned when the file
/// is too short to yield a frame.
/// </returns>
public double[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
{
    // read 5512 Hz, Mono, PCM, with a specific proxy
    float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);
    NormalizeInPlace(samples);

    int width = (samples.Length - wdftSize) / overlap; /*width of the image*/
    if (width < 0)
    {
        // A file much shorter than one frame gave a negative width and made the allocation below
        // throw; a file only slightly too short already gave width == 0. Handle both as "no frames".
        width = 0;
    }

    double[][] frames = new double[width][];
    double[] complexSignal = new double[2 * wdftSize]; /*interleaved: even - Re, odd - Img*/
    double[] window = windowFunction.GetWindow();
    int bandCount = wdftSize / 2;

    for (int i = 0; i < width; i++)
    {
        // Frame i starts at i * overlap (e.g. 2048 samples each 64 samples).
        int frameStart = i * overlap;
        for (int j = 0; j < wdftSize; j++)
        {
            // Weight by the window
            complexSignal[2 * j] = window[j] * samples[frameStart + j];

            // need to clear out as fft modifies buffer (phase)
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);

        // When the input is purely real, its transform is Hermitian: the component at frequency f_k
        // is the complex conjugate of the component at -f_k, so the negative frequencies carry no
        // extra information. Only the first wdftSize / 2 bins are kept; the last band (the "+ 1"
        // bin) is deliberately not added.
        double[] band = new double[bandCount];
        for (int j = 0; j < bandCount; j++)
        {
            double re = complexSignal[2 * j];
            double img = complexSignal[(2 * j) + 1];

            band[j] = Math.Sqrt(((re * re) + (img * img)) * wdftSize);
        }

        frames[i] = band;
    }

    return frames;
}
/// <summary>
/// Computes a magnitude spectrogram for the audio file at <paramref name="pathToFilename"/>.
/// The samples are read as mono, normalized in place, cut into frames of
/// <paramref name="wdftSize"/> samples spaced <paramref name="overlap"/> samples apart, windowed
/// and transformed with <c>lomonFFT.TableFFT</c>.
/// </summary>
/// <param name="pathToFilename">Path of the audio file, forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="windowFunction">
/// Source of the window coefficients; at least <paramref name="wdftSize"/> values are read from it.
/// </param>
/// <param name="sampleRate">Sample rate forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="overlap">Hop, in samples, between consecutive frames.</param>
/// <param name="wdftSize">Frame length in samples (transform size).</param>
/// <returns>
/// A jagged array with one row per frame, each holding <c>wdftSize / 2</c> magnitudes; empty when
/// the input is too short to contain a frame.
/// </returns>
public double[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
{
    // read 5512 Hz, Mono, PCM, with a specific proxy
    float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);
    NormalizeInPlace(samples);

    // Width of the image. The division truncates toward zero, so inputs just under one frame long
    // give 0, but shorter inputs give a negative count that the array allocation rejects with an
    // exception. Clamp so every too-short input consistently yields an empty spectrogram.
    int width = (samples.Length - wdftSize) / overlap;
    if (width < 0)
    {
        width = 0;
    }

    double[][] frames = new double[width][];
    double[] complexSignal = new double[2 * wdftSize]; /*interleaved: even - Re, odd - Img*/
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // take one frame per hop (e.g. 2048 samples each 64 samples)
        for (int j = 0; j < wdftSize; j++)
        {
            // Weight by the window
            complexSignal[2 * j] = window[j] * samples[(i * overlap) + j];

            // need to clear out as fft modifies buffer (phase)
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);

        // A purely real input has a Hermitian transform, so the negative-frequency half repeats the
        // positive half. Keep only the first wdftSize / 2 bins; the last band (the "+ 1" bin) is
        // intentionally left out.
        double[] band = new double[wdftSize / 2];
        for (int j = 0; j < band.Length; j++)
        {
            double re = complexSignal[2 * j];
            double img = complexSignal[(2 * j) + 1];

            band[j] = Math.Sqrt(((re * re) + (img * img)) * wdftSize);
        }

        frames[i] = band;
    }

    return frames;
}
/// <summary>
/// Builds a log-binned spectrogram of <paramref name="samples"/> and writes the elapsed time to
/// <c>Dbg</c>. Frames of <c>configuration.WindowSize</c> samples are taken every
/// <c>configuration.Overlap</c> samples, windowed, transformed with <c>lomonFFT.TableFFT</c> and
/// reduced with <c>ExtractLogBins</c>.
/// </summary>
/// <param name="samples">
/// Audio samples. Normalized in place first when <c>configuration.NormalizeSignal</c> is set.
/// </param>
/// <param name="windowFunction">
/// Source of the window coefficients; at least <c>configuration.WindowSize</c> values are read.
/// </param>
/// <param name="configuration">
/// Supplies <c>NormalizeSignal</c>, <c>WindowSize</c>, <c>Overlap</c> and <c>LogBins</c>.
/// </param>
/// <returns>
/// One row per analysed frame. An empty array is returned when the input is too short to yield a frame.
/// </returns>
public double[][] CreateLogSpectrogram(
    float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    int width = (samples.Length - configuration.WindowSize) / configuration.Overlap; /*width of the image*/
    if (width < 0)
    {
        // Inputs much shorter than one window gave a negative width and crashed the allocation
        // below, whereas slightly short inputs already gave 0. Clamp so both produce an empty
        // result and the timing line is still written.
        width = 0;
    }

    double[][] frames = new double[width][];
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // A fresh interleaved buffer per frame: even - Re, odd - Img.
        double[] complexSignal = new double[2 * configuration.WindowSize];

        // e.g. 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512)
        // or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000)
        int frameStart = i * configuration.Overlap;
        for (int j = 0; j < configuration.WindowSize; j++)
        {
            // Weight by the window
            complexSignal[2 * j] = window[j] * samples[frameStart + j];

            // imaginary part of a real-valued input is zero
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    Dbg.WriteLine("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return frames;
}
/// <summary>
/// Builds a log-binned spectrogram of <paramref name="samples"/>. Frames of
/// <c>configuration.WdftSize</c> samples are taken every <c>configuration.Overlap</c> samples,
/// windowed, transformed with <c>lomonFFT.TableFFT</c> and reduced with <c>ExtractLogBins</c>.
/// </summary>
/// <param name="samples">
/// Audio samples. Normalized in place first when <c>configuration.NormalizeSignal</c> is set.
/// </param>
/// <param name="windowFunction">
/// Source of the window coefficients; at least <c>configuration.WdftSize</c> values are read.
/// </param>
/// <param name="configuration">
/// Supplies <c>NormalizeSignal</c>, <c>WdftSize</c>, <c>Overlap</c> and <c>LogBins</c>.
/// </param>
/// <returns>
/// One row per analysed frame. An empty array is returned when the input is too short to yield a frame.
/// </returns>
public double[][] CreateLogSpectrogram(
    float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    // Note: the samples are NOT scaled up to the 16-bit range here (Matlab-style code multiplies
    // by 2^15 when max(abs(speech)) <= 1); only the optional normalization below is applied.
    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    int width = (samples.Length - configuration.WdftSize) / configuration.Overlap; /*width of the image*/
    if (width < 0)
    {
        // Inputs much shorter than one frame gave a negative width and crashed the allocation
        // below, whereas slightly short inputs already gave 0. Treat both as "no frames".
        width = 0;
    }

    double[][] frames = new double[width][];
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // A fresh interleaved buffer per frame: even - Re, odd - Img.
        double[] complexSignal = new double[2 * configuration.WdftSize];

        // e.g. 371 ms each 11.6 ms (2048 samples each 64 samples)
        int frameStart = i * configuration.Overlap;
        for (int j = 0; j < configuration.WdftSize; j++)
        {
            // Weight by the window
            complexSignal[2 * j] = window[j] * samples[frameStart + j];

            // imaginary part of a real-valued input is zero
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    return frames;
}
/// <summary>
/// Reads a file as mono samples, normalizes them in place and computes a magnitude spectrogram
/// with a forward FFT. Frames of <paramref name="wdftSize"/> samples are taken every
/// <paramref name="overlap"/> samples.
/// </summary>
/// <param name="pathToFilename">Path of the audio file, forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="windowFunction">Provides the window coefficients for a frame of <paramref name="wdftSize"/> samples.</param>
/// <param name="sampleRate">Sample rate forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="overlap">Number of samples between the start of consecutive frames.</param>
/// <param name="wdftSize">Number of samples per frame (transform size).</param>
/// <returns>
/// One row of <c>wdftSize / 2 + 1</c> magnitudes per frame, with real and imaginary parts divided
/// by <c>wdftSize / 2</c> before the magnitude is taken. An empty array is returned when the file
/// is too short to yield a frame.
/// </returns>
public float[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
{
    // read 5512 Hz, Mono, PCM, with a specific proxy
    float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);
    NormalizeInPlace(samples);

    int width = (samples.Length - wdftSize) / overlap; /*width of the image*/
    if (width < 0)
    {
        // A file much shorter than one frame gave a negative width and made the allocation below
        // throw; a file only slightly too short already gave width == 0. Handle both as "no frames".
        width = 0;
    }

    float[][] frames = new float[width][];
    float[] complexSignal = new float[2 * wdftSize]; /*even - Re, odd - Img*/
    double[] window = windowFunction.GetWindow(wdftSize);

    int bandCount = (wdftSize / 2) + 1;
    float scale = (float)wdftSize / 2; // loop-invariant normalization divisor

    for (int i = 0; i < width; i++)
    {
        // Frame i starts at i * overlap (e.g. 2048 samples each 64 samples).
        int frameStart = i * overlap;
        for (int j = 0; j < wdftSize; j++)
        {
            // Real part: windowed sample. Imaginary part: reset, because the buffer is reused
            // between frames and still holds the previous transform output.
            complexSignal[2 * j] = (float)(window[j] * samples[frameStart + j]);
            complexSignal[(2 * j) + 1] = 0;
        }

        Fourier.FFT(complexSignal, wdftSize, FourierDirection.Forward);

        float[] band = new float[bandCount];
        for (int j = 0; j < bandCount; j++)
        {
            double re = complexSignal[2 * j];
            double img = complexSignal[(2 * j) + 1];
            re /= scale;
            img /= scale;
            band[j] = (float)Math.Sqrt((re * re) + (img * img));
        }

        frames[i] = band;
    }

    return frames;
}
/// <summary>
/// Computes a log-binned spectrogram for <paramref name="samples"/> and reports the execution time
/// through <c>Dbg.WriteLine</c>. The signal is cut into frames of <c>configuration.WindowSize</c>
/// samples spaced <c>configuration.Overlap</c> samples apart; each frame is windowed, transformed
/// with <c>lomonFFT.TableFFT</c> and passed to <c>ExtractLogBins</c>.
/// </summary>
/// <param name="samples">
/// Audio samples. They are normalized in place only when <c>configuration.NormalizeSignal</c> is true.
/// </param>
/// <param name="windowFunction">
/// Provides the window coefficients; it is indexed up to <c>configuration.WindowSize - 1</c>.
/// </param>
/// <param name="configuration">
/// Supplies <c>NormalizeSignal</c>, <c>WindowSize</c>, <c>Overlap</c> and <c>LogBins</c>.
/// </param>
/// <returns>
/// A jagged array with one row per frame; empty when the input is too short to contain a frame.
/// </returns>
public double[][] CreateLogSpectrogram(
    float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    // Width of the image. The division truncates toward zero, so inputs just under one window long
    // give 0, but shorter inputs give a negative count that the array allocation rejects with an
    // exception. Clamp so every too-short input yields an empty spectrogram and is still timed.
    int width = (samples.Length - configuration.WindowSize) / configuration.Overlap;
    if (width < 0)
    {
        width = 0;
    }

    double[][] frames = new double[width][];
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // Interleaved complex buffer, allocated per frame: even - Re, odd - Img.
        double[] complexSignal = new double[2 * configuration.WindowSize];

        // e.g. 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512)
        // or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000)
        for (int j = 0; j < configuration.WindowSize; j++)
        {
            // Weight by the window
            complexSignal[2 * j] = window[j] * samples[(i * configuration.Overlap) + j];

            // imaginary part of a real-valued input is zero
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    Dbg.WriteLine("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return frames;
}