/// <summary>
/// Builds a log-binned spectrogram: every frame of <paramref name="samples"/> is weighted by the
/// supplied window, run through a forward FFT and reduced to log bins by <c>ExtractLogBins</c>.
/// </summary>
/// <param name="samples">Input signal. It is handed to <c>NormalizeInPlace</c> first, so the caller's array is modified.</param>
/// <param name="windowFunction">Provider of the window coefficients (requested with length <c>WdftSize</c>).</param>
/// <param name="configuration">Supplies <c>WdftSize</c> (frame length), <c>Overlap</c> (used as the step between frame starts) and <c>LogBins</c>.</param>
/// <returns>
/// One row per frame. The result is empty when the input does not hold enough samples for a single frame.
/// </returns>
public float[][] CreateLogSpectrogram(
            float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
        {
            NormalizeInPlace(samples);

            int frameSize = configuration.WdftSize;
            int step = configuration.Overlap;

            int width = (samples.Length - frameSize) / step; /*width of the image*/
            if (width < 0)
            {
                // Input shorter than one frame: a negative array length would throw
                // OverflowException. Treat it like the already-supported width == 0 case.
                width = 0;
            }

            float[][] frames = new float[width][];
            float[] complexSignal = new float[2 * frameSize]; /*even - Re, odd - Img*/
            double[] window = windowFunction.GetWindow(frameSize);
            int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
            for (int frame = 0; frame < width; frame++)
            {
                int offset = frame * step;
                for (int k = 0; k < frameSize; k++)
                {
                    // Real part: windowed sample. Imaginary part must be reset because the
                    // buffer is reused and the FFT below overwrites it in place.
                    complexSignal[2 * k] = (float)(window[k] * samples[offset + k]);
                    complexSignal[(2 * k) + 1] = 0;
                }

                // FFT transform for gathering the spectrum
                Fourier.FFT(complexSignal, frameSize, FourierDirection.Forward);
                frames[frame] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            return frames;
        }
Example #2
0
        /// <summary>
        /// Reconstructs one time-domain frame from column <paramref name="column"/> of the spectrogram
        /// matrix and overlap-adds it into <paramref name="signal"/>.
        /// </summary>
        /// <param name="m">Spectrogram matrix; one column is read via <c>GetColumn</c>.</param>
        /// <param name="column">Index of the column to invert; also determines the write offset (<c>hopsize * column</c>).</param>
        /// <param name="signal">Output buffer the reconstructed frame is added into.</param>
        /// <param name="winsize">Window size; the transform buffer holds <c>2 * winsize</c> values.</param>
        /// <param name="hopsize">Distance in samples between the starts of consecutive frames in <paramref name="signal"/>.</param>
        public void ComputeInverseComirvaMatrixUsingLomontRealFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize)
        {
            double[] spectrumColumn = m.GetColumn(column);
            int binCount = spectrumColumn.Length;

            // Mirror the column behind itself: [c0 .. c(n-1), 0, c(n-1) .. c1].
            // The slot at index binCount is never written and therefore stays zero.
            double[] mirrored = new double[binCount * 2];
            Array.Copy(spectrumColumn, mirrored, binCount);
            for (int k = 1; k < binCount; k++)
            {
                mirrored[binCount + k] = spectrumColumn[binCount - k];
            }

            // The inverse transform expects the layout r0, r(n/2), r1, i1, r2, i2 ...
            // Only the real slots are filled; the imaginary slots keep their default of zero.
            double[] transformBuffer = new double[winsize * 2];
            transformBuffer[0] = mirrored[0];
            transformBuffer[1] = mirrored[winsize / 2];
            for (int k = 1; k < mirrored.Length; k++)
            {
                transformBuffer[2 * k] = mirrored[k];
            }

            lomonFFT.RealFFT(transformBuffer, false);

            double[] window = win.GetWindow();

            int sampleCount = transformBuffer.Length / 2;
            double normalization = Math.Sqrt(winsize);
            int writeOffset = hopsize * column;
            for (int n = 0; n < sampleCount; n++)
            {
                // Keep the real part only and smooth it with the window once more
                // (the forward transform applied the window as well).
                double smoothed = (transformBuffer[2 * n] / normalization) * window[n];

                // Overlap-add. The factor 5 compensates for the volume lost to the
                // second smoothing pass during reconstruction.
                signal[n + writeOffset] = signal[n + writeOffset] + (smoothed * 5);
            }
        }
Example #3
0
        /// <summary>
        /// Reads a mono signal from a file and computes its magnitude spectrogram: each frame is
        /// weighted by the supplied window, transformed with <c>lomonFFT.TableFFT</c>, and the
        /// first <c>wdftSize / 2</c> bins are converted to scaled magnitudes.
        /// </summary>
        /// <param name="pathToFilename">Audio file passed to <c>ReadMonoFromFile</c>.</param>
        /// <param name="windowFunction">Provider of the window coefficients; must yield at least <paramref name="wdftSize"/> values.</param>
        /// <param name="sampleRate">Sample rate passed to <c>ReadMonoFromFile</c>.</param>
        /// <param name="overlap">Step, in samples, between the starts of consecutive frames.</param>
        /// <param name="wdftSize">Frame (transform) length in samples.</param>
        /// <returns>
        /// One row of <c>wdftSize / 2</c> magnitudes per frame. The result is empty when the file
        /// does not hold enough samples for a single frame.
        /// </returns>
        public double[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
        {
            float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);

            NormalizeInPlace(samples);

            int width = (samples.Length - wdftSize) / overlap;             /*width of the image*/
            if (width < 0)
            {
                // Input shorter than one frame: a negative array length would throw
                // OverflowException. Treat it like the already-supported width == 0 case.
                width = 0;
            }

            int bandCount = wdftSize / 2;                                  // the bin at wdftSize / 2 is deliberately left out

            double[][] frames        = new double[width][];
            double[]   complexSignal = new double[2 * wdftSize];           /*even - Re, odd - Img*/
            double[]   window        = windowFunction.GetWindow();
            for (int frame = 0; frame < width; frame++)
            {
                int offset = frame * overlap;
                for (int k = 0; k < wdftSize; k++)
                {
                    // Real part: windowed sample.
                    complexSignal[2 * k] = window[k] * samples[offset + k];

                    // Imaginary part must be cleared: the buffer is reused and the FFT
                    // overwrites it in place.
                    complexSignal[(2 * k) + 1] = 0;
                }

                lomonFFT.TableFFT(complexSignal, true);

                // The input is purely real, so the spectrum is Hermitian and the
                // negative-frequency half carries no additional information; only the
                // lower half of the bins is kept.
                double[] band = new double[bandCount];
                for (int k = 0; k < bandCount; k++)
                {
                    double re  = complexSignal[2 * k];
                    double img = complexSignal[(2 * k) + 1];

                    band[k] = Math.Sqrt(((re * re) + (img * img)) * wdftSize);
                }

                frames[frame] = band;
            }

            return frames;
        }
		/// <summary>
		/// Reads a mono signal from a file and computes its magnitude spectrogram: each frame is
		/// weighted by the supplied window, transformed with <c>lomonFFT.TableFFT</c>, and the
		/// first <c>wdftSize / 2</c> bins are converted to scaled magnitudes.
		/// </summary>
		/// <param name="pathToFilename">Audio file passed to <c>ReadMonoFromFile</c>.</param>
		/// <param name="windowFunction">Provider of the window coefficients; must yield at least <paramref name="wdftSize"/> values.</param>
		/// <param name="sampleRate">Sample rate passed to <c>ReadMonoFromFile</c>.</param>
		/// <param name="overlap">Step, in samples, between the starts of consecutive frames.</param>
		/// <param name="wdftSize">Frame (transform) length in samples.</param>
		/// <returns>
		/// One row of <c>wdftSize / 2</c> magnitudes per frame. The result is empty when the file
		/// does not hold enough samples for a single frame.
		/// </returns>
		public double[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
		{
			float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);

			NormalizeInPlace(samples);

			int width = (samples.Length - wdftSize) / overlap; /*width of the image*/
			if (width < 0)
			{
				// Input shorter than one frame: a negative array length would throw
				// OverflowException. Treat it like the already-supported width == 0 case.
				width = 0;
			}

			int bandCount = wdftSize / 2; // the bin at wdftSize / 2 is deliberately left out

			double[][] frames = new double[width][];
			double[] complexSignal = new double[2 * wdftSize]; /*even - Re, odd - Img*/
			double[] window = windowFunction.GetWindow();
			for (int frame = 0; frame < width; frame++)
			{
				int offset = frame * overlap;
				for (int k = 0; k < wdftSize; k++)
				{
					// Real part: windowed sample.
					complexSignal[2 * k] = window[k] * samples[offset + k];

					// Imaginary part must be cleared: the buffer is reused and the FFT
					// overwrites it in place.
					complexSignal[(2 * k) + 1] = 0;
				}

				lomonFFT.TableFFT(complexSignal, true);

				// The input is purely real, so the spectrum is Hermitian and the
				// negative-frequency half carries no additional information; only the
				// lower half of the bins is kept.
				double[] band = new double[bandCount];
				for (int k = 0; k < bandCount; k++)
				{
					double re = complexSignal[2 * k];
					double img = complexSignal[(2 * k) + 1];

					band[k] = Math.Sqrt(((re * re) + (img * img)) * wdftSize);
				}

				frames[frame] = band;
			}

			return frames;
		}
Example #5
0
        /// <summary>
        /// Builds a log-binned spectrogram: every frame of <paramref name="samples"/> is weighted by
        /// the supplied window, transformed with <c>lomonFFT.TableFFT</c> and reduced to log bins by
        /// <c>ExtractLogBins</c>. The elapsed time is written through <c>Dbg.WriteLine</c>.
        /// </summary>
        /// <param name="samples">Input signal. Passed to <c>NormalizeInPlace</c> when <c>configuration.NormalizeSignal</c> is set, which modifies the caller's array.</param>
        /// <param name="windowFunction">Provider of the window coefficients; must yield at least <c>WindowSize</c> values.</param>
        /// <param name="configuration">Supplies <c>WindowSize</c> (frame length), <c>Overlap</c> (used as the step between frame starts), <c>LogBins</c> and <c>NormalizeSignal</c>.</param>
        /// <returns>
        /// One row per frame. The result is empty when the input does not hold enough samples for a single frame.
        /// </returns>
        public double[][] CreateLogSpectrogram(
            float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
        {
            DbgTimer t = new DbgTimer();

            t.Start();

            if (configuration.NormalizeSignal)
            {
                NormalizeInPlace(samples);
            }

            int frameSize = configuration.WindowSize;
            int step = configuration.Overlap;

            int width = (samples.Length - frameSize) / step;             /*width of the image*/
            if (width < 0)
            {
                // Input shorter than one frame: a negative array length would throw
                // OverflowException. Clamp instead of returning early so the timing
                // line below is still written.
                width = 0;
            }

            double[][] frames = new double[width][];
            int[]      logFrequenciesIndexes = GenerateLogFrequencies(configuration);
            double[]   window = windowFunction.GetWindow();
            for (int frame = 0; frame < width; frame++)
            {
                // A fresh buffer per frame (even - Re, odd - Img); the imaginary slots
                // are still cleared explicitly below.
                double[] complexSignal = new double[2 * frameSize];

                // e.g. 2048 samples every 64 samples at 5512 Hz (371 ms each 11.6 ms),
                // or 8192 samples every 512 samples at 32000 Hz (256 ms each 16 ms)
                int offset = frame * step;
                for (int k = 0; k < frameSize; k++)
                {
                    complexSignal[2 * k] = window[k] * samples[offset + k];
                    complexSignal[(2 * k) + 1] = 0;
                }

                lomonFFT.TableFFT(complexSignal, true);

                frames[frame] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            Dbg.WriteLine("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return frames;
        }
Example #6
0
        /// <summary>
        /// Builds a log-binned spectrogram: every frame of <paramref name="samples"/> is weighted by
        /// the supplied window, transformed with <c>lomonFFT.TableFFT</c> and reduced to log bins by
        /// <c>ExtractLogBins</c>.
        /// </summary>
        /// <param name="samples">Input signal. Passed to <c>NormalizeInPlace</c> when <c>configuration.NormalizeSignal</c> is set, which modifies the caller's array.</param>
        /// <param name="windowFunction">Provider of the window coefficients; must yield at least <c>WdftSize</c> values.</param>
        /// <param name="configuration">Supplies <c>WdftSize</c> (frame length), <c>Overlap</c> (used as the step between frame starts), <c>LogBins</c> and <c>NormalizeSignal</c>.</param>
        /// <returns>
        /// One row per frame. The result is empty when the input does not hold enough samples for a single frame.
        /// </returns>
        public double[][] CreateLogSpectrogram(
            float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
        {
            // Note: the samples are not scaled up to the 16-bit range (x 2^15, as Matlab
            // does) before processing; only the optional normalization below is applied.
            if (configuration.NormalizeSignal)
            {
                NormalizeInPlace(samples);
            }

            int frameSize = configuration.WdftSize;
            int step = configuration.Overlap;

            int width = (samples.Length - frameSize) / step; /*width of the image*/
            if (width < 0)
            {
                // Input shorter than one frame: a negative array length would throw
                // OverflowException. Treat it like the already-supported width == 0 case.
                width = 0;
            }

            double[][] frames = new double[width][];
            int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
            double[] window = windowFunction.GetWindow();
            for (int frame = 0; frame < width; frame++)
            {
                // A fresh buffer per frame (even - Re, odd - Img); the imaginary slots
                // are still cleared explicitly below.
                double[] complexSignal = new double[2 * frameSize];

                int offset = frame * step;
                for (int k = 0; k < frameSize; k++)
                {
                    complexSignal[2 * k] = window[k] * samples[offset + k];
                    complexSignal[(2 * k) + 1] = 0;
                }

                lomonFFT.TableFFT(complexSignal, true);

                frames[frame] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            return frames;
        }
        /// <summary>
        /// Reads a mono signal from a file and computes its magnitude spectrogram: each frame is
        /// weighted by the supplied window, run through a forward FFT, and the first
        /// <c>wdftSize / 2 + 1</c> bins are converted to magnitudes after dividing the real and
        /// imaginary parts by <c>wdftSize / 2</c>.
        /// </summary>
        /// <param name="pathToFilename">Audio file passed to <c>ReadMonoFromFile</c>.</param>
        /// <param name="windowFunction">Provider of the window coefficients (requested with length <paramref name="wdftSize"/>).</param>
        /// <param name="sampleRate">Sample rate passed to <c>ReadMonoFromFile</c>.</param>
        /// <param name="overlap">Step, in samples, between the starts of consecutive frames.</param>
        /// <param name="wdftSize">Frame (transform) length in samples.</param>
        /// <returns>
        /// One row of <c>wdftSize / 2 + 1</c> magnitudes per frame. The result is empty when the
        /// file does not hold enough samples for a single frame.
        /// </returns>
        public float[][] CreateSpectrogram(string pathToFilename, IWindowFunction windowFunction, int sampleRate, int overlap, int wdftSize)
        {
            float[] samples = ReadMonoFromFile(pathToFilename, sampleRate, 0, 0);

            NormalizeInPlace(samples);

            int width = (samples.Length - wdftSize) / overlap; /*width of the image*/
            if (width < 0)
            {
                // Input shorter than one frame: a negative array length would throw
                // OverflowException. Treat it like the already-supported width == 0 case.
                width = 0;
            }

            int bandCount = (wdftSize / 2) + 1;
            float halfSize = (float)wdftSize / 2;

            float[][] frames = new float[width][];
            float[] complexSignal = new float[2 * wdftSize]; /*even - Re, odd - Img*/
            double[] window = windowFunction.GetWindow(wdftSize);
            for (int frame = 0; frame < width; frame++)
            {
                int offset = frame * overlap;
                for (int k = 0; k < wdftSize; k++)
                {
                    // Real part: windowed sample. Imaginary part must be reset because the
                    // buffer is reused and the FFT below overwrites it in place.
                    complexSignal[2 * k] = (float)(window[k] * samples[offset + k]);
                    complexSignal[(2 * k) + 1] = 0;
                }

                Fourier.FFT(complexSignal, wdftSize, FourierDirection.Forward);

                float[] band = new float[bandCount];
                for (int k = 0; k < bandCount; k++)
                {
                    double re = complexSignal[2 * k];
                    double img = complexSignal[(2 * k) + 1];
                    re /= halfSize;
                    img /= halfSize;
                    band[k] = (float)Math.Sqrt((re * re) + (img * img));
                }

                frames[frame] = band;
            }

            return frames;
        }
		/// <summary>
		/// Builds a log-binned spectrogram: every frame of <paramref name="samples"/> is weighted by
		/// the supplied window, transformed with <c>lomonFFT.TableFFT</c> and reduced to log bins by
		/// <c>ExtractLogBins</c>. The elapsed time is written through <c>Dbg.WriteLine</c>.
		/// </summary>
		/// <param name="samples">Input signal. Passed to <c>NormalizeInPlace</c> when <c>configuration.NormalizeSignal</c> is set, which modifies the caller's array.</param>
		/// <param name="windowFunction">Provider of the window coefficients; must yield at least <c>WindowSize</c> values.</param>
		/// <param name="configuration">Supplies <c>WindowSize</c> (frame length), <c>Overlap</c> (used as the step between frame starts), <c>LogBins</c> and <c>NormalizeSignal</c>.</param>
		/// <returns>
		/// One row per frame. The result is empty when the input does not hold enough samples for a single frame.
		/// </returns>
		public double[][] CreateLogSpectrogram(
			float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
		{
			DbgTimer t = new DbgTimer();
			t.Start ();

			if (configuration.NormalizeSignal)
			{
				NormalizeInPlace(samples);
			}

			int frameSize = configuration.WindowSize;
			int step = configuration.Overlap;

			int width = (samples.Length - frameSize) / step; /*width of the image*/
			if (width < 0)
			{
				// Input shorter than one frame: a negative array length would throw
				// OverflowException. Clamp instead of returning early so the timing
				// line below is still written.
				width = 0;
			}

			double[][] frames = new double[width][];
			int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
			double[] window = windowFunction.GetWindow();
			for (int frame = 0; frame < width; frame++)
			{
				// A fresh buffer per frame (even - Re, odd - Img); the imaginary slots
				// are still cleared explicitly below.
				double[] complexSignal = new double[2 * frameSize];

				// e.g. 2048 samples every 64 samples at 5512 Hz (371 ms each 11.6 ms),
				// or 8192 samples every 512 samples at 32000 Hz (256 ms each 16 ms)
				int offset = frame * step;
				for (int k = 0; k < frameSize; k++)
				{
					complexSignal[2 * k] = window[k] * samples[offset + k];
					complexSignal[(2 * k) + 1] = 0;
				}

				lomonFFT.TableFFT(complexSignal, true);

				frames[frame] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
			}

			Dbg.WriteLine ("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
			return frames;
		}