/// <summary>
/// Runs a generated sine signal through <c>STFT.Direct</c> and back through
/// <c>STFT.Inverse</c>, then computes the difference between the input and
/// the reconstructed signal.
/// </summary>
/// <remarks>
/// NOTE(review): the difference is computed but never asserted on, so this
/// test can only fail if one of the calls throws.
/// </remarks>
public void STFTTest() {
    uint sampleRate = 44100;

    // Source signal produced by the library's sine generator.
    double[] sineWave = mdsplib.DSP.Generate.Sine(110, sampleRate, 2048);

    // Forward transform followed by the inverse transform.
    var spectrogram = STFT.Direct(sineWave);
    double[] roundTrip = STFT.Inverse(spectrogram, 2048, 0);

    // Difference between the source and the round-tripped signal.
    double[] residual = sineWave.Subtract(roundTrip);
}
/// <summary>
/// Reads the input track frame by frame through an STFT and turns each pair of
/// consecutive band frames into sub-fingerprints.
/// </summary>
/// <remarks>
/// The source stream (taken from the track's file or from its attached stream)
/// is wrapped in a <c>MonoStream</c> and a <c>ResamplingStream</c> targeting
/// <c>profile.SampleRate</c>. <c>SubFingerprintsGenerated</c> is raised every
/// <c>eventInterval</c> frames and once more after the last frame;
/// <c>Completed</c> is raised afterwards. The stream is closed when the method
/// leaves, whether it finishes normally or an exception propagates.
/// </remarks>
public void Generate() {
    IAudioStream audioStream = inputTrack.File
        ? AudioStreamFactory.FromFileInfoIeee32(inputTrack.FileInfo)
        : inputTrack.Stream;

    try {
        audioStream = new MonoStream(audioStream);
        audioStream = new ResamplingStream(audioStream, ResamplingQuality.Medium, profile.SampleRate);

        STFT stft = new STFT(audioStream, profile.FrameSize, profile.FrameStep,
            WindowType.Hann, STFT.OutputFormat.Decibel, this.bufferSize);

        index = 0;
        indices = stft.WindowCount;

        frameBuffer = new float[profile.FrameSize / 2];
        List<SubFingerprint> subFingerprints = new List<SubFingerprint>();

        while (stft.HasNext()) {
            // Get FFT spectrum
            stft.ReadFrame(frameBuffer);

            // Sum FFT bins into target frequency bands
            profile.MapFrequencies(frameBuffer, bands);

            CalculateSubFingerprint(bandsPrev, bands, subFingerprints);

            // The current bands become the "previous" bands of the next frame
            CommonUtil.Swap<float[]>(ref bands, ref bandsPrev);

            index++;

            // Output subfingerprints every once in a while.
            // The list is only cleared when it has actually been handed to a subscriber.
            if (index % this.eventInterval == 0 && SubFingerprintsGenerated != null) {
                SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
                subFingerprints.Clear();
            }
        }

        // Output remaining subfingerprints
        if (SubFingerprintsGenerated != null) {
            SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
        }

        if (Completed != null) {
            Completed(this, EventArgs.Empty);
        }
    }
    finally {
        // Release the stream even if reading, fingerprinting or an event handler
        // throws. The null guard keeps a missing source stream from masking the
        // original exception with a NullReferenceException.
        if (audioStream != null) {
            audioStream.Close();
        }
    }
}
/// <summary>
/// Generates sub-fingerprints for the given track from pairs of spectral peaks.
/// </summary>
/// <remarks>
/// For every STFT frame the method keeps a running per-bin average of the
/// spectrum, computes the residual against that average, picks the tallest
/// local maxima of the residual as peaks and stores them in a peak history.
/// Once the history is filled, peak pairs are extracted and converted into
/// sub-fingerprints, which are reported through
/// <c>FireFingerprintHashesGenerated</c> in batches and once more at the end.
/// <c>FrameProcessed</c> is raised for every non-skipped frame when it has
/// subscribers. The audio stream is closed when the method leaves, whether it
/// finishes normally or an exception propagates.
/// </remarks>
/// <param name="track">The audio track whose file is read and fingerprinted.</param>
public void Generate(AudioTrack track) {
    IAudioStream audioStream = new ResamplingStream(
        new MonoStream(AudioStreamFactory.FromFileInfoIeee32(track.FileInfo)),
        ResamplingQuality.Medium, profile.SamplingRate);

    try {
        STFT stft = new STFT(audioStream, profile.WindowSize, profile.HopSize,
            WindowType.Hann, STFT.OutputFormat.Decibel);

        int index = 0;
        int indices = stft.WindowCount;
        int processedFrames = 0;

        float[] spectrum = new float[profile.WindowSize / 2];
        // The smoothed frequency spectrum of the current frame.
        // NOTE(review): this buffer is written below but never read within this method,
        // and the fill condition (i >= SpectrumSmoothingLength) skips the first
        // full-window average and leaves the last slot unwritten. Confirm whether the
        // smoothing is still needed before changing it.
        float[] smoothedSpectrum = new float[spectrum.Length - profile.SpectrumSmoothingLength + 1];
        var spectrumSmoother = new SimpleMovingAverage(profile.SpectrumSmoothingLength);
        // A running average of each spectrum bin over time
        float[] spectrumTemporalAverage = new float[spectrum.Length];
        // The difference between the current spectrum and the moving average spectrum
        float[] spectrumResidual = new float[spectrum.Length];

        var peakHistory = new PeakHistory(1 + profile.TargetZoneDistance + profile.TargetZoneLength, spectrum.Length / 2);
        // Keep a single instance of the list to avoid instantiation overhead
        var peakPairs = new List<PeakPair>(profile.PeaksPerFrame * profile.PeakFanout);
        var subFingerprints = new List<SubFingerprint>();

        while (stft.HasNext()) {
            // Get the FFT spectrum
            stft.ReadFrame(spectrum);

            // Skip frames whose average spectrum volume is below the threshold.
            // This skips silent frames (zero samples) that only contain very low noise from the FFT
            // and that would distort the temporal spectrum average below for the following frames.
            if (spectrum.Average() < spectrumMinThreshold) {
                index++;
                continue;
            }

            // Smooth the frequency spectrum to remove small peaks
            if (profile.SpectrumSmoothingLength > 0) {
                spectrumSmoother.Clear();
                for (int i = 0; i < spectrum.Length; i++) {
                    var avg = spectrumSmoother.Add(spectrum[i]);
                    if (i >= profile.SpectrumSmoothingLength) {
                        smoothedSpectrum[i - profile.SpectrumSmoothingLength] = avg;
                    }
                }
            }

            // Update the temporal moving bin average
            if (processedFrames == 0) {
                // Init averages on first frame
                for (int i = 0; i < spectrum.Length; i++) {
                    spectrumTemporalAverage[i] = spectrum[i];
                }
            }
            else {
                // Update averages on all subsequent frames
                for (int i = 0; i < spectrum.Length; i++) {
                    spectrumTemporalAverage[i] = ExponentialMovingAverage.UpdateMovingAverage(
                        spectrumTemporalAverage[i], profile.SpectrumTemporalSmoothingCoefficient, spectrum[i]);
                }
            }

            // Calculate the residual.
            // The residual is the difference of the current spectrum to the temporal average spectrum.
            // The higher a bin residual is, the steeper the increase in energy in that peak.
            for (int i = 0; i < spectrum.Length; i++) {
                spectrumResidual[i] = spectrum[i] - spectrumTemporalAverage[i] - 90f;
            }

            // Find local peaks in the residual.
            // The advantage of finding peaks in the residual instead of the spectrum is that spectrum
            // energy is usually concentrated in the low frequencies, resulting in a clustering of the
            // highest peaks in the lows. Getting peaks from the residual distributes the peaks more
            // evenly across the spectrum.
            var peaks = peakHistory.List; // take oldest list,
            peaks.Clear(); // clear it, and
            FindLocalMaxima(spectrumResidual, peaks); // refill with new peaks

            // Pick the largest n peaks
            int numMaxima = Math.Min(peaks.Count, profile.PeaksPerFrame);
            if (numMaxima > 0) {
                // Order peaks by height (descending)
                peaks.Sort((p1, p2) => p1.Value == p2.Value ? 0 : p1.Value < p2.Value ? 1 : -1);
                if (peaks.Count > numMaxima) {
                    // Select the n tallest peaks by deleting the rest
                    peaks.RemoveRange(numMaxima, peaks.Count - numMaxima);
                }
                // Sort peaks by index (not really necessary)
                peaks.Sort((p1, p2) => p1.Index == p2.Index ? 0 : p1.Index < p2.Index ? -1 : 1);
            }

            peakHistory.Add(index, peaks);

            if (FrameProcessed != null) {
                // Mark peaks as 0dB for spectrogram display purposes
                foreach (var peak in peaks) {
                    spectrum[peak.Index] = 0;
                    spectrumResidual[peak.Index] = 0;
                }

                // The same array instances are handed out on every frame and are
                // overwritten when the next frame is processed.
                FrameProcessed(this, new FrameProcessedEventArgs {
                    AudioTrack = track,
                    Index = index,
                    Indices = indices,
                    Spectrum = spectrum,
                    SpectrumResidual = spectrumResidual
                });
            }

            processedFrames++;
            index++;

            // Pairs can only be formed once the history holds enough frames
            if (processedFrames >= peakHistory.Length) {
                peakPairs.Clear();
                FindPairsWithMaxEnergy(peakHistory, peakPairs);
                ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
            }

            // Report in batches to keep the pending list small
            if (subFingerprints.Count > 512) {
                FireFingerprintHashesGenerated(track, indices, subFingerprints);
                subFingerprints.Clear();
            }
        }

        // Flush the remaining peaks of the last frames from the history to get all remaining pairs
        for (int i = 0; i < profile.TargetZoneLength; i++) {
            var peaks = peakHistory.List;
            peaks.Clear();
            peakHistory.Add(-1, peaks);
            peakPairs.Clear();
            FindPairsWithMaxEnergy(peakHistory, peakPairs);
            ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
        }
        FireFingerprintHashesGenerated(track, indices, subFingerprints);
    }
    finally {
        // Release the stream even if processing or an event handler throws.
        audioStream.Close();
    }
}
/// <summary>
/// Renders spectrograms of the test wave file with three STFT implementations
/// (the <c>STFT</c> class, <c>FFTUtils.CreateSpectrogramFFTW</c> and
/// <c>FFTUtils.CreateSpectrogramLomont</c>), then runs the inverse STFT on the
/// first result and writes the reconstructed audio as a graph and a wave file.
/// </summary>
/// <remarks>
/// Output files are written with the prefix "test". The test ends with
/// <c>Assert.Pass</c>; no numeric comparison between the results is performed.
/// </remarks>
public void TestFFTAudioMatrixMethod() {
    // Reference image harmor_HQ.bmp is 1645 (width) x 511 (height), 32 bit
    const string outputPrefix = "test";
    const int targetWidth = 1645;

    var bass = BassProxy.Instance;

    // 0. Read the audio data as mono
    float[] samples = BassProxy.ReadMonoFromFile(WAVE_INPUT_FILEPATH, SAMPLING_RATE);

    // Derive the step from the desired image width
    // (the inverse relation would be: width = (samples.Length - WINDOW_SIZE) / step)
    double usableLength = (double)(samples.Length - WINDOW_SIZE);
    int step = (int)(usableLength / (double)targetWidth);

    // 1. Scale the samples up to the 16 bit short range (-32,768 to 32,767).
    //    Matlab multiplies with 2^15 (32768), e.g.
    //    if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
    MathUtils.Multiply(ref samples, AUDIO_MULTIPLIER);

    // Zero pad when the audio is too short for a single FFT
    int minimumLength = WINDOW_SIZE + step;
    if (samples.Length < minimumLength) {
        Array.Resize(ref samples, minimumLength);
    }

    // 2. Windowing
    // 3. FFT
    #region Windowing and FFT
    // Per the original author, each of the three spectrograms corresponds to
    // specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
    var stft = new STFT(FFTWindowType.HANNING, WINDOW_SIZE, step);
    var stftdata = stft.Apply(samples);
    stftdata.DrawMatrixImageLogValues(outputPrefix + "_specgram.png", true, false, -1, -1, false);

    var fftwSpectrogram = FFTUtils.CreateSpectrogramFFTW(samples, WINDOW_SIZE, step);
    var fftwMatrix = new Matrix(fftwSpectrogram).Transpose();
    fftwMatrix.DrawMatrixImageLogValues(outputPrefix + "_specgram2.png", true, false, -1, -1, false);

    var lomontSpectrogram = FFTUtils.CreateSpectrogramLomont(samples, WINDOW_SIZE, step);
    var lomontMatrix = new Matrix(lomontSpectrogram).Transpose();
    lomontMatrix.DrawMatrixImageLogValues(outputPrefix + "_specgram3.png", true, false, -1, -1, false);
    #endregion

    // The FFTW and Lomont matrices differ too much, so comparing them always fails:
    //Assert.That(fftwMatrix, Is.EqualTo(lomontMatrix).AsCollection.Within(0.001), "fail at [0]");

    #region Inverse FFT
    // Run the inverse STFT on the first spectrogram
    double[] reconstructed = stft.InverseStft(stftdata);

    // Normalize the result (dividing by AUDIO_MULTIPLIER would be the alternative)
    MathUtils.Normalize(ref reconstructed);

    Export.DrawGraph(reconstructed, outputPrefix + "_audiodata_inverse_stft.png");

    float[] reconstructedFloat = MathUtils.DoubleToFloat(reconstructed);
    BassProxy.SaveFile(reconstructedFloat, outputPrefix + "_inverse_stft.wav", 1, SAMPLING_RATE, 32);
    #endregion

    Assert.Pass("This test was succesful.");
}