예제 #1
0
        /// <summary>
        /// Pushes a generated sine signal through <c>STFT.Direct</c> and <c>STFT.Inverse</c> and
        /// subtracts the reconstruction from the input.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the resulting difference array is never inspected or asserted on, so this
        /// method only demonstrates that the round trip runs without throwing.
        /// </remarks>
        public void STFTTest()
        {
            uint sampleRate = 44100;

            // presumably (frequency, sampling rate, sample count) -- confirm against Generate.Sine
            double[] original    = mdsplib.DSP.Generate.Sine(110, sampleRate, 2048);
            var      transformed = STFT.Direct(original);

            // NOTE(review): meaning of the 2048 / 0 arguments is not visible here -- confirm against STFT.Inverse
            double[] restored = STFT.Inverse(transformed, 2048, 0);
            double[] residual = original.Subtract(restored);
        }
예제 #2
0
        /// <summary>
        /// Opens the input track as a mono stream resampled to the profile's sample rate, runs an STFT over
        /// it and converts every frame into sub-fingerprints. Sub-fingerprints are reported in batches through
        /// <c>SubFingerprintsGenerated</c>; <c>Completed</c> is raised after the last frame has been processed.
        /// </summary>
        /// <remarks>
        /// The audio stream is closed in a <c>finally</c> block so that it is released even if reading,
        /// fingerprint calculation or an event subscriber throws.
        /// </remarks>
        public void Generate()
        {
            IAudioStream audioStream = inputTrack.File ?
                                       AudioStreamFactory.FromFileInfoIeee32(inputTrack.FileInfo) :
                                       inputTrack.Stream;

            try
            {
                audioStream = new MonoStream(audioStream);
                audioStream = new ResamplingStream(audioStream, ResamplingQuality.Medium, profile.SampleRate);

                STFT stft = new STFT(audioStream, profile.FrameSize, profile.FrameStep, WindowType.Hann, STFT.OutputFormat.Decibel, this.bufferSize);

                // Progress counters: frames processed so far / total number of STFT windows
                index   = 0;
                indices = stft.WindowCount;

                frameBuffer = new float[profile.FrameSize / 2];
                List <SubFingerprint> subFingerprints = new List <SubFingerprint>();

                while (stft.HasNext())
                {
                    // Get FFT spectrum
                    stft.ReadFrame(frameBuffer);

                    // Sum FFT bins into target frequency bands
                    profile.MapFrequencies(frameBuffer, bands);

                    CalculateSubFingerprint(bandsPrev, bands, subFingerprints);

                    // The current bands become the previous bands of the next frame
                    CommonUtil.Swap <float[]>(ref bands, ref bandsPrev);
                    index++;

                    // Output subfingerprints every once in a while
                    if (index % this.eventInterval == 0 && SubFingerprintsGenerated != null)
                    {
                        SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
                        subFingerprints.Clear();
                    }
                }

                // Output remaining subfingerprints
                if (SubFingerprintsGenerated != null)
                {
                    SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
                }

                if (Completed != null)
                {
                    Completed(this, EventArgs.Empty);
                }
            }
            finally
            {
                // Always release the stream, also on failure. The null guard keeps a missing input
                // stream from masking the original exception with a second one thrown here.
                if (audioStream != null)
                {
                    audioStream.Close();
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Generates sub-fingerprints for the given track. The track is read as a mono stream resampled to the
        /// profile's sampling rate and transformed by an STFT; for every non-silent frame the local peaks of
        /// the spectrum residual are collected, paired across a history of frames, and converted into
        /// sub-fingerprints that are reported in batches via <c>FireFingerprintHashesGenerated</c>.
        /// </summary>
        /// <param name="track">The audio track to read and fingerprint; also passed along in raised events.</param>
        /// <remarks>
        /// The audio stream is closed in a <c>finally</c> block so that it is released even if processing
        /// or an event subscriber throws.
        /// </remarks>
        public void Generate(AudioTrack track)
        {
            IAudioStream audioStream = new ResamplingStream(
                new MonoStream(AudioStreamFactory.FromFileInfoIeee32(track.FileInfo)),
                ResamplingQuality.Medium, profile.SamplingRate);

            try
            {
                STFT stft            = new STFT(audioStream, profile.WindowSize, profile.HopSize, WindowType.Hann, STFT.OutputFormat.Decibel);
                int  index           = 0;                // index of the current frame, including skipped ones
                int  indices         = stft.WindowCount; // total number of frames
                int  processedFrames = 0;                // number of frames that were not skipped

                float[] spectrum = new float[profile.WindowSize / 2];
                // The smoothed frequency spectrum of the current frame.
                // NOTE(review): this buffer is only written below and never read within this method, and its
                // last element is never assigned (writes stop one index short of the array length).
                float[] smoothedSpectrum = new float[spectrum.Length - profile.SpectrumSmoothingLength + 1];
                var     spectrumSmoother = new SimpleMovingAverage(profile.SpectrumSmoothingLength);

                float[] spectrumTemporalAverage = new float[spectrum.Length]; // a running average of each spectrum bin over time
                float[] spectrumResidual        = new float[spectrum.Length]; // the difference between the current spectrum and the moving average spectrum

                var peakHistory = new PeakHistory(1 + profile.TargetZoneDistance + profile.TargetZoneLength, spectrum.Length / 2);
                var peakPairs   = new List <PeakPair>(profile.PeaksPerFrame * profile.PeakFanout); // keep a single instance of the list to avoid instantiation overhead

                var subFingerprints = new List <SubFingerprint>();

                while (stft.HasNext())
                {
                    // Get the FFT spectrum
                    stft.ReadFrame(spectrum);

                    // Skip frames whose average spectrum volume is below the threshold.
                    // This skips silent frames (zero samples) that only contain very low noise from the FFT
                    // and that would distort the temporal spectrum average below for the following frames.
                    if (spectrum.Average() < spectrumMinThreshold)
                    {
                        index++;
                        continue;
                    }

                    // Smooth the frequency spectrum to remove small peaks
                    if (profile.SpectrumSmoothingLength > 0)
                    {
                        spectrumSmoother.Clear();
                        for (int i = 0; i < spectrum.Length; i++)
                        {
                            var avg = spectrumSmoother.Add(spectrum[i]);
                            if (i >= profile.SpectrumSmoothingLength)
                            {
                                smoothedSpectrum[i - profile.SpectrumSmoothingLength] = avg;
                            }
                        }
                    }

                    // Update the temporal moving bin average
                    if (processedFrames == 0)
                    {
                        // Init averages on first frame
                        for (int i = 0; i < spectrum.Length; i++)
                        {
                            spectrumTemporalAverage[i] = spectrum[i];
                        }
                    }
                    else
                    {
                        // Update averages on all subsequent frames
                        for (int i = 0; i < spectrum.Length; i++)
                        {
                            spectrumTemporalAverage[i] = ExponentialMovingAverage.UpdateMovingAverage(
                                spectrumTemporalAverage[i], profile.SpectrumTemporalSmoothingCoefficient, spectrum[i]);
                        }
                    }

                    // Calculate the residual.
                    // The residual is the difference of the current spectrum to the temporal average spectrum. The higher
                    // a bin residual is, the steeper the increase in energy in that peak.
                    // NOTE(review): the purpose of the constant 90 offset is not evident from this method -- confirm.
                    for (int i = 0; i < spectrum.Length; i++)
                    {
                        spectrumResidual[i] = spectrum[i] - spectrumTemporalAverage[i] - 90f;
                    }

                    // Find local peaks in the residual.
                    // The advantage of finding peaks in the residual instead of the spectrum is that spectrum energy is usually
                    // concentrated in the low frequencies, resulting in a clustering of the highest peaks in the lows. Getting
                    // peaks from the residual distributes the peaks more evenly across the spectrum.
                    var peaks = peakHistory.List;             // take oldest list,
                    peaks.Clear();                            // clear it, and
                    FindLocalMaxima(spectrumResidual, peaks); // refill with new peaks

                    // Pick the largest n peaks
                    int numMaxima = Math.Min(peaks.Count, profile.PeaksPerFrame);
                    if (numMaxima > 0)
                    {
                        peaks.Sort((p1, p2) => p1.Value == p2.Value ? 0 : p1.Value < p2.Value ? 1 : -1); // order peaks by height, tallest first
                        if (peaks.Count > numMaxima)
                        {
                            peaks.RemoveRange(numMaxima, peaks.Count - numMaxima);                       // select the n tallest peaks by deleting the rest
                        }
                        peaks.Sort((p1, p2) => p1.Index == p2.Index ? 0 : p1.Index < p2.Index ? -1 : 1); // sort peaks by index (not really necessary)
                    }

                    peakHistory.Add(index, peaks);

                    if (FrameProcessed != null)
                    {
                        // Mark peaks as 0dB for spectrogram display purposes.
                        // This overwrites the working buffers, but only after the temporal average
                        // and the peak list for this frame have already been computed.
                        foreach (var peak in peaks)
                        {
                            spectrum[peak.Index]         = 0;
                            spectrumResidual[peak.Index] = 0;
                        }

                        FrameProcessed(this, new FrameProcessedEventArgs {
                            AudioTrack = track, Index = index, Indices = indices,
                            Spectrum   = spectrum, SpectrumResidual = spectrumResidual
                        });
                    }

                    processedFrames++;
                    index++;

                    // Start pairing peaks once the history has been filled with processed frames
                    if (processedFrames >= peakHistory.Length)
                    {
                        peakPairs.Clear();
                        FindPairsWithMaxEnergy(peakHistory, peakPairs);
                        ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
                    }

                    // Report sub-fingerprints in batches
                    if (subFingerprints.Count > 512)
                    {
                        FireFingerprintHashesGenerated(track, indices, subFingerprints);
                        subFingerprints.Clear();
                    }
                }

                // Flush the remaining peaks of the last frames from the history to get all remaining pairs
                // by pushing empty peak lists (frame index -1) through the history
                for (int i = 0; i < profile.TargetZoneLength; i++)
                {
                    var peaks = peakHistory.List;
                    peaks.Clear();
                    peakHistory.Add(-1, peaks);
                    peakPairs.Clear();
                    FindPairsWithMaxEnergy(peakHistory, peakPairs);
                    ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
                }
                FireFingerprintHashesGenerated(track, indices, subFingerprints);
            }
            finally
            {
                // Always release the stream, also when processing or a subscriber throws
                audioStream.Close();
            }
        }
예제 #4
0
        /// <summary>
        /// Renders log-value spectrogram images of the input wave file using three implementations
        /// (<c>STFT.Apply</c>, <c>FFTUtils.CreateSpectrogramFFTW</c>, <c>FFTUtils.CreateSpectrogramLomont</c>),
        /// then runs the inverse STFT on the first result and writes it out as a graph image and a wave file.
        /// </summary>
        /// <remarks>
        /// No numeric comparison between the results is made; the test passes unconditionally once all
        /// output has been written.
        /// </remarks>
        public void TestFFTAudioMatrixMethod()
        {
            // Reference image: harmor_HQ.bmp = 1645 (width) x 511 (height) 32 bit

            const string outputPrefix = "test";
            // Not used further in this method.
            // NOTE(review): presumably accessed to initialise the audio library -- confirm before removing.
            var bassProxy = BassProxy.Instance;

            // Step 0: load the audio as mono samples
            float[] samples = BassProxy.ReadMonoFromFile(WAVE_INPUT_FILEPATH, SAMPLING_RATE);

            // Derive the hop from a fixed target width instead of the other way round
            // (width = (samples.Length - WINDOW_SIZE) / hop)
            int    targetWidth              = 1645;
            double samplesAfterFirstWindow  = samples.Length - WINDOW_SIZE;
            int    hop                      = (int)(samplesAfterFirstWindow / targetWidth);

            // Step 1: scale the samples up to the range of 16 bit shorts (-32,768 to 32,767).
            // Matlab multiplies with 2^15 (32768),
            // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
            MathUtils.Multiply(ref samples, AUDIO_MULTIPLIER);

            // Zero pad when the audio is too short to perform an FFT
            int minimumLength = WINDOW_SIZE + hop;
            if (samples.Length < minimumLength)
            {
                Array.Resize(ref samples, minimumLength);
            }

            // Steps 2 + 3: windowing and FFT, once per implementation.
            // Each image corresponds to Matlab's specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
            var transform   = new STFT(FFTWindowType.HANNING, WINDOW_SIZE, hop);
            var spectrogram = transform.Apply(samples);
            spectrogram.DrawMatrixImageLogValues(outputPrefix + "_specgram.png", true, false, -1, -1, false);

            var fftwResult      = FFTUtils.CreateSpectrogramFFTW(samples, WINDOW_SIZE, hop);
            var fftwSpectrogram = new Matrix(fftwResult).Transpose();
            fftwSpectrogram.DrawMatrixImageLogValues(outputPrefix + "_specgram2.png", true, false, -1, -1, false);

            var lomontResult      = FFTUtils.CreateSpectrogramLomont(samples, WINDOW_SIZE, hop);
            var lomontSpectrogram = new Matrix(lomontResult).Transpose();
            lomontSpectrogram.DrawMatrixImageLogValues(outputPrefix + "_specgram3.png", true, false, -1, -1, false);

            // The matrices differ too much, so comparing them always fails; no equality assertion is made here.

            // Inverse STFT of the first spectrogram, normalized (rather than divided by AUDIO_MULTIPLIER)
            double[] reconstructed = transform.InverseStft(spectrogram);
            MathUtils.Normalize(ref reconstructed);

            Export.DrawGraph(reconstructed, outputPrefix + "_audiodata_inverse_stft.png");

            float[] reconstructedFloats = MathUtils.DoubleToFloat(reconstructed);
            BassProxy.SaveFile(reconstructedFloats, outputPrefix + "_inverse_stft.wav", 1, SAMPLING_RATE, 32);

            Assert.Pass("This test was succesful.");
        }