/// <summary>
/// Builds the spectral images for the given audio samples: every frame is windowed,
/// transformed in place by <c>fftServiceUnsafe</c>, reduced to <c>configuration.LogBins</c>
/// values, and the resulting frames are cut into images and scaled.
/// </summary>
/// <param name="audioSamples">Samples and sample rate of the signal to analyse.</param>
/// <param name="configuration">FFT size, overlap (hop), window and log-bin settings.</param>
/// <returns>
/// The images returned by <c>CutLogarithmizedSpectrum</c> after <c>ScaleFullSpectrum</c> has been applied,
/// or an empty list when the signal does not yield at least one frame.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    int fftLength = configuration.WdftSize;
    float[] signal = audioSamples.Samples;
    int hop = configuration.Overlap;

    int frameCount = (signal.Length - fftLength) / hop;
    if (frameCount < 1)
    {
        return new List<SpectralImage>();
    }

    int binCount = configuration.LogBins;

    // Flat buffer: one run of binCount values per frame, written by ExtractLogBins.
    float[] spectrum = new float[frameCount * binCount];
    ushort[] binBoundaries = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
    float[] windowCoefficients = configuration.Window.GetWindow(fftLength);

    Parallel.For(0, frameCount, frame =>
    {
        unsafe
        {
            // Per-iteration scratch buffer for the FFT, allocated with stackalloc.
            float* buffer = stackalloc float[fftLength];
            CopyAndWindow(buffer, signal, frame * hop, windowCoefficients);
            fftServiceUnsafe.FFTForwardInPlace(buffer, fftLength);
            ExtractLogBins(buffer, binBoundaries, binCount, fftLength, spectrum, frame);
        }
    });

    List<SpectralImage> result = CutLogarithmizedSpectrum(spectrum, audioSamples.SampleRate, configuration);
    ScaleFullSpectrum(result, configuration);
    return result;
}
/// <summary>
/// Builds the spectral images for the given audio samples by running a forward FFT on each
/// frame, extracting <c>configuration.LogBins</c> values per frame and handing the frames to
/// <c>CutLogarithmizedSpectrum</c>.
/// </summary>
/// <param name="audioSamples">Samples and sample rate of the signal to analyse.</param>
/// <param name="configuration">FFT size, overlap (hop) and log-bin settings.</param>
/// <returns>
/// The images produced by <c>CutLogarithmizedSpectrum</c>, or an empty list when the signal
/// does not yield at least one frame.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    int frameCount = (audioSamples.Samples.Length - configuration.WdftSize) / configuration.Overlap;
    if (frameCount < 1)
    {
        return new List<SpectralImage>();
    }

    float[][] spectrum = new float[frameCount][];
    int[] binBoundaries = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);

    // Start position of the current frame; advances by one hop per frame.
    int offset = 0;
    for (int frame = 0; frame < frameCount; frame++)
    {
        float[] transformed = fftService.FFTForward(audioSamples.Samples, offset, configuration.WdftSize);
        spectrum[frame] = ExtractLogBins(transformed, binBoundaries, configuration.LogBins);
        offset += configuration.Overlap;
    }

    return CutLogarithmizedSpectrum(spectrum, audioSamples.SampleRate, configuration);
}
/// <summary>
/// Creates the log-binned spectrogram frames for a raw sample buffer.
/// </summary>
/// <param name="samples">
/// Audio samples. When <c>configuration.NormalizeSignal</c> is set, the buffer is normalized
/// in place (the caller's array is modified) before any frame is computed.
/// </param>
/// <param name="configuration">Normalization flag, FFT size, overlap (hop) and log-bin settings.</param>
/// <returns>
/// One array per frame as produced by <c>ExtractLogBins</c>; an empty array when the input
/// does not yield at least one frame.
/// </returns>
public float[][] CreateLogSpectrogram(float[] samples, IFingerprintConfiguration configuration)
{
    if (configuration.NormalizeSignal)
    {
        audioSamplesNormalizer.NormalizeInPlace(samples);
    }

    // Number of frames, i.e. the width of the spectrogram image.
    int width = (samples.Length - configuration.WdftSize) / configuration.Overlap;
    if (width < 1)
    {
        // When the input is shorter than the FFT window by at least one hop the width goes
        // negative, and allocating an array with a negative length throws. Return no frames
        // instead, matching the guard used by the AudioSamples-based overloads.
        return new float[0][];
    }

    float[][] frames = new float[width][];
    int[] logFrequenciesIndexes = logUtility.GenerateLogFrequenciesRanges(configuration);
    for (int i = 0; i < width; i++)
    {
        float[] complexSignal = fftService.FFTForward(samples, i * configuration.Overlap, configuration.WdftSize);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    return frames;
}
/// <summary>
/// Checks that the ranges generated by <c>logUtility.GenerateLogFrequenciesRanges</c> for a fixed
/// base-10 configuration match the spectrum indexes of a reference set of log-spaced frequencies.
/// </summary>
public void GenerateLogFrequenciesRangesTest()
{
    var config = new DefaultSpectrogramConfig { UseDynamicLogBase = false, LogBase = 10 };

    // Reference values generated in matlab with logspace(2.50242712, 3.3010299957, 33)
    float[] expectedFrequencies =
    {
        318.00f, 336.81f, 356.73f, 377.83f, 400.18f, 423.85f, 448.92f, 475.47f,
        503.59f, 533.38f, 564.92f, 598.34f, 633.73f, 671.21f, 710.91f, 752.96f,
        797.50f, 844.67f, 894.63f, 947.54f, 1003.58f, 1062.94f, 1125.81f, 1192.40f,
        1262.93f, 1337.63f, 1416.75f, 1500.54f, 1589.30f, 1683.30f, 1782.86f, 1888.31f,
        2000f
    };

    int[] indexes = logUtility.GenerateLogFrequenciesRanges(defaultFingerprintConfiguration.SampleRate, config);

    int position = 0;
    foreach (float frequency in expectedFrequencies)
    {
        int expectedIndex = logUtility.FrequencyToSpectrumIndex(
            frequency,
            defaultFingerprintConfiguration.SampleRate,
            config.WdftSize);

        Assert.AreEqual(expectedIndex, indexes[position]);
        position++;
    }
}
/// <summary>
/// Builds the spectral images for the given audio samples using the managed Lomont real FFT,
/// timing the whole call with a <c>DebugTimer</c>. In verbose mode the intermediate frames and
/// the cut spectral images are additionally dumped to the debug directory.
/// </summary>
/// <param name="audioSamples">Samples, sample rate and origin (used to name the debug files).</param>
/// <param name="configuration">FFT size, overlap (hop), window, log-bin and verbosity settings.</param>
/// <returns>
/// The images returned by <c>CutLogarithmizedSpectrum</c> after <c>ScaleFullSpectrum</c> has been applied,
/// or an empty list when the signal does not yield at least one frame.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    using (new DebugTimer("CreateLogSpectrogram()"))
    {
        int fftLength = configuration.WdftSize;
        int frameCount = (audioSamples.Samples.Length - fftLength) / configuration.Overlap;
        if (frameCount < 1)
        {
            return new List<SpectralImage>();
        }

        // Flat buffer: one run of LogBins values per frame, written by ExtractLogBins.
        float[] logSpectrum = new float[frameCount * configuration.LogBins];
        ushort[] binBoundaries = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
        float[] windowCoefficients = configuration.Window.GetWindow(fftLength);
        float[] signal = audioSamples.Samples;

        // PIN: the FFT was reverted to the managed lomontFFT (instead of the unsafe version
        // introduced by the original author) because of issues on my computers.
        // NOTE: with Parallel.For the result differed from run to run in Release mode,
        // so the frames are deliberately processed with a plain sequential loop.
        for (int frame = 0; frame < frameCount; frame++)
        {
            var fftBuffer = CopyAndWindow(signal, frame * configuration.Overlap, windowCoefficients);
            lomontFFT.RealFFT(fftBuffer, true);

            // After the Lomont real FFT the input array holds the FFT values laid out as
            // r0, r(n/2), r1, i1, r2, i2 ...
            // ExtractLogBins only uses lower-bound indexes above 2, so the two "special"
            // leading values r0 and r(n/2) can be ignored here.
            // See https://github.com/perivar/FindSimilar/blob/6b658b1c54d1504136e25e933f39b7c303da5d9e/Mirage/Fft.cs
            ExtractLogBins(fftBuffer, binBoundaries, configuration.LogBins, fftLength, logSpectrum, frame);
        }

        if (configuration.Verbosity == Verbosity.Verbose)
        {
            // Debug output: spectrogram picture plus the raw frame values.
            var renderer = new FindSimilarImageService();
            using (Image picture = renderer.GetSpectrogramImage(logSpectrum, frameCount, configuration.LogBins, frameCount, configuration.LogBins))
            {
                var picturePath = Path.Combine(
                    SoundFingerprinter.DEBUG_DIRECTORY_PATH,
                    Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_spectrogram.png");
                if (picturePath != null)
                {
                    picture.Save(picturePath, ImageFormat.Png);
                }
            }

            WriteOutputUtils.WriteCSV(
                logSpectrum,
                Path.Combine(
                    SoundFingerprinter.DEBUG_DIRECTORY_PATH,
                    Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_frames.csv"));
        }

        var spectralImages = CutLogarithmizedSpectrum(logSpectrum, audioSamples.SampleRate, configuration);

        if (configuration.Verbosity == Verbosity.Verbose && spectralImages.Count > 0)
        {
            // Debug output: one CSV row per cut spectral image.
            float[][] imageRows = new float[spectralImages.Count][];
            for (int i = 0; i < imageRows.Length; i++)
            {
                imageRows[i] = spectralImages[i].Image;
            }

            WriteOutputUtils.WriteCSV(
                imageRows,
                Path.Combine(
                    SoundFingerprinter.DEBUG_DIRECTORY_PATH,
                    Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_spectral_images.csv"),
                ";");
        }

        ScaleFullSpectrum(spectralImages, configuration);
        return spectralImages;
    }
}