/// <summary>
/// Feeds ten seconds of a 15 Hz sine wave (amplitude 1.3) sampled at 5512 Hz through
/// <c>CreateLogSpectrogram</c>, using an overlap of 32, an image length of 128 and an
/// <c>IncrementalStaticStride</c> built from the sampling rate, and checks the number of images returned.
/// </summary>
public void ShouldGenerateLogSpectrumWithBiggerOverlap()
{
    int samplingRate = 5512;
    int durationInSeconds = 10;

    // Synthesize the test tone.
    float[] tone = new float[samplingRate * durationInSeconds];
    for (int i = 0; i < tone.Length; ++i)
    {
        tone[i] = (float)(1.3 * System.Math.Sin(2 * System.Math.PI * 15 * i / samplingRate));
    }

    var audio = new AudioSamples(tone, "test", samplingRate);
    var config = new DefaultSpectrogramConfig
    {
        Stride = new IncrementalStaticStride(samplingRate),
        Overlap = 32,
        ImageLength = 128
    };

    var spectralImages = spectrumService.CreateLogSpectrogram(audio, config);

    // Expected count: whole multiples of the sampling rate that fit after removing one FFT window.
    Assert.AreEqual((durationInSeconds * samplingRate - config.WdftSize) / samplingRate, spectralImages.Count);
}
/// <summary>
/// Reads a section of an audio file through the audio engine, creates a fingerprint from the
/// samples and stores <paramref name="key"/> in the fingerprint's <c>Reference</c>.
/// </summary>
/// <param name="key">Value assigned to <c>Reference</c> when a fingerprint is produced.</param>
/// <param name="filename">File handed to <c>ReadMonoFromFile</c>.</param>
/// <param name="startPositionInMS">Start position forwarded to <c>ReadMonoFromFile</c>.</param>
/// <param name="toReadInMS">Amount to read, forwarded to <c>ReadMonoFromFile</c>.</param>
/// <returns>
/// The fingerprint, or <c>null</c> when reading the file throws or when
/// <c>CreateFingerprint</c> itself returns <c>null</c>.
/// </returns>
public FingerprintSignature CreateAudioFingerprint(string key, string filename, int startPositionInMS, int toReadInMS)
{
    SpectrogramConfig config = new DefaultSpectrogramConfig();

    AudioSamples monoSamples;
    try
    {
        // Read the file as mono at the configured sample rate (5512 Hz according to the original note).
        monoSamples = audioEngine.ReadMonoFromFile(filename, config.SampleRate, startPositionInMS, toReadInMS);
    }
    catch
    {
        // Best effort: any failure while reading is reported to the caller as "no fingerprint".
        return null;
    }

    // Original note: the audio is sliced into chunks separated by 11.6 ms (64 samples at 5512 Hz),
    // each 371 ms long (2048 samples at 5512 Hz, rounded).
    FingerprintSignature signature = audioEngine.CreateFingerprint(monoSamples, config);
    if (signature == null)
    {
        return null;
    }

    signature.Reference = key;
    return signature;
}
/// <summary>
/// Builds a power spectrogram: for every frame the samples are passed to
/// <c>fftService.FFTForward</c> and the squared magnitude of each of the first
/// <c>wdftSize / 2 + 1</c> bins is stored.
/// </summary>
/// <param name="audioSamples">Source of the sample buffer (<c>audioSamples.Samples</c>).</param>
/// <param name="overlap">
/// Step, in samples, between consecutive frames (frame <c>i</c> is requested at <c>i * overlap</c>).
/// Must be non-zero; zero causes a <c>DivideByZeroException</c> in the frame-count division.
/// </param>
/// <param name="wdftSize">Window / transform size passed to the window function and to the FFT service.</param>
/// <returns>
/// One array of <c>wdftSize / 2 + 1</c> power values per frame. An empty array is returned when
/// no frame fits, e.g. when there are fewer samples than <paramref name="wdftSize"/>.
/// </returns>
public float[][] CreateSpectrogram(AudioSamples audioSamples, int overlap, int wdftSize)
{
    float[] samples = audioSamples.Samples;
    int width = (samples.Length - wdftSize) / overlap;

    // A negative frame count used to reach "new float[width][]" and throw OverflowException;
    // input that is too short simply yields no frames.
    if (width <= 0)
    {
        return new float[0][];
    }

    float[] window = new DefaultSpectrogramConfig().Window.GetWindow(wdftSize);

    // Loop invariants. The divisor is computed in single precision and then widened,
    // exactly as the per-bin expression "(float)wdftSize / 2" did.
    int bins = (wdftSize / 2) + 1;
    double scale = (float)wdftSize / 2;

    float[][] frames = new float[width][];
    for (int i = 0; i < width; i++)
    {
        // The result is read as interleaved pairs: real part at 2*j, imaginary part at 2*j + 1.
        float[] complexSignal = fftService.FFTForward(samples, i * overlap, wdftSize, window);
        float[] band = new float[bins];
        for (int j = 0; j < bins; j++)
        {
            double re = complexSignal[2 * j];
            double img = complexSignal[(2 * j) + 1];
            re /= scale;
            img /= scale;
            band[j] = (float)((re * re) + (img * img));
        }

        frames[i] = band;
    }

    return frames;
}
/// <summary>
/// Initializes the configuration with its defaults: a <c>DefaultSpectrogramConfig</c>,
/// <c>HashingConfig.Default</c>, 200 top wavelets, a 5512 sample rate and signal normalization disabled.
/// </summary>
public DefaultFingerprintConfiguration()
{
    this.SpectrogramConfig = new DefaultSpectrogramConfig();
    this.HashingConfig = HashingConfig.Default;
    this.TopWavelets = 200;
    this.SampleRate = 5512;
    this.NormalizeSignal = false;
}
/// <summary>
/// Creates the FFT with <c>A = 0</c> and <c>B = 1</c> and initializes it for the window size
/// of a <c>DefaultSpectrogramConfig</c>.
/// </summary>
public LomontFFT()
{
    // NOTE(review): A and B presumably select the transform's scaling/sign convention - confirm against the class docs.
    A = 0;
    B = 1;

    Initialize(new DefaultSpectrogramConfig().WdftSize);
}
/// <summary>
/// Configures the <c>logUtility</c> mock so that <c>GenerateLogFrequenciesRanges</c>, called with
/// <c>SampleRate</c> and <paramref name="configuration"/>, returns a fixed table of 33 indexes.
/// </summary>
/// <param name="configuration">Configuration instance the mocked call is matched against.</param>
private void SetupFftService(DefaultSpectrogramConfig configuration)
{
    var logFrequencyIndexes = new ushort[]
    {
        118, 125, 133, 141, 149, 158, 167, 177, 187, 198, 210,
        223, 236, 250, 264, 280, 297, 314, 333, 352, 373, 395,
        419, 443, 470, 497, 527, 558, 591, 626, 663, 702, 744,
    };

    logUtility
        .Setup(utility => utility.GenerateLogFrequenciesRanges(SampleRate, configuration))
        .Returns(logFrequencyIndexes);
}
/// <summary>
/// Audio that is one sample shorter than the transform window must produce no spectral images.
/// </summary>
public void CreateLogSpectrogramFromSamplesLessThanFourierTransformWindowLength()
{
    var config = new DefaultSpectrogramConfig();

    // One sample short of a full window.
    var sampleCount = config.WdftSize - 1;
    var audio = TestUtilities.GenerateRandomAudioSamples(sampleCount);

    var spectralImages = spectrumService.CreateLogSpectrogram(audio, config);

    Assert.AreEqual(0, spectralImages.Count);
}
/// <summary>
/// Configures both mocks: <c>logUtility.GenerateLogFrequenciesRanges</c> returns a fixed table of
/// 33 indexes, and <c>fftService.FFTForward</c> returns a random float array of length 2048 for
/// the given sample buffer and window size.
/// </summary>
/// <param name="configuration">Configuration the mocked calls are matched against.</param>
/// <param name="samples">Audio whose <c>Samples</c> buffer the FFT setup is matched against.</param>
private void SetupFftService(DefaultSpectrogramConfig configuration, AudioSamples samples)
{
    int[] logFrequencyIndexes =
    {
        118, 125, 133, 141, 149, 158, 167, 177, 187, 198, 210,
        223, 236, 250, 264, 280, 297, 314, 333, 352, 373, 395,
        419, 443, 470, 497, 527, 558, 591, 626, 663, 702, 744,
    };

    logUtility
        .Setup(utility => utility.GenerateLogFrequenciesRanges(SampleRate, configuration))
        .Returns(logFrequencyIndexes);

    // Any offset and any window array are accepted; the buffer and the transform size must match.
    fftService
        .Setup(service => service.FFTForward(samples.Samples, It.IsAny<int>(), configuration.WdftSize, It.IsAny<float[]>()))
        .Returns(TestUtilities.GenerateRandomFloatArray(2048));
}
/// <summary>
/// A log spectrum exactly one image long, cut with a <c>StaticStride(0, 0)</c>, yields exactly one image.
/// </summary>
public void CutLogarithmizedSpectrumOfJustOneFingerprintTest()
{
    var config = new DefaultSpectrogramConfig { Stride = new StaticStride(0, 0) };

    // Exactly one image worth of rows (128 according to the original note).
    int spectrumLength = config.ImageLength;
    var spectrum = GetLogSpectrum(spectrumLength);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, config);

    Assert.AreEqual(1, images.Count);
}
/// <summary>
/// A log spectrum one row shorter than a single image, cut with a <c>StaticStride(0, 0)</c>, yields no images.
/// </summary>
public void CutLogarithmizedSpectrumWithSpectrumWhichIsLessThanMinimalLengthOfOneFingerprintTest()
{
    var configuration = new DefaultSpectrogramConfig { Stride = new StaticStride(0, 0) };

    // One row short of a complete image.
    int spectrumLength = configuration.ImageLength - 1;
    var spectrum = GetLogSpectrum(spectrumLength);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, configuration);

    Assert.AreEqual(0, images.Count);
}
/// <summary>
/// Audio of <c>SamplesPerFingerprint + WdftSize</c> samples must produce exactly one image with
/// <c>ImageLength</c> rows, and the log frequency ranges must be generated exactly once.
/// </summary>
public void CreateLogSpectrogramFromMinimalSamplesLengthTest()
{
    var config = new DefaultSpectrogramConfig();

    // 8192 + 2048 with the defaults, according to the original note.
    var sampleCount = new DefaultFingerprintConfiguration().SamplesPerFingerprint + config.WdftSize;
    var audio = TestUtilities.GenerateRandomAudioSamples(sampleCount);
    this.SetupFftService(config);

    var images = spectrumService.CreateLogSpectrogram(audio, config);

    logUtility.Verify(utility => utility.GenerateLogFrequenciesRanges(SampleRate, config), Times.Once());
    Assert.AreEqual(1, images.Count);
    Assert.AreEqual(config.ImageLength, images[0].Rows);
}
/// <summary>
/// For ten minutes of random audio and a <c>StaticStride(0)</c>, the number of images must equal
/// the total sample count divided by <c>ImageLength * Overlap</c> (integer division).
/// </summary>
public void ShouldCreateCorrectNumberOfSubFingerprints()
{
    const int DurationInSeconds = 10 * 60;

    var config = new DefaultSpectrogramConfig { Stride = new StaticStride(0) };
    var audio = TestUtilities.GenerateRandomAudioSamples(DurationInSeconds * SampleRate);
    this.SetupFftService(config);

    var images = spectrumService.CreateLogSpectrogram(audio, config);

    var expectedCount = (DurationInSeconds * SampleRate) / (config.ImageLength * config.Overlap);
    Assert.AreEqual(expectedCount, images.Count);
}
/// <summary>
/// A freshly constructed <c>CustomSpectrogramConfig</c> must expose the same values as a
/// <c>DefaultSpectrogramConfig</c> for every property compared below.
/// </summary>
public void CustomSpectrumValuesInheritFromDefault()
{
    SpectrogramConfig expected = new DefaultSpectrogramConfig();
    SpectrogramConfig actual = new CustomSpectrogramConfig();

    // Image and logarithm settings.
    Assert.AreEqual(expected.ImageLength, actual.ImageLength);
    Assert.AreEqual(expected.LogBase, actual.LogBase);
    Assert.AreEqual(expected.LogBins, actual.LogBins);

    // Frequency range.
    Assert.AreEqual(expected.FrequencyRange.Max, actual.FrequencyRange.Max);
    Assert.AreEqual(expected.FrequencyRange.Min, actual.FrequencyRange.Min);

    // Transform settings.
    Assert.AreEqual(expected.UseDynamicLogBase, actual.UseDynamicLogBase);
    Assert.AreEqual(expected.WdftSize, actual.WdftSize);
    Assert.AreEqual(expected.Overlap, actual.Overlap);
}
/// <summary>
/// Generates ten seconds of a two-tone signal (410 Hz + 1400 Hz) at 5512 Hz and checks that every
/// row of every spectral image has values of about 1 and 0.78 in the columns of the two tones
/// and about 0 everywhere else.
/// </summary>
public void ShouldGenerateLogSpectrumFromAudioSamples()
{
    int Fs = 5512;
    int seconds = 10;
    float f1 = 410;
    float f2 = 1400;

    // Sum of two unit-amplitude sines.
    float[] signal = new float[Fs * seconds];
    for (int t = 0; t < signal.Length; ++t)
    {
        float lowTone = (float)System.Math.Sin(2 * System.Math.PI * f1 / Fs * t);
        float highTone = (float)System.Math.Sin(2 * System.Math.PI * f2 / Fs * t);
        signal[t] = lowTone + highTone;
    }

    var audio = new AudioSamples(signal, "410Hz", Fs);
    var config = new DefaultSpectrogramConfig { Stride = new IncrementalStaticStride(Fs) };

    var spectralImages = spectrumService.CreateLogSpectrogram(audio, config);

    Assert.AreEqual((seconds * Fs - config.WdftSize) / Fs, spectralImages.Count);

    // Original note: check with logspace(log10(318), log10(2000), 33);
    // 410 Hz is located in the 4th bin, 1400 Hz in the 25th (0 indexed).
    int lowToneColumn = 4;
    int highToneColumn = 25;
    foreach (var image in spectralImages)
    {
        float[] spectrum = image.Image;
        for (int row = 0; row < image.Rows; ++row)
        {
            for (int col = 0; col < image.Cols; ++col)
            {
                int index = row * image.Cols + col;

                double expected;
                double tolerance;
                if (col == lowToneColumn)
                {
                    expected = 1;
                    tolerance = 0.01;
                }
                else if (col == highToneColumn)
                {
                    expected = 0.78;
                    tolerance = 0.01;
                }
                else
                {
                    expected = 0;
                    tolerance = 0.001;
                }

                Assert.AreEqual(expected, spectrum[index], tolerance);
            }
        }
    }
}
/// <summary>
/// With an image length of 2048 and <c>Overlap * WdftSize + WdftSize</c> random samples, exactly
/// one image of <c>ImageLength</c> rows by <c>LogBins</c> columns must be produced, and the log
/// frequency ranges must be generated exactly once.
/// </summary>
public void CreateLogSpectrogramTest()
{
    var config = new DefaultSpectrogramConfig { ImageLength = 2048 };

    // Original note: 64 * 2048 (plus one extra window, per the expression below).
    var sampleCount = (config.Overlap * config.WdftSize) + config.WdftSize;
    var audio = TestUtilities.GenerateRandomAudioSamples(sampleCount);
    this.SetupFftService(config);

    var images = spectrumService.CreateLogSpectrogram(audio, config);

    logUtility.Verify(utility => utility.GenerateLogFrequenciesRanges(SampleRate, config), Times.Once());
    Assert.AreEqual(1, images.Count);

    var onlyImage = images[0];
    Assert.AreEqual(config.ImageLength, onlyImage.Rows);
    Assert.AreEqual(config.LogBins, onlyImage.Cols);
}
/// <summary>
/// With an <c>IncrementalStaticStride</c> of 256 and <c>SamplesPerFingerprint + WdftSize + 256</c>
/// random samples, exactly two images must be produced and the log frequency ranges generated once.
/// </summary>
public void CreateLogSpectrumFromTwoEntries()
{
    int strideInSamples = 256;
    var config = new DefaultSpectrogramConfig { Stride = new IncrementalStaticStride(strideInSamples) };

    // One fingerprint, one transform window, plus one stride.
    var sampleCount = new DefaultFingerprintConfiguration().SamplesPerFingerprint + config.WdftSize + strideInSamples;
    var audio = TestUtilities.GenerateRandomAudioSamples(sampleCount);
    SetupFftService(config);

    var images = spectrumService.CreateLogSpectrogram(audio, config);

    logUtility.Verify(utility => utility.GenerateLogFrequenciesRanges(SampleRate, config), Times.Once());
    Assert.AreEqual(2, images.Count);
}
/// <summary>
/// Cutting a 1024-row log spectrum with a <c>StaticStride(0, 0)</c> must give eight images whose
/// start times are consecutive multiples of <c>ImageLength * Overlap / SampleRate</c>.
/// </summary>
public void CutLogarithmizedSpectrumTest()
{
    const int SpectrumRows = 1024;

    var config = new DefaultSpectrogramConfig { Stride = new StaticStride(0, 0) };
    var spectrum = GetLogSpectrum(SpectrumRows);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, config);

    Assert.AreEqual(8, images.Count);

    double secondsPerImage = (double)config.ImageLength * config.Overlap / SampleRate;
    for (int i = 0; i < images.Count; i++)
    {
        double expectedStart = i * secondsPerImage;
        double drift = System.Math.Abs(images[i].StartsAt - expectedStart);
        Assert.IsTrue(drift < Epsilon);
    }
}
/// <summary>
/// Cutting a log spectrum ten images long with the default stride must give 49 images whose
/// start times are consecutive multiples of <c>1536 / SampleRate</c>.
/// </summary>
public void CutLogarithmizedSpectrumWithDefaultStride()
{
    // Original note: the default stride between two consecutive images is 1536, but because of
    // rounding issues and the fact that the minimal step is 11.6 ms, the timestamp is roughly .37155 sec.
    const double SecondsBetweenImages = (double)1536 / SampleRate;

    var configuration = new DefaultSpectrogramConfig();
    int spectrumRows = configuration.ImageLength * 10;
    var spectrum = GetLogSpectrum(spectrumRows);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, configuration);

    Assert.AreEqual(49, images.Count);
    for (int i = 0; i < images.Count; i++)
    {
        double expectedStart = i * SecondsBetweenImages;
        double drift = System.Math.Abs(images[i].StartsAt - expectedStart);
        Assert.IsTrue(drift < Epsilon);
    }
}
/// <summary>
/// With an <c>IncrementalStaticStride</c> of half of <c>SamplesPerFingerprint</c>, a log spectrum of
/// <c>ImageLength * 24 + Overlap</c> rows must give 48 images whose start times advance by half
/// of <c>ImageLength * Overlap / SampleRate</c>.
/// </summary>
public void CutLogarithmizedSpectrumWithAnIncrementalStaticStride()
{
    var halfFingerprintStride = new IncrementalStaticStride(new DefaultFingerprintConfiguration().SamplesPerFingerprint / 2);
    var configuration = new DefaultSpectrogramConfig { Stride = halfFingerprintStride };

    int spectrumRows = (configuration.ImageLength * 24) + configuration.Overlap;
    var spectrum = GetLogSpectrum(spectrumRows);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, configuration);

    Assert.AreEqual(48, images.Count);

    double secondsPerImage = (double)configuration.ImageLength * configuration.Overlap / SampleRate;
    for (int i = 0; i < images.Count; i++)
    {
        // Each image starts half an image length after the previous one.
        double expectedStart = i * secondsPerImage / 2;
        double drift = System.Math.Abs(images[i].StartsAt - expectedStart);
        Assert.IsTrue(drift < Epsilon);
    }
}
/// <summary>
/// Reads an audio file as mono at the spectrogram configuration's sample rate, creates a
/// fingerprint signature from the samples and prints the duration of both steps to the console.
/// </summary>
/// <remarks>
/// Original note: for sub fingerprints, use no more than 15 seconds of audio; the audio is
/// downsampled to mono, 5512 Hz.
/// </remarks>
/// <param name="filename">File handed to <c>ReadMonoFromFile</c> (read from position 0 with a length argument of -1).</param>
/// <returns>The value returned by <c>audioEngine.CreateFingerprint</c>.</returns>
private FingerprintSignature CreateSubFingerprintFromAudio(string filename)
{
    // Stopwatch is monotonic and high resolution; DateTime.Now can jump (clock adjustments, DST)
    // and is too coarse for timing short operations.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    SpectrogramConfig spectrogramConfig = new DefaultSpectrogramConfig();

    // First read the audio file and downsample it to mono at the configured sample rate.
    AudioSamples samples = audioEngine.ReadMonoFromFile(filename, spectrogramConfig.SampleRate, 0, -1);
    Console.WriteLine(string.Format("Resample tot mono {0}hz : {1:##0.000} sec.", spectrogramConfig.SampleRate, timer.Elapsed.TotalSeconds));

    timer = System.Diagnostics.Stopwatch.StartNew();

    // Original note: the audio is sliced into chunks separated by 11.6 ms (64 samples at 5512 Hz),
    // each 371 ms long (2048 samples at 5512 Hz, rounded).
    FingerprintSignature fsQuery = audioEngine.CreateFingerprint(samples, spectrogramConfig);
    Console.WriteLine(string.Format("Hashing audio to fingerprint : {0:##0.000} sec.", timer.Elapsed.TotalSeconds));

    return fsQuery;
}
/// <summary>
/// Creates a fingerprint for a whole audio file using a dedicated <c>AudioEngine</c>, which is
/// always closed before returning, and stores <paramref name="key"/> in the fingerprint's <c>Reference</c>.
/// </summary>
/// <param name="key">Value assigned to <c>Reference</c> when a fingerprint is produced.</param>
/// <param name="filename">File handed to <c>ReadMonoFromFile</c> (read from position 0 with a length argument of -1).</param>
/// <returns>
/// The fingerprint, or <c>null</c> when reading the file throws or when
/// <c>CreateFingerprint</c> itself returns <c>null</c>.
/// </returns>
private FingerprintSignature MakeSubFingerID(string key, string filename)
{
    AudioEngine engine = new AudioEngine();
    try
    {
        SpectrogramConfig config = new DefaultSpectrogramConfig();

        AudioSamples monoSamples;
        try
        {
            // Read the file as mono at the configured sample rate (5512 Hz according to the original note).
            monoSamples = engine.ReadMonoFromFile(filename, config.SampleRate, 0, -1);
        }
        catch
        {
            // Best effort: any failure while reading is reported as "no fingerprint".
            return null;
        }

        // Original note: the audio is sliced into chunks separated by 11.6 ms (64 samples at 5512 Hz),
        // each 371 ms long (2048 samples at 5512 Hz, rounded).
        FingerprintSignature signature = engine.CreateFingerprint(monoSamples, config);
        if (signature != null)
        {
            signature.Reference = key;
        }

        return signature;
    }
    finally
    {
        // Runs on every exit path, including the early return above.
        engine.Close();
    }
}
/// <summary>
/// With a fixed log base of 10, every index returned by <c>GenerateLogFrequenciesRanges</c> must
/// equal the spectrum index of the corresponding log-spaced reference frequency.
/// </summary>
public void GenerateLogFrequenciesRangesTest()
{
    var config = new DefaultSpectrogramConfig { UseDynamicLogBase = false, LogBase = 10 };

    // Generated in MATLAB with logspace(2.50242712, 3.3010299957, 33).
    float[] referenceFrequencies =
    {
        318.00f, 336.81f, 356.73f, 377.83f, 400.18f, 423.85f, 448.92f, 475.47f, 503.59f, 533.38f, 564.92f,
        598.34f, 633.73f, 671.21f, 710.91f, 752.96f, 797.50f, 844.67f, 894.63f, 947.54f, 1003.58f, 1062.94f,
        1125.81f, 1192.40f, 1262.93f, 1337.63f, 1416.75f, 1500.54f, 1589.30f, 1683.30f, 1782.86f, 1888.31f, 2000f
    };

    int[] indexes = logUtility.GenerateLogFrequenciesRanges(defaultFingerprintConfiguration.SampleRate, config);

    for (int i = 0; i < referenceFrequencies.Length; i++)
    {
        int index = logUtility.FrequencyToSpectrumIndex(
            referenceFrequencies[i],
            defaultFingerprintConfiguration.SampleRate,
            config.WdftSize);

        Assert.AreEqual(index, indexes[i]);
    }
}