/// <summary>
/// Pushes 100 randomly generated chunks of <c>minSize</c> samples through a
/// <see cref="RealtimeAudioSamplesAggregator"/> and checks the aggregated output of each call.
/// </summary>
/// <remarks>
/// Sizing notes carried over from the original author:
/// the windowing step is repeated 128 times, so the last window still needs at least 2048 samples;
/// 128 * 64 (overlap) = 8192, plus 2048 for that last window = 10240.
/// 10240 is therefore the minimal length that allows generating a full 128 * 32 log-image,
/// and is used here as the length of one fingerprint.
/// </remarks>
public void ShouldBufferCorrectly()
{
    var stride = new IncrementalStaticStride(256);
    const int minSize = 10240;
    var realtimeAggregator = new RealtimeAudioSamplesAggregator(stride, minSize);

    // Always assigned at the end of iteration 0 before it is first read,
    // so no placeholder buffer needs to be allocated up front.
    float[] prev = null;
    for (int i = 0; i < 100; ++i)
    {
        float[] next = TestUtilities.GenerateRandomFloatArray(minSize);
        var audioSamples = realtimeAggregator.Aggregate(new AudioSamples(next, "cnn", 5512));

        if (i == 0)
        {
            // The very first chunk has nothing buffered before it: the aggregator is
            // expected to hand back the same array instance (expected first, actual second).
            Assert.AreSame(next, audioSamples.Samples);
        }
        else
        {
            // Every later chunk is checked against its predecessor by the helper.
            VerifyEndingsAreAttached(prev, next, audioSamples, minSize, stride.NextStride);
        }

        prev = next;
    }
}
/// <summary>
/// Initializes the spectrogram configuration with its default values.
/// </summary>
public DefaultSpectrogramConfig()
{
    // Framing parameters.
    Overlap = 64;
    WdftSize = 2048;

    // Frequency band of interest: 318 .. 2000.
    var defaultRange = new FrequencyRange { Min = 318, Max = 2000 };
    FrequencyRange = defaultRange;

    // Logarithmic binning and image size.
    LogBase = 2;
    LogBins = 32;
    ImageLength = 128;

    // Optional processing steps are switched off by default.
    UseDynamicLogBase = false;
    NormalizeSignal = false;

    // Second stride argument is ImageLength * Overlap, read back from the properties set above
    // (128 * 64 = 8192, assuming the getters return the values just assigned).
    var samplesPerImage = ImageLength * Overlap;
    Stride = new IncrementalStaticStride(5115, samplesPerImage);
}
/// <summary>
/// Cuts an all-zero logarithmized spectrum using an <see cref="IncrementalStaticStride"/> built from
/// half of <c>SamplesPerFingerprint</c> and expects 48 resulting images.
/// </summary>
public void CutLogarithmizedSpectrumWithAnIncrementalStaticStride()
{
    var configuration = new DefaultFingerprintConfiguration();

    // 24 fingerprint lengths plus one overlap worth of rows.
    int rowCount = (configuration.FingerprintLength * 24) + configuration.Overlap;

    // NOTE(review): first argument is presumably the increment in samples (half a fingerprint here) - confirm.
    var halfFingerprintStride = new IncrementalStaticStride(
        configuration.SamplesPerFingerprint / 2,
        configuration.SamplesPerFingerprint,
        0);

    // Jagged spectrum: rowCount rows of 32 zero-valued bins each.
    var spectrum = new float[rowCount][];
    for (int row = 0; row < spectrum.Length; ++row)
    {
        spectrum[row] = new float[32];
    }

    var fingerprintImages = spectrumService.CutLogarithmizedSpectrum(
        spectrum,
        halfFingerprintStride,
        configuration.FingerprintLength,
        configuration.Overlap);

    Assert.AreEqual(48, fingerprintImages.Count);
}
/// <summary>
/// Initializes the fingerprint configuration with its default values.
/// </summary>
public DefaultFingerprintConfiguration()
{
    // Framing parameters.
    FingerprintLength = 128;
    Overlap = 64;
    WdftSize = 2048;

    // Frequency band and wavelet selection.
    MinFrequency = 318;
    MaxFrequency = 2000;
    TopWavelets = 200;

    SampleRate = 5512;
    LogBase = 2;

    // Second stride argument is FingerprintLength * Overlap, read back from the properties set above
    // (128 * 64 = 8192, assuming the getters return the values just assigned).
    var samplesPerFingerprint = FingerprintLength * Overlap;
    Stride = new IncrementalStaticStride(5115, samplesPerFingerprint);

    LogBins = 32;

    // Optional processing steps are switched off by default.
    NormalizeSignal = false;
    UseDynamicLogBase = false;

    // Hashing layout.
    NumberOfLSHTables = 25;
    NumberOfMinHashesPerTable = 4;
}
/// <summary>
/// Cuts a generated logarithmized spectrum with an <see cref="IncrementalStaticStride"/> of half
/// <c>SamplesPerFingerprint</c>, expects 48 images, and checks that the i-th image's
/// <c>StartsAt</c> is within <c>Epsilon</c> of i times half the length of one fingerprint.
/// </summary>
public void CutLogarithmizedSpectrumWithAnIncrementalStaticStride()
{
    var halfFingerprintInSamples = new DefaultFingerprintConfiguration().SamplesPerFingerprint / 2;
    var stride = new IncrementalStaticStride(halfFingerprintInSamples);
    var config = new DefaultSpectrogramConfig { Stride = stride };

    // 24 image lengths plus one overlap worth of rows.
    int rowCount = (config.ImageLength * 24) + config.Overlap;
    var spectrum = GetLogSpectrum(rowCount);

    var images = spectrumService.CutLogarithmizedSpectrum(spectrum, SampleRate, config);

    Assert.AreEqual(48, images.Count);

    // Length of one fingerprint: ImageLength * Overlap samples divided by the sample rate.
    double fingerprintLength = (double)config.ImageLength * config.Overlap / SampleRate;
    for (int index = 0; index < images.Count; ++index)
    {
        // Consecutive images are expected to start half a fingerprint apart.
        double expectedStart = index * fingerprintLength / 2;
        var deviation = System.Math.Abs(images[index].StartsAt - expectedStart);
        Assert.IsTrue(deviation < Epsilon);
    }
}
/// <summary>
/// Initializes the fingerprinting configuration with its default values.
/// </summary>
/// <remarks>
/// The transformation parameters match those reported to work well in other audio fingerprinting
/// studies (specifically "A Highly Robust Audio Fingerprinting System"): frames of 2048 samples
/// (371 ms) taken every 64 samples (11.6 ms), i.e. an overlap of 31/32.
/// </remarks>
public DefaultFingerprintingConfiguration()
{
    FingerprintLength = 128;
    WindowSize = 2048; // 2048 / 5512 = 371 ms
    Overlap = 64;      // 64 / 5512 = 11.6 ms
    SamplesPerFingerprint = FingerprintLength * Overlap;

    MinFrequency = 318;
    MaxFrequency = 2000;
    SampleRate = 5512;
    LogBase = 2; // 2 or 10

    // "Content Fingerprinting Using Wavelets" used a static 928 ms stride when building the database
    // and a random 0-46 ms stride when querying (the random stride minimizes the coarse effect of
    // unlucky time alignment). 5115 / 5512 = 0.928 s.
    var fingerprintSpan = FingerprintLength * Overlap;
    Stride = new IncrementalStaticStride(5115, fingerprintSpan);

    TopWavelets = 200;
    LogBins = 32;

    // Each fingerprint is LogBins x FingerprintLength x 2 bits long, e.g. 128 x 32 x 2 = 8192.
    StartFingerprintIndex = 0;
    EndFingerprintIndex = LogBins * FingerprintLength * 2;

    WindowFunction = new HannWindow(WindowSize);
    NormalizeSignal = true;
    UseDynamicLogBase = false;

    // Number of LSH tables.
    NumberOfHashTables = 25;

    // Number of min-hash keys per hash function (one LSH table).
    NumberOfKeys = 4;
}
/// <summary>
/// Initializes the configuration with an <see cref="IncrementalStaticStride"/> constructed from 1024.
/// </summary>
public HighPrecisionFingerprintConfiguration()
{
    // NOTE(review): presumably the stride increment in samples - confirm against IncrementalStaticStride.
    const int strideIncrement = 1024;
    Stride = new IncrementalStaticStride(strideIncrement);
}
/// <summary>
/// An <see cref="IncrementalStaticStride"/> built from 5115 alone is expected to report
/// a <c>NextStride</c> of 5115 - 8192.
/// </summary>
public void IncrementalStaticStrideTest()
{
    const int increment = 5115;

    // 8192 is the value the single-argument constructor is expected to subtract.
    const int expectedStride = increment - 8192;

    var stride = new IncrementalStaticStride(increment);

    Assert.AreEqual(expectedStride, stride.NextStride);
}
/// <summary>
/// Initializes the configuration with an <see cref="IncrementalStaticStride"/> constructed from 2048.
/// </summary>
public LowLatencyFingerprintConfiguration()
{
    // NOTE(review): presumably the stride increment in samples - confirm against IncrementalStaticStride.
    const int strideIncrement = 2048;
    Stride = new IncrementalStaticStride(strideIncrement);
}
/// <summary>
/// Initializes a fingerprinting configuration tuned for a 32000 Hz sample rate and a wide
/// (40 - 16000) frequency band.
/// </summary>
/// <param name="useRandomStride">
/// When <c>true</c>, <c>Stride</c> is an <c>IncrementalRandomStride(1, 4096, SamplesPerFingerprint)</c>;
/// otherwise it is an <c>IncrementalStaticStride(29695, SamplesPerFingerprint)</c>.
/// </param>
/// <remarks>
/// Background: http://www.codeproject.com/Articles/206507/Duplicates-detector-via-audio-fingerprinting
/// The parameters are kept close to those found to work well in other audio fingerprinting studies
/// (specifically "A Highly Robust Audio Fingerprinting System"): frames 371 ms long taken every
/// 11.6 ms, i.e. an overlap of 31/32.
/// </remarks>
public FullFrequencyFingerprintingConfiguration(bool useRandomStride = false)
{
    // 371 ms corresponds to 2048/5512, 16384/44100 or 11889/32000 samples.
    // Nearby powers of two at 32000 Hz: 8192 -> 256 ms, 4096 -> 128 ms.
    // 4096 is used (8192 was the earlier choice): this runs on many small samples,
    // so the window and overlap sizes had to be reduced.
    WindowSize = 4096;

    // 11.6 ms corresponds to 64/5512, 512/44100 or 372/32000 samples.
    // Nearby powers of two at 32000 Hz: 1024 -> 32 ms, 512 -> 16 ms, 256 -> 8 ms.
    // 256 is used (1024 was the earlier choice).
    Overlap = 256;

    // Number of samples read to create a single signature: 128 * 256 = 32768.
    // For reference, the granularity is 1.48 s (11.6 ms * 128) at 5512 Hz, and would be
    // 2.048 s (16 ms * 128) at 32000 Hz with an overlap of 512.
    FingerprintLength = 128;
    SamplesPerFingerprint = FingerprintLength * Overlap;

    // Originally 32; 40 was also tried ("seems to work better with SCMS?!") but 32 is kept.
    LogBins = 32;

    // Each fingerprint is LogBins x FingerprintLength x 2 bits long,
    // e.g. 128 x 32 x 2 = 8192 (or 128 x 40 x 2 = 10240 with 40 bins).
    StartFingerprintIndex = 0;
    EndFingerprintIndex = LogBins * FingerprintLength * 2;

    // Frequency range (the narrow-band defaults were 318 and 2000; true full frequency would be 20 and 22050).
    MinFrequency = 40;
    MaxFrequency = 16000;

    // 32000 Hz (instead of 5512 or 44100) gives a maximum of 16 kHz resolution,
    // which is OK for normal adult human hearing.
    SampleRate = 32000;
    LogBase = 2; // Math.E, 2 or 10

    // "Content Fingerprinting Using Wavelets" used a static 928 ms stride when building the database
    // and a random 0-46 ms stride when querying (the random stride minimizes the coarse effect of
    // unlucky time alignment).
    if (!useRandomStride)
    {
        // 0.928 s is 5115/5512, 40924/44100 or 29695/32000 samples.
        Stride = new IncrementalStaticStride(29695, SamplesPerFingerprint);
    }
    else
    {
        // 0.046 s is 2028/44100 or 1472/32000 samples; a 128 ms random stride (4096 samples) is used
        // instead, since one every 46 ms yields far too many fingerprints to query efficiently.
        Stride = new IncrementalRandomStride(1, 4096, SamplesPerFingerprint);
    }

    TopWavelets = 200;
    WindowFunction = new HannWindow(WindowSize);
    NormalizeSignal = true;
    UseDynamicLogBase = false;

    // Number of LSH tables.
    NumberOfHashTables = 25;

    // Number of min-hash keys per hash function (one LSH table).
    NumberOfKeys = 4;
}
/// <summary>
/// An <see cref="IncrementalStaticStride"/> built from 5115 and <c>SamplesPerFingerprint</c> is expected
/// to return 5115 - <c>SamplesPerFingerprint</c> from <c>GetNextStride()</c>.
/// </summary>
public void IncrementalStaticStrideTest()
{
    const int increment = 5115;

    var stride = new IncrementalStaticStride(increment, SamplesPerFingerprint);

    // Expected stride: the increment minus the samples consumed by one fingerprint.
    var expectedStride = increment - SamplesPerFingerprint;
    Assert.AreEqual(expectedStride, stride.GetNextStride());
}