/// <summary>
/// Creates fingerprints from audio samples and hands the intermediate
/// log spectrogram back to the caller.
/// </summary>
/// <param name="samples">Audio samples to fingerprint.</param>
/// <param name="param">
/// Work unit parameters; its <c>FingerprintingConfiguration</c> supplies every setting used here.
/// </param>
/// <param name="logSpectrogram">
/// Receives the log spectrogram computed from <paramref name="samples"/>.
/// </param>
/// <returns>
/// The result of <c>CreateFingerprintsFromLogSpectrum</c> applied to the log spectrogram.
/// </returns>
public List<bool[]> CreateFingerprintsFromAudioSamples(
    float[] samples,
    WorkUnitParameterObject param,
    out double[][] logSpectrogram)
{
    IFingerprintingConfiguration configuration = param.FingerprintingConfiguration;

    // Copy the spectrogram-related settings into the audio service's own configuration type.
    AudioServiceConfiguration spectrogramSettings = new AudioServiceConfiguration();
    spectrogramSettings.LogBins = configuration.LogBins;
    spectrogramSettings.LogBase = configuration.LogBase;
    spectrogramSettings.MaxFrequency = configuration.MaxFrequency;
    spectrogramSettings.MinFrequency = configuration.MinFrequency;
    spectrogramSettings.Overlap = configuration.Overlap;
    spectrogramSettings.SampleRate = configuration.SampleRate;
    spectrogramSettings.WindowSize = configuration.WindowSize;
    spectrogramSettings.NormalizeSignal = configuration.NormalizeSignal;
    spectrogramSettings.UseDynamicLogBase = configuration.UseDynamicLogBase;

    // The spectrogram is both the out value and the input to the fingerprint step.
    logSpectrogram = AudioService.CreateLogSpectrogram(
        samples,
        configuration.WindowFunction,
        spectrogramSettings);

    List<bool[]> fingerprints = this.CreateFingerprintsFromLogSpectrum(
        logSpectrogram,
        configuration.Stride,
        configuration.FingerprintLength,
        configuration.Overlap,
        configuration.TopWavelets);
    return fingerprints;
}
/// <summary>
/// Produces the logarithmically spaced index table, choosing the generator
/// according to <c>UseDynamicLogBase</c>.
/// </summary>
/// <param name="configuration">
/// The configuration for log frequencies
/// </param>
/// <returns>
/// Log indexes: from the dynamic-base generator when <c>UseDynamicLogBase</c> is set,
/// otherwise from the static-base generator.
/// </returns>
private int[] GenerateLogFrequencies(AudioServiceConfiguration configuration)
{
    return configuration.UseDynamicLogBase
        ? GenerateLogFrequenciesDynamicBase(configuration)
        : GenerateStaticLogFrequencies(configuration);
}
/// <summary>
/// Generates <c>LogBins + 1</c> logarithmically spaced indexes using a log base
/// derived from the configured frequency range.
/// </summary>
/// <param name="configuration">
/// Supplies <c>MinFrequency</c>, <c>MaxFrequency</c>, <c>LogBins</c>,
/// <c>WindowSize</c> and <c>SampleRate</c>.
/// </param>
/// <returns>
/// An array of <c>LogBins + 1</c> indexes; entry <c>j</c> is
/// <c>(int)((logBase^j - 1) * mincoef) + (int)mincoef</c>.
/// </returns>
private int[] GenerateLogFrequenciesDynamicBase(AudioServiceConfiguration configuration)
{
    // logBase is chosen so that logBase^LogBins == MaxFrequency / MinFrequency.
    double logBase = Math.Exp(
        Math.Log((double)configuration.MaxFrequency / configuration.MinFrequency) / configuration.LogBins);

    // MinFrequency scaled by WindowSize / SampleRate.
    // NOTE(review): presumably the (fractional) FFT bin position of MinFrequency - confirm.
    double mincoef = (double)configuration.WindowSize / configuration.SampleRate * configuration.MinFrequency;

    // Loop-invariant integer offset, computed once instead of on every iteration.
    int baseIndex = (int)mincoef;

    int[] indexes = new int[configuration.LogBins + 1];
    for (int j = 0; j < indexes.Length; j++)
    {
        // The two parts are truncated separately, exactly as before, so results are unchanged.
        // The previously computed (and never used) "end" value has been removed.
        int start = (int)((Math.Pow(logBase, j) - 1.0) * mincoef);
        indexes[j] = start + baseIndex;
    }

    return indexes;
}
/// <summary>
/// Generates <c>LogBins + 1</c> indexes for frequencies spaced evenly on a
/// logarithmic scale (base <c>LogBase</c>) between <c>MinFrequency</c> and <c>MaxFrequency</c>.
/// </summary>
/// <param name="configuration">
/// Supplies <c>MinFrequency</c>, <c>MaxFrequency</c>, <c>LogBase</c>, <c>LogBins</c>,
/// <c>SampleRate</c> and <c>WindowSize</c>.
/// </param>
/// <returns>
/// An array of <c>LogBins + 1</c> values, each obtained from <c>FreqToIndex</c>.
/// </returns>
private int[] GenerateStaticLogFrequencies(AudioServiceConfiguration configuration)
{
    double lowExponent = Math.Log(configuration.MinFrequency, configuration.LogBase);
    double highExponent = Math.Log(configuration.MaxFrequency, configuration.LogBase);
    double step = (highExponent - lowExponent) / configuration.LogBins;

    int binCount = configuration.LogBins;
    int[] result = new int[binCount + 1];

    // The offset is accumulated by repeated addition (not step * k) and added to
    // lowExponent on each pass, so the floating-point results match the established table.
    double offset = 0;
    for (int k = 0; k < result.Length; k++)
    {
        double frequency = Math.Pow(configuration.LogBase, lowExponent + offset);

        // Translate the frequency into the index from which summation starts.
        result[k] = FreqToIndex(frequency, configuration.SampleRate, configuration.WindowSize);

        offset += step;
    }

    return result;
}
/// <summary>
/// Builds a log spectrogram from audio samples: each frame is windowed,
/// transformed with the FFT and reduced to logarithmically spaced bins.
/// </summary>
/// <param name="samples">
/// Audio samples. Modified in place by <c>NormalizeInPlace</c> when
/// <c>configuration.NormalizeSignal</c> is set.
/// </param>
/// <param name="windowFunction">Provides the window weights applied to every frame.</param>
/// <param name="configuration">
/// Supplies <c>WindowSize</c>, <c>Overlap</c>, <c>LogBins</c>, <c>NormalizeSignal</c>
/// and the settings used to generate the log frequency indexes.
/// </param>
/// <returns>
/// One entry per frame, <c>(samples.Length - WindowSize) / Overlap</c> frames in total;
/// each entry is the output of <c>ExtractLogBins</c> for that frame.
/// </returns>
public double[][] CreateLogSpectrogram(
    float[] samples,
    IWindowFunction windowFunction,
    AudioServiceConfiguration configuration)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    int windowSize = configuration.WindowSize;
    int hop = configuration.Overlap;

    // Number of frames, i.e. the width of the spectrogram image.
    int frameCount = (samples.Length - windowSize) / hop;
    double[][] spectrogram = new double[frameCount][];

    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
    double[] weights = windowFunction.GetWindow();

    // Example framings: 2048 samples every 64 samples at 5512 Hz (371 ms each 11.6 ms),
    // or 8192 samples every 512 samples at 32000 Hz (256 ms each 16 ms).
    for (int frame = 0; frame < frameCount; frame++)
    {
        int frameStart = frame * hop;

        // Interleaved complex buffer: even slots hold the real part, odd slots the imaginary part.
        double[] complexSignal = new double[2 * windowSize];
        for (int n = 0; n < windowSize; n++)
        {
            // Real part: the sample weighted by the window function.
            complexSignal[2 * n] = weights[n] * samples[frameStart + n];

            // Imaginary part is explicitly zeroed; the FFT overwrites the buffer.
            complexSignal[(2 * n) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        spectrogram[frame] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    Dbg.WriteLine("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return spectrogram;
}
/// <summary>
/// Builds a log spectrogram for an audio file by reading its samples with
/// <c>ReadMonoFromFile</c> and delegating to the sample-based overload.
/// </summary>
/// <param name="pathToFile">Path of the audio file to read.</param>
/// <param name="windowFunction">Window function forwarded to the sample-based overload.</param>
/// <param name="configuration">
/// Spectrogram settings; <c>SampleRate</c> is also passed to <c>ReadMonoFromFile</c>.
/// </param>
/// <returns>The log spectrogram computed from the samples read from the file.</returns>
public double[][] CreateLogSpectrogram(
    string pathToFile,
    IWindowFunction windowFunction,
    AudioServiceConfiguration configuration)
{
    // NOTE(review): the meaning of the two trailing zero arguments is defined by
    // ReadMonoFromFile - confirm against its declaration.
    float[] monoSamples = ReadMonoFromFile(pathToFile, configuration.SampleRate, 0, 0);

    return CreateLogSpectrogram(monoSamples, windowFunction, configuration);
}