/// <summary>
/// Creates fingerprints from raw audio samples and additionally exposes the
/// intermediate log spectrogram and the spectral images to the caller.
/// </summary>
/// <param name="samples">Audio samples to fingerprint.</param>
/// <param name="param">Work unit whose FingerprintingConfiguration supplies every setting used here.</param>
/// <param name="logSpectrogram">Receives the log spectrogram computed from <paramref name="samples"/>.</param>
/// <param name="spectralImages">Receives the spectral images produced by CreateFingerprintsFromLogSpectrum.</param>
/// <returns>The fingerprints returned by CreateFingerprintsFromLogSpectrum.</returns>
public List<bool[]> CreateFingerprintsFromAudioSamples(float[] samples, WorkUnitParameterObject param, out double[][] logSpectrogram, out List<double[][]> spectralImages)
{
    IFingerprintingConfiguration fingerprintConfig = param.FingerprintingConfiguration;

    // Copy the spectrogram-related settings over to the audio service configuration.
    AudioServiceConfiguration spectrogramConfig = new AudioServiceConfiguration
    {
        LogBins = fingerprintConfig.LogBins,
        LogBase = fingerprintConfig.LogBase,
        MaxFrequency = fingerprintConfig.MaxFrequency,
        MinFrequency = fingerprintConfig.MinFrequency,
        Overlap = fingerprintConfig.Overlap,
        SampleRate = fingerprintConfig.SampleRate,
        WindowSize = fingerprintConfig.WindowSize,
        NormalizeSignal = fingerprintConfig.NormalizeSignal,
        UseDynamicLogBase = fingerprintConfig.UseDynamicLogBase
    };

    // The spectrogram goes straight into the out parameter so the caller can inspect it.
    logSpectrogram = AudioService.CreateLogSpectrogram(samples, fingerprintConfig.WindowFunction, spectrogramConfig);

    List<bool[]> fingerprints = this.CreateFingerprintsFromLogSpectrum(
        logSpectrogram,
        fingerprintConfig.Stride,
        fingerprintConfig.FingerprintLength,
        fingerprintConfig.Overlap,
        fingerprintConfig.TopWavelets,
        out spectralImages);

    return fingerprints;
}
/// <summary>
/// Builds a log-frequency spectrogram from audio samples: the signal is cut
/// into windows of <c>configuration.WdftSize</c> samples taken every
/// <c>configuration.Overlap</c> samples, each window is weighted, transformed
/// with an FFT and reduced to <c>configuration.LogBins</c> values.
/// </summary>
/// <param name="samples">Audio samples; passed to NormalizeInPlace before processing.</param>
/// <param name="windowFunction">Supplies the weights applied to every window.</param>
/// <param name="configuration">Window size, hop size and log-bin settings.</param>
/// <returns>
/// One row per frame as produced by ExtractLogBins. The result is empty when
/// <paramref name="samples"/> does not contain enough samples for a single frame.
/// </returns>
public float[][] CreateLogSpectrogram(float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    NormalizeInPlace(samples);

    // Number of frames, i.e. the width of the spectrogram image.
    int width = (samples.Length - configuration.WdftSize) / configuration.Overlap;
    if (width < 0)
    {
        // Integer division truncates toward zero, so input that is only slightly
        // shorter than one window already yields zero frames. Treat any shorter
        // input the same way instead of failing on a negative array size.
        width = 0;
    }

    float[][] frames = new float[width][];

    // Interleaved complex buffer: even indexes hold the real part, odd the imaginary part.
    float[] complexSignal = new float[2 * configuration.WdftSize];
    double[] window = windowFunction.GetWindow(configuration.WdftSize);
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);

    for (int i = 0; i < width; i++)
    {
        // Take WdftSize samples every Overlap samples
        // (e.g. 2048 samples each 64 samples = 371 ms each 11.6 ms).
        for (int j = 0; j < configuration.WdftSize; j++)
        {
            // Weight the sample by the window function.
            complexSignal[2 * j] = (float)(window[j] * samples[(i * configuration.Overlap) + j]);

            // The buffer is reused between frames, so the imaginary part has to be reset.
            complexSignal[(2 * j) + 1] = 0;
        }

        // FFT transform for gathering the spectrum.
        Fourier.FFT(complexSignal, configuration.WdftSize, FourierDirection.Forward);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    return frames;
}
/// <summary>
/// Creates fingerprints from raw audio samples by first computing the log
/// spectrogram and then handing it to CreateFingerprintsFromSpectrum.
/// </summary>
/// <param name="samples">Audio samples to fingerprint.</param>
/// <param name="param">Work unit whose FingerprintingConfiguration supplies every setting used here.</param>
/// <returns>The fingerprints returned by CreateFingerprintsFromSpectrum.</returns>
private List<bool[]> CreateFingerprintsFromAudioSamples(float[] samples, WorkUnitParameterObject param)
{
    IFingerprintingConfiguration fingerprintConfig = param.FingerprintingConfiguration;

    // Copy the spectrogram-related settings over to the audio service configuration.
    AudioServiceConfiguration spectrogramConfig = new AudioServiceConfiguration
    {
        LogBins = fingerprintConfig.LogBins,
        LogBase = fingerprintConfig.LogBase,
        MaxFrequency = fingerprintConfig.MaxFrequency,
        MinFrequency = fingerprintConfig.MinFrequency,
        Overlap = fingerprintConfig.Overlap,
        SampleRate = fingerprintConfig.SampleRate,
        WdftSize = fingerprintConfig.WdftSize
    };

    float[][] logSpectrum = audioService.CreateLogSpectrogram(samples, fingerprintConfig.WindowFunction, spectrogramConfig);

    List<bool[]> fingerprints = CreateFingerprintsFromSpectrum(
        logSpectrum,
        fingerprintConfig.Stride,
        fingerprintConfig.FingerprintLength,
        fingerprintConfig.Overlap,
        fingerprintConfig.LogBins,
        fingerprintConfig.TopWavelets);

    return fingerprints;
}
/// <summary>
/// Builds a log-frequency spectrogram from audio samples: the signal is cut
/// into windows of <c>configuration.WdftSize</c> samples taken every
/// <c>configuration.Overlap</c> samples, each window is weighted, transformed
/// with an FFT and reduced to <c>configuration.LogBins</c> values.
/// </summary>
/// <remarks>
/// The samples are not scaled to the 16 bit range (-32,768 to 32,767) the way
/// Matlab does (multiplying by 2^15 when max(abs(speech)) &lt;= 1); the
/// corresponding MathUtils.Multiply call is disabled.
/// </remarks>
/// <param name="samples">Audio samples; passed to NormalizeInPlace when <c>configuration.NormalizeSignal</c> is set.</param>
/// <param name="windowFunction">Supplies the weights applied to every window.</param>
/// <param name="configuration">Window size, hop size, normalization and log-bin settings.</param>
/// <returns>
/// One row per frame as produced by ExtractLogBins. The result is empty when
/// <paramref name="samples"/> does not contain enough samples for a single frame.
/// </returns>
public double[][] CreateLogSpectrogram(float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    // Number of frames, i.e. the width of the spectrogram image.
    int width = (samples.Length - configuration.WdftSize) / configuration.Overlap;
    if (width < 0)
    {
        // Integer division truncates toward zero, so input that is only slightly
        // shorter than one window already yields zero frames. Treat any shorter
        // input the same way instead of failing on a negative array size.
        width = 0;
    }

    double[][] frames = new double[width][];
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);

    // The window function is expected to already know its own size
    // (the GetWindow(configuration.WdftSize) overload is no longer used here).
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // Interleaved complex buffer: even indexes hold the real part, odd the
        // imaginary part (the layout Exocortex works with).
        double[] complexSignal = new double[2 * configuration.WdftSize];

        // Take WdftSize samples every Overlap samples
        // (e.g. 2048 samples each 64 samples = 371 ms each 11.6 ms).
        for (int j = 0; j < configuration.WdftSize; j++)
        {
            // Weight the sample by the window function.
            complexSignal[2 * j] = window[j] * samples[(i * configuration.Overlap) + j];

            // Need to clear out as the FFT modifies the buffer (phase).
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    return frames;
}
/// <summary>
/// Reads an audio file as mono samples at the configured sample rate and
/// builds its log spectrogram.
/// </summary>
/// <param name="pathToFile">Path of the audio file handed to ReadMonoFromFile.</param>
/// <param name="windowFunction">Window function forwarded to the sample-based overload.</param>
/// <param name="configuration">Provides the sample rate for reading and all spectrogram settings.</param>
/// <returns>The log spectrogram produced by the sample-based overload.</returns>
public double[][] CreateLogSpectrogram(string pathToFile, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    // NOTE(review): the two trailing zeros are forwarded to ReadMonoFromFile as-is;
    // presumably they select the whole file - confirm against ReadMonoFromFile.
    return CreateLogSpectrogram(
        ReadMonoFromFile(pathToFile, configuration.SampleRate, 0, 0),
        windowFunction,
        configuration);
}
/// <summary>
/// Computes <c>LogBins + 1</c> spectrum indexes whose frequencies are spaced
/// evenly on a logarithmic scale (base <c>configuration.LogBase</c>) between
/// <c>MinFrequency</c> and <c>MaxFrequency</c>.
/// </summary>
/// <param name="configuration">Frequency range, log base, bin count, sample rate and window size.</param>
/// <returns>The index returned by FreqToIndex for every bin boundary.</returns>
private int[] GenerateStaticLogFrequencies(AudioServiceConfiguration configuration)
{
    double lowestExponent = Math.Log(configuration.MinFrequency, configuration.LogBase);
    double highestExponent = Math.Log(configuration.MaxFrequency, configuration.LogBase);

    // Exponent distance between two neighbouring bin boundaries.
    double exponentStep = (highestExponent - lowestExponent) / configuration.LogBins;

    int[] boundaries = new int[configuration.LogBins + 1];

    // The offset is accumulated step by step (rather than computed as
    // exponentStep * bin) to keep the exact floating point results.
    double exponentOffset = 0;
    for (int bin = 0; bin <= configuration.LogBins; ++bin)
    {
        double frequency = Math.Pow(configuration.LogBase, lowestExponent + exponentOffset);
        exponentOffset += exponentStep;

        // Index in the spectrum array from which the summation for this bin starts.
        boundaries[bin] = FreqToIndex(frequency, configuration.SampleRate, configuration.WdftSize);
    }

    return boundaries;
}
/// <summary>
/// Computes <c>LogBins + 1</c> logarithmically spaced spectrum indexes using a
/// log base derived from the frequency range instead of <c>configuration.LogBase</c>.
/// </summary>
/// <param name="configuration">Frequency range, bin count, sample rate and window size.</param>
/// <returns>The spectrum index of every bin boundary.</returns>
private int[] GenerateLogFrequenciesDynamicBase(AudioServiceConfiguration configuration)
{
    // Base chosen so that logBase ^ LogBins == MaxFrequency / MinFrequency.
    double logBase = Math.Exp(
        Math.Log((double)configuration.MaxFrequency / configuration.MinFrequency) / configuration.LogBins);

    // MinFrequency scaled by WdftSize / SampleRate.
    double mincoef = (double)configuration.WdftSize / configuration.SampleRate * configuration.MinFrequency;

    int[] indexes = new int[configuration.LogBins + 1];
    for (int j = 0; j < configuration.LogBins + 1; j++)
    {
        // Offset of boundary j relative to the first boundary; both parts are
        // truncated separately before being added.
        int start = (int)((Math.Pow(logBase, j) - 1.0) * mincoef);
        indexes[j] = start + (int)mincoef;
    }

    return indexes;
}
/// <summary>
/// Gets the logarithmically spaced spectrum indexes, using either the
/// dynamically derived or the statically configured log base.
/// </summary>
/// <param name="configuration">
/// The configuration for log frequencies; <c>UseDynamicLogBase</c> selects the generator.
/// </param>
/// <returns>
/// Log indexes
/// </returns>
private int[] GenerateLogFrequencies(AudioServiceConfiguration configuration)
{
    return configuration.UseDynamicLogBase
        ? GenerateLogFrequenciesDynamicBase(configuration)
        : GenerateStaticLogFrequencies(configuration);
}
/// <summary>
/// Builds a log-frequency spectrogram from audio samples: the signal is cut
/// into windows of <c>configuration.WindowSize</c> samples taken every
/// <c>configuration.Overlap</c> samples, each window is weighted, transformed
/// with an FFT and reduced to <c>configuration.LogBins</c> values. The elapsed
/// time is written to the debug output.
/// </summary>
/// <param name="samples">Audio samples; passed to NormalizeInPlace when <c>configuration.NormalizeSignal</c> is set.</param>
/// <param name="windowFunction">Supplies the weights applied to every window.</param>
/// <param name="configuration">Window size, hop size, normalization and log-bin settings.</param>
/// <returns>
/// One row per frame as produced by ExtractLogBins. The result is empty when
/// <paramref name="samples"/> does not contain enough samples for a single frame.
/// </returns>
public double[][] CreateLogSpectrogram(float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    if (configuration.NormalizeSignal)
    {
        NormalizeInPlace(samples);
    }

    // Number of frames, i.e. the width of the spectrogram image.
    int width = (samples.Length - configuration.WindowSize) / configuration.Overlap;
    if (width < 0)
    {
        // Integer division truncates toward zero, so input that is only slightly
        // shorter than one window already yields zero frames. Treat any shorter
        // input the same way instead of failing on a negative array size.
        width = 0;
    }

    double[][] frames = new double[width][];
    int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
    double[] window = windowFunction.GetWindow();

    for (int i = 0; i < width; i++)
    {
        // Interleaved complex buffer: even indexes hold the real part, odd the
        // imaginary part (the layout Exocortex works with).
        double[] complexSignal = new double[2 * configuration.WindowSize];

        // Take WindowSize samples every Overlap samples, e.g.
        // 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512)
        // or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000).
        for (int j = 0; j < configuration.WindowSize; j++)
        {
            // Weight the sample by the window function.
            complexSignal[2 * j] = window[j] * samples[(i * configuration.Overlap) + j];

            // Need to clear out as the FFT modifies the buffer (phase).
            complexSignal[(2 * j) + 1] = 0;
        }

        lomonFFT.TableFFT(complexSignal, true);
        frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
    }

    Dbg.WriteLine("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return frames;
}
/// <summary>
/// Runs the Soundfingerprinting pipeline on one audio file and saves the
/// intermediate results (spectrogram, log spectral images, fingerprints) as
/// PNG images named after <paramref name="name"/>.
/// </summary>
/// <param name="filename">Path of the audio file to analyze.</param>
/// <param name="name">Label used to build the output file names.</param>
private static void TestSoundfingerprintingAlgorithm(string filename, string name)
{
    // Work unit describing what to process; both time values are set to 0.
    WorkUnitParameterObject workUnit = new WorkUnitParameterObject
    {
        PathToAudioFile = filename,
        StartAtMilliseconds = 0,
        MillisecondsToProcess = 0,
        FingerprintingConfiguration = fingerprintingConfig
    };

    // Soundfingerprinting service and the image service built on top of it.
    FingerprintService fingerprintService = GetSoundfingerprintingService();
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // Audio service configuration mirroring the fingerprinting configuration.
    AudioServiceConfiguration audioConfig = new AudioServiceConfiguration
    {
        LogBins = fingerprintingConfig.LogBins,
        LogBase = fingerprintingConfig.LogBase,
        MaxFrequency = fingerprintingConfig.MaxFrequency,
        MinFrequency = fingerprintingConfig.MinFrequency,
        Overlap = fingerprintingConfig.Overlap,
        SampleRate = fingerprintingConfig.SampleRate,
        WdftSize = fingerprintingConfig.WdftSize,
        NormalizeSignal = fingerprintingConfig.NormalizeSignal,
        UseDynamicLogBase = fingerprintingConfig.UseDynamicLogBase
    };

    // Common prefix of every image written through the image service.
    string imagePrefix = "imageservice_" + name;

    // 1) Plain spectrogram.
    double[][] linearSpectrogram = fingerprintService.AudioService.CreateSpectrogram(
        filename,
        new Mirage.HannWindow(fingerprintingConfig.WdftSize),
        fingerprintingConfig.SampleRate,
        fingerprintingConfig.Overlap,
        fingerprintingConfig.WdftSize);
    imageService.GetSpectrogramImage(linearSpectrogram, 600, 400).Save(imagePrefix + "_specgram.png");

    // Disabled: debug dump of the plain spectrogram.
    //
    // Comirva.Audio.Util.Maths.Matrix stftdata = new Comirva.Audio.Util.Maths.Matrix(spectrogram).Transpose();
    // #if DEBUG
    // if (Analyzer.DEBUG_INFO_VERBOSE) {
    //     if (DEBUG_OUTPUT_TEXT) {
    //         stftdata.WriteAscii(name + "_stftdata2.ascii");
    //         stftdata.WriteCSV(name + "_stftdata2.csv", ";");
    //     }
    //     // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
    //     stftdata.DrawMatrixImageLogValues(name + "_specgram2.png", true);
    //     // spec gram with log values for the y axis (frequency)
    //     stftdata.DrawMatrixImageLogY(name + "_specgramlog2.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE);
    // }
    // #endif

    // 2) Log spectrogram and its spectral images.
    double[][] logSpectrogram = fingerprintService.AudioService.CreateLogSpectrogram(
        filename,
        new Mirage.HannWindow(fingerprintingConfig.WdftSize),
        audioConfig);
    imageService.GetLogSpectralImages(
        logSpectrogram,
        fingerprintingConfig.Stride,
        fingerprintingConfig.FingerprintLength,
        fingerprintingConfig.Overlap,
        2).Save(imagePrefix + "_specgram_logimages.png");

    Comirva.Audio.Util.Maths.Matrix transposedLogSpectrogram =
        new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE)
    {
        if (DEBUG_OUTPUT_TEXT)
        {
            transposedLogSpectrogram.WriteAscii(name + "_stftdataLog.ascii");
            transposedLogSpectrogram.WriteCSV(name + "_stftdataLog.csv", ";");
        }

        // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
        transposedLogSpectrogram.DrawMatrixImageLogValues(name + "_stftdataLog.png", true);
    }
#endif

    // 3) Fingerprints; the log spectrogram returned alongside them is not used here.
    double[][] fingerprintLogSpectrogram;
    List<bool[]> fingerprints = fingerprintService.CreateFingerprintsFromAudioFile(workUnit, out fingerprintLogSpectrogram);

    int imageWidth = fingerprintingConfig.FingerprintLength;
    int imageHeight = fingerprintingConfig.LogBins;
    imageService.GetImageForFingerprints(fingerprints, imageWidth, imageHeight, 2).Save(imagePrefix + "_fingerprints.png");

    // Disabled: hash signature generation for duplicate detection.
    //
    // IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    // Soundfingerprinting.DuplicatesDetector.DataAccess.Repository repository = new Soundfingerprinting.DuplicatesDetector.DataAccess.Repository(permutations);
    //
    // // Define track
    // Soundfingerprinting.DuplicatesDetector.Model.Track track = new Soundfingerprinting.DuplicatesDetector.Model.Track
    // {
    //     Title = name,
    //     Path = filename
    // };
    //
    // // Get the HashSignatures
    // List<Soundfingerprinting.DuplicatesDetector.Model.HashSignature> signatures = repository.GetSignatures(fingerprints, track, 25, 4);
    // return signatures;
}