/// <summary>
        /// Creates fingerprints from audio samples and additionally exposes the intermediate
        /// log spectrogram and the spectral images through the two out parameters.
        /// </summary>
        /// <param name="samples">Audio samples handed to <c>AudioService.CreateLogSpectrogram</c>.</param>
        /// <param name="param">Work unit whose <c>FingerprintingConfiguration</c> supplies every setting used here.</param>
        /// <param name="logSpectrogram">Receives the log spectrogram returned by the audio service.</param>
        /// <param name="spectralImages">Receives the spectral images set by <c>CreateFingerprintsFromLogSpectrum</c>.</param>
        /// <returns>The fingerprints returned by <c>CreateFingerprintsFromLogSpectrum</c>.</returns>
        public List<bool[]> CreateFingerprintsFromAudioSamples(float[] samples, WorkUnitParameterObject param, out double[][] logSpectrogram, out List<double[][]> spectralImages)
        {
            IFingerprintingConfiguration settings = param.FingerprintingConfiguration;

            // Mirror the spectrogram-related fingerprinting settings into the
            // configuration object the audio service expects.
            AudioServiceConfiguration spectrogramSettings = new AudioServiceConfiguration
            {
                LogBins = settings.LogBins,
                LogBase = settings.LogBase,
                MaxFrequency = settings.MaxFrequency,
                MinFrequency = settings.MinFrequency,
                Overlap = settings.Overlap,
                SampleRate = settings.SampleRate,
                WindowSize = settings.WindowSize,
                NormalizeSignal = settings.NormalizeSignal,
                UseDynamicLogBase = settings.UseDynamicLogBase
            };

            // Compute the spectrogram once; it is both returned to the caller
            // (out parameter) and used as the input of the fingerprinting step.
            double[][] spectrogram = AudioService.CreateLogSpectrogram(
                samples,
                settings.WindowFunction,
                spectrogramSettings);
            logSpectrogram = spectrogram;

            List<bool[]> fingerprints = this.CreateFingerprintsFromLogSpectrum(
                spectrogram,
                settings.Stride,
                settings.FingerprintLength,
                settings.Overlap,
                settings.TopWavelets,
                out spectralImages);

            return fingerprints;
        }
        /// <summary>
        /// Builds a log-binned spectrogram: the samples are cut into overlapping frames of
        /// <c>configuration.WdftSize</c> samples, each frame is weighted by the window,
        /// transformed with a forward FFT and reduced to log bins by <c>ExtractLogBins</c>.
        /// </summary>
        /// <param name="samples">
        /// Audio samples. They are passed to <c>NormalizeInPlace</c> first, so the caller's
        /// array may be modified.
        /// </param>
        /// <param name="windowFunction">Supplies the window coefficients for one frame.</param>
        /// <param name="configuration">Frame size (<c>WdftSize</c>), hop (<c>Overlap</c>) and log-bin settings.</param>
        /// <returns>
        /// One row per frame. The result is empty when the input is too short to hold a
        /// single frame.
        /// </returns>
        public float[][] CreateLogSpectrogram(
            float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
        {
            NormalizeInPlace(samples);

            // Number of frames, i.e. the width of the spectrogram image.
            int width = (samples.Length - configuration.WdftSize) / configuration.Overlap;
            if (width < 0)
            {
                // The input is shorter than one window. Without this clamp the array
                // allocation below would throw an OverflowException for the negative size.
                width = 0;
            }

            float[][] frames = new float[width][];
            float[] complexSignal = new float[2 * configuration.WdftSize]; /*even - Re, odd - Img*/
            double[] window = windowFunction.GetWindow(configuration.WdftSize);
            int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
            for (int i = 0; i < width; i++)
            {
                // e.g. take 371 ms each 11.6 ms (2048 samples each 64 samples)
                int frameStart = i * configuration.Overlap;
                for (int j = 0; j < configuration.WdftSize; j++)
                {
                    // Real part: sample weighted by the window function.
                    complexSignal[2 * j] = (float)(window[j] * samples[frameStart + j]);

                    // Imaginary part: reset on every frame because the buffer is reused
                    // and still holds the previous FFT output.
                    complexSignal[(2 * j) + 1] = 0;
                }

                // FFT transform for gathering the spectrum
                Fourier.FFT(complexSignal, configuration.WdftSize, FourierDirection.Forward);
                frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            return frames;
        }
        /// <summary>
        /// Creates fingerprints from audio samples: computes the log spectrogram through the
        /// audio service and passes it on to <c>CreateFingerprintsFromSpectrum</c>.
        /// </summary>
        /// <param name="samples">Audio samples handed to the audio service.</param>
        /// <param name="param">Work unit whose <c>FingerprintingConfiguration</c> supplies every setting used here.</param>
        /// <returns>The fingerprints returned by <c>CreateFingerprintsFromSpectrum</c>.</returns>
        private List<bool[]> CreateFingerprintsFromAudioSamples(float[] samples, WorkUnitParameterObject param)
        {
            IFingerprintingConfiguration settings = param.FingerprintingConfiguration;

            // Mirror the spectrogram-related settings into the audio service configuration.
            AudioServiceConfiguration spectrogramSettings = new AudioServiceConfiguration
            {
                LogBins = settings.LogBins,
                LogBase = settings.LogBase,
                MaxFrequency = settings.MaxFrequency,
                MinFrequency = settings.MinFrequency,
                Overlap = settings.Overlap,
                SampleRate = settings.SampleRate,
                WdftSize = settings.WdftSize
            };

            float[][] logSpectrum = audioService.CreateLogSpectrogram(
                samples,
                settings.WindowFunction,
                spectrogramSettings);

            List<bool[]> fingerprints = CreateFingerprintsFromSpectrum(
                logSpectrum,
                settings.Stride,
                settings.FingerprintLength,
                settings.Overlap,
                settings.LogBins,
                settings.TopWavelets);

            return fingerprints;
        }
示例#4
0
        /// <summary>
        /// Builds a log-binned spectrogram: the samples are cut into overlapping frames of
        /// <c>configuration.WdftSize</c> samples, each frame is weighted by the window,
        /// transformed with <c>lomonFFT.TableFFT</c> and reduced to log bins by
        /// <c>ExtractLogBins</c>.
        /// </summary>
        /// <param name="samples">
        /// Audio samples. When <c>configuration.NormalizeSignal</c> is set they are passed to
        /// <c>NormalizeInPlace</c>, so the caller's array may be modified.
        /// </param>
        /// <param name="windowFunction">
        /// Supplies the window coefficients; <c>GetWindow()</c> must return at least
        /// <c>WdftSize</c> values because the loop indexes it up to <c>WdftSize - 1</c>.
        /// </param>
        /// <param name="configuration">Frame size (<c>WdftSize</c>), hop (<c>Overlap</c>) and log-bin settings.</param>
        /// <returns>
        /// One row per frame. The result is empty when the input is too short to hold a
        /// single frame.
        /// </returns>
        public double[][] CreateLogSpectrogram(
            float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
        {
            // Historical note: scaling the samples to the 16 bit range (-32,768 to 32,767),
            // as Matlab does with speech * 2^15, is intentionally disabled:
            //MathUtils.Multiply(ref samples, Analyzer.AUDIO_MULTIPLIER); // 65536

            if (configuration.NormalizeSignal)
            {
                NormalizeInPlace(samples);
            }

            // Number of frames, i.e. the width of the spectrogram image.
            int width = (samples.Length - configuration.WdftSize) / configuration.Overlap;
            if (width < 0)
            {
                // The input is shorter than one window. Without this clamp the array
                // allocation below would throw an OverflowException for the negative size.
                width = 0;
            }

            double[][] frames = new double[width][];
            int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
            double[] window = windowFunction.GetWindow();
            for (int i = 0; i < width; i++)
            {
                // Interleaved complex buffer: even index - real part, odd index - imaginary part.
                double[] complexSignal = new double[2 * configuration.WdftSize];

                // e.g. take 371 ms each 11.6 ms (2048 samples each 64 samples)
                int frameStart = i * configuration.Overlap;
                for (int j = 0; j < configuration.WdftSize; j++)
                {
                    // Real part: sample weighted by the window function.
                    complexSignal[2 * j] = window[j] * samples[frameStart + j];

                    // Imaginary part starts at zero for a real-valued input signal.
                    complexSignal[(2 * j) + 1] = 0;
                }

                lomonFFT.TableFFT(complexSignal, true);

                frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            return frames;
        }
示例#5
0
 /// <summary>
 /// Reads the given audio file as mono samples at <c>configuration.SampleRate</c> and
 /// returns the log spectrogram computed by the sample-based overload.
 /// </summary>
 /// <param name="pathToFile">Path of the audio file to read.</param>
 /// <param name="windowFunction">Window function forwarded to the sample-based overload.</param>
 /// <param name="configuration">Supplies the sample rate for reading and is forwarded unchanged.</param>
 /// <returns>The log spectrogram of the file's samples.</returns>
 public double[][] CreateLogSpectrogram(string pathToFile, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
 {
     // The two trailing zeros are passed through to ReadMonoFromFile unchanged.
     return CreateLogSpectrogram(
         ReadMonoFromFile(pathToFile, configuration.SampleRate, 0, 0),
         windowFunction,
         configuration);
 }
示例#6
0
        /// <summary>
        /// Computes <c>LogBins + 1</c> spectrum indexes whose frequencies are spaced evenly on
        /// a logarithmic scale (base <c>LogBase</c>) between <c>MinFrequency</c> and
        /// <c>MaxFrequency</c>.
        /// </summary>
        /// <param name="configuration">Frequency range, log base, bin count, sample rate and FFT size.</param>
        /// <returns>The index returned by <c>FreqToIndex</c> for every bin boundary.</returns>
        private int[] GenerateStaticLogFrequencies(AudioServiceConfiguration configuration)
        {
            double lowestExponent = Math.Log(configuration.MinFrequency, configuration.LogBase);
            double highestExponent = Math.Log(configuration.MaxFrequency, configuration.LogBase);
            double exponentStep = (highestExponent - lowestExponent) / configuration.LogBins;

            int[] boundaries = new int[configuration.LogBins + 1];

            // The exponent offset is accumulated by repeated addition (not computed as
            // step * index) on purpose, so rounding stays exactly as it always was.
            double exponentOffset = 0;
            for (int bin = 0; bin < boundaries.Length; bin++)
            {
                double frequency = Math.Pow(configuration.LogBase, lowestExponent + exponentOffset);

                // Spectrum index at which the summation for this bin starts.
                boundaries[bin] = FreqToIndex(frequency, configuration.SampleRate, configuration.WdftSize);

                exponentOffset += exponentStep;
            }

            return boundaries;
        }
示例#7
0
        /// <summary>
        /// Computes <c>LogBins + 1</c> spectrum indexes using a logarithm base derived from the
        /// configured frequency range instead of <c>configuration.LogBase</c>.
        /// </summary>
        /// <param name="configuration">Frequency range, bin count, sample rate and FFT size.</param>
        /// <returns>
        /// For every j in [0, LogBins]: <c>(int)((logBase^j - 1) * mincoef) + (int)mincoef</c>.
        /// </returns>
        private int[] GenerateLogFrequenciesDynamicBase(AudioServiceConfiguration configuration)
        {
            // Base chosen so that logBase^LogBins == MaxFrequency / MinFrequency.
            double logBase =
                Math.Exp(
                    Math.Log((double)configuration.MaxFrequency / configuration.MinFrequency) / configuration.LogBins);

            // WdftSize / SampleRate * MinFrequency, i.e. MinFrequency scaled by the
            // number of FFT points per Hz.
            double mincoef = (double)configuration.WdftSize / configuration.SampleRate * configuration.MinFrequency;

            // Loop-invariant offset added to every index.
            int minIndex = (int)mincoef;

            int[] indexes = new int[configuration.LogBins + 1];
            for (int j = 0; j < indexes.Length; j++)
            {
                // The former "end" value (start of the next bin) was computed here but
                // never used, so it has been dropped.
                int start = (int)((Math.Pow(logBase, j) - 1.0) * mincoef);
                indexes[j] = start + minIndex;
            }

            return indexes;
        }
示例#8
0
        /// <summary>
        /// Selects the strategy used to produce logarithmically spaced spectrum indexes.
        /// </summary>
        /// <param name="configuration">
        /// The configuration for log frequencies; its <c>UseDynamicLogBase</c> flag picks the strategy.
        /// </param>
        /// <returns>
        /// The result of <c>GenerateLogFrequenciesDynamicBase</c> when the flag is set,
        /// otherwise the result of <c>GenerateStaticLogFrequencies</c>.
        /// </returns>
        private int[] GenerateLogFrequencies(AudioServiceConfiguration configuration)
        {
            return configuration.UseDynamicLogBase
                ? GenerateLogFrequenciesDynamicBase(configuration)
                : GenerateStaticLogFrequencies(configuration);
        }
		/// <summary>
		/// Builds a log-binned spectrogram: the samples are cut into overlapping frames of
		/// <c>configuration.WindowSize</c> samples, each frame is weighted by the window,
		/// transformed with <c>lomonFFT.TableFFT</c> and reduced to log bins by
		/// <c>ExtractLogBins</c>. The elapsed time is written to the debug log.
		/// </summary>
		/// <param name="samples">
		/// Audio samples. When <c>configuration.NormalizeSignal</c> is set they are passed to
		/// <c>NormalizeInPlace</c>, so the caller's array may be modified.
		/// </param>
		/// <param name="windowFunction">
		/// Supplies the window coefficients; <c>GetWindow()</c> must return at least
		/// <c>WindowSize</c> values because the loop indexes it up to <c>WindowSize - 1</c>.
		/// </param>
		/// <param name="configuration">Frame size (<c>WindowSize</c>), hop (<c>Overlap</c>) and log-bin settings.</param>
		/// <returns>
		/// One row per frame. The result is empty when the input is too short to hold a
		/// single frame.
		/// </returns>
		public double[][] CreateLogSpectrogram(
			float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
		{
			DbgTimer t = new DbgTimer();
			t.Start ();

			if (configuration.NormalizeSignal)
			{
				NormalizeInPlace(samples);
			}

			// Number of frames, i.e. the width of the spectrogram image.
			int width = (samples.Length - configuration.WindowSize) / configuration.Overlap;
			if (width < 0)
			{
				// The input is shorter than one window. Without this clamp the array
				// allocation below would throw an OverflowException for the negative size.
				// Clamping (instead of returning early) keeps the timing log line below.
				width = 0;
			}

			double[][] frames = new double[width][];
			int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
			double[] window = windowFunction.GetWindow();
			for (int i = 0; i < width; i++)
			{
				// Interleaved complex buffer: even index - real part, odd index - imaginary part.
				double[] complexSignal = new double[2 * configuration.WindowSize];

				// e.g. take 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512)
				// or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000)
				int frameStart = i * configuration.Overlap;
				for (int j = 0; j < configuration.WindowSize; j++)
				{
					// Real part: sample weighted by the window function.
					complexSignal[2 * j] = window[j] * samples[frameStart + j];

					// Imaginary part starts at zero for a real-valued input signal.
					complexSignal[(2 * j) + 1] = 0;
				}

				lomonFFT.TableFFT(complexSignal, true);

				frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
			}

			Dbg.WriteLine ("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
			return frames;
		}
示例#10
0
        /// <summary>
        /// Runs the fingerprinting pipeline on one audio file and saves three diagnostic
        /// PNG images (spectrogram, log spectral images, fingerprints) whose file names
        /// start with "imageservice_" followed by <paramref name="name"/>.
        /// </summary>
        /// <param name="filename">Path of the audio file to analyse.</param>
        /// <param name="name">Label embedded in the names of all files written.</param>
        private static void TestSoundfingerprintingAlgorithm(string filename, string name)
        {
            // Work unit: the file to process plus the shared fingerprinting configuration.
            WorkUnitParameterObject param = new WorkUnitParameterObject
            {
                PathToAudioFile = filename,
                StartAtMilliseconds = 0,
                MillisecondsToProcess = 0,
                FingerprintingConfiguration = fingerprintingConfig
            };

            // Services: fingerprinting, and image rendering built on its sub-services.
            FingerprintService service = GetSoundfingerprintingService();
            ImageService imaging = new ImageService(service.SpectrumService, service.WaveletService);

            // Spectrogram settings mirrored from the fingerprinting configuration.
            AudioServiceConfiguration spectrogramSettings = new AudioServiceConfiguration
            {
                LogBins = fingerprintingConfig.LogBins,
                LogBase = fingerprintingConfig.LogBase,
                MaxFrequency = fingerprintingConfig.MaxFrequency,
                MinFrequency = fingerprintingConfig.MinFrequency,
                Overlap = fingerprintingConfig.Overlap,
                SampleRate = fingerprintingConfig.SampleRate,
                WdftSize = fingerprintingConfig.WdftSize,
                NormalizeSignal = fingerprintingConfig.NormalizeSignal,
                UseDynamicLogBase = fingerprintingConfig.UseDynamicLogBase
            };

            // Common prefix of every file written by this method.
            string outputPrefix = "imageservice_" + name;

            // 1) Linear spectrogram image.
            double[][] spectrogram = service.AudioService.CreateSpectrogram(
                filename,
                new Mirage.HannWindow(fingerprintingConfig.WdftSize),
                fingerprintingConfig.SampleRate,
                fingerprintingConfig.Overlap,
                fingerprintingConfig.WdftSize);
            imaging.GetSpectrogramImage(spectrogram, 600, 400).Save(outputPrefix + "_specgram.png");

            // NOTE: a disabled debug dump of the linear spectrogram (transposed Comirva
            // Matrix written as "_stftdata2" ascii/csv plus "_specgram2.png" and
            // "_specgramlog2.png" images) used to follow here.

            // 2) Log spectrogram rendered as a sequence of spectral images.
            double[][] logSpectrogram = service.AudioService.CreateLogSpectrogram(
                filename,
                new Mirage.HannWindow(fingerprintingConfig.WdftSize),
                spectrogramSettings);
            imaging.GetLogSpectralImages(
                logSpectrogram,
                fingerprintingConfig.Stride,
                fingerprintingConfig.FingerprintLength,
                fingerprintingConfig.Overlap,
                2).Save(outputPrefix + "_specgram_logimages.png");

            // The transposed matrix is built in every configuration, but only DEBUG
            // builds actually write it out.
            Comirva.Audio.Util.Maths.Matrix stftdataLog = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();
            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (DEBUG_OUTPUT_TEXT)
                {
                    stftdataLog.WriteAscii(name + "_stftdataLog.ascii");
                    stftdataLog.WriteCSV(name + "_stftdataLog.csv", ";");
                }

                // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
                stftdataLog.DrawMatrixImageLogValues(name + "_stftdataLog.png", true);
            }
            #endif

            // 3) Fingerprints image. The log spectrogram handed back through the out
            // parameter is not needed here.
            double[][] ignoredLogSpectrogram;
            List<bool[]> fingerprints = service.CreateFingerprintsFromAudioFile(param, out ignoredLogSpectrogram);
            int imageWidth = fingerprintingConfig.FingerprintLength;
            int imageHeight = fingerprintingConfig.LogBins;
            imaging.GetImageForFingerprints(fingerprints, imageWidth, imageHeight, 2).Save(outputPrefix + "_fingerprints.png");

            // NOTE: a disabled follow-up step used to turn the fingerprints into hash
            // signatures: a DuplicatesDetector Repository built on
            // LocalPermutations("Soundfingerprinting\\perms.csv", ","), a Track with
            // Title = name and Path = filename, then
            // repository.GetSignatures(fingerprints, track, 25, 4) returned to the caller.
        }