Exemple #1
0
		/// <summary>
		/// Builds the statistical cluster model (Scms) of a song from its feature matrix:
		/// the mean vector, the covariance matrix and the inverse covariance matrix.
		/// </summary>
		/// <param name="mfccs">Feature matrix of the song (e.g. its MFCC representation)</param>
		/// <param name="name">Prefix of the file names used for the dumps written in DEBUG builds</param>
		/// <returns>The populated model, or null when inverting the covariance matrix throws</returns>
		public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name)
		{
			DbgTimer timer = new DbgTimer();
			timer.Start();

			// Mean
			Comirva.Audio.Util.Maths.Matrix meanVector = mfccs.Mean(2);
			#if DEBUG
			if (Analyzer.DEBUG_INFO_VERBOSE) {
				if (Analyzer.DEBUG_OUTPUT_TEXT) {
					meanVector.WriteText(name + "_mean.txt");
				}
				meanVector.DrawMatrixGraph(name + "_mean.png");
			}
			#endif

			// Covariance
			Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanVector);
			#if DEBUG
			if (Analyzer.DEBUG_INFO_VERBOSE) {
				if (Analyzer.DEBUG_OUTPUT_TEXT) {
					covariance.WriteText(name + "_covariance.txt");
				}
				covariance.DrawMatrixGraph(name + "_covariance.png");
			}
			#endif

			// Inverse covariance. Any exception raised by the inversion is reported
			// as a failed model and signalled to the caller with a null result.
			Comirva.Audio.Util.Maths.Matrix inverseCovariance;
			try {
				inverseCovariance = covariance.InverseGausJordan();
			} catch (Exception) {
				Dbg.WriteLine("MatrixSingularException - Scms failed!");
				return null;
			}
			#if DEBUG
			if (Analyzer.DEBUG_INFO_VERBOSE) {
				if (Analyzer.DEBUG_OUTPUT_TEXT) {
					inverseCovariance.WriteAscii(name + "_inverse_covariance.ascii");
				}
				inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance.png");
			}
			#endif

			// Pack the results: the mean is read from column 0 of the mean matrix, and
			// only the upper triangle (column >= row) of both matrices is stored,
			// row after row, in the flat cov / icov arrays.
			int dim = meanVector.Rows;
			Scms model = new Scms(dim);
			int packedIndex = 0;
			for (int row = 0; row < dim; row++) {
				model.mean[row] = (float) meanVector.MatrixData[row][0];
				for (int col = row; col < dim; col++, packedIndex++) {
					model.cov[packedIndex] = (float) covariance.MatrixData[row][col];
					model.icov[packedIndex] = (float) inverseCovariance.MatrixData[row][col];
				}
			}

			Dbg.WriteLine("Compute Scms - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
			return model;
		}
Exemple #2
0
        /// <summary>
        /// Computes a perceptual-hash style bit string from a matrix by comparing its
        /// values against their mean.
        /// </summary>
        /// <param name="mfcc">Matrix to hash; element [0][0] is read, so it must not be empty</param>
        /// <returns>
        /// A 'binary string' (aka bitstring, like 001010111011100010) which is easy to do a
        /// hamming distance on. It holds one character per matrix element outside the first
        /// row and the first column, in row-major order: '1' when the value is above the
        /// mean, '0' otherwise.
        /// </returns>
        private static string GetBitString(Comirva.Audio.Util.Maths.Matrix mfcc)
        {
            int rows = mfcc.Rows;
            int columns = mfcc.Columns;

            // Compute the mean of all values, excluding the first term [0][0]
            // since the DC coefficient can be significantly different from
            // the other values and would throw off the average.
            double total = 0;
            for (int x = 0; x < rows; x++) {
                for (int y = 0; y < columns; y++) {
                    total += mfcc.MatrixData[x][y];
                }
            }
            total -= mfcc.MatrixData[0][0];

            double avg = total / (double)((rows * columns) - 1);

            // Emit one bit per value depending on whether it is above the mean.
            // The result only captures the rough scale of each value relative to the mean.
            //
            // NOTE(review): the mean above excludes only element [0][0], but the bits skip
            // the entire first row and first column (the original condition was
            // "x != 0 && y != 0"). This is kept as is so that newly computed hashes have the
            // same length and content as existing ones - confirm whether it is intended.
            //
            // A StringBuilder avoids re-copying the whole string for every appended bit.
            System.Text.StringBuilder hash = new System.Text.StringBuilder(rows * columns);
            for (int x = 1; x < rows; x++) {
                for (int y = 1; y < columns; y++) {
                    hash.Append(mfcc.MatrixData[x][y] > avg ? '1' : '0');
                }
            }
            return hash.ToString();
        }
Exemple #3
0
        /// <summary>
        /// Compresses the log spectrogram matrix (Haar wavelet or DCT), builds a Statistical
        /// Cluster Model Similarity (Scms) feature from the result and adds it to the database.
        /// </summary>
        /// <param name="logSpectrogramMatrix">log spectrogram matrix</param>
        /// <param name="param">work unit parameters; supplies the file name used for output files and messages, the duration in ms and the path to the audio file</param>
        /// <param name="db">database</param>
        /// <param name="trackId">track id to insert</param>
        /// <param name="doOutputDebugInfo">decide whether to output debug images (default value can be set)</param>
        /// <param name="useHaarWavelet">true to use haar wavelet compression, false to use DCT compression</param>
        /// <returns>true if the feature was computed and added to the database, false otherwise</returns>
        private static bool AnalyseAndAddScmsUsingLogSpectrogram(Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix,
		                                                         WorkUnitParameterObject param,
		                                                         Db db,
		                                                         int trackId,
		                                                         bool doOutputDebugInfo=DEFAULT_DEBUG_INFO,
		                                                         bool useHaarWavelet = true)
        {
            string fileName = param.FileName;

            // Step 1: compress the log spectrogram into the matrix the model is built from
            Comirva.Audio.Util.Maths.Matrix scmsMatrix = null;
            if (!useHaarWavelet) {
                #region DCT Transform
                // It seems the Mirage way of applying the DCT is slightly faster than the
                // Comirva way due to less loops
                scmsMatrix = mfccMirage.ApplyDCT(ref logSpectrogramMatrix);

                #if DEBUG
                if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                    scmsMatrix.WriteAscii(fileName + "_mfccdata.ascii");
                }
                #endif

                if (doOutputDebugInfo) {
                    scmsMatrix.DrawMatrixImageLogValues(fileName + "_mfccdata.png", true);
                }

                #if DEBUG
                if (DEBUG_DO_INVERSE_TESTS) {
                    #region Inverse MFCC
                    // round-trip check: invert the DCT and dump the result
                    Comirva.Audio.Util.Maths.Matrix inverseDct = mfccMirage.InverseDCT(ref scmsMatrix);

                    if (DEBUG_OUTPUT_TEXT) {
                        inverseDct.WriteCSV(fileName + "_stftdata_inverse_mfcc.csv", ";");
                    }
                    inverseDct.DrawMatrixImageLogValues(fileName + "_specgramlog_inverse_mfcc.png", true);
                    #endregion
                }
                #endif
                #endregion
            } else {
                #region Wavelet Transform
                int lastHeight = 0;
                int lastWidth = 0;
                scmsMatrix = mfccMirage.ApplyWaveletCompression(ref logSpectrogramMatrix, out lastHeight, out lastWidth);

                #if DEBUG
                if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                    scmsMatrix.WriteAscii(fileName + "_waveletdata.ascii");
                }
                #endif

                if (doOutputDebugInfo) {
                    scmsMatrix.DrawMatrixImageLogValues(fileName + "_waveletdata.png", true);
                }

                #if DEBUG
                if (DEBUG_DO_INVERSE_TESTS) {
                    #region Inverse Wavelet
                    // round-trip check: invert the wavelet compression and dump the result
                    Comirva.Audio.Util.Maths.Matrix inverseWavelet = mfccMirage.InverseWaveletCompression(ref scmsMatrix, lastHeight, lastWidth, logSpectrogramMatrix.Rows, logSpectrogramMatrix.Columns);

                    if (DEBUG_OUTPUT_TEXT) {
                        inverseWavelet.WriteCSV(fileName + "_specgramlog_inverse_wavelet.csv", ";");
                    }
                    inverseWavelet.DrawMatrixImageLogValues(fileName + "_specgramlog_inverse_wavelet.png", true);
                    #endregion
                }
                #endif
                #endregion
            }

            // Step 2: build the Statistical Cluster Model Similarity feature,
            // i.e. a Gaussian representation of the song
            Scms audioFeature = Scms.GetScms(scmsMatrix, fileName);
            if (audioFeature == null) {
                Console.Out.WriteLine("Error! Could not compute the Scms for '{0}'!", fileName);
                return false;
            }

            // Step 3: attach the image (only when debugging), bitstring hash,
            // duration and name to the feature
            if (doOutputDebugInfo) {
                audioFeature.Image = scmsMatrix.DrawMatrixImageLogValues(fileName + "_featuredata.png", true, false, 0, 0, true);
            }
            audioFeature.BitString = GetBitString(scmsMatrix);
            audioFeature.Duration = (long) param.DurationInMs;
            audioFeature.Name = param.PathToAudioFile;

            // Step 4: add to database; AddTrack signals failure with -1
            int id = trackId;
            if (db.AddTrack(ref id, audioFeature) == -1) {
                Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName);
                return false;
            }
            return true;
        }
Exemple #4
0
		/// <summary>
		/// Reconstructs one time-domain frame from a column of the spectrogram matrix using
		/// the Lomont table FFT and overlap-adds it into <paramref name="signal"/>.
		/// </summary>
		/// <param name="m">matrix whose columns hold the spectrum frames</param>
		/// <param name="column">index of the column (frame) to process</param>
		/// <param name="signal">output buffer; the frame is added starting at index hopsize * column</param>
		/// <param name="winsize">window size; the transformed values are divided by its square root</param>
		/// <param name="hopsize">distance in samples between the start of two consecutive frames</param>
		public void ComputeInverseComirvaMatrixUsingLomontTableFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize) {

			double[] spectrogramWindow = m.GetColumn(column);

			// extend the window to twice its length by appending a mirrored copy;
			// the element at index len is left at zero
			int len = spectrogramWindow.Length;
			double[] extendedWindow = new double[len * 2];
			Array.Copy(spectrogramWindow, extendedWindow, len);
			for (int i = 1; i < len; i++) {
				extendedWindow[len+i] = spectrogramWindow[len-i];
			}

			// NOTE(review): 'false' presumably selects the inverse transform - confirm against lomonFFT
			double[] complexSignal = FFTUtilsLomont.DoubleToComplexDouble(extendedWindow);
			lomonFFT.TableFFT(complexSignal, false);

			double[] window = win.GetWindow();

			// loop invariants, computed once instead of once per sample
			double scale = Math.Sqrt(winsize);
			int offset = hopsize * column;

			// multiply by window w/ overlap-add
			// (complexSignal is read as interleaved pairs; only the value at each even index is used)
			int N = complexSignal.Length / 2;
			for (int j = 0; j < N; j++) {
				double re = complexSignal[2*j] / scale;

				// smooth yet another time (also did this when doing FFT)
				double windowed = re * window[j];

				// overlap-add method
				// scale with 2 just because the volume got so much lower when using a second smoothing filter when reconstructing
				signal[j + offset] = signal[j + offset] + windowed * 2;
			}
		}
Exemple #5
0
		/// <summary>
		/// Reconstructs one time-domain frame from a column of the spectrogram matrix using
		/// the Lomont real FFT and overlap-adds it into <paramref name="signal"/>.
		/// </summary>
		/// <param name="m">matrix whose columns hold the spectrum frames</param>
		/// <param name="column">index of the column (frame) to process</param>
		/// <param name="signal">output buffer; the frame is added starting at index hopsize * column</param>
		/// <param name="winsize">window size; sizes the transform buffer (2 * winsize) and the transformed values are divided by its square root</param>
		/// <param name="hopsize">distance in samples between the start of two consecutive frames</param>
		public void ComputeInverseComirvaMatrixUsingLomontRealFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize) {

			double[] spectrogramWindow = m.GetColumn(column);

			// extend the window to twice its length by appending a mirrored copy;
			// the element at index len is left at zero
			int len = spectrogramWindow.Length;
			double[] extendedWindow = new double[len * 2];
			Array.Copy(spectrogramWindow, extendedWindow, len);
			for (int i = 1; i < len; i++) {
				extendedWindow[len+i] = spectrogramWindow[len-i];
			}

			// ifft input must contain the FFT values
			// r0, r(n/2), r1, i1, r2, i2 ...
			// Only the even slots (and slot 1) are filled here; every other odd slot stays zero.
			double[] ifft = new double[winsize*2];
			ifft[0] = extendedWindow[0];
			ifft[1] = extendedWindow[winsize/2];
			for (int i = 1; i < extendedWindow.Length; i++) {
				ifft[2 * i] = extendedWindow[i];
			}

			// NOTE(review): 'false' presumably selects the inverse transform - confirm against lomonFFT
			lomonFFT.RealFFT(ifft, false);

			double[] window = win.GetWindow();

			// loop invariants, computed once instead of once per sample
			double scale = Math.Sqrt(winsize);
			int offset = hopsize * column;

			// multiply by window w/ overlap-add, taking just the value at each even index
			int N = ifft.Length / 2;
			for (int j = 0; j < N; j++) {
				double re = ifft[2*j] / scale;

				// smooth yet another time (also did this when doing FFT)
				double windowed = re * window[j];

				// overlap-add method
				// scale with 5 just because the volume got so much lower when using a second smoothing filter when reconstructing
				signal[j + offset] = signal[j + offset] + windowed * 5;
			}
		}
Exemple #6
0
		/// <summary>
		/// Windows one frame of audio, transforms it with the Lomont table FFT and writes
		/// scaled magnitudes into one column of the matrix.
		/// </summary>
		/// <param name="m">matrix receiving the values, one row per stored bin</param>
		/// <param name="column">index of the column to fill</param>
		/// <param name="audiodata">audio samples</param>
		/// <param name="pos">position in audiodata handed to the window function</param>
		public void ComputeComirvaMatrixUsingLomontTableFFT(ref Comirva.Audio.Util.Maths.Matrix m, int column, float[] audiodata, int pos) {

			// apply the window method (e.g HammingWindow, HannWindow etc)
			win.Apply(ref data, audiodata, pos);

			// interleaved (re, img) pairs, transformed in place
			double[] complexSignal = FFTUtilsLomont.FloatToComplexDouble(data);
			lomonFFT.TableFFT(complexSignal, true);

			// Every second complex bin below a quarter of the array length is stored,
			// filling consecutive rows starting at row 0.
			int binLimit = complexSignal.Length/4;
			for (int bin = 0, row = 0; bin < binLimit; bin += 2, row++) {
				double re = complexSignal[2*bin];
				double img = complexSignal[2*bin + 1];
				double power = re*re + img*img;
				m.MatrixData[row][column] = Math.Sqrt(power * complexSignal.Length/2);
			}
		}
Exemple #7
0
		/// <summary>
		/// Windows one frame of audio, transforms the first half of the windowed buffer with
		/// the Lomont real FFT and writes scaled magnitudes into one column of the matrix.
		/// </summary>
		/// <param name="m">matrix receiving the values in rows 0 .. winsize/2 - 1</param>
		/// <param name="column">index of the column to fill</param>
		/// <param name="audiodata">audio samples</param>
		/// <param name="pos">position in audiodata handed to the window function</param>
		public void ComputeComirvaMatrixUsingLomontRealFFT(ref Comirva.Audio.Util.Maths.Matrix m, int column, float[] audiodata, int pos) {

			// apply the window method (e.g HammingWindow, HannWindow etc)
			win.Apply(ref data, audiodata, pos);

			// only the first half of the windowed buffer is transformed
			int sampleCount = data.Length/2;
			double[] fft = new double[sampleCount];
			Array.Copy(data, fft, sampleCount);
			lomonFFT.RealFFT(fft, true);

			// fft now contains the FFT values packed as
			// r0, r(n/2), r1, i1, r2, i2 ...
			m.MatrixData[0][column] = Math.Sqrt(fft[0] * fft[0] * winsize);

			// The r(n/2) term goes to the last row; the loop below writes that same row
			// again whenever winsize/2 - 1 is at least 1.
			m.MatrixData[winsize/2-1][column] = Math.Sqrt(fft[1] * fft[1] * winsize);

			// amplitude (or magnitude) is the square root of the power spectrum
			// the magnitude spectrum is abs(fft), i.e. Math.Sqrt(re*re + img*img)
			// use 20*log10(Y) to get dB from amplitude
			// the power spectrum is the magnitude spectrum squared
			// use 10*log10(Y) to get dB from power spectrum
			int halfWindow = winsize/2;
			for (int row = 1; row < halfWindow; row++) {
				double re = fft[2 * row];
				double img = fft[2 * row + 1];
				m.MatrixData[row][column] = Math.Sqrt((re * re + img * img) * winsize);
			}
		}
Exemple #8
0
		/// <summary>
		/// Windows one frame of audio, transforms it with FFTW and writes magnitudes into
		/// one column of the matrix.
		/// </summary>
		/// <param name="m">matrix receiving the values in rows 0 .. winsize/2 - 1</param>
		/// <param name="j">index of the column to fill</param>
		/// <param name="audiodata">audio samples</param>
		/// <param name="pos">position in audiodata handed to the window function</param>
		public void ComputeComirvaMatrixUsingFftw(ref Comirva.Audio.Util.Maths.Matrix m, int j, float[] audiodata, int pos)
		{
			// apply the window method (e.g HammingWindow, HannWindow etc)
			win.Apply(ref data, audiodata, pos);

			// copy the frame into the FFTW buffer, run the plan and copy the result back
			Marshal.Copy(data, 0, fftwData, fftsize);
			fftwf_execute(fftwPlan);
			Marshal.Copy(fftwData, fft, 0, fftsize);

			// fft now contains the FFT values in a Half Complex format
			// r0, r1, r2, ..., rn/2, i(n+1)/2-1, ..., i2, i1
			// Here, rk is the real part of the kth output, and ik is the imaginary part. (Division by 2 is rounded down.)
			// For a halfcomplex array hc[n], the kth component thus has its real part in hc[k] and its imaginary part in hc[n-k],
			// with the exception of k == 0 or n/2 (the latter only if n is even) - in these two cases the imaginary part is zero
			// due to symmetries of the real-input DFT, and is not stored.
			m.MatrixData[0][j] = Math.Sqrt(fft[0] * fft[0]);

			// amplitude (or magnitude) is the square root of the power spectrum
			// the magnitude spectrum is abs(fft), i.e. Math.Sqrt(re*re + img*img)
			// use 20*log10(Y) to get dB from amplitude
			// the power spectrum is the magnitude spectrum squared
			// use 10*log10(Y) to get dB from power spectrum
			// Row 'row' is taken from component 2 * row, i.e. every second component.
			int halfWindow = winsize/2;
			for (int row = 1; row < halfWindow; row++) {
				int bin = row * 2;
				var re = fft[bin];
				var img = fft[fftsize - bin];
				m.MatrixData[row][j] = Math.Sqrt(re * re + img * img);
			}

			// Row winsize/2 is not written; the assignment for it is disabled:
			//m.MatrixData[winsize/2][j] = Math.Sqrt(fft[winsize] * fft[winsize]);
		}
Exemple #9
0
		/// <summary>
		/// Builds the statistical cluster model (Scms) of a song from its feature matrix
		/// without inverting the covariance matrix: the inverse covariance values are read
		/// from a newly constructed matrix of the same dimensions as the covariance matrix.
		/// </summary>
		/// <param name="mfccs">Feature matrix of the song (e.g. its MFCC representation)</param>
		/// <param name="name">Prefix of the file names used for the dumps written in DEBUG builds</param>
		/// <returns>The populated model</returns>
		public static Scms GetScmsNoInverse(Comirva.Audio.Util.Maths.Matrix mfccs, string name) {
			DbgTimer timer = new DbgTimer();
			timer.Start();

			// Mean
			Comirva.Audio.Util.Maths.Matrix meanVector = mfccs.Mean(2);
			#if DEBUG
			if (Analyzer.DEBUG_INFO_VERBOSE) {
				if (Analyzer.DEBUG_OUTPUT_TEXT) {
					meanVector.WriteText(name + "_mean.txt");
				}
				meanVector.DrawMatrixGraph(name + "_mean.png");
			}
			#endif

			// Covariance
			Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanVector);
			#if DEBUG
			if (Analyzer.DEBUG_INFO_VERBOSE) {
				if (Analyzer.DEBUG_OUTPUT_TEXT) {
					covariance.WriteText(name + "_covariance.txt");
				}
				covariance.DrawMatrixGraph(name + "_covariance.png");
			}
			#endif

			// Stand-in for the inverse covariance: no inversion is performed.
			// NOTE(review): presumably the new matrix is all zeros - confirm against the Matrix constructor
			Comirva.Audio.Util.Maths.Matrix inverseCovariance = new Comirva.Audio.Util.Maths.Matrix(covariance.Rows, covariance.Columns);

			// Pack the results: the mean is read from column 0 of the mean matrix, and
			// only the upper triangle (column >= row) of both matrices is stored,
			// row after row, in the flat cov / icov arrays.
			int dim = meanVector.Rows;
			Scms model = new Scms(dim);
			int packedIndex = 0;
			for (int row = 0; row < dim; row++) {
				model.mean[row] = (float) meanVector.MatrixData[row][0];
				for (int col = row; col < dim; col++, packedIndex++) {
					model.cov[packedIndex] = (float) covariance.MatrixData[row][col];
					model.icov[packedIndex] = (float) inverseCovariance.MatrixData[row][col];
				}
			}

			Dbg.WriteLine("GetScmsNoInverse - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
			return model;
		}