Esempio n. 1
0
        /// <summary>
        /// Decodes an audio file to a temporary wav file with mplayer and passes that
        /// wav file on to <c>DecodeUsingSox</c>.
        /// </summary>
        /// <param name="fileIn">Path of the audio file; every "%20" is replaced by a space first.</param>
        /// <param name="srate">Sample rate forwarded to <c>DecodeUsingSox</c>.</param>
        /// <param name="secondsToAnalyze">Number of seconds forwarded to <c>DecodeUsingSox</c>.</param>
        /// <returns>
        /// The buffer returned by <c>DecodeUsingSox</c>, or null when mplayer exits with a
        /// non-zero exit code or did not produce the wav file.
        /// </returns>
        public static float[] DecodeUsingMplayerAndSox(string fileIn, int srate, int secondsToAnalyze)
        {
            lock (_locker) {
                using (Process tosoxreadable = new Process())
                {
                    fileIn = Regex.Replace(fileIn, "%20", " ");
                    DbgTimer t = new DbgTimer();
                    t.Start();
                    Dbg.WriteLine("Decoding: " + fileIn);
                    String tempFile       = System.IO.Path.GetTempFileName();
                    String soxreadablewav = tempFile + ".wav";
                    Dbg.WriteLine("Temporary wav file: " + soxreadablewav);

                    try
                    {
                        tosoxreadable.StartInfo.FileName               = "./NativeLibraries\\mplayer\\mplayer.exe";
                        tosoxreadable.StartInfo.Arguments              = " -quiet -vc null -vo null -ao pcm:fast:waveheader \"" + fileIn + "\" -ao pcm:file=\\\"" + soxreadablewav + "\\\"";
                        tosoxreadable.StartInfo.UseShellExecute        = false;
                        tosoxreadable.StartInfo.RedirectStandardOutput = true;
                        tosoxreadable.StartInfo.RedirectStandardError  = true;

                        // Both pipes are redirected, so both must be drained while the child runs.
                        // Waiting for exit first can block forever once the child fills a pipe buffer.
                        // stderr is collected asynchronously, stdout is read synchronously.
                        System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                        tosoxreadable.ErrorDataReceived += (sender, e) =>
                        {
                            if (e.Data != null)
                            {
                                standardError.AppendLine(e.Data);
                            }
                        };

                        tosoxreadable.Start();
                        tosoxreadable.BeginErrorReadLine();
                        string standardOutput = tosoxreadable.StandardOutput.ReadToEnd();
                        tosoxreadable.WaitForExit();

                        int exitCode = tosoxreadable.ExitCode;
                        // 0 = successful
                        // 1 = partially successful
                        // 2 = failed
                        if (exitCode != 0)
                        {
                            Console.Out.WriteLine(standardError.ToString());
                            return(null);
                        }

                        #if DEBUG
                        Console.Out.WriteLine(standardOutput);
                        #endif

                        float[] floatBuffer = null;
                        if (File.Exists(soxreadablewav))
                        {
                            floatBuffer = DecodeUsingSox(soxreadablewav, srate, secondsToAnalyze);
                        }
                        Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
                        return(floatBuffer);
                    }
                    finally
                    {
                        // Remove the temporary files on every exit path (success, mplayer
                        // failure, missing wav, exception). Each file is deleted independently
                        // so one failure does not keep the other file around.
                        foreach (string path in new string[] { tempFile, soxreadablewav })
                        {
                            try
                            {
                                File.Delete(path);
                            }
                            catch (IOException io)
                            {
                                Console.WriteLine(io);
                            }
                        }
                    }
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Decodes an audio file with mplayer into a temporary wav file and returns
        /// the first channel read from that file by <c>RiffRead</c>.
        /// </summary>
        /// <param name="fileIn">Path of the audio file; every "%20" is replaced by a space first.</param>
        /// <param name="srate">Sample rate passed to mplayer's resample filter.</param>
        /// <returns>
        /// <c>SoundData[0]</c> of the decoded wav file, or null when mplayer exits with a
        /// non-zero exit code.
        /// </returns>
        public static float[] DecodeUsingMplayer(string fileIn, int srate)
        {
            lock (_locker) {
                using (Process towav = new Process())
                {
                    fileIn = Regex.Replace(fileIn, "%20", " ");
                    DbgTimer t = new DbgTimer();
                    t.Start();
                    Dbg.WriteLine("Decoding: " + fileIn);
                    String tempFile = System.IO.Path.GetTempFileName();
                    String wav      = tempFile + ".wav";
                    Dbg.WriteLine("Temporary wav file: " + wav);

                    try
                    {
                        towav.StartInfo.FileName               = "./NativeLibraries\\mplayer\\mplayer.exe";
                        towav.StartInfo.Arguments              = " -quiet -ao pcm:fast:waveheader \"" + fileIn + "\" -format floatle -af resample=" + srate + ":0:2,pan=1:0.5:0.5 -channels 1 -vo null -vc null -ao pcm:file=\\\"" + wav + "\\\"";
                        towav.StartInfo.UseShellExecute        = false;
                        towav.StartInfo.RedirectStandardOutput = true;
                        towav.StartInfo.RedirectStandardError  = true;

                        // Both pipes are redirected, so both must be drained while the child runs.
                        // Waiting for exit first can block forever once the child fills a pipe buffer.
                        // stderr is collected asynchronously, stdout is read synchronously.
                        System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                        towav.ErrorDataReceived += (sender, e) =>
                        {
                            if (e.Data != null)
                            {
                                standardError.AppendLine(e.Data);
                            }
                        };

                        towav.Start();
                        towav.BeginErrorReadLine();
                        string standardOutput = towav.StandardOutput.ReadToEnd();
                        towav.WaitForExit();

                        int exitCode = towav.ExitCode;
                        // 0 = successful
                        // 1 = partially successful
                        // 2 = failed
                        if (exitCode != 0)
                        {
                            Console.Out.WriteLine(standardError.ToString());
                            return(null);
                        }

                        #if DEBUG
                        Console.Out.WriteLine(standardOutput);
                        #endif

                        RiffRead riff = new RiffRead(wav);
                        riff.Process();
                        float[] floatBuffer = riff.SoundData[0];

                        Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
                        return(floatBuffer);
                    }
                    finally
                    {
                        // Remove both temporary files on every exit path. The wav file used to
                        // be left behind on each call. NOTE(review): if RiffRead still holds the
                        // wav open at this point the delete fails with an IOException, which is
                        // only logged - confirm RiffRead releases the file after Process().
                        foreach (string path in new string[] { tempFile, wav })
                        {
                            try
                            {
                                File.Delete(path);
                            }
                            catch (IOException io)
                            {
                                Console.WriteLine(io);
                            }
                        }
                    }
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Multiplies the filter weights with <paramref name="m"/>, log-scales every
        /// resulting element and multiplies the dct matrix with that result.
        /// </summary>
        /// <param name="m">Input matrix; this method only reads from it.</param>
        /// <returns>The result of <c>dct.Multiply</c> applied to the log-scaled filter output.</returns>
        public Matrix Apply(ref Matrix m)
        {
            DbgTimer timer = new DbgTimer();

            timer.Start();

            Matrix mel = new Matrix(filterWeights.rows, m.columns);

            // Hand-written replacement for filterWeights.Multiply(m) followed by an
            // element-wise "value < 1 ? 0 : 10 * log10(value)" pass. The three
            // matrices are addressed as flat buffers indexed by row * columns + column.
            int inputColumns  = m.columns;
            int inputRows     = m.rows;
            int melColumns    = mel.columns;
            int weightColumns = filterWeights.columns;
            int weightRows    = filterWeights.rows;

            unsafe
            {
                fixed(float *input = m.d, weights = filterWeights.d, output = mel.d)
                {
                    for (int col = 0; col < inputColumns; col++)
                    {
                        for (int band = 0; band < weightRows; band++)
                        {
                            int cell      = band * melColumns + col;
                            int weightRow = band * weightColumns;

                            // Dot product of one filter row with one input column,
                            // accumulated directly in the output cell.
                            for (int row = 0; row < inputRows; row++)
                            {
                                output[cell] += weights[weightRow + row] * input[row * inputColumns + col];
                            }

                            // Values below 1 are clamped to 0, everything else becomes 10 * log10.
                            if (output[cell] < 1.0f)
                            {
                                output[cell] = 0;
                            }
                            else
                            {
                                output[cell] = (float)(10.0 * Math.Log10(output[cell]));
                            }
                        }
                    }
                }
            }

            Matrix mfcc = dct.Multiply(mel);

            Dbg.WriteLine("mfcc (MfccLessOptimized) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");

            return(mfcc);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a Scms model from a feature matrix without inverting the covariance matrix.
        /// </summary>
        /// <param name="mfccs">Comirva.Audio.Util.Maths.Matrix whose mean and covariance are stored in the model</param>
        /// <param name="name">prefix of the debug files written in DEBUG builds</param>
        /// <returns>
        /// A Scms holding the mean and the upper triangle of the covariance matrix. The
        /// icov entries are copied from a freshly constructed matrix that is never inverted.
        /// </returns>
        public static Scms GetScmsNoInverse(Comirva.Audio.Util.Maths.Matrix mfccs, string name)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Mean
            Comirva.Audio.Util.Maths.Matrix meanMatrix = mfccs.Mean(2);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    meanMatrix.WriteText(name + "_mean.txt");
                }
                meanMatrix.DrawMatrixGraph(name + "_mean.png");
            }
            #endif

            // Covariance
            Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanMatrix);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    covariance.WriteText(name + "_covariance.txt");
                }
                covariance.DrawMatrixGraph(name + "_covariance.png");
            }
            #endif

            // Placeholder with the shape of the covariance matrix; no inversion is performed.
            Comirva.Audio.Util.Maths.Matrix inversePlaceholder = new Comirva.Audio.Util.Maths.Matrix(covariance.Rows, covariance.Columns);

            // Pack the mean and the upper triangles (row by row, diagonal included)
            // into the flat arrays of the model.
            int  dimension = meanMatrix.Rows;
            Scms model     = new Scms(dimension);
            int  packed    = 0;
            for (int row = 0; row < dimension; row++)
            {
                model.mean[row] = (float)meanMatrix.MatrixData[row][0];
                for (int col = row; col < dimension; col++)
                {
                    model.cov[packed]  = (float)covariance.MatrixData[row][col];
                    model.icov[packed] = (float)inversePlaceholder.MatrixData[row][col];
                    packed++;
                }
            }

            Dbg.WriteLine("GetScmsNoInverse - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
            return(model);
        }
        /// <summary>
        /// Multiplies the filter weights with <paramref name="m"/> (visiting only the
        /// column range listed in fwFT for each filter row), log-scales every resulting
        /// element and multiplies the dct matrix with that result.
        /// </summary>
        /// <param name="m">Input matrix; this method only reads from it.</param>
        /// <returns>The result of <c>dct.Multiply</c> applied to the log-scaled filter output.</returns>
        /// <exception cref="MfccFailedException">
        /// Thrown when <c>dct.Multiply</c> throws a MatrixDimensionMismatchException.
        /// </exception>
        public Matrix Apply(ref Matrix m)
        {
            DbgTimer timer = new DbgTimer();

            timer.Start();

            Matrix mel = new Matrix(filterWeights.rows, m.columns);

            // The three matrices are addressed as flat buffers indexed by row * columns + column.
            int inputColumns  = m.columns;
            int melColumns    = mel.columns;
            int weightColumns = filterWeights.columns;
            int weightRows    = filterWeights.rows;

            unsafe
            {
                fixed(float *input = m.d, weights = filterWeights.d, output = mel.d)
                {
                    for (int col = 0; col < inputColumns; col++)
                    {
                        for (int band = 0; band < weightRows; band++)
                        {
                            int cell      = band * melColumns + col;
                            int weightRow = band * weightColumns;

                            // The filter weights matrix is mostly 0, so only the range
                            // [fwFT[band, 0], fwFT[band, 1]) is multiplied for this row.
                            int first = fwFT[band, 0];
                            int end   = fwFT[band, 1];
                            for (int row = first; row < end; row++)
                            {
                                output[cell] += weights[weightRow + row] * input[row * inputColumns + col];
                            }

                            // Values below 1 are clamped to 0, everything else becomes 10 * log10.
                            if (output[cell] < 1.0f)
                            {
                                output[cell] = 0;
                            }
                            else
                            {
                                output[cell] = (float)(10.0 * Math.Log10(output[cell]));
                            }
                        }
                    }
                }
            }

            Matrix mfcc;
            try {
                mfcc = dct.Multiply(mel);
            } catch (MatrixDimensionMismatchException) {
                throw new MfccFailedException();
            }

            long elapsed = 0;
            timer.Stop(ref elapsed);
            Dbg.WriteLine("Mirage - mfcc Execution Time: {0}ms", elapsed);

            return(mfcc);
        }
Esempio n. 6
0
        /// <summary>
        /// Runs the full pipeline for one file: decode, apply the mfcc step and
        /// build the Scms model from the result.
        /// </summary>
        /// <param name="file_path">Path handed to <c>ad.Decode</c>.</param>
        /// <returns>The model returned by <c>Scms.GetScms</c>.</returns>
        public static Scms Analyze (string file_path)
        {
            DbgTimer timer = new DbgTimer ();
            timer.Start ();

            Matrix decoded = ad.Decode (file_path);
            Scms model = Scms.GetScms (mfcc.Apply (ref decoded));

            long elapsed = 0;
            timer.Stop (ref elapsed);
            Dbg.WriteLine ("Mirage - Total Execution Time: {0}ms", elapsed);

            return model;
        }
Esempio n. 7
0
        /// <summary>
        /// Decodes one file and returns the matrix produced by the mfcc step,
        /// without building a Scms model from it.
        /// </summary>
        /// <param name="file_path">Path handed to <c>ad.Decode</c>.</param>
        /// <returns>The matrix returned by <c>mfcc.Apply</c>.</returns>
        public static Matrix AnalyzeMFCC (string file_path)
        {
            Hyena.Log.Debug("AnalyzeMFCC called for " + file_path);

            DbgTimer timer = new DbgTimer ();
            timer.Start ();

            Matrix decoded = ad.Decode (file_path);
            Matrix coefficients = mfcc.Apply (ref decoded);

            long elapsed = 0;
            timer.Stop (ref elapsed);
            Dbg.WriteLine ("Mirage - Total Execution Time: {0}ms", elapsed);

            return coefficients;
        }
        /// <summary>
        /// Runs the full pipeline for one file: decode, apply the mfcc step and
        /// build the Scms model from the result.
        /// </summary>
        /// <param name="file_path">Path handed to <c>ad.Decode</c>.</param>
        /// <returns>The model returned by <c>Scms.GetScms</c>.</returns>
        public static Scms Analyze(string file_path)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            Matrix decoded = ad.Decode(file_path);
            Scms   model   = Scms.GetScms(mfcc.Apply(ref decoded));

            long elapsed = 0;
            timer.Stop(ref elapsed);
            Dbg.WriteLine("Mirage - Total Execution Time: {0}ms", elapsed);

            return(model);
        }
        /// <summary>
        /// Computes a Scms model (mean, covariance, inverse covariance) from the
        /// MFCC representation of a song.
        /// </summary>
        /// <param name="mfcc">Matrix the mean and covariance are computed from.</param>
        /// <returns>The populated model.</returns>
        /// <exception cref="ScmsImpossibleException">
        /// Thrown when inverting the covariance matrix raises a MatrixSingularException.
        /// </exception>
        public static Scms GetScms(Matrix mfcc)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            Vector mean       = mfcc.Mean();
            Matrix covariance = mfcc.Covariance(mean);

            Matrix inverseCovariance;
            try {
                inverseCovariance = covariance.Inverse();
            } catch (MatrixSingularException) {
                throw new ScmsImpossibleException();
            }

            // Pack the mean and the upper triangles (row by row, diagonal included)
            // of both matrices into the flat arrays of the model.
            int  dimension = mean.rows;
            Scms model     = new Scms(dimension);
            int  packed    = 0;

            for (int row = 0; row < dimension; row++)
            {
                model.mean[row] = mean.d[row, 0];
                for (int col = row; col < dimension; col++)
                {
                    model.cov[packed]  = covariance.d[row, col];
                    model.icov[packed] = inverseCovariance.d[row, col];
                    packed++;
                }
            }

            long elapsed = 0;
            timer.Stop(ref elapsed);
            Dbg.WriteLine("Mirage - scms created in: {0}ms", elapsed);

            return(model);
        }
Esempio n. 10
0
		/// <summary>
		/// Apply the STFT on the audiodata
		/// </summary>
		/// <param name="audiodata">Audiodata to apply the STFT on</param>
		/// <returns>A matrix with winsize/2 + 1 rows and one column per hop</returns>
		public Matrix Apply(float[] audiodata)
		{
			DbgTimer timer = new DbgTimer();
			timer.Start();
			
			// Number of complete hops that fit into the audio data.
			// NOTE: this is zero or negative when audiodata is shorter than winsize.
			int hopCount = (audiodata.Length - winsize) / hopsize;
			
			// Matrix[Row, Column]: winsize/2 + 1 rows, hopCount columns
			Matrix stft = new Matrix(winsize / 2 + 1, hopCount);
			
			for (int column = 0; column < hopCount; column++) {
				int sampleOffset = column * hopsize;
				fft.ComputeMirageMatrixUsingFftw(ref stft, column, audiodata, sampleOffset);
			}
			
			Dbg.WriteLine("Stft (ComputeMirageMatrix) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
			
			return stft;
		}
Esempio n. 11
0
        /// <summary>
        /// Apply the STFT on the audiodata
        /// </summary>
        /// <param name="audiodata">Audiodata to apply the STFT on</param>
        /// <returns>A matrix with winsize/2 + 1 rows and one column per hop</returns>
        public Matrix Apply(float[] audiodata)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Number of complete hops that fit into the audio data.
            // NOTE: this is zero or negative when audiodata is shorter than winsize.
            int hopCount = (audiodata.Length - winsize) / hopsize;

            // Matrix[Row, Column]: winsize/2 + 1 rows, hopCount columns
            int    rowCount = winsize / 2 + 1;
            Matrix stft     = new Matrix(rowCount, hopCount);

            int column = 0;
            while (column < hopCount)
            {
                fft.ComputeMirageMatrixUsingFftw(ref stft, column, audiodata, column * hopsize);
                column++;
            }

            Dbg.WriteLine("Stft (ComputeMirageMatrix) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");

            return(stft);
        }
Esempio n. 12
0
        /// <summary>
        /// Calculates the Mandel-Ellis audio feature for an audio file.
        /// </summary>
        /// <param name="filePath">audio file to analyze</param>
        /// <param name="doOutputDebugInfo">not read by this method; kept for signature compatibility</param>
        /// <returns>
        /// The calculated audio feature with Duration and Name filled in, or null when the
        /// audio file could not be turned into a work unit or the extractor returned null.
        /// </returns>
        public static AudioFeature AnalyzeMandelEllis(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // get work config from the audio file
            WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
            if (param == null) {
                // No work unit means there are no samples to analyze. Callers already
                // have to cope with a null result (the extractor may return null too).
                return null;
            }

            // Calculate the audio feature
            AudioFeature audioFeature = mandelEllisExtractor.Calculate(MathUtils.FloatToDouble(param.AudioSamples));

            if (audioFeature != null) {
                // Store duration
                audioFeature.Duration = (long) param.DurationInMs;

                // Store file name
                audioFeature.Name = filePath.Name;
            }

            Dbg.WriteLine ("MandelEllisExtractor - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);

            return audioFeature;
        }
Esempio n. 13
0
        /// <summary>
        /// Method to analyze and add using the soundfingerprinting methods
        /// </summary>
        /// <param name="filePath">full file path</param>
        /// <param name="repository">Soundfingerprinting Repository</param>
        /// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param>
        /// <param name="useHaarWavelet">not read by this method; kept for signature compatibility</param>
        /// <returns>
        /// true if successful; false when the audio file could not be turned into a work
        /// unit or the track could not be inserted
        /// </returns>
        public static bool AnalyzeAndAddSoundfingerprinting(FileInfo filePath, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // get work config from the audio file
            WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
            if (param == null) {
                // Without a work unit there is nothing to fingerprint.
                return false;
            }

            param.FingerprintingConfiguration = fingerprintingConfigCreation;
            string fileName = param.FileName;

            // build track
            Track track = new Track();
            track.Title = fileName;
            track.TrackLengthMs = (int) param.DurationInMs;
            track.FilePath = param.PathToAudioFile;
            track.Tags = param.Tags;
            track.Id = -1; // this will be set by the insert method

            // Get fingerprint signatures using the Soundfingerprinting methods
            double[][] logSpectrogram;
            List<bool[]> fingerprints;
            List<double[][]> spectralImages;
            if (!repository.InsertTrackInDatabaseUsingSamples(track, param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys,  param, out logSpectrogram, out fingerprints, out spectralImages)) {
                // failed
                Console.Out.WriteLine("Failed! Could not compute the soundfingerprint {0}!", fileName);
                return false;
            }

            #region Debug for Soundfingerprinting Method
            if (doOutputDebugInfo) {
                // The transposed spectrogram matrix is only needed for the debug
                // output, so it is only built when that output is requested.
                Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
                logSpectrogramMatrix = logSpectrogramMatrix.Transpose();

                // Image Service
                ImageService imageService = new ImageService(repository.FingerprintService.SpectrumService, repository.FingerprintService.WaveletService);
                imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfigCreation.Stride, fingerprintingConfigCreation.FingerprintLength, fingerprintingConfigCreation.Overlap, 2).Save(fileName + "_specgram_logimages.png");

                logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_specgram_logimage.png", true);

                if (DEBUG_OUTPUT_TEXT) {
                    logSpectrogramMatrix.WriteCSV(fileName + "_specgram_log.csv", ";");
                }
            }
            #endregion

            Dbg.WriteLine ("AnalyzeAndAddSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return true;
        }
Esempio n. 14
0
		/// <summary>
		/// Method to analyse and add all the different types of audio features
		/// </summary>
		/// <param name="filePath">full file path</param>
		/// <param name="db">Scms database (Mirage)</param>
		/// <param name="repository">Soundfingerprinting Repository</param>
		/// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param>
		/// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression</param>
		/// <returns>
		/// true when the track was inserted into the repository (a failure of the
		/// additional Scms step is logged and ignored), false otherwise
		/// </returns>
		public static bool AnalyzeAndAddComplete(FileInfo filePath, Db db, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) {
			DbgTimer timer = new DbgTimer();
			timer.Start ();
			
			// Work unit describing the audio file; nothing can be done without it.
			WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
			if (param == null) {
				return false;
			}
			
			param.FingerprintingConfiguration = fingerprintingConfigCreation;
			string fileName = param.FileName;
			
			// The Id is a placeholder; it will be set by the insert method.
			Track track = new Track {
				Title = param.FileName,
				TrackLengthMs = (int) param.DurationInMs,
				FilePath = param.PathToAudioFile,
				Tags = param.Tags,
				Id = -1
			};
			
			double[][] logSpectrogram;
			List<bool[]> fingerprints;
			bool inserted = repository.InsertTrackInDatabaseUsingSamples(track, param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, param, out logSpectrogram, out fingerprints);
			if (!inserted) {
				// Failed
				return false;
			}

			// Everything below is best effort: any exception is logged and swallowed.
			try {
				// store logSpectrogram as (transposed) Matrix
				Comirva.Audio.Util.Maths.Matrix spectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
				spectrogramMatrix = spectrogramMatrix.Transpose();
				
				#region Output debugging information (Saving spectrograms and/or csv files)
				if (doOutputDebugInfo) {
					spectrogramMatrix.DrawMatrixImageLogValues(fileName + "_matrix_spectrogram.png", true);

					if (DEBUG_OUTPUT_TEXT) {
						spectrogramMatrix.WriteCSV(fileName + "_matrix_spectrogram.csv", ";");
					}

					// Save debug images using fingerprinting methods
					SaveFingerprintingDebugImages(fileName, logSpectrogram, fingerprints, repository.FingerprintService, param.FingerprintingConfiguration);
				}
				#endregion
				
				// Insert Statistical Cluster Model Similarity Audio Feature as well
				bool scmsAdded = AnalyseAndAddScmsUsingLogSpectrogram(spectrogramMatrix, param, db, track.Id, doOutputDebugInfo, useHaarWavelet);
				if (!scmsAdded) {
					// Failed, but ignore!
					Dbg.WriteLine("AnalyzeAndAddComplete - Failed inserting Statistical Cluster Model Similarity Audio Feature");
				}
			} catch (Exception e) {
				// Failed, but ignore!
				Dbg.WriteLine("AnalyzeAndAddComplete - Failed creating Statistical Cluster Model Similarity Audio Feature");
				Dbg.WriteLine(e.Message);
			}
			
			Dbg.WriteLine("AnalyzeAndAddComplete - Total Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
			return true;
		}
Esempio n. 15
0
        /// <summary>
        /// Computes a Scms model (mean, covariance, inverse covariance) from the
        /// MFCC representation of a song.
        /// </summary>
        /// <param name="mfccs">Comirva.Audio.Util.Maths.Matrix holding the MFCC representation</param>
        /// <param name="name">prefix of the debug files written in DEBUG builds</param>
        /// <returns>the model, or null when inverting the covariance matrix throws</returns>
        public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Mean
            Comirva.Audio.Util.Maths.Matrix meanMatrix = mfccs.Mean(2);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    meanMatrix.WriteText(name + "_mean.txt");
                }
                meanMatrix.DrawMatrixGraph(name + "_mean.png");
            }
            #endif

            // Covariance
            Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanMatrix);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    covariance.WriteText(name + "_covariance.txt");
                }
                covariance.DrawMatrixGraph(name + "_covariance.png");
            }
            #endif

            // Inverse Covariance; any exception thrown by the inversion aborts the model.
            Comirva.Audio.Util.Maths.Matrix inverseCovariance;
            try {
                inverseCovariance = covariance.InverseGausJordan();
            } catch (Exception) {
                Dbg.WriteLine("MatrixSingularException - Scms failed!");
                return(null);
            }

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    inverseCovariance.WriteAscii(name + "_inverse_covariance.ascii");
                }
                inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance.png");
            }
            #endif

            // Pack the mean and the upper triangles (row by row, diagonal included)
            // of both matrices into the flat arrays of the model.
            int  dimension = meanMatrix.Rows;
            Scms model     = new Scms(dimension);
            int  packed    = 0;
            for (int row = 0; row < dimension; row++)
            {
                model.mean[row] = (float)meanMatrix.MatrixData[row][0];
                for (int col = row; col < dimension; col++)
                {
                    model.cov[packed]  = (float)covariance.MatrixData[row][col];
                    model.icov[packed] = (float)inverseCovariance.MatrixData[row][col];
                    packed++;
                }
            }

            Dbg.WriteLine("Compute Scms - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
            return(model);
        }
Esempio n. 16
0
        /// <summary>
        /// Converts every fingerprint into a matrix, builds a Scms audio feature
        /// (without inverse covariance) from it and adds that feature to the database.
        /// </summary>
        /// <param name="fingerprints">fingerprints holding two booleans per matrix element</param>
        /// <param name="param">work unit providing file name, duration, path and fingerprint dimensions</param>
        /// <param name="db">Scms database the audio features are added to</param>
        /// <param name="trackId">not read by this method; kept for signature compatibility</param>
        /// <param name="doOutputDebugInfo">when true an image of every fingerprint matrix is written</param>
        /// <returns>
        /// true when every fingerprint was added; false as soon as a feature could not be
        /// created or the database rejected it
        /// </returns>
        private static bool AnalyseAndAddScmsUsingFingerprints(List<bool[]> fingerprints,
                                                               WorkUnitParameterObject param,
                                                               Db db,
                                                               int trackId,
                                                               bool doOutputDebugInfo=DEFAULT_DEBUG_INFO)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // Insert Statistical Cluster Model Similarity Audio Feature
            string fileName = param.FileName;

            int fingerprintWidth = param.FingerprintingConfiguration.FingerprintLength;
            int fingerprintHeight = param.FingerprintingConfiguration.LogBins;
            int fingerprintCount = 0;

            foreach (bool[] fingerprint in fingerprints) {
                fingerprintCount++;
                Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(fingerprintWidth, fingerprintHeight);

                for (int i = 0; i < fingerprintWidth /*128*/; i++) {
                    // Every matrix row occupies 2 * fingerprintHeight consecutive booleans.
                    int rowOffset = 2 * fingerprintHeight * i;

                    for (int j = 0; j < fingerprintHeight /*32*/; j++) {
                        // Negative Numbers = 01
                        // Positive Numbers = 10
                        // Zeros            = 00
                        bool v1 = fingerprint[rowOffset + (2 * j)];
                        bool v2 = fingerprint[rowOffset + (2 * j) + 1];

                        // First bit set -> 2.0, only second bit set -> 0.0, no bit set -> 1.0
                        if (v1) {
                            scmsMatrix.MatrixData[i][j] = 2.0;
                        } else if (v2) {
                            scmsMatrix.MatrixData[i][j] = 0.0;
                        } else {
                            scmsMatrix.MatrixData[i][j] = 1.0;
                        }
                    }
                }

                if (doOutputDebugInfo) {
                    scmsMatrix.DrawMatrixImage(String.Format("{0}_fingerprint_{1}.png", fileName, fingerprintCount), fingerprintWidth, fingerprintHeight);
                }

                #region Store in a Statistical Cluster Model Similarity class.
                Scms audioFeature = Scms.GetScmsNoInverse(scmsMatrix, fileName);
                if (audioFeature == null) {
                    return false;
                }

                // Store bitstring hash as well
                audioFeature.BitString = GetBitString(fingerprint);

                // Store duration
                audioFeature.Duration = (long) param.DurationInMs;

                // Store file name
                audioFeature.Name = param.PathToAudioFile;

                // Add to database
                if (db.AddTrack(audioFeature) == -1) {
                    Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName);
                    return false;
                }
                #endregion
            }

            Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return true;
        }
Esempio n. 17
0
        /// <summary>
        /// Query the database for perceptually similar tracks using the sound fingerprinting methods
        /// </summary>
        /// <param name="filePath">input file</param>
        /// <param name="repository">Soundfingerprinting Repository that is queried</param>
        /// <returns>
        /// a dictionary of similar tracks; an empty dictionary when the input file could
        /// not be turned into a work unit
        /// </returns>
        public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath, Repository repository)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // get work config from the audio file
            WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
            if (param == null) {
                // Nothing to query with: report "no candidates" instead of failing
                // with a NullReferenceException.
                return new Dictionary<Track, double>();
            }
            param.FingerprintingConfiguration = fingerprintingConfigQuerying;

            Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys,  1, param);

            Dbg.WriteLine ("SimilarTracksSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return candidates;
        }
Esempio n. 18
0
        /// <summary>
        /// Computes a Scms model (mean, covariance, inverse covariance) from the
        /// MFCC representation of a song.
        /// </summary>
        /// <param name="mfccs">Comirva.Audio.Util.Maths.Matrix holding the MFCC representation</param>
        /// <param name="name">prefix of the debug files written in DEBUG builds</param>
        /// <returns>the model, or null when inverting the covariance matrix throws</returns>
        public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Mean
            Comirva.Audio.Util.Maths.Matrix meanMatrix = mfccs.Mean(2);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                meanMatrix.WriteText(name + "_mean.txt");
                meanMatrix.DrawMatrixGraph(name + "_mean.png");
            }
            #endif

            // Covariance
            Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanMatrix);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                covariance.WriteText(name + "_covariance.txt");
                covariance.DrawMatrixGraph(name + "_covariance.png");
            }
            #endif

            // Inverse Covariance; any exception thrown by the inversion aborts the model.
            Comirva.Audio.Util.Maths.Matrix inverseCovariance;
            try {
                inverseCovariance = covariance.InverseGausJordan();
            } catch (Exception) {
                Dbg.WriteLine("MatrixSingularException - Scms failed!");
                return null;
            }

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                inverseCovariance.WriteAscii(name + "_inverse_covariance.ascii");
                inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance.png");
            }
            #endif

            // Pack the mean and the upper triangles (row by row, diagonal included)
            // of both matrices into the flat arrays of the model.
            int dimension = meanMatrix.Rows;
            Scms model = new Scms(dimension);
            int packed = 0;
            for (int row = 0; row < dimension; row++) {
                model.mean[row] = (float) meanMatrix.MatrixData[row][0];
                for (int col = row; col < dimension; col++) {
                    model.cov[packed] = (float) covariance.MatrixData[row][col];
                    model.icov[packed] = (float) inverseCovariance.MatrixData[row][col];
                    packed++;
                }
            }

            Dbg.WriteLine("(Comirva) - scms created in: {0} ms", timer.Stop().TotalMilliseconds);

            return model;
        }
Esempio n. 19
0
		/// <summary>
		/// Builds a spectrogram with logarithmically spaced frequency bins.
		/// Each frame is a window of <c>configuration.WindowSize</c> samples, weighted by the
		/// supplied window function, transformed with the FFT and reduced by ExtractLogBins.
		/// Consecutive frames start <c>configuration.Overlap</c> samples apart.
		/// </summary>
		/// <param name="samples">Audio samples; normalized in place when configuration.NormalizeSignal is set.</param>
		/// <param name="windowFunction">Provides the per-sample window weights.</param>
		/// <param name="configuration">Window size, hop (Overlap), log-bin count and normalization flag.</param>
		/// <returns>One row per frame; an empty array when the signal is too short for a single frame.</returns>
		public double[][] CreateLogSpectrogram(
			float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration)
		{
			DbgTimer t = new DbgTimer();
			t.Start ();

			if (configuration.NormalizeSignal)
			{
				NormalizeInPlace(samples);
			}

			int width = (samples.Length - configuration.WindowSize) / configuration.Overlap; /*width of the image*/
			if (width < 0)
			{
				// The signal is shorter than one analysis window. A negative length would make
				// the array allocation below throw, so report "no frames" instead.
				width = 0;
			}

			double[][] frames = new double[width][];
			int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration);
			double[] window = windowFunction.GetWindow();
			for (int i = 0; i < width; i++)
			{
				// Interleaved complex buffer: even index = real part, odd index = imaginary part.
				// A fresh array is zero-initialized, so the imaginary parts need no explicit clearing.
				double[] complexSignal = new double[2 * configuration.WindowSize];

				// take 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512)
				// or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000)
				int frameStart = i * configuration.Overlap;
				for (int j = 0; j < configuration.WindowSize; j++)
				{
					// Weight by the window function
					complexSignal[2 * j] = window[j] * samples[frameStart + j];
				}

				lomonFFT.TableFFT(complexSignal, true);

				frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
			}

			Dbg.WriteLine ("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
			return frames;
		}
Esempio n. 20
0
        // TODO: Remember to use another stride when querying
        /// <summary>
        /// Decodes the given audio file and queries the fingerprint repository for similar tracks.
        /// </summary>
        /// <param name="filePath">Audio file to look up.</param>
        /// <returns>
        /// The candidates returned by Repository.FindSimilarFromAudioSamples,
        /// or null when no audio could be decoded from the file.
        /// </returns>
        public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // NOTE(review): the local is not used below; it is kept because reading
            // BassProxy.Instance may have initialization side effects - confirm before removing.
            FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;

            float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            if (audiodata == null || audiodata.Length == 0)  {
                Dbg.WriteLine("Error! - No Audio Found");
                return null;
            }

            // Scale the samples by AUDIO_MULTIPLIER
            // (Matlab equivalent: if( max(abs(speech))<=1 ), speech = speech * 2^15; end;)
            MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER);

            // zero pad if the audio file is too short for a single fingerprint window
            int minimumLength = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
            if (audiodata.Length < minimumLength)
            {
                Array.Resize<float>(ref audiodata, minimumLength);
            }

            // Get database
            DatabaseService databaseService = DatabaseService.Instance;

            IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
            Repository repository = new Repository(permutations, databaseService, fingerprintService);

            // work config
            WorkUnitParameterObject param = new WorkUnitParameterObject();
            param.FingerprintingConfiguration = fingerprintingConfig;
            param.PathToAudioFile = filePath.FullName;
            param.AudioSamples = audiodata;
            param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
            param.StartAtMilliseconds = 0;

            Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(25, 4, 2, param);

            // Log the timing before returning; it used to sit after the return and never ran.
            Dbg.WriteLine ("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);

            return candidates;
        }
Esempio n. 21
0
        /// <summary>
        /// Decodes an audio file, builds a Track (including its tags) and inserts the track
        /// into the fingerprint database through Repository.InsertTrackInDatabaseUsingSamples.
        /// </summary>
        /// <param name="filePath">Audio file to analyze.</param>
        /// <param name="doOutputDebugInfo">When true, debug images (and optionally CSV) are written, prefixed with the sanitized file name.</param>
        /// <param name="useHaarWavelet">Not used by this method.</param>
        /// <returns>
        /// A DummyAudioFeature carrying duration and full file path when the insert succeeded;
        /// null when no audio could be decoded or the insert failed.
        /// </returns>
        public static AudioFeature AnalyzeSoundfingerprinting(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            if (audiodata == null || audiodata.Length == 0)  {
                Dbg.WriteLine("Error! - No Audio Found");
                return null;
            }

            // Read TAGs using BASS
            FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
            Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

            // Name of file being processed
            string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
                if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
            }
            #endif

            if (doOutputDebugInfo) {
                DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
            }

            // Calculate duration in ms (before any zero padding below)
            double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

            // NOTE(review): unlike SimilarTracksSoundfingerprinting, the samples are not
            // multiplied by AUDIO_MULTIPLIER here - confirm that this difference is intended.

            // zero pad if the audio file is too short for a single fingerprint window
            if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap))
            {
                int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
                Array.Resize<float>(ref audiodata, lenNew);
            }

            // Get fingerprint signatures using the Soundfingerprinting methods

            // Get database
            DatabaseService databaseService = DatabaseService.Instance;

            IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
            Repository repository = new Repository(permutations, databaseService, fingerprintService);

            // Image Service
            ImageService imageService = new ImageService(
                fingerprintService.SpectrumService,
                fingerprintService.WaveletService);

            // work config
            WorkUnitParameterObject param = new WorkUnitParameterObject();
            param.FingerprintingConfiguration = fingerprintingConfig;
            param.AudioSamples = audiodata;
            param.PathToAudioFile = filePath.FullName;
            param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
            param.StartAtMilliseconds = 0;

            // build track
            Track track = new Track();
            track.Title = name;
            track.TrackLengthMs = (int) duration;
            track.FilePath = filePath.FullName;
            track.Id = -1; // this will be set by the insert method

            #region parse tag_info
            if (tag_info != null) {
                Dictionary<string, string> tags = new Dictionary<string, string>();

                //if (tag_info.title != string.Empty) tags.Add("title", tag_info.title);
                if (tag_info.artist != string.Empty) tags.Add("artist", tag_info.artist);
                if (tag_info.album != string.Empty) tags.Add("album", tag_info.album);
                if (tag_info.albumartist != string.Empty) tags.Add("albumartist", tag_info.albumartist);
                if (tag_info.year != string.Empty) tags.Add("year", tag_info.year);
                if (tag_info.comment != string.Empty) tags.Add("comment", tag_info.comment);
                if (tag_info.genre != string.Empty) tags.Add("genre", tag_info.genre);
                if (tag_info.track != string.Empty) tags.Add("track", tag_info.track);
                if (tag_info.disc != string.Empty) tags.Add("disc", tag_info.disc);
                if (tag_info.copyright != string.Empty) tags.Add("copyright", tag_info.copyright);
                if (tag_info.encodedby != string.Empty) tags.Add("encodedby", tag_info.encodedby);
                if (tag_info.composer != string.Empty) tags.Add("composer", tag_info.composer);
                if (tag_info.publisher != string.Empty) tags.Add("publisher", tag_info.publisher);
                if (tag_info.lyricist != string.Empty) tags.Add("lyricist", tag_info.lyricist);
                if (tag_info.remixer != string.Empty) tags.Add("remixer", tag_info.remixer);
                if (tag_info.producer != string.Empty) tags.Add("producer", tag_info.producer);
                if (tag_info.bpm != string.Empty) tags.Add("bpm", tag_info.bpm);
                //if (tag_info.filename != string.Empty) tags.Add("filename", tag_info.filename);
                tags.Add("channelinfo", tag_info.channelinfo.ToString());
                //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString());
                if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
                if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
                if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
                if (tag_info.conductor != string.Empty) tags.Add("conductor", tag_info.conductor);
                if (tag_info.grouping != string.Empty) tags.Add("grouping", tag_info.grouping);
                if (tag_info.mood != string.Empty) tags.Add("mood", tag_info.mood);
                if (tag_info.rating != string.Empty) tags.Add("rating", tag_info.rating);
                if (tag_info.isrc != string.Empty) tags.Add("isrc", tag_info.isrc);

                // Native tags are handled as "key=value" strings. Split on the FIRST '=' only so
                // values containing '=' stay intact, skip entries without a separator (these used
                // to raise IndexOutOfRangeException), and keep the first value for a repeated key
                // (Dictionary.Add used to throw ArgumentException on duplicates).
                if (tag_info.NativeTags != null) {
                    foreach(var nativeTag in tag_info.NativeTags) {
                        if (nativeTag == null) continue;

                        int separator = nativeTag.IndexOf('=');
                        if (separator < 0) continue;

                        string key = nativeTag.Substring(0, separator);
                        string value = nativeTag.Substring(separator + 1);
                        if (!tags.ContainsKey(key)) {
                            tags.Add(key, value);
                        }
                    }
                }
                track.Tags = tags;
            }
            #endregion

            AudioFeature audioFeature = null;
            double[][] logSpectrogram;
            if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) {

                if (doOutputDebugInfo) {
                    imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png");

                    Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
                    logSpectrogramMatrix = logSpectrogramMatrix.Transpose();
                    logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

                    if (DEBUG_OUTPUT_TEXT) {
                        logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
                    }
                }

                audioFeature = new DummyAudioFeature();

                // Store duration
                audioFeature.Duration = (long) duration;

                // Store file name
                audioFeature.Name = filePath.FullName;
            } else {
                // insert failed: audioFeature stays null
            }

            Dbg.WriteLine ("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return audioFeature;
        }
Esempio n. 22
0
        /// <summary>
        /// Decodes an audio file and builds its Scms (Statistical Cluster Model Similarity) feature.
        /// Pipeline: decode, scale by AUDIO_MULTIPLIER, zero pad, STFT, then either mel-scale
        /// wavelet compression or mel-scale DCT, and finally Scms.GetScms on the resulting matrix.
        /// </summary>
        /// <param name="filePath">Audio file to analyze.</param>
        /// <param name="doOutputDebugInfo">When true, intermediate data is passed to the Draw* helpers with file names prefixed by the sanitized input file name.</param>
        /// <param name="useHaarWavelet">When true the feature matrix comes from ApplyMelScaleWaveletCompression, otherwise from ApplyMelScaleDCT.</param>
        /// <returns>The Scms feature, or null when no audio could be decoded or Scms.GetScms returned null.</returns>
        public static Scms AnalyzeScms(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();
            // Only referenced inside the DEBUG-only blocks below (bass.SaveFile)
            FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;

            float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            if (audiodata == null || audiodata.Length == 0)  {
                Dbg.WriteLine("Error! - No Audio Found");
                return null;
            }

            // Name of file being processed
            string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
                if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
            }
            #endif

            if (doOutputDebugInfo) {
                DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
            }

            // Calculate duration in ms (taken before the zero padding below changes the length)
            double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

            // Explode samples to the range of 16 bit shorts (–32,768 to 32,767)
            // Matlab multiplies with 2^15 (32768)
            // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
            MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536

            // zero pad if the audio file is too short to perform a mfcc
            // (Array.Resize fills the added tail with zeros)
            if (audiodata.Length < WINDOW_SIZE * 8)
            {
                int lenNew = WINDOW_SIZE * 8;
                Array.Resize<float>(ref audiodata, lenNew);
            }

            // 2. Windowing
            // 3. FFT
            Comirva.Audio.Util.Maths.Matrix stftdata = stftMirage.Apply(audiodata);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                if (DEBUG_OUTPUT_TEXT) {
                    stftdata.WriteAscii(name + "_stftdata.ascii");
                    stftdata.WriteCSV(name + "_stftdata.csv", ";");
                }
            }
            #endif

            if (doOutputDebugInfo) {
                // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
                stftdata.DrawMatrixImageLogValues(name + "_specgram.png", true);

                // spec gram with log values for the y axis (frequency)
                stftdata.DrawMatrixImageLogY(name + "_specgramlog.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE);
            }

            #if DEBUG
            // "& false" keeps this inverse-STFT round trip compiled but permanently disabled
            if (Analyzer.DEBUG_INFO_VERBOSE & false) {
                #region Inverse STFT
                double[] audiodata_inverse_stft = stftMirage.InverseStft(stftdata);

                // divide
                //MathUtils.Divide(ref audiodata_inverse_stft, AUDIO_MULTIPLIER);
                MathUtils.Normalize(ref audiodata_inverse_stft);

                if (DEBUG_OUTPUT_TEXT) {
                    WriteAscii(audiodata_inverse_stft, name + "_audiodata_inverse_stft.ascii");
                    WriteF3Formatted(audiodata_inverse_stft, name + "_audiodata_inverse_stft.txt");
                }

                DrawGraph(audiodata_inverse_stft, name + "_audiodata_inverse_stft.png");

                float[] audiodata_inverse_float = MathUtils.DoubleToFloat(audiodata_inverse_stft);
                bass.SaveFile(audiodata_inverse_float, name + "_inverse_stft.wav", Analyzer.SAMPLING_RATE);
                #endregion
            }
            #endif

            // 4. Mel Scale Filterbank
            // Mel-frequency is proportional to the logarithm of the linear frequency,
            // reflecting similar effects in the human's subjective aural perception)
            // 5. Take Logarithm
            // 6. DCT (Discrete Cosine Transform)

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                #region Mel Scale and Log Values
                // NOTE(review): stftdata is passed by ref here; whether ApplyMelScaleAndLog
                // modifies or replaces it is not visible from this method - confirm.
                Comirva.Audio.Util.Maths.Matrix mellog = mfccMirage.ApplyMelScaleAndLog(ref stftdata);

                if (DEBUG_OUTPUT_TEXT) {
                    mellog.WriteCSV(name + "_mel_log.csv", ";");
                }

                if (doOutputDebugInfo) {
                    mellog.DrawMatrixImage(name + "_mel_log.png", 600, 400, true, true);
                }
                #endregion

                #region Inverse Mel Scale and Log Values
                // Permanently disabled debug round trip (mel/log -> STFT -> audio)
                if (false) {
                    Comirva.Audio.Util.Maths.Matrix inverse_mellog = mfccMirage.InverseMelScaleAndLog(ref mellog);

                    inverse_mellog.WriteCSV(name + "_mel_log_inverse.csv", ";");
                    inverse_mellog.DrawMatrixImageLogValues(name + "_mel_log_inverse.png", true);

                    double[] audiodata_inverse_mellog = stftMirage.InverseStft(inverse_mellog);
                    //MathUtils.Divide(ref audiodata_inverse_mellog, AUDIO_MULTIPLIER/100);
                    MathUtils.Normalize(ref audiodata_inverse_mellog);

                    if (DEBUG_OUTPUT_TEXT) {
                        WriteAscii(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.ascii");
                        WriteF3Formatted(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.txt");
                    }

                    DrawGraph(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.png");

                    float[] audiodata_inverse_mellog_float = MathUtils.DoubleToFloat(audiodata_inverse_mellog);
                    bass.SaveFile(audiodata_inverse_mellog_float, name + "_inverse_mellog.wav", Analyzer.SAMPLING_RATE);
                }
                #endregion
            }
            #endif

            // Feature matrix handed to Scms.GetScms: wavelet-compressed or DCT based
            Comirva.Audio.Util.Maths.Matrix featureData = null;
            if (useHaarWavelet) {
                #region Wavelet Transform
                // lastHeight / lastWidth are filled by the compression call and are only
                // consumed by the (disabled) inverse transform below
                int lastHeight = 0;
                int lastWidth = 0;
                featureData = mfccMirage.ApplyMelScaleWaveletCompression(ref stftdata, out lastHeight, out lastWidth);

                #if DEBUG
                if (Analyzer.DEBUG_INFO_VERBOSE) {
                    if (DEBUG_OUTPUT_TEXT) featureData.WriteAscii(name + "_waveletdata.ascii");
                }
                #endif

                if (doOutputDebugInfo) {
                    featureData.DrawMatrixImageLogValues(name + "_waveletdata.png", true);
                }

                #if DEBUG
                // "& false" keeps this inverse-wavelet round trip compiled but permanently disabled
                if (Analyzer.DEBUG_INFO_VERBOSE  & false) {
                    #region Inverse Wavelet
                    // try to do an inverse wavelet transform
                    Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseMelScaleWaveletCompression(ref featureData, lastHeight, lastWidth);

                    if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";");
                    stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + "_specgramlog_inverse_wavelet.png", true);

                    double[] audiodata_inverse_wavelet = stftMirage.InverseStft(stftdata_inverse_wavelet);
                    MathUtils.Normalize(ref audiodata_inverse_wavelet);

                    if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_wavelet, name + "_audiodata_inverse_wavelet.txt");
                    DrawGraph(audiodata_inverse_wavelet, name + "_audiodata_inverse_wavelet.png");
                    bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_wavelet), name + "_inverse_wavelet.wav", Analyzer.SAMPLING_RATE);
                    #endregion
                }
                #endif
                #endregion
            } else {
                #region DCT Transform
                // It seems the Mirage way of applying the DCT is slightly faster than the
                // Comirva way due to less loops
                featureData = mfccMirage.ApplyMelScaleDCT(ref stftdata);
                //featureData = mfccMirage.ApplyComirvaWay(ref stftdata);

                #if DEBUG
                if (Analyzer.DEBUG_INFO_VERBOSE) {
                    if (DEBUG_OUTPUT_TEXT) featureData.WriteAscii(name + "_mfccdata.ascii");
                }
                #endif

                if (doOutputDebugInfo) {
                    featureData.DrawMatrixImageLogValues(name + "_mfccdata.png", true);
                }

                #if DEBUG
                // "& false" keeps this inverse-MFCC round trip compiled but permanently disabled
                if (Analyzer.DEBUG_INFO_VERBOSE & false) {
                    #region Inverse MFCC
                    // try to do an inverse mfcc
                    Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseMelScaleDCT(ref featureData);

                    if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";");
                    stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true);

                    double[] audiodata_inverse_mfcc = stftMirage.InverseStft(stftdata_inverse_mfcc);
                    MathUtils.Normalize(ref audiodata_inverse_mfcc);

                    if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_mfcc, name + "_audiodata_inverse_mfcc.txt");
                    DrawGraph(audiodata_inverse_mfcc, name + "_audiodata_inverse_mfcc.png");
                    bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_mfcc), name + "_inverse_mfcc.wav", Analyzer.SAMPLING_RATE);
                    #endregion
                }
                #endif
                #endregion
            }

            // Store in a Statistical Cluster Model Similarity class.
            // A Gaussian representation of a song
            // (GetScms returns null when the covariance matrix cannot be inverted)
            Scms audioFeature = Scms.GetScms(featureData, name);

            if (audioFeature != null) {

                // Store image if debugging
                if (doOutputDebugInfo) {
                    audioFeature.Image = featureData.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true);
                }

                // Store bitstring hash as well
                string hashString = GetBitString(featureData);
                audioFeature.BitString = hashString;

                // Store duration
                audioFeature.Duration = (long) duration;

                // Store file name (full path)
                audioFeature.Name = filePath.FullName;
            }

            Dbg.WriteLine ("Mirage - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);

            return audioFeature;
        }
Esempio n. 23
0
        /// <summary>
        /// Decodes an audio file and computes its feature with the MandelEllisExtractor.
        /// </summary>
        /// <param name="filePath">Audio file to analyze.</param>
        /// <param name="doOutputDebugInfo">Not read by this method.</param>
        /// <returns>
        /// The extracted feature with Duration (ms) and Name (file name without directory) set,
        /// or null when no audio could be decoded or the extractor returned null.
        /// </returns>
        public static AudioFeature AnalyzeMandelEllis(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO)
        {
            DbgTimer stopwatch = new DbgTimer();
            stopwatch.Start();

            float[] samples = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            bool hasAudio = samples != null && samples.Length != 0;
            if (!hasAudio) {
                Dbg.WriteLine("Error! - No Audio Found");
                return null;
            }

            #if DEBUG
            DrawGraph(MathUtils.FloatToDouble(samples), "waveform.png");
            #endif

            // Length of the decoded audio in milliseconds
            double durationMs = (double) samples.Length / SAMPLING_RATE * 1000;

            // Scale the samples by AUDIO_MULTIPLIER before extraction
            // (Matlab equivalent: if( max(abs(speech))<=1 ), speech = speech * 2^15; end;)
            MathUtils.Multiply(ref samples, AUDIO_MULTIPLIER);

            double[] scaledSamples = MathUtils.FloatToDouble(samples);
            AudioFeature feature =
                new MandelEllisExtractor(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, MEL_COEFFICIENTS)
                    .Calculate(scaledSamples);

            if (feature != null) {
                // Attach duration and the plain file name (not the full path)
                feature.Duration = (long) durationMs;
                feature.Name = filePath.Name;
            }

            Dbg.WriteLine("MandelEllisExtractor - Total Execution Time: {0} ms", stopwatch.Stop().TotalMilliseconds);

            return feature;
        }
Esempio n. 24
0
        //private static Mfcc mfccOptimized = new Mfcc(WINDOW_SIZE, SAMPLING_RATE, MEL_COEFFICIENTS, MFCC_COEFFICIENTS);
        //private static MFCC mfccComirva = new MFCC(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, true, 20.0, SAMPLING_RATE/2, MEL_COEFFICIENTS);

        #endif

        #region Methods

        public static bool AnalyzeAndAdd(FileInfo filePath, Db db, DatabaseService databaseService, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            if (audiodata == null || audiodata.Length == 0)  {
                Dbg.WriteLine("Error! - No Audio Found");
                return false;
            }

            // Read TAGs using BASS
            FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
            Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

            // Name of file being processed
            string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
                if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
            }
            #endif

            if (doOutputDebugInfo) {
                DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
            }

            // Calculate duration in ms
            double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

            // Explode samples to the range of 16 bit shorts (–32,768 to 32,767)
            // Matlab multiplies with 2^15 (32768)
            // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
            MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536

            // zero pad if the audio file is too short to perform a mfcc
            if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap))
            {
                int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
                Array.Resize<float>(ref audiodata, lenNew);
            }

            // Get fingerprint signatures using the Soundfingerprinting methods
            IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
            Repository repository = new Repository(permutations, databaseService, fingerprintService);

            // Image Service
            ImageService imageService = new ImageService(
                fingerprintService.SpectrumService,
                fingerprintService.WaveletService);

            // work config
            WorkUnitParameterObject param = new WorkUnitParameterObject();
            param.FingerprintingConfiguration = fingerprintingConfig;
            param.AudioSamples = audiodata;
            param.PathToAudioFile = filePath.FullName;
            param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
            param.StartAtMilliseconds = 0;

            // build track
            Track track = new Track();
            track.Title = name;
            track.TrackLengthMs = (int) duration;
            track.FilePath = filePath.FullName;
            track.Id = -1; // this will be set by the insert method

            #region parse tag_info
            if (tag_info != null) {
                Dictionary<string, string> tags = new Dictionary<string, string>();

                //if (tag_info.title != string.Empty) tags.Add("title", tag_info.title);
                if (tag_info.artist != string.Empty) tags.Add("artist", tag_info.artist);
                if (tag_info.album != string.Empty) tags.Add("album", tag_info.album);
                if (tag_info.albumartist != string.Empty) tags.Add("albumartist", tag_info.albumartist);
                if (tag_info.year != string.Empty) tags.Add("year", tag_info.year);
                if (tag_info.comment != string.Empty) tags.Add("comment", tag_info.comment);
                if (tag_info.genre != string.Empty) tags.Add("genre", tag_info.genre);
                if (tag_info.track != string.Empty) tags.Add("track", tag_info.track);
                if (tag_info.disc != string.Empty) tags.Add("disc", tag_info.disc);
                if (tag_info.copyright != string.Empty) tags.Add("copyright", tag_info.copyright);
                if (tag_info.encodedby != string.Empty) tags.Add("encodedby", tag_info.encodedby);
                if (tag_info.composer != string.Empty) tags.Add("composer", tag_info.composer);
                if (tag_info.publisher != string.Empty) tags.Add("publisher", tag_info.publisher);
                if (tag_info.lyricist != string.Empty) tags.Add("lyricist", tag_info.lyricist);
                if (tag_info.remixer != string.Empty) tags.Add("remixer", tag_info.remixer);
                if (tag_info.producer != string.Empty) tags.Add("producer", tag_info.producer);
                if (tag_info.bpm != string.Empty) tags.Add("bpm", tag_info.bpm);
                //if (tag_info.filename != string.Empty) tags.Add("filename", tag_info.filename);
                tags.Add("channelinfo", tag_info.channelinfo.ToString());
                //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString());
                if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
                if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
                if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
                if (tag_info.conductor != string.Empty) tags.Add("conductor", tag_info.conductor);
                if (tag_info.grouping != string.Empty) tags.Add("grouping", tag_info.grouping);
                if (tag_info.mood != string.Empty) tags.Add("mood", tag_info.mood);
                if (tag_info.rating != string.Empty) tags.Add("rating", tag_info.rating);
                if (tag_info.isrc != string.Empty) tags.Add("isrc", tag_info.isrc);

                foreach(var nativeTag in tag_info.NativeTags) {
                    string[] keyvalue = nativeTag.Split('=');
                    tags.Add(keyvalue[0], keyvalue[1]);
                }
                track.Tags = tags;
            }
            #endregion

            double[][] logSpectrogram;
            if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) {

                // store logSpectrogram as Matrix
                Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
                logSpectrogramMatrix = logSpectrogramMatrix.Transpose();

                #region Debug for Soundfingerprinting Method
                if (doOutputDebugInfo) {
                    imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png");

                    logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

                    if (DEBUG_OUTPUT_TEXT) {
                        logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
                    }
                }
                #endregion

                #region Insert Statistical Cluster Model Similarity Audio Feature as well
                Comirva.Audio.Util.Maths.Matrix scmsMatrix = null;
                if (useHaarWavelet) {
                    #region Wavelet Transform
                    int lastHeight = 0;
                    int lastWidth = 0;
                    scmsMatrix = mfccMirage.ApplyWaveletCompression(ref logSpectrogramMatrix, out lastHeight, out lastWidth);

                    #if DEBUG
                    if (Analyzer.DEBUG_INFO_VERBOSE) {
                        if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_waveletdata.ascii");
                    }
                    #endif

                    if (doOutputDebugInfo) {
                        scmsMatrix.DrawMatrixImageLogValues(name + "_waveletdata.png", true);
                    }

                    #if DEBUG
                    if (Analyzer.DEBUG_INFO_VERBOSE) {
                        #region Inverse Wavelet
                        // try to do an inverse wavelet transform
                        Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseWaveletCompression(ref scmsMatrix, lastHeight, lastWidth, logSpectrogramMatrix.Rows, logSpectrogramMatrix.Columns);

                        if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";");
                        stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + "_specgramlog_inverse_wavelet.png", true);
                        #endregion
                    }
                    #endif
                    #endregion
                } else {
                    #region DCT Transform
                    // It seems the Mirage way of applying the DCT is slightly faster than the
                    // Comirva way due to less loops
                    scmsMatrix = mfccMirage.ApplyDCT(ref logSpectrogramMatrix);

                    #if DEBUG
                    if (Analyzer.DEBUG_INFO_VERBOSE) {
                        if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_mfccdata.ascii");
                    }
                    #endif

                    if (doOutputDebugInfo) {
                        scmsMatrix.DrawMatrixImageLogValues(name + "_mfccdata.png", true);
                    }

                    #if DEBUG
                    if (Analyzer.DEBUG_INFO_VERBOSE) {
                        #region Inverse MFCC
                        // try to do an inverse mfcc
                        Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseDCT(ref scmsMatrix);

                        if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";");
                        stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true);
                        #endregion
                    }
                    #endif
                    #endregion
                }

                // Store in a Statistical Cluster Model Similarity class.
                // A Gaussian representation of a song
                Scms audioFeature = Scms.GetScms(scmsMatrix, name);

                if (audioFeature != null) {

                    // Store image if debugging
                    if (doOutputDebugInfo) {
                        audioFeature.Image = scmsMatrix.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true);
                    }

                    // Store bitstring hash as well
                    string hashString = GetBitString(scmsMatrix);
                    audioFeature.BitString = hashString;

                    // Store duration
                    audioFeature.Duration = (long) duration;

                    // Store file name
                    audioFeature.Name = filePath.FullName;

                    int id = track.Id;
                    if (db.AddTrack(ref id, audioFeature) == -1) {
                        Console.Out.WriteLine("Failed! Could not add audioFeature to database {0}!", name);
                    }
                }
                #endregion

            } else {
                // failed
                return false;
            }

            Dbg.WriteLine ("AnalyzeAndAdd - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return true;
        }
Esempio n. 25
0
        /// <summary>
        /// Analyze an audio file and build a Statistical Cluster Model Similarity (Scms) feature for it.
        /// The samples are run through an STFT and then either a mel scale + wavelet compression
        /// or a mel scale + DCT step before the Scms model is computed.
        /// </summary>
        /// <param name="filePath">the audio file to analyze</param>
        /// <param name="doOutputDebugInfo">when true, debug images are written using the file name as prefix</param>
        /// <param name="useHaarWavelet">true to use the wavelet compression path, false to use the DCT path</param>
        /// <returns>the audio feature, or null if the file could not be decoded or the Scms model could not be computed</returns>
        public static AudioFeature AnalyzeScms(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // get work config from the audio file
            WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath, doOutputDebugInfo);
            if (param == null) {
                // the decoder found no audio; there is nothing to analyze.
                // callers already have to cope with a null feature (see the Scms failure path below)
                return null;
            }
            string fileName = param.FileName;

            // used to save wave files in the debug inverse methods
            FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;

            // 2. Windowing
            // 3. FFT
            Comirva.Audio.Util.Maths.Matrix stftdata = stftMirage.Apply(param.AudioSamples);

            if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                stftdata.WriteAscii(fileName + "_stftdata.ascii");
                stftdata.WriteCSV(fileName + "_stftdata.csv", ";");
            }

            if (doOutputDebugInfo) {
                // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
                //stftdata.DrawMatrixImageLogValues(fileName + "_specgram.png", true);

                // spec gram with log values for the y axis (frequency)
                stftdata.DrawMatrixImageLogY(fileName + "_specgramlog.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE);
            }

            if (DEBUG_DO_INVERSE_TESTS) {
                #region Inverse STFT
                double[] audiodata_inverse_stft = stftMirage.InverseStft(stftdata);

                // divide
                //MathUtils.Divide(ref audiodata_inverse_stft, AUDIO_MULTIPLIER);
                MathUtils.Normalize(ref audiodata_inverse_stft);

                if (DEBUG_OUTPUT_TEXT) {
                    WriteAscii(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.ascii");
                    WriteF3Formatted(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.txt");
                }

                DrawGraph(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.png");

                float[] audiodata_inverse_float = MathUtils.DoubleToFloat(audiodata_inverse_stft);
                bass.SaveFile(audiodata_inverse_float, fileName + "_inverse_stft.wav", Analyzer.SAMPLING_RATE);
                #endregion
            }

            // 4. Mel Scale Filterbank
            // Mel-frequency is proportional to the logarithm of the linear frequency,
            // reflecting similar effects in the human's subjective aural perception)
            // 5. Take Logarithm
            // 6. DCT (Discrete Cosine Transform)

            if (DEBUG_INFO_VERBOSE) {
                #region Mel Scale and Log Values
                Comirva.Audio.Util.Maths.Matrix mellog = mfccMirage.ApplyMelScaleAndLog(ref stftdata);

                if (DEBUG_OUTPUT_TEXT) {
                    mellog.WriteCSV(fileName + "_mel_log.csv", ";");
                }

                if (doOutputDebugInfo) {
                    mellog.DrawMatrixImage(fileName + "_mel_log.png", 600, 400, true, true);
                }
                #endregion

                #region Inverse Mel Scale and Log Values
                if (DEBUG_DO_INVERSE_TESTS) {
                    Comirva.Audio.Util.Maths.Matrix inverse_mellog = mfccMirage.InverseMelScaleAndLog(ref mellog);

                    inverse_mellog.WriteCSV(fileName + "_mel_log_inverse.csv", ";");
                    inverse_mellog.DrawMatrixImageLogValues(fileName + "_mel_log_inverse.png", true);

                    double[] audiodata_inverse_mellog = stftMirage.InverseStft(inverse_mellog);
                    //MathUtils.Divide(ref audiodata_inverse_mellog, AUDIO_MULTIPLIER/100);
                    MathUtils.Normalize(ref audiodata_inverse_mellog);

                    if (DEBUG_OUTPUT_TEXT) {
                        WriteAscii(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.ascii");
                        WriteF3Formatted(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.txt");
                    }

                    DrawGraph(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.png");

                    float[] audiodata_inverse_mellog_float = MathUtils.DoubleToFloat(audiodata_inverse_mellog);
                    bass.SaveFile(audiodata_inverse_mellog_float, fileName + "_inverse_mellog.wav", Analyzer.SAMPLING_RATE);
                }
                #endregion
            }

            Comirva.Audio.Util.Maths.Matrix featureData = null;
            if (useHaarWavelet) {
                #region Wavelet Transform
                int lastHeight = 0;
                int lastWidth = 0;
                featureData = mfccMirage.ApplyMelScaleAndWaveletCompress(ref stftdata, out lastHeight, out lastWidth);

                if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                    featureData.WriteAscii(fileName + "_waveletdata.ascii");
                }

                if (doOutputDebugInfo) {
                    featureData.DrawMatrixImageLogValues(fileName + "_waveletdata.png", true);
                }

                if (DEBUG_DO_INVERSE_TESTS) {
                    #region Inverse Wavelet
                    // try to do an inverse wavelet transform
                    Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseMelScaleAndWaveletCompress(ref featureData, lastHeight, lastWidth);

                    if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(fileName + "_specgramlog_inverse_wavelet.csv", ";");
                    stftdata_inverse_wavelet.DrawMatrixImageLogValues(fileName + "_specgramlog_inverse_wavelet.png", true);

                    double[] audiodata_inverse_wavelet = stftMirage.InverseStft(stftdata_inverse_wavelet);
                    MathUtils.Normalize(ref audiodata_inverse_wavelet);

                    if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_wavelet, fileName + "_audiodata_inverse_wavelet.txt");
                    DrawGraph(audiodata_inverse_wavelet, fileName + "_audiodata_inverse_wavelet.png");
                    bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_wavelet), fileName + "_inverse_wavelet.wav", Analyzer.SAMPLING_RATE);
                    #endregion
                }
                #endregion
            } else {
                #region DCT Transform
                // It seems the Mirage way of applying the DCT is slightly faster than the
                // Comirva way due to less loops
                featureData = mfccMirage.ApplyMelScaleDCT(ref stftdata);
                //featureData = mfccMirage.ApplyComirvaWay(ref stftdata);

                if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                    featureData.WriteAscii(fileName + "_mfccdata.ascii");
                }

                if (doOutputDebugInfo) {
                    featureData.DrawMatrixImageLogValues(fileName + "_mfccdata.png", true);
                }

                if (DEBUG_DO_INVERSE_TESTS) {
                    #region Inverse MFCC
                    // try to do an inverse mfcc
                    Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseMelScaleDCT(ref featureData);

                    if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(fileName + "_stftdata_inverse_mfcc.csv", ";");
                    stftdata_inverse_mfcc.DrawMatrixImageLogValues(fileName + "_specgramlog_inverse_mfcc.png", true);

                    double[] audiodata_inverse_mfcc = stftMirage.InverseStft(stftdata_inverse_mfcc);
                    MathUtils.Normalize(ref audiodata_inverse_mfcc);

                    if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_mfcc, fileName + "_audiodata_inverse_mfcc.txt");
                    DrawGraph(audiodata_inverse_mfcc, fileName + "_audiodata_inverse_mfcc.png");
                    bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_mfcc), fileName + "_inverse_mfcc.wav", Analyzer.SAMPLING_RATE);
                    #endregion
                }
                #endregion
            }

            // Store in a Statistical Cluster Model Similarity class.
            // A Gaussian representation of a song
            Scms audioFeature = Scms.GetScms(featureData, fileName);

            if (audioFeature != null) {

                // Store image if debugging
                if (doOutputDebugInfo) {
                    audioFeature.Image = featureData.DrawMatrixImageLogValues(fileName + "_featuredata.png", true, false, 0, 0, true);
                }

                // Store bitstring hash as well
                string hashString = GetBitString(featureData);
                audioFeature.BitString = hashString;

                // Store duration
                audioFeature.Duration = (long) param.DurationInMs;

                // Store file name
                audioFeature.Name = filePath.FullName;
            } else {
                // failed creating the Scms class
                Console.Out.WriteLine("Failed! Could not compute the Scms {0}!", fileName);
            }

            Dbg.WriteLine ("AnalyzeScms - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return audioFeature;
        }
Esempio n. 26
0
        /// <summary>
        /// Decode an audio file and collect its samples and metadata into a work unit
        /// </summary>
        /// <param name="filePath">the audio file to read</param>
        /// <param name="doOutputDebugInfo">when true, the decoded samples are passed to DrawGraph using the file name as prefix</param>
        /// <returns>a WorkUnitParameterObject, or null when the decoder returned no samples</returns>
        public static WorkUnitParameterObject GetWorkUnitParameterObjectFromAudioFile(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            float[] samples = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
            bool hasAudio = samples != null && samples.Length > 0;
            if (!hasAudio) {
                Dbg.WriteLine("Error! - No Audio Found");
                return null;
            }

            // The extension-less, ASCII-only file name is used as prefix for all debug output
            string baseName = Path.GetFileNameWithoutExtension(filePath.Name);
            string fileName = StringUtils.RemoveNonAsciiCharacters(baseName);

            #if DEBUG
            if (DEBUG_INFO_VERBOSE && DEBUG_OUTPUT_TEXT) {
                WriteAscii(samples, fileName + "_audiodata.ascii");
                WriteF3Formatted(samples, fileName + "_audiodata.txt");
            }
            #endif

            if (doOutputDebugInfo) {
                double[] samplesAsDouble = MathUtils.FloatToDouble(samples);
                DrawGraph(samplesAsDouble, fileName + "_audiodata.png");
            }

            // Duration in ms, taken from the decoded length before any zero padding below
            double durationInMs = (double) samples.Length / SAMPLING_RATE * 1000;

            // Scale the samples up to the range of 16 bit shorts (-32,768 to 32,767),
            // the same way Matlab multiplies with 2^15 (32768):
            // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
            MathUtils.Multiply(ref samples, AUDIO_MULTIPLIER);

            // Too short to perform a mfcc? Then zero pad up to the minimum length
            int minimumLength = WINDOW_SIZE + OVERLAP;
            if (samples.Length < minimumLength) {
                Array.Resize<float>(ref samples, minimumLength);
            }

            // Assemble the work config
            WorkUnitParameterObject workUnit = new WorkUnitParameterObject {
                AudioSamples = samples,
                PathToAudioFile = filePath.FullName,
                MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000,
                StartAtMilliseconds = 0,
                FileName = fileName,
                DurationInMs = durationInMs,
                Tags = GetTagInfoFromFile(filePath.FullName)
            };

            Dbg.WriteLine ("Get Audio File Parameters - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
            return workUnit;
        }
		/// <summary>
		/// Computes the MFCC matrix for the given input: the filter weights are multiplied
		/// with m, every resulting value is converted to 10*log10(value) (values below 1
		/// become 0), and the result is multiplied with the dct matrix.
		/// </summary>
		/// <param name="m">input matrix; only read by this method</param>
		/// <returns>the product of the dct matrix and the log-scaled mel matrix</returns>
		public Matrix Apply(ref Matrix m)
		{
			DbgTimer timer = new DbgTimer();
			timer.Start();

			Matrix mel = new Matrix(filterWeights.rows, m.columns);

			// The loops below are a hand-written replacement for
			//   mel = filterWeights.Multiply(m);
			// followed by an element-wise
			//   value < 1.0f ? 0 : 10 * log10(value)
			int inputColumns = m.columns;
			int inputRows = m.rows;
			int melColumns = mel.columns;
			int weightColumns = filterWeights.columns;
			int weightRows = filterWeights.rows;

			unsafe {
				fixed (float* input = m.d, weights = filterWeights.d, output = mel.d) {
					for (int column = 0; column < inputColumns; column++) {
						for (int filter = 0; filter < weightRows; filter++) {
							int cell = filter*melColumns + column;
							int weightRowStart = filter*weightColumns;

							// dot product of one filter row with one input column,
							// accumulated directly in the output cell
							for (int row = 0; row < inputRows; row++) {
								output[cell] += weights[weightRowStart + row] * input[row*inputColumns + column];
							}

							if (output[cell] < 1.0f) {
								output[cell] = 0;
							} else {
								output[cell] = (float)(10.0 * Math.Log10(output[cell]));
							}
						}
					}
				}
			}

			Matrix mfcc = dct.Multiply(mel);

			Dbg.WriteLine("mfcc (MfccLessOptimized) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");

			return mfcc;
		}
Esempio n. 28
0
        /// <summary>
        /// Query the repository for tracks similar to the given audio file using the
        /// querying fingerprinting configuration and a threshold table count of 0.
        /// </summary>
        /// <param name="filePath">the audio file to search with</param>
        /// <param name="repository">the repository to query</param>
        /// <returns>the list of candidates; an empty list when the audio file could not be decoded</returns>
        public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath, Repository repository)
        {
            DbgTimer t = new DbgTimer();
            t.Start ();

            // get work config from the audio file
            WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
            if (param == null) {
                // no audio could be decoded, so there is nothing to search with;
                // report "no candidates" instead of failing with a NullReferenceException
                return new List<FindSimilar.QueryResult>();
            }
            param.FingerprintingConfiguration = fingerprintingConfigQuerying;

            // TODO: i don't really know how the threshold tables work.
            // 1 returns more similar hits
            // 2 returns sometimes only the one we search for
            // even 0 seem to work (like 1)
            List<FindSimilar.QueryResult> candidates = repository.FindSimilarFromAudioSamplesList(param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys,  0, param);

            Dbg.WriteLine ("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
            return candidates;
        }
Esempio n. 29
0
        /// <summary>
        /// Builds a Scms model (mean, covariance and inverse covariance) from the
        /// MFCC representation of a song.
        /// </summary>
        /// <param name="mfcc">the MFCC matrix of the song</param>
        /// <param name="name">prefix for the debug text and image files written in verbose DEBUG builds</param>
        /// <returns>the Scms model, or null when inverting the covariance matrix throws a MatrixSingularException</returns>
        public static Scms GetScms(Matrix mfcc, string name)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Mean
            Vector mean = mfcc.Mean();

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    mean.WriteText(name + "_mean_orig.txt");
                }
                mean.DrawMatrixGraph(name + "_mean_orig.png");
            }
            #endif

            // Covariance
            Matrix covariance = mfcc.Covariance(mean);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    covariance.WriteText(name + "_covariance_orig.txt");
                }
                covariance.DrawMatrixGraph(name + "_covariance_orig.png");
            }
            #endif

            // Inverse Covariance - a singular covariance matrix means no model can be built
            Matrix inverseCovariance;
            try {
                inverseCovariance = covariance.Inverse();
            } catch (MatrixSingularException) {
                Dbg.WriteLine("MatrixSingularException - Scms failed!");
                return null;
            }

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE)
            {
                if (Analyzer.DEBUG_OUTPUT_TEXT)
                {
                    inverseCovariance.WriteAscii(name + "_inverse_covariance_orig.txt");
                }
                inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance_orig.png");
            }
            #endif

            // Copy the mean plus the upper triangle (diagonal included) of both
            // matrices into the flat arrays of the model, row by row.
            int dimension = mean.rows;
            Scms model = new Scms(dimension);
            int packedIndex = 0;
            for (int row = 0; row < dimension; row++)
            {
                model.mean[row] = mean.d[row, 0];
                for (int column = row; column < dimension; column++, packedIndex++)
                {
                    model.cov[packedIndex]  = covariance.d[row, column];
                    model.icov[packedIndex] = inverseCovariance.d[row, column];
                }
            }

            Dbg.WriteLine("(Mirage) - scms created in: {0} ms", timer.Stop().TotalMilliseconds);

            return model;
        }
Esempio n. 30
0
        /// <summary>
        /// Computes the MFCC matrix for the given input: the filter weights are multiplied
        /// with m (visiting only the index range given by fwFT for each filter row), every
        /// resulting value is converted to 10*log10(value) (values below 1 become 0), and
        /// the result is multiplied with the dct matrix.
        /// </summary>
        /// <param name="m">input matrix; only read by this method</param>
        /// <returns>the product of the dct matrix and the log-scaled mel matrix</returns>
        /// <exception cref="MfccFailedException">when the dct multiplication reports a dimension mismatch</exception>
        public Matrix Apply(ref Matrix m)
        {
            DbgTimer timer = new DbgTimer ();
            timer.Start ();

            Matrix mel = new Matrix (filterWeights.rows, m.columns);

            int inputColumns = m.columns;
            int melColumns = mel.columns;
            int weightColumns = filterWeights.columns;
            int weightRows = filterWeights.rows;

            unsafe {
                fixed (float* input = m.d, weights = filterWeights.d, output = mel.d) {
                    for (int column = 0; column < inputColumns; column++) {
                        for (int filter = 0; filter < weightRows; filter++) {
                            int cell = filter*melColumns + column;
                            int weightRowStart = filter*weightColumns;

                            // The filter weights matrix is mostly 0, so only the
                            // [from, to) range stored in fwFT is multiplied.
                            int from = fwFT[filter,0];
                            int to = fwFT[filter,1];
                            for (int row = from; row < to; row++) {
                                output[cell] += weights[weightRowStart + row] * input[row*inputColumns + column];
                            }

                            if (output[cell] < 1.0f) {
                                output[cell] = 0;
                            } else {
                                output[cell] = (float)(10.0 * Math.Log10(output[cell]));
                            }
                        }
                    }
                }
            }

            Matrix mfcc;
            try {
                mfcc = dct.Multiply (mel);
            } catch (MatrixDimensionMismatchException) {
                throw new MfccFailedException ();
            }

            long elapsed = 0;
            timer.Stop (ref elapsed);
            Dbg.WriteLine ("Mirage - mfcc Execution Time: {0}ms", elapsed);

            return mfcc;
        }
Esempio n. 31
0
    /// <summary>
    /// Manual timing run: analyzes one hard-coded song repeatedly, then measures the
    /// Scms distance computation between two hard-coded songs, logging the timings.
    /// </summary>
    private static void Test()
    {
        const int analysisRuns = 10;
        const int distanceRuns = 100000;

        mirageaudio_initgst();

        string firstPath = "/home/lorentz/Music/Library/Pachelbel/Johann Pachelbel - Canon And Gigue In D Major For 3 Violins And Basso Continuo.mp3";
        string secondPath = "/home/lorentz/Music/Library/Karajan Adagios/CD 1/Pachelbel - Canon in d Major (Kanon And Gigue in d Major = d Dur) av Johann Pachelbel.mp3";

        // Time the analysis of the first song
        DbgTimer analysisTimer = new DbgTimer();
        analysisTimer.Start();
        int pass = 0;
        while (pass < analysisRuns) {
            Analyzer.Analyze(firstPath);
            pass++;
        }
        long analysisMs = 0;
        analysisTimer.Stop(ref analysisMs);
        Dbg.WriteLine("Analysis: " + analysisRuns + " times - " + analysisMs + "ms; " +
                      (double)analysisMs/(double)analysisRuns + "ms per analysis");

        // Build the two models that are compared below
        Scms firstSong = Analyzer.Analyze(firstPath);
        Scms secondSong = Analyzer.Analyze(secondPath);

        ScmsConfiguration config = new ScmsConfiguration (Analyzer.MFCC_COEFFICIENTS);

        Console.WriteLine("Distance = " + Scms.Distance (firstSong, secondSong, config));

        // Time the distance computation
        DbgTimer distanceTimer = new DbgTimer();
        distanceTimer.Start();
        pass = 0;
        while (pass < distanceRuns) {
            Scms.Distance (firstSong, secondSong, config);
            pass++;
        }
        long distanceMs = 0;
        distanceTimer.Stop(ref distanceMs);
        Dbg.WriteLine("Distance Computation: " + distanceRuns + " times - " + distanceMs + "ms; " +
                      (double)distanceMs/(double)distanceRuns + "ms per comparison");
    }
Esempio n. 32
0
        /// <summary>
        /// Decode an audio file by running the bundled sox executable, which is asked to
        /// write single channel 32 bit float raw samples at the given sample rate to a
        /// temporary file. If the result is longer than secondsToAnalyze, only a section of
        /// that length taken around the middle of the file is returned.
        /// </summary>
        /// <param name="fileIn">path of the audio file ("%20" is replaced with a space)</param>
        /// <param name="srate">sample rate passed to sox</param>
        /// <param name="secondsToAnalyze">maximum number of seconds to return</param>
        /// <returns>
        /// the decoded samples, or null when sox exits with a non-zero code, the raw file is
        /// missing, its size is not a whole number of floats or it is too large to buffer
        /// </returns>
        public static float[] DecodeUsingSox(string fileIn, int srate, int secondsToAnalyze)
        {
            lock (_locker) {
                using (Process toraw = new Process())
                {
                    fileIn = Regex.Replace(fileIn, "%20", " ");
                    DbgTimer t = new DbgTimer();
                    t.Start();
                    Dbg.WriteLine("Decoding: " + fileIn);
                    String tempFile = System.IO.Path.GetTempFileName();
                    String raw      = tempFile + "_raw.wav";
                    Dbg.WriteLine("Temporary raw file: " + raw);

                    float[] floatBuffer = null;
                    try {
                        toraw.StartInfo.FileName               = "./NativeLibraries\\sox\\sox.exe";
                        toraw.StartInfo.Arguments              = " \"" + fileIn + "\" -r " + srate + " -e float -b 32 -G -t raw \"" + raw + "\" channels 1";
                        toraw.StartInfo.UseShellExecute        = false;
                        toraw.StartInfo.RedirectStandardOutput = true;
                        toraw.StartInfo.RedirectStandardError  = true;

                        // Both streams are redirected, so both have to be drained while the
                        // process runs: waiting for exit first can deadlock once the child
                        // fills a pipe buffer. stderr is collected asynchronously, stdout is
                        // read synchronously.
                        System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                        toraw.ErrorDataReceived += (sender, e) => {
                            if (e.Data != null)
                            {
                                standardError.AppendLine(e.Data);
                            }
                        };
                        toraw.Start();
                        toraw.BeginErrorReadLine();
                        string standardOutput = toraw.StandardOutput.ReadToEnd();
                        toraw.WaitForExit();

                        int exitCode = toraw.ExitCode;
                        // 0 = succesfull
                        // 1 = partially succesful
                        // 2 = failed
                        if (exitCode != 0)
                        {
                            Console.Out.WriteLine(standardError.ToString());
                            return(null);
                        }

                        #if DEBUG
                        Console.Out.WriteLine(standardOutput);
                        #endif

                        try {
                            FileInfo fi = new FileInfo(raw);
                            using (FileStream fs = fi.OpenRead())
                            {
                                // sizes are kept as long so large files cannot overflow an int
                                long totalBytes = fi.Length;
                                if ((totalBytes % sizeof(float)) != 0)
                                {
                                    return(null);
                                }
                                long totalSamples = totalBytes / sizeof(float);

                                // if the audio file is larger than seconds to analyze,
                                // extract a section around the middle of the file
                                long bytesToRead = totalBytes;
                                long sectionBytes = (long)secondsToAnalyze * srate * sizeof(float);
                                if (totalBytes > sectionBytes)
                                {
                                    long seekto = (totalSamples / 2 - (long)(secondsToAnalyze / 2) * srate) * sizeof(float);
                                    Dbg.WriteLine("Extracting section: seekto = " + seekto);
                                    bytesToRead = sectionBytes;
                                    fs.Seek(seekto, SeekOrigin.Begin);
                                }

                                if (bytesToRead < 0 || bytesToRead > int.MaxValue)
                                {
                                    Dbg.WriteLine("Error! - Cannot buffer " + bytesToRead + " bytes");
                                    return(null);
                                }

                                // Stream.Read may return fewer bytes than requested,
                                // so keep reading until the buffer is full or the file ends
                                byte[] bytesBuffer = new byte[(int)bytesToRead];
                                int filled = 0;
                                while (filled < bytesBuffer.Length)
                                {
                                    int count = fs.Read(bytesBuffer, filled, bytesBuffer.Length - filled);
                                    if (count == 0)
                                    {
                                        break;
                                    }
                                    filled += count;
                                }

                                int items = bytesBuffer.Length / sizeof(float);
                                floatBuffer = new float[items];
                                for (int i = 0; i < items; i++)
                                {
                                    floatBuffer[i] = BitConverter.ToSingle(bytesBuffer, i * sizeof(float));
                                }
                            }
                        } catch (System.IO.FileNotFoundException) {
                            floatBuffer = null;
                        }
                    } finally {
                        // runs on every path, so the temp files are also removed when sox fails
                        try
                        {
                            File.Delete(tempFile);
                            File.Delete(raw);
                        }
                        catch (IOException io)
                        {
                            Console.WriteLine(io);
                        }

                        Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
                    }
                    return(floatBuffer);
                }
            }
        }
Esempio n. 33
0
        /// <summary>
        /// Builds a Scms audio feature from the given spectral images and adds it to the database.
        /// </summary>
        /// <param name="spectralImages">spectral images; their rows are concatenated into one matrix</param>
        /// <param name="fingerprints">not used by this method</param>
        /// <param name="param">work unit supplying the file name, duration and path of the audio file</param>
        /// <param name="db">database the audio feature is added to</param>
        /// <param name="trackId">not used by this method (db.AddTrack is called without it)</param>
        /// <param name="doOutputDebugInfo">when true, the merged matrix is drawn to an image file</param>
        /// <returns>true when the feature was added; false when the Scms model could not be built or db.AddTrack returned -1</returns>
        private static bool AnalyseAndAddScmsUsingFingerprints(List<double[][]> spectralImages,
                                                               List<bool[]> fingerprints,
                                                               WorkUnitParameterObject param,
                                                               Db db,
                                                               int trackId,
                                                               bool doOutputDebugInfo=DEFAULT_DEBUG_INFO)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start ();

            string fileName = param.FileName;

            // Flatten the list of images into a single array of rows using Linq
            double[][] mergedRows = spectralImages.SelectMany(image => image).ToArray();
            Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(mergedRows);

            if (doOutputDebugInfo) {
                scmsMatrix.DrawMatrixImage(String.Format("{0}_spectral.png", fileName));
            }

            #region Store in a Statistical Cluster Model Similarity class.
            Scms audioFeature = Scms.GetScms(scmsMatrix, fileName);
            if (audioFeature == null) {
                return false;
            }

            // Bitstring hash, duration and file name go along with the model
            audioFeature.BitString = GetBitString(scmsMatrix);
            audioFeature.Duration = (long) param.DurationInMs;
            audioFeature.Name = param.PathToAudioFile;

            // Add to database
            bool added = db.AddTrack(audioFeature) != -1;
            if (!added) {
                Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName);
                return false;
            }
            #endregion

            Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints2 - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
            return true;
        }
		/// <summary>
		/// Decode an audio file by running the bundled mplayer executable, which is asked
		/// to write a single channel float wav file at the given sample rate to a temporary
		/// file. The wav file is then read with RiffRead.
		/// </summary>
		/// <param name="fileIn">path of the audio file ("%20" is replaced with a space)</param>
		/// <param name="srate">sample rate passed to the mplayer resample filter</param>
		/// <returns>the first channel of the decoded sound data, or null when mplayer exits with a non-zero code</returns>
		public static float[] DecodeUsingMplayer(string fileIn, int srate) {
			
			lock (_locker) {
				using (Process towav = new Process())
				{
					fileIn = Regex.Replace(fileIn, "%20", " ");
					DbgTimer t = new DbgTimer();
					t.Start();
					Dbg.WriteLine("Decoding: " + fileIn);
					String tempFile = System.IO.Path.GetTempFileName();
					String wav = tempFile + ".wav";
					Dbg.WriteLine("Temporary wav file: " + wav);
					
					try
					{
						towav.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe";
						towav.StartInfo.Arguments = " -quiet -ao pcm:fast:waveheader \""+fileIn+"\" -format floatle -af resample="+srate+":0:2,pan=1:0.5:0.5 -channels 1 -vo null -vc null -ao pcm:file=\\\""+wav+"\\\"";
						towav.StartInfo.UseShellExecute = false;
						towav.StartInfo.RedirectStandardOutput = true;
						towav.StartInfo.RedirectStandardError = true;
						
						// Both streams are redirected, so both have to be drained while the
						// process runs: waiting for exit first can deadlock once the child
						// fills a pipe buffer. stderr is collected asynchronously, stdout is
						// read synchronously.
						System.Text.StringBuilder standardError = new System.Text.StringBuilder();
						towav.ErrorDataReceived += (sender, e) => {
							if (e.Data != null) {
								standardError.AppendLine(e.Data);
							}
						};
						towav.Start();
						towav.BeginErrorReadLine();
						string standardOutput = towav.StandardOutput.ReadToEnd();
						towav.WaitForExit();
						
						int exitCode = towav.ExitCode;
						// 0 = succesfull
						// 1 = partially succesful
						// 2 = failed
						if (exitCode != 0) {
							Console.Out.WriteLine(standardError.ToString());
							return null;
						}
						
						#if DEBUG
						Console.Out.WriteLine(standardOutput);
						#endif
						
						RiffRead riff = new RiffRead(wav);
						riff.Process();
						float[] floatBuffer = riff.SoundData[0];
						
						Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
						return floatBuffer;
					}
					finally
					{
						// runs on every path, so the placeholder created by GetTempFileName
						// is also removed when mplayer fails or the wav cannot be read
						try
						{
							File.Delete(tempFile);
							// NOTE(review): the wav file itself is deliberately left in place, as in
							// the original code (its deletion was commented out) - confirm whether
							// RiffRead still holds the file or whether it should be deleted here
							//File.Delete(wav);
						}
						catch (IOException io)
						{
							Console.WriteLine(io);
						}
					}
				}
			}
		}
Esempio n. 35
0
		/// <summary>
		/// Look up tracks in the repository that are perceptually similar to an audio file,
		/// using the sound fingerprinting methods.
		/// </summary>
		/// <param name="filePath">audio file to search with</param>
		/// <param name="repository">the database (repository) to query</param>
		/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param>
		/// <param name="optimizeSignatureCount">Reduce the number of signatures in order to increase the search performance</param>
		/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
		/// <param name="splashScreen">The "please wait" splash screen to report progress on (or null)</param>
		/// <returns>the query results (e.g. similar tracks), or null when the audio file could not be read</returns>
		public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath,
		                                                                                 Repository repository,
		                                                                                 int thresholdTables,
		                                                                                 bool optimizeSignatureCount,
		                                                                                 bool doSearchEverything,
		                                                                                 SplashSceenWaitingForm splashScreen) {
			DbgTimer stopwatch = new DbgTimer();
			stopwatch.Start ();

			if (splashScreen != null) {
				splashScreen.SetProgress(0, "Reading audio file ...");
			}
			
			// Build the work unit for the query file; null means the file could not be read.
			WorkUnitParameterObject workUnit = GetWorkUnitParameterObjectFromAudioFile(filePath);
			if (workUnit == null) {
				if (splashScreen != null) {
					splashScreen.SetProgress(0, "Failed reading audio file!");
				}
				return null;
			}
			
			// Querying uses its own fingerprinting configuration.
			workUnit.FingerprintingConfiguration = fingerprintingConfigQuerying;
			
			if (splashScreen != null) {
				splashScreen.SetProgress(1, "Successfully reading audio file!");
			}

			// How thresholdTables is used:
			// every signature of the query file yields candidates, based on how many
			// fingerprints share the same hash bucket. The more fingerprints share a
			// bucket, the more likely the candidate is an exact match.
			// 0 or 1 : return every track associated with the same hash bucket (many matches)
			// 2+     : stricter, sometimes only the exact match is returned
			List<FindSimilar.QueryResult> matches = repository.FindSimilarFromAudioSamplesList(
				workUnit.FingerprintingConfiguration.NumberOfHashTables,
				workUnit.FingerprintingConfiguration.NumberOfKeys,
				thresholdTables,
				workUnit,
				optimizeSignatureCount,
				doSearchEverything,
				splashScreen);

			Dbg.WriteLine ("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", stopwatch.Stop().TotalMilliseconds);
			return matches;
		}
Esempio n. 36
0
        /// <summary>
        /// Builds the Scms model (mean, covariance and inverse covariance) of a song
        /// from its MFCC representation.
        /// </summary>
        /// <param name="mfcc">Mirage.Matrix holding the MFCCs of the song</param>
        /// <param name="name">file name prefix for the debug dumps written in verbose DEBUG builds</param>
        /// <returns>the filled Scms, or null when the covariance matrix is singular</returns>
        public static Scms GetScms(Matrix mfcc, string name)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start();

            // Step 1: mean vector
            Vector mean = mfcc.Mean();

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                mean.WriteText(name + "_mean_orig.txt");
                mean.DrawMatrixGraph(name + "_mean_orig.png");
            }
            #endif

            // Step 2: covariance matrix around that mean
            Matrix covariance = mfcc.Covariance(mean);

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                covariance.WriteText(name + "_covariance_orig.txt");
                covariance.DrawMatrixGraph(name + "_covariance_orig.png");
            }
            #endif

            // Step 3: inverse covariance; a singular matrix means no model can be built
            Matrix inverseCovariance;
            try {
                inverseCovariance = covariance.Inverse();
            } catch (MatrixSingularException) {
                Dbg.WriteLine("MatrixSingularException - Scms failed!");
                return null;
            }

            #if DEBUG
            if (Analyzer.DEBUG_INFO_VERBOSE) {
                inverseCovariance.WriteAscii(name + "_inverse_covariance_orig.txt");
                inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance_orig.png");
            }
            #endif

            // Step 4: pack everything into the Scms. Only the entries with column >= row
            // (the upper triangle) of both matrices are stored, row after row, in flat arrays.
            int dimension = mean.rows;
            Scms model = new Scms(dimension);
            int packedIndex = 0;
            for (int row = 0; row < dimension; row++) {
                model.mean[row] = mean.d[row, 0];
                for (int col = row; col < dimension; col++) {
                    model.cov[packedIndex] = covariance.d[row, col];
                    model.icov[packedIndex] = inverseCovariance.d[row, col];
                    packedIndex++;
                }
            }

            Dbg.WriteLine("(Mirage) - scms created in: {0} ms", timer.Stop().TotalMilliseconds);

            return model;
        }
Esempio n. 37
0
		/// <summary>
		/// Find Similar Tracks to an audio file using its file path
		/// </summary>
		/// <param name="searchForPath">audio file path</param>
		/// <param name="db">database</param>
		/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
		/// <param name="numToTake">max number of entries to return</param>
		/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
		/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
		/// <returns>
		/// a dictionary whose keys are (id from the track mapping, track name) pairs and whose values are
		/// the absolute distances to the seed file. It is empty when the analysis method is not
		/// supported or the seed file yields no audio feature.
		/// </returns>
		public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(string searchForPath, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence)
		{
			DbgTimer t = new DbgTimer();
			t.Start();

			FileInfo fi = new FileInfo(searchForPath);
			AudioFeature seedAudioFeature = null;
			AudioFeature[] audioFeatures = null;
			switch (analysisMethod) {
				case Analyzer.AnalysisMethod.MandelEllis:
					seedAudioFeature = Analyzer.AnalyzeMandelEllis(fi);
					audioFeatures = new MandelEllis[100];
					break;
				case Analyzer.AnalysisMethod.SCMS:
					seedAudioFeature = Analyzer.AnalyzeScms(fi);
					audioFeatures = new Scms[100];
					break;
				default:
					// Without this branch both locals stay null and the code below
					// fails with a NullReferenceException.
					Console.Out.WriteLine("Failed! Unsupported analysis method ({0})!", analysisMethod);
					return new Dictionary<KeyValuePair<int, string>, double>();
			}
			
			// Nothing to compare against when the seed file produced no audio feature.
			if (seedAudioFeature == null) {
				Console.Out.WriteLine("Failed! Could not analyze audio file ({0})!", searchForPath);
				return new Dictionary<KeyValuePair<int, string>, double>();
			}
			
			// Get all tracks from the DB except the seedSongs
			IDataReader r = db.GetTracks(null, seedAudioFeature.Duration, percentage);
			
			// store results in a dictionary
			var NameDictionary = new Dictionary<KeyValuePair<int, string>, double>();
			
			int[] mapping = new int[100];
			int read = 1;
			double dcur;
			
			// Pull the candidates in batches of up to 100 until the database reports none left.
			while (read > 0) {
				read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, 100, analysisMethod);
				for (int i = 0; i < read; i++) {
					dcur = seedAudioFeature.GetDistance(audioFeatures[i], distanceType);
					
					// convert to positive values
					dcur = Math.Abs(dcur);
					
					NameDictionary.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), dcur);
				}
			}
			
			// sort by non unique values
			var sortedDict = (from entry in NameDictionary orderby entry.Value ascending select entry)
				.Take(numToTake)
				.ToDictionary(pair => pair.Key, pair => pair.Value);
			
			Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", seedAudioFeature.Name, t.Stop().TotalMilliseconds));
			return sortedDict;
		}
		/// <summary>
		/// Decode an audio file in two steps: the bundled mplayer first writes a wav file,
		/// which is then handed to <see cref="DecodeUsingSox"/>.
		/// </summary>
		/// <param name="fileIn">path of the file to decode; any "%20" is replaced by a space first</param>
		/// <param name="srate">sample rate passed on to DecodeUsingSox</param>
		/// <param name="secondsToAnalyze">number of seconds passed on to DecodeUsingSox</param>
		/// <returns>
		/// the result of DecodeUsingSox, or null when mplayer exits with a non-zero code
		/// or did not produce the wav file
		/// </returns>
		public static float[] DecodeUsingMplayerAndSox(string fileIn, int srate, int secondsToAnalyze) {
			
			lock (_locker) {
				using (Process tosoxreadable = new Process())
				{
					fileIn = Regex.Replace(fileIn, "%20", " ");
					DbgTimer t = new DbgTimer();
					t.Start();
					Dbg.WriteLine("Decoding: " + fileIn);
					String tempFile = System.IO.Path.GetTempFileName();
					String soxreadablewav = tempFile + ".wav";
					Dbg.WriteLine("Temporary wav file: " + soxreadablewav);
					
					try
					{
						tosoxreadable.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe";
						tosoxreadable.StartInfo.Arguments = " -quiet -vc null -vo null -ao pcm:fast:waveheader \""+fileIn+"\" -ao pcm:file=\\\""+soxreadablewav+"\\\"";
						tosoxreadable.StartInfo.UseShellExecute = false;
						tosoxreadable.StartInfo.RedirectStandardOutput = true;
						tosoxreadable.StartInfo.RedirectStandardError = true;
						
						// Drain both redirected pipes while the process runs. Reading them only
						// after WaitForExit() can deadlock: the child blocks once a pipe buffer
						// is full and therefore never exits.
						var standardOutput = new System.Text.StringBuilder();
						var standardError = new System.Text.StringBuilder();
						tosoxreadable.OutputDataReceived += (sender, e) => { if (e.Data != null) standardOutput.AppendLine(e.Data); };
						tosoxreadable.ErrorDataReceived += (sender, e) => { if (e.Data != null) standardError.AppendLine(e.Data); };
						
						tosoxreadable.Start();
						tosoxreadable.BeginOutputReadLine();
						tosoxreadable.BeginErrorReadLine();
						// The parameterless overload also waits for the asynchronous readers to finish.
						tosoxreadable.WaitForExit();
						
						// 0 = successful
						// 1 = partially successful
						// 2 = failed
						if (tosoxreadable.ExitCode != 0) {
							Console.Out.WriteLine(standardError.ToString());
							return null;
						}
						
						#if DEBUG
						Console.Out.WriteLine(standardOutput.ToString());
						#endif
						
						float[] floatBuffer = null;
						if (File.Exists(soxreadablewav)) {
							floatBuffer = DecodeUsingSox(soxreadablewav, srate, secondsToAnalyze);
						}
						Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
						return floatBuffer;
					}
					finally
					{
						// Remove both temporary files on every path (success, failure, exception).
						// File.Delete does not complain about a file that does not exist.
						foreach (string leftover in new[] { tempFile, soxreadablewav })
						{
							try
							{
								File.Delete(leftover);
							}
							catch (IOException io)
							{
								Console.WriteLine(io);
							}
						}
					}
				}
			}
		}
Esempio n. 39
0
		/// <summary>
		/// Find Similar Tracks to one or many audio files using their unique database id(s)
		/// </summary>
		/// <param name="id">an array of unique database ids for the audio files to search for similar matches</param>
		/// <param name="exclude">an array of unique database ids to ignore (normally the same as the id array)</param>
		/// <param name="db">database</param>
		/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
		/// <param name="numToTake">max number of entries to return</param>
		/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
		/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
		/// <returns>
		/// a dictionary whose keys are (id from the track mapping, track name) pairs and whose values are
		/// the absolute distances averaged over all seed tracks. It is empty when no ids are given,
		/// the analysis method is not supported or none of the seed tracks could be loaded.
		/// </returns>
		public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(int[] id, int[] exclude, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence)
		{
			DbgTimer t = new DbgTimer();
			t.Start();
			
			// Without seeds there is nothing to compare against (and no duration to query with).
			if (id == null || id.Length == 0) {
				Console.Out.WriteLine("Failed! No seed track ids given!");
				return new Dictionary<KeyValuePair<int, string>, double>();
			}
			
			AudioFeature[] audioFeatures = null;
			switch (analysisMethod) {
				case Analyzer.AnalysisMethod.MandelEllis:
					audioFeatures = new MandelEllis[100];
					break;
				case Analyzer.AnalysisMethod.SCMS:
					audioFeatures = new Scms[100];
					break;
				default:
					// Without this branch the buffers stay null and the code below
					// fails with a NullReferenceException.
					Console.Out.WriteLine("Failed! Unsupported analysis method ({0})!", analysisMethod);
					return new Dictionary<KeyValuePair<int, string>, double>();
			}
			
			// Load the seed tracks, skipping ids for which the database has no track.
			var seedAudioFeatures = new List<AudioFeature>(id.Length);
			foreach (int seedId in id) {
				AudioFeature seed = db.GetTrack(seedId, analysisMethod);
				if (seed == null) {
					Console.Out.WriteLine("Failed! Could not load seed track from database ({0})!", seedId);
					continue;
				}
				seedAudioFeatures.Add(seed);
			}
			if (seedAudioFeatures.Count == 0) {
				return new Dictionary<KeyValuePair<int, string>, double>();
			}
			
			// Get all tracks from the DB except the seedSongs
			IDataReader r = db.GetTracks(exclude, seedAudioFeatures[0].Duration, percentage);
			
			// store results in a dictionary
			var NameDictionary = new Dictionary<KeyValuePair<int, string>, double>();
			
			int[] mapping = new int[100];
			int read = 1;
			
			// Pull the candidates in batches of up to 100 until the database reports none left.
			while (read > 0) {
				read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, 100, analysisMethod);
				for (int i = 0; i < read; i++) {
					
					// Sum of the absolute distances between this candidate and every seed.
					double d = 0;
					foreach (AudioFeature seed in seedAudioFeatures) {
						d += Math.Abs(seed.GetDistance(audioFeatures[i], distanceType));
					}
					
					// Candidates with a total distance of exactly zero are left out, as before.
					if (d > 0) {
						NameDictionary.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), d / seedAudioFeatures.Count);
					}
				}
			}
			
			// sort by non unique values
			var sortedDict = (from entry in NameDictionary orderby entry.Value ascending select entry)
				.Take(numToTake)
				.ToDictionary(pair => pair.Key, pair => pair.Value);

			Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", String.Join(",", seedAudioFeatures.Select(p=>p.Name)), t.Stop().TotalMilliseconds));
			return sortedDict;
		}
		/// <summary>
		/// Decode an audio file by running the bundled sox, which is asked to write raw
		/// 32 bit float, single channel samples at <paramref name="srate"/>, and read those samples back.
		/// When the decoded data is longer than <paramref name="secondsToAnalyze"/> seconds,
		/// only a section of that length around the middle of the file is read.
		/// </summary>
		/// <param name="fileIn">path of the file to decode; any "%20" is replaced by a space first</param>
		/// <param name="srate">sample rate handed to sox</param>
		/// <param name="secondsToAnalyze">maximum number of seconds to return</param>
		/// <returns>
		/// the decoded samples, or null when sox exits with a non-zero code, the raw file is missing
		/// or its size is not a multiple of the size of a float
		/// </returns>
		public static float[] DecodeUsingSox(string fileIn, int srate, int secondsToAnalyze) {

			lock (_locker) {
				using (Process toraw = new Process())
				{
					fileIn = Regex.Replace(fileIn, "%20", " ");
					DbgTimer t = new DbgTimer();
					t.Start();
					Dbg.WriteLine("Decoding: " + fileIn);
					String tempFile = System.IO.Path.GetTempFileName();
					String raw = tempFile + "_raw.wav";
					Dbg.WriteLine("Temporary raw file: " + raw);
					
					float[] floatBuffer = null;
					FileStream fs = null;
					try {
						toraw.StartInfo.FileName = "./NativeLibraries\\sox\\sox.exe";
						toraw.StartInfo.Arguments = " \"" + fileIn + "\" -r "+srate+" -e float -b 32 -G -t raw \"" + raw + "\" channels 1";
						toraw.StartInfo.UseShellExecute = false;
						toraw.StartInfo.RedirectStandardOutput = true;
						toraw.StartInfo.RedirectStandardError = true;
						
						// Drain both redirected pipes while the process runs. Reading them only
						// after WaitForExit() can deadlock: the child blocks once a pipe buffer
						// is full and therefore never exits.
						var standardOutput = new System.Text.StringBuilder();
						var standardError = new System.Text.StringBuilder();
						toraw.OutputDataReceived += (sender, e) => { if (e.Data != null) standardOutput.AppendLine(e.Data); };
						toraw.ErrorDataReceived += (sender, e) => { if (e.Data != null) standardError.AppendLine(e.Data); };
						
						toraw.Start();
						toraw.BeginOutputReadLine();
						toraw.BeginErrorReadLine();
						// The parameterless overload also waits for the asynchronous readers to finish.
						toraw.WaitForExit();
						
						// 0 = successful
						// 1 = partially successful
						// 2 = failed
						if (toraw.ExitCode != 0) {
							Console.Out.WriteLine(standardError.ToString());
							return null;
						}
						
						#if DEBUG
						Console.Out.WriteLine(standardOutput.ToString());
						#endif
						
						try {
							FileInfo fi = new FileInfo(raw);
							fs = fi.OpenRead();
							
							// 64 bit arithmetic so that long files or large windows cannot overflow.
							long length = fi.Length;
							if (length % sizeof(float) != 0)
								return null;
							long samples = length / sizeof(float);
							
							// if the audio file is larger than seconds to analyze,
							// find a proper section to extract
							long bytesToRead = length;
							long windowBytes = (long)secondsToAnalyze * srate * sizeof(float);
							if (length > windowBytes) {
								long seekto = (samples/2 - (long)(secondsToAnalyze/2) * srate) * sizeof(float);
								Dbg.WriteLine("Extracting section: seekto = " + seekto);
								bytesToRead = windowBytes;
								fs.Seek(seekto, SeekOrigin.Begin);
							}
							
							// A single Read call may deliver fewer bytes than requested, so keep
							// reading until the buffer is full or the end of the file is reached.
							byte[] bytesBuffer = new byte[bytesToRead];
							int filled = 0;
							while (filled < bytesBuffer.Length) {
								int got = fs.Read(bytesBuffer, filled, bytesBuffer.Length - filled);
								if (got == 0)
									break; // end of file: the remaining samples stay 0
								filled += got;
							}
							
							int items = bytesBuffer.Length / sizeof(float);
							floatBuffer = new float[items];
							for (int i = 0; i < items; i++) {
								floatBuffer[i] = BitConverter.ToSingle(bytesBuffer, i * sizeof(float));
							}
						} catch (System.IO.FileNotFoundException) {
							floatBuffer = null;
						}
					} finally {
						if (fs != null)
							fs.Close();
						
						// Remove both temporary files on every path (success, failure, exception).
						// File.Delete does not complain about a file that does not exist.
						foreach (string leftover in new[] { tempFile, raw })
						{
							try
							{
								File.Delete(leftover);
							}
							catch (IOException io)
							{
								Console.WriteLine(io);
							}
						}
						
						Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
					}
					return floatBuffer;
				}
			}
		}
        /// <summary>
        /// Turns a logarithmized spectrum into fingerprints: the spectrum is cut into
        /// spectral images, the images are wavelet transformed in place and the top
        /// wavelets are extracted from every image.
        /// </summary>
        /// <param name="logarithmizedSpectrum">the logarithmized spectrum to cut up</param>
        /// <param name="stride">stride passed to the spectrum cutter</param>
        /// <param name="fingerprintLength">fingerprint length passed to the spectrum cutter</param>
        /// <param name="overlap">overlap passed to the spectrum cutter</param>
        /// <param name="topWavelets">number of top wavelets passed to the extractor</param>
        /// <param name="spectralImages">receives the spectral images after the in place wavelet transform</param>
        /// <returns>one fingerprint per spectral image, in the same order</returns>
        public List<bool[]> CreateFingerprintsFromLogSpectrum(
			double[][] logarithmizedSpectrum, IStride stride, int fingerprintLength, int overlap, int topWavelets, out List<double[][]> spectralImages)
        {
            DbgTimer timer = new DbgTimer();
            timer.Start ();

            // Step 1: slice the logarithmic spectrogram into smaller spectrograms, one stride apart.
            spectralImages = SpectrumService.CutLogarithmizedSpectrum(logarithmizedSpectrum, stride, fingerprintLength, overlap);

            // Step 2: wavelet transform every slice in place, to reduce the resolution later on.
            WaveletService.ApplyWaveletTransformInPlace(spectralImages);

            // Step 3: reduce the resolution by keeping only the top wavelets of each slice.
            // Their magnitude is dropped and only the sign (+/-) is kept, which is enough
            // to retain the perceptual characteristics of a song.
            List<bool[]> result = new List<bool[]>(spectralImages.Count);
            for (int index = 0; index < spectralImages.Count; index++)
            {
                result.Add(FingerprintDescriptor.ExtractTopWavelets(spectralImages[index], topWavelets));
            }

            Dbg.WriteLine ("Created {1} Fingerprints from Log Spectrum - Execution Time: {0} ms", timer.Stop().TotalMilliseconds, result.Count);
            return result;
        }