예제 #1
0
		/// <summary>
		/// Transforms one window of sample data into MFCCs. The following steps are performed:
		/// <list type="number">
		/// <item><description>normalized power FFT with Hanning window function</description></item>
		/// <item><description>conversion to the Mel scale by applying a mel filter bank</description></item>
		/// <item><description>conversion to dB</description></item>
		/// <item><description>finally a DCT is performed to get the MFCCs</description></item>
		/// </list>
		/// This process is mathematically identical to the process described in [1].
		/// </summary>
		/// <param name="window">double[] data to be converted; must contain at least
		///                        <c>windowSize</c> values starting at <paramref name="start"/></param>
		/// <param name="start">int start index of the window data; must not be negative</param>
		/// <returns>double[] the output of the final DCT step (the MFCCs of the window),
		///          as a column-packed copy of the result matrix</returns>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="start"/> is negative</exception>
		/// <exception cref="ArgumentNullException"><paramref name="window"/> is null</exception>
		/// <exception cref="ArgumentException"><paramref name="window"/> holds fewer than
		///            <c>windowSize</c> values from <paramref name="start"/> onwards</exception>
		public double[] ProcessWindow(double[] window, int start)
		{
			//number of unique coefficients; the remaining ones are symmetrically redundant
			int fftSize = (windowSize / 2) + 1;

			//check start (zero is a valid index, only negative values are rejected)
			if (start < 0)
			{
				throw new ArgumentOutOfRangeException("start", start, "start must not be negative");
			}

			//check window data
			if (window == null)
			{
				throw new ArgumentNullException("window", "the given data array must not be a null value");
			}

			if (window.Length - start < windowSize)
			{
				throw new ArgumentException("the given data array must contain data for one window", "window");
			}

			//copy one window worth of samples into the working buffer
			Array.Copy(window, start, buffer, 0, windowSize);

			//perform power fft (presumably in place: buffer is read back right after the call)
			normalizedPowerFFT.Transform(buffer, null);

			//keep only rows 0..fftSize-1 of column 0, i.e. all coefficients up to and
			//including the nyquist frequency (fftSize-1 is its index)
			Matrix x = new Matrix(buffer, windowSize);
			x = x.GetMatrix(0, fftSize - 1, 0, 0);

			//apply mel filter banks
			x = melFilterBanks.Times(x);

			//to db: natural log converted to log base 10 and scaled by factor 10
			double dbScale = 10 * (1 / Math.Log(10));
			x.ThrunkAtLowerBoundary(1);
			x.LogEquals();
			x.TimesEquals(dbScale);

			//compute DCT
			x = dctMatrix.Times(x);

			return x.GetColumnPackedCopy();
		}
예제 #2
0
        /// <summary>
        /// Runs the mel filter bank, logarithm (dB) and DCT stages on the given matrix
        /// using the Comirva Matrix methods, and logs the elapsed time.
        /// </summary>
        /// <param name="m">matrix (stftdata); passed by reference and reassigned after each
        /// stage, so on return the caller's variable refers to the final result</param>
        /// <returns>matrix mel scaled, log scaled and dct'ed (the same reference that is
        /// left in <paramref name="m"/>)</returns>
        public Matrix ApplyMelScaleDCTComirva(ref Matrix m)
        {
            Mirage.DbgTimer stopwatch = new Mirage.DbgTimer();
            stopwatch.Start();

            // Stage 4: Mel scale filterbank.
            // Mel frequency is proportional to the logarithm of the linear frequency,
            // reflecting similar effects in human subjective aural perception.
            m = filterWeights * m;

            // Stage 5: logarithm (to dB).
            // The factor turns the natural log into log base 10 and scales it by 20.
            // NOTE(review): ThrunkAtLowerBoundary(1) presumably clamps entries below 1
            // before the log is taken - confirm against the Matrix implementation.
            double dbFactor = 20 * (1 / Math.Log(10));
            m.ThrunkAtLowerBoundary(1);
            m.LogEquals();
            m = m * dbFactor;

            // Stage 6: DCT (Discrete Cosine Transform).
            m = dct * m;

            string timingMessage = "mfcc (MfccMirage-ComirvaWay) Execution Time: " + stopwatch.Stop().TotalMilliseconds + " ms";
            Mirage.Dbg.WriteLine(timingMessage);
            return m;
        }