/// <summary>
/// Undo the logarithm (dB) step and the mel scaling of a mel scaled matrix.
/// </summary>
/// <remarks>
/// The values of <paramref name="mel"/> are overwritten in place with their
/// un-logged counterparts before the inverse mel scaling is performed.
/// </remarks>
/// <param name="mel">mel scaled (and logged) matrix</param>
/// <returns>matrix mel removed and un-logged (e.g. stftdata)</returns>
public Matrix InverseMelScaleAndLog(ref Matrix mel)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 5. Take Inverse Logarithm: value = 10^(dB / 20)
    // Divide with first triangle height in order to scale properly
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("Inverse Mel Scale And Log - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Mel scale the matrix, convert it to dB and then run the Haar wavelet
/// transform and compression on the result.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <param name="lastHeight">height value set by ApplyWaveletCompression</param>
/// <param name="lastWidth">width value set by ApplyWaveletCompression</param>
/// <returns>matrix mel scaled and wavelet'ed</returns>
public Matrix ApplyMelScaleAndWaveletCompress(ref Matrix m, out int lastHeight, out int lastWidth)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. Perform the Wavelet Transform and Compress
    Matrix compressed = ApplyWaveletCompression(ref mel, out lastHeight, out lastWidth);

    Mirage.Dbg.WriteLine("Mel Scale And Wavelet Compression - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return compressed;
}
/// <summary>
/// Perform an inverse mfcc: an idct, an inverse logarithm and an inverse
/// Mel Filterbank, returning stftdata.
/// </summary>
/// <param name="mfcc">mfcc matrix</param>
/// <returns>matrix idct'ed and mel removed (e.g. stftdata)</returns>
public Matrix InverseMelScaleDCT(ref Matrix mfcc)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Perform the IDCT (Inverse Discrete Cosine Transform)
    // by multiplying with the transposed dct matrix
    Matrix mel = dct.Transpose() * mfcc;

    // 5. Take Inverse Logarithm: value = 10^(dB / 20),
    // divided by the first triangle height
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("imfcc (MfccMirage-MirageWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Perform an inverse haar wavelet mel scaled transform: uncompress and
/// inverse wavelet transform, inverse logarithm and inverse Mel Filterbank,
/// returning stftdata.
/// </summary>
/// <remarks>
/// <paramref name="wavelet"/> is handed on by reference to
/// InverseWaveletCompression, which replaces it with its resized version.
/// </remarks>
/// <param name="wavelet">wavelet matrix</param>
/// <param name="firstHeight">height value forwarded to InverseWaveletCompression</param>
/// <param name="firstWidth">width value forwarded to InverseWaveletCompression</param>
/// <returns>matrix inverse wavelet'ed and mel removed (e.g. stftdata)</returns>
public Matrix InverseMelScaleAndWaveletCompress(ref Matrix wavelet, int firstHeight, int firstWidth)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Uncompress and then perform the Inverse Wavelet Transform.
    // The target size is (number of mel frequency indexes - 2) rows
    // and the column count of the incoming wavelet matrix.
    int melRows = melScaleFreqsIndex.Length - 2;
    int melColumns = wavelet.Columns;
    Matrix mel = InverseWaveletCompression(ref wavelet, firstHeight, firstWidth, melRows, melColumns);

    // 5. Take Inverse Logarithm: value = 10^(dB / 20)
    // Divide with first triangle height in order to scale properly
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("Inverse Mel Scale and Wavelet Compression - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Apply the Mel Filterbank, convert to dB and apply the internal DCT.
/// The logarithm is taken with a single explicit loop over the matrix data.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and dct'ed</returns>
public Matrix ApplyMelScaleDCT(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. DCT (Discrete Cosine Transform)
    Matrix coefficients = dct * mel;

    Mirage.Dbg.WriteLine("mfcc (MfccMirage-MirageWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return coefficients;
}
/// <summary>
/// Perform an inverse DCT by multiplying the input with the transposed dct matrix.
/// </summary>
/// <param name="input">dct matrix</param>
/// <returns>matrix idct'ed (e.g. logSpectrogram)</returns>
public Matrix InverseDCT(ref Matrix input)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Perform the IDCT (Inverse Discrete Cosine Transform)
    Matrix restored = dct.Transpose() * input;

    Mirage.Dbg.WriteLine("InverseDCT Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return restored;
}
/// <summary>
/// Apply the DCT (Discrete Cosine Transform) to the matrix.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="m"/> is replaced by the
/// transformed matrix, which is also the return value.
/// </remarks>
/// <param name="m">matrix (logSpectrogram)</param>
/// <returns>matrix dct'ed</returns>
public Matrix ApplyDCT(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. DCT (Discrete Cosine Transform)
    Matrix transformed = dct * m;
    m = transformed;

    Mirage.Dbg.WriteLine("ApplyDCT Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return m;
}
/// <summary>
/// Uncompress (resize) a compressed haar wavelet matrix and perform the
/// inverse wavelet transform, e.g. an ihaar2d returning a logSpectrogram.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="wavelet"/> is replaced by the
/// resized matrix; the decompression itself is run on a copy of it.
/// </remarks>
/// <param name="wavelet">wavelet matrix</param>
/// <param name="firstHeight">height value forwarded to Decompress2D</param>
/// <param name="firstWidth">width value forwarded to Decompress2D</param>
/// <param name="rows">number of rows to resize the wavelet matrix to</param>
/// <param name="columns">number of columns to resize the wavelet matrix to</param>
/// <returns>matrix inverse wavelet'ed (e.g. logSpectrogram)</returns>
public Matrix InverseWaveletCompression(ref Matrix wavelet, int firstHeight, int firstWidth, int rows, int columns)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // Resize, i.e. uncompress
    wavelet = wavelet.Resize(rows, columns);

    // 6. Perform the Inverse Wavelet Transform on the data of a copy;
    // any return value of Decompress2D is not used
    Matrix restored = wavelet.Copy();
    Wavelets.Compress.WaveletDecompress.Decompress2D(
        restored.MatrixData,
        numberWaveletTransforms,
        firstHeight,
        firstWidth);

    Mirage.Dbg.WriteLine("Inverse Wavelet Compression - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return restored;
}
/// <summary>
/// Haar wavelet transform a copy of the matrix and compress the result by
/// resizing it to numberCoefficients rows.
/// </summary>
/// <param name="m">matrix (logSpectrogram)</param>
/// <param name="lastHeight">height value set by HaarTransform2D</param>
/// <param name="lastWidth">width value set by HaarTransform2D</param>
/// <returns>matrix wavelet'ed</returns>
public Matrix ApplyWaveletCompression(ref Matrix m, out int lastHeight, out int lastWidth)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // Wavelet Transform, run on the data of a copy of the input
    Matrix transformed = m.Copy();
    Wavelets.Compress.WaveletCompress.HaarTransform2D(
        transformed.MatrixData,
        numberWaveletTransforms,
        out lastHeight,
        out lastWidth);

    // Compress: resize to numberCoefficients rows, column count unchanged
    Matrix compressed = transformed.Resize(numberCoefficients, transformed.Columns);

    Mirage.Dbg.WriteLine("Wavelet Compression - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return compressed;
}
/// <summary>
/// Perform an inverse STFT and return the audiodata.
/// </summary>
/// <remarks>
/// The output buffer holds winsize + (number of columns * hopsize) samples.
/// Background: <see href="http://stackoverflow.com/questions/1230906/reverse-spectrogram-a-la-aphex-twin-in-matlab">Reverse Spectrogram A La Aphex Twin in MATLAB</see>
/// </remarks>
/// <param name="stft">A matrix with the STFT, one column per hop</param>
/// <returns>Audio data</returns>
public double[] InverseStft(Matrix stft)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // every column of the stft matrix is one hop
    int columns = stft.Columns;

    // PIN: Removed -1 from (columns-1)
    int signalLength = columns * hopsize + winsize;
    double[] signal = new double[signalLength];

    // Take the ifft of each column of pixels and piece together the results.
    for (int column = 0; column < columns; column++)
    {
        fft.ComputeInverseComirvaMatrixUsingLomontTableFFT(stft, column, ref signal, winsize, hopsize);
    }

    Mirage.Dbg.WriteLine("Perform Inverse Short Term Fourier Transform (ComputeComirvaMatrix) - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return signal;
}
/// <summary>
/// Apply the STFT (Short Term Fourier Transform) on the audiodata.
/// </summary>
/// <remarks>
/// The number of hops is (audiodata.Length - winsize) / hopsize. When the
/// audio is shorter than a single window no hop fits, and the matrix is
/// created with zero columns.
/// </remarks>
/// <param name="audiodata">Audiodata to apply the STFT on</param>
/// <returns>A matrix with winsize / 2 rows and one column per hop</returns>
/// <exception cref="ArgumentNullException">audiodata is null</exception>
public Matrix Apply(float[] audiodata)
{
    if (audiodata == null)
    {
        throw new ArgumentNullException("audiodata");
    }

    var timer = new Mirage.DbgTimer();
    timer.Start();

    // PIN: Removed + 1
    int hops = (audiodata.Length - winsize) / hopsize;
    if (hops < 0)
    {
        // Audio shorter than one window. Integer division truncates towards
        // zero, so slightly-too-short audio already gives 0 hops; clamp so
        // that much shorter audio behaves the same instead of asking for a
        // matrix with a negative number of columns.
        hops = 0;
    }

    // Create a Matrix with "winsize / 2" Rows and "hops" Columns
    // Matrix[Row, Column]
    var stft = new Matrix(winsize / 2, hops);

    for (int hop = 0; hop < hops; hop++)
    {
        // Lomont RealFFT seems to be the fastest option
        // (alternatives: ComputeComirvaMatrixUsingFftw, ComputeComirvaMatrixUsingLomontTableFFT)
        fft.ComputeComirvaMatrixUsingLomontRealFFT(ref stft, hop, audiodata, hop * hopsize);
    }

    Mirage.Dbg.WriteLine("Perform Short Term Fourier Transform (ComputeComirvaMatrix) - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Mel scale the matrix, convert it to dB, zero pad it to a power-of-two
/// square when needed and perform the Haar wavelet transform.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and wavelet'ed</returns>
public Matrix ApplyMelScaleWaveletPadding(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. Wavelet Transform
    // make sure the matrix is square before transforming (by zero padding)
    // NOTE(review): IsSymmetric() serves as the "is square" test here - confirm
    Matrix padded;
    if (mel.IsSymmetric() && MathUtils.IsPowerOfTwo(mel.Rows))
    {
        // already usable as is
        padded = mel;
    }
    else
    {
        // grow to the next power of two of the largest dimension
        int largestSide = Math.Max(mel.Rows, mel.Columns);
        int paddedSide = MathUtils.NextPowerOfTwo(largestSide);
        padded = mel.Resize(paddedSide, paddedSide);
    }
    Matrix wavelet = WaveletUtils.HaarWaveletTransform(padded.MatrixData, true);

    Mirage.Dbg.WriteLine("Wavelet Mel Scale And Wavelet Compression Padding - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return wavelet;
}
/// <summary>
/// Perform an inverse STFT and return the audiodata.
/// </summary>
/// <remarks>
/// The output buffer holds winsize + (number of columns * hopsize) samples.
/// Background: <see href="http://stackoverflow.com/questions/1230906/reverse-spectrogram-a-la-aphex-twin-in-matlab">Reverse Spectrogram A La Aphex Twin in MATLAB</see>
/// </remarks>
/// <param name="stft">A matrix with the STFT, one column per hop</param>
/// <returns>Audio data</returns>
public double[] InverseStft(Matrix stft)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // every column of the stft matrix is one hop
    int columns = stft.Columns;

    // PIN: Removed -1 from (columns-1)
    int signalLength = columns * hopsize + winsize;
    double[] signal = new double[signalLength];

    // Take the ifft of each column of pixels and piece together the results.
    for (int column = 0; column < columns; column++)
    {
        fft.ComputeInverseComirvaMatrixUsingLomontTableFFT(stft, column, ref signal, winsize, hopsize);
    }

    Mirage.Dbg.WriteLine("Perform Inverse Short Term Fourier Transform (ComputeComirvaMatrix) - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return signal;
}
/// <summary>
/// Apply the STFT (Short Term Fourier Transform) on the audiodata.
/// </summary>
/// <remarks>
/// The number of hops is (audiodata.Length - winsize) / hopsize. When the
/// audio is shorter than a single window no hop fits, and the matrix is
/// created with zero columns.
/// </remarks>
/// <param name="audiodata">Audiodata to apply the STFT on</param>
/// <returns>A matrix with winsize / 2 rows and one column per hop</returns>
/// <exception cref="ArgumentNullException">audiodata is null</exception>
public Matrix Apply(float[] audiodata)
{
    if (audiodata == null)
    {
        throw new ArgumentNullException("audiodata");
    }

    var timer = new Mirage.DbgTimer();
    timer.Start();

    // PIN: Removed + 1
    int hops = (audiodata.Length - winsize) / hopsize;
    if (hops < 0)
    {
        // Audio shorter than one window. Integer division truncates towards
        // zero, so slightly-too-short audio already gives 0 hops; clamp so
        // that much shorter audio behaves the same instead of asking for a
        // matrix with a negative number of columns.
        hops = 0;
    }

    // Create a Matrix with "winsize / 2" Rows and "hops" Columns
    // Matrix[Row, Column]
    var stft = new Matrix(winsize / 2, hops);

    for (int hop = 0; hop < hops; hop++)
    {
        // Lomont RealFFT seems to be the fastest option
        // (alternatives: ComputeComirvaMatrixUsingFftw, ComputeComirvaMatrixUsingLomontTableFFT)
        fft.ComputeComirvaMatrixUsingLomontRealFFT(ref stft, hop, audiodata, hop * hopsize);
    }

    Mirage.Dbg.WriteLine("Perform Short Term Fourier Transform (ComputeComirvaMatrix) - Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Apply the Mel Filterbank, convert to dB and apply the internal DCT,
/// utilising the Comirva Matrix methods for the logarithm step.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="m"/> is reassigned along the way
/// and ends up referring to the returned matrix.
/// </remarks>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and dct'ed</returns>
public Matrix ApplyMelScaleDCTComirva(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    m = filterWeights * m;

    // 5. Take Logarithm, to dB.
    // Multiplying by 1 / ln(10) converts to base 10; the factor 20 scales to dB.
    // NOTE(review): presumably ThrunkAtLowerBoundary(1) clamps values below 1
    // and LogEquals() takes the natural log in place - confirm in Matrix
    double decibelFactor = 20 * (1 / Math.Log(10));
    m.ThrunkAtLowerBoundary(1);
    m.LogEquals();
    m = m * decibelFactor;

    // 6. DCT (Discrete Cosine Transform)
    m = dct * m;

    Mirage.Dbg.WriteLine("mfcc (MfccMirage-ComirvaWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return m;
}
/// <summary>
/// Apply the Mel Filterbank, convert to dB and apply the internal DCT.
/// The logarithm is taken with a single explicit loop over the matrix data.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and dct'ed</returns>
public Matrix ApplyMelScaleDCT(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. DCT (Discrete Cosine Transform)
    Matrix coefficients = dct * mel;

    Mirage.Dbg.WriteLine("mfcc (MfccMirage-MirageWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return coefficients;
}
/// <summary>
/// Uncompress (resize) a compressed haar wavelet matrix and perform the
/// inverse wavelet transform, e.g. an ihaar2d returning a logSpectrogram.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="wavelet"/> is replaced by the
/// resized matrix; the decompression itself is run on a copy of it.
/// </remarks>
/// <param name="wavelet">wavelet matrix</param>
/// <param name="firstHeight">height value forwarded to Decompress2D</param>
/// <param name="firstWidth">width value forwarded to Decompress2D</param>
/// <param name="rows">number of rows to resize the wavelet matrix to</param>
/// <param name="columns">number of columns to resize the wavelet matrix to</param>
/// <returns>matrix inverse wavelet'ed (e.g. logSpectrogram)</returns>
public Matrix InverseWaveletCompression(ref Matrix wavelet, int firstHeight, int firstWidth, int rows, int columns)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // Resize, i.e. uncompress
    wavelet = wavelet.Resize(rows, columns);

    // 6. Perform the Inverse Wavelet Transform on the data of a copy;
    // any return value of Decompress2D is not used
    Matrix restored = wavelet.Copy();
    Wavelets.Compress.WaveletDecompress.Decompress2D(
        restored.MatrixData,
        numberWaveletTransforms,
        firstHeight,
        firstWidth);

    Mirage.Dbg.WriteLine("Inverse Wavelet Compression Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return restored;
}
/// <summary>
/// Perform an inverse haar wavelet mel scaled transform: an ihaar2d, removal
/// of the padding, inverse logarithm and inverse Mel Filterbank, returning stftdata.
/// </summary>
/// <param name="wavelet">wavelet matrix</param>
/// <returns>matrix inverse wavelet'ed and mel removed (e.g. stftdata)</returns>
public Matrix InverseMelScaleWaveletPadding(ref Matrix wavelet)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Perform the Inverse Wavelet Transform
    Matrix padded = WaveletUtils.InverseHaarWaveletTransform(wavelet.MatrixData);

    // Resize (remove padding): (number of mel frequency indexes - 2) rows,
    // column count taken from the incoming wavelet matrix
    Matrix mel = padded.Resize(melScaleFreqsIndex.Length - 2, wavelet.Columns);

    // 5. Take Inverse Logarithm: value = 10^(dB / 20)
    // Divide with first triangle height in order to scale properly
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("Inverse Wavelet Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Perform an inverse mfcc: an idct, an inverse logarithm and an inverse
/// Mel Filterbank, returning stftdata.
/// </summary>
/// <param name="mfcc">mfcc matrix</param>
/// <returns>matrix idct'ed and mel removed (e.g. stftdata)</returns>
public Matrix InverseMelScaleDCT(ref Matrix mfcc)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Perform the IDCT (Inverse Discrete Cosine Transform)
    // by multiplying with the transposed dct matrix
    Matrix mel = dct.Transpose() * mfcc;

    // 5. Take Inverse Logarithm: value = 10^(dB / 20),
    // divided by the first triangle height
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("imfcc (MfccMirage-MirageWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Undo the logarithm (dB) step and the mel scaling of a mel scaled matrix.
/// </summary>
/// <remarks>
/// The values of <paramref name="mel"/> are overwritten in place with their
/// un-logged counterparts before the inverse mel scaling is performed.
/// </remarks>
/// <param name="mel">mel scaled (and logged) matrix</param>
/// <returns>matrix mel removed and un-logged (e.g. stftdata)</returns>
public Matrix InverseMelScaleAndLog(ref Matrix mel)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 5. Take Inverse Logarithm: value = 10^(dB / 20)
    // Divide with first triangle height in order to scale properly
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double decibel = data[row][col];
            data[row][col] = Math.Pow(10, decibel / 20) / melScaleTriangleHeights[0];
        }
    }

    // 4. Inverse Mel Scale using interpolation (instead of multiplying with
    // the transposed filter weights), i.e. from e.g.
    //   mel  = Rows: 40,   Columns: 165 (average freq, time slice)
    // to
    //   stft = Rows: 1024, Columns: 165 (freq, time slice)
    var stft = new Matrix(filterWeights.Columns, mel.Columns);
    InverseMelScaling(mel, stft);

    Mirage.Dbg.WriteLine("InverseMelScaleAndLog Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return stft;
}
/// <summary>
/// Perform an inverse DCT by multiplying the input with the transposed dct matrix.
/// </summary>
/// <param name="input">dct matrix</param>
/// <returns>matrix idct'ed (e.g. logSpectrogram)</returns>
public Matrix InverseDCT(ref Matrix input)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. Perform the IDCT (Inverse Discrete Cosine Transform)
    Matrix restored = dct.Transpose() * input;

    Mirage.Dbg.WriteLine("InverseDCT Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return restored;
}
/// <summary>
/// Haar wavelet transform a copy of the matrix and compress the result by
/// resizing it to numberCoefficients rows.
/// </summary>
/// <param name="m">matrix (logSpectrogram)</param>
/// <param name="lastHeight">height value set by HaarTransform2D</param>
/// <param name="lastWidth">width value set by HaarTransform2D</param>
/// <returns>matrix wavelet'ed</returns>
public Matrix ApplyWaveletCompression(ref Matrix m, out int lastHeight, out int lastWidth)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // Wavelet Transform, run on the data of a copy of the input
    Matrix transformed = m.Copy();
    Wavelets.Compress.WaveletCompress.HaarTransform2D(
        transformed.MatrixData,
        numberWaveletTransforms,
        out lastHeight,
        out lastWidth);

    // Compress: resize to numberCoefficients rows, column count unchanged
    Matrix compressed = transformed.Resize(numberCoefficients, transformed.Columns);

    Mirage.Dbg.WriteLine("Wavelet Compression Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return compressed;
}
/// <summary>
/// Mel scale the matrix, convert it to dB, zero pad it to a power-of-two
/// square when needed and perform the Haar wavelet transform.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and wavelet'ed</returns>
public Matrix ApplyMelScaleWaveletPadding(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. Wavelet Transform
    // make sure the matrix is square before transforming (by zero padding)
    // NOTE(review): IsSymmetric() serves as the "is square" test here - confirm
    Matrix padded;
    if (mel.IsSymmetric() && MathUtils.IsPowerOfTwo(mel.Rows))
    {
        // already usable as is
        padded = mel;
    }
    else
    {
        // grow to the next power of two of the largest dimension
        int largestSide = Math.Max(mel.Rows, mel.Columns);
        int paddedSide = MathUtils.NextPowerOfTwo(largestSide);
        padded = mel.Resize(paddedSide, paddedSide);
    }
    Matrix wavelet = WaveletUtils.HaarWaveletTransform(padded.MatrixData, true);

    Mirage.Dbg.WriteLine("Wavelet Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return wavelet;
}
/// <summary>
/// Mel scale the matrix, convert it to dB and then run the Haar wavelet
/// transform and compression on the result.
/// </summary>
/// <param name="m">matrix (stftdata)</param>
/// <param name="lastHeight">height value set by ApplyWaveletCompression</param>
/// <param name="lastWidth">width value set by ApplyWaveletCompression</param>
/// <returns>matrix mel scaled and wavelet'ed</returns>
public Matrix ApplyMelScaleWaveletCompression(ref Matrix m, out int lastHeight, out int lastWidth)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    Matrix mel = filterWeights * m;

    // 5. Take Logarithm (20 * log10); values below 1.0 are mapped to 0
    var data = mel.MatrixData;
    for (int row = 0; row < mel.Rows; row++)
    {
        for (int col = 0; col < mel.Columns; col++)
        {
            double value = data[row][col];
            if (value < 1.0)
            {
                data[row][col] = 0;
            }
            else
            {
                data[row][col] = 20.0 * Math.Log10(value);
            }
        }
    }

    // 6. Perform the Wavelet Transform and Compress
    Matrix compressed = ApplyWaveletCompression(ref mel, out lastHeight, out lastWidth);

    Mirage.Dbg.WriteLine("Wavelet Compression Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return compressed;
}
/// <summary>
/// Apply the DCT (Discrete Cosine Transform) to the matrix.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="m"/> is replaced by the
/// transformed matrix, which is also the return value.
/// </remarks>
/// <param name="m">matrix (logSpectrogram)</param>
/// <returns>matrix dct'ed</returns>
public Matrix ApplyDCT(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 6. DCT (Discrete Cosine Transform)
    Matrix transformed = dct * m;
    m = transformed;

    Mirage.Dbg.WriteLine("ApplyDCT Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return m;
}
/// <summary>
/// Create the fingerprint signatures for the audio samples using the given
/// fingerprint service and the fingerprintingConfigCreation configuration.
/// </summary>
/// <remarks>
/// In DEBUG builds, when Analyzer.DEBUG_INFO_VERBOSE is set, an image of the
/// fingerprints is saved as name + "_fingerprints.png".
/// </remarks>
/// <param name="fingerprintService">service used to create the fingerprints</param>
/// <param name="samples">audio samples</param>
/// <param name="name">file name prefix for the debug image</param>
/// <returns>the fingerprints created by the service</returns>
private static List<bool[]> GetFingerprintSignatures(FingerprintService fingerprintService, float[] samples, string name)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // work config
    var workUnit = new WorkUnitParameterObject();
    workUnit.FingerprintingConfiguration = fingerprintingConfigCreation;

    // Get fingerprints; the additional out results are required by the
    // service signature but are not used here
    double[][] logSpectrogram;
    List<double[][]> spectralImages;
    List<bool[]> fingerprints = fingerprintService.CreateFingerprintsFromAudioSamples(
        samples,
        workUnit,
        out logSpectrogram,
        out spectralImages);

    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE)
    {
        // Image Service
        var imageService = new ImageService(fingerprintService.SpectrumService, fingerprintService.WaveletService);

        int width = workUnit.FingerprintingConfiguration.FingerprintLength;
        int height = workUnit.FingerprintingConfiguration.LogBins;
        imageService.GetImageForFingerprints(fingerprints, width, height, 2).Save(name + "_fingerprints.png");
    }
    #endif

    Mirage.Dbg.WriteLine("GetFingerprintSignatures Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return fingerprints;
}
/// <summary>
/// Apply the Mel Filterbank, convert to dB and apply the internal DCT,
/// utilising the Comirva Matrix methods for the logarithm step.
/// </summary>
/// <remarks>
/// The caller's reference <paramref name="m"/> is reassigned along the way
/// and ends up referring to the returned matrix.
/// </remarks>
/// <param name="m">matrix (stftdata)</param>
/// <returns>matrix mel scaled and dct'ed</returns>
public Matrix ApplyMelScaleDCTComirva(ref Matrix m)
{
    var timer = new Mirage.DbgTimer();
    timer.Start();

    // 4. Mel Scale Filterbank
    // Mel-frequency is proportional to the logarithm of the linear frequency,
    // reflecting similar effects in the human's subjective aural perception
    m = filterWeights * m;

    // 5. Take Logarithm, to dB.
    // Multiplying by 1 / ln(10) converts to base 10; the factor 20 scales to dB.
    // NOTE(review): presumably ThrunkAtLowerBoundary(1) clamps values below 1
    // and LogEquals() takes the natural log in place - confirm in Matrix
    double decibelFactor = 20 * (1 / Math.Log(10));
    m.ThrunkAtLowerBoundary(1);
    m.LogEquals();
    m = m * decibelFactor;

    // 6. DCT (Discrete Cosine Transform)
    m = dct * m;

    Mirage.Dbg.WriteLine("mfcc (MfccMirage-ComirvaWay) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return m;
}