/// <summary>
/// Searches each time frame of a spectrogram sub-band for harmonic structure.
/// The matrix is expected to come from a rotated spectrogram, i.e. each matrix row holds one spectral column (one time frame).
/// Developed for the GenericRecognizer of harmonics.
/// WARNING: As of March 2020, every analysed frame is first replaced by the mean of itself and the two frames on either side (five frames in total). This is to reduce noise.
/// It requires that the frequency of any potential formants is not changing rapidly,
/// so this method may not be suitable for detecting human speech. Reducing the frame step can help.
/// Because of the five-frame window, the first two and the last two rows are never analysed and keep zero in all three output arrays.
/// </summary>
/// <param name="m">spectrogram data matrix; one row per time frame.</param>
/// <param name="dBThreshold">Minimum sound level. A frame whose averaged maximum is below this value gets its dB value recorded but is not analysed further.</param>
/// <returns>three arrays, each with one entry per matrix row: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // Square cosine table handed to the DCT step below.
    double[,] cosineTable = MFCCStuff.Cosines(binCount, binCount);

    // One slot per frame: maximum decibel value, strongest DCT coefficient, and the index of that coefficient.
    var decibels = new double[frameCount];
    var harmonicIntensity = new double[frameCount];
    var peakIndices = new int[frameCount];

    // Passed straight through to DoDct.
    // NOTE(review): presumably coefficients below this index are ignored - confirm against Oscillations2012.DoDct.
    int lowerDctBound = 2;

    // Start two frames in and stop two frames short so the five-frame window always fits.
    for (int centre = 2; centre < frameCount - 2; centre++)
    {
        var twoBack = MatrixTools.GetRow(m, centre - 2);
        var oneBack = MatrixTools.GetRow(m, centre - 1);
        var current = MatrixTools.GetRow(m, centre);
        var oneAhead = MatrixTools.GetRow(m, centre + 1);
        var twoAhead = MatrixTools.GetRow(m, centre + 2);

        // Bin-wise mean of the five neighbouring frames.
        var smoothed = new double[binCount];
        for (int bin = 0; bin < binCount; bin++)
        {
            double sum = twoBack[bin] + oneBack[bin] + current[bin] + oneAhead[bin] + twoAhead[bin];
            smoothed[bin] = sum / 5;
        }

        double peak = smoothed.Max();
        decibels[centre] = peak;

        // Too quiet: leave intensity and index at their default of zero.
        if (peak < dBThreshold)
        {
            continue;
        }

        // The original author notes that the result is twice the frame length and symmetrical,
        // so only the first half is used.
        double[] autoCorrelation = AutoAndCrossCorrelation.AutoCrossCorr(smoothed);

        // The values would normally be normalised for overlap count, i.e. divided by (binCount - lag).
        // That is deliberately NOT done here: for harmonics it introduces too much noise,
        // because the less overlapped values need to be given less weight.
        double[] firstHalf = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            firstHalf[lag] = autoCorrelation[lag];
        }

        // DCT across the autocorrelation, then pick the strongest coefficient.
        var dct = Oscillations2012.DoDct(firstHalf, cosineTable, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dct);
        harmonicIntensity[centre] = dct[strongest];
        peakIndices[centre] = strongest;
    }

    return Tuple.Create(decibels, harmonicIntensity, peakIndices);
}
/// <summary>
/// Searches each time frame of a sonogram sub-band for harmonic structure.
/// The matrix is expected to come from a rotated spectrogram, i.e. each matrix row holds one spectral column (one time frame) of the sonogram.
/// Developed for the GenericRecognizer of harmonics.
/// Each frame is analysed on its own (no averaging across frames), and its autocorrelation is normalised for overlap count before the DCT.
/// </summary>
/// <param name="m">data matrix; one row per time frame.</param>
/// <param name="dBThreshold">Minimum sound level. A frame whose maximum is below this value gets its dB value recorded but is not analysed further.</param>
/// <returns>three arrays, each with one entry per matrix row: the frame maximum (dB), the largest DCT coefficient (formant intensity), and the index of that coefficient.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // Per-frame outputs; entries of skipped frames keep their default of zero (except the dB value).
    double[] decibels = new double[frameCount];
    var formantIntensity = new double[frameCount];
    var peakIndices = new int[frameCount];

    // Square cosine table handed to the DCT step below.
    double[,] cosineTable = MFCCStuff.Cosines(binCount, binCount);

    // Passed straight through to DoDct.
    // NOTE(review): presumably coefficients below this index are ignored - confirm against Oscillations2012.DoDct.
    int lowerDctBound = 2;

    // Frames are the rows of the matrix.
    for (int row = 0; row < frameCount; row++)
    {
        var spectrum = MatrixTools.GetRow(m, row);

        double peak = spectrum.Max();
        decibels[row] = peak;

        // Too quiet: nothing more to do for this frame.
        if (peak < dBThreshold)
        {
            continue;
        }

        // The original author notes that the result is twice the frame length and symmetrical,
        // so only the first half is used.
        double[] autoCorrelation = AutoAndCrossCorrelation.AutoCrossCorr(spectrum);

        // Normalise each lag by its overlap count (binCount - lag).
        double[] normalised = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            int overlap = binCount - lag;
            normalised[lag] = autoCorrelation[lag] / overlap;
        }

        // DCT across the normalised autocorrelation, then pick the strongest coefficient.
        var dct = Oscillations2012.DoDct(normalised, cosineTable, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dct);
        formantIntensity[row] = dct[strongest];
        peakIndices[row] = strongest;
    }

    return Tuple.Create(decibels, formantIntensity, peakIndices);
}