/// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
        /// Developed for GenericRecognizer of harmonics.
        /// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
        ///          But it requires that the frequency of any potential formants is not changing rapidly.
        ///          THis may not be suitable for detecting human speech. However can reduce the frame step.
        /// </summary>
        /// <param name="m">spectrogram data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);
            var binCount = m.GetLength(1);

            //set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

            // set up arrays to store decibels, formant intensity and max index.
            var dBArray       = new double[rowCount];
            var intensity     = new double[rowCount];
            var maxIndexArray = new int[rowCount];

            // for all time frames
            for (int t = 2; t < rowCount - 2; t++)
            {
                // get average of five adjacent frames
                var frame1 = MatrixTools.GetRow(m, t - 2);
                var frame2 = MatrixTools.GetRow(m, t - 1);
                var frame3 = MatrixTools.GetRow(m, t);
                var frame4 = MatrixTools.GetRow(m, t + 1);
                var frame5 = MatrixTools.GetRow(m, t + 2);
                var frame  = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    frame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
                }

                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    // Would normally normalise the xcorr values for overlap count.
                    // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
                    //normXr[i] = xr[i] / (colCount - i);
                    normXr[i] = xr[i];
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            }

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }
Example #2
0
        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Developed for GenericRecognizer of harmonics.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>two arrays.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);

            double[] dBArray       = new double[rowCount];
            var      intensity     = new double[rowCount]; //an array of formant intensity
            var      maxIndexArray = new int[rowCount];    //an array of max value index values
            var      binCount      = m.GetLength(1);

            double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients

            // for all time frames
            for (int t = 0; t < rowCount; t++)
            {
                var    frame    = MatrixTools.GetRow(m, t);
                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half. Also need to normalise the values for overlap count.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    normXr[i] = xr[i] / (colCount - i);
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            } // frames = rows of matrix

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }