AutoAndCrossCorrelation C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

Fichier : CrossCorrelation.cs Projet : ninascarpelli/audio-analysis

        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
        /// Developed for GenericRecognizer of harmonics.
        /// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
        ///          But it requires that the frequency of any potential formants is not changing rapidly.
        ///          THis may not be suitable for detecting human speech. However can reduce the frame step.
        /// </summary>
        /// <param name="m">spectrogram data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);
            var binCount = m.GetLength(1);

            //set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

            // set up arrays to store decibels, formant intensity and max index.
            var dBArray       = new double[rowCount];
            var intensity     = new double[rowCount];
            var maxIndexArray = new int[rowCount];

            // for all time frames
            for (int t = 2; t < rowCount - 2; t++)
            {
                // get average of five adjacent frames
                var frame1 = MatrixTools.GetRow(m, t - 2);
                var frame2 = MatrixTools.GetRow(m, t - 1);
                var frame3 = MatrixTools.GetRow(m, t);
                var frame4 = MatrixTools.GetRow(m, t + 1);
                var frame5 = MatrixTools.GetRow(m, t + 2);
                var frame  = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    frame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
                }

                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    // Would normally normalise the xcorr values for overlap count.
                    // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
                    //normXr[i] = xr[i] / (colCount - i);
                    normXr[i] = xr[i];
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            }

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }

Exemple #2

0

Afficher le fichier

        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Developed for GenericRecognizer of harmonics.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>two arrays.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);

            double[] dBArray       = new double[rowCount];
            var      intensity     = new double[rowCount]; //an array of formant intensity
            var      maxIndexArray = new int[rowCount];    //an array of max value index values
            var      binCount      = m.GetLength(1);

            double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients

            // for all time frames
            for (int t = 0; t < rowCount; t++)
            {
                var    frame    = MatrixTools.GetRow(m, t);
                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half. Also need to normalise the values for overlap count.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    normXr[i] = xr[i] / (colCount - i);
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            } // frames = rows of matrix

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }

Exemple #3

0

Afficher le fichier

Fichier : CrossCorrelation.cs Projet : ninascarpelli/audio-analysis

        } //DetectBarsInTheRowsOfaMatrix()

        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Was first developed for crow calls.
        /// First looks for a decibel profile that matches the passed call duration and decibel loudness.
        /// Then samples the centre portion for the correct harmonic period.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <param name="callSpan">Minimum length of call of interest.</param>
        /// <returns>a tuple.</returns>
        public static Tuple <double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
        {
            int rowCount    = m.GetLength(0);
            int colCount    = m.GetLength(1);
            var intensity   = new double[rowCount];   //an array of period intensity
            var periodicity = new double[rowCount];   //an array of the periodicity values

            double[] dBArray = MatrixTools.GetRowAverages(m);
            dBArray = DataTools.filterMovingAverage(dBArray, 3);

            // for all time frames
            for (int t = 0; t < rowCount; t++)
            {
                if (dBArray[t] < dBThreshold)
                {
                    continue;
                }

                var row          = MatrixTools.GetRow(m, t);
                var spectrum     = AutoAndCrossCorrelation.CrossCorr(row, row);
                int zeroBinCount = 3; //to remove low freq content which dominates the spectrum
                for (int s = 0; s < zeroBinCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId          = DataTools.GetMaxIndex(spectrum);
                double intensityValue = spectrum[maxId];
                intensity[t] = intensityValue;

                double period = 0.0;
                if (maxId != 0)
                {
                    period = 2 * colCount / (double)maxId;
                }

                periodicity[t] = period;
            }

            return(Tuple.Create(dBArray, intensity, periodicity));
        }

Exemple #4

0

Afficher le fichier

Fichier : Oscillations2014.cs Projet : stewartmacdonald/audio-analysis

        /// <summary>
        /// Returns a matrix whose columns consist of autocorrelations of freq bin samples.
        /// The columns are non-overlapping.
        /// </summary>
        public static double[,] GetXcorrByTimeMatrix(double[] signal, int sampleLength)
        {
            // NormaliseMatrixValues freq bin values to z-score. This is required else get spurious results
            signal = DataTools.Vector2Zscores(signal);

            int sampleCount = signal.Length / sampleLength;

            double[,] xCorrelationsByTime = new double[sampleLength, sampleCount];

            for (int s = 0; s < sampleCount; s++)
            {
                int      start    = s * sampleLength;
                double[] subArray = DataTools.Subarray(signal, start, sampleLength);
                double[] autocor  = AutoAndCrossCorrelation.AutoCorrelationOldJavaVersion(subArray);

                //DataTools.writeBarGraph(autocor);
                MatrixTools.SetColumn(xCorrelationsByTime, s, autocor);
            }

            return(xCorrelationsByTime);
        }

Exemple #5

0

Afficher le fichier

Fichier : CrossCorrelation.cs Projet : stewartmacdonald/audio-analysis

        /// <summary>
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        ///
        /// </summary>
        /// <param name="m"></param>
        /// <param name="amplitudeThreshold"></param>
        /// <returns></returns>
        public static Tuple <double[], double[]> DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount)
        {
            int rowCount    = m.GetLength(0);
            int colCount    = m.GetLength(1);
            var intensity   = new double[rowCount];   //an array of period intensity
            var periodicity = new double[rowCount];   //an array of the periodicity values

            double[] prevRow = MatrixTools.GetRow(m, 0);
            prevRow = DataTools.DiffFromMean(prevRow);

            for (int r = 1; r < rowCount; r++)
            {
                double[] thisRow = MatrixTools.GetRow(m, r);
                thisRow = DataTools.DiffFromMean(thisRow);

                var spectrum = AutoAndCrossCorrelation.CrossCorr(prevRow, thisRow);

                for (int s = 0; s < zeroBinCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId          = DataTools.GetMaxIndex(spectrum);
                double intensityValue = spectrum[maxId];
                intensity[r] = intensityValue;

                double period = 0.0;
                if (maxId != 0)
                {
                    period = 2 * colCount / (double)maxId;
                }

                periodicity[r] = period;

                prevRow = thisRow;
            }// rows

            return(Tuple.Create(intensity, periodicity));
        } //DetectBarsInTheRowsOfaMatrix()

Exemple #6

0

Afficher le fichier

        /// <summary>
        /// Returns a matrix whose columns consist of autocorrelations of freq bin samples.
        /// The columns are non-overlapping.
        /// </summary>
        /// <param name="signal">an array corresponding to one frequency bin.</param>
        /// <param name="sampleLength">the length of a sample or patch (non-overllapping) for which xcerrelation is obtained.</param>
        public static double[,] GetXcorrByTimeMatrix(double[] signal, int sampleLength)
        {
            // NormaliseMatrixValues freq bin values to z-score. This is required else get spurious results
            signal = DataTools.Vector2Zscores(signal);

            // get number of complete non-overlapping samples or patches
            var sampleCount         = signal.Length / sampleLength;
            var xCorrelationsByTime = new double[sampleLength, sampleCount];

            for (var s = 0; s < sampleCount; s++)
            {
                var start    = s * sampleLength;
                var subArray = DataTools.Subarray(signal, start, sampleLength);

                // do xcorr which returns an array same length as the sample or patch.
                var autocor = AutoAndCrossCorrelation.AutoCorrelationOldJavaVersion(subArray);

                //DataTools.writeBarGraph(autocor);
                MatrixTools.SetColumn(xCorrelationsByTime, s, autocor);
            }

            // return a matrix of [xCorrLength, sampleLength]
            return(xCorrelationsByTime);
        }

Exemple #7

0

Afficher le fichier

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], List <Plot>, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignore those set by user
            int    frameSize     = 128;
            double windowOverlap = 0.5;

            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds
            double minPeriod          = double.Parse(configDict["MIN_PERIOD"]);          // seconds
            double maxPeriod          = double.Parse(configDict["MAX_PERIOD"]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   frameOffset     = sonoConfig.GetFrameOffset(sr);
            double   framesPerSecond = 1 / frameOffset;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 256      17640       14.5ms          68.9        68.9    ms          hz          hz
            // 512      17640       29.0ms          34.4        34.4    ms          hz          hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //The Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            // Assuming sr=17640 and window=256, then binWidth = 68.9Hz and 1500Hz = bin 21.7..
            // Therefore do a Xcorrelation between bins 21 and 22.
            // Number of frames to span must power of 2. Try 16 frames which covers 232ms - almost 1/4 second.

            int midHz    = 1500;
            int lowerBin = (int)(midHz / freqBinWidth) + 1;  //because bin[0] = DC
            int upperBin = lowerBin + 4;
            int lowerHz  = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
            int upperHz  = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
            double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
            lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################
            upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################

            int step         = (int)(framesPerSecond / 40);               //take one/tenth second steps
            int stepCount    = rowCount / step;
            int sampleLength = 32;                                        //16 frames = 232ms - almost 1/4 second.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES

            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if (lowerSubarray == null || upperSubarray == null)
                {
                    break;
                }

                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 2;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }

                    periodicity[start + j] = period;
                }
            }

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 3);
            intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5 #######################################################################

            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerHz,
                upperHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);
            var hits = new double[rowCount, colCount];

            var plots = new List <Plot>();

            //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
            //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
            //plots.Add(new Plot("lowerArray", DataTools.NormaliseMatrixValues(lowerArray), 0.25));
            //plots.Add(new Plot("upperArray", DataTools.NormaliseMatrixValues(upperArray), 0.25));
            //plots.Add(new Plot("intensity",  DataTools.NormaliseMatrixValues(intensity), intensityThreshold));
            plots.Add(new Plot("intensity", intensity, intensityThreshold));

            return(Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration));
        } //Analysis()

Exemple #8

0

Afficher le fichier

Fichier : IctalurusFurcatus.cs Projet : stewartmacdonald/audio-analysis

        public static double[] CalculateScores(double[] subBandSpectrum, int windowWidth)
        {
            double[] scores = { 0, 0, 0 };

            //TEST ONE

            /*
             * double totalAreaUnderSpectrum = subBandSpectrum.Sum();
             * double areaUnderLowest24bins = 0.0;
             * for (int i = 0; i < 24; i++)
             * {
             *  areaUnderLowest24bins += subBandSpectrum[i];
             * }
             * double areaUnderHighBins = totalAreaUnderSpectrum - areaUnderLowest24bins;
             * double areaUnderBins4to7 = 0.0;
             * for (int i = 4; i < 7; i++)
             * {
             *  areaUnderBins4to7 += subBandSpectrum[i];
             * }
             * double ratio1 = areaUnderBins4to7 / areaUnderLowest24bins;
             *
             * double areaUnderBins38to72 = 0.0;
             * for (int i = 38; i < 44; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * for (int i = 52; i < 57; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * for (int i = 64; i < 72; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * double ratio2 = areaUnderBins38to72 / areaUnderHighBins;
             * double score = (ratio1 * 0.2) + (ratio2 * 0.8);
             * double[] truePositives = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0006, 0.0014, 0.0015, 0.0010, 0.0002, 0.0001, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003, 0.0005, 0.0006, 0.0005, 0.0003, 0.0002, 0.0001, 0.0002, 0.0007, 0.0016, 0.0026, 0.0035, 0.0037, 0.0040, 0.0046, 0.0040, 0.0031, 0.0022, 0.0048, 0.0133, 0.0149, 0.0396, 0.1013, 0.1647, 0.2013, 0.2236, 0.2295, 0.1836, 0.1083, 0.0807, 0.0776, 0.0964, 0.1116, 0.0987, 0.1065, 0.1575, 0.3312, 0.4829, 0.5679, 0.5523, 0.4412, 0.2895, 0.2022, 0.2622, 0.2670, 0.2355, 0.1969, 0.2220, 0.6600, 0.9023, 1.0000, 0.8099, 0.8451, 0.8210, 0.5511, 0.1756, 0.0319, 0.0769, 0.0738, 0.2235, 0.3901, 0.4565, 0.4851, 0.3703, 0.3643, 0.2497, 0.2705, 0.3456, 0.3096, 0.1809, 0.0710, 0.0828, 0.0857, 0.0953, 0.1308, 0.1387, 0.0590 };
             *
             * if (score > 0.4)
             *  eventFound = true;
             * if ((areaUnderHighBins/3) < areaUnderLowest24bins)
             * //if (ratio1 > ratio2)
             * {
             *  eventFound = false;
             * }
             */

            // TEST TWO (A)
            // these are used for scoring
            //double[] truePositives1 = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0006, 0.0014, 0.0015, 0.0010, 0.0002, 0.0001, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003, 0.0005, 0.0006, 0.0005, 0.0003, 0.0002, 0.0001, 0.0002, 0.0007, 0.0016, 0.0026, 0.0035, 0.0037, 0.0040, 0.0046, 0.0040, 0.0031, 0.0022, 0.0048, 0.0133, 0.0149, 0.0396, 0.1013, 0.1647, 0.2013, 0.2236, 0.2295, 0.1836, 0.1083, 0.0807, 0.0776, 0.0964, 0.1116, 0.0987, 0.1065, 0.1575, 0.3312, 0.4829, 0.5679, 0.5523, 0.4412, 0.2895, 0.2022, 0.2622, 0.2670, 0.2355, 0.1969, 0.2220, 0.6600, 0.9023, 1.0000, 0.8099, 0.8451, 0.8210, 0.5511, 0.1756, 0.0319, 0.0769, 0.0738, 0.2235, 0.3901, 0.4565, 0.4851, 0.3703, 0.3643, 0.2497, 0.2705, 0.3456, 0.3096, 0.1809, 0.0710, 0.0828, 0.0857, 0.0953, 0.1308, 0.1387, 0.0590 };
            //double[] truePositives2 = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0001, 0.0001, 0.0001, 0.0000, 0.0000, 0.0001, 0.0001, 0.0003, 0.0004, 0.0004, 0.0002, 0.0001, 0.0001, 0.0003, 0.0003, 0.0006, 0.0007, 0.0020, 0.0127, 0.0256, 0.0426, 0.0512, 0.0560, 0.0414, 0.0237, 0.0133, 0.0107, 0.0091, 0.0077, 0.0085, 0.0165, 0.0144, 0.0308, 0.0416, 0.0454, 0.0341, 0.0191, 0.0128, 0.0058, 0.0026, 0.0081, 0.0139, 0.0313, 0.0404, 0.0493, 0.0610, 0.1951, 0.4083, 0.5616, 0.5711, 0.5096, 0.4020, 0.2917, 0.1579, 0.1421, 0.1461, 0.1406, 0.2098, 0.1676, 0.2758, 0.2875, 0.6513, 0.9374, 1.0000, 0.7576, 0.4130, 0.2622, 0.1495, 0.0973, 0.0623, 0.0425, 0.0205, 0.0034, 0.0065, 0.0054, 0.0089, 0.0138, 0.0208, 0.0204, 0.0168, 0.0136, 0.0149, 0.0155, 0.0106, 0.0086, 0.0099, 0.0187 };
            //double[] truePositivesA = NormalDist.Convert2ZScores(truePositivesA);
            //double[] truePositivesB = NormalDist.Convert2ZScores(truePositivesB);

            // TEST TWO (B)
            // Use these spectra when using my filtering (i.e. not Chris's prefiltered)
            // these spectra are used for scoring when the window size is 2048
            //double[] truePositives1 = { 0.0014, 0.0012, 0.0009, 0.0003, 0.0001, 0.0005, 0.0008, 0.0029, 0.0057, 0.0070, 0.0069, 0.0063, 0.0053, 0.0032, 0.0013, 0.0011, 0.0011, 0.0007, 0.0000, 0.0006, 0.0010, 0.0013, 0.0008, 0.0009, 0.0022, 0.0046, 0.0069, 0.0082, 0.0070, 0.0065, 0.0082, 0.0078, 0.0052, 0.0021, 0.0132, 0.0357, 0.0420, 0.0996, 0.2724, 0.4557, 0.5739, 0.6366, 0.6155, 0.4598, 0.2334, 0.1468, 0.1410, 0.1759, 0.2157, 0.1988, 0.2131, 0.3072, 0.6161, 0.8864, 1.0000, 0.9290, 0.6983, 0.4208, 0.2690, 0.3190, 0.3109, 0.2605, 0.1896, 0.2118, 0.5961, 0.8298, 0.9290, 0.7363, 0.6605, 0.5840, 0.3576, 0.1019, 0.0162, 0.0400, 0.0405, 0.1106, 0.1803, 0.2083, 0.2058, 0.1475, 0.1387, 0.0870, 0.0804, 0.0975, 0.0848, 0.0490, 0.0193, 0.0217, 0.0210, 0.0214, 0.0253, 0.0254, 0.0072 };
            //double[] truePositives2 = { 0.0090, 0.0106, 0.0138, 0.0134, 0.0088, 0.0026, 0.0002, 0.0002, 0.0003, 0.0000, 0.0001, 0.0006, 0.0013, 0.0019, 0.0020, 0.0015, 0.0008, 0.0004, 0.0002, 0.0015, 0.0022, 0.0073, 0.0195, 0.0628, 0.2203, 0.4031, 0.5635, 0.5445, 0.4828, 0.2869, 0.1498, 0.0588, 0.0500, 0.0542, 0.0641, 0.1188, 0.1833, 0.1841, 0.2684, 0.3062, 0.2831, 0.1643, 0.0606, 0.0336, 0.0136, 0.0056, 0.0187, 0.0301, 0.0700, 0.1103, 0.1559, 0.2449, 0.5303, 0.8544, 1.0000, 0.8361, 0.6702, 0.4839, 0.3463, 0.1525, 0.1049, 0.1201, 0.1242, 0.2056, 0.1653, 0.2685, 0.2947, 0.5729, 0.7024, 0.6916, 0.4765, 0.2488, 0.1283, 0.0543, 0.0326, 0.0236, 0.0187, 0.0108, 0.0021, 0.0028, 0.0019, 0.0024, 0.0041, 0.0063, 0.0066, 0.0055, 0.0036, 0.0025, 0.0018, 0.0014, 0.0013, 0.0008, 0.0010 };
            // these spectra are used for scoring when the window size is 1024
            double[] truePositives1 = { 0.0007, 0.0004, 0.0000, 0.0025, 0.0059, 0.0069, 0.0044, 0.0012, 0.0001, 0.0006, 0.0013, 0.0032, 0.0063, 0.0067, 0.0070, 0.0033, 0.0086, 0.0128, 0.1546, 0.4550, 0.6197, 0.4904, 0.2075, 0.0714, 0.1171, 0.4654, 0.8634, 1.0000, 0.7099, 0.2960, 0.1335, 0.3526, 0.6966, 0.9215, 0.6628, 0.3047, 0.0543, 0.0602, 0.0931, 0.1364, 0.1314, 0.1047, 0.0605, 0.0204, 0.0128, 0.0114 };
            double[] truePositives2 = { 0.0126, 0.0087, 0.0043, 0.0002, 0.0000, 0.0010, 0.0018, 0.0016, 0.0005, 0.0002, 0.0050, 0.1262, 0.4054, 0.5111, 0.3937, 0.1196, 0.0156, 0.0136, 0.0840, 0.1598, 0.1691, 0.0967, 0.0171, 0.0152, 0.0234, 0.3648, 0.8243, 1.0000, 0.6727, 0.2155, 0.0336, 0.0240, 0.2661, 0.6240, 0.7523, 0.5098, 0.1493, 0.0149, 0.0046, 0.0020, 0.0037, 0.0061, 0.0061, 0.0036, 0.0010, 0.0008 };

            var    zscores          = NormalDist.Convert2ZScores(subBandSpectrum);
            double correlationScore = 0.0;
            double score1           = AutoAndCrossCorrelation.CorrelationCoefficient(zscores, truePositives1);
            double score2           = AutoAndCrossCorrelation.CorrelationCoefficient(zscores, truePositives2);

            correlationScore = score1;
            if (score2 > correlationScore)
            {
                correlationScore = score2;
            }

            // TEST THREE: sharpness and height of peaks
            // score the four heighest peaks
            double peaksScore = 0;

            double[] spectrumCopy = new double[subBandSpectrum.Length];
            for (int i = 0; i < subBandSpectrum.Length; i++)
            {
                spectrumCopy[i] = subBandSpectrum[i];
            }

            // set spectrum bounds
            int lowerBound = subBandSpectrum.Length / 4;
            int upperBound = subBandSpectrum.Length * 7 / 8;

            for (int p = 0; p < 4; p++)
            {
                int peakLocation = DataTools.GetMaxIndex(spectrumCopy);
                if (peakLocation < lowerBound)
                {
                    continue; // peak location cannot be too low
                }

                if (peakLocation > upperBound)
                {
                    continue; // peak location cannot be too high
                }

                double peakHeight = spectrumCopy[peakLocation];
                int    nh         = 3;
                if (windowWidth == 2048)
                {
                    nh = 6;
                }

                double peakSides = (subBandSpectrum[peakLocation - nh] + subBandSpectrum[peakLocation + nh]) / 2;
                peaksScore += peakHeight - peakSides;

                //now zero peak and peak neighbourhood
                if (windowWidth == 2048)
                {
                    nh = 9;
                }

                for (int n = 0; n < nh; n++)
                {
                    spectrumCopy[peakLocation + n] = 0;
                    spectrumCopy[peakLocation - n] = 0;
                }
            } // for 4 peaks

            // take average of four peaks
            peaksScore /= 4;

            // TEST FOUR: peak position ratios
            //
            //int[] peakLocationCentres = { 3, 10, 37, 44, 54, 67 };
            int[] peakLocationCentres = { 2, 5, 19, 22, 27, 33 };

            int nh2 = 6;

            if (windowWidth == 1024)
            {
                nh2 = 3;
            }

            int[]    actualPeakLocations = new int[6];
            double[] relativePeakHeights = new double[6];
            for (int p = 0; p < 6; p++)
            {
                double max   = -double.MaxValue;
                int    maxId = peakLocationCentres[p];
                for (int id = peakLocationCentres[p] - 4; id < peakLocationCentres[p] + 4; id++)
                {
                    if (id < 0)
                    {
                        id = 0;
                    }

                    if (subBandSpectrum[id] > max)
                    {
                        max   = subBandSpectrum[id];
                        maxId = id;
                    }
                }

                actualPeakLocations[p] = maxId;
                int lowerPosition = maxId - nh2;
                if (lowerPosition < 0)
                {
                    lowerPosition = 0;
                }

                relativePeakHeights[p] = subBandSpectrum[maxId] - subBandSpectrum[lowerPosition] - subBandSpectrum[maxId + nh2];
            }

            double[] targetHeights     = { 0.1, 0.1, 0.5, 0.5, 1.0, 0.6 };
            var      zscores1          = NormalDist.Convert2ZScores(relativePeakHeights);
            var      zscores2          = NormalDist.Convert2ZScores(targetHeights);
            double   relativePeakScore = AutoAndCrossCorrelation.CorrelationCoefficient(zscores1, zscores2);

            //###########################################################################################
            // PROCESS SCORES
            //if (score1 > scoreThreshold) eventFound = true;
            //if ((score1 > scoreThreshold) || (score2 > scoreThreshold)) eventFound = true;
            //double score = (correlationScore * 0.3) + (peaksScore * 0.7);
            double score = (relativePeakScore * 0.4) + (peaksScore * 0.6);

            scores[0] = score;
            scores[1] = relativePeakScore;
            scores[2] = peaksScore;
            return(scores);
        }

Exemple #9

0

Afficher le fichier

Fichier : FeatureExtraction.cs Projet : ninascarpelli/audio-analysis

        /// <summary>
        /// Apply feature learning process on a set of target (1-minute) recordings (inputPath)
        /// according to the a set of centroids learned using feature learning process.
        /// Output feature vectors (outputPath).
        /// </summary>
        public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List <double[][]> allCentroids,
                                                         string inputPath, string outputPath)
        {
            var           simVecDir     = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors"));
            int           frameSize     = config.FrameSize;
            int           finalBinCount = config.FinalBinCount;
            FreqScaleType scaleType     = config.FrequencyScaleType;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
                // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
                // The "WindowOverlap" is calculated to answer this question
                // each 24 single-frames duration is equal to 1 second
                // note that the "WindowOverlap" value should be recalculated if frame size is changed
                // this has not yet been considered in the Config file!
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
            double frameStep   = frameSize * (1 - settings.WindowOverlap);
            int    minFreqBin  = config.MinFreqBin;
            int    maxFreqBin  = config.MaxFreqBin;
            int    numFreqBand = config.NumFreqBand;
            int    patchWidth  =
                (maxFreqBin - minFreqBin + 1) / numFreqBand;
            int patchHeight = config.PatchHeight;

            // the number of frames that their feature vectors will be concatenated in order to preserve temporal information.
            int frameWindowLength = config.FrameWindowLength;

            // the step size to make a window of frames
            int stepSize = config.StepSize;

            // the factor of downsampling
            int maxPoolingFactor = config.MaxPoolingFactor;

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            //*****
            // lists of features for all processing files
            // the key is the file name, and the value is the features for different bands
            Dictionary <string, List <double[, ]> > allFilesMinFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMeanFeatureVectors     = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMaxFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesStdFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesSkewnessFeatureVectors = new Dictionary <string, List <double[, ]> >();

            double[,] inputMatrix;
            List <AudioRecording> recordings = new List <AudioRecording>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    if (config.DoSegmentation)
                    {
                        recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
                    }
                    else
                    {
                        recordings.Add(recording);
                    }

                    for (int s = 0; s < recordings.Count; s++)
                    {
                        string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv");
                        var    amplitudeSpectrogram        = new AmplitudeSpectrogram(settings, recordings[s].WavReader);
                        var    decibelSpectrogram          = new DecibelSpectrogram(amplitudeSpectrogram);

                        // DO RMS NORMALIZATION
                        //sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                        // DO NOISE REDUCTION
                        if (config.DoNoiseReduction)
                        {
                            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
                        }

                        // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                        if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                        {
                            inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
                        }
                        else
                        {
                            inputMatrix = decibelSpectrogram.Data;
                        }

                        // creating matrices from different freq bands of the source spectrogram
                        List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
                        double[][,] matrices2 = allSubmatrices2.ToArray();
                        List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>();
                        for (int i = 0; i < matrices2.GetLength(0); i++)
                        {
                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                            double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor);

                            int rows              = downsampledMatrix.GetLength(0);
                            int columns           = downsampledMatrix.GetLength(1);
                            var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                            allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation
                        // to do the feature transformation, we normalize centroids and
                        // sequential patches from the input spectrogram to unit length
                        // Then, we calculate the dot product of each patch with the centroids' matrix

                        List <double[][]> allNormCentroids = new List <double[][]>();
                        for (int i = 0; i < allCentroids.Count; i++)
                        {
                            // double check the index of the list
                            double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][];
                            for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++)
                            {
                                normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]);
                            }

                            allNormCentroids.Add(normCentroids);
                        }

                        List <double[][]> allFeatureTransVectors = new List <double[][]>();

                        // processing the sequential patch matrix for each band
                        for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
                        {
                            List <double[]> featureTransVectors = new List <double[]>();
                            double[][]      similarityVectors   = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];

                            for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
                            {
                                // normalize each patch to unit length
                                var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
                                var normVector  = inputVector;

                                // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero.
                                if (inputVector.Euclidean() != 0)
                                {
                                    normVector = ART_2A.NormaliseVector(inputVector);
                                }

                                similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
                            }

                            Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());

                            // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames
                            // using FrameWindowLength

                            // patchId refers to the patch id that has been processed so far according to the step size.
                            // if we want no overlap between different frame windows, then stepSize = frameWindowLength
                            int patchId = 0;
                            while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0))
                            {
                                List <double[]> patchGroup = new List <double[]>();
                                for (int k = 0; k < frameWindowLength; k++)
                                {
                                    patchGroup.Add(similarityVectors[k + patchId]);
                                }

                                featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup));
                                patchId = patchId + stepSize;
                            }

                            allFeatureTransVectors.Add(featureTransVectors.ToArray());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                        // Based on the resolution to generate features, the "numFrames" parameter will be set.
                        // Each 24 single-frame patches form 1 second
                        // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net)
                        // The pre-assumption is that each input recording is 1 minute long

                        // store features of different bands in lists
                        List <double[, ]> allMinFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allMeanFeatureVectors     = new List <double[, ]>();
                        List <double[, ]> allMaxFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allStdFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allSkewnessFeatureVectors = new List <double[, ]>();

                        // Each 24 frames form 1 second using WindowOverlap
                        // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization.
                        int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor);

                        foreach (var freqBandFeature in allFeatureTransVectors)
                        {
                            List <double[]> minFeatureVectors      = new List <double[]>();
                            List <double[]> meanFeatureVectors     = new List <double[]>();
                            List <double[]> maxFeatureVectors      = new List <double[]>();
                            List <double[]> stdFeatureVectors      = new List <double[]>();
                            List <double[]> skewnessFeatureVectors = new List <double[]>();

                            int c = 0;
                            while (c + numFrames <= freqBandFeature.GetLength(0))
                            {
                                // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.)
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                for (int i = c; i < c + numFrames; i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                                c += numFrames;
                            }

                            // when (freqBandFeature.GetLength(0) % numFrames) != 0, it means there are a number of frames (< numFrames)
                            // (or the whole) at the end of the target recording , left unprocessed.
                            // this would be problematic when an the resolution to generate the feature vector is 1 min,
                            // but the the length of the target recording is a bit less than one min.
                            if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1)
                            {
                                // First, make a list of patches that would be less than the required resolution
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                int             unprocessedFrames     = freqBandFeature.GetLength(0) % numFrames;
                                for (int i = freqBandFeature.GetLength(0) - unprocessedFrames;
                                     i < freqBandFeature.GetLength(0);
                                     i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                            }

                            allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix());
                            allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
                            allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
                            allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
                            allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix());
                        }

                        //*****
                        // the keys of the following dictionaries contain file name
                        // and their values are a list<double[,]> which the list.count is
                        // the number of all subsegments for which features are extracted
                        // the number of freq bands defined as an user-defined parameter.
                        // the 2D-array is the feature vectors.
                        allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors);
                        allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors);
                        allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors);
                        allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors);
                        allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors);

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                    }
                }
            }

            // ++++++++++++++++++++++++++++++++++Writing features to one file
            // First, concatenate mean, max, std for each second.
            // Then, write the features of each pre-defined frequency band into a separate CSV file.
            var filesName        = allFilesMeanFeatureVectors.Keys.ToArray();
            var minFeatures      = allFilesMinFeatureVectors.Values.ToArray();
            var meanFeatures     = allFilesMeanFeatureVectors.Values.ToArray();
            var maxFeatures      = allFilesMaxFeatureVectors.Values.ToArray();
            var stdFeatures      = allFilesStdFeatureVectors.Values.ToArray();
            var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray();

            // The number of elements in the list shows the number of freq bands
            // the size of each element in the list shows the number of files processed to generate feature for.
            // the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector
            var allMins     = new List <double[][, ]>();
            var allMeans    = new List <double[][, ]>();
            var allMaxs     = new List <double[][, ]>();
            var allStds     = new List <double[][, ]>();
            var allSkewness = new List <double[][, ]>();

            // looping over freq bands
            for (int i = 0; i < meanFeatures[0].Count; i++)
            {
                var mins       = new List <double[, ]>();
                var means      = new List <double[, ]>();
                var maxs       = new List <double[, ]>();
                var stds       = new List <double[, ]>();
                var skewnesses = new List <double[, ]>();

                // looping over all files
                for (int k = 0; k < meanFeatures.Length; k++)
                {
                    mins.Add(minFeatures[k].ToArray()[i]);
                    means.Add(meanFeatures[k].ToArray()[i]);
                    maxs.Add(maxFeatures[k].ToArray()[i]);
                    stds.Add(stdFeatures[k].ToArray()[i]);
                    skewnesses.Add(skewnessFeatures[k].ToArray()[i]);
                }

                allMins.Add(mins.ToArray());
                allMeans.Add(means.ToArray());
                allMaxs.Add(maxs.ToArray());
                allStds.Add(stds.ToArray());
                allSkewness.Add(skewnesses.ToArray());
            }

            // each element of meanFeatures array is a list of features for different frequency bands.
            // looping over the number of freq bands
            for (int i = 0; i < allMeans.ToArray().GetLength(0); i++)
            {
                // creating output feature file based on the number of freq bands
                var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv");

                // creating the header for CSV file
                List <string> header = new List <string>();
                header.Add("file name");

                for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("min" + j.ToString());
                }

                for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("mean" + j.ToString());
                }

                for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("max" + j.ToString());
                }

                for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("std" + j.ToString());
                }

                for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("skewness" + j.ToString());
                }

                var    csv     = new StringBuilder();
                string content = string.Empty;
                foreach (var entry in header.ToArray())
                {
                    content += entry.ToString() + ",";
                }

                csv.AppendLine(content);

                var allFilesFeatureVectors = new Dictionary <string, double[, ]>();

                // looping over files
                for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++)
                {
                    // concatenating mean, std, and max vector together for the pre-defined resolution
                    List <double[]> featureVectors = new List <double[]>();
                    for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++)
                    {
                        List <double[]> featureList = new List <double[]>
                        {
                            allMins.ToArray()[i][j].ToJagged()[k],
                                        allMeans.ToArray()[i][j].ToJagged()[k],
                                        allMaxs.ToArray()[i][j].ToJagged()[k],
                                        allStds.ToArray()[i][j].ToJagged()[k],
                                        allSkewness.ToArray()[i][j].ToJagged()[k],
                        };
                        double[] featureVector = DataTools.ConcatenateVectors(featureList);
                        featureVectors.Add(featureVector);
                    }

                    allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix());
                }

                // writing feature vectors to CSV file
                foreach (var entry in allFilesFeatureVectors)
                {
                    content  = string.Empty;
                    content += entry.Key.ToString() + ",";
                    foreach (var cent in entry.Value)
                    {
                        content += cent.ToString() + ",";
                    }

                    csv.AppendLine(content);
                }

                File.WriteAllText(outputFeatureFile, csv.ToString());
            }
        }

Exemple #10

0

Afficher le fichier

Fichier : CrossCorrelation.cs Projet : stewartmacdonald/audio-analysis

        //public const string key_COUNT = "count";

        public static Tuple <double[, ], double[, ], double[, ], double[]> DetectBarsUsingXcorrelation(double[,] m, int rowStep, int rowWidth, int colStep, int colWidth,
                                                                                                       double intensityThreshold, int zeroBinCount)
        {
            bool doNoiseremoval = true;

            //intensityThreshold = 0.3;

            int rowCount         = m.GetLength(0);
            int colCount         = m.GetLength(1);
            int numberOfColSteps = colCount / colStep;
            int numberOfRowSteps = rowCount / rowStep;

            var intensityMatrix   = new double[numberOfRowSteps, numberOfColSteps];
            var periodicityMatrix = new double[numberOfRowSteps, numberOfColSteps];
            var hitsMatrix        = new double[rowCount, colCount];

            double[] array2return = null;

            for (int b = 0; b < numberOfColSteps; b++)
            {
                int minCol = b * colStep;
                int maxCol = minCol + colWidth - 1;

                double[,] subMatrix = MatrixTools.Submatrix(m, 0, minCol, rowCount - 1, maxCol);
                double[] amplitudeArray = MatrixTools.GetRowAverages(subMatrix);

                if (doNoiseremoval)
                {
                    double StandardDeviationCount = 0.1;  // number of noise SDs to calculate noise threshold - determines severity of noise reduction
                    SNR.BackgroundNoise bgn       = SNR.SubtractBackgroundNoiseFromSignal(amplitudeArray, StandardDeviationCount);
                    amplitudeArray = bgn.NoiseReducedSignal;
                }

                //double noiseThreshold = 0.005;
                //for (int i = 1; i < amplitudeArray.Length - 1; i++)
                //{
                //    if ((amplitudeArray[i - 1] < noiseThreshold) && (amplitudeArray[i + 1] < noiseThreshold)) amplitudeArray[i] = 0.0;
                //}
                //DataTools.writeBarGraph(amplitudeArray);
                if (b == 2)
                {
                    array2return = amplitudeArray; //returned for debugging purposes only
                }

                //ii: DETECT HARMONICS
                var      results     = AutoAndCrossCorrelation.DetectPeriodicityInLongArray(amplitudeArray, rowStep, rowWidth, zeroBinCount);
                double[] intensity   = results.Item1;    //an array of periodicity scores
                double[] periodicity = results.Item2;

                //transfer periodicity info to a matrices.
                for (int rs = 0; rs < numberOfRowSteps; rs++)
                {
                    intensityMatrix[rs, b]   = intensity[rs];
                    periodicityMatrix[rs, b] = periodicity[rs];

                    //mark up the hits matrix
                    //double relativePeriod = periodicity[rs] / rowWidth / 2;
                    if (intensity[rs] > intensityThreshold)
                    {
                        int minRow = rs * rowStep;
                        int maxRow = minRow + rowStep - 1;
                        for (int r = minRow; r < maxRow; r++)
                        {
                            for (int c = minCol; c < maxCol; c++)
                            {
                                //hitsMatrix[r, c] = relativePeriod;
                                hitsMatrix[r, c] = periodicity[rs];
                            }
                        }
                    } // if()
                }     // for loop over numberOfRowSteps
            }         // for loop over numberOfColSteps

            return(Tuple.Create(intensityMatrix, periodicityMatrix, hitsMatrix, array2return));
        }

Exemple #11

0

Afficher le fichier

Fichier : CrossCorrelation.cs Projet : stewartmacdonald/audio-analysis

        } //DetectBarsInTheRowsOfaMatrix()

        /// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Was first developed for crow calls.
        /// First looks for a decibel profile that matches the passed call duration and decibel loudness
        /// Then samples the centre portion for the correct harmonic period.
        /// </summary>
        /// <param name="m"></param>
        /// <param name="amplitudeThreshold"></param>
        /// <returns></returns>
        public static Tuple <double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
        {
            int zeroBinCount = 3;  //to remove low freq content which dominates the spectrum
            int halfspan     = callSpan / 2;

            double[] dBArray = MatrixTools.GetRowAverages(m);
            dBArray = DataTools.filterMovingAverage(dBArray, 3);

            bool doNoiseRemoval = true;

            if (doNoiseRemoval)
            {
                double StandardDeviationCount = 0.1;  // number of noise SDs to calculate noise threshold - determines severity of noise reduction
                SNR.BackgroundNoise bgn       = SNR.SubtractBackgroundNoiseFromSignal(dBArray, StandardDeviationCount);
                dBArray = bgn.NoiseReducedSignal;
            }

            bool[] peaks = DataTools.GetPeaks(dBArray);

            int rowCount    = m.GetLength(0);
            int colCount    = m.GetLength(1);
            var intensity   = new double[rowCount];    //an array of period intensity
            var periodicity = new double[rowCount];    //an array of the periodicity values

            for (int r = halfspan; r < rowCount - halfspan; r++)
            {
                //APPLY A FILTER: must satisfy the following conditions for a call.
                if (!peaks[r])
                {
                    continue;
                }

                if (dBArray[r] < dBThreshold)
                {
                    continue;
                }

                double lowerDiff = dBArray[r] - dBArray[r - halfspan];
                double upperDiff = dBArray[r] - dBArray[r + halfspan];
                if (lowerDiff < dBThreshold || upperDiff < dBThreshold)
                {
                    continue;
                }

                double[] prevRow  = DataTools.DiffFromMean(MatrixTools.GetRow(m, r - 1));
                double[] thisRow  = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
                var      spectrum = AutoAndCrossCorrelation.CrossCorr(prevRow, thisRow);

                for (int s = 0; s < zeroBinCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId          = DataTools.GetMaxIndex(spectrum);
                double intensityValue = spectrum[maxId];
                intensity[r] = intensityValue;

                double period = 0.0;
                if (maxId != 0)
                {
                    period = 2 * colCount / (double)maxId;
                }

                periodicity[r] = period;

                prevRow = thisRow;
            } // rows

            return(Tuple.Create(dBArray, intensity, periodicity));
        } //DetectHarmonicsInSonogramMatrix()

Exemple #12

0

Afficher le fichier

Fichier : LewiniaPectoralis.cs Projet : ninascarpelli/audio-analysis

        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LewinsRailConfig lrConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            int sr = recording.SampleRate;

            int upperBandMinHz = lrConfig.UpperBandMinHz;
            int upperBandMaxHz = lrConfig.UpperBandMaxHz;
            int lowerBandMinHz = lrConfig.LowerBandMinHz;
            int lowerBandMaxHz = lrConfig.LowerBandMaxHz;

            //double decibelThreshold = lrConfig.DecibelThreshold;   //dB
            //int windowSize = lrConfig.WindowSize;
            double eventThreshold = lrConfig.EventThreshold; //in 0-1
            double minDuration    = lrConfig.MinDuration;    // seconds
            double maxDuration    = lrConfig.MaxDuration;    // seconds
            double minPeriod      = lrConfig.MinPeriod;      // seconds
            double maxPeriod      = lrConfig.MaxPeriod;      // seconds

            //double freqBinWidth = sr / (double)windowSize;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //i: MAKE SONOGRAM
            double framesPerSecond = freqBinWidth;

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewins Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }

                    periodicity[start + j] = period;
                }
            }

            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);

            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);
            var hits = new double[rowCount, colCount];

            //######################################################################

            var   scorePlot  = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
                DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
                var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, intensityPlot, periodicityPlot
                };
                debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
            }

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage));
        } //Analysis()

Exemple #13

0

Afficher le fichier

        public static void DetectTrackPeriodicity(SpectralTrack track, int xCorrelationLength, List <double[]> listOfSpectralBins, double framesPerSecond)
        {
            int halfSample = xCorrelationLength / 2;
            int lowerBin   = (int)Math.Round(track.AverageBin);
            int upperBin   = lowerBin + 1;

            upperBin = upperBin >= listOfSpectralBins.Count ? listOfSpectralBins.Count - 1 : upperBin;
            int length = track.Length;

            //only sample the middle third of track
            int start = length / 3;
            int end   = start + start - 1;

            //init score track and periodicity track
            double[] score  = new double[start];
            double[] period = new double[start];

            for (int r = start; r < end; r++) // for each position in centre third of track
            {
                int sampleStart = track.StartFrame - halfSample + r;
                if (sampleStart < 0)
                {
                    sampleStart = 0;
                }

                double[] lowerSubarray = DataTools.Subarray(listOfSpectralBins[lowerBin], sampleStart, xCorrelationLength);
                double[] upperSubarray = DataTools.Subarray(listOfSpectralBins[upperBin], sampleStart, xCorrelationLength);

                if (lowerSubarray == null || upperSubarray == null)
                {
                    break; //reached end of array
                }

                if (lowerSubarray.Length != xCorrelationLength || upperSubarray.Length != xCorrelationLength)
                {
                    break; //reached end of array
                }

                lowerSubarray = DataTools.SubtractMean(lowerSubarray); // zero mean the arrays
                upperSubarray = DataTools.SubtractMean(upperSubarray);

                //upperSubarray = lowerSubarray;

                var xCorSpectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray); //sub-arrays already normalised

                //DataTools.writeBarGraph(xCorSpectrum);

                //Set the minimum OscilFreq of interest = 8 per second. Therefore max period ~ 125ms;
                //int 0.125sec = 2 * xCorrelationLength / minInterestingID / framesPerSecond; //
                double maxPeriod        = 0.05; //maximum period of interest
                int    minInterestingID = (int)Math.Round(2 * xCorrelationLength / maxPeriod / framesPerSecond);
                for (int s = 0; s <= minInterestingID; s++)
                {
                    xCorSpectrum[s] = 0.0;  //in real data these low freq/long period bins are dominant and hide other frequency content
                }

                int maxIdXcor = DataTools.GetMaxIndex(xCorSpectrum);
                period[r - start] = 2 * xCorrelationLength / (double)maxIdXcor / framesPerSecond; //convert maxID to period in seconds
                score[r - start]  = xCorSpectrum[maxIdXcor];
            } // for loop

            track.periodicityScore = score;
            track.periodicity      = period;

            //if (track.score.Average() < 0.3) track = null;
        }

Exemple #14

0

Afficher le fichier

Fichier : UnsupervisedFeatureLearningTest.cs Projet : gitter-badger/audio-analysis

        public void TestFeatureLearning()
        {
            // var outputDir = this.outputDirectory;
            var resultDir  = PathHelper.ResolveAssetPath("FeatureLearning");
            var folderPath = Path.Combine(resultDir, "random_audio_segments"); // Liz

            // PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening\random_audio_segments\1192_1000");
            // var resultDir = PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening");
            var outputMelImagePath             = Path.Combine(resultDir, "MelScaleSpectrogram.png");
            var outputNormMelImagePath         = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
            var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
            var outputReSpecImagePath          = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");

            // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");

            // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1000 random 1-min recordings from Gympie

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            // get the nyquist value from the first wav file in the folder of recordings
            int nq = new AudioRecording(Directory.GetFiles(folderPath, "*.wav")[0]).Nyquist;

            int           nyquist       = nq;  // 11025;
            int           frameSize     = 1024;
            int           finalBinCount = 128; // 256; // 100; // 40; // 200; //
            int           hertzInterval = 1000;
            FreqScaleType scaleType     = FreqScaleType.Mel;
            var           freqScale     = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
            var           fst           = freqScale.ScaleType;

            var sonoConfig = new SonogramConfig
            {
                WindowSize = frameSize,

                // since each 24 frames duration is equal to 1 second
                WindowOverlap      = 0.1028,
                DoMelScale         = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount        = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType = NoiseReductionType.None,
            };

            /*
             * // testing
             * var recordingPath3 = PathHelper.ResolveAsset(folderPath, "SM304264_0+1_20160421_024539_46-47min.wav");
             * var recording3 = new AudioRecording(recordingPath3);
             * var sonogram3 = new SpectrogramStandard(sonoConfig, recording3.WavReader);
             *
             * // DO DRAW SPECTROGRAM
             * var image4 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * image4.Save(outputMelImagePath);
             *
             * // Do RMS normalization
             * sonogram3.Data = SNR.RmsNormalization(sonogram3.Data);
             * var image5 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * image5.Save(outputNormMelImagePath);
             *
             * // NOISE REDUCTION
             * sonogram3.Data = PcaWhitening.NoiseReduction(sonogram3.Data);
             * var image6 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
             * image6.Save(outputNoiseReducedMelImagePath);
             *
             * //testing
             */

            // Define the minFreBin and MaxFreqBin to be able to work at arbitrary frequency bin bounds.
            // The default value is minFreqBin = 1 and maxFreqBin = finalBinCount.
            // To work with arbitrary frequency bin bounds we need to manually set these two parameters.
            int minFreqBin       = 40;                                          //1
            int maxFreqBin       = 80;                                          //finalBinCount;
            int numFreqBand      = 1;                                           //4;
            int patchWidth       = (maxFreqBin - minFreqBin + 1) / numFreqBand; // finalBinCount / numFreqBand;
            int patchHeight      = 1;                                           // 2; // 4; // 16; // 6; // Frame size
            int numRandomPatches = 20;                                          // 40; // 80; // 30; // 100; // 500; //

            // int fileCount = Directory.GetFiles(folderPath, "*.wav").Length;

            // Define variable number of "randomPatch" lists based on "numFreqBand"
            Dictionary <string, List <double[, ]> > randomPatchLists = new Dictionary <string, List <double[, ]> >();

            for (int i = 0; i < numFreqBand; i++)
            {
                randomPatchLists.Add(string.Format("randomPatch{0}", i.ToString()), new List <double[, ]>());
            }

            List <double[, ]> randomPatches = new List <double[, ]>();

            /*
             * foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
             * {
             *  FileInfo f = filePath.ToFileInfo();
             *  if (f.Length == 0)
             *  {
             *      Debug.WriteLine(f.Name);
             *  }
             * }
             */
            double[,] inputMatrix;

            foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    sonoConfig.SourceFName = recording.BaseName;

                    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

                    // DO RMS NORMALIZATION
                    sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                    // DO NOISE REDUCTION
                    // sonogram.Data = SNR.NoiseReduce_Median(sonogram.Data, nhBackgroundThreshold: 2.0);
                    sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);

                    // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                    if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                    {
                        inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram.Data, minFreqBin, maxFreqBin);
                    }
                    else
                    {
                        inputMatrix = sonogram.Data;
                    }

                    // creating matrices from different freq bands of the source spectrogram
                    List <double[, ]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);

                    // Second: selecting random patches from each freq band matrix and add them to the corresponding patch list
                    int count = 0;
                    while (count < allSubmatrices.Count)
                    {
                        randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
                                                                               .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches,
                                                                                           PatchSampling.SamplingMethod.Random).ToMatrix());
                        count++;
                    }
                }
            }

            foreach (string key in randomPatchLists.Keys)
            {
                randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
            }

            // convert list of random patches matrices to one matrix
            int numberOfClusters = 50; //256; // 128; // 64; // 32; // 10; //
            List <double[][]> allBandsCentroids = new List <double[][]>();
            List <KMeansClusterCollection> allClusteringOutput = new List <KMeansClusterCollection>();

            for (int i = 0; i < randomPatches.Count; i++)
            {
                double[,] patchMatrix = randomPatches[i];

                // Apply PCA Whitening
                var whitenedSpectrogram = PcaWhitening.Whitening(true, patchMatrix);

                // Do k-means clustering
                var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters);

                // var clusteringOutput = KmeansClustering.Clustering(patchMatrix, noOfClusters, pathToClusterCsvFile);

                // writing centroids to a csv file
                // note that Csv.WriteToCsv can't write data types like dictionary<int, double[]> (problems with arrays)
                // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv
                // it might be a better way to do this
                string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
                var    clusterCentroids     = clusteringOutput.ClusterIdCentroid.Values.ToArray();
                Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());

                //Csv.WriteToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids);

                // sorting clusters based on size and output it to a csv file
                Dictionary <int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
                int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

                // Write cluster ID and size to a CSV file
                string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
                Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

                // Draw cluster image directly from clustering output
                List <KeyValuePair <int, double[]> > list = clusteringOutput.ClusterIdCentroid.ToList();
                double[][] centroids = new double[list.Count][];

                for (int j = 0; j < list.Count; j++)
                {
                    centroids[j] = list[j].Value;
                }

                allBandsCentroids.Add(centroids);
                allClusteringOutput.Add(clusteringOutput.Clusters);

                List <double[, ]> allCentroids = new List <double[, ]>();
                for (int k = 0; k < centroids.Length; k++)
                {
                    // convert each centroid to a matrix in order of cluster ID
                    // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
                    // OR: in order of cluster size
                    double[,] cent =
                        MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);

                    // normalize each centroid
                    double[,] normCent = DataTools.normalise(cent);

                    // add a row of zero to each centroid
                    double[,] cent2 = PatchSampling.AddRow(normCent);

                    allCentroids.Add(cent2);
                }

                // concatenate all centroids
                double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

                // Draw clusters
                // int gridInterval = 1000;
                // var freqScale = new FrequencyScale(FreqScaleType.Mel, nyquist, frameSize, finalBinCount, gridInterval);

                var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
                clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);

                // clusterImage.Save(outputClusterImagePath, ImageFormat.Bmp);

                var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");

                // Image bmp = Image.Load<Rgb24>(filename);
                FrequencyScale.DrawFrequencyLinesOnImage((Image <Rgb24>)clusterImage, freqScale, includeLabels: false);
                clusterImage.Save(outputClusteringImage);
            }

            //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target recordings
            var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");

            // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353972_20160303_055854_60_0.wav");    // folder with 1000 files
            // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353887_20151230_042625_60_0.wav");    // folder with 1000 files
            // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_354744_20151018_053923_60_0.wav");  // folder with 100 files

            var recording2 = new AudioRecording(recording2Path);
            var sonogram2  = new SpectrogramStandard(sonoConfig, recording2.WavReader);

            // DO DRAW SPECTROGRAM
            var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(),
                                                         freqScale.GridLineLocations);

            image.Save(outputMelImagePath);

            // Do RMS normalization
            sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);
            var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(),
                                                          "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);

            image2.Save(outputNormMelImagePath);

            // NOISE REDUCTION
            sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);
            var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(),
                                                          "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);

            image3.Save(outputNoiseReducedMelImagePath);

            // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
            if (minFreqBin != 1 || maxFreqBin != finalBinCount)
            {
                inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram2.Data, minFreqBin, maxFreqBin);
            }
            else
            {
                inputMatrix = sonogram2.Data;
            }

            // extracting sequential patches from the target spectrogram
            List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);

            double[][,] matrices2 = allSubmatrices2.ToArray();
            List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>();

            for (int i = 0; i < matrices2.GetLength(0); i++)
            {
                int rows              = matrices2[i].GetLength(0);
                int columns           = matrices2[i].GetLength(1);
                var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight,
                                                                 (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
            }

            // +++++++++++++++++++++++++++++++++++Feature Transformation
            // to do the feature transformation, we normalize centroids and
            // sequential patches from the input spectrogram to unit length
            // Then, we calculate the dot product of each patch with the centroids' matrix

            List <double[][]> allNormCentroids = new List <double[][]>();

            for (int i = 0; i < allBandsCentroids.Count; i++)
            {
                // double check the index of the list
                double[][] normCentroids = new double[allBandsCentroids.ToArray()[i].GetLength(0)][];
                for (int j = 0; j < allBandsCentroids.ToArray()[i].GetLength(0); j++)
                {
                    normCentroids[j] = ART_2A.NormaliseVector(allBandsCentroids.ToArray()[i][j]);
                }

                allNormCentroids.Add(normCentroids);
            }

            List <double[][]> allFeatureTransVectors = new List <double[][]>();

            for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
            {
                double[][] featureTransVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];
                for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
                {
                    var normVector =
                        ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i]
                                               .ToJagged()[j]); // normalize each patch to unit length
                    featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
                }

                allFeatureTransVectors.Add(featureTransVectors);
            }

            // +++++++++++++++++++++++++++++++++++Feature Transformation

            // +++++++++++++++++++++++++++++++++++Temporal Summarization
            // The resolution to generate features is 1 second
            // Each 24 single-frame patches form 1 second
            // for each 24 patch, we generate 3 vectors of mean, std, and max
            // The pre-assumption is that each input spectrogram is 1 minute

            List <double[, ]> allMeanFeatureVectors = new List <double[, ]>();
            List <double[, ]> allMaxFeatureVectors  = new List <double[, ]>();
            List <double[, ]> allStdFeatureVectors  = new List <double[, ]>();

            // number of frames needs to be concatenated to form 1 second. Each 24 frames make 1 second.
            int numFrames = (24 / patchHeight) * 60;

            foreach (var freqBandFeature in allFeatureTransVectors)
            {
                // store features of different bands in lists
                List <double[]> meanFeatureVectors = new List <double[]>();
                List <double[]> maxFeatureVectors  = new List <double[]>();
                List <double[]> stdFeatureVectors  = new List <double[]>();
                int             c = 0;
                while (c + numFrames < freqBandFeature.GetLength(0))
                {
                    // First, make a list of patches that would be equal to 1 second
                    List <double[]> sequencesOfFramesList = new List <double[]>();
                    for (int i = c; i < c + numFrames; i++)
                    {
                        sequencesOfFramesList.Add(freqBandFeature[i]);
                    }

                    List <double> mean = new List <double>();
                    List <double> std  = new List <double>();
                    List <double> max  = new List <double>();
                    double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                    // int len = sequencesOfFrames.GetLength(1);

                    // Second, calculate mean, max, and standard deviation of six vectors element-wise
                    for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                    {
                        double[] temp = new double[sequencesOfFrames.GetLength(0)];
                        for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                        {
                            temp[k] = sequencesOfFrames[k, j];
                        }

                        mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                        std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                        max.Add(temp.GetMaxValue());
                    }

                    meanFeatureVectors.Add(mean.ToArray());
                    maxFeatureVectors.Add(max.ToArray());
                    stdFeatureVectors.Add(std.ToArray());
                    c += numFrames;
                }

                allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
                allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
                allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
            }

            // +++++++++++++++++++++++++++++++++++Temporal Summarization

            // ++++++++++++++++++++++++++++++++++Writing features to file
            // First, concatenate mean, max, std for each second.
            // Then write to CSV file.

            for (int j = 0; j < allMeanFeatureVectors.Count; j++)
            {
                // write the features of each pre-defined frequency band into a separate CSV file
                var outputFeatureFile = Path.Combine(resultDir, "FeatureVectors" + j.ToString() + ".csv");

                // creating the header for CSV file
                List <string> header = new List <string>();
                for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].GetLength(1); i++)
                {
                    header.Add("mean" + i.ToString());
                }

                for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++)
                {
                    header.Add("max" + i.ToString());
                }

                for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++)
                {
                    header.Add("std" + i.ToString());
                }

                // concatenating mean, std, and max vector together for each 1 second
                List <double[]> featureVectors = new List <double[]>();
                for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].ToJagged().GetLength(0); i++)
                {
                    List <double[]> featureList = new List <double[]>
                    {
                        allMeanFeatureVectors.ToArray()[j].ToJagged()[i],
                                    allMaxFeatureVectors.ToArray()[j].ToJagged()[i],
                                    allStdFeatureVectors.ToArray()[j].ToJagged()[i],
                    };
                    double[] featureVector = DataTools.ConcatenateVectors(featureList);
                    featureVectors.Add(featureVector);
                }

                // writing feature vectors to CSV file
                using (StreamWriter file = new StreamWriter(outputFeatureFile))
                {
                    // writing the header to CSV file
                    foreach (var entry in header.ToArray())
                    {
                        file.Write(entry + ",");
                    }

                    file.Write(Environment.NewLine);

                    foreach (var entry in featureVectors.ToArray())
                    {
                        foreach (var value in entry)
                        {
                            file.Write(value + ",");
                        }

                        file.Write(Environment.NewLine);
                    }
                }
            }

            /*
             * // Reconstructing the target spectrogram based on clusters' centroids
             * List<double[,]> convertedSpec = new List<double[,]>();
             * int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand;
             * for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
             * {
             *  double[,] reconstructedSpec2 = KmeansClustering.ReconstructSpectrogram(allSequentialPatchMatrix.ToArray()[i], allClusteringOutput.ToArray()[i]);
             *  convertedSpec.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, columnPerFreqBand));
             * }
             *
             * sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(convertedSpec);
             *
             * // DO DRAW SPECTROGRAM
             * var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);
             * reconstructedSpecImage.Save(outputReSpecImagePath);
             */
        }

Exemple #15

0

Afficher le fichier

Fichier : Oscillations2010.cs Projet : Gwae1/audio-analysis

        } // end method ConvertODScores2Events()

        /*
         *      public static double PeakEntropy(double[] array)
         *      {
         *          bool[] peaks = DataTools.GetPeaks(array);
         *          int peakCount = DataTools.CountTrues(peaks);
         *          //set up histogram of peak energies
         *          double[] histogram = new double[peakCount];
         *          int count = 0;
         *          for (int k = 0; k < array.Length; k++)
         *          {
         *              if (peaks[k])
         *              {
         *                  histogram[count] = array[k];
         *                  count++;
         *              }
         *          }
         *          histogram = DataTools.NormaliseMatrixValues(histogram);
         *          histogram = DataTools.Normalise2Probabilites(histogram);
         *          double normFactor = Math.Log(histogram.Length) / DataTools.ln2;  //normalize for length of the array
         *          double entropy = DataTools.Entropy(histogram) / normFactor;
         *          return entropy;
         *      }
         *
         */

        /// <summary>
        /// returns the periodicity in an array of values.
        /// </summary>
        public static double[] PeriodicityAnalysis(double[] array)
        {
            //DataTools.writeBarGraph(array);
            var A         = AutoAndCrossCorrelation.MyCrossCorrelation(array, array); // do 2/3rds of maximum possible lag
            int dctLength = A.Length;

            A = DataTools.SubtractMean(A);

            //DataTools.writeBarGraph(A);

            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
            double[] dct = MFCCStuff.DCT(A, cosines);

            for (int i = 0; i < dctLength; i++)
            {
                dct[i] = Math.Abs(dct[i]); //convert to absolute values
            }

            //DataTools.writeBarGraph(dct);
            for (int i = 0; i < 3; i++)
            {
                dct[i] = 0.0;   //remove low freq oscillations from consideration
            }

            dct = DataTools.normalise2UnitLength(dct);
            var peaks = DataTools.GetPeaks(dct);

            // remove non-peak values and low values
            for (int i = 0; i < dctLength; i++)
            {
                if (!peaks[i] || dct[i] < 0.2)
                {
                    dct[i] = 0.0;
                }
            }

            DataTools.writeBarGraph(dct);

            //get periodicity of highest three values
            int peakCount = 3;
            var period    = new double[peakCount];
            var maxIndex  = new double[peakCount];

            for (int i = 0; i < peakCount; i++)
            {
                int indexOfMaxValue = DataTools.GetMaxIndex(dct);
                maxIndex[i] = indexOfMaxValue;

                //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
                if ((double)indexOfMaxValue == 0)
                {
                    period[i] = 0.0;
                }
                else
                {
                    period[i] = dctLength / (double)indexOfMaxValue * 2;
                }

                dct[indexOfMaxValue] = 0.0; // remove value for next iteration
            }

            LoggedConsole.WriteLine("Max indices = {0:f0},  {1:f0},  {2:f0}.", maxIndex[0], maxIndex[1], maxIndex[2]);
            return(period);
        }

Exemple #16

0

Afficher le fichier

Fichier : LewinsRail3OBSOLETE.cs Projet : stewartmacdonald/audio-analysis

        } //Analyze()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Returns a DataTable
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="configDict"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="diOutputDir"></param>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignor those set by user
            int    frameSize     = 1024;
            double windowOverlap = 0.0;

            int    upperBandMinHz     = int.Parse(configDict[KeyUpperfreqbandBtm]);
            int    upperBandMaxHz     = int.Parse(configDict[KeyUpperfreqbandTop]);
            int    lowerBandMinHz     = int.Parse(configDict[KeyLowerfreqbandBtm]);
            int    lowerBandMaxHz     = int.Parse(configDict[KeyLowerfreqbandTop]);
            double decibelThreshold   = double.Parse(configDict[KeyDecibelThreshold]);;  //dB
            double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); //in 0-1
            double minDuration        = double.Parse(configDict[KeyMinDuration]);        // seconds
            double maxDuration        = double.Parse(configDict[KeyMaxDuration]);        // seconds
            double minPeriod          = double.Parse(configDict[KeyMinPeriod]);          // seconds
            double maxPeriod          = double.Parse(configDict[KeyMaxPeriod]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName   = recording.BaseName;
            sonoConfig.WindowSize    = frameSize;
            sonoConfig.WindowOverlap = windowOverlap;
            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD");
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = freqBinWidth;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, (rowCount - 1), lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, (rowCount - 1), upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewins Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if ((lowerSubarray.Length != sampleLength) || (upperSubarray.Length != sampleLength))
                {
                    break;
                }
                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;                                  //in real data these bins are dominant and hide other frequency content
                }
                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if ((period < minPeriod) || (period > maxPeriod))
                {
                    continue;
                }
                for (int j = 0; j < sampleLength; j++) //lay down score for sample length
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }
                    periodicity[start + j] = period;
                }
            }
            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray);
            var hits = new double[rowCount, colCount];

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, tsRecordingtDuration));
        } //Analysis()

C# (CSharp) AutoAndCrossCorrelation Exemples