コード例 #1
0
        /// <summary>
        /// returns oscillations using the DCT.
        /// </summary>
        public static void GetOscillationUsingDct(double[] array, double framesPerSecond, double[,] cosines, out double oscilFreq, out double period, out double intenisty)
        {
            var modifiedArray = DataTools.SubtractMean(array);
            var dctCoeff      = MFCCStuff.DCT(modifiedArray, cosines);

            // convert to absolute values because not interested in negative values due to phase.
            for (int i = 0; i < dctCoeff.Length; i++)
            {
                dctCoeff[i] = Math.Abs(dctCoeff[i]);
            }

            // remove low freq oscillations from consideration
            int thresholdIndex = dctCoeff.Length / 5;

            for (int i = 0; i < thresholdIndex; i++)
            {
                dctCoeff[i] = 0.0;
            }

            dctCoeff = DataTools.normalise2UnitLength(dctCoeff);

            //dct = DataTools.NormaliseMatrixValues(dctCoeff); //another option to NormaliseMatrixValues
            int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

            //recalculate DCT duration in seconds
            double dctDuration = dctCoeff.Length / framesPerSecond;

            oscilFreq = indexOfMaxValue / dctDuration * 0.5;                             //Times 0.5 because index = Pi and not 2Pi
            period    = 2 * dctCoeff.Length / (double)indexOfMaxValue / framesPerSecond; //convert maxID to period in seconds
            intenisty = dctCoeff[indexOfMaxValue];
        }
コード例 #2
0
        public void TestDecibelSpectrogram()
        {
            var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

            // specfied linear scale
            var freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);

            var sonoConfig = new SonogramConfig
            {
                WindowSize              = freqScale.FinalBinCount * 2,
                WindowOverlap           = 0.2,
                SourceFName             = recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            // DO EQUALITY TEST on the AMPLITUDE SONGOGRAM DATA
            // Do not bother with the image because this is only an amplitude spectrogram.
            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

            // DO FILE EQUALITY TEST on the DECIBEL SONGOGRAM DATA
            // Do not bother with the image because this has been tested elsewhere.
            var decibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

            var expectedFile = PathHelper.ResolveAsset("StandardSonograms", "BAC2_20071008_DecibelSonogramData.EXPECTED.bin");

            // run this once to generate expected test data
            // uncomment this to update the binary data. Should be rarely needed
            // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
            //Binary.Serialize(expectedFile, decibelSonogram);

            var expected = Binary.Deserialize <double[, ]>(expectedFile);

            CollectionAssert.That.AreEqual(expected, decibelSonogram, EnvelopeAndFftTests.Delta);
        }
コード例 #3
0
        //##################################################################################################################################

        /// <summary>
        /// NOTE!!!! The decibel array has been normalised in 0 - 1.
        /// </summary>
        protected static Tuple <double[, ], double[]> MakeCepstrogram(SonogramConfig config, double[,] matrix, double[] decibels, int sampleRate)
        {
            double[,] m = matrix;
            int    nyquist            = sampleRate / 2;
            double epsilon            = config.epsilon;
            bool   includeDelta       = config.mfccConfig.IncludeDelta;
            bool   includeDoubleDelta = config.mfccConfig.IncludeDoubleDelta;

            //Log.WriteIfVerbose(" MakeCepstrogram(matrix, decibels, includeDelta=" + includeDelta + ", includeDoubleDelta=" + includeDoubleDelta + ")");

            //(i) APPLY FILTER BANK
            int  bandCount   = config.mfccConfig.FilterbankCount;
            bool doMelScale  = config.mfccConfig.DoMelScale;
            int  ccCount     = config.mfccConfig.CcCount;
            int  fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N +1. Subtract DC bin
            int  minHz       = config.MinFreqBand ?? 0;
            int  maxHz       = config.MaxFreqBand ?? nyquist;

            Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank  =" + matrix.GetLength(1));

            //error check that filterBankCount < FFTbins
            if (bandCount > fftBinCount)
            {
                throw new Exception(
                          "## FATAL ERROR in BaseSonogram.MakeCepstrogram():- Can't calculate cepstral coeff. FilterbankCount > FFTbins. (" +
                          bandCount + " > " + fftBinCount + ")\n\n");
            }

            //this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth.
            if (doMelScale)
            {
                m = MFCCStuff.MelFilterBank(m, bandCount, nyquist, minHz, maxHz); // using the Greg integral
            }
            else
            {
                m = MFCCStuff.LinearFilterBank(m, bandCount, nyquist, minHz, maxHz);
            }

            Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")");

            //(ii) CONVERT AMPLITUDES TO DECIBELS
            m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram

            //(iii) NOISE REDUCTION
            var tuple1 = SNR.NoiseReduce(m, config.NoiseReductionType, config.NoiseReductionParameter);

            m = tuple1.Item1;

            //(iv) calculate cepstral coefficients
            m = MFCCStuff.Cepstra(m, ccCount);

            //(v) NormaliseMatrixValues
            m = DataTools.normalise(m);

            //(vi) Calculate the full range of MFCC coefficients ie including decibel and deltas, etc
            m = MFCCStuff.AcousticVectors(m, decibels, includeDelta, includeDoubleDelta);
            var tuple2 = Tuple.Create(m, tuple1.Item2);

            return(tuple2); // return matrix and full bandwidth modal noise profile
        }
コード例 #4
0
        public void SonogramDecibelMethodsAreEquivalent()
        {
            var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

            // specfied linear scale
            var freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);

            var sonoConfig = new SonogramConfig
            {
                WindowSize              = freqScale.FinalBinCount * 2,
                WindowOverlap           = 0.2,
                SourceFName             = recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            // Method 1
            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
            var expectedDecibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

            // Method 2: make sure that the decibel spectrum is the same no matter which path we take to calculate it.
            var actualDecibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            CollectionAssert.That.AreEqual(expectedDecibelSonogram, actualDecibelSpectrogram.Data, EnvelopeAndFftTests.Delta);
        }
コード例 #5
0
        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
        /// Developed for GenericRecognizer of harmonics.
        /// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
        ///          But it requires that the frequency of any potential formants is not changing rapidly.
        ///          THis may not be suitable for detecting human speech. However can reduce the frame step.
        /// </summary>
        /// <param name="m">spectrogram data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);
            var binCount = m.GetLength(1);

            //set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

            // set up arrays to store decibels, formant intensity and max index.
            var dBArray       = new double[rowCount];
            var intensity     = new double[rowCount];
            var maxIndexArray = new int[rowCount];

            // for all time frames
            for (int t = 2; t < rowCount - 2; t++)
            {
                // get average of five adjacent frames
                var frame1 = MatrixTools.GetRow(m, t - 2);
                var frame2 = MatrixTools.GetRow(m, t - 1);
                var frame3 = MatrixTools.GetRow(m, t);
                var frame4 = MatrixTools.GetRow(m, t + 1);
                var frame5 = MatrixTools.GetRow(m, t + 2);
                var frame  = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    frame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
                }

                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    // Would normally normalise the xcorr values for overlap count.
                    // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
                    //normXr[i] = xr[i] / (colCount - i);
                    normXr[i] = xr[i];
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            }

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }
コード例 #6
0
        /// <summary>
        /// WARNING: calculation of k1 and k2 is faulty.
        /// MinDecibelReference should not be used ie k1 = EndpointDetectionConfiguration.SegmentationThresholdK1;
        /// See the alternative below
        ///
        /// ************* PARAMETERS FOR:- ENDPOINT DETECTION of VOCALISATIONS
        /// See Lamel et al 1981.
        /// They use k1, k2, k3 and k4, minimum pulse length and k1_k2Latency.
        /// Here we set k1 = k3, k4 = k2,  k1_k2Latency = 0.186s (5 frames)
        ///                  and "minimum pulse length" = 0.075s (2 frames)
        /// SEGMENTATION_THRESHOLD_K1 = decibels above the minimum level
        /// SEGMENTATION_THRESHOLD_K2 = decibels above the minimum level
        /// K1_K2_LATENCY = seconds delay between signal reaching k1 and k2 thresholds
        /// VOCAL_DELAY = seconds delay required to separate vocalisations
        /// MIN_VOCAL_DURATION = minimum length of energy pulse - do not use this - accept all pulses.
        /// SEGMENTATION_THRESHOLD_K1=3.5
        /// SEGMENTATION_THRESHOLD_K2=6.0
        /// K1_K2_LATENCY=0.05
        /// VOCAL_DELAY=0.2.
        /// </summary>
        public static int[] DetermineVocalisationEndpoints(double[] dbArray, double frameStep)
        {
            var k1k2Delay = (int)(K1K2Latency / frameStep);      //=5  frames delay between signal reaching k1 and k2 thresholds
            var frameGap  = (int)(VocalGap / frameStep);         //=10 frames delay required to separate vocalisations
            var minPulse  = (int)(MinPulseDuration / frameStep); //=2  frames is min vocal length

            return(MFCCStuff.VocalizationDetection(dbArray, K1Threshold, K2Threshold, k1k2Delay, frameGap, minPulse, null));
        }
コード例 #7
0
        public void SonogramDecibelMethodsAreEquivalent()
        {
            // Method 1
            var sonogram = new AmplitudeSonogram(this.sonoConfig, this.recording.WavReader);
            var expectedDecibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

            // Method 2: make sure that the decibel spectrum is the same no matter which path we take to calculate it.
            var actualDecibelSpectrogram = new SpectrogramStandard(this.sonoConfig, this.recording.WavReader);

            CollectionAssert.That.AreEqual(expectedDecibelSonogram, actualDecibelSpectrogram.Data, EnvelopeAndFftTests.Delta);
        }
コード例 #8
0
        private double[,] SobelEdgegram(double[,] matrix)
        {
            double[,] m = MFCCStuff.DecibelSpectra(matrix, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon); //from spectrogram

            //double[,] m = Speech.DecibelSpectra(matrix);

            //NOISE REDUCTION
            var output = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);

            this.SnrData.ModalNoiseProfile = output.Item2;
            return(ImageTools.SobelEdgeDetection(output.Item1));
        }
コード例 #9
0
        public static double[,] GetDecibelSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
        {
            int frameStep = frameSize;

            // get decibel spectrogram
            var results     = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);
            var spectrogram = MFCCStuff.DecibelSpectra(results.AmplitudeSpectrogram, results.WindowPower, recording.SampleRate, recording.Epsilon);

            // remove background noise from spectrogram
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(spectrogram);
            spectrogram = SNR.TruncateBgNoiseFromSpectrogram(spectrogram, spectralDecibelBgn);
            spectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogram, nhThreshold: 3.0);
            return(spectrogram);
        }
コード例 #10
0
        /// <summary>
        /// The data passed to this method must be the Spectral sonogram.
        /// </summary>
        public static Tuple <double[, ], double[]> GetCepstrogram(double[,] data, int minHz, int maxHz,
                                                                  int freqBinCount, double freqBinWidth, bool doMelScale, int ccCount)
        {
            ImageTools.DrawMatrix(data, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage1.jpg", false);
            double[,] m = SpectrogramTools.ExtractFreqSubband(data, minHz, maxHz, doMelScale, freqBinCount, freqBinWidth);
            ImageTools.DrawMatrix(m, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage2.jpg", false);

            //DO NOT DO NOISE REDUCTION BECAUSE ALREADY DONE
            //double[] modalNoise = SNR.CalculateModalNoise(m, 7); //calculate modal noise profile and smooth
            //m = SNR.NoiseReduce_Standard(m, modalNoise);
            //m = SNR.NoiseReduce_FixedRange(m, this.Configuration.DynamicRange);

            m = MFCCStuff.Cepstra(m, ccCount);
            m = DataTools.normalise(m);
            ImageTools.DrawMatrix(m, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage3.jpg", false);
            return(Tuple.Create(m, (double[])null));
        }
コード例 #11
0
        /// <summary>
        /// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class.
        /// </summary>
        public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram)
        {
            this.Configuration = amplitudeSpectrogram.Configuration;
            this.Attributes    = amplitudeSpectrogram.Attributes;

            // (ii) CONVERT AMPLITUDES TO DECIBELS
            this.Data = MFCCStuff.DecibelSpectra(amplitudeSpectrogram.Data, this.Attributes.WindowPower, this.Attributes.SampleRate, this.Attributes.Epsilon);

            // (iii) NOISE REDUCTION
            var tuple = SNR.NoiseReduce(this.Data, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);

            this.Data = tuple.Item1;   // store data matrix

            if (this.SnrData != null)
            {
                this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile
            }
        }
コード例 #12
0
        /// <summary>
        /// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
        /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
        /// Developed for GenericRecognizer of harmonics.
        /// </summary>
        /// <param name="m">data matrix.</param>
        /// <param name="dBThreshold">Minimum sound level.</param>
        /// <returns>two arrays.</returns>
        public static Tuple <double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
        {
            int rowCount = m.GetLength(0);
            int colCount = m.GetLength(1);

            double[] dBArray       = new double[rowCount];
            var      intensity     = new double[rowCount]; //an array of formant intensity
            var      maxIndexArray = new int[rowCount];    //an array of max value index values
            var      binCount      = m.GetLength(1);

            double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients

            // for all time frames
            for (int t = 0; t < rowCount; t++)
            {
                var    frame    = MatrixTools.GetRow(m, t);
                double maxValue = frame.Max();
                dBArray[t] = maxValue;
                if (maxValue < dBThreshold)
                {
                    continue;
                }

                double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

                // xr has twice length of frame and is symmetrical.
                // Require only first half. Also need to normalise the values for overlap count.
                double[] normXr = new double[colCount];
                for (int i = 0; i < colCount; i++)
                {
                    normXr[i] = xr[i] / (colCount - i);
                }

                // now do DCT across the auto cross xr
                int lowerDctBound   = 2;
                var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
                intensity[t]     = dctCoefficients[indexOfMaxValue];
                maxIndexArray[t] = indexOfMaxValue;
            } // frames = rows of matrix

            return(Tuple.Create(dBArray, intensity, maxIndexArray));
        }
コード例 #13
0
        /// <summary>
        /// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
        /// </summary>
        public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
        {
            this.Configuration = config;
            this.Attributes    = new SpectrogramAttributes();

            double minDuration = 1.0;

            if (wav.Time.TotalSeconds < minDuration)
            {
                LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
                return;
            }

            //set attributes for the current recording and spectrogram type
            this.Attributes.SampleRate       = wav.SampleRate;
            this.Attributes.Duration         = wav.Time;
            this.Attributes.NyquistFrequency = wav.SampleRate / 2;
            this.Attributes.Duration         = wav.Time;
            this.Attributes.MaxAmplitude     = wav.CalculateMaximumAmplitude();
            this.Attributes.FrameDuration    = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate);

            var recording = new AudioRecording(wav);
            var fftdata   = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                config.WindowSize,
                config.WindowOverlap,
                this.Configuration.WindowFunction);

            // now recover required data
            //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
            this.Attributes.Epsilon     = fftdata.Epsilon;
            this.Attributes.WindowPower = fftdata.WindowPower;
            this.Attributes.FrameCount  = fftdata.FrameCount;
            this.Data = fftdata.AmplitudeSpectrogram;

            // IF REQUIRED CONVERT TO MEL SCALE
            if (this.Configuration.DoMelScale)
            {
                // this mel scale conversion uses the "Greg integral" !
                this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
            }
        }
コード例 #14
0
        public void TestDecibelSpectrogram()
        {
            // DO EQUALITY TEST on the AMPLITUDE SONGOGRAM DATA
            // Do not bother with the image because this is only an amplitude spectrogram.
            var sonogram = new AmplitudeSonogram(this.sonoConfig, this.recording.WavReader);

            // DO FILE EQUALITY TEST on the DECIBEL SONGOGRAM DATA
            // Do not bother with the image because this has been tested elsewhere.
            var decibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

            var expectedFile = PathHelper.ResolveAsset("StandardSonograms", "BAC2_20071008_DecibelSonogramData.EXPECTED.bin");

            // run this once to generate expected test data
            // uncomment this to update the binary data. Should be rarely needed
            // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
            //Binary.Serialize(expectedFile, decibelSonogram);

            var expected = Binary.Deserialize <double[, ]>(expectedFile);

            CollectionAssert.That.AreEqual(expected, decibelSonogram, EnvelopeAndFftTests.Delta);
        }
コード例 #15
0
        public void TestStandardNoiseRemoval()
        {
            var recording  = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
            int windowSize = 512;
            var sr         = recording.SampleRate;

            // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
            double windowOverlap  = 0.0;
            var    windowFunction = WindowFunctions.HAMMING.ToString();

            var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                windowSize,
                windowOverlap,
                windowFunction);

            // Now recover the data
            // The following data is required when constructing sonograms
            //var duration = recording.WavReader.Time;
            //var frameCount = fftdata.FrameCount;
            //var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

            // The following call to NoiseProfile.CalculateBackgroundNoise(double[,] spectrogram)
            // returns a noise profile that is used as the BGN spectral index.
            // It calculates the modal background noise for each freqeuncy bin and then returns a smoothed version.
            // By default, the number of SDs = 0 and the smoothing window = 7.
            // Method assumes that the passed spectrogram is oriented as: rows=frames, cols=freq bins.</param>

            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

            var resourcesDir         = PathHelper.ResolveAssetPath("Indices");
            var expectedSpectrumFile = new FileInfo(resourcesDir + "\\NoiseProfile.bin");

            //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn);
            var expectedVector = Binary.Deserialize <double[]>(expectedSpectrumFile);

            CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
        }
コード例 #16
0
        } // LocalPeaks()

        /// <summary>
        /// CALCULATEs SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG
        /// This method is only called from IndexCalulate.analysis() when the IndexCalculation Duration is less than 10 seconds,
        /// because need to recalculate background noise etc.
        /// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
        /// NOTE: We require a noise reduced decibel spectrogram
        /// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
        /// </summary>
        public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
        {
            double epsilon          = recording.Epsilon;
            int    sampleRate       = recording.WavReader.SampleRate;
            int    bufferFrameCount = 2; // 2 because must allow for edge effects when using 5x5 grid to find ridges.
            int    ridgeBuffer      = frameSize * bufferFrameCount;
            var    ridgeRecording   = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
            int    frameStep        = frameSize;
            var    dspOutput        = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

            // Generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
            // i: generate the SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
            double[,] decibelSpectrogram;
            if (octaveScale)
            {
                var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
                decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
            }
            else
            {
                decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
            }

            // calculate the noise profile
            var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            double nhDecibelThreshold = 2.0; // SPECTRAL dB THRESHOLD for smoothing background

            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold);

            // thresholds in decibels
            // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);

            return(sptInfo);
        }
コード例 #17
0
        public static double[] DoDct(double[] vector, double[,] cosines, int lowerDctBound)
        {
            //var dctArray = DataTools.Vector2Zscores(dctArray);
            var dctArray  = DataTools.SubtractMean(vector);
            int dctLength = dctArray.Length;

            double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines);

            // convert to absolute values because not interested in negative values due to phase.
            for (int i = 0; i < dctLength; i++)
            {
                dctCoeff[i] = Math.Abs(dctCoeff[i]);
            }

            // remove lower coefficients from consideration because they dominate
            for (int i = 0; i < lowerDctBound; i++)
            {
                dctCoeff[i] = 0.0;
            }

            dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
            return(dctCoeff);
        }
コード例 #18
0
        }//end CONSTRUCTOR

        public override void Make(double[,] amplitudeM)
        {
            double[,] m = amplitudeM;

            // (i) IF REQUIRED CONVERT TO FULL BAND WIDTH MEL SCALE
            // Make sure you have Configuration.MelBinCount somewhere
            if (this.Configuration.DoMelScale)
            {
                m = MFCCStuff.MelFilterBank(m, this.Configuration.MelBinCount, this.NyquistFrequency, 0, this.NyquistFrequency); // using the Greg integral
            }

            // (ii) CONVERT AMPLITUDES TO DECIBELS
            m = MFCCStuff.DecibelSpectra(m, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon);

            // (iii) NOISE REDUCTION
            var tuple = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);

            this.Data = tuple.Item1;   // store data matrix

            if (this.SnrData != null)
            {
                this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile
            }
        }
コード例 #19
0
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold   = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;

            //double eventThreshold = lbConfig.EventThreshold; //in 0-1

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Could smooth here rather than above. Above seemed slightly better?
            amplitudeScores  = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedEvents)
            {
                //rowtop,  rowWidth
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    // lay down score for sample length
                    for (int j = 0; j < dctLength; j++)
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################

            // calculate the cosine similarity scores
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            Image debugImage = null;

            if (drawDebugImage)
            {
                // display a variety of debug score arrays

                //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
                DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };

                // other debug plots
                //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
コード例 #20
0
        /// <summary>
        /// Calculate summary statistics for supplied temporal and spectral targets.
        /// </summary>
        /// <remarks>
        /// The acoustic statistics calculated in this method are based on methods outlined in
        /// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
        /// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
        /// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
        /// ..
        /// The Briggs feature are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
        /// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
        ///    We use entropy instead because value not dependent on row or column count because it is normalized.
        /// For the following meausres of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
        /// 2. freq-mean
        /// 3. freq-variance
        /// 4. freq-skew and kurtosis
        /// 5. time-mean
        /// 6. time-variance
        /// 7. time-skew and kurtosis
        /// 8. freq-max (normalized)
        /// 9. time-max (normalized)
        /// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
        /// ...
        /// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
        /// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
        /// NOTE 2: To NormaliseMatrixValues for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
        ///         There are advantages and disadvantages to Briggs method versus ours. In our case, we hve to convert decibel values back to
        ///         energy values when calculating the statistics for the extracted acoustic event.
        /// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
        ///         Ony mean and standard deviation.
        /// ..
        /// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
        /// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
        /// BUT not so long as to cause memory constipation.
        /// </remarks>
        /// <param name="recording">as type AudioRecording which contains the event</param>
        /// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording</param>
        /// <param name="spectralTarget">both bottom and top bounds in Hertz</param>
        /// <param name="config">parameters that determine the outcome of the analysis</param>
        /// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param>
        /// <returns>an instance of EventStatistics</returns>
        public static EventStatistics AnalyzeAudioEvent(
            AudioRecording recording,
            Range <TimeSpan> temporalTarget,
            Range <double> spectralTarget,
            EventStatisticsConfiguration config,
            TimeSpan segmentStartOffset)
        {
            var stats = new EventStatistics
            {
                EventStartSeconds      = temporalTarget.Minimum.TotalSeconds,
                EventEndSeconds        = temporalTarget.Maximum.TotalSeconds,
                LowFrequencyHertz      = spectralTarget.Minimum,
                HighFrequencyHertz     = spectralTarget.Maximum,
                SegmentDurationSeconds = recording.Duration.TotalSeconds,
                SegmentStartSeconds    = segmentStartOffset.TotalSeconds,
            };

            // temporal target is supplied relative to recording, but not the supplied audio segment
            // shift coordinates relative to segment
            var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

            if (!recording
                .Duration
                .AsRangeFromZero(Topology.Inclusive)
                .Contains(localTemporalTarget))
            {
                stats.Error        = true;
                stats.ErrorMessage =
                    $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

                return(stats);
            }

            // convert recording to spectrogram
            int    sampleRate = recording.SampleRate;
            double epsilon    = recording.Epsilon;

            // extract the spectrogram
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

            double hertzBinWidth         = dspOutput1.FreqBinWidth;
            var    stepDurationInSeconds = config.FrameStep / (double)sampleRate;
            var    startFrame            = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

            // subtract 1 frame because want to end before start of end point.
            var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

            var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
            var topBin    = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

            // Events can have their high value set to the nyquist.
            // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
            // these cases. So we just ask for the bin below.
            if (topBin >= config.FrameSize / 2)
            {
                topBin = (config.FrameSize / 2) - 1;
            }

            // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // extract the required acoustic event
            var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

            // Get the SNR of the event. This is just the max value in the matrix because noise reduced
            MatrixTools.MinMax(eventMatrix, out _, out double max);
            stats.SnrDecibels = max;

            // Now need to convert event matrix back to energy values before calculating other statistics
            eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

            var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
            var rowAverages    = MatrixTools.GetRowAverages(eventMatrix);

            // calculate the mean and temporal standard deviation in decibels
            NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
            stats.MeanDecibels           = 10 * Math.Log10(mean);
            stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate the frequency standard deviation in decibels
            NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
            stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate relative location of the temporal maximum
            int maxRowId = DataTools.GetMaxIndex(rowAverages);

            stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

            // calculate the entropy dispersion/concentration indices
            stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
            stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

            // calculate the spectral centroid and the dominant frequency
            double binCentroid = CalculateSpectralCentroid(columnAverages);

            stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
            int maxColumnId = DataTools.GetMaxIndex(columnAverages);

            stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

            // remainder of this method is to produce debugging images. Can comment out when not debugging.

            /*
             * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
             * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
             * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
             * image4.Save(path4);
             * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
             * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
             * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
             * image4.Save(path4);
             */
            return(stats);
        }
コード例 #21
0
        public static Tuple <double[]> Execute_MFCC_XCOR(double[,] target, double dynamicRange, SpectrogramStandard sonogram,
                                                         List <AcousticEvent> segments, int minHz, int maxHz, double minDuration)
        {
            Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
            if (segments == null)
            {
                return(null);
            }

            int minBin       = (int)(minHz / sonogram.FBinWidth);
            int maxBin       = (int)(maxHz / sonogram.FBinWidth);
            int targetLength = target.GetLength(0);

            //set up the matrix of cosine coefficients
            int coeffCount = 12;                                             //only use first 12 coefficients.
            int binCount   = target.GetLength(1);                            //number of filters in filter bank

            double[,] cosines = MFCCStuff.Cosines(binCount, coeffCount + 1); //set up the cosine coefficients

            //adjust target's dynamic range to that set by user
            target = SNR.SetDynamicRange(target, 3.0, dynamicRange); //set event's dynamic range
            target = MFCCStuff.Cepstra(target, coeffCount, cosines);
            double[] v1 = DataTools.Matrix2Array(target);
            v1 = DataTools.normalise2UnitLength(v1);

            string imagePath2 = @"C:\SensorNetworks\Output\FELT_Currawong\target.png";
            var    result1    = BaseSonogram.Data2ImageData(target);
            var    image      = result1.Item1;

            ImageTools.DrawMatrix(image, 1, 1, imagePath2);

            double[] scores = new double[sonogram.FrameCount];
            foreach (AcousticEvent av in segments)
            {
                Log.WriteLine("SEARCHING SEGMENT.");
                int startRow = (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond);
                int endRow   = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
                if (endRow >= sonogram.FrameCount)
                {
                    endRow = sonogram.FrameCount - 1;
                }

                endRow -= targetLength;
                if (endRow <= startRow)
                {
                    endRow = startRow + 1;  //want minimum of one row
                }

                for (int r = startRow; r < endRow; r++)
                {
                    double[,] matrix = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
                    matrix           = SNR.SetDynamicRange(matrix, 3.0, dynamicRange); //set event's dynamic range

                    //string imagePath2 = @"C:\SensorNetworks\Output\FELT_Gecko\compare.png";
                    //var image = BaseSonogram.Data2ImageData(matrix);
                    //ImageTools.DrawMatrix(image, 1, 1, imagePath2);
                    matrix = MFCCStuff.Cepstra(matrix, coeffCount, cosines);

                    double[] v2 = DataTools.Matrix2Array(matrix);
                    v2 = DataTools.normalise2UnitLength(v2);
                    double crossCor = DataTools.DotProduct(v1, v2);
                    scores[r] = crossCor;
                } //end of rows in segment
            }     //foreach (AcousticEvent av in segments)

            var tuple = Tuple.Create(scores);

            return(tuple);
        }
コード例 #22
0
        /// <summary>
        /// THIS METHOD NO LONGER IN USE.
        /// NOT USEFUL FOR ANIMAL CALLS.
        /// Tried this but it is suitable only when there is guarantee of numerous spectral tracks as in the vowels of human speech.
        /// It yields SPURIOUS RESULTS where there is only one whistle track.
        /// </summary>
        public static double[,] DetectHarmonicsUsingDCT(double[,] matrix, int minBin, int maxBin, int hzWidth, bool normaliseDCT, int minPeriod, int maxPeriod, double dctThreshold)
        {
            int dctLength = maxBin - minBin + 1;                   //DCT spans N freq bins

            int minIndex = (int)(hzWidth / (double)maxPeriod * 2); //Times 0.5 because index = Pi and not 2Pi
            int maxIndex = (int)(hzWidth / (double)minPeriod * 2); //Times 0.5 because index = Pi and not 2Pi

            //double period = hzWidth / (double)indexOfMaxValue * 2; //Times 2 because index = Pi and not 2Pi
            if (maxIndex > dctLength)
            {
                maxIndex = dctLength; //safety check in case of future changes to code.
            }

            int rows = matrix.GetLength(0);
            int cols = matrix.GetLength(1);

            double[,] hits = new double[rows, cols];

            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients

            for (int r = 0; r < rows - dctLength; r++)
            {
                //for (int c = minBin; c <= minBin; c++)//traverse columns - skip DC column
                //{
                var array = new double[dctLength];

                //accumulate J rows of values
                for (int i = 0; i < dctLength; i++)
                {
                    for (int j = 0; j < 5; j++)
                    {
                        array[i] += matrix[r + j, minBin + i];
                    }
                }

                array = DataTools.SubtractMean(array);

                //     DataTools.writeBarGraph(array);

                double[] dct = MFCCStuff.DCT(array, cosines);
                for (int i = 0; i < dctLength; i++)
                {
                    dct[i] = Math.Abs(dct[i]); //convert to absolute values
                }

                for (int i = 0; i < 5; i++)
                {
                    dct[i] = 0.0;  //remove low freq values from consideration
                }

                if (normaliseDCT)
                {
                    dct = DataTools.normalise2UnitLength(dct);
                }

                int indexOfMaxValue = DataTools.GetMaxIndex(dct);

                //DataTools.writeBarGraph(dct);

                double period = hzWidth / (double)indexOfMaxValue * 2; //Times 2 because index = Pi and not 2Pi

                //mark DCT location with harmonic freq, only if harmonic freq is in correct range and amplitude
                if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
                {
                    for (int i = 0; i < dctLength; i++)
                    {
                        hits[r, minBin + i] = period;
                    }

                    for (int i = 0; i < dctLength; i++)
                    {
                        hits[r + 1, minBin + i] = period; //alternate row
                    }
                }

                //c += 5; //skip columns
                //}
                r++; //do alternate row
            }

            return(hits);
        }
コード例 #23
0
ファイル: BaseSonogram.cs プロジェクト: Gwae1/audio-analysis
        public static Image <Rgb24> GetSonogramImage(double[,] data, int nyquistFreq, int maxFrequency, bool doMelScale, int binHeight, bool doHighlightSubband, int subBandMinHz, int subBandMaxHz)
        {
            int width   = data.GetLength(0); // Number of spectra in sonogram
            int fftBins = data.GetLength(1);
            int maxBin  = (int)Math.Floor(fftBins * maxFrequency / (double)nyquistFreq);

            int imageHeight = maxBin * binHeight; // image ht = sonogram ht. Later include grid and score scales

            //set up min, max, range for normalising of dB values
            DataTools.MinMax(data, out double min, out double max);
            double range = max - min;

            // readjust min and max to create the effect of contrast stretching. It enhances the spectrogram a bit
            double fractionalStretching = 0.01;

            min   = min + (range * fractionalStretching);
            max   = max - (range * fractionalStretching);
            range = max - min;

            //int? minHighlightFreq = this.subBand_MinHz;
            //int? maxHighlightFreq = this.subBand_MaxHz;
            //int minHighlightBin = (minHighlightFreq == null) ? 0 : (int)Math.Round((double)minHighlightFreq / (double)NyquistFrequency * fftBins);
            //int maxHighlightBin = (maxHighlightFreq == null) ? 0 : (int)Math.Round((double)maxHighlightFreq / (double)NyquistFrequency * fftBins);

            //calculate top and bottom of sub-band
            int minHighlightBin = (int)Math.Round(subBandMinHz / (double)nyquistFreq * fftBins);
            int maxHighlightBin = (int)Math.Round(subBandMaxHz / (double)nyquistFreq * fftBins);

            if (doMelScale)
            {
                double maxMel      = MFCCStuff.Mel(nyquistFreq);
                int    melRange    = (int)(maxMel - 0 + 1);
                double pixelPerMel = imageHeight / (double)melRange;
                double minBandMel  = MFCCStuff.Mel(subBandMinHz);
                double maxBandMel  = MFCCStuff.Mel(subBandMaxHz);
                minHighlightBin = (int)Math.Round(minBandMel * pixelPerMel);
                maxHighlightBin = (int)Math.Round(maxBandMel * pixelPerMel);
            }

            Color[] grayScale = ImageTools.GrayScale();

            var bmp     = new Image <Rgb24>(width, imageHeight);
            int yOffset = imageHeight;

            // for all freq bins
            for (int y = 0; y < maxBin; y++)
            {
                //repeat this bin if ceptral image
                for (int r = 0; r < binHeight; r++)
                {
                    // for all pixels in line
                    for (int x = 0; x < width; x++)
                    {
                        // NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range
                        double value = (data[x, y] - min) / range;
                        int    c     = 255 - (int)Math.Floor(255.0 * value); //original version
                        if (c < 0)
                        {
                            c = 0;
                        }
                        else if (c >= 256)
                        {
                            c = 255;
                        }

                        int g = c + 40; // green tinge used in the template scan band
                        if (g >= 256)
                        {
                            g = 255;
                        }

                        var col = doHighlightSubband && IsInBand(y, minHighlightBin, maxHighlightBin) ? Color.FromRgb((byte)c, (byte)g, (byte)c) : grayScale[c];
                        bmp[x, yOffset - 1] = col;
                    }

                    yOffset--;
                } //end repeats over one track
            }

            return(bmp);
        }
コード例 #24
0
        } // end method ConvertODScores2Events()

        /*
         *      public static double PeakEntropy(double[] array)
         *      {
         *          bool[] peaks = DataTools.GetPeaks(array);
         *          int peakCount = DataTools.CountTrues(peaks);
         *          //set up histogram of peak energies
         *          double[] histogram = new double[peakCount];
         *          int count = 0;
         *          for (int k = 0; k < array.Length; k++)
         *          {
         *              if (peaks[k])
         *              {
         *                  histogram[count] = array[k];
         *                  count++;
         *              }
         *          }
         *          histogram = DataTools.NormaliseMatrixValues(histogram);
         *          histogram = DataTools.Normalise2Probabilites(histogram);
         *          double normFactor = Math.Log(histogram.Length) / DataTools.ln2;  //normalize for length of the array
         *          double entropy = DataTools.Entropy(histogram) / normFactor;
         *          return entropy;
         *      }
         *
         */

        /// <summary>
        /// returns the periodicity in an array of values.
        /// </summary>
        public static double[] PeriodicityAnalysis(double[] array)
        {
            //DataTools.writeBarGraph(array);
            var A         = AutoAndCrossCorrelation.MyCrossCorrelation(array, array); // do 2/3rds of maximum possible lag
            int dctLength = A.Length;

            A = DataTools.SubtractMean(A);

            //DataTools.writeBarGraph(A);

            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
            double[] dct = MFCCStuff.DCT(A, cosines);

            for (int i = 0; i < dctLength; i++)
            {
                dct[i] = Math.Abs(dct[i]); //convert to absolute values
            }

            //DataTools.writeBarGraph(dct);
            for (int i = 0; i < 3; i++)
            {
                dct[i] = 0.0;   //remove low freq oscillations from consideration
            }

            dct = DataTools.normalise2UnitLength(dct);
            var peaks = DataTools.GetPeaks(dct);

            // remove non-peak values and low values
            for (int i = 0; i < dctLength; i++)
            {
                if (!peaks[i] || dct[i] < 0.2)
                {
                    dct[i] = 0.0;
                }
            }

            DataTools.writeBarGraph(dct);

            //get periodicity of highest three values
            int peakCount = 3;
            var period    = new double[peakCount];
            var maxIndex  = new double[peakCount];

            for (int i = 0; i < peakCount; i++)
            {
                int indexOfMaxValue = DataTools.GetMaxIndex(dct);
                maxIndex[i] = indexOfMaxValue;

                //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
                if ((double)indexOfMaxValue == 0)
                {
                    period[i] = 0.0;
                }
                else
                {
                    period[i] = dctLength / (double)indexOfMaxValue * 2;
                }

                dct[indexOfMaxValue] = 0.0; // remove value for next iteration
            }

            LoggedConsole.WriteLine("Max indices = {0:f0},  {1:f0},  {2:f0}.", maxIndex[0], maxIndex[1], maxIndex[2]);
            return(period);
        }
コード例 #25
0
        ///  <summary>
        ///  ################ THE KEY ANALYSIS METHOD for TRILLS
        ///
        ///  See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
        ///  </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lwConfig"></param>
        /// <param name="returnDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration        = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration        = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod          = lwConfig.MinPeriod;          // seconds
            double maxPeriod          = lwConfig.MaxPeriod;          // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            //int colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // Could smooth here rather than above. Above seemed slightly better?
            //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedTrillEvents)
            {
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double   oscilFreq;
                    double   period;
                    double   intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile          = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink;  // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink;  // seconds

            // want stronger threshold for tink because brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var    predictedTinkEvents  = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            foreach (var ae2 in predictedTinkEvents)
            {
                // Prune the list of potential acoustic events, for example using Cosine Similarity.

                //rowtop,  rowWidth
                //int eventStart = ae2.Oblong.RowTop;
                //int eventWidth = ae2.Oblong.RowWidth;
                //int step = 2;
                //double maximumIntensity = 0.0;

                // add abbreviatedSpeciesName into event
                //if (maximumIntensity >= intensityThreshold)
                //{
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

                //ae2.Score_MaxInEvent = maximumIntensity;
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);

                //}
            }

            //######################################################################

            var   scorePlot  = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
コード例 #26
0
        public static void Main(Arguments arguments)
        {
            //1. set up the necessary files
            //DirectoryInfo diSource = arguments.Source.Directory;
            FileInfo fiSourceRecording = arguments.Source;
            FileInfo fiConfig          = arguments.Config.ToFileInfo();
            FileInfo fiImage           = arguments.Output.ToFileInfo();

            fiImage.CreateParentDirectories();

            string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING";
            string date  = "# DATE AND TIME: " + DateTime.Now;

            LoggedConsole.WriteLine(title);
            LoggedConsole.WriteLine(date);
            LoggedConsole.WriteLine("# Input  audio file: " + fiSourceRecording.Name);
            LoggedConsole.WriteLine("# Output image file: " + fiImage);

            //2. get the config dictionary
            Config configuration = ConfigFile.Deserialize(fiConfig);

            //below three lines are examples of retrieving info from Config config
            //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName];
            //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false;
            //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold;

            //3 transfer conogram parameters to a dictionary to be passed around
            var configDict = new Dictionary <string, string>();

            // #Resample rate must be 2 X the desired Nyquist. Default is that of recording.
            configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString();
            configDict["FrameLength"]  = configuration[AnalysisKeys.FrameLength] ?? "512";
            int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512;

            // #Frame Overlap as fraction: default=0.0
            configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0";
            double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0;

            // #MinHz: 500
            // #MaxHz: 3500
            // #NOISE REDUCTION PARAMETERS
            configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true";
            configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? "3.0";

            configDict["ADD_AXES"]             = configuration["ADD_AXES"] ?? "true";
            configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true";

            // 3: GET RECORDING
            var startOffsetMins = TimeSpan.Zero;
            var endOffsetMins   = TimeSpan.Zero;

            FileInfo fiOutputSegment = fiSourceRecording;

            if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero))
            {
                var buffer = new TimeSpan(0, 0, 0);
                fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav"));

                //This method extracts segment and saves to disk at the location fiOutputSegment.
                var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate;
                AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment);
            }

            var recording = new AudioRecording(fiOutputSegment.FullName);

            // EXTRACT ENVELOPE and SPECTROGRAM
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);

            // average absolute value over the minute recording
            ////double[] avAbsolute = dspOutput.Average;

            // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ##################################
            // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            // double totalSeconds = wavDuration.TotalSeconds;

            // double[] signalEnvelope = dspOutput.Envelope;
            // double avSignalEnvelope = signalEnvelope.Average();
            // double[] frameEnergy = dspOutput.FrameEnergy;
            // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds;
            // double binWidth = dspOutput.BinWidth;
            // int nyquistBin = dspOutput.NyquistBin;
            // dspOutput.WindowPower,
            // dspOutput.FreqBinWidth
            int    nyquistFreq = dspOutput.NyquistFreq;
            double epsilon     = recording.Epsilon;

            // i: prepare amplitude spectrogram
            double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.
            var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData));

            // ii: prepare decibel spectrogram prior to noise removal
            double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon);
            decibelSpectrogramdata           = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata);
            var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // iii: Calculate background noise spectrum in decibels
            // Calculate noise value for each freq bin.
            double sdCount        = 0.0; // number of SDs above the mean for noise removal
            var    decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount);

            // DataTools.writeBarGraph(dBProfile.NoiseMode);

            // iv: Prepare noise reduced spectrogram
            decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds);

            //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background
            //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold);
            var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // prepare new sonogram config and draw second image going down different code pathway
            var config = new SonogramConfig
            {
                MinFreqBand             = 0,
                MaxFreqBand             = 10000,
                NoiseReductionType      = SNR.KeyToNoiseReductionType("Standard"),
                NoiseReductionParameter = 1.0,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,
            };

            //var mfccConfig = new MfccConfiguration(config);
            int  bandCount  = config.mfccConfig.FilterbankCount;
            bool doMelScale = config.mfccConfig.DoMelScale;
            int  ccCount    = config.mfccConfig.CcCount;
            int  fftBins    = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band
            int  minHz      = config.MinFreqBand ?? 0;
            int  maxHz      = config.MaxFreqBand ?? nyquistFreq;

            var standardSonogram = new SpectrogramStandard(config, recording.WavReader);
            var image4           = standardSonogram.GetImage();

            // TODO next line crashes - does not produce cepstral sonogram.
            //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader);
            //Image image5 = cepSng.GetImage();

            //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict);
            //var image = mti.GetImage();

            //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config);
            //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict);

            //prepare sonogram images
            var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false));

            protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond));
            protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width));
            protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram));
            var image6 = protoImage6.GetImage();

            var list = new List <Image <Rgb24> >();

            list.Add(image1); // amplitude spectrogram
            list.Add(image2); // decibel spectrogram before noise removal
            list.Add(image3); // decibel spectrogram after noise removal
            list.Add(image4); // second version of noise reduced spectrogram

            //list.Add(image5); // ceptral sonogram
            list.Add(image6.CloneAs <Rgb24>()); // multitrack image

            Image finalImage = ImageTools.CombineImagesVertically(list);

            finalImage.Save(fiImage.FullName);

            ////2: NOISE REMOVAL
            //double[,] originalSg = sonogram.Data;
            //double[,] mnr        = sonogram.Data;
            //mnr = ImageTools.WienerFilter(mnr, 3);

            //double backgroundThreshold = 4.0;   //SETS MIN DECIBEL BOUND
            //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold);

            //double ConfigRange = 70;        //sets the the max dB
            //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange);

            ////3: Spectral tracks sonogram
            //byte[,] binary = MatrixTools.IdentifySpectralRidges(mnr);
            //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10);
            //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary);
            ////binary = MatrixTools.PickOutLines(binary); //syntactic approach

            //sonogram.SetBinarySpectrum(binary);
            ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg);

            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_tracks.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn);

            //3: prepare image of spectral peaks sonogram
            //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_peaks.png";
            //image.Save(fn);

            //LoggedConsole.WriteLine("Spectral peaks  sonogram to file: " + fn);

            //4: Sobel approach
            //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_sobel.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn);

            // I1.txt contains the sonogram matrix produced by matlab
            //string matlabFile = @"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt";
            //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166);

            //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]);
            //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]);

            // TODO put this back once sonogram issues resolved

            /*
             * LoggedConsole.WriteLine("START: AED");
             * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix);
             * LoggedConsole.WriteLine("END: AED");
             *
             *
             * //set up static variables for init Acoustic events
             * //AcousticEvent.   doMelScale = config.DoMelScale;
             * AcousticEvent.FreqBinCount = config.FreqBinCount;
             * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount;
             * //  int minF        = (int)config.MinFreqBand;
             * //  int maxF        = (int)config.MaxFreqBand;
             * AcousticEvent.FrameDuration = config.GetFrameOffset();
             *
             *
             * var events = new List<EventPatternRecog.Rectangle>();
             * foreach (Oblong o in oblongs)
             * {
             *  var e = new AcousticEvent(o);
             *  events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq));
             *  //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq);
             * }
             *
             * LoggedConsole.WriteLine("# AED events: " + events.Count);
             *
             * LoggedConsole.WriteLine("START: EPR");
             * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events);
             * LoggedConsole.WriteLine("END: EPR");
             *
             * var eprEvents = new List<AcousticEvent>();
             * foreach (EventPatternRecog.Rectangle r in eprRects)
             * {
             *  var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false);
             *  LoggedConsole.WriteLine(ae.WriteProperties());
             *  eprEvents.Add(ae);
             * }
             *
             * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png");
             *
             * bool doHighlightSubband = false; bool add1kHzLines = true;
             * var image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
             * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
             * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
             * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
             * image.AddEvents(eprEvents);
             * image.Save(outputFolder + wavFileName + ".png");
             */

            LoggedConsole.WriteLine("\nFINISHED!");
        }
コード例 #27
0
        /// <summary>
        /// Currently this method is called by only one species recognizer - LitoriaCaerulea.
        /// </summary>
        /// <param name="ipArray">an array of decibel values.</param>
        /// <param name="framesPerSecond">the frame rate.</param>
        /// <param name="decibelThreshold">Ignore frames below this threshold.</param>
        /// <param name="dctDuration">Duration in seconds of the required DCT.</param>
        /// <param name="minOscFreq">minimum oscillation frequency.</param>
        /// <param name="maxOscFreq">maximum oscillation frequency.</param>
        /// <param name="dctThreshold">Threshold for the maximum DCT coefficient.</param>
        /// <param name="dctScores">an array of dct scores.</param>
        /// <param name="oscFreq">an array of oscillation frequencies.</param>
        public static void DetectOscillations(
            double[] ipArray,
            double framesPerSecond,
            double decibelThreshold,
            double dctDuration,
            double minOscFreq,
            double maxOscFreq,
            double dctThreshold,
            out double[] dctScores,
            out double[] oscFreq)
        {
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);
            int minIndex  = (int)(minOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
            int maxIndex  = (int)(maxOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi

            if (maxIndex > dctLength)
            {
                LoggedConsole.WriteWarnLine("MaxIndex > DCT length. Therefore set maxIndex = DCT length.");
                maxIndex = dctLength;
            }

            int length = ipArray.Length;

            dctScores = new double[length];
            oscFreq   = new double[length];

            //set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            //following two lines write bmp image of cosine matrix values for checking.
            //string bmpPath = @"C:\SensorNetworks\Output\cosines.png";
            //ImageTools.DrawMatrix(cosines, bmpPath, true);

            for (int r = 1; r < length - dctLength; r++)
            {
                // only stop if current location is a peak
                if (ipArray[r] < ipArray[r - 1] || ipArray[r] < ipArray[r + 1])
                {
                    continue;
                }

                // only stop if current location is a peak
                if (ipArray[r] < decibelThreshold)
                {
                    continue;
                }

                // extract array and ready for DCT
                var dctArray = DataTools.Subarray(ipArray, r, dctLength);

                dctArray = DataTools.SubtractMean(dctArray);
                double[] dctCoefficient = MFCCStuff.DCT(dctArray, cosines);

                // convert to absolute values because not interested in negative values due to phase.
                for (int i = 0; i < dctLength; i++)
                {
                    dctCoefficient[i] = Math.Abs(dctCoefficient[i]);
                }

                // remove low freq oscillations from consideration
                int thresholdIndex = minIndex / 4;
                for (int i = 0; i < thresholdIndex; i++)
                {
                    dctCoefficient[i] = 0.0;
                }

                dctCoefficient = DataTools.normalise2UnitLength(dctCoefficient);

                int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficient);

                //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
                if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoefficient[indexOfMaxValue] > dctThreshold)
                {
                    for (int i = 0; i < dctLength; i++)
                    {
                        if (dctScores[r + i] < dctCoefficient[indexOfMaxValue])
                        {
                            dctScores[r + i] = dctCoefficient[indexOfMaxValue];
                            oscFreq[r + i]   = indexOfMaxValue / dctDuration / 2;
                        }
                    }
                }
            }
        }
コード例 #28
0
        public static void Execute(Arguments arguments)
        {
            const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING";
            string       date  = "# DATE AND TIME: " + DateTime.Now;

            Log.WriteLine(Title);
            Log.WriteLine(date);
            Log.Verbosity = 1;

            var input                    = arguments.Source;
            var sourceFileName           = input.Name;
            var outputDir                = arguments.Output;
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(input.FullName);
            var outputTxtPath            = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".txt").ToFileInfo();

            Log.WriteIfVerbose("# Recording file: " + input.FullName);
            Log.WriteIfVerbose("# Config file:    " + arguments.Config);
            Log.WriteIfVerbose("# Output folder =" + outputDir.FullName);
            FileTools.WriteTextFile(outputTxtPath.FullName, date + "\n# Recording file: " + input.FullName);

            //READ PARAMETER VALUES FROM INI FILE
            // load YAML configuration
            Config configuration = ConfigFile.Deserialize(arguments.Config);

            //ii: SET SONOGRAM CONFIGURATION
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName        = input.FullName;
            sonoConfig.WindowSize         = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512;
            sonoConfig.WindowOverlap      = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5;
            sonoConfig.WindowFunction     = configuration[AnalysisKeys.KeyWindowFunction];
            sonoConfig.NPointSmoothFFT    = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256;
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]);

            int    minHz          = configuration.GetIntOrNull("MIN_HZ") ?? 0;
            int    maxHz          = configuration.GetIntOrNull("MAX_HZ") ?? 11050;
            double segK1          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K1") ?? 0;
            double segK2          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K2") ?? 0;
            double latency        = configuration.GetDoubleOrNull("K1_K2_LATENCY") ?? 0;
            double vocalGap       = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
            double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;

            //bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? true;    //options to draw sonogram

            //double intensityThreshold = Acoustics.AED.Default.intensityThreshold;
            //if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]);
            //int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold;
            //if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD))   smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]);

            // COnvert input recording into wav
            var convertParameters = new AudioUtilityRequest {
                TargetSampleRate = 17640
            };
            var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));

            if (File.Exists(fileToAnalyse.FullName))
            {
                File.Delete(fileToAnalyse.FullName);
            }

            var convertedFileInfo = AudioFilePreparer.PrepareFile(
                input,
                fileToAnalyse,
                convertParameters,
                outputDir);

            // (A) ##########################################################################################################################
            AudioRecording recording              = new AudioRecording(fileToAnalyse.FullName);
            int            signalLength           = recording.WavReader.Samples.Length;
            TimeSpan       wavDuration            = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            double         frameDurationInSeconds = sonoConfig.WindowSize / (double)recording.SampleRate;
            TimeSpan       frameDuration          = TimeSpan.FromTicks((long)(frameDurationInSeconds * TimeSpan.TicksPerSecond));
            int            stepSize = (int)Math.Floor(sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap));
            double         stepDurationInSeconds = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap)
                                                   / recording.SampleRate;
            TimeSpan stepDuration    = TimeSpan.FromTicks((long)(stepDurationInSeconds * TimeSpan.TicksPerSecond));
            double   framesPerSecond = 1 / stepDuration.TotalSeconds;
            int      frameCount      = signalLength / stepSize;

            // (B) ################################## EXTRACT ENVELOPE and SPECTROGRAM ##################################
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                sonoConfig.WindowSize,
                sonoConfig.WindowOverlap);

            //double[] avAbsolute = dspOutput.Average; //average absolute value over the minute recording

            // (C) ################################## GET SIGNAL WAVEFORM ##################################
            double[] signalEnvelope   = dspOutput.Envelope;
            double   avSignalEnvelope = signalEnvelope.Average();

            // (D) ################################## GET Amplitude Spectrogram ##################################
            double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.

            // (E) ################################## Generate deciBel spectrogram from amplitude spectrogram
            double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(
                dspOutput.AmplitudeSpectrogram,
                dspOutput.WindowPower,
                recording.SampleRate,
                epsilon);

            LoggedConsole.WriteLine("# Finished calculating decibel spectrogram.");

            StringBuilder sb = new StringBuilder();

            sb.AppendLine("\nSIGNAL PARAMETERS");
            sb.AppendLine("Signal Duration     =" + wavDuration);
            sb.AppendLine("Sample Rate         =" + recording.SampleRate);
            sb.AppendLine("Min Signal Value    =" + dspOutput.MinSignalValue);
            sb.AppendLine("Max Signal Value    =" + dspOutput.MaxSignalValue);
            sb.AppendLine("Max Absolute Ampl   =" + signalEnvelope.Max().ToString("F3") + "  (See Note 1)");
            sb.AppendLine("Epsilon Ampl (1 bit)=" + epsilon);

            sb.AppendLine("\nFRAME PARAMETERS");
            sb.AppendLine("Window Size    =" + sonoConfig.WindowSize);
            sb.AppendLine("Frame Count    =" + frameCount);
            sb.AppendLine("Envelope length=" + signalEnvelope.Length);
            sb.AppendLine("Frame Duration =" + frameDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frame overlap  =" + sonoConfig.WindowOverlap);
            sb.AppendLine("Step Size      =" + stepSize);
            sb.AppendLine("Step duration  =" + stepDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frames Per Sec =" + framesPerSecond.ToString("F1"));

            sb.AppendLine("\nFREQUENCY PARAMETERS");
            sb.AppendLine("Nyquist Freq    =" + dspOutput.NyquistFreq + " Hz");
            sb.AppendLine("Freq Bin Width  =" + dspOutput.FreqBinWidth.ToString("F2") + " Hz");
            sb.AppendLine("Nyquist Bin     =" + dspOutput.NyquistBin);

            sb.AppendLine("\nENERGY PARAMETERS");
            double val = dspOutput.FrameEnergy.Min();

            sb.AppendLine(
                "Minimum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2") + "  (See Notes 2, 3 & 4)");
            val = dspOutput.FrameEnergy.Max();
            sb.AppendLine("Maximum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2"));

            sb.AppendLine("\ndB NOISE SUBTRACTION");
            double noiseRange = 2.0;

            //sb.AppendLine("Noise (estimate of mode) =" + sonogram.SnrData.NoiseSubtracted.ToString("F3") + " dB   (See Note 5)");
            //double noiseSpan = sonogram.SnrData.NoiseRange;
            //sb.AppendLine("Noise range              =" + noiseSpan.ToString("F2") + " to +" + (noiseSpan * -1).ToString("F2") + " dB   (See Note 6)");
            //sb.AppendLine("SNR (max frame-noise)    =" + sonogram.SnrData.Snr.ToString("F2") + " dB   (See Note 7)");

            //sb.Append("\nSEGMENTATION PARAMETERS");
            //sb.Append("Segment Thresholds K1: {0:f2}.  K2: {1:f2}  (See Note 8)", segK1, segK2);
            //sb.Append("# Event Count = " + predictedEvents.Count());

            FileTools.Append2TextFile(outputTxtPath.FullName, sb.ToString());
            FileTools.Append2TextFile(outputTxtPath.FullName, GetSNRNotes(noiseRange).ToString());

            // (F) ################################## DRAW IMAGE 1: original spectorgram
            Log.WriteLine("# Start drawing noise reduced sonograms.");
            TimeSpan X_AxisInterval = TimeSpan.FromSeconds(1);

            //int Y_AxisInterval = (int)Math.Round(1000 / dspOutput.FreqBinWidth);
            int nyquist    = recording.SampleRate / 2;
            int hzInterval = 1000;

            var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, X_AxisInterval, stepDuration, nyquist, hzInterval);

            // (G) ################################## Calculate modal background noise spectrum in decibels
            //double SD_COUNT = -0.5; // number of SDs above the mean for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MODAL;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, SD_COUNT);

            //double upperPercentileBound = 0.2;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.LOWEST_PERCENTILE;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            // (H) ################################## Calculate BRIGGS noise removal from amplitude spectrum
            int percentileBound = 20; // low energy percentile for noise removal

            //double binaryThreshold   = 0.6;   //works for higher SNR recordings
            double binaryThreshold = 0.4; //works for lower SNR recordings

            //double binaryThreshold = 0.3;   //works for lower SNR recordings
            double[,] m = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask(
                amplitudeSpectrogram,
                percentileBound,
                binaryThreshold);

            string title  = "TITLE";
            var    image2 = NoiseRemoval_Briggs.DrawSonogram(
                m,
                wavDuration,
                X_AxisInterval,
                stepDuration,
                nyquist, hzInterval,
                title);

            //Image image2 = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetSonograms(amplitudeSpectrogram, upperPercentileBound, binaryThreshold,
            //                                                                          wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            // (I) ################################## Calculate MEDIAN noise removal from amplitude spectrum

            //double upperPercentileBound = 0.8;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MEDIAN;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            //double[,] noiseReducedSpectrogram1 = tuple.Item1;  //
            //double[] noiseProfile              = tuple.Item2;  // smoothed modal profile

            //SNR.NoiseProfile dBProfile = SNR.CalculateNoiseProfile(deciBelSpectrogram, SD_COUNT);       // calculate noise value for each freq bin.
            //double[] noiseProfile = DataTools.filterMovingAverage(dBProfile.noiseThresholds, 7);        // smooth modal profile
            //double[,] noiseReducedSpectrogram1 = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, dBProfile.noiseThresholds);
            //Image image2 = DrawSonogram(noiseReducedSpectrogram1, wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            var combinedImage = ImageTools.CombineImagesVertically(image1, image2);

            string imagePath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".png");

            combinedImage.Save(imagePath);

            // (G) ################################## Calculate modal background noise spectrum in decibels

            Log.WriteLine("# Finished recording:- " + input.Name);
        }
コード例 #29
0
        //////public static IndexCalculateResult Analysis(
        public static SpectralIndexValuesForContentDescription Analysis(
            AudioRecording recording,
            TimeSpan segmentOffsetTimeSpan,
            int sampleRateOfOriginalAudioFile,
            bool returnSonogramInfo = false)
        {
            // returnSonogramInfo = true; // if debugging
            double epsilon    = recording.Epsilon;
            int    sampleRate = recording.WavReader.SampleRate;

            //var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

            // Get FRAME parameters for the calculation of Acoustic Indices
            int frameSize = ContentSignatures.FrameSize;
            int frameStep = frameSize;                                 // that is, windowOverlap = zero

            double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            var    frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            // INITIALISE a RESULTS STRUCTURE TO return
            // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
            var config          = new IndexCalculateConfig(); // sets some default values
            int freqBinCount    = frameSize / 2;
            var indexProperties = GetIndexProperties();
            ////////var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, segmentOffsetTimeSpan, config);
            var spectralIndices = new SpectralIndexValuesForContentDescription();

            ///////result.SummaryIndexValues = null;
            ///////SpectralIndexValues spectralIndices = result.SpectralIndexValues;

            // set up default spectrogram to return
            ///////result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
            ///////result.Hits = null;
            ///////result.TrackScores = new List<Plot>();

            // ################################## FINISHED SET-UP
            // ################################## NOW GET THE AMPLITUDE SPECTROGRAM

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
            var dspOutput1           = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
            var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

            // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
            // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.

            var sampleLength       = Oscillations2014.DefaultSampleLength;
            var frameLength        = Oscillations2014.DefaultFrameLength;
            var sensitivity        = Oscillations2014.DefaultSensitivityThreshold;
            var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

            // double length of the vector because want to work with 256 element vector for spectrogram purposes
            spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
            // original sample rate can be anything 11.0-44.1 kHz.
            int originalNyquist = sampleRateOfOriginalAudioFile / 2;

            // if up-sampling has been done
            if (dspOutput1.NyquistFreq > originalNyquist)
            {
                dspOutput1.NyquistFreq = originalNyquist;
                dspOutput1.NyquistBin  = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
            }

            // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

            // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
            double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
            for (int i = 0; i < temporalEntropySpectrum.Length; i++)
            {
                temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
            }

            spectralIndices.ENT = temporalEntropySpectrum;

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

            // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
            spectralIndices.BGN = spectralDecibelBgn;

            // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
            //     REUSE the var decibelSpectrogram but this time using dspOutput1.
            decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
            spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

            // ######################################################################################################################################################
            // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
            //           FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
            // dB THRESHOLD for calculating spectral coverage
            double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

            // Calculate lower and upper boundary bin ids.
            // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from bio-phony.
            int midFreqBound   = config.MidFreqBound;
            int lowFreqBound   = config.LowFreqBound;
            int lowerBinBound  = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
            int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
            var spActivity     = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

            //spectralIndices.CVR = spActivity.CoverSpectrum;
            spectralIndices.EVN = spActivity.EventSpectrum;

            ///////result.TrackScores = null;
            ///////return result;
            return(spectralIndices);
        } // end calculation of Six Spectral Indices
コード例 #30
0
        /*
         *
         *      /// <summary>
         *      /// Detects oscillations in a given freq bin.
         *      /// there are several important parameters for tuning.
         *      /// a) DCTLength: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
         *      ///     Do not want too short because too small a range of oscillations
         *      /// b) DCTindex: Sets lower bound for oscillations of interest. Index refers to array of coeff returned by DCT.
         *      ///     Array has same length as the length of the DCT. Low freq oscillations occur more often by chance. Want to exclude them.
         *      /// c) MinAmplitude: minimum acceptable value of a DCT coefficient if hit is to be accepted.
         *      ///     The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
         *      /// </summary>
         *      /// <param name="minBin">min freq bin of search band</param>
         *      /// <param name="maxBin">max freq bin of search band</param>
         *      /// <param name="dctLength">number of values</param>
         *      /// <param name="DCTindex">Sets lower bound for oscillations of interest.</param>
         *      /// <param name="minAmplitude">threshold - do not accept a DCT value if its amplitude is less than this threshold</param>
         *      public static Double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz,
         *                                                 double dctDuration, int minOscilFreq, int maxOscilFreq, double minAmplitude)
         *      {
         *          int minBin = (int)(minHz / sonogram.FBinWidth);
         *          int maxBin = (int)(maxHz / sonogram.FBinWidth);
         *
         *          int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);
         *          int minIndex = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
         *          int maxIndex = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
         *          if (maxIndex > dctLength) maxIndex = dctLength; //safety check in case of future changes to code.
         *
         *          int rows = sonogram.Data.GetLength(0);
         *          int cols = sonogram.Data.GetLength(1);
         *          Double[,] hits = new Double[rows, cols];
         *          Double[,] matrix = sonogram.Data;
         *          //matrix = ImageTools.WienerFilter(sonogram.Data, 3);// DO NOT USE - SMUDGES EVERYTHING
         *
         *
         *          double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
         *          //following two lines write matrix of cos values for checking.
         *          //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt";
         *          //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3");
         *
         *          //following two lines write bmp image of cos values for checking.
         *          //string fPath = @"C:\SensorNetworks\Output\cosines.bmp";
         *          //ImageTools.DrawMatrix(cosines, fPath);
         *
         *
         *
         *          // traverse columns - skip DC column
         *
         *
         *          for (int c = minBin; c <= maxBin; c++)                    {
         *              for (int r = 0; r < rows - dctLength; r++)
         *              {
         *                  var array = new double[dctLength];
         *                  //accumulate J columns of values
         *                  for (int i = 0; i < dctLength; i++)
         *                      for (int j = 0; j < 5; j++) array[i] += matrix[r + i, c + j];
         *
         *                  array = DataTools.SubtractMean(array);
         *                  //     DataTools.writeBarGraph(array);
         *
         *                  double[] dct = MFCCStuff.DCT(array, cosines);
         *                  for (int i = 0; i < dctLength; i++) dct[i] = Math.Abs(dct[i]);//convert to absolute values
         *                  dct[0] = 0.0; dct[1] = 0.0; dct[2] = 0.0; dct[3] = 0.0; dct[4] = 0.0;//remove low freq oscillations from consideration
         *                  dct = DataTools.normalise2UnitLength(dct);
         *                  //dct = DataTools.NormaliseMatrixValues(dct); //another option to NormaliseMatrixValues
         *                  int indexOfMaxValue = DataTools.GetMaxIndex(dct);
         *                  double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
         *
         *                  //DataTools.MinMax(dct, out min, out max);
         *                  //      DataTools.writeBarGraph(dct);
         *
         *                  //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
         *                  if ((indexOfMaxValue >= minIndex) && (indexOfMaxValue <= maxIndex) && (dct[indexOfMaxValue] > minAmplitude))
         *                  {
         *                      for (int i = 0; i < dctLength; i++) hits[r + i, c] = oscilFreq;
         *                  }
         *                  r += 5; //skip rows
         *              }
         *              c++; //do alternate columns
         *          }
         *          return hits;
         *      }
         */

        public static double[] DetectOscillationsInScoreArray(double[] scoreArray, double dctDuration, double timeScale, double dctThreshold,
                                                              bool normaliseDCT, int minOscilFreq, int maxOscilFreq)
        {
            int dctLength = (int)Math.Round(timeScale * dctDuration);
            int minIndex  = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
            int maxIndex  = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi

            if (maxIndex > dctLength)
            {
                maxIndex = dctLength; //safety check in case of future changes to code.
            }

            int length = scoreArray.Length;

            double[] hits = new double[length];

            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients

            //following two lines write matrix of cos values for checking.
            //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt";
            //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3");

            //following two lines write bmp image of cos values for checking.
            //string fPath = @"C:\SensorNetworks\Output\cosines.bmp";
            //ImageTools.DrawMatrix(cosines, fPath);

            for (int r = 0; r < length - dctLength; r++)
            {
                var array = new double[dctLength];

                //transfer values
                for (int i = 0; i < dctLength; i++)
                {
                    array[i] = scoreArray[r + i];
                }

                array = DataTools.SubtractMean(array);

                //     DataTools.writeBarGraph(array);

                double[] dct = MFCCStuff.DCT(array, cosines);
                for (int i = 0; i < dctLength; i++)
                {
                    dct[i] = Math.Abs(dct[i]); //convert to absolute values
                }

                for (int i = 0; i < 5; i++)
                {
                    dct[i] = 0.0;   //remove low freq oscillations from consideration
                }

                if (normaliseDCT)
                {
                    dct = DataTools.normalise2UnitLength(dct);
                }

                int    indexOfMaxValue = DataTools.GetMaxIndex(dct);
                double oscilFreq       = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi

                //      DataTools.writeBarGraph(dct);
                //LoggedConsole.WriteLine("oscilFreq = " + oscilFreq);

                //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
                if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
                {
                    hits[r]     = dct[indexOfMaxValue];
                    hits[r + 1] = dct[indexOfMaxValue]; // because skipping rows.

                    //for (int i = 0; i < dctLength; i++) if (hits[r + i] < dct[indexOfMaxValue]) hits[r + i] = dct[indexOfMaxValue];
                }

                r += 1; //skip rows
            }

            return(hits);
        }