예제 #1
0
        public static double[,] GetSpectrogramMatrix(AudioRecording recordingSegment, int frameLength)
        {
            // set up the default songram config object
            // var sonoConfig = new SonogramConfig
            // {
            //     WindowSize = frameLength,
            //     WindowOverlap = 0.0,
            // };

            // BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);

            // Taking the decibel spectrogram also works
            // BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

            // Do not do noise removal - it is unnecessary because only taking frequency bins three at a time.
            // Can take square-root of amplitude spectrogram. This emphasizes the low amplitude features.
            // Omitting sqrt results in detection of fewer oscillations
            //sonogram.Data = MatrixTools.SquareRootOfValues(sonogram.Data);

            // remove the DC bin if it has not already been removed.
            // Assume test of divisible by 2 is good enough.
            // int binCount = sonogram.Data.GetLength(1);
            // if (!binCount.IsEven())
            // {
            //     sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, binCount - 1);
            // }

            // AT: Switched to below method of extracting the spectrogram because BaseSonogram
            // does not allow small spectrograms (less than 0.2s) to calculated.

            var fft = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameLength, frameLength);

            return(fft.AmplitudeSpectrogram);
        }
        public static double[,] GetDecibelSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
        {
            int frameStep = frameSize;

            // get decibel spectrogram
            var results     = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);
            var spectrogram = MFCCStuff.DecibelSpectra(results.AmplitudeSpectrogram, results.WindowPower, recording.SampleRate, recording.Epsilon);

            // remove background noise from spectrogram
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(spectrogram);
            spectrogram = SNR.TruncateBgNoiseFromSpectrogram(spectrogram, spectralDecibelBgn);
            spectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogram, nhThreshold: 3.0);
            return(spectrogram);
        }
        public static double[,] GetAmplitudeSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
        {
            int frameStep = frameSize;

            // get amplitude spectrogram and remove the DC column ie column zero.
            var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);

            // remove background noise from the full amplitude spectrogram
            const double sdCount             = 0.1;
            const double spectralBgThreshold = 0.003;                                                                          // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
            var          profile             = NoiseProfile.CalculateModalNoiseProfile(results.AmplitudeSpectrogram, sdCount); //calculate noise profile - assumes a dB spectrogram.

            double[] noiseValues          = DataTools.filterMovingAverage(profile.NoiseThresholds, 7);                         // smooth the noise profile
            var      amplitudeSpectrogram = SNR.NoiseReduce_Standard(results.AmplitudeSpectrogram, noiseValues, spectralBgThreshold);

            return(amplitudeSpectrogram);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
        /// </summary>
        public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
        {
            this.Configuration = config;
            this.Attributes    = new SpectrogramAttributes();

            double minDuration = 1.0;

            if (wav.Time.TotalSeconds < minDuration)
            {
                LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
                return;
            }

            //set attributes for the current recording and spectrogram type
            this.Attributes.SampleRate       = wav.SampleRate;
            this.Attributes.Duration         = wav.Time;
            this.Attributes.NyquistFrequency = wav.SampleRate / 2;
            this.Attributes.Duration         = wav.Time;
            this.Attributes.MaxAmplitude     = wav.CalculateMaximumAmplitude();
            this.Attributes.FrameDuration    = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate);

            var recording = new AudioRecording(wav);
            var fftdata   = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                config.WindowSize,
                config.WindowOverlap,
                this.Configuration.WindowFunction);

            // now recover required data
            //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
            this.Attributes.Epsilon     = fftdata.Epsilon;
            this.Attributes.WindowPower = fftdata.WindowPower;
            this.Attributes.FrameCount  = fftdata.FrameCount;
            this.Data = fftdata.AmplitudeSpectrogram;

            // IF REQUIRED CONVERT TO MEL SCALE
            if (this.Configuration.DoMelScale)
            {
                // this mel scale conversion uses the "Greg integral" !
                this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
            }
        }
예제 #5
0
        public static double SimilarityIndex2(double[] channelL, double[] channelR, double epsilon, int sampleRate)
        {
            //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
            int frameSize = 512;
            int frameStep = 512;

            var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
            var spgrmL     = dspOutputL.AmplitudeSpectrogram;

            var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
            var spgrmR     = dspOutputR.AmplitudeSpectrogram;

            double similarityIndex = 0;

            // get spgrm dimensions - assume both spgrms have same dimensions
            int rowCount = spgrmL.GetLength(0);
            int colCount = spgrmL.GetLength(1);

            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < colCount; c++)
                {
                    double min   = Math.Min(spgrmL[r, c], spgrmR[r, c]);
                    double max   = Math.Max(spgrmL[r, c], spgrmR[r, c]);
                    double index = 0;
                    if (max <= 0.000001)
                    {
                        index = min / 0.000001;
                    } // to prevent division by zero.
                    else

                    //{ index = min / max; }
                    {
                        index = min * min / (max * max);
                    }

                    similarityIndex += index; // / Math.Max(L, R);
                }
            }

            return(similarityIndex / (rowCount * colCount));
        }
예제 #6
0
        public void TestStandardNoiseRemoval()
        {
            var recording  = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
            int windowSize = 512;
            var sr         = recording.SampleRate;

            // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
            double windowOverlap  = 0.0;
            var    windowFunction = WindowFunctions.HAMMING.ToString();

            var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                windowSize,
                windowOverlap,
                windowFunction);

            // Now recover the data
            // The following data is required when constructing sonograms
            //var duration = recording.WavReader.Time;
            //var frameCount = fftdata.FrameCount;
            //var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

            // The following call to NoiseProfile.CalculateBackgroundNoise(double[,] spectrogram)
            // returns a noise profile that is used as the BGN spectral index.
            // It calculates the modal background noise for each freqeuncy bin and then returns a smoothed version.
            // By default, the number of SDs = 0 and the smoothing window = 7.
            // Method assumes that the passed spectrogram is oriented as: rows=frames, cols=freq bins.</param>

            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

            var resourcesDir         = PathHelper.ResolveAssetPath("Indices");
            var expectedSpectrumFile = new FileInfo(resourcesDir + "\\NoiseProfile.bin");

            //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn);
            var expectedVector = Binary.Deserialize <double[]>(expectedSpectrumFile);

            CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
        }
        } // LocalPeaks()

        /// <summary>
        /// CALCULATEs SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG
        /// This method is only called from IndexCalulate.analysis() when the IndexCalculation Duration is less than 10 seconds,
        /// because need to recalculate background noise etc.
        /// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
        /// NOTE: We require a noise reduced decibel spectrogram
        /// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
        /// </summary>
        public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
        {
            double epsilon          = recording.Epsilon;
            int    sampleRate       = recording.WavReader.SampleRate;
            int    bufferFrameCount = 2; // 2 because must allow for edge effects when using 5x5 grid to find ridges.
            int    ridgeBuffer      = frameSize * bufferFrameCount;
            var    ridgeRecording   = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
            int    frameStep        = frameSize;
            var    dspOutput        = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

            // Generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
            // i: generate the SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
            double[,] decibelSpectrogram;
            if (octaveScale)
            {
                var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
                decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
            }
            else
            {
                decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
            }

            // calculate the noise profile
            var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            double nhDecibelThreshold = 2.0; // SPECTRAL dB THRESHOLD for smoothing background

            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold);

            // thresholds in decibels
            // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);

            return(sptInfo);
        }
예제 #8
0
        public static void AssertFrequencyInSignal(WavReader wavReader, double[] signal, int[] frequencies, int variance = 1)
        {
            var fft = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(signal, wavReader.SampleRate, wavReader.Epsilon, 512, 0.0);

            var histogram = SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(fft.AmplitudeSpectrogram);

            var    max       = histogram.Max();
            double threshold = max * 0.8;
            var    highBins  = frequencies.Select(f => (int)(f / fft.FreqBinWidth)).ToArray();

            bool isOk = true;

            for (int bin = 0; bin < histogram.Length; bin++)
            {
                var value = histogram[bin];
                if (value > threshold)
                {
                    bool anyMatch = false;
                    foreach (var highBin in highBins)
                    {
                        if (bin >= highBin - variance && bin <= highBin + variance)
                        {
                            anyMatch = true;
                            break;
                        }
                    }

                    isOk = anyMatch;
                }

                if (!isOk)
                {
                    break;
                }
            }

            BaseTest.Assert.IsTrue(isOk);
        }
예제 #9
0
        /// <summary>
        /// Calculates the following spectrograms as per settings in the Images array in the config file: Towsey.SpectrogramGenerator.yml:
        /// Waveform.
        /// DecibelSpectrogram.
        /// DecibelSpectrogramNoiseReduced.
        /// CepstralSpectrogram.
        /// DifferenceSpectrogram.
        /// AmplitudeSpectrogramLocalContrastNormalization.
        /// Experimental.
        /// Comment the config.yml file with a hash, those spectrograms that are not required.
        /// </summary>
        /// <param name="sourceRecording">The name of the original recording.</param>
        /// <param name="config">Contains parameter info to make spectrograms.</param>
        /// <param name="sourceRecordingName">.Name of source recording. Required only spectrogram labels.</param>
        public static AudioToSonogramResult GenerateSpectrogramImages(
            FileInfo sourceRecording,
            SpectrogramGeneratorConfig config,
            string sourceRecordingName)
        {
            //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
            var recordingSegment = new AudioRecording(sourceRecording.FullName);
            int sampleRate       = recordingSegment.WavReader.SampleRate;
            var result           = new AudioToSonogramResult();

            var requestedImageTypes = config.Images ?? new[] { SpectrogramImageType.DecibelSpectrogram };
            var @do = requestedImageTypes.ToHashSet();

            int frameSize = config.GetIntOrNull("FrameLength") ?? 512;
            int frameStep = config.GetIntOrNull("FrameStep") ?? 441;

            // must calculate this because used later on.
            double frameOverlap = (frameSize - frameStep) / (double)frameSize;

            // Default noiseReductionType = Standard
            var bgNoiseThreshold = config.BgNoiseThreshold;

            // threshold for drawing the difference spectrogram
            var differenceThreshold = config.DifferenceThreshold;

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

            var sonoConfig = new SonogramConfig()
            {
                epsilon                 = recordingSegment.Epsilon,
                SampleRate              = sampleRate,
                WindowSize              = frameSize,
                WindowStep              = frameStep,
                WindowOverlap           = frameOverlap,
                WindowPower             = dspOutput1.WindowPower,
                Duration                = recordingSegment.Duration,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = bgNoiseThreshold,
            };

            var images = new Dictionary <SpectrogramImageType, Image <Rgb24> >(requestedImageTypes.Length);

            // IMAGE 1) draw the WAVEFORM
            if (@do.Contains(Waveform))
            {
                var minValues     = dspOutput1.MinFrameValues;
                var maxValues     = dspOutput1.MaxFrameValues;
                int height        = config.WaveformHeight;
                var waveformImage = GetWaveformImage(minValues, maxValues, height);

                // add in the title bar and time scales.
                string title =
                    $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
                var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
                    title,
                    waveformImage.Width,
                    ImageTags[Waveform]);
                var      startTime          = TimeSpan.Zero;
                var      xAxisTicInterval   = TimeSpan.FromSeconds(1);
                TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
                var      labelInterval      = TimeSpan.FromSeconds(5);
                waveformImage = BaseSonogram.FrameSonogram(
                    waveformImage,
                    titleBar,
                    startTime,
                    xAxisTicInterval,
                    xAxisPixelDuration,
                    labelInterval);
                images.Add(Waveform, waveformImage);
            }

            // Draw various decibel spectrograms
            var decibelTypes = new[] { SpectrogramImageType.DecibelSpectrogram, DecibelSpectrogramNoiseReduced, DifferenceSpectrogram, Experimental };

            if (@do.Overlaps(decibelTypes))
            {
                // disable noise removal for first two spectrograms
                var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
                sonoConfig.NoiseReductionType = NoiseReductionType.None;

                //Get the decibel spectrogram
                var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
                result.DecibelSpectrogram   = decibelSpectrogram;
                double[,] dbSpectrogramData = (double[, ])decibelSpectrogram.Data.Clone();

                // IMAGE 2) Display the DecibelSpectrogram
                if (@do.Contains(SpectrogramImageType.DecibelSpectrogram))
                {
                    images.Add(
                        SpectrogramImageType.DecibelSpectrogram,
                        decibelSpectrogram.GetImageFullyAnnotated(
                            $"DECIBEL SPECTROGRAM ({sourceRecordingName})",
                            ImageTags[SpectrogramImageType.DecibelSpectrogram]));
                }

                if (@do.Overlaps(new[] { DecibelSpectrogramNoiseReduced, Experimental, CepstralSpectrogram }))
                {
                    sonoConfig.NoiseReductionType      = disabledNoiseReductionType;
                    sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
                    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
                    decibelSpectrogram.Data =
                        SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
                    decibelSpectrogram.Data =
                        SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

                    // IMAGE 3) DecibelSpectrogram - noise reduced
                    if (@do.Contains(DecibelSpectrogramNoiseReduced))
                    {
                        images.Add(
                            DecibelSpectrogramNoiseReduced,
                            decibelSpectrogram.GetImageFullyAnnotated(
                                $"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})",
                                ImageTags[DecibelSpectrogramNoiseReduced]));
                    }

                    // IMAGE 4) EXPERIMENTAL Spectrogram
                    if (@do.Contains(Experimental))
                    {
                        sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                        images.Add(
                            Experimental,
                            GetDecibelSpectrogram_Ridges(
                                dbSpectrogramData,
                                decibelSpectrogram,
                                sourceRecordingName));
                    }
                }

                // IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
                if (@do.Contains(DifferenceSpectrogram))
                {
                    //var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
                    var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
                    differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
                        differenceImage,
                        sampleRate,
                        frameStep,
                        $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})",
                        ImageTags[DifferenceSpectrogram]);
                    images.Add(DifferenceSpectrogram, differenceImage);
                }
            }

            // IMAGE 6) Cepstral Spectrogram
            if (@do.Contains(CepstralSpectrogram))
            {
                images.Add(
                    CepstralSpectrogram,
                    GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
            }

            // IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization
            if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization))
            {
                var neighborhoodSeconds  = config.NeighborhoodSeconds;
                var lcnContrastParameter = config.LcnContrastLevel;
                images.Add(
                    AmplitudeSpectrogramLocalContrastNormalization,
                    GetLcnSpectrogram(
                        sonoConfig,
                        recordingSegment,
                        sourceRecordingName,
                        neighborhoodSeconds,
                        lcnContrastParameter));
            }

            // now pick and combine images in order user specified
            var sortedImages = requestedImageTypes.Select(x => images[x]);

            // COMBINE THE SPECTROGRAM IMAGES
            result.CompositeImage = ImageTools.CombineImagesVertically(sortedImages.ToArray());
            return(result);
        }
예제 #10
0
        /// <summary>
        /// Calculates the following spectrograms as per content of config.yml file:
        /// Waveform: true.
        /// DifferenceSpectrogram: true.
        /// DecibelSpectrogram: true.
        /// DecibelSpectrogram_NoiseReduced: true.
        /// DecibelSpectrogram_Ridges: true.
        /// AmplitudeSpectrogram_LocalContrastNormalization: true.
        /// SoxSpectrogram: false.
        /// Experimental: true.
        /// </summary>
        /// <param name="sourceRecording">The name of the original recording.</param>
        /// <param name="configInfo">Contains parameter info to make spectrograms.</param>
        /// <param name="sourceRecordingName">.Name of source recording. Required only spectrogram labels.</param>
        public static AudioToSonogramResult GenerateSpectrogramImages(
            FileInfo sourceRecording,
            AnalyzerConfig configInfo,
            string sourceRecordingName)
        {
            //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
            var recordingSegment = new AudioRecording(sourceRecording.FullName);
            int sampleRate       = recordingSegment.WavReader.SampleRate;
            var result           = new AudioToSonogramResult();

            // init the image stack
            var list = new List <Image>();

            bool doWaveForm                = configInfo.GetBoolOrNull("Waveform") ?? false;
            bool doDecibelSpectrogram      = configInfo.GetBoolOrNull("DecibelSpectrogram") ?? false;
            bool doNoiseReducedSpectrogram = configInfo.GetBoolOrNull("DecibelSpectrogram_NoiseReduced") ?? true;
            bool doDifferenceSpectrogram   = configInfo.GetBoolOrNull("DifferenceSpectrogram") ?? false;
            bool doLcnSpectrogram          = configInfo.GetBoolOrNull("AmplitudeSpectrogram_LocalContrastNormalization") ?? false;
            bool doCepstralSpectrogram     = configInfo.GetBoolOrNull("CepstralSpectrogram") ?? false;
            bool doExperimentalSpectrogram = configInfo.GetBoolOrNull("Experimental") ?? false;

            //Don't do SOX spectrogram.
            //bool doSoxSpectrogram = configInfo.GetBool("SoxSpectrogram");

            int frameSize = configInfo.GetIntOrNull("FrameLength") ?? 512;
            int frameStep = configInfo.GetIntOrNull("FrameStep") ?? 0;

            // must calculate this because used later on.
            double frameOverlap = (frameSize - frameStep) / (double)frameSize;

            // Default noiseReductionType = Standard
            var bgNoiseThreshold = configInfo.GetDoubleOrNull("BgNoiseThreshold") ?? 3.0;

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

            var sonoConfig = new SonogramConfig()
            {
                epsilon                 = recordingSegment.Epsilon,
                SampleRate              = sampleRate,
                WindowSize              = frameSize,
                WindowStep              = frameStep,
                WindowOverlap           = frameOverlap,
                WindowPower             = dspOutput1.WindowPower,
                Duration                = recordingSegment.Duration,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = bgNoiseThreshold,
            };

            // IMAGE 1) draw the WAVEFORM
            if (doWaveForm)
            {
                var minValues     = dspOutput1.MinFrameValues;
                var maxValues     = dspOutput1.MaxFrameValues;
                int height        = configInfo.GetIntOrNull("WaveformHeight") ?? 180;
                var waveformImage = GetWaveformImage(minValues, maxValues, height);

                // add in the title bar and time scales.
                string   title              = $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
                var      titleBar           = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(title, waveformImage.Width);
                var      startTime          = TimeSpan.Zero;
                var      xAxisTicInterval   = TimeSpan.FromSeconds(1);
                TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
                var      labelInterval      = TimeSpan.FromSeconds(5);
                waveformImage = BaseSonogram.FrameSonogram(waveformImage, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval);
                list.Add(waveformImage);
            }

            // Draw various decibel spectrograms
            if (doDecibelSpectrogram || doNoiseReducedSpectrogram || doDifferenceSpectrogram || doExperimentalSpectrogram)
            {
                // disable noise removal for first spectrogram
                var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
                sonoConfig.NoiseReductionType = NoiseReductionType.None;

                //Get the decibel spectrogram
                var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
                result.DecibelSpectrogram   = decibelSpectrogram;
                double[,] dbSpectrogramData = (double[, ])decibelSpectrogram.Data.Clone();

                // IMAGE 2) DecibelSpectrogram
                if (doDecibelSpectrogram)
                {
                    var image3 = decibelSpectrogram.GetImageFullyAnnotated($"DECIBEL SPECTROGRAM ({sourceRecordingName})");
                    list.Add(image3);
                }

                if (doNoiseReducedSpectrogram || doExperimentalSpectrogram || doDifferenceSpectrogram)
                {
                    sonoConfig.NoiseReductionType      = disabledNoiseReductionType;
                    sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
                    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
                    decibelSpectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
                    decibelSpectrogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

                    // IMAGE 3) DecibelSpectrogram - noise reduced
                    if (doNoiseReducedSpectrogram)
                    {
                        var image4 = decibelSpectrogram.GetImageFullyAnnotated($"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})");
                        list.Add(image4);
                    }

                    // IMAGE 4) EXPERIMENTAL Spectrogram
                    if (doExperimentalSpectrogram)
                    {
                        sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                        var image5 = GetDecibelSpectrogram_Ridges(dbSpectrogramData, decibelSpectrogram, sourceRecordingName);
                        list.Add(image5);
                    }

                    // IMAGE 5) draw difference spectrogram
                    if (doDifferenceSpectrogram)
                    {
                        var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
                        var image6 = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
                        image6 = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(image6, sampleRate, frameStep, $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})");
                        list.Add(image6);
                    }
                }
            }

            // IMAGE 6) Cepstral Spectrogram
            if (doCepstralSpectrogram)
            {
                var image6 = GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName);
                list.Add(image6);
            }

            // 7) AmplitudeSpectrogram_LocalContrastNormalization
            if (doLcnSpectrogram)
            {
                var neighbourhoodSeconds = configInfo.GetDoubleOrNull("NeighbourhoodSeconds") ?? 0.5;
                var lcnContrastParameter = configInfo.GetDoubleOrNull("LcnContrastLevel") ?? 0.4;
                var image8 = GetLcnSpectrogram(sonoConfig, recordingSegment, sourceRecordingName, neighbourhoodSeconds, lcnContrastParameter);
                list.Add(image8);
            }

            // 8) SOX SPECTROGRAM
            //if (doSoxSpectrogram)
            //{
            //Log.Warn("SoX spectrogram set to true but is ignored when running as an IAnalyzer");

            // The following parameters were once used to implement a sox spectrogram.
            //bool makeSoxSonogram = configuration.GetBoolOrNull(AnalysisKeys.MakeSoxSonogram) ?? false;
            //configDict[AnalysisKeys.SonogramTitle] = configuration[AnalysisKeys.SonogramTitle] ?? "Sonogram";
            //configDict[AnalysisKeys.SonogramComment] = configuration[AnalysisKeys.SonogramComment] ?? "Sonogram produced using SOX";
            //configDict[AnalysisKeys.SonogramColored] = configuration[AnalysisKeys.SonogramColored] ?? "false";
            //configDict[AnalysisKeys.SonogramQuantisation] = configuration[AnalysisKeys.SonogramQuantisation] ?? "128";
            //configDict[AnalysisKeys.AddTimeScale] = configuration[AnalysisKeys.AddTimeScale] ?? "true";
            //configDict[AnalysisKeys.AddAxes] = configuration[AnalysisKeys.AddAxes] ?? "true";
            //configDict[AnalysisKeys.AddSegmentationTrack] = configuration[AnalysisKeys.AddSegmentationTrack] ?? "true";
            //    var soxFile = new FileInfo(Path.Combine(output.FullName, sourceName + "SOX.png"));
            //    SpectrogramTools.MakeSonogramWithSox(sourceRecording, configDict, path2SoxSpectrogram);
            // list.Add(image7);
            //}

            // COMBINE THE SPECTROGRAM IMAGES
            result.CompositeImage = ImageTools.CombineImagesVertically(list);
            return(result);
        }
예제 #11
0
        /// <summary>
        /// Initializes a new instance of the <see cref="BaseSonogram"/> class.
        /// BASE CONSTRUCTOR
        /// This constructor contains all steps required to prepare the amplitude spectrogram.
        /// The third boolean parameter is simply a place-filler to ensure a different Constructor signature.
        /// from the principle Constructor which follows.
        /// </summary>
        /// <param name="config">config file to use.</param>
        /// <param name="wav">wav.</param>
        /// <param name="dummy">filler boolean. Calculate in method.</param>
        public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy)
            : this(config)
        {
            // As of 28 March 2017 drop capability to get sub-band of spectrogram because was not being used.
            // can be recovered later if desired.
            //bool doExtractSubband = this.SubBandMinHz > 0 || this.SubBandMaxHz < this.NyquistFrequency;

            this.Duration = wav.Time;
            double minDuration = 0.2;
            if (this.Duration.TotalSeconds < minDuration)
            {
                LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
                return;
            }

            //set config params to the current recording
            this.SampleRate               = wav.SampleRate;
            this.Configuration.Duration   = wav.Time;
            this.Configuration.SampleRate = wav.SampleRate; //also set the Nyquist
            this.MaxAmplitude             = wav.CalculateMaximumAmplitude();

            var recording = new AudioRecording(wav);
            var fftData   = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                config.WindowSize,
                config.WindowOverlap,
                this.Configuration.WindowFunction);

            // now recover required data
            //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
            this.Configuration.epsilon     = fftData.Epsilon;
            this.Configuration.WindowPower = fftData.WindowPower;
            this.FrameCount       = fftData.FrameCount;
            this.DecibelsPerFrame = fftData.FrameDecibels;

            //init normalised signal energy array but do nothing with it. This has to be done from outside
            this.DecibelsNormalised = new double[this.FrameCount];
            this.Data = fftData.AmplitudeSpectrogram;

            // ENERGY PER FRAME and NORMALISED dB PER FRAME AND SNR
            // currently DoSnr = true by default
            if (config.DoSnr)
            {
                // If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode,
                // then Lamel's noise removal algorithm may not work well.
                if (fftData.FractionOfHighEnergyFrames > SNR.FractionalBoundForMode)
                {
                    Log.WriteIfVerbose("\nWARNING ##############");
                    Log.WriteIfVerbose(
                        "\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%",
                        fftData.FractionOfHighEnergyFrames * 100,
                        SNR.FractionalBoundForMode * 100);
                    Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n");
                }

                //AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al
                // Setting segmentation/endpoint detection parameters is broken as of September 2014.
                // The next line is a hack replacement
                EndpointDetectionConfiguration.SetDefaultSegmentationConfig();
                this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep);
            }

            /* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years.
             * // EXTRACT REQUIRED FREQUENCY BAND
             * if (doExtractSubband)
             * {
             *  this.Data = SpectrogramTools.ExtractFreqSubband(
             *      this.Data,
             *      this.subBandMinHz,
             *      this.subBandMaxHz,
             *      this.Configuration.DoMelScale,
             *      this.Configuration.FreqBinCount,
             *      this.FBinWidth);
             *  this.CalculateSubbandSNR(this.Data);
             * }
             */
        }
예제 #12
0
        public static void Execute(Arguments arguments)
        {
            const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING";
            string       date  = "# DATE AND TIME: " + DateTime.Now;

            Log.WriteLine(Title);
            Log.WriteLine(date);
            Log.Verbosity = 1;

            var input                    = arguments.Source;
            var sourceFileName           = input.Name;
            var outputDir                = arguments.Output;
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(input.FullName);
            var outputTxtPath            = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".txt").ToFileInfo();

            Log.WriteIfVerbose("# Recording file: " + input.FullName);
            Log.WriteIfVerbose("# Config file:    " + arguments.Config);
            Log.WriteIfVerbose("# Output folder =" + outputDir.FullName);
            FileTools.WriteTextFile(outputTxtPath.FullName, date + "\n# Recording file: " + input.FullName);

            //READ PARAMETER VALUES FROM INI FILE
            // load YAML configuration
            Config configuration = ConfigFile.Deserialize(arguments.Config);

            //ii: SET SONOGRAM CONFIGURATION
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName        = input.FullName;
            sonoConfig.WindowSize         = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512;
            sonoConfig.WindowOverlap      = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5;
            sonoConfig.WindowFunction     = configuration[AnalysisKeys.KeyWindowFunction];
            sonoConfig.NPointSmoothFFT    = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256;
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]);

            int    minHz          = configuration.GetIntOrNull("MIN_HZ") ?? 0;
            int    maxHz          = configuration.GetIntOrNull("MAX_HZ") ?? 11050;
            double segK1          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K1") ?? 0;
            double segK2          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K2") ?? 0;
            double latency        = configuration.GetDoubleOrNull("K1_K2_LATENCY") ?? 0;
            double vocalGap       = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
            double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;

            //bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? true;    //options to draw sonogram

            //double intensityThreshold = Acoustics.AED.Default.intensityThreshold;
            //if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]);
            //int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold;
            //if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD))   smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]);

            // COnvert input recording into wav
            var convertParameters = new AudioUtilityRequest {
                TargetSampleRate = 17640
            };
            var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));

            if (File.Exists(fileToAnalyse.FullName))
            {
                File.Delete(fileToAnalyse.FullName);
            }

            var convertedFileInfo = AudioFilePreparer.PrepareFile(
                input,
                fileToAnalyse,
                convertParameters,
                outputDir);

            // (A) ##########################################################################################################################
            AudioRecording recording              = new AudioRecording(fileToAnalyse.FullName);
            int            signalLength           = recording.WavReader.Samples.Length;
            TimeSpan       wavDuration            = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            double         frameDurationInSeconds = sonoConfig.WindowSize / (double)recording.SampleRate;
            TimeSpan       frameDuration          = TimeSpan.FromTicks((long)(frameDurationInSeconds * TimeSpan.TicksPerSecond));
            int            stepSize = (int)Math.Floor(sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap));
            double         stepDurationInSeconds = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap)
                                                   / recording.SampleRate;
            TimeSpan stepDuration    = TimeSpan.FromTicks((long)(stepDurationInSeconds * TimeSpan.TicksPerSecond));
            double   framesPerSecond = 1 / stepDuration.TotalSeconds;
            int      frameCount      = signalLength / stepSize;

            // (B) ################################## EXTRACT ENVELOPE and SPECTROGRAM ##################################
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                sonoConfig.WindowSize,
                sonoConfig.WindowOverlap);

            //double[] avAbsolute = dspOutput.Average; //average absolute value over the minute recording

            // (C) ################################## GET SIGNAL WAVEFORM ##################################
            double[] signalEnvelope   = dspOutput.Envelope;
            double   avSignalEnvelope = signalEnvelope.Average();

            // (D) ################################## GET Amplitude Spectrogram ##################################
            double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.

            // (E) ################################## Generate deciBel spectrogram from amplitude spectrogram
            double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(
                dspOutput.AmplitudeSpectrogram,
                dspOutput.WindowPower,
                recording.SampleRate,
                epsilon);

            LoggedConsole.WriteLine("# Finished calculating decibel spectrogram.");

            StringBuilder sb = new StringBuilder();

            sb.AppendLine("\nSIGNAL PARAMETERS");
            sb.AppendLine("Signal Duration     =" + wavDuration);
            sb.AppendLine("Sample Rate         =" + recording.SampleRate);
            sb.AppendLine("Min Signal Value    =" + dspOutput.MinSignalValue);
            sb.AppendLine("Max Signal Value    =" + dspOutput.MaxSignalValue);
            sb.AppendLine("Max Absolute Ampl   =" + signalEnvelope.Max().ToString("F3") + "  (See Note 1)");
            sb.AppendLine("Epsilon Ampl (1 bit)=" + epsilon);

            sb.AppendLine("\nFRAME PARAMETERS");
            sb.AppendLine("Window Size    =" + sonoConfig.WindowSize);
            sb.AppendLine("Frame Count    =" + frameCount);
            sb.AppendLine("Envelope length=" + signalEnvelope.Length);
            sb.AppendLine("Frame Duration =" + frameDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frame overlap  =" + sonoConfig.WindowOverlap);
            sb.AppendLine("Step Size      =" + stepSize);
            sb.AppendLine("Step duration  =" + stepDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frames Per Sec =" + framesPerSecond.ToString("F1"));

            sb.AppendLine("\nFREQUENCY PARAMETERS");
            sb.AppendLine("Nyquist Freq    =" + dspOutput.NyquistFreq + " Hz");
            sb.AppendLine("Freq Bin Width  =" + dspOutput.FreqBinWidth.ToString("F2") + " Hz");
            sb.AppendLine("Nyquist Bin     =" + dspOutput.NyquistBin);

            sb.AppendLine("\nENERGY PARAMETERS");
            double val = dspOutput.FrameEnergy.Min();

            sb.AppendLine(
                "Minimum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2") + "  (See Notes 2, 3 & 4)");
            val = dspOutput.FrameEnergy.Max();
            sb.AppendLine("Maximum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2"));

            sb.AppendLine("\ndB NOISE SUBTRACTION");
            double noiseRange = 2.0;

            //sb.AppendLine("Noise (estimate of mode) =" + sonogram.SnrData.NoiseSubtracted.ToString("F3") + " dB   (See Note 5)");
            //double noiseSpan = sonogram.SnrData.NoiseRange;
            //sb.AppendLine("Noise range              =" + noiseSpan.ToString("F2") + " to +" + (noiseSpan * -1).ToString("F2") + " dB   (See Note 6)");
            //sb.AppendLine("SNR (max frame-noise)    =" + sonogram.SnrData.Snr.ToString("F2") + " dB   (See Note 7)");

            //sb.Append("\nSEGMENTATION PARAMETERS");
            //sb.Append("Segment Thresholds K1: {0:f2}.  K2: {1:f2}  (See Note 8)", segK1, segK2);
            //sb.Append("# Event Count = " + predictedEvents.Count());

            FileTools.Append2TextFile(outputTxtPath.FullName, sb.ToString());
            FileTools.Append2TextFile(outputTxtPath.FullName, GetSNRNotes(noiseRange).ToString());

            // (F) ################################## DRAW IMAGE 1: original spectorgram
            Log.WriteLine("# Start drawing noise reduced sonograms.");
            TimeSpan X_AxisInterval = TimeSpan.FromSeconds(1);

            //int Y_AxisInterval = (int)Math.Round(1000 / dspOutput.FreqBinWidth);
            int nyquist    = recording.SampleRate / 2;
            int hzInterval = 1000;

            var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, X_AxisInterval, stepDuration, nyquist, hzInterval);

            // (G) ################################## Calculate modal background noise spectrum in decibels
            //double SD_COUNT = -0.5; // number of SDs above the mean for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MODAL;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, SD_COUNT);

            //double upperPercentileBound = 0.2;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.LOWEST_PERCENTILE;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            // (H) ################################## Calculate BRIGGS noise removal from amplitude spectrum
            int percentileBound = 20; // low energy percentile for noise removal

            //double binaryThreshold   = 0.6;   //works for higher SNR recordings
            double binaryThreshold = 0.4; //works for lower SNR recordings

            //double binaryThreshold = 0.3;   //works for lower SNR recordings
            double[,] m = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask(
                amplitudeSpectrogram,
                percentileBound,
                binaryThreshold);

            string title  = "TITLE";
            var    image2 = NoiseRemoval_Briggs.DrawSonogram(
                m,
                wavDuration,
                X_AxisInterval,
                stepDuration,
                nyquist, hzInterval,
                title);

            //Image image2 = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetSonograms(amplitudeSpectrogram, upperPercentileBound, binaryThreshold,
            //                                                                          wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            // (I) ################################## Calculate MEDIAN noise removal from amplitude spectrum

            //double upperPercentileBound = 0.8;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MEDIAN;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            //double[,] noiseReducedSpectrogram1 = tuple.Item1;  //
            //double[] noiseProfile              = tuple.Item2;  // smoothed modal profile

            //SNR.NoiseProfile dBProfile = SNR.CalculateNoiseProfile(deciBelSpectrogram, SD_COUNT);       // calculate noise value for each freq bin.
            //double[] noiseProfile = DataTools.filterMovingAverage(dBProfile.noiseThresholds, 7);        // smooth modal profile
            //double[,] noiseReducedSpectrogram1 = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, dBProfile.noiseThresholds);
            //Image image2 = DrawSonogram(noiseReducedSpectrogram1, wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            var combinedImage = ImageTools.CombineImagesVertically(image1, image2);

            string imagePath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".png");

            combinedImage.Save(imagePath);

            // (G) ################################## Calculate modal background noise spectrum in decibels

            Log.WriteLine("# Finished recording:- " + input.Name);
        }
        //////public static IndexCalculateResult Analysis(
        public static SpectralIndexValuesForContentDescription Analysis(
            AudioRecording recording,
            TimeSpan segmentOffsetTimeSpan,
            int sampleRateOfOriginalAudioFile,
            bool returnSonogramInfo = false)
        {
            // returnSonogramInfo = true; // if debugging
            double epsilon    = recording.Epsilon;
            int    sampleRate = recording.WavReader.SampleRate;

            //var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

            // Get FRAME parameters for the calculation of Acoustic Indices
            int frameSize = ContentSignatures.FrameSize;
            int frameStep = frameSize;                                 // that is, windowOverlap = zero

            double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            var    frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            // INITIALISE a RESULTS STRUCTURE TO return
            // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
            var config          = new IndexCalculateConfig(); // sets some default values
            int freqBinCount    = frameSize / 2;
            var indexProperties = GetIndexProperties();
            ////////var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, segmentOffsetTimeSpan, config);
            var spectralIndices = new SpectralIndexValuesForContentDescription();

            ///////result.SummaryIndexValues = null;
            ///////SpectralIndexValues spectralIndices = result.SpectralIndexValues;

            // set up default spectrogram to return
            ///////result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
            ///////result.Hits = null;
            ///////result.TrackScores = new List<Plot>();

            // ################################## FINISHED SET-UP
            // ################################## NOW GET THE AMPLITUDE SPECTROGRAM

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
            var dspOutput1           = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
            var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

            // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
            // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.

            var sampleLength       = Oscillations2014.DefaultSampleLength;
            var frameLength        = Oscillations2014.DefaultFrameLength;
            var sensitivity        = Oscillations2014.DefaultSensitivityThreshold;
            var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

            // double length of the vector because want to work with 256 element vector for spectrogram purposes
            spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
            // original sample rate can be anything 11.0-44.1 kHz.
            int originalNyquist = sampleRateOfOriginalAudioFile / 2;

            // if up-sampling has been done
            if (dspOutput1.NyquistFreq > originalNyquist)
            {
                dspOutput1.NyquistFreq = originalNyquist;
                dspOutput1.NyquistBin  = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
            }

            // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

            // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
            double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
            for (int i = 0; i < temporalEntropySpectrum.Length; i++)
            {
                temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
            }

            spectralIndices.ENT = temporalEntropySpectrum;

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

            // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
            spectralIndices.BGN = spectralDecibelBgn;

            // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
            //     REUSE the var decibelSpectrogram but this time using dspOutput1.
            decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
            spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

            // ######################################################################################################################################################
            // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
            //           FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
            // dB THRESHOLD for calculating spectral coverage
            double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

            // Calculate lower and upper boundary bin ids.
            // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from bio-phony.
            int midFreqBound   = config.MidFreqBound;
            int lowFreqBound   = config.LowFreqBound;
            int lowerBinBound  = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
            int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
            var spActivity     = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

            //spectralIndices.CVR = spActivity.CoverSpectrum;
            spectralIndices.EVN = spActivity.EventSpectrum;

            ///////result.TrackScores = null;
            ///////return result;
            return(spectralIndices);
        } // end calculation of Six Spectral Indices
        /// <summary>
        /// Does the Analysis
        /// Returns a DataTable
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="configDict"></param>
        /// <param name="diOutputDir"></param>
        /// <param name="opFileName"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="config"></param>
        /// <param name="segmentAudioFile"></param>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, DirectoryInfo diOutputDir, string opFileName, TimeSpan segmentStartOffset)
        {
            //set default values
            int    bandWidth          = 500; //detect bars in bands of this width.
            int    frameSize          = 1024;
            double windowOverlap      = 0.0;
            double intensityThreshold = double.Parse(configDict[key_INTENSITY_THRESHOLD]);
            //intensityThreshold = 0.01;

            AudioRecording recording = AudioRecording.GetAudioRecording(fiSegmentOfSourceFile, RESAMPLE_RATE, diOutputDir.FullName, opFileName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("############ WARNING: Recording could not be obtained - likely file does not exist.");
                return(null);
            }
            int      sr                   = recording.SampleRate;
            double   binWidth             = recording.SampleRate / (double)frameSize;
            double   frameDuration        = frameSize / (double)sr;
            double   frameOffset          = frameDuration * (1 - windowOverlap); //seconds between start of each frame
            double   framesPerSecond      = 1 / frameOffset;
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      colStep              = (int)Math.Round(bandWidth / binWidth);

            //i: GET SONOGRAM AS MATRIX
            double epsilon  = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, sr, epsilon, frameSize, windowOverlap);

            double[] avAbsolute = results2.Average;                //average absolute value over the minute recording
            //double[] envelope = results2.Item2;
            double[,] spectrogram = results2.AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double windowPower = results2.WindowPower;

            //############################ NEXT LINE FOR DEBUGGING ONLY
            //spectrogram = GetTestSpectrogram(spectrogram.GetLength(0), spectrogram.GetLength(1), 0.01, 0.03);

            var output         = DetectGratingEvents(spectrogram, colStep, intensityThreshold);
            var amplitudeArray = output.Item2; //for debug purposes only

            //convert List of Dictionary events to List of ACousticevents.
            //also set up the hits matrix.
            int rowCount       = spectrogram.GetLength(0);
            int colCount       = spectrogram.GetLength(1);
            var hitsMatrix     = new double[rowCount, colCount];
            var acousticEvents = new List <AcousticEvent>();

            double minFrameCount = 8; //this assumes that the minimum grid is 2 * 4 = 8 long

            foreach (Dictionary <string, double> item in output.Item1)
            {
                int minRow     = (int)item[key_START_FRAME];
                int maxRow     = (int)item[key_END_FRAME];
                int frameCount = maxRow - minRow + 1;
                if (frameCount < minFrameCount)
                {
                    continue;                             //only want events that are over a minimum length
                }
                int    minCol      = (int)item[key_MIN_FREQBIN];
                int    maxCol      = (int)item[key_MAX_FREQBIN];
                double periodicity = item[key_PERIODICITY];

                double[] subarray = DataTools.Subarray(avAbsolute, minRow, maxRow - minRow + 1);
                double   severity = 0.1;
                int[]    bounds   = DataTools.Peaks_CropToFirstAndLast(subarray, severity);
                minRow = minRow + bounds[0];
                maxRow = minRow + bounds[1];
                if (maxRow >= rowCount)
                {
                    maxRow = rowCount - 1;
                }

                Oblong o  = new Oblong(minRow, minCol, maxRow, maxCol);
                var    ae = new AcousticEvent(segmentStartOffset, o, results2.NyquistFreq, frameSize, frameDuration, frameOffset, frameCount);
                ae.Name            = string.Format("p={0:f0}", periodicity);
                ae.Score           = item[key_SCORE];
                ae.ScoreNormalised = item[key_SCORE] / 0.5;
                acousticEvents.Add(ae);

                //display event on the hits matrix
                for (int r = minRow; r < maxRow; r++)
                {
                    for (int c = minCol; c < maxCol; c++)
                    {
                        hitsMatrix[r, c] = periodicity;
                    }
                }
            } //foreach

            //set up the songogram to return. Use the existing amplitude sonogram
            int bitsPerSample = recording.WavReader.BitsPerSample;
            //NoiseReductionType nrt = SNR.Key2NoiseReductionType("NONE");
            NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
            var sonogram           = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(recording.BaseName, frameSize, windowOverlap, bitsPerSample, windowPower, sr, tsRecordingtDuration, nrt, spectrogram);

            sonogram.DecibelsNormalised = new double[sonogram.FrameCount];
            for (int i = 0; i < sonogram.FrameCount; i++) //foreach frame or time step
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }
            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);

            return(Tuple.Create(sonogram, hitsMatrix, amplitudeArray, acousticEvents, tsRecordingtDuration));
        } //Analysis()
예제 #15
0
        public static void Main(Arguments arguments)
        {
            //1. set up the necessary files
            //DirectoryInfo diSource = arguments.Source.Directory;
            FileInfo fiSourceRecording = arguments.Source;
            FileInfo fiConfig          = arguments.Config.ToFileInfo();
            FileInfo fiImage           = arguments.Output.ToFileInfo();

            fiImage.CreateParentDirectories();

            string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING";
            string date  = "# DATE AND TIME: " + DateTime.Now;

            LoggedConsole.WriteLine(title);
            LoggedConsole.WriteLine(date);
            LoggedConsole.WriteLine("# Input  audio file: " + fiSourceRecording.Name);
            LoggedConsole.WriteLine("# Output image file: " + fiImage);

            //2. get the config dictionary
            Config configuration = ConfigFile.Deserialize(fiConfig);

            //below three lines are examples of retrieving info from Config config
            //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName];
            //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false;
            //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold;

            //3 transfer conogram parameters to a dictionary to be passed around
            var configDict = new Dictionary <string, string>();

            // #Resample rate must be 2 X the desired Nyquist. Default is that of recording.
            configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString();
            configDict["FrameLength"]  = configuration[AnalysisKeys.FrameLength] ?? "512";
            int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512;

            // #Frame Overlap as fraction: default=0.0
            configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0";
            double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0;

            // #MinHz: 500
            // #MaxHz: 3500
            // #NOISE REDUCTION PARAMETERS
            configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true";
            configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? "3.0";

            configDict["ADD_AXES"]             = configuration["ADD_AXES"] ?? "true";
            configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true";

            // 3: GET RECORDING
            var startOffsetMins = TimeSpan.Zero;
            var endOffsetMins   = TimeSpan.Zero;

            FileInfo fiOutputSegment = fiSourceRecording;

            if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero))
            {
                var buffer = new TimeSpan(0, 0, 0);
                fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav"));

                //This method extracts segment and saves to disk at the location fiOutputSegment.
                var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate;
                AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment);
            }

            var recording = new AudioRecording(fiOutputSegment.FullName);

            // EXTRACT ENVELOPE and SPECTROGRAM
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);

            // average absolute value over the minute recording
            ////double[] avAbsolute = dspOutput.Average;

            // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ##################################
            // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            // double totalSeconds = wavDuration.TotalSeconds;

            // double[] signalEnvelope = dspOutput.Envelope;
            // double avSignalEnvelope = signalEnvelope.Average();
            // double[] frameEnergy = dspOutput.FrameEnergy;
            // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds;
            // double binWidth = dspOutput.BinWidth;
            // int nyquistBin = dspOutput.NyquistBin;
            // dspOutput.WindowPower,
            // dspOutput.FreqBinWidth
            int    nyquistFreq = dspOutput.NyquistFreq;
            double epsilon     = recording.Epsilon;

            // i: prepare amplitude spectrogram
            double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.
            var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData));

            // ii: prepare decibel spectrogram prior to noise removal
            double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon);
            decibelSpectrogramdata           = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata);
            var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // iii: Calculate background noise spectrum in decibels
            // Calculate noise value for each freq bin.
            double sdCount        = 0.0; // number of SDs above the mean for noise removal
            var    decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount);

            // DataTools.writeBarGraph(dBProfile.NoiseMode);

            // iv: Prepare noise reduced spectrogram
            decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds);

            //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background
            //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold);
            var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // prepare new sonogram config and draw second image going down different code pathway
            var config = new SonogramConfig
            {
                MinFreqBand             = 0,
                MaxFreqBand             = 10000,
                NoiseReductionType      = SNR.KeyToNoiseReductionType("Standard"),
                NoiseReductionParameter = 1.0,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,
            };

            //var mfccConfig = new MfccConfiguration(config);
            int  bandCount  = config.mfccConfig.FilterbankCount;
            bool doMelScale = config.mfccConfig.DoMelScale;
            int  ccCount    = config.mfccConfig.CcCount;
            int  fftBins    = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band
            int  minHz      = config.MinFreqBand ?? 0;
            int  maxHz      = config.MaxFreqBand ?? nyquistFreq;

            var standardSonogram = new SpectrogramStandard(config, recording.WavReader);
            var image4           = standardSonogram.GetImage();

            // TODO next line crashes - does not produce cepstral sonogram.
            //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader);
            //Image image5 = cepSng.GetImage();

            //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict);
            //var image = mti.GetImage();

            //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config);
            //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict);

            //prepare sonogram images
            var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false));

            protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond));
            protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width));
            protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram));
            var image6 = protoImage6.GetImage();

            var list = new List <Image <Rgb24> >();

            list.Add(image1); // amplitude spectrogram
            list.Add(image2); // decibel spectrogram before noise removal
            list.Add(image3); // decibel spectrogram after noise removal
            list.Add(image4); // second version of noise reduced spectrogram

            //list.Add(image5); // ceptral sonogram
            list.Add(image6.CloneAs <Rgb24>()); // multitrack image

            Image finalImage = ImageTools.CombineImagesVertically(list);

            finalImage.Save(fiImage.FullName);

            ////2: NOISE REMOVAL
            //double[,] originalSg = sonogram.Data;
            //double[,] mnr        = sonogram.Data;
            //mnr = ImageTools.WienerFilter(mnr, 3);

            //double backgroundThreshold = 4.0;   //SETS MIN DECIBEL BOUND
            //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold);

            //double ConfigRange = 70;        //sets the the max dB
            //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange);

            ////3: Spectral tracks sonogram
            //byte[,] binary = MatrixTools.IdentifySpectralRidges(mnr);
            //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10);
            //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary);
            ////binary = MatrixTools.PickOutLines(binary); //syntactic approach

            //sonogram.SetBinarySpectrum(binary);
            ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg);

            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_tracks.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn);

            //3: prepare image of spectral peaks sonogram
            //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_peaks.png";
            //image.Save(fn);

            //LoggedConsole.WriteLine("Spectral peaks  sonogram to file: " + fn);

            //4: Sobel approach
            //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_sobel.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn);

            // I1.txt contains the sonogram matrix produced by matlab
            //string matlabFile = @"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt";
            //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166);

            //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]);
            //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]);

            // TODO put this back once sonogram issues resolved

            /*
             * LoggedConsole.WriteLine("START: AED");
             * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix);
             * LoggedConsole.WriteLine("END: AED");
             *
             *
             * //set up static variables for init Acoustic events
             * //AcousticEvent.   doMelScale = config.DoMelScale;
             * AcousticEvent.FreqBinCount = config.FreqBinCount;
             * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount;
             * //  int minF        = (int)config.MinFreqBand;
             * //  int maxF        = (int)config.MaxFreqBand;
             * AcousticEvent.FrameDuration = config.GetFrameOffset();
             *
             *
             * var events = new List<EventPatternRecog.Rectangle>();
             * foreach (Oblong o in oblongs)
             * {
             *  var e = new AcousticEvent(o);
             *  events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq));
             *  //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq);
             * }
             *
             * LoggedConsole.WriteLine("# AED events: " + events.Count);
             *
             * LoggedConsole.WriteLine("START: EPR");
             * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events);
             * LoggedConsole.WriteLine("END: EPR");
             *
             * var eprEvents = new List<AcousticEvent>();
             * foreach (EventPatternRecog.Rectangle r in eprRects)
             * {
             *  var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false);
             *  LoggedConsole.WriteLine(ae.WriteProperties());
             *  eprEvents.Add(ae);
             * }
             *
             * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png");
             *
             * bool doHighlightSubband = false; bool add1kHzLines = true;
             * var image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
             * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
             * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
             * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
             * image.AddEvents(eprEvents);
             * image.Save(outputFolder + wavFileName + ".png");
             */

            LoggedConsole.WriteLine("\nFINISHED!");
        }
        /// <summary>
        /// returns the duration of that part of frame not overlapped with following frame.
        /// Duration is given in seconds.
        /// Assumes window size and overlap fraction already known.
        /// </summary>
        public static TimeSpan GetFrameOffset(int windowSize, double windowOverlap, int sampleRate)
        {
            int step = DSP_Frames.FrameStep(windowSize, windowOverlap);

            return(TimeSpan.FromSeconds(step / (double)sampleRate));
        }
예제 #17
0
        public static void SimilarityIndex(double[] channelL, double[] channelR, double epsilon, int sampleRate,
                                           out double similarityIndex, out double decibelIndex,
                                           out double avDecibelBias, out double medianDecibelBias,
                                           out double lowFreqDbBias, out double midFreqDbBias, out double hiFreqDbBias)
        {
            //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
            int frameSize = 512;
            int frameStep = 512;

            frameSize *= 16; // take longer window to get low freq
            frameStep *= 16;

            var dspOutputL  = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
            var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram);

            //var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram);

            var dspOutputR  = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
            var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram);

            //var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram);

            similarityIndex = 0.0;
            decibelIndex    = 0.0;
            for (int i = 0; i < avSpectrumR.Length; i++)
            {
                double min = Math.Min(avSpectrumL[i], avSpectrumR[i]);
                double max = Math.Max(avSpectrumL[i], avSpectrumR[i]);
                if (max <= 0.000001)
                {
                    max = 0.000001;  // to prevent division by zero.
                }

                // index = min / max;
                double index = min * min / (max * max);
                similarityIndex += index;

                double dBmin = 20 * Math.Log10(min);
                double dBmax = 20 * Math.Log10(max);
                decibelIndex += dBmax - dBmin;
            }

            similarityIndex /= avSpectrumR.Length;
            decibelIndex    /= avSpectrumR.Length;

            double medianLeft  = Statistics.GetMedian(avSpectrumL);
            double medianRight = Statistics.GetMedian(avSpectrumR);

            medianDecibelBias = medianLeft - medianRight;

            // init values
            avDecibelBias = 0.0;
            lowFreqDbBias = 0.0;

            // calculate the freq band bounds for 2kHz and 7khz.
            int lowBound = frameSize * 2000 / sampleRate;
            int midBound = frameSize * 7000 / sampleRate;

            for (int i = 0; i < lowBound; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                lowFreqDbBias += dbLeft - dbRight;
            }

            midFreqDbBias = 0.0;
            for (int i = lowBound; i < midBound; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                midFreqDbBias += dbLeft - dbRight;
            }

            hiFreqDbBias = 0.0;
            for (int i = midBound; i < avSpectrumR.Length; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                hiFreqDbBias  += dbLeft - dbRight;
            }

            avDecibelBias /= avSpectrumR.Length;
            lowFreqDbBias /= lowBound;
            midFreqDbBias /= midBound - lowBound;
            hiFreqDbBias  /= avSpectrumR.Length - midBound;
        }
        public void TestEnvelopeAndFft2()
        {
            var recording  = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
            int windowSize = 512;

            // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
            double windowOverlap  = 0.0;
            var    windowFunction = WindowFunctions.HAMMING.ToString();

            var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                windowSize,
                windowOverlap,
                windowFunction);

            // Now recover the data

            /*
             * // The following data is required when constructing sonograms
             * var duration = recording.WavReader.Time;
             * var sr = recording.SampleRate;
             * var frameCount = fftdata.FrameCount;
             * var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;
             * var epislon = fftdata.Epsilon;
             * var windowPower = fftdata.WindowPower;
             * var amplSpectrogram = fftdata.AmplitudeSpectrogram;
             */

            // The below info is only used when calculating spectral and summary indices
            // energy level information
            int    clipCount   = fftdata.ClipCount;
            int    maxAmpCount = fftdata.HighAmplitudeCount;
            double maxSig      = fftdata.MaxSignalValue;
            double minSig      = fftdata.MinSignalValue;

            // envelope info
            var avArray       = fftdata.Average;
            var envelope      = fftdata.Envelope;
            var frameEnergy   = fftdata.FrameEnergy;
            var frameDecibels = fftdata.FrameDecibels;

            // freq scale info
            var nyquistBin   = fftdata.NyquistBin;
            var nyquistFreq  = fftdata.NyquistFreq;
            var freqBinWidth = fftdata.FreqBinWidth;

            // DO THE TESTS of clipping and signal level info
            // energy level information
            Assert.AreEqual(0, clipCount);
            Assert.AreEqual(0, maxAmpCount);
            Assert.AreEqual(-0.250434888760033, minSig, 0.000001);
            Assert.AreEqual(0.255165257728813, maxSig, 0.000001);

            // DO THE TESTS of energy array info


            // first write to here and move binary file to resources folder.
            // var averageArrayFile = new FileInfo(this.outputDirectory + @"\BAC2_20071008-085040_AvSigArray.bin");
            // Binary.Serialize(averageArrayFile, avArray);
            var averageFile     = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_AvSigArray.bin");
            var expectedAvArray = Binary.Deserialize <double[]>(averageFile);

            CollectionAssert.AreEqual(expectedAvArray, avArray);

            // var envelopeArrayFile = new FileInfo(this.outputDirectory + @"\BAC2_20071008-085040_EnvelopeArray.bin");
            // Binary.Serialize(envelopeArrayFile, envelope);
            var envelopeFile     = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_EnvelopeArray.bin");
            var expectedEnvelope = Binary.Deserialize <double[]>(envelopeFile);

            CollectionAssert.AreEqual(expectedEnvelope, envelope);

            var frameEnergyFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_FrameEnergyArray.bin");

            // uncomment this to update the binary data. Should be rarely needed
            // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
            //Binary.Serialize(frameEnergyFile, frameEnergy);

            var expectedFrameEnergy = Binary.Deserialize <double[]>(frameEnergyFile);

            CollectionAssert.AreEqual(expectedFrameEnergy, frameEnergy);

            var frameDecibelsFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_FrameDecibelsArray.bin");

            // uncomment this to update the binary data. Should be rarely needed
            // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
            //Binary.Serialize(frameDecibelsFile, frameDecibels);

            var expectedFrameDecibels = Binary.Deserialize <double[]>(frameDecibelsFile);

            CollectionAssert.That.AreEqual(expectedFrameDecibels, frameDecibels, Delta);

            // freq info
            Assert.AreEqual(255, nyquistBin);
            Assert.AreEqual(11025, nyquistFreq);
            Assert.AreEqual(43.0664, freqBinWidth, 0.00001);
        }
예제 #19
0
        public static IndexCalculateResult Analysis(
            AudioRecording recording,
            TimeSpan subsegmentOffsetTimeSpan,
            Dictionary <string, IndexProperties> indexProperties,
            int sampleRateOfOriginalAudioFile,
            TimeSpan segmentStartOffset,
            IndexCalculateConfig config,
            bool returnSonogramInfo = false)
        {
            // returnSonogramInfo = true; // if debugging
            double epsilon                  = recording.Epsilon;
            int    signalLength             = recording.WavReader.GetChannel(0).Length;
            int    sampleRate               = recording.WavReader.SampleRate;
            var    segmentDuration          = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            var    indexCalculationDuration = config.IndexCalculationDurationTimeSpan;
            int    nyquist                  = sampleRate / 2;

            // Get FRAME parameters for the calculation of Acoustic Indices
            //WARNING: DO NOT USE Frame Overlap when calculating acoustic indices.
            //         It yields ACI, BGN, POW and EVN results that are significantly different from the default.
            //         I have not had time to check if the difference is meaningful. Best to avoid.
            //int frameSize = (int?)config[AnalysisKeys.FrameLength] ?? IndexCalculateConfig.DefaultWindowSize;
            int frameSize = config.FrameLength;
            int frameStep = frameSize;                                 // that is, windowOverlap = zero

            double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            var    frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            int midFreqBound = config.MidFreqBound;
            int lowFreqBound = config.LowFreqBound;

            int freqBinCount = frameSize / 2;

            // double freqBinWidth = recording.Nyquist / (double)freqBinCount;

            // get duration in seconds and sample count and frame count
            double subsegmentDurationInSeconds = indexCalculationDuration.TotalSeconds;
            int    subsegmentSampleCount       = (int)(subsegmentDurationInSeconds * sampleRate);
            double subsegmentFrameCount        = subsegmentSampleCount / (double)frameStep;

            subsegmentFrameCount = (int)Math.Ceiling(subsegmentFrameCount);

            // In order not to lose the last fractional frame, round up the frame number
            // and get the exact number of samples in the integer number of frames.
            // Do this because when IndexCalculationDuration = 100ms, the number of frames is only 8.
            subsegmentSampleCount = (int)(subsegmentFrameCount * frameStep);

            // get start and end samples of the subsegment and noise segment
            double localOffsetInSeconds = subsegmentOffsetTimeSpan.TotalSeconds - segmentStartOffset.TotalSeconds;
            int    startSample          = (int)(localOffsetInSeconds * sampleRate);
            int    endSample            = startSample + subsegmentSampleCount - 1;

            // Default behaviour: set SUBSEGMENT = total recording
            var subsegmentRecording = recording;

            // But if the indexCalculationDuration < segmentDuration
            if (indexCalculationDuration < segmentDuration)
            {
                // minimum samples needed to calculate acoustic indices. This value was chosen somewhat arbitrarily.
                // It allowes for case where IndexCalculationDuration = 100ms which is approx 8 frames
                int minimumViableSampleCount = frameSize * 8;
                int availableSignal          = signalLength - startSample;

                // if (the required audio is beyond recording OR insufficient for analysis) then backtrack.
                if (availableSignal < minimumViableSampleCount)
                {
                    // Back-track so we can fill a whole result.
                    // This is a silent correction, equivalent to having a segment overlap for the last segment.
                    var oldStart = startSample;
                    startSample = signalLength - subsegmentSampleCount;
                    endSample   = signalLength;

                    Logger.Trace("  Backtrack subsegment to fill missing data from imperfect audio cuts because not enough samples available. " + (oldStart - startSample) + " samples overlap.");
                }

                var subsamples = DataTools.Subarray(recording.WavReader.Samples, startSample, subsegmentSampleCount);
                var wr         = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
                subsegmentRecording = new AudioRecording(wr);
            }

            // INITIALISE a RESULTS STRUCTURE TO return
            // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
            var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, subsegmentOffsetTimeSpan, config);
            SummaryIndexValues  summaryIndices  = result.SummaryIndexValues;
            SpectralIndexValues spectralIndices = result.SpectralIndexValues;

            // set up default spectrogram to return
            result.Sg          = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
            result.Hits        = null;
            result.TrackScores = new List <Plot>();

            // ################################## FINSIHED SET-UP
            // ################################## NOW GET THE AMPLITUDE SPECTORGRAMS

            // EXTRACT ENVELOPE and SPECTROGRAM FROM SUBSEGMENT
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(subsegmentRecording, frameSize, frameStep);

            // Select band according to min and max bandwidth
            int minBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MinBandWidth);
            int maxBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MaxBandWidth) - 1;

            dspOutput1.AmplitudeSpectrogram = MatrixTools.Submatrix(
                dspOutput1.AmplitudeSpectrogram,
                0,
                minBand,
                dspOutput1.AmplitudeSpectrogram.GetLength(0) - 1,
                maxBand);

            // TODO: Michael to review whether bandwidth filter should be moved to DSP_Frames??
            // Recalculate NyquistBin and FreqBinWidth, because they change with band selection
            //dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;
            //dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;

            // Linear or Octave or Mel frequency scale? Set Linear as default.
            var  freqScale     = new FrequencyScale(nyquist: nyquist, frameSize: frameSize, hertzGridInterval: 1000);
            var  freqScaleType = config.FrequencyScale;
            bool octaveScale   = freqScaleType == FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
            bool melScale      = freqScaleType == FreqScaleType.Mel;

            if (octaveScale)
            {
                // only allow one octave scale at the moment - for Jasco marine recordings.
                // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
                // If you wish to use other octave scale types then need to put in the config file and and set up recovery here.
                freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);

                // Recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
                dspOutput1.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
                    dspOutput1.AmplitudeSpectrogram,
                    dspOutput1.WindowPower,
                    sampleRate,
                    epsilon,
                    freqScale);
                dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
            }
            else if (melScale)
            {
                int minFreq = 0;
                int maxFreq = recording.Nyquist;
                dspOutput1.AmplitudeSpectrogram = MFCCStuff.MelFilterBank(
                    dspOutput1.AmplitudeSpectrogram,
                    config.MelScale,
                    recording.Nyquist,
                    minFreq,
                    maxFreq);

                dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;

                // TODO: This doesn't make any sense, since the frequency width changes for each bin. Probably need to set this to NaN.
                // TODO: Whatever uses this value below, should probably be changed to not be depending on it.
                dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;
            }

            // NOW EXTRACT SIGNAL FOR BACKGROUND NOISE CALCULATION
            // If the index calculation duration >= 30 seconds, then calculate BGN from the existing segment of recording.
            bool doSeparateBgnNoiseCalculation = indexCalculationDuration.TotalSeconds + (2 * config.BgNoiseBuffer.TotalSeconds) < segmentDuration.TotalSeconds / 2;
            var  dspOutput2 = dspOutput1;

            if (doSeparateBgnNoiseCalculation)
            {
                // GET a longer SUBSEGMENT FOR NOISE calculation with 5 sec buffer on either side.
                // If the index calculation duration is shorter than 30 seconds, then need to calculate BGN noise from a longer length of recording
                //      i.e. need to add noiseBuffer either side. Typical noiseBuffer value = 5 seconds
                int sampleBuffer = (int)(config.BgNoiseBuffer.TotalSeconds * sampleRate);
                var bgnRecording = AudioRecording.GetRecordingSubsegment(recording, startSample, endSample, sampleBuffer);

                // EXTRACT ENVELOPE and SPECTROGRAM FROM BACKGROUND NOISE SUBSEGMENT
                dspOutput2 = DSP_Frames.ExtractEnvelopeAndFfts(bgnRecording, frameSize, frameStep);

                // If necessary, recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
                if (octaveScale)
                {
                    // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
                    // If you wish to use other octave scale types then need to put in the config file and and set up recovery here.
                    dspOutput2.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
                        dspOutput2.AmplitudeSpectrogram,
                        dspOutput2.WindowPower,
                        sampleRate,
                        epsilon,
                        freqScale);
                    dspOutput2.NyquistBin = dspOutput2.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
                }
            }

            // ###################################### BEGIN CALCULATION OF INDICES ##################################

            // (A) ################################## EXTRACT SUMMARY INDICES FROM THE SIGNAL WAVEFORM ##################################
            // average absolute value over the minute recording - not useful
            // double[] avAbsolute = dspOutput1.Average;
            double[] signalEnvelope    = dspOutput1.Envelope;
            double   avgSignalEnvelope = signalEnvelope.Average();

            // 10 times log of amplitude squared
            summaryIndices.AvgSignalAmplitude = 20 * Math.Log10(avgSignalEnvelope);

            // Deal with case where the signal waveform is continuous flat with values < 0.001. Has happened!!
            // Although signal appears zero, this condition is required.
            if (avgSignalEnvelope < 0.0001)
            {
                Logger.Debug("Segment skipped because avSignalEnvelope is < 0.001!");
                summaryIndices.ZeroSignal = 1.0;
                return(result);
            }

            // i. Check for clipping and high amplitude rates per second
            summaryIndices.HighAmplitudeIndex = dspOutput1.HighAmplitudeCount / subsegmentDurationInSeconds;
            summaryIndices.ClippingIndex      = dspOutput1.ClipCount / subsegmentDurationInSeconds;

            // ii. Calculate bg noise in dB
            //    Convert signal envelope to dB and subtract background noise. Default noise SD to calculate threshold = ZERO
            double signalBgn = NoiseRemovalModal.CalculateBackgroundNoise(dspOutput2.Envelope);

            summaryIndices.BackgroundNoise = signalBgn;

            // iii: FRAME ENERGIES - convert signal to decibels and subtract background noise.
            double[] dBEnvelope          = SNR.Signal2Decibels(dspOutput1.Envelope);
            double[] dBEnvelopeSansNoise = SNR.SubtractAndTruncate2Zero(dBEnvelope, signalBgn);

            // iv: ACTIVITY for NOISE REDUCED SIGNAL ENVELOPE
            // Calculate fraction of frames having acoustic activity
            var activity = ActivityAndCover.CalculateActivity(dBEnvelopeSansNoise, frameStepTimeSpan);

            summaryIndices.Activity = activity.FractionOfActiveFrames;

            // v. average number of events per second whose duration > one frame
            // average event duration in milliseconds - no longer calculated
            //summaryIndices.AvgEventDuration = activity.avEventDuration;
            summaryIndices.EventsPerSecond = activity.EventCount / subsegmentDurationInSeconds;

            // vi. Calculate SNR and active frames SNR
            summaryIndices.Snr = dBEnvelopeSansNoise.Max();
            summaryIndices.AvgSnrOfActiveFrames = activity.ActiveAvDb;

            // vii. ENTROPY of ENERGY ENVELOPE -- 1-Ht because want measure of concentration of acoustic energy.
            double entropy = DataTools.EntropyNormalised(DataTools.SquareValues(signalEnvelope));

            summaryIndices.TemporalEntropy = 1 - entropy;

            // Note that the spectrogram has had the DC bin removed. i.e. has only 256 columns.
            double[,] amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram; // get amplitude spectrogram.

            // CALCULATE various NDSI (Normalised difference soundscape Index) FROM THE AMPLITUDE SPECTROGRAM
            // These options proved to be highly correlated. Therefore only use tuple.Item 1 which derived from Power Spectral Density.
            var tuple3 = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(amplitudeSpectrogram);

            summaryIndices.Ndsi = SpectrogramTools.CalculateNdsi(tuple3.Item1, sampleRate, 1000, 2000, 8000);

            // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
            // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.

            var sampleLength       = Oscillations2014.DefaultSampleLength;
            var frameLength        = Oscillations2014.DefaultFrameLength;
            var sensitivity        = Oscillations2014.DefaultSensitivityThreshold;
            var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(subsegmentRecording, frameLength, sampleLength, sensitivity);

            // double length of the vector because want to work with 256 element vector for LDFC purposes
            spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

            // i: CALCULATE SPECTRUM OF THE SUM OF FREQ BIN AMPLITUDES - used for later calculation of ACI
            spectralIndices.SUM = MatrixTools.SumColumns(amplitudeSpectrogram);

            // Calculate lower and upper boundary bin ids.
            // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from biophony.
            // Boundary of upper bird-band is to avoid high freq artefacts due to mp3.
            int lowerBinBound  = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
            int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);

            // calculate number of freq bins in the bird-band.
            int midBandBinCount = middleBinBound - lowerBinBound + 1;

            if (octaveScale)
            {
                // the above frequency bin bounds do not apply with octave scale. Need to recalculate them suitable for Octave scale recording.
                lowFreqBound  = freqScale.LinearBound;
                lowerBinBound = freqScale.GetBinIdForHerzValue(lowFreqBound);

                midFreqBound = 8000; // This value appears suitable for Jasco Marine recordings. Not much happens above 8kHz.

                //middleBinBound = freqScale.GetBinIdForHerzValue(midFreqBound);
                middleBinBound  = freqScale.GetBinIdInReducedSpectrogramForHerzValue(midFreqBound);
                midBandBinCount = middleBinBound - lowerBinBound + 1;
            }

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
            // original sample rate can be anything 11.0-44.1 kHz.
            int originalNyquist = sampleRateOfOriginalAudioFile / 2;

            // if upsampling has been done
            if (dspOutput1.NyquistFreq > originalNyquist)
            {
                dspOutput1.NyquistFreq = originalNyquist;
                dspOutput1.NyquistBin  = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that binwidth does not change
            }

            // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            spectralIndices.DIF = AcousticComplexityIndex.SumOfAmplitudeDifferences(amplitudeSpectrogram);

            double[] aciSpectrum = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);
            spectralIndices.ACI = aciSpectrum;

            // remove low freq band of ACI spectrum and store average ACI value
            double[] reducedAciSpectrum = DataTools.Subarray(aciSpectrum, lowerBinBound, midBandBinCount);
            summaryIndices.AcousticComplexity = reducedAciSpectrum.Average();

            // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
            double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
            for (int i = 0; i < temporalEntropySpectrum.Length; i++)
            {
                temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
            }

            spectralIndices.ENT = temporalEntropySpectrum;

            // iv: remove background noise from the amplitude spectrogram
            //     First calculate the noise profile from the amplitude sepctrogram
            double[] spectralAmplitudeBgn = NoiseProfile.CalculateBackgroundNoise(dspOutput2.AmplitudeSpectrogram);
            amplitudeSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(amplitudeSpectrogram, spectralAmplitudeBgn);

            // AMPLITUDE THRESHOLD for smoothing background, nhThreshold, assumes background noise ranges around -40dB.
            // This value corresponds to approximately 6dB above backgorund.
            amplitudeSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(amplitudeSpectrogram, nhThreshold: 0.015);
            ////ImageTools.DrawMatrix(spectrogramData, @"C:\SensorNetworks\WavFiles\Crows\image.png", false);
            ////DataTools.writeBarGraph(modalValues);

            result.AmplitudeSpectrogram = amplitudeSpectrogram;

            // v: ENTROPY OF AVERAGE SPECTRUM & VARIANCE SPECTRUM - at this point the spectrogram is a noise reduced amplitude spectrogram
            var tuple = AcousticEntropy.CalculateSpectralEntropies(amplitudeSpectrogram, lowerBinBound, midBandBinCount);

            // ENTROPY of spectral averages - Reverse the values i.e. calculate 1-Hs and 1-Hv, and 1-Hcov for energy concentration
            summaryIndices.EntropyOfAverageSpectrum = 1 - tuple.Item1;

            // ENTROPY of spectrum of Variance values
            summaryIndices.EntropyOfVarianceSpectrum = 1 - tuple.Item2;

            // ENTROPY of spectrum of Coefficient of Variation values
            summaryIndices.EntropyOfCoVSpectrum = 1 - tuple.Item3;

            // vi: ENTROPY OF DISTRIBUTION of maximum SPECTRAL PEAKS.
            //     First extract High band SPECTROGRAM which is now noise reduced
            double entropyOfPeaksSpectrum = AcousticEntropy.CalculateEntropyOfSpectralPeaks(amplitudeSpectrogram, lowerBinBound, middleBinBound);

            summaryIndices.EntropyOfPeaksSpectrum = 1 - entropyOfPeaksSpectrum;

            // ######################################################################################################################################################
            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

            // i: Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput2.AmplitudeSpectrogram, dspOutput2.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);
            spectralIndices.BGN = spectralDecibelBgn;

            // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
            //     REUSE the var decibelSpectrogram but this time using dspOutput1.
            deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            deciBelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, spectralDecibelBgn);
            deciBelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(deciBelSpectrogram, nhThreshold: 2.0);

            // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
            spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(deciBelSpectrogram);

            // iv: CALCULATE SPECTRAL COVER.
            //     NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
            //           FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
            double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb; // dB THRESHOLD for calculating spectral coverage
            var    spActivity  = ActivityAndCover.CalculateSpectralEvents(deciBelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

            spectralIndices.CVR = spActivity.CoverSpectrum;
            spectralIndices.EVN = spActivity.EventSpectrum;

            summaryIndices.HighFreqCover = spActivity.HighFreqBandCover;
            summaryIndices.MidFreqCover  = spActivity.MidFreqBandCover;
            summaryIndices.LowFreqCover  = spActivity.LowFreqBandCover;

            // ######################################################################################################################################################

            // v: CALCULATE SPECTRAL PEAK TRACKS and RIDGE indices.
            //    NOTE: at this point, the var decibelSpectrogram is noise reduced. i.e. all its values >= 0.0
            //    Detecting ridges or spectral peak tracks requires using a 5x5 mask which has edge effects.
            //    This becomes significant if we have a short indexCalculationDuration.
            //    Consequently if the indexCalculationDuration < 10 seconds then we revert back to the recording and cut out a recording segment that includes
            //    a buffer for edge effects. In most cases however, we can just use the decibel spectrogram already calculated and ignore the edge effects.
            double             peakThreshold = 6.0; //dB
            SpectralPeakTracks sptInfo;

            if (indexCalculationDuration.TotalSeconds < 10.0)
            {
                // calculate a new decibel spectrogram
                sptInfo = SpectralPeakTracks.CalculateSpectralPeakTracks(recording, startSample, endSample, frameSize, octaveScale, peakThreshold);
            }
            else
            {
                // use existing decibel spectrogram
                sptInfo = new SpectralPeakTracks(deciBelSpectrogram, peakThreshold);
            }

            spectralIndices.SPT       = sptInfo.SptSpectrum;
            spectralIndices.RHZ       = sptInfo.RhzSpectrum;
            spectralIndices.RVT       = sptInfo.RvtSpectrum;
            spectralIndices.RPS       = sptInfo.RpsSpectrum;
            spectralIndices.RNG       = sptInfo.RngSpectrum;
            summaryIndices.SptDensity = sptInfo.TrackDensity;

            // these are two other indices that I tried but they do not seem to add anything of interest.
            //summaryIndices.AvgSptDuration = sptInfo.AvTrackDuration;
            //summaryIndices.SptPerSecond = sptInfo.TotalTrackCount / subsegmentSecondsDuration;

            // ######################################################################################################################################################

            // vi: CLUSTERING - FIRST DETERMINE IF IT IS WORTH DOING
            // return if (activeFrameCount too small || eventCount == 0 || short index calc duration) because no point doing clustering
            if (activity.ActiveFrameCount <= 2 || Math.Abs(activity.EventCount) < 0.01 || indexCalculationDuration.TotalSeconds < 15)
            {
                // IN ADDITION return if indexCalculationDuration < 15 seconds because no point doing clustering on short time segment
                // NOTE: Activity was calculated with 3dB threshold AFTER backgroundnoise removal.
                //summaryIndices.AvgClusterDuration = TimeSpan.Zero;
                summaryIndices.ClusterCount   = 0;
                summaryIndices.ThreeGramCount = 0;
                return(result);
            }

            // YES WE WILL DO CLUSTERING! to determine cluster count (spectral diversity) and spectral persistence.
            // Only use midband decibel SPECTRUM. In June 2016, the mid-band (i.e. the bird-band) was set to lowerBound=1000Hz, upperBound=8000hz.
            // Actually do clustering of binary spectra. Must first threshold
            double binaryThreshold    = SpectralClustering.DefaultBinaryThresholdInDecibels;
            var    midBandSpectrogram = MatrixTools.Submatrix(deciBelSpectrogram, 0, lowerBinBound, deciBelSpectrogram.GetLength(0) - 1, middleBinBound);
            var    clusterInfo        = SpectralClustering.ClusterTheSpectra(midBandSpectrogram, lowerBinBound, middleBinBound, binaryThreshold);

            // Store two summary index values from cluster info
            summaryIndices.ClusterCount   = clusterInfo.ClusterCount;
            summaryIndices.ThreeGramCount = clusterInfo.TriGramUniqueCount;

            // As of May 2017, no longer store clustering results superimposed on spectrogram.
            // If you want to see this, then call the TEST methods in class SpectralClustering.cs.

            // #######################################################################################################################################################

            // vii: set up other info to return
            var freqPeaks = SpectralPeakTracks.ConvertSpectralPeaksToNormalisedArray(deciBelSpectrogram);
            var scores    = new List <Plot>
            {
                new Plot("Decibels", DataTools.normalise(dBEnvelopeSansNoise), ActivityAndCover.DefaultActivityThresholdDb),
                new Plot("Active Frames", DataTools.Bool2Binary(activity.ActiveFrames), 0.0),
                new Plot("Max Frequency", freqPeaks, 0.0), // relative location of freq maxima in spectra
            };

            result.Hits        = sptInfo.Peaks;
            result.TrackScores = scores;

            return(result);
        } // end Calculation of Summary and Spectral Indices
        public void TestEnvelopeAndFft1()
        {
            var recording  = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
            int windowSize = 512;

            // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
            double windowOverlap  = 0.0;
            var    windowFunction = WindowFunctions.HAMMING.ToString();

            var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                windowSize,
                windowOverlap,
                windowFunction);

            // Now recover the data
            // The following data is required when constructing sonograms
            var duration   = recording.WavReader.Time;
            var sr         = recording.SampleRate;
            var frameCount = fftdata.FrameCount;
            var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;
            var epislon         = fftdata.Epsilon;
            var windowPower     = fftdata.WindowPower;
            var amplSpectrogram = fftdata.AmplitudeSpectrogram;

            // The below info is only used when calculating spectral and summary indices

            /*
             * // energy level information
             * int clipCount = fftdata.ClipCount;
             * int maxAmpCount = fftdata.MaxAmplitudeCount;
             * double maxSig = fftdata.MaxSignalValue;
             * double minSig = fftdata.MinSignalValue;
             *
             * // envelope info
             * var avArray = fftdata.Average;
             * var envelope = fftdata.Envelope;
             * var frameEnergy = fftdata.FrameEnergy;
             * var frameDecibels = fftdata.FrameDecibels;
             *
             * // freq scale info
             * var nyquistBin = fftdata.NyquistBin;
             * var nyquistFreq = fftdata.NyquistFreq;
             * var freqBinWidth = fftdata.FreqBinWidth;
             */

            // DO THE TESTS
            int expectedSR = 22050;

            Assert.AreEqual(expectedSR, sr);
            Assert.AreEqual("00:01:00.2450000", duration.ToString());
            Assert.AreEqual(2594, frameCount);
            int    expectedBitsPerSample = 16;
            double expectedEpsilon       = Math.Pow(0.5, expectedBitsPerSample - 1);

            Assert.AreEqual(expectedEpsilon, epislon);
            double expectedWindowPower = 203.0778;

            Assert.AreEqual(expectedWindowPower, windowPower, 0.0001);
            Assert.AreEqual(0.0, fractionOfHighEnergyFrames, 0.0000001);

            // Test sonogram data matrix by comparing the vector of column sums.
            double[] columnSums = MatrixTools.SumColumns(amplSpectrogram);

            var sumFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_DataColumnSums.bin");

            // uncomment this to update the binary data. Should be rarely needed
            // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
            //Binary.Serialize(sumFile, columnSums);

            var expectedColSums = Binary.Deserialize <double[]>(sumFile);
            var totalDelta      = expectedColSums.Zip(columnSums, ValueTuple.Create).Select(x => Math.Abs(x.Item1 - x.Item2)).Sum();
            var avgDelta        = expectedColSums.Zip(columnSums, ValueTuple.Create).Select(x => Math.Abs(x.Item1 - x.Item2)).Average();

            Assert.AreEqual(expectedColSums[0], columnSums[0], Delta, $"\nE: {expectedColSums[0]:R}\nA: {columnSums[0]:R}\nD: {expectedColSums[0] - columnSums[0]:R}\nT: {totalDelta:R}\nA: {avgDelta}\nn: {expectedColSums.Length}");
            CollectionAssert.That.AreEqual(expectedColSums, columnSums, Delta);
        }
        } //Analysis()

        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent> > DetectHarmonics(
            AudioRecording recording,
            double intensityThreshold,
            int minHz,
            int minFormantgap,
            int maxFormantgap,
            double minDuration,
            int windowSize,
            double windowOverlap,
            TimeSpan segmentStartOffset)
        {
            //i: MAKE SONOGRAM
            int    numberOfBins    = 32;
            double binWidth        = recording.SampleRate / (double)windowSize;
            int    sr              = recording.SampleRate;
            double frameDuration   = windowSize / (double)sr;             // Duration of full frame or window in seconds
            double frameOffset     = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
            double framesPerSecond = 1 / frameOffset;

            //double framesPerSecond = sr / (double)windowSize;
            //int frameOffset = (int)(windowSize * (1 - overlap));
            //int frameCount = (length - windowSize + frameOffset) / frameOffset;

            double epsilon  = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
                recording.WavReader.Samples,
                sr,
                epsilon,
                windowSize,
                windowOverlap);

            double[] avAbsolute = results2.Average; //average absolute value over the minute recording

            //double[] envelope = results2.Item2;
            double[,]
            matrix = results2
                     .AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double windowPower = results2.WindowPower;

            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int minBin = (int)Math.Round(minHz / binWidth);
            int maxHz  = (int)Math.Round(minHz + (numberOfBins * binWidth));

            int rowCount = matrix.GetLength(0);
            int colCount = matrix.GetLength(1);
            int maxbin   = minBin + numberOfBins;

            double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1; //an array of periodicity scores
            double[] periodicity = results.Item2;

            //transfer periodicity info to a hits matrix.
            //intensity = DataTools.filterMovingAverage(intensity, 3);
            double[] scoreArray = new double[intensity.Length];
            var      hits       = new double[rowCount, colCount];

            for (int r = 0; r < rowCount; r++)
            {
                double relativePeriod = periodicity[r] / numberOfBins / 2;
                if (intensity[r] > intensityThreshold)
                {
                    for (int c = minBin; c < maxbin; c++)
                    {
                        hits[r, c] = relativePeriod;
                    }
                }

                double herzPeriod = periodicity[r] * binWidth;
                if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
                {
                    scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
                }
            }

            scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxDuration = 100000.0; //abitrary long number - do not want to restrict duration of machine noise
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                framesPerSecond,
                binWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            hits = null;

            //set up the songogram to return. Use the existing amplitude sonogram
            int                bitsPerSample = recording.WavReader.BitsPerSample;
            TimeSpan           duration      = recording.Duration;
            NoiseReductionType nrt           = SNR.KeyToNoiseReductionType("STANDARD");

            var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
                recording.BaseName,
                windowSize,
                windowOverlap,
                bitsPerSample,
                windowPower,
                sr,
                duration,
                nrt,
                matrix);

            sonogram.DecibelsNormalised = new double[rowCount];

            //foreach frame or time step
            for (int i = 0; i < rowCount; i++)
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }

            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
            return(Tuple.Create(sonogram, hits, scoreArray, predictedEvents));
        } //end Execute_HDDetect
        /// <summary>
        /// Calculate summary statistics for supplied temporal and spectral targets.
        /// </summary>
        /// <remarks>
        /// The acoustic statistics calculated in this method are based on methods outlined in
        /// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
        /// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
        /// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
        /// ..
        /// The Briggs feature are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
        /// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
        ///    We use entropy instead because value not dependent on row or column count because it is normalized.
        /// For the following meausres of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
        /// 2. freq-mean
        /// 3. freq-variance
        /// 4. freq-skew and kurtosis
        /// 5. time-mean
        /// 6. time-variance
        /// 7. time-skew and kurtosis
        /// 8. freq-max (normalized)
        /// 9. time-max (normalized)
        /// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
        /// ...
        /// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
        /// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
        /// NOTE 2: To NormaliseMatrixValues for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
        ///         There are advantages and disadvantages to Briggs method versus ours. In our case, we hve to convert decibel values back to
        ///         energy values when calculating the statistics for the extracted acoustic event.
        /// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
        ///         Ony mean and standard deviation.
        /// ..
        /// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
        /// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
        /// BUT not so long as to cause memory constipation.
        /// </remarks>
        /// <param name="recording">as type AudioRecording which contains the event</param>
        /// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording</param>
        /// <param name="spectralTarget">both bottom and top bounds in Hertz</param>
        /// <param name="config">parameters that determine the outcome of the analysis</param>
        /// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param>
        /// <returns>an instance of EventStatistics</returns>
        public static EventStatistics AnalyzeAudioEvent(
            AudioRecording recording,
            Range <TimeSpan> temporalTarget,
            Range <double> spectralTarget,
            EventStatisticsConfiguration config,
            TimeSpan segmentStartOffset)
        {
            var stats = new EventStatistics
            {
                EventStartSeconds      = temporalTarget.Minimum.TotalSeconds,
                EventEndSeconds        = temporalTarget.Maximum.TotalSeconds,
                LowFrequencyHertz      = spectralTarget.Minimum,
                HighFrequencyHertz     = spectralTarget.Maximum,
                SegmentDurationSeconds = recording.Duration.TotalSeconds,
                SegmentStartSeconds    = segmentStartOffset.TotalSeconds,
            };

            // temporal target is supplied relative to recording, but not the supplied audio segment
            // shift coordinates relative to segment
            var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

            if (!recording
                .Duration
                .AsRangeFromZero(Topology.Inclusive)
                .Contains(localTemporalTarget))
            {
                stats.Error        = true;
                stats.ErrorMessage =
                    $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

                return(stats);
            }

            // convert recording to spectrogram
            int    sampleRate = recording.SampleRate;
            double epsilon    = recording.Epsilon;

            // extract the spectrogram
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

            double hertzBinWidth         = dspOutput1.FreqBinWidth;
            var    stepDurationInSeconds = config.FrameStep / (double)sampleRate;
            var    startFrame            = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

            // subtract 1 frame because want to end before start of end point.
            var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

            var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
            var topBin    = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

            // Events can have their high value set to the nyquist.
            // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
            // these cases. So we just ask for the bin below.
            if (topBin >= config.FrameSize / 2)
            {
                topBin = (config.FrameSize / 2) - 1;
            }

            // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // extract the required acoustic event
            var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

            // Get the SNR of the event. This is just the max value in the matrix because noise reduced
            MatrixTools.MinMax(eventMatrix, out _, out double max);
            stats.SnrDecibels = max;

            // Now need to convert event matrix back to energy values before calculating other statistics
            eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

            var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
            var rowAverages    = MatrixTools.GetRowAverages(eventMatrix);

            // calculate the mean and temporal standard deviation in decibels
            NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
            stats.MeanDecibels           = 10 * Math.Log10(mean);
            stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate the frequency standard deviation in decibels
            NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
            stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate relative location of the temporal maximum
            int maxRowId = DataTools.GetMaxIndex(rowAverages);

            stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

            // calculate the entropy dispersion/concentration indices
            stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
            stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

            // calculate the spectral centroid and the dominant frequency
            double binCentroid = CalculateSpectralCentroid(columnAverages);

            stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
            int maxColumnId = DataTools.GetMaxIndex(columnAverages);

            stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

            // remainder of this method is to produce debugging images. Can comment out when not debugging.

            /*
             * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
             * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
             * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
             * image4.Save(path4);
             * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
             * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
             * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
             * image4.Save(path4);
             */
            return(stats);
        }
예제 #23
0
        public static Tuple <Dictionary <string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
        {
            Dictionary <string, string> config = analysisSettings.ConfigDict;

            // get parameters for the analysis
            int    frameSize     = IndexCalculateConfig.DefaultWindowSize;
            double windowOverlap = 0.0;
            int    lowFreqBound  = 1000;
            int    midFreqBound  = 8000;

            if (config.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
            }
            if (config.ContainsKey(key_LOW_FREQ_BOUND))
            {
                lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
            }
            if (config.ContainsKey(key_MID_FREQ_BOUND))
            {
                midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
            }
            if (config.ContainsKey(AnalysisKeys.FrameOverlap))
            {
                windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
            }

            // get recording segment
            AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

            // calculate duration/size of various quantities.
            int      signalLength  = recording.WavReader.Samples.Length;
            TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            double   duration      = frameSize * (1 - windowOverlap) / (double)recording.SampleRate;
            TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));

            int    chunkDuration   = 10; //seconds
            double framesPerSecond = 1 / frameDuration.TotalSeconds;
            int    chunkCount      = (int)Math.Round(audioDuration.TotalSeconds / (double)chunkDuration);
            int    framesPerChunk  = (int)(chunkDuration * framesPerSecond);

            string[] classifications = new string[chunkCount];

            //i: EXTRACT ENVELOPE and FFTs
            double epsilon       = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);

            double[] envelope = signalextract.Envelope;
            double[,] spectrogram = signalextract.AmplitudeSpectrogram;  //amplitude spectrogram
            int colCount = spectrogram.GetLength(1);

            int    nyquistFreq = recording.Nyquist;
            int    nyquistBin  = spectrogram.GetLength(1) - 1;
            double binWidth    = nyquistFreq / (double)spectrogram.GetLength(1);

            // calculate the bin id of boundary between mid and low frequency spectrum
            int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this iwll be less than 17640/2.
            int originalAudioNyquist = originalFile.SampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.

            if (recording.Nyquist > originalAudioNyquist)
            {
                nyquistFreq = originalAudioNyquist;
                nyquistBin  = (int)Math.Floor(originalAudioNyquist / binWidth);
            }

            // vi: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);

            double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
            double   aci1     = aciArray.Average();

            // ii: FRAME ENERGIES -
            // convert signal to decibels and subtract background noise.
            double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), StandardDeviationCount);
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            //// vii: remove background noise from the full spectrogram i.e. BIN 1 to Nyquist
            //spectrogramData = MatrixTools.Submatrix(spectrogramData, 0, 1, spectrogramData.GetLength(0) - 1, nyquistBin);
            //const double SpectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
            //double[] modalValues = SNR.CalculateModalValues(spectrogramData); // calculate modal value for each freq bin.
            //modalValues = DataTools.filterMovingAverage(modalValues, 7);      // smooth the modal profile
            //spectrogramData = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogramData, modalValues);
            //spectrogramData = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogramData, SpectralBgThreshold);

            //set up the output
            if (Verbose)
            {
                LoggedConsole.WriteLine("{0:d2}, {1},  {2},    {3},    {4},    {5},   {6},     {7},     {8},    {9},   {10},   {11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
            }
            StringBuilder sb = null;

            if (WriteOutputFile)
            {
                string header = string.Format("{0:d2},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
                sb = new StringBuilder(header + "\n");
            }

            Dictionary <string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);

            return(Tuple.Create(dict, audioDuration));
        } //Analysis()
        /// <summary>
        /// returns the duration of that part of frame not overlapped with follwoing frame.
        /// Duration is given in seconds.
        /// Assumes window size and overlap fraction already known.
        /// </summary>
        public double GetFrameOffset(int sampleRate)
        {
            int step = DSP_Frames.FrameStep(this.WindowSize, this.WindowOverlap);

            return(step / (double)sampleRate);
        }
        FrogRibbitRecognizer(AudioRecording recording, string filterName, int midBandFreq, double windowDuration = 5.0, double windowOverlap = 0.5,
                             double dctDuration = 0.5, double dctThreshold = 0.4, bool normaliseDCT = false, int minOscilRate = 11, int maxOscilRate = 17, double maxOscilScore = 20.0)
        {
            int    sr              = recording.SampleRate;
            int    windowSize      = (int)(windowDuration * sr / 1000.0);
            double frameStep       = windowDuration * (1 - windowOverlap);
            double framesPerSecond = 1000 / frameStep;

            //i: Apply filter
            Log.WriteLine("#   Filter: " + filterName);
            var filteredRecording = AudioRecording.Filter_IIR(recording, filterName); //return new filtered audio recording.
            int signalLength      = filteredRecording.WavReader.Samples.Length;

            //ii: FRAMING
            int[,] frameIDs = DSP_Frames.FrameStartEnds(signalLength, windowSize, windowOverlap);
            int frameCount = frameIDs.GetLength(0);

            //iii: EXTRACT ENVELOPE and ZERO-CROSSINGS
            Log.WriteLine("#   Extract Envelope and Zero-crossings.");
            var results2 = DSP_Frames.ExtractEnvelopeAndZeroCrossings(filteredRecording.WavReader.Samples, sr, windowSize, windowOverlap);

            //double[] average       = results2.Item1;
            double[] envelope      = results2.Item2;
            double[] zeroCrossings = results2.Item3;
            //double[] sampleZCs     = results2.Item4;
            double[] sampleStd = results2.Item5;

            Log.WriteLine("#   Normalize values.");
            //iv: FRAME ENERGIES
            double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(envelope), StandardDeviationCount);
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            //v: CONVERSIONS: ZERO CROSSINGS to herz - then NORMALIZE to Fuzzy freq
            int[] freq      = DSP_Frames.ConvertZeroCrossings2Hz(zeroCrossings, windowSize, sr);
            int   sideBand  = (int)(midBandFreq * 0.1);
            var   fuzzyFreq = FuzzyFrequency(freq, midBandFreq, sideBand);

            //vi: CONVERSIONS: convert sample std deviations to milliseconds - then NORMALIZE to PROBs
            double[] tsd = DSP_Frames.ConvertSamples2Milliseconds(sampleStd, sr); //time standard deviation
            //for (int i = 0; i < tsd.Length; i++) if (tsd[i]) LoggedConsole.WriteLine(i + " = " + tsd[i]);
            //filter the freq array to remove values derived from frames with high standard deviation
            double[] tsdScores = NormalDist.Values2Probabilities(tsd);

            //vii: GET OSCILLATION SCORE AND NORMALIZE
            double[] rawOscillations = Oscillations2010.DetectOscillationsInScoreArray(dBarray, dctDuration, framesPerSecond, dctThreshold, normaliseDCT, minOscilRate, maxOscilRate);
            //NormaliseMatrixValues oscillation scores wrt scores obtained on a training.
            //double maxOscillationScore = rawOscillations[DataTools.GetMaxIndex(rawOscillations)];
            //LoggedConsole.WriteLine("maxOscillationScore=" + maxOscillationScore);
            var oscillations = new double[dBarray.Length];

            for (int i = 0; i < dBarray.Length; i++)
            {
                oscillations[i] = rawOscillations[i] / maxOscilScore;
                if (oscillations[i] > 1.0)
                {
                    oscillations[i] = 1.0;
                }
            }

            //viii: COMBINE the SCORES
            Log.WriteLine("#   Combine Scores.");
            var combinedScores = new double[dBarray.Length];

            for (int i = 0; i < dBarray.Length; i++)
            {
                combinedScores[i] = fuzzyFreq[i] * tsdScores[i] * oscillations[i];
            }

            //ix: fill in the oscillation scores
            combinedScores = Oscillations2010.FillScoreArray(combinedScores, dctDuration, framesPerSecond);
            return(Tuple.Create(combinedScores, filteredRecording, dBarray, tsd));
        }