Esempio n. 1
0
        public static Tuple <Dictionary <string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
        {
            Dictionary <string, string> config = analysisSettings.ConfigDict;

            // get parameters for the analysis
            int    frameSize     = IndexCalculateConfig.DefaultWindowSize;
            double windowOverlap = 0.0;
            int    lowFreqBound  = 1000;
            int    midFreqBound  = 8000;

            if (config.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
            }
            if (config.ContainsKey(key_LOW_FREQ_BOUND))
            {
                lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
            }
            if (config.ContainsKey(key_MID_FREQ_BOUND))
            {
                midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
            }
            if (config.ContainsKey(AnalysisKeys.FrameOverlap))
            {
                windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
            }

            // get recording segment
            AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

            // calculate duration/size of various quantities.
            int      signalLength  = recording.WavReader.Samples.Length;
            TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            double   duration      = frameSize * (1 - windowOverlap) / (double)recording.SampleRate;
            TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));

            int    chunkDuration   = 10; //seconds
            double framesPerSecond = 1 / frameDuration.TotalSeconds;
            int    chunkCount      = (int)Math.Round(audioDuration.TotalSeconds / (double)chunkDuration);
            int    framesPerChunk  = (int)(chunkDuration * framesPerSecond);

            string[] classifications = new string[chunkCount];

            //i: EXTRACT ENVELOPE and FFTs
            double epsilon       = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);

            double[] envelope = signalextract.Envelope;
            double[,] spectrogram = signalextract.AmplitudeSpectrogram;  //amplitude spectrogram
            int colCount = spectrogram.GetLength(1);

            int    nyquistFreq = recording.Nyquist;
            int    nyquistBin  = spectrogram.GetLength(1) - 1;
            double binWidth    = nyquistFreq / (double)spectrogram.GetLength(1);

            // calculate the bin id of boundary between mid and low frequency spectrum
            int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this iwll be less than 17640/2.
            int originalAudioNyquist = originalFile.SampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.

            if (recording.Nyquist > originalAudioNyquist)
            {
                nyquistFreq = originalAudioNyquist;
                nyquistBin  = (int)Math.Floor(originalAudioNyquist / binWidth);
            }

            // vi: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);

            double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
            double   aci1     = aciArray.Average();

            // ii: FRAME ENERGIES -
            // convert signal to decibels and subtract background noise.
            double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), StandardDeviationCount);
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            //// vii: remove background noise from the full spectrogram i.e. BIN 1 to Nyquist
            //spectrogramData = MatrixTools.Submatrix(spectrogramData, 0, 1, spectrogramData.GetLength(0) - 1, nyquistBin);
            //const double SpectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
            //double[] modalValues = SNR.CalculateModalValues(spectrogramData); // calculate modal value for each freq bin.
            //modalValues = DataTools.filterMovingAverage(modalValues, 7);      // smooth the modal profile
            //spectrogramData = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogramData, modalValues);
            //spectrogramData = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogramData, SpectralBgThreshold);

            //set up the output
            if (Verbose)
            {
                LoggedConsole.WriteLine("{0:d2}, {1},  {2},    {3},    {4},    {5},   {6},     {7},     {8},    {9},   {10},   {11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
            }
            StringBuilder sb = null;

            if (WriteOutputFile)
            {
                string header = string.Format("{0:d2},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
                sb = new StringBuilder(header + "\n");
            }

            Dictionary <string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);

            return(Tuple.Create(dict, audioDuration));
        } //Analysis()
Esempio n. 2
0
        } //Analysis()

        /// <summary>
        /// returns some indices relevant to rain and cicadas from a short (10seconds) chunk of audio
        /// </summary>
        /// <param name="signal">signal envelope of a 10s chunk of audio</param>
        /// <param name="spectrogram">spectrogram of a 10s chunk of audio</param>
        /// <param name="lowFreqBound"></param>
        /// <param name="midFreqBound"></param>
        /// <param name="binWidth"></param>
        /// <returns></returns>
        public static RainStruct Get10SecondIndices(double[] signal, double[,] spectrogram, int lowFreqBound, int midFreqBound,
                                                    TimeSpan frameDuration, double binWidth)
        {
            // i: FRAME ENERGIES -
            double StandardDeviationCount = 0.1;
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signal), StandardDeviationCount); //use Lamel et al.
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            bool[] activeFrames = new bool[dBarray.Length]; //record frames with activity >= threshold dB above background and count
            for (int i = 0; i < dBarray.Length; i++)
            {
                if (dBarray[i] >= ActivityAndCover.DefaultActivityThresholdDb)
                {
                    activeFrames[i] = true;
                }
            }

            //int activeFrameCount = dBarray.Count((x) => (x >= AcousticIndices.DEFAULT_activityThreshold_dB));
            int activeFrameCount = DataTools.CountTrues(activeFrames);

            double spikeThreshold = 0.05;
            double spikeIndex     = CalculateSpikeIndex(signal, spikeThreshold);

            //Console.WriteLine("spikeIndex=" + spikeIndex);
            //DataTools.writeBarGraph(signal);

            RainStruct rainIndices;                                                                    // struct in which to store all indices

            rainIndices.activity        = activeFrameCount / (double)dBarray.Length;                   //fraction of frames having acoustic activity
            rainIndices.bgNoise         = results3.NoiseMode;                                          //bg noise in dB
            rainIndices.snr             = results3.Snr;                                                //snr
            rainIndices.avSig_dB        = 20 * Math.Log10(signal.Average());                           //10 times log of amplitude squared
            rainIndices.temporalEntropy = DataTools.EntropyNormalised(DataTools.SquareValues(signal)); //ENTROPY of ENERGY ENVELOPE
            rainIndices.spikes          = spikeIndex;

            // ii: calculate the bin id of boundary between mid and low frequency spectrum
            int lowBinBound        = (int)Math.Ceiling(lowFreqBound / binWidth);
            var midbandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, spectrogram.GetLength(1) - 1);

            // iii: ENTROPY OF AVERAGE SPECTRUM and VARIANCE SPECTRUM - at this point the spectrogram is still an amplitude spectrogram
            var tuple = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(midbandSpectrogram);

            rainIndices.spectralEntropy = DataTools.EntropyNormalised(tuple.Item1); //ENTROPY of spectral averages
            if (double.IsNaN(rainIndices.spectralEntropy))
            {
                rainIndices.spectralEntropy = 1.0;
            }

            // iv: CALCULATE Acoustic Complexity Index on the AMPLITUDE SPECTRUM
            var aciArray = AcousticComplexityIndex.CalculateAci(midbandSpectrogram);

            rainIndices.ACI = aciArray.Average();

            //v: remove background noise from the spectrogram
            double spectralBgThreshold = 0.015;      // SPECTRAL AMPLITUDE THRESHOLD for smoothing background

            //double[] modalValues = SNR.CalculateModalValues(spectrogram); //calculate modal value for each freq bin.
            //modalValues = DataTools.filterMovingAverage(modalValues, 7);  //smooth the modal profile
            //spectrogram = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogram, modalValues);
            //spectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogram, spectralBgThreshold);

            //vi: SPECTROGRAM ANALYSIS - SPECTRAL COVER. NOTE: spectrogram is still a noise reduced amplitude spectrogram
            SpectralActivity sa = ActivityAndCover.CalculateSpectralEvents(spectrogram, spectralBgThreshold, frameDuration, lowFreqBound, midFreqBound, binWidth);

            rainIndices.lowFreqCover = sa.LowFreqBandCover;
            rainIndices.midFreqCover = sa.MidFreqBandCover;
            rainIndices.hiFreqCover  = sa.HighFreqBandCover;

            //double[] coverSpectrum = sa.coverSpectrum;
            //double[] eventSpectrum = sa.eventSpectrum;

            return(rainIndices);
        }
        FrogRibbitRecognizer(AudioRecording recording, string filterName, int midBandFreq, double windowDuration = 5.0, double windowOverlap = 0.5,
                             double dctDuration = 0.5, double dctThreshold = 0.4, bool normaliseDCT = false, int minOscilRate = 11, int maxOscilRate = 17, double maxOscilScore = 20.0)
        {
            int    sr              = recording.SampleRate;
            int    windowSize      = (int)(windowDuration * sr / 1000.0);
            double frameStep       = windowDuration * (1 - windowOverlap);
            double framesPerSecond = 1000 / frameStep;

            //i: Apply filter
            Log.WriteLine("#   Filter: " + filterName);
            var filteredRecording = AudioRecording.Filter_IIR(recording, filterName); //return new filtered audio recording.
            int signalLength      = filteredRecording.WavReader.Samples.Length;

            //ii: FRAMING
            int[,] frameIDs = DSP_Frames.FrameStartEnds(signalLength, windowSize, windowOverlap);
            int frameCount = frameIDs.GetLength(0);

            //iii: EXTRACT ENVELOPE and ZERO-CROSSINGS
            Log.WriteLine("#   Extract Envelope and Zero-crossings.");
            var results2 = DSP_Frames.ExtractEnvelopeAndZeroCrossings(filteredRecording.WavReader.Samples, sr, windowSize, windowOverlap);

            //double[] average       = results2.Item1;
            double[] envelope      = results2.Item2;
            double[] zeroCrossings = results2.Item3;
            //double[] sampleZCs     = results2.Item4;
            double[] sampleStd = results2.Item5;

            Log.WriteLine("#   Normalize values.");
            //iv: FRAME ENERGIES
            double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(envelope), StandardDeviationCount);
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            //v: CONVERSIONS: ZERO CROSSINGS to herz - then NORMALIZE to Fuzzy freq
            int[] freq      = DSP_Frames.ConvertZeroCrossings2Hz(zeroCrossings, windowSize, sr);
            int   sideBand  = (int)(midBandFreq * 0.1);
            var   fuzzyFreq = FuzzyFrequency(freq, midBandFreq, sideBand);

            //vi: CONVERSIONS: convert sample std deviations to milliseconds - then NORMALIZE to PROBs
            double[] tsd = DSP_Frames.ConvertSamples2Milliseconds(sampleStd, sr); //time standard deviation
            //for (int i = 0; i < tsd.Length; i++) if (tsd[i]) LoggedConsole.WriteLine(i + " = " + tsd[i]);
            //filter the freq array to remove values derived from frames with high standard deviation
            double[] tsdScores = NormalDist.Values2Probabilities(tsd);

            //vii: GET OSCILLATION SCORE AND NORMALIZE
            double[] rawOscillations = Oscillations2010.DetectOscillationsInScoreArray(dBarray, dctDuration, framesPerSecond, dctThreshold, normaliseDCT, minOscilRate, maxOscilRate);
            //NormaliseMatrixValues oscillation scores wrt scores obtained on a training.
            //double maxOscillationScore = rawOscillations[DataTools.GetMaxIndex(rawOscillations)];
            //LoggedConsole.WriteLine("maxOscillationScore=" + maxOscillationScore);
            var oscillations = new double[dBarray.Length];

            for (int i = 0; i < dBarray.Length; i++)
            {
                oscillations[i] = rawOscillations[i] / maxOscilScore;
                if (oscillations[i] > 1.0)
                {
                    oscillations[i] = 1.0;
                }
            }

            //viii: COMBINE the SCORES
            Log.WriteLine("#   Combine Scores.");
            var combinedScores = new double[dBarray.Length];

            for (int i = 0; i < dBarray.Length; i++)
            {
                combinedScores[i] = fuzzyFreq[i] * tsdScores[i] * oscillations[i];
            }

            //ix: fill in the oscillation scores
            combinedScores = Oscillations2010.FillScoreArray(combinedScores, dctDuration, framesPerSecond);
            return(Tuple.Create(combinedScores, filteredRecording, dBarray, tsd));
        }