/// <summary>
/// Loads an audio file, extracts its signal envelope and amplitude spectrogram, and returns the
/// indices produced by <c>RainIndices.GetIndices</c> together with the duration of the recording.
/// </summary>
/// <param name="fiAudioFile">the audio file to analyse; its full path is handed to <c>AudioRecording</c></param>
/// <param name="analysisSettings">settings whose <c>ConfigDict</c> may override the frame length, the frame overlap and the low/mid frequency bounds</param>
/// <param name="originalFile">metadata of the source recording; its sample rate is compared with the loaded recording's Nyquist to detect up-sampling</param>
/// <returns>a tuple of (dictionary of indices, duration of the recording)</returns>
public static Tuple<Dictionary<string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
{
    Dictionary<string, string> config = analysisSettings.ConfigDict;

    // get parameters for the analysis; each default below is kept unless the config supplies the key
    int frameSize = IndexCalculateConfig.DefaultWindowSize;
    double windowOverlap = 0.0;
    int lowFreqBound = 1000;
    int midFreqBound = 8000;

    if (config.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
    }

    if (config.ContainsKey(key_LOW_FREQ_BOUND))
    {
        lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
    }

    if (config.ContainsKey(key_MID_FREQ_BOUND))
    {
        midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
    }

    if (config.ContainsKey(AnalysisKeys.FrameOverlap))
    {
        windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
    }

    // get recording segment
    AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

    // calculate duration of the recording and of one (non-overlapped) frame step
    TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    double duration = frameSize * (1 - windowOverlap) / (double)recording.SampleRate;
    TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));

    // i: EXTRACT ENVELOPE and FFTs
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);
    double[] envelope = signalextract.Envelope;
    double[,] spectrogram = signalextract.AmplitudeSpectrogram; // amplitude spectrogram
    int nyquistFreq = recording.Nyquist;
    int nyquistBin = spectrogram.GetLength(1) - 1;
    double binWidth = nyquistFreq / (double)spectrogram.GetLength(1);

    // calculate the bin id of boundary between mid and low frequency spectrum
    int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

    // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. This will be less than 17640/2.
    int originalAudioNyquist = originalFile.SampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.
    if (recording.Nyquist > originalAudioNyquist)
    {
        nyquistBin = (int)Math.Floor(originalAudioNyquist / binWidth);
    }

    // vi: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    // NOTE(review): aci1 is never read below. The calls are left in place because they go through
    // project helpers; confirm they have no required side effects before deleting them.
    var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);
    double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
    double aci1 = aciArray.Average();

    // ii: FRAME ENERGIES -
    // convert signal to decibels and subtract background noise.
    // NOTE(review): dBarray is never read below; kept for the same reason as the ACI calculation above.
    double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
    var results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), StandardDeviationCount);
    var dBarray = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

    //// vii: remove background noise from the full spectrogram i.e. BIN 1 to Nyquist
    //spectrogramData = MatrixTools.Submatrix(spectrogramData, 0, 1, spectrogramData.GetLength(0) - 1, nyquistBin);
    //const double SpectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
    //double[] modalValues = SNR.CalculateModalValues(spectrogramData); // calculate modal value for each freq bin.
    //modalValues = DataTools.filterMovingAverage(modalValues, 7); // smooth the modal profile
    //spectrogramData = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogramData, modalValues);
    //spectrogramData = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogramData, SpectralBgThreshold);

    // set up the output
    // The header is joined from a single list of names. The previous format strings had only
    // twelve placeholders for these fourteen names, so "index1" and "index2" were silently dropped.
    string[] columnNames =
    {
        "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2",
    };

    if (Verbose)
    {
        LoggedConsole.WriteLine(string.Join(", ", columnNames));
    }

    // NOTE(review): sb only ever receives the header and is not written anywhere in this method.
    StringBuilder sb = null;
    if (WriteOutputFile)
    {
        string header = string.Join(",", columnNames);
        sb = new StringBuilder(header + "\n");
    }

    Dictionary<string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);
    return Tuple.Create(dict, audioDuration);
} //Analysis()
} //Analysis()

/// <summary>
/// Calculates a set of indices relevant to rain and cicadas from a short (10 seconds) chunk of audio:
/// frame activity, background noise, SNR, average signal level, temporal and spectral entropy,
/// a spike index, the acoustic complexity index and the low/mid/high frequency band cover.
/// </summary>
/// <param name="signal">signal envelope of a 10s chunk of audio</param>
/// <param name="spectrogram">spectrogram of a 10s chunk of audio</param>
/// <param name="lowFreqBound">boundary between the low and mid frequency bands; divided by <paramref name="binWidth"/> to obtain a bin index</param>
/// <param name="midFreqBound">boundary between the mid and high frequency bands; passed on to the spectral cover calculation</param>
/// <param name="frameDuration">duration of one frame; passed on to the spectral cover calculation</param>
/// <param name="binWidth">width of one frequency bin of the spectrogram</param>
/// <returns>a <c>RainStruct</c> holding the calculated indices</returns>
public static RainStruct Get10SecondIndices(double[] signal, double[,] spectrogram, int lowFreqBound, int midFreqBound, TimeSpan frameDuration, double binWidth)
{
    // i: FRAME ENERGIES - envelope to decibels, background noise subtracted, negatives set to zero
    double noiseSdCount = 0.1;
    var envelopeDb = SNR.Signal2Decibels(signal);
    var noiseReduction = SNR.SubtractBackgroundNoiseFromWaveform_dB(envelopeDb, noiseSdCount); //use Lamel et al.
    var frameDb = SNR.TruncateNegativeValues2Zero(noiseReduction.NoiseReducedSignal);

    // flag every frame whose level reaches the activity threshold, then count the flags
    bool[] isActive = new bool[frameDb.Length];
    for (int frame = 0; frame < frameDb.Length; frame++)
    {
        isActive[frame] = frameDb[frame] >= ActivityAndCover.DefaultActivityThresholdDb;
    }

    int activeTotal = DataTools.CountTrues(isActive);

    double spikeThreshold = 0.05;
    double spikeIndex = CalculateSpikeIndex(signal, spikeThreshold);

    RainStruct indices; // struct in which to store all indices
    indices.activity = activeTotal / (double)frameDb.Length; // fraction of frames having acoustic activity
    indices.bgNoise = noiseReduction.NoiseMode; // bg noise in dB
    indices.snr = noiseReduction.Snr;
    indices.avSig_dB = 20 * Math.Log10(signal.Average()); // 10 times log of amplitude squared
    indices.temporalEntropy = DataTools.EntropyNormalised(DataTools.SquareValues(signal)); // ENTROPY of ENERGY ENVELOPE
    indices.spikes = spikeIndex;

    // ii: cut away everything below the low/mid boundary bin
    int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);
    int lastFrame = spectrogram.GetLength(0) - 1;
    int lastBin = spectrogram.GetLength(1) - 1;
    var midbandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, lastFrame, lastBin);

    // iii: ENTROPY OF AVERAGE SPECTRUM - at this point the spectrogram is still an amplitude spectrogram
    var spectra = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(midbandSpectrogram);
    indices.spectralEntropy = DataTools.EntropyNormalised(spectra.Item1); // ENTROPY of spectral averages
    indices.spectralEntropy = double.IsNaN(indices.spectralEntropy) ? 1.0 : indices.spectralEntropy;

    // iv: Acoustic Complexity Index on the AMPLITUDE SPECTRUM, averaged over the returned array
    var aciValues = AcousticComplexityIndex.CalculateAci(midbandSpectrogram);
    indices.ACI = aciValues.Average();

    // v: background noise removal from the spectrogram is currently disabled (lines retained below);
    //    only the threshold is still used, as an argument to the spectral cover calculation.
    double spectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
    //double[] modalValues = SNR.CalculateModalValues(spectrogram); //calculate modal value for each freq bin.
    //modalValues = DataTools.filterMovingAverage(modalValues, 7); //smooth the modal profile
    //spectrogram = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogram, modalValues);
    //spectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogram, spectralBgThreshold);

    // vi: SPECTROGRAM ANALYSIS - SPECTRAL COVER, calculated on the spectrogram exactly as supplied by the caller
    SpectralActivity cover = ActivityAndCover.CalculateSpectralEvents(spectrogram, spectralBgThreshold, frameDuration, lowFreqBound, midFreqBound, binWidth);
    indices.lowFreqCover = cover.LowFreqBandCover;
    indices.midFreqCover = cover.MidFreqBandCover;
    indices.hiFreqCover = cover.HighFreqBandCover;

    return indices;
}
// Scores each frame of a recording by combining three per-frame measures: a fuzzy frequency score
// derived from zero crossings, a score derived from the per-frame sample standard deviation, and
// an oscillation score derived from the noise-reduced decibel envelope.
// Returns a tuple of (combined scores, the filtered recording, the decibel array, the standard
// deviations converted by ConvertSamples2Milliseconds).
// NOTE(review): the modifiers and return type of this method are not visible here; the header is reproduced unchanged.
FrogRibbitRecognizer(AudioRecording recording, string filterName, int midBandFreq, double windowDuration = 5.0, double windowOverlap = 0.5, double dctDuration = 0.5, double dctThreshold = 0.4, bool normaliseDCT = false, int minOscilRate = 11, int maxOscilRate = 17, double maxOscilScore = 20.0)
{
    int sampleRate = recording.SampleRate;

    // windowDuration is divided by 1000 here, i.e. it is treated as milliseconds
    int windowSize = (int)(windowDuration * sampleRate / 1000.0);
    double frameStep = windowDuration * (1 - windowOverlap);
    double framesPerSecond = 1000 / frameStep;

    //i: Apply filter
    Log.WriteLine("# Filter: " + filterName);
    var filteredRecording = AudioRecording.Filter_IIR(recording, filterName);
    int signalLength = filteredRecording.WavReader.Samples.Length;

    //ii: FRAMING
    int[,] frameIDs = DSP_Frames.FrameStartEnds(signalLength, windowSize, windowOverlap);
    int frameCount = frameIDs.GetLength(0);

    //iii: EXTRACT ENVELOPE and ZERO-CROSSINGS
    Log.WriteLine("# Extract Envelope and Zero-crossings.");
    var frameStats = DSP_Frames.ExtractEnvelopeAndZeroCrossings(filteredRecording.WavReader.Samples, sampleRate, windowSize, windowOverlap);
    double[] envelope = frameStats.Item2;
    double[] zeroCrossings = frameStats.Item3;
    double[] sampleStd = frameStats.Item5;
    Log.WriteLine("# Normalize values.");

    //iv: FRAME ENERGIES
    double noiseSdCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
    var envelopeDb = SNR.Signal2Decibels(envelope);
    var noiseReduction = SNR.SubtractBackgroundNoiseFromWaveform_dB(envelopeDb, noiseSdCount);
    var dBarray = SNR.TruncateNegativeValues2Zero(noiseReduction.NoiseReducedSignal);

    //v: CONVERSIONS: zero crossings to hertz, then normalise to a fuzzy frequency score
    int[] freq = DSP_Frames.ConvertZeroCrossings2Hz(zeroCrossings, windowSize, sampleRate);
    int sideBand = (int)(midBandFreq * 0.1);
    var fuzzyFreq = FuzzyFrequency(freq, midBandFreq, sideBand);

    //vi: CONVERSIONS: sample standard deviations to milliseconds, then normalise to probabilities
    double[] tsd = DSP_Frames.ConvertSamples2Milliseconds(sampleStd, sampleRate); // time standard deviation
    double[] tsdScores = NormalDist.Values2Probabilities(tsd);

    //vii: GET OSCILLATION SCORE AND NORMALIZE
    double[] rawOscillations = Oscillations2010.DetectOscillationsInScoreArray(dBarray, dctDuration, framesPerSecond, dctThreshold, normaliseDCT, minOscilRate, maxOscilRate);

    // scale by maxOscilScore and cap each value at 1.0
    int scoreCount = dBarray.Length;
    var oscillations = new double[scoreCount];
    for (int frame = 0; frame < scoreCount; frame++)
    {
        double scaled = rawOscillations[frame] / maxOscilScore;
        oscillations[frame] = scaled > 1.0 ? 1.0 : scaled;
    }

    //viii: COMBINE the SCORES as the product of the three per-frame measures
    Log.WriteLine("# Combine Scores.");
    var combinedScores = new double[scoreCount];
    for (int frame = 0; frame < scoreCount; frame++)
    {
        combinedScores[frame] = fuzzyFreq[frame] * tsdScores[frame] * oscillations[frame];
    }

    //ix: fill in the oscillation scores
    combinedScores = Oscillations2010.FillScoreArray(combinedScores, dctDuration, framesPerSecond);

    return Tuple.Create(combinedScores, filteredRecording, dBarray, tsd);
}